Regular expressions are a very powerful tool, but the conventional wisdom is that once they are written, they are very difficult to understand, so maintaining them is not a pleasant experience. Collected here are tips to help make them more readable.
PHP PCRE β PHP 7.3, PCRE2 β . PHP , , . PHP , ctype*, URL-, β . IDE , , , .
, , . . , - PHP ( PHP 7.3). , . , PHP, JavaScript , ES2018.
:
-;
;
;
;
;
.
-
β . -, β . :
/(foo|bar)/i
(foo|bar)
β , i
β , , /
β . /
, . , ~, !, @, #, $
. , , \
β . : {}, (), [], <>
, . , , , . - , . , , . (, ^, $,
, ), . , , , . , /
, β , , URL-. :
preg_match('/^https:\/\/example.com\/path/i', $uri);
“#”, , :
preg_match('#^https://example.com/path#i', $uri);
- . . , .
, *
, +
, $
. , /Username: @[a-z\.0-9]/
“.” , .
, , . , -
. , , , , .
, /[A-Z]/
, A Z. (/[A\-Z]/)
, β , A, Z . , , , . , /[AZ-]/
, /[A\-Z]/
, .
( , ), . , :
/Price: [0-9\-\$\.\+]+/
/Price: [0-9$.+-]+/
X
, , , . , , , , . β :
preg_match('/x\yz/X', ''); // "y" β , β
:
Warning: preg_match(): Compilation failed: unrecognized character follows \ at offset 2 in ... on line ...
, ()
, , , , , , .
, “Price: €24
”.
$pattern = '/Price: (£|€)(\d+)/';
$text = 'Price: €24';
preg_match($pattern, $text, $matches);
2 , , ((£|€))
, β . , $matches
, , :
var_dump($matches);
array(3) {
[0]=> string(12) "Price: €24"
[1]=> string(3) "€"
[2]=> string(2) "24"
}
, . , , ?:
. , , . , , (£|€)
, , : (?:£|€)
.
$pattern = '/Price: (?:£|€)(\d+)/';
$text = 'Price: €24';
preg_match($pattern, $text, $matches);
var_dump($matches);
$matches
1 β :
array(2) {
[0]=> string(12) "Price: €24"
[1]=> string(2) "24"
}
, , , , , .
, . , , ,
, , :
/Price: (?<currency>£|€)(?<price>\d+)/
, (?
, , . , (?<currency>£|€)
β currency, (?<price>\d+)
β price. , , β . , :
$pattern = '/Price: (?<currency>£|€)(?<price>\d+)/';
$text = 'Price: €24';
preg_match($pattern, $text, $matches);
var_dump($matches);
:
array(5) {
[0]=> string(12) "Price: €24"
["currency"]=> string(3) "€"
[1]=> string(3) "€"
["price"]=> string(2) "24"
[2]=> string(2) "24"
}
, $matches
, , .
, , ["currency"]=> "€"
, [1]=> "€"
.
PHP , :
Warning: preg_match(): Compilation failed: two named subpatterns have the same name (PCRE2_DUPNAMES not set) at offset ... in ... on line ....
/Price: (?<currency>£|€)?(?<price>\d+)(?<currency>£|€)?/J
2 currency, , J
. , currency , . , , :
$pattern = '/Price: (?<currency>£|€)?(?<price>\d+)(?<currency>£|€)?/J';
$text = 'Price: €24£';
preg_match($pattern, $text, $matches);
var_dump($matches);
array(6) {
[0]=> string(14) "Price: €24£"
["currency"]=> string(2) "£"
[1]=> string(3) "€"
["price"]=> string(2) "24"
[2]=> string(2) "24"
[3]=> string(2) "£"
}
, . , , PHP-, , . β . .
, :
$pattern = '/Price: (?<currency>£|€)(?<price>\d+)/i';
:
$pattern = '/Price: ';
$pattern .= '(?<currency>£|€)'; // Capture currency symbols £ or €
$pattern .= '(?<price>\d+)'; // Capture price without decimals.
$pattern .= '/i'; // Flags: Case-insensitive
. x
, , . , . :
/Price: (?<currency>£|€)(?<price>\d+)/i
/Price: \s (?<currency>£|€) (?<price>\d+) /ix
, , x
. , , , . , , \s
.
x
, #
, PHP . , . , :
/Price: (?<currency>£|€)(?<price>\d+)/i
:
/Price: # Check for the label "Price:"
\s # Ensure a white-space after.
(?<currency>£|€) # Capture currency symbols £ or €
(?<price>\d+) # Capture price without decimals.
/ix
PHP, Heredoc Nowdoc . , :
$pattern = <<<PATTERN
/Price: # Check for the label "Price:"
\s # Ensure a white-space after.
(?<currency>£|€) # Capture currency symbols £ or €
(?<price>\d+) # Capture price without decimals.
# Flags: Case-insensitive
/ix
PATTERN;
preg_match($pattern, 'Price: £42', $matches);
, , , . , β \d
, , [0-9]
. \D
, β , [^0-9]
. , , , , , :
/Number: [0-9][^0-9]/
:
/Number: \d\D/
\w
β , ,[A-Za-z0-9_]
,
/[A-Za-z0-9_]/
:
/\w/
[:xdigit:]
β ,[A-Fa-f0-9]
,
/[A-Fa-f0-9]/
:
/[[:xdigit:]]/
\s
β ,[ \t\r\n\v\f]
,
/[ \t\r\n\v\f]/
/\s/
/u
, , . \p{_}
, _
β . \p
"p" , \P{FOO}
, β , . , , , \p{Sc}
, , , , , . , : \p{Currency_Symbol}
, PHP.
:
$pattern = '/Price: \p{Sc}\d+/u';
:
$text = 'Price: ¥42';
, . , , , . , . , \p{Sinhala}
, \x{0D80}-\x{0DFF}
. , :
$pattern = '/[\x{0D80}-\x{0DFF}]/u';
, :
$pattern = '/\p{Sinhala}/u';
,
$text = 'පීඑච්පී.වොච්';
$contains_sinhala = preg_match($pattern, $text);
, , , !
P.S. β - . , .