How to improve the readability of regular expressions in PHP

Regular expressions are a very powerful tool, but the conventional wisdom is that once they are written, they are very difficult to understand, so maintaining them is not a pleasant experience. Collected here are tips to help make them more readable.





PHP PCRE — PHP 7.3, PCRE2 — . PHP , , . PHP , ctype*, URL-, — . IDE , , , .





, , . . , - PHP ( PHP 7.3). , . , PHP, JavaScript , ES2018.





:





  • -;





  • ;





  • ;





  • ;





  • ;





  • .





-

— . -, — . :





/(foo|bar)/i
      
      



(foo|bar)



— , i



— , , /



— . /



, . , ~, !, @, #, $



. , , \



— . : {}, (), [], <>



, . , , , . - , . , , . (, ^, $,



, ), . , , , . , /



, — , , URL-. : 





preg_match('/^https:\/\/example.com\/path/i', $uri);
      
      



“#”, , :





preg_match('#^https://example.com/path#i', $uri);
      
      



- . . , .



, *



, +



, $



. , /Username: @[a-z\.0-9]/



“.” , . 





, , . , -



. , , , , .





, /[A-Z]/



, A Z. (/[A\-Z]/)



, — , A, Z . , , , . , /[AZ-]/



, /[A\-Z]/



, .





( , ), . , :





/Price: [0-9\-\$\.\+]+/
      
      







/Price: [0-9$.+-]+/
      
      



X



, ,   , . , , , , . — :





preg_match('/x\yz/X', ''); //  "y" — ,   —  
      
      



:





Warning: preg_match(): Compilation failed: unrecognized character follows \ at offset 2 in ... on line ...
      
      



, ()



, , ,   , , , .





, “Price: €24



”.





$pattern = '/Price: (£|€)(\d+)/';
$text    = 'Price: €24';
preg_match($pattern, $text, $matches);
      
      



2 , , ((£|€))



, β€” . , $matches



, ,  :





var_dump($matches);

array(3) {
  [0]=> string(12) "Price: €24"
  [1]=> string(3) "€"
  [2]=> string(2) "24"
}
      
      



, . , , ?:



. , , . , , (£|€)



, , : (?:£|€)



.





$pattern = '/Price: (?:£|€)(\d+)/';
$text    = 'Price: €24';
preg_match($pattern, $text, $matches);
var_dump($matches);
      
      



$matches



1 — :





array(2) {
  [0]=> string(12) "Price: €24"
  [1]=> string(2) "24"
}
      
      



, , , , , .





, . , , ,  





, , :





/Price: (?<currency>£|€)(?<price>\d+)/
      
      



, (?



, , . , (?<currency>£|€)



  — currency, (?<price>\d+)



— price. , , — . , :





$pattern = '/Price: (?<currency>£|€)(?<price>\d+)/';
$text    = 'Price: €24';
preg_match($pattern, $text, $matches);
var_dump($matches);
      
      



:





array(5) {
  [0]=> string(12) "Price: €24"
  ["currency"]=> string(3) "€"
  [1]=> string(3) "€"
  ["price"]=> string(2) "24"
  [2]=> string(2) "24"
}
      
      



, $matches



, , .





 , , ["currency"]=> "€"



, [1]=> "€"



.





PHP , : 





Warning: preg_match(): Compilation failed: two named subpatterns have the same name (PCRE2_DUPNAMES not set) at offset ... in ... on line ....
      
      



, J



(UPD: , PHP 7.2.0, ?J



):





/Price: (?<currency>£|€)?(?<price>\d+)(?<currency>£|€)?/J
      
      



2 currency, , J



. , currency , . , , :





$pattern = '/Price: (?<currency>£|€)?(?<price>\d+)(?<currency>£|€)?/J';
$text    = 'Price: €24£';
preg_match($pattern, $text, $matches);
var_dump($matches);

array(6) {
  [0]=> string(14) "Price: €24£"
  ["currency"]=> string(2) "£"
  [1]=> string(3) "€"
  ["price"]=> string(2) "24"
  [2]=> string(2) "24"
  [3]=> string(2) "£"
}
      
      



, . , , PHP-, , . — . .





, :





$pattern  = '/Price: (?<currency>£|€)(?<price>\d+)/i';
      
      



:





$pattern  = '/Price: ';
$pattern .= '(?<currency>£|€)'; // Capture currency symbols £ or €
$pattern .= '(?<price>\d+)'; // Capture price without decimals.
$pattern .= '/i'; // Flags: Case-insensitive
      
      



. x



, , . , . :





/Price: (?<currency>£|€)(?<price>\d+)/i
      
      







/Price:  \s  (?<currency>£|€)  (?<price>\d+)  /ix
      
      



, , x



. , , , . , , \s



.





x



, #



, PHP . , . , :





/Price: (?<currency>£|€)(?<price>\d+)/i
      
      



:





/Price:           # Check for the label "Price:"
\s                # Ensure a white-space after.
(?<currency>£|€)  # Capture currency symbols £ or €
(?<price>\d+)     # Capture price without decimals.
/ix
      
      



PHP, Heredoc Nowdoc . , :





$pattern = <<<PATTERN
  /Price:           # Check for the label "Price:"
  \s                # Ensure a white-space after.
  (?<currency>£|€)  # Capture currency symbols £ or €
  (?<price>\d+)     # Capture price without decimals.
                    # Flags after the closing delimiter: i (case-insensitive), x (extended)
  /ix
PATTERN;

preg_match($pattern, 'Price: £42', $matches);

      
      



, , , . , — \d



,  , [0-9]



. \D



, — , [^0-9]



. , , , , , : 





/Number: [0-9][^0-9]/
      
      



:





/Number: \d\D/
      
      



, . :





  • \w



    — , , [A-Za-z0-9_]



    ,





 





/[A-Za-z0-9_]/
      
      



:





/\w/
      
      



  • [:xdigit:]



    — , [A-Fa-f0-9]



    ,









/[A-Fa-f0-9]/
      
      



:





/[[:xdigit:]]/
      
      



  • \s



    — ,  [ \t\r\n\v\f]



    ,









/[ \t\r\n\v\f]/
      
      







/\s/
      
      



/u



, , . \p{_}



, _



— . \p



"p" , \P{FOO}



, — , . , , , \p{Sc}



, , , , , . , : \p{Currency_Symbol}



, PHP.





:





$pattern = '/Price: \p{Sc}\d+/u';
      
      



:





$text = 'Price: ¥42';
      
      



, . , , , . , . , \p{Sinhala}



, \x{0D80}-\x{0DFF}



. , :





$pattern = '/[\x{0D80}-\x{0DFF}]/u';
      
      



, :





$pattern = '/\p{Sinhala}/u';
      
      



,





$text = 'පීඑච්පී.වොච්';
$contains_sinhala = preg_match($pattern, $text);
      
      



, , , !





P.S. — - . , .








All Articles