Hi, Habr. We invite future students of the "C ++ Developer. Professional" course to sign up for an open lesson on the topic "Backend in modern C ++"
In the meantime, let's share the traditional translation of useful material.
(Regular expressions , , regex — ) — C++. . - , — 20-30% . , ( “learn once, write anywhere”).
: C++ ("flavours") , (, , ) ECMAScript.
, . , 24- (.. :), .
\b([01]?[0-9]|2[0-3]):([0-5]\d)\b
! ?
, , , 100% . . , , .
(↓), , 2-3 , , , . , , , .
, . . . (, ) , , .
https://regexone.com. . , , (<= , ) . .
, , :
std::regex std::regexerror
int main() {
try {
static const auto r = std::regex(R"(\)"); // Escape sequence error
} catch (const std::regex_error &e) {
assert(strcmp(e.what(), "Unexpected end of regex when escaping.") == 0);
assert(e.code() == std::regex_constants::error_escape);
}
return EXIT_SUCCESS;
}
std::regex
( ), ( allocator aware). , std::regex
(. C++ Weekly - Ep 74 - std::regex optimize by Jason Turner). , -, , , — std::regex::markcount()
, .
std::regex_search
int main() {
const string input = "ABC:1-> PQR:2;;; XYZ:3<<<"s;
const regex r(R"((\w+):(\w+);)");
smatch m;
if (regex_search(input, m, r)) {
assert(m.size() == 3);
assert(m[0].str() == "PQR:2;"); // Entire match
assert(m[1].str() == "PQR"); // Substring that matches 1st group
assert(m[2].str() == "2"); // Substring that matches 2nd group
assert(m.prefix().str() == "ABC:1-> "); // All before 1st character match
assert(m.suffix().str() == ";; XYZ:3<<<"); // All after last character match
// for (string &&str : m) { // Alternatively. You can also do
// cout << str << endl;
// }
}
return EXIT_SUCCESS;
}
smatch
— std::match_results, ().
std::regex_match
, , - . std::regexmatch
.
bool is_valid_email_id(string_view str) {
static const regex r(R"(\w+@\w+\.(?:com|in))");
return regex_match(str.data(), r);
}
int main() {
assert(is_valid_email_id("vishalchovatiya@ymail.com") == true);
assert(is_valid_email_id("@abc.com") == false);
return EXIT_SUCCESS;
}
return EXIT¨C14Cmatch
, std::regex¨C15Cmatch
() .
— (static const regex), («/») .
, 30 -O3. . , ISO C++. , . ( ).
std::regex_match std::regex_search
, , ? . , cppreference, . , (, StackOverflow):
int main() {
const string input = "ABC:1-> PQR:2;;; XYZ:3<<<"s;
const regex r(R"((\w+):(\w+);)");
smatch m;
assert(regex_match(input, m, r) == false);
assert(regex_search(input, m, r) == true && m.ready() == true && m[1] == "PQR");
return EXIT_SUCCESS;
}
std::regexmatch
true
, , std::regexsearch
true
, .
std::regex_iterator
std::regex_iterator
, .
#define C_ALL(X) cbegin(X), cend(X)
int main() {
const string input = "ABC:1-> PQR:2;;; XYZ:3<<<"s;
const regex r(R"((\w+):(\d))");
const vector<smatch> matches{
sregex_iterator{C_ALL(input), r},
sregex_iterator{}
};
assert(matches[0].str(0) == "ABC:1"
&& matches[0].str(1) == "ABC"
&& matches[0].str(2) == "1");
assert(matches[1].str(0) == "PQR:2"
&& matches[1].str(1) == "PQR"
&& matches[1].str(2) == "2");
assert(matches[2].str(0) == "XYZ:3"
&& matches[2].str(1) == "XYZ"
&& matches[2].str(2) == "3");
return EXIT_SUCCESS;
}
( C++11) , , std::regex_interator
. C++14.
std::regex_token_iterator
std::regextokeniterator
— , 80% . std::regexiterator
. td::regexiterator
std::regextokeniterator
,
std::regexiterator
.
std::regextokeniterator
.
std::regextoken_iterator
.
#define C_ALL(X) cbegin(X), cend(X)
int main() {
const string input = "ABC:1-> PQR:2;;; XYZ:3<<<"s;
const regex r(R"((\w+):(\d))");
// Note: vector<string> here, unlike vector<smatch> as in std::regex_iterator
const vector<string> full_match{
sregex_token_iterator{C_ALL(input), r, 0}, // Mark `0` here i.e. whole regex match
sregex_token_iterator{}
};
assert((full_match == decltype(full_match){"ABC:1", "PQR:2", "XYZ:3"}));
const vector<string> cptr_grp_1st{
sregex_token_iterator{C_ALL(input), r, 1}, // Mark `1` here i.e. 1st capture group
sregex_token_iterator{}
};
assert((cptr_grp_1st == decltype(cptr_grp_1st){"ABC", "PQR", "XYZ"}));
const vector<string> cptr_grp_2nd{
sregex_token_iterator{C_ALL(input), r, 2}, // Mark `2` here i.e. 2nd capture group
sregex_token_iterator{}
};
assert((cptr_grp_2nd == decltype(cptr_grp_2nd){"1", "2", "3"}));
return EXIT_SUCCESS;
}
std::regex_token_iterator
#define C_ALL(X) cbegin(X), cend(X)
int main() {
const string input = "ABC:1-> PQR:2;;; XYZ:3<<<"s;
const regex r(R"((\w+):(\d))");
const vector<string> inverted{
sregex_token_iterator{C_ALL(input), r, -1}, // `-1` = parts that are not matched
sregex_token_iterator{}
};
assert((inverted == decltype(inverted){
"",
"-> ",
";;; ",
"<<<",
}));
return EXIT_SUCCESS;
}
std::regex_replace
string transform_pair(string_view text, regex_constants::match_flag_type f = {}) {
static const auto r = regex(R"((\w+):(\d))");
return regex_replace(text.data(), r, "$2", f);
}
int main() {
assert(transform_pair("ABC:1, PQR:2"s) == "1, 2"s);
// Things that aren't matched are not copied
assert(transform_pair("ABC:1, PQR:2"s, regex_constants::format_no_copy) == "12"s);
return EXIT_SUCCESS;
}
, transformpair
std::regexconstants::formatnocopy
, , . std::regexconstant .
, , . , , -, , , . , ! std::regexreplace
:
int main() {
const string input = "ABC:1-> PQR:2;;; XYZ:3<<<"s;
const regex r(R"(-|>|<|;| )");
// Prints "ABC:1 PQR:2 XYZ:3 "
regex_replace(ostreambuf_iterator<char>(cout), C_ALL(input), r, " ");
return EXIT_SUCCESS;
}
(delimiter)
std::strtok
, :
#define C_ALL(X) cbegin(X), cend(X)
vector<string> split(const string& str, string_view pattern) {
const auto r = regex(pattern.data());
return vector<string>{
sregex_token_iterator(C_ALL(str), r, -1),
sregex_token_iterator()
};
}
int main() {
assert((split("/root/home/vishal", "/")
== vector<string>{"", "root", "home", "vishal"}));
return EXIT_SUCCESS;
}
string trim(string_view text) {
static const auto r = regex(R"(\s+)");
return regex_replace(text.data(), r, "");
}
int main() {
assert(trim("12 3 4 5"s) == "12345"s);
return EXIT_SUCCESS;
}
, ,
string join(const vector<string>& words, const string& delimiter) {
return accumulate(next(begin(words)), end(words), words[0],
[&delimiter](string& p, const string& word)
{
return p + delimiter + word;
});
}
vector<string> lines_containing(const string& file, const vector<string>& words) {
auto prefix = "^.*?\\b("s;
auto suffix = ")\\b.*$"s;
// ^.*?\b(one|two|three)\b.*$
const auto pattern = move(prefix) + join(words, "|") + move(suffix);
ifstream infile(file);
vector<string> result;
for (string line; getline(infile, line);) {
if(regex_match(line, regex(pattern))) {
result.emplace_back(move(line));
}
}
return result;
}
int main() {
assert((lines_containing("test.txt", {"one","two"})
== vector<string>{"This is one",
"This is two"}));
return EXIT_SUCCESS;
}
/* test.txt
This is one
This is two
This is three
This is four
*/
, ^((?!(one|two|three)).)*$
.
namespace fs = std::filesystem;
vector<fs::directory_entry> find_files(const fs::path &path, string_view rg) {
vector<fs::directory_entry> result;
regex r(rg.data());
copy_if(
fs::recursive_directory_iterator(path),
fs::recursive_directory_iterator(),
back_inserter(result),
[&r](const fs::directory_entry &entry) {
return fs::is_regular_file(entry.path()) &&
regex_match(entry.path().filename().string(), r);
});
return result;
}
int main() {
const auto dir = fs::temp_directory_path();
const auto pattern = R"(\w+\.png)";
const auto result = find_files(fs::current_path(), pattern);
for (const auto &entry : result) {
cout << entry.path().string() << endl;
}
return EXIT_SUCCESS;
}
C++ .
, https://regex101.com. regex101, ( ).
, , , .
:
(alternation), ,
com|net|org
.
.
.
.
, .
, C++ . IDE ( vscode ) Linux. , . ( , ). , , .
, ; , .
— ( 19 2020 ) . Boost, CTRE Std. , . , CppCon 2018 2019 , .