16#ifndef _PCRE2CPP_REGEX_HPP_
17 #define _PCRE2CPP_REGEX_HPP_
21 #if !_PCRE2CPP_HAS_CXX17
39 template<utf_type utf>
42 using _pcre2_data_t = utils::pcre2_data<utf>;
44 using _code_type =
typename _pcre2_data_t::code_type;
45 using _code_ptr = std::shared_ptr<_code_type>;
46 using _match_data_type =
typename _pcre2_data_t::match_data_type;
47 using _match_data_ptr = std::shared_ptr<_match_data_type>;
48 using _string_type =
typename _pcre2_data_t::string_type;
49 using _string_view_type =
typename _pcre2_data_t::string_view_type;
50 using _string_char_type =
typename _pcre2_data_t::string_char_type;
51 using _match_value_type = basic_match_value<utf>;
52 using _match_result_type = basic_match_result<utf>;
53 using _sptr_type =
typename _pcre2_data_t::sptr_type;
54 using _named_sub_values_table = std::unordered_map<_string_type, size_t>;
55 using _named_sub_values_table_ptr = std::shared_ptr<_named_sub_values_table>;
56 using _uchar_type =
typename _pcre2_data_t::uchar_type;
57 #if _PCRE2CPP_HAS_EXCEPTIONS
58 using _regex_exception = basic_regex_exception<utf>;
62 _code_ptr _code =
nullptr;
64 _match_data_ptr _match_data =
nullptr;
66 _named_sub_values_table_ptr _named_sub_values =
nullptr;
71 size_t _error_offset = 0;
74 #if _PCRE2CPP_HAS_UTF8
78 #if _PCRE2CPP_HAS_UTF16
80 return u
"Regex was not initialized!!";
84 #if _PCRE2CPP_HAS_UTF32
86 return U
"Regex was not initialized!!";
91 return _string_type();
100 _code_type* code = _pcre2_data_t::compile(
reinterpret_cast<_sptr_type
>(pattern.data()), pattern.size(), opts,
101 &_error_code, &_error_offset,
nullptr);
103 if (code ==
nullptr) {
104 #if !_PCRE2CPP_HAS_EXCEPTIONS
105 std::string message = fmt::format(
"Failed to initialize code: {}",
106 convert_any_utf_to_utf8<utf>(generate_error_message<utf>(_error_code, _error_offset)));
110 throw _regex_exception(_error_code, _error_offset);
114 _code = std::shared_ptr<_code_type>(code, _pcre2_data_t::code_free);
117 _named_sub_values = std::make_shared<_named_sub_values_table>();
119 size_t name_count = 0;
120 _uchar_type* name_table =
nullptr;
121 size_t name_entry_size = 0;
123 _pcre2_data_t::get_info(_code.get(), PCRE2_INFO_NAMECOUNT, &name_count);
124 _pcre2_data_t::get_info(_code.get(), PCRE2_INFO_NAMETABLE, &name_table);
125 _pcre2_data_t::get_info(_code.get(), PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
127 for (
size_t i = 0; i != name_count; ++i) {
128 _uchar_type* entry = name_table + i * name_entry_size + 2;
129 const int index = _pcre2_data_t::substring_number_from_name(_code.get(), entry);
131 _uchar_type* entry_end = entry + 1;
132 while (*entry_end != 0 && entry_end - entry < name_entry_size - 3) { entry_end += 1; }
133 _named_sub_values->emplace(_string_type(entry, entry_end),
static_cast<size_t>(index) - 1);
137 _match_data_type* match_data = _pcre2_data_t::match_data_from_pattern(_code.get(),
nullptr);
138 _match_data = std::shared_ptr<_match_data_type>(match_data, _pcre2_data_t::match_data_free);
154 #pragma region CHECK_INITIALIZATION
159 #pragma endregion CHECK_INITIALIZATION
166 #if _PCRE2CPP_HAS_UTF8
170 #if _PCRE2CPP_HAS_UTF16
176 #if _PCRE2CPP_HAS_UTF32
183 return _string_type();
186 return pcre2cpp::generate_error_message<utf>(_error_code, _error_offset);
189 #pragma endregion ERROR
195 #if !_PCRE2CPP_HAS_EXCEPTIONS
199 throw _regex_exception(_get_regex_not_initialized_error());
203 const int match_code = _pcre2_data_t::match(_code.get(),
reinterpret_cast<_sptr_type
>(text.data()), text.size(),
204 offset, opts, _match_data.get(),
nullptr);
206 return match_code !=
static_cast<int>(match_error_codes::NoMatch) && match_code > 0;
213 #if !_PCRE2CPP_HAS_EXCEPTIONS
217 throw _regex_exception(_get_regex_not_initialized_error());
221 const int match_code = _pcre2_data_t::match(_code.get(),
reinterpret_cast<_sptr_type
>(text.data()), text.size(),
222 offset, opts, _match_data.get(),
nullptr);
224 if (match_code ==
static_cast<int>(match_error_codes::NoMatch) || match_code <= 0) {
225 result = _match_result_type(
static_cast<match_error_codes
>(match_code));
229 const size_t* offsetVector = _pcre2_data_t::get_ovector_ptr(_match_data.get());
230 const size_t matchStart = offsetVector[0];
231 const size_t matchEnd = offsetVector[1];
232 _match_value_type value = { .relative_offset = matchStart - offset,
233 .value = _string_type(text.substr(matchStart, matchEnd - matchStart)) };
235 const size_t offsetVectorsCount = _pcre2_data_t::get_ovector_count(_match_data.get());
236 std::vector<std::optional<sub_match_value> > sub_values;
237 sub_values.reserve(offsetVectorsCount);
238 for (
size_t i = 1; i != offsetVectorsCount; ++i) {
239 const size_t subMatchStart = offsetVector[i * 2];
240 const size_t subMatchEnd = offsetVector[i * 2 + 1];
242 if (subMatchStart == PCRE2_UNSET || subMatchEnd == PCRE2_UNSET) { sub_values.emplace_back(); }
244 sub_values.push_back(sub_match_value { .relative_offset = subMatchStart - matchStart,
245 .size = subMatchEnd - subMatchStart });
249 result = _match_result_type(offset, value, sub_values, _named_sub_values);
255 _match_result_type result;
256 return match_at(text, result, offset);
261 const size_t offset = 0) const noexcept {
262 if (!
match(text, result, offset)) {
return false; }
264 if (result.get_result_relative_offset() != 0) {
265 result = _match_result_type(offset, _named_sub_values);
274 size_t offset = 0) const noexcept {
275 size_t start_offset = offset;
276 _match_result_type result;
277 while (
match(text, result, offset)) {
278 results.emplace_back(start_offset,
279 _match_value_type { .relative_offset = offset - start_offset + result.get_result_relative_offset(),
280 .value = result.get_result_value() },
281 result.get_sub_results(), _named_sub_values);
282 offset += result.get_result_relative_offset() + result.get_result_size();
285 return results.size() != 0;
289 #if _PCRE2CPP_HAS_UTF8
292 #if _PCRE2CPP_HAS_UTF16
295 #if _PCRE2CPP_HAS_UTF32
299 #if _PCRE2CPP_HAS_UTF8
301 #elif _PCRE2CPP_HAS_UTF16
303 #elif _PCRE2CPP_HAS_UTF32
Basic PCRE2 Regex container.
Definition pcre2cpp.hpp:1665
_PCRE2CPP_CONSTEXPR17 bool match_all(const _string_view_type text, std::vector< _match_result_type > &results, size_t offset=0) const noexcept
returns true if any match was found; all results are stored in the results array
Definition pcre2cpp.hpp:1898
_PCRE2CPP_CONSTEXPR20 basic_regex(const _string_view_type pattern, const compile_options opts=compile_options_bits::None) _PCRE2CPP_NOEXCEPT
constructs a basic regex container from a pattern and compile options
Definition pcre2cpp.hpp:1722
_PCRE2CPP_CONSTEXPR17 bool match(const _string_view_type text, const size_t offset=0, const match_options opts=match_options_bits::None) const _PCRE2CPP_NOEXCEPT
returns true if a match was found
Definition pcre2cpp.hpp:1817
_PCRE2CPP_CONSTEXPR17 bool is_initialized() const noexcept
returns true if regex was initialized
Definition pcre2cpp.hpp:1782
_PCRE2CPP_CONSTEXPR17 _string_type get_error_message() const noexcept
returns error message if there is any compilation error
Definition pcre2cpp.hpp:1789
_PCRE2CPP_CONSTEXPR20 ~basic_regex() noexcept=default
default destructor
_PCRE2CPP_CONSTEXPR17 bool match_at(const _string_view_type text, const size_t offset=0) const noexcept
returns true if a match was found and its relative offset is 0
Definition pcre2cpp.hpp:1879
#define _PCRE2CPP_NOEXCEPT
Definition config.hpp:178
mstd::flags< match_options_bits > match_options
Match options flags group.
Definition pcre2cpp.hpp:998
mstd::flags< compile_options_bits > compile_options
Compile options flags group.
Definition pcre2cpp.hpp:1623
@ UTF_16
Definition pcre2cpp.hpp:357
@ UTF_32
Definition pcre2cpp.hpp:360
@ UTF_8
Definition pcre2cpp.hpp:354
@ None
No options set (default).
Definition pcre2cpp.hpp:1558
@ None
No options set (default).
Definition pcre2cpp.hpp:967
#define _PCRE2CPP_CONSTEXPR17
constexpr keyword for C++17 and higher
Definition config.hpp:239
#define _PCRE2CPP_CONSTEXPR20
constexpr keyword for C++20 and higher
Definition config.hpp:257
#define _PCRE2CPP_ERROR(MESSAGE)
compiler error
Definition config.hpp:278
#define pcre2cpp_assert(expression,...)
pcre2cpp assert
Definition pcre2cpp.hpp:1957
Main namespace of pcre2cpp library.
basic_regex< utf_type::UTF_16 > u16regex
Definition pcre2cpp.hpp:1918
u8regex regex
Definition pcre2cpp.hpp:1925
basic_regex< utf_type::UTF_32 > u32regex
Definition pcre2cpp.hpp:1921
basic_regex< utf_type::UTF_8 > u8regex
Definition pcre2cpp.hpp:1915