PCRE2 C++ Wrapper 1.2.4
pcre2cpp
Loading...
Searching...
No Matches
regex.hpp
Go to the documentation of this file.
1/*
2 * pcre2cpp - PCRE2 cpp wrapper
3 *
4 * Licensed under the BSD 3-Clause License with Attribution Requirement.
5 * See the LICENSE file for details: https://github.com/MAIPA01/pcre2cpp/blob/main/LICENSE
6 *
7 * Copyright (c) 2025, Patryk Antosik (MAIPA01)
8 *
9 * PCRE2 library included in this project:
10 * Copyright (c) 2016-2024, University of Cambridge.
11 *
12 * See the LICENSE_PCRE2 file for details: https://github.com/MAIPA01/pcre2cpp/blob/main/LICENSE_PCRE2
13 */
14
15#pragma once
16#ifndef _PCRE2CPP_REGEX_HPP_
17 #define _PCRE2CPP_REGEX_HPP_
18
19 #include <pcre2cpp/config.hpp>
20
21 #if !_PCRE2CPP_HAS_CXX17
22_PCRE2CPP_ERROR("This is only available for c++17 and greater!");
23 #else
24
30 #include <pcre2cpp/types.hpp>
32
33namespace pcre2cpp {
39 template<utf_type utf>
40 class basic_regex {
41 private:
// Encoding-specific PCRE2 types and entry points, selected by the `utf` template argument.
42 using _pcre2_data_t = utils::pcre2_data<utf>;
43
// Shorthand aliases for the types exposed by _pcre2_data_t and for the helper types this class uses.
44 using _code_type = typename _pcre2_data_t::code_type;
45 using _code_ptr = std::shared_ptr<_code_type>;
46 using _match_data_type = typename _pcre2_data_t::match_data_type;
47 using _match_data_ptr = std::shared_ptr<_match_data_type>;
48 using _string_type = typename _pcre2_data_t::string_type;
49 using _string_view_type = typename _pcre2_data_t::string_view_type;
50 using _string_char_type = typename _pcre2_data_t::string_char_type;
51 using _match_value_type = basic_match_value<utf>;
52 using _match_result_type = basic_match_result<utf>;
53 using _sptr_type = typename _pcre2_data_t::sptr_type;
// Name of a capture group -> index stored for it by the constructor.
54 using _named_sub_values_table = std::unordered_map<_string_type, size_t>;
55 using _named_sub_values_table_ptr = std::shared_ptr<_named_sub_values_table>;
56 using _uchar_type = typename _pcre2_data_t::uchar_type;
// The exception type only exists when the library is built with exception support.
57 #if _PCRE2CPP_HAS_EXCEPTIONS
58 using _regex_exception = basic_regex_exception<utf>;
59 #endif
60
// Compiled pattern; stays null when compilation fails, which is what is_initialized() tests.
62 _code_ptr _code = nullptr;
// Match data block created from the compiled pattern and passed to every match call.
64 _match_data_ptr _match_data = nullptr;
// Table of named groups filled in by the constructor and handed to the match results.
66 _named_sub_values_table_ptr _named_sub_values = nullptr;
67
// Error code written by the compile call in the constructor.
69 int _error_code = 0;
// Error offset written by the compile call in the constructor.
71 size_t _error_offset = 0;
72
73 static _PCRE2CPP_CONSTEXPR17 _string_type _get_regex_not_initialized_error() noexcept {
74 #if _PCRE2CPP_HAS_UTF8
75 if _PCRE2CPP_CONSTEXPR17 (utf == utf_type::UTF_8) { return "Regex was not initialized!!"; }
76 else
77 #endif
78 #if _PCRE2CPP_HAS_UTF16
80 return u"Regex was not initialized!!";
81 }
82 else
83 #endif
84 #if _PCRE2CPP_HAS_UTF32
86 return U"Regex was not initialized!!";
87 }
88 else
89 #endif
90 {
91 return _string_type();
92 }
93 }
94
95 public:
97 _PCRE2CPP_CONSTEXPR20 explicit basic_regex(const _string_view_type pattern,
99 // Compile Code
100 _code_type* code = _pcre2_data_t::compile(reinterpret_cast<_sptr_type>(pattern.data()), pattern.size(), opts,
101 &_error_code, &_error_offset, nullptr);
102
103 if (code == nullptr) {
104 #if !_PCRE2CPP_HAS_EXCEPTIONS
105 std::string message = fmt::format("Failed to initialize code: {}",
106 convert_any_utf_to_utf8<utf>(generate_error_message<utf>(_error_code, _error_offset)));
107 pcre2cpp_assert(false, "{}", message);
108 return;
109 #else
110 throw _regex_exception(_error_code, _error_offset);
111 #endif
112 }
113
114 _code = std::shared_ptr<_code_type>(code, _pcre2_data_t::code_free);
115
116 // Get Named Sub Values
117 _named_sub_values = std::make_shared<_named_sub_values_table>();
118
119 size_t name_count = 0;
120 _uchar_type* name_table = nullptr;
121 size_t name_entry_size = 0;
122
123 _pcre2_data_t::get_info(_code.get(), PCRE2_INFO_NAMECOUNT, &name_count);
124 _pcre2_data_t::get_info(_code.get(), PCRE2_INFO_NAMETABLE, &name_table);
125 _pcre2_data_t::get_info(_code.get(), PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
126
127 for (size_t i = 0; i != name_count; ++i) {
128 _uchar_type* entry = name_table + i * name_entry_size + 2;
129 const int index = _pcre2_data_t::substring_number_from_name(_code.get(), entry);
130
131 _uchar_type* entry_end = entry + 1;
132 while (*entry_end != 0 && entry_end - entry < name_entry_size - 3) { entry_end += 1; }
133 _named_sub_values->emplace(_string_type(entry, entry_end), static_cast<size_t>(index) - 1);
134 }
135
136 // Create Match Data
137 _match_data_type* match_data = _pcre2_data_t::match_data_from_pattern(_code.get(), nullptr);
138 _match_data = std::shared_ptr<_match_data_type>(match_data, _pcre2_data_t::match_data_free);
139 }
140
142 _PCRE2CPP_CONSTEXPR17 basic_regex(const basic_regex& other) noexcept = default;
144 _PCRE2CPP_CONSTEXPR17 basic_regex(basic_regex&& other) noexcept = default;
145
147 _PCRE2CPP_CONSTEXPR20 ~basic_regex() noexcept = default;
148
150 _PCRE2CPP_CONSTEXPR17 basic_regex& operator=(const basic_regex& other) noexcept = default;
152 _PCRE2CPP_CONSTEXPR17 basic_regex& operator=(basic_regex&& other) noexcept = default;
153
154 #pragma region CHECK_INITIALIZATION
155
157 _PCRE2CPP_CONSTEXPR17 bool is_initialized() const noexcept { return _code != nullptr; }
158
159 #pragma endregion CHECK_INITIALIZATION
160
161 #pragma region ERROR
162
164 _PCRE2CPP_CONSTEXPR17 _string_type get_error_message() const noexcept {
165 if (is_initialized()) {
166 #if _PCRE2CPP_HAS_UTF8
167 if _PCRE2CPP_CONSTEXPR17 (utf == utf_type::UTF_8) { return ""; }
168 else
169 #endif
170 #if _PCRE2CPP_HAS_UTF16
172 return L"";
173 }
174 else
175 #endif
176 #if _PCRE2CPP_HAS_UTF32
178 return U"";
179 }
180 else
181 #endif
182 {
183 return _string_type();
184 }
185 }
186 return pcre2cpp::generate_error_message<utf>(_error_code, _error_offset);
187 }
188
189 #pragma endregion ERROR
190
192 _PCRE2CPP_CONSTEXPR17 bool match(const _string_view_type text, const size_t offset = 0,
194 if (!is_initialized()) {
195 #if !_PCRE2CPP_HAS_EXCEPTIONS
196 pcre2cpp_assert(false, "Regex was not initialized!!");
197 return false;
198 #else
199 throw _regex_exception(_get_regex_not_initialized_error());
200 #endif
201 }
202
203 const int match_code = _pcre2_data_t::match(_code.get(), reinterpret_cast<_sptr_type>(text.data()), text.size(),
204 offset, opts, _match_data.get(), nullptr);
205
206 return match_code != static_cast<int>(match_error_codes::NoMatch) && match_code > 0;
207 }
208
210 _PCRE2CPP_CONSTEXPR20 bool match(const _string_view_type text, _match_result_type& result, const size_t offset = 0,
211 const match_options opts = match_options_bits::None) const noexcept {
212 if (!is_initialized()) {
213 #if !_PCRE2CPP_HAS_EXCEPTIONS
214 pcre2cpp_assert(false, "Regex was not initialized!!");
215 return false;
216 #else
217 throw _regex_exception(_get_regex_not_initialized_error());
218 #endif
219 }
220
221 const int match_code = _pcre2_data_t::match(_code.get(), reinterpret_cast<_sptr_type>(text.data()), text.size(),
222 offset, opts, _match_data.get(), nullptr);
223
224 if (match_code == static_cast<int>(match_error_codes::NoMatch) || match_code <= 0) {
225 result = _match_result_type(static_cast<match_error_codes>(match_code));
226 return false;
227 }
228
229 const size_t* offsetVector = _pcre2_data_t::get_ovector_ptr(_match_data.get());
230 const size_t matchStart = offsetVector[0];
231 const size_t matchEnd = offsetVector[1];
232 _match_value_type value = { .relative_offset = matchStart - offset,
233 .value = _string_type(text.substr(matchStart, matchEnd - matchStart)) };
234
235 const size_t offsetVectorsCount = _pcre2_data_t::get_ovector_count(_match_data.get());
236 std::vector<std::optional<sub_match_value> > sub_values;
237 sub_values.reserve(offsetVectorsCount);
238 for (size_t i = 1; i != offsetVectorsCount; ++i) {
239 const size_t subMatchStart = offsetVector[i * 2];
240 const size_t subMatchEnd = offsetVector[i * 2 + 1];
241
242 if (subMatchStart == PCRE2_UNSET || subMatchEnd == PCRE2_UNSET) { sub_values.emplace_back(); }
243 else {
244 sub_values.push_back(sub_match_value { .relative_offset = subMatchStart - matchStart,
245 .size = subMatchEnd - subMatchStart });
246 }
247 }
248
249 result = _match_result_type(offset, value, sub_values, _named_sub_values);
250 return true;
251 }
252
254 _PCRE2CPP_CONSTEXPR17 bool match_at(const _string_view_type text, const size_t offset = 0) const noexcept {
255 _match_result_type result;
256 return match_at(text, result, offset);
257 }
258
260 _PCRE2CPP_CONSTEXPR17 bool match_at(const _string_view_type text, _match_result_type& result,
261 const size_t offset = 0) const noexcept {
262 if (!match(text, result, offset)) { return false; }
263
264 if (result.get_result_relative_offset() != 0) {
265 result = _match_result_type(offset, _named_sub_values);
266 return false;
267 }
268
269 return true;
270 }
271
273 _PCRE2CPP_CONSTEXPR17 bool match_all(const _string_view_type text, std::vector<_match_result_type>& results,
274 size_t offset = 0) const noexcept {
275 size_t start_offset = offset;
276 _match_result_type result;
277 while (match(text, result, offset)) {
278 results.emplace_back(start_offset,
279 _match_value_type { .relative_offset = offset - start_offset + result.get_result_relative_offset(),
280 .value = result.get_result_value() },
281 result.get_sub_results(), _named_sub_values);
282 offset += result.get_result_relative_offset() + result.get_result_size();
283 }
284
285 return results.size() != 0;
286 }
287 };
288
#if _PCRE2CPP_HAS_UTF8
/// Regex specialized for utf_type::UTF_8.
using u8regex = basic_regex<utf_type::UTF_8>;
#endif
#if _PCRE2CPP_HAS_UTF16
/// Regex specialized for utf_type::UTF_16.
using u16regex = basic_regex<utf_type::UTF_16>;
#endif
#if _PCRE2CPP_HAS_UTF32
/// Regex specialized for utf_type::UTF_32.
using u32regex = basic_regex<utf_type::UTF_32>;
#endif

// `regex` names the first enabled encoding, checked in the order UTF-8, UTF-16, UTF-32.
#if _PCRE2CPP_HAS_UTF8
using regex = u8regex;
#elif _PCRE2CPP_HAS_UTF16
using regex = u16regex;
#elif _PCRE2CPP_HAS_UTF32
using regex = u32regex;
#endif
306} // namespace pcre2cpp
307 #endif
308#endif
Basic PCRE2 Regex container.
Definition pcre2cpp.hpp:1665
_PCRE2CPP_CONSTEXPR17 bool match_all(const _string_view_type text, std::vector< _match_result_type > &results, size_t offset=0) const noexcept
returns true if any match was found; all matches are stored in the results array
Definition pcre2cpp.hpp:1898
_PCRE2CPP_CONSTEXPR20 basic_regex(const _string_view_type pattern, const compile_options opts=compile_options_bits::None) _PCRE2CPP_NOEXCEPT
basic regex container with pattern and compile options
Definition pcre2cpp.hpp:1722
_PCRE2CPP_CONSTEXPR17 bool match(const _string_view_type text, const size_t offset=0, const match_options opts=match_options_bits::None) const _PCRE2CPP_NOEXCEPT
returns true if match was found
Definition pcre2cpp.hpp:1817
_PCRE2CPP_CONSTEXPR17 bool is_initialized() const noexcept
returns true if regex was initialized
Definition pcre2cpp.hpp:1782
_PCRE2CPP_CONSTEXPR17 _string_type get_error_message() const noexcept
returns error message if there is any compilation error
Definition pcre2cpp.hpp:1789
_PCRE2CPP_CONSTEXPR20 ~basic_regex() noexcept=default
default destructor
_PCRE2CPP_CONSTEXPR17 bool match_at(const _string_view_type text, const size_t offset=0) const noexcept
returns true if match was found, and it has relative offset == 0
Definition pcre2cpp.hpp:1879
#define _PCRE2CPP_NOEXCEPT
Definition config.hpp:178
mstd::flags< match_options_bits > match_options
Match options flags group.
Definition pcre2cpp.hpp:998
mstd::flags< compile_options_bits > compile_options
Compile options flags group.
Definition pcre2cpp.hpp:1623
@ UTF_16
Definition pcre2cpp.hpp:357
@ UTF_32
Definition pcre2cpp.hpp:360
@ UTF_8
Definition pcre2cpp.hpp:354
@ None
No options set (default).
Definition pcre2cpp.hpp:1558
@ None
No options set (default).
Definition pcre2cpp.hpp:967
#define _PCRE2CPP_CONSTEXPR17
constexpr for c++17 and higher
Definition config.hpp:239
#define _PCRE2CPP_CONSTEXPR20
constexpr keyword for c++20 and higher
Definition config.hpp:257
#define _PCRE2CPP_ERROR(MESSAGE)
compiler error
Definition config.hpp:278
#define pcre2cpp_assert(expression,...)
pcre2cpp assert
Definition pcre2cpp.hpp:1957
Main namespace of pcre2cpp library.
basic_regex< utf_type::UTF_16 > u16regex
Definition pcre2cpp.hpp:1918
u8regex regex
Definition pcre2cpp.hpp:1925
basic_regex< utf_type::UTF_32 > u32regex
Definition pcre2cpp.hpp:1921
basic_regex< utf_type::UTF_8 > u8regex
Definition pcre2cpp.hpp:1915