From e56a44c27e2b166098da76a1dbc24da4b8d3a6e1 Mon Sep 17 00:00:00 2001 From: Elias Kosunen Date: Fri, 10 Nov 2023 03:18:37 +0200 Subject: [PATCH] Experimentation with localized [character set] reading --- src/scn/impl/algorithms/read.h | 36 +++++++++++++++++ src/scn/impl/reader/string_reader.h | 60 +++++++++++++++++++++-------- 2 files changed, 81 insertions(+), 15 deletions(-) diff --git a/src/scn/impl/algorithms/read.h b/src/scn/impl/algorithms/read.h index 8fdd9f32..3f7a203f 100644 --- a/src/scn/impl/algorithms/read.h +++ b/src/scn/impl/algorithms/read.h @@ -582,6 +582,42 @@ namespace scn { return pred(cp) || ctype_facet.is(mask, ch); }); } + + /* Forwards range to read_until_code_point with a predicate that is true for a code point when pred(cp) is true, when the ctype facet obtained from loc classifies its wide-character encoding under mask, or when inverted_mask is non-empty and the facet does not classify it under inverted_mask. */ template + simple_borrowed_iterator_t + read_until_localized_mask_or_inverted_mask_or_code_point( + Range&& range, + detail::locale_ref loc, + std::ctype_base::mask mask, + std::ctype_base::mask inverted_mask, + function_ref pred) + { + const auto& ctype_facet = get_facet>(loc); + + return read_until_code_point(SCN_FWD(range), [&](char32_t cp) { + auto ch = *encode_code_point_as_wide_character(cp, false); + return pred(cp) || ctype_facet.is(mask, ch) || + /* An empty inverted_mask means no inverted class was requested and must contribute nothing: std::ctype::is() with an empty mask is false for every character, so an unguarded negation would make this predicate true for all input. */ (inverted_mask != std::ctype_base::mask{} && !ctype_facet.is(inverted_mask, ch)); + }); + } + + /* Forwards range to read_while_code_point with a predicate that is true for a code point when pred(cp) is true, when the ctype facet obtained from loc classifies its wide-character encoding under mask, or when inverted_mask is non-empty and the facet does not classify it under inverted_mask. */ template + simple_borrowed_iterator_t + read_while_localized_mask_or_inverted_mask_or_code_point( + Range&& range, + detail::locale_ref loc, + std::ctype_base::mask mask, + std::ctype_base::mask inverted_mask, + function_ref pred) + { + const auto& ctype_facet = get_facet>(loc); + + return read_while_code_point(SCN_FWD(range), [&](char32_t cp) { + auto ch = *encode_code_point_as_wide_character(cp, false); + return pred(cp) || ctype_facet.is(mask, ch) || + /* Same guard as in the read_until variant: an empty inverted_mask must not match, otherwise every code point would be accepted. */ (inverted_mask != std::ctype_base::mask{} && !ctype_facet.is(inverted_mask, ch)); + }); + } #endif // !SCN_DISABLE_LOCALE template diff --git a/src/scn/impl/reader/string_reader.h b/src/scn/impl/reader/string_reader.h index 73674d51..2d68ec0c 100644 --- a/src/scn/impl/reader/string_reader.h +++ b/src/scn/impl/reader/string_reader.h @@ -469,11 +469,12 @@ namespace scn { return {}; } - std::pair map_localized_mask() - 
const + /* Returns {{mask, is_exhaustive}, {inverted_mask, is_inverted_exhaustive}}: the ctype masks built from the non-inverted and the inverted flags in specs.charset_specifiers, each paired with its exhaustive flag. */ std::pair, + std::pair> + map_localized_mask() const { - std::ctype_base::mask mask{}; - bool is_exhaustive = true; + std::ctype_base::mask mask{}, inverted_mask{}; + bool is_exhaustive{true}, is_inverted_exhaustive{true}; if ((specs.charset_specifiers & detail::character_set_specifier::space_literal) != @@ -532,9 +533,32 @@ namespace scn { mask |= std::ctype_base::cntrl; } - // TODO: inverted flags + /* NOTE(review): all inverted classes are OR'ed into one mask and later tested with a single negated is(inverted_mask, ch), which is true only for characters in none of the classes. If each inverted specifier is meant to match on its own (a union), combining two of them is narrower than intended - confirm the intended semantics. */ if ((specs.charset_specifiers & + detail::character_set_specifier::inverted_letters) != + detail::character_set_specifier::none) { + inverted_mask |= std::ctype_base::alpha; + } + if ((specs.charset_specifiers & + detail::character_set_specifier:: + inverted_alnum_underscore) != + detail::character_set_specifier::none) { + inverted_mask |= std::ctype_base::alnum; + is_inverted_exhaustive = false; /* presumably because '_' is not covered by the alnum class - confirm */ + } + if ((specs.charset_specifiers & + detail::character_set_specifier:: + inverted_whitespace) != + detail::character_set_specifier::none) { + inverted_mask |= std::ctype_base::space; + } + if ((specs.charset_specifiers & + detail::character_set_specifier::inverted_numbers) != + detail::character_set_specifier::none) { + inverted_mask |= std::ctype_base::digit; + } - return {mask, is_exhaustive}; + return {{mask, is_exhaustive}, + {inverted_mask, is_inverted_exhaustive}}; } #endif // !SCN_DISABLE_LOCALE @@ -648,16 +672,18 @@ namespace scn { read_source_callback cb_wrapper{helper, loc}; - const auto [mask, is_mask_exhaustive] = - helper.map_localized_mask(); + const auto [m1, m2] = helper.map_localized_mask(); /* m1: non-inverted {mask, exhaustive}; m2: inverted {mask, exhaustive} */ + const auto [mask, is_mask_exhaustive] = m1; + const auto [inverted_mask, is_inverted_mask_exhaustive] = m2; const bool has_any_ascii_literals = ranges::any_of(helper.specs.charset_literals, [](auto b) SCN_NOEXCEPT { return b != 0; }); const bool has_any_nonascii_literals = !helper.nonascii.extra_ranges.empty(); - if (is_mask_exhaustive && !has_any_ascii_literals && - !has_any_nonascii_literals) { + /* Mask-only path: taken only when no inverted class and no literals were requested. */ if (is_mask_exhaustive && is_inverted_mask_exhaustive && 
inverted_mask == std::ctype_base::mask{} && + !has_any_ascii_literals && !has_any_nonascii_literals) { if (is_inverted) { auto it = read_until_localized_mask(range, loc, mask); return check_nonempty(it, range); @@ -670,7 +696,9 @@ namespace scn { return cb_wrapper.on_localized(cp); }; - if (is_mask_exhaustive && mask == std::ctype_base::mask{}) { + /* Callback-only path: taken only when both masks are empty and exhaustive. */ if (is_mask_exhaustive && is_inverted_mask_exhaustive && + mask == std::ctype_base::mask{} && + inverted_mask == std::ctype_base::mask{}) { if (is_inverted) { auto it = read_until_code_point(range, cb); return check_nonempty(it, range); @@ -680,12 +708,14 @@ namespace scn { } if (is_inverted) { - auto it = read_until_localized_mask_or_code_point( - range, loc, mask, cb); + auto it = + read_until_localized_mask_or_inverted_mask_or_code_point( + range, loc, mask, inverted_mask, cb); return check_nonempty(it, range); } - auto it = read_while_localized_mask_or_code_point(range, loc, - mask, cb); + auto it = + read_while_localized_mask_or_inverted_mask_or_code_point( + range, loc, mask, inverted_mask, cb); return check_nonempty(it, range); } #endif // !SCN_DISABLE_LOCALE