Skip to content

Commit

Permalink
Experimentation with localized [character set] reading
Browse files Browse the repository at this point in the history
  • Loading branch information
eliaskosunen committed Nov 10, 2023
1 parent c04bdfc commit e56a44c
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 15 deletions.
36 changes: 36 additions & 0 deletions src/scn/impl/algorithms/read.h
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,42 @@ namespace scn {
return pred(cp) || ctype_facet.is(mask, ch);
});
}

/**
 * Localized "read until" over code points, with both a regular and an
 * inverted character-class mask.
 *
 * Delegates to `read_until_code_point` with a predicate that is true for
 * a code point `cp` when any of the following holds:
 *  - `pred(cp)` is true,
 *  - the wide-character form of `cp` is classified by `mask` in the
 *    `std::ctype<wchar_t>` facet of `loc`,
 *  - `inverted_mask` is non-empty and the wide-character form of `cp`
 *    is NOT classified by `inverted_mask`.
 *
 * \param range          Source range, forwarded to `read_until_code_point`.
 * \param loc            Locale reference the ctype facet is taken from.
 * \param mask           Classes that match directly; may be empty.
 * \param inverted_mask  Classes whose complement matches; may be empty,
 *                       in which case it contributes nothing.
 * \param pred           Additional per-code-point predicate.
 * \return Whatever `read_until_code_point` returns for the combined
 *         predicate.
 */
template <typename Range>
simple_borrowed_iterator_t<Range>
read_until_localized_mask_or_inverted_mask_or_code_point(
    Range&& range,
    detail::locale_ref loc,
    std::ctype_base::mask mask,
    std::ctype_base::mask inverted_mask,
    function_ref<bool(char32_t)> pred)
{
    const auto& ctype_facet = get_facet<std::ctype<wchar_t>>(loc);

    // `is()` with an empty mask classifies nothing, so negating it would
    // be true for every character. Skip the inverted test entirely when
    // no inverted class was requested.
    const bool has_inverted_mask =
        inverted_mask != std::ctype_base::mask{};

    return read_until_code_point(SCN_FWD(range), [&](char32_t cp) {
        // NOTE(review): the result is dereferenced unchecked; presumably
        // the conversion cannot fail with these arguments -- confirm.
        auto ch = *encode_code_point_as_wide_character(cp, false);
        // NOTE(review): with several inverted classes OR'ed into
        // `inverted_mask`, this matches only characters outside ALL of
        // them; confirm that is the intended set semantics.
        return pred(cp) || ctype_facet.is(mask, ch) ||
               (has_inverted_mask && !ctype_facet.is(inverted_mask, ch));
    });
}

/**
 * Localized "read while" over code points, with both a regular and an
 * inverted character-class mask.
 *
 * Delegates to `read_while_code_point` with a predicate that is true for
 * a code point `cp` when any of the following holds:
 *  - `pred(cp)` is true,
 *  - the wide-character form of `cp` is classified by `mask` in the
 *    `std::ctype<wchar_t>` facet of `loc`,
 *  - `inverted_mask` is non-empty and the wide-character form of `cp`
 *    is NOT classified by `inverted_mask`.
 *
 * \param range          Source range, forwarded to `read_while_code_point`.
 * \param loc            Locale reference the ctype facet is taken from.
 * \param mask           Classes that match directly; may be empty.
 * \param inverted_mask  Classes whose complement matches; may be empty,
 *                       in which case it contributes nothing.
 * \param pred           Additional per-code-point predicate.
 * \return Whatever `read_while_code_point` returns for the combined
 *         predicate.
 */
template <typename Range>
simple_borrowed_iterator_t<Range>
read_while_localized_mask_or_inverted_mask_or_code_point(
    Range&& range,
    detail::locale_ref loc,
    std::ctype_base::mask mask,
    std::ctype_base::mask inverted_mask,
    function_ref<bool(char32_t)> pred)
{
    const auto& ctype_facet = get_facet<std::ctype<wchar_t>>(loc);

    // `is()` with an empty mask classifies nothing, so negating it would
    // be true for every character and the read would accept everything.
    // Skip the inverted test when no inverted class was requested.
    const bool has_inverted_mask =
        inverted_mask != std::ctype_base::mask{};

    return read_while_code_point(SCN_FWD(range), [&](char32_t cp) {
        // NOTE(review): the result is dereferenced unchecked; presumably
        // the conversion cannot fail with these arguments -- confirm.
        auto ch = *encode_code_point_as_wide_character(cp, false);
        // NOTE(review): with several inverted classes OR'ed into
        // `inverted_mask`, this matches only characters outside ALL of
        // them; confirm that is the intended set semantics.
        return pred(cp) || ctype_facet.is(mask, ch) ||
               (has_inverted_mask && !ctype_facet.is(inverted_mask, ch));
    });
}
#endif // !SCN_DISABLE_LOCALE

template <typename Range, typename Iterator>
Expand Down
60 changes: 45 additions & 15 deletions src/scn/impl/reader/string_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -469,11 +469,12 @@ namespace scn {
return {};
}

std::pair<std::ctype_base::mask, bool> map_localized_mask()
const
std::pair<std::pair<std::ctype_base::mask, bool>,
std::pair<std::ctype_base::mask, bool>>
map_localized_mask() const
{
std::ctype_base::mask mask{};
bool is_exhaustive = true;
std::ctype_base::mask mask{}, inverted_mask{};
bool is_exhaustive{true}, is_inverted_exhaustive{true};

if ((specs.charset_specifiers &
detail::character_set_specifier::space_literal) !=
Expand Down Expand Up @@ -532,9 +533,32 @@ namespace scn {
mask |= std::ctype_base::cntrl;
}

// TODO: inverted flags
if ((specs.charset_specifiers &
detail::character_set_specifier::inverted_letters) !=
detail::character_set_specifier::none) {
inverted_mask |= std::ctype_base::alpha;
}
if ((specs.charset_specifiers &
detail::character_set_specifier::
inverted_alnum_underscore) !=
detail::character_set_specifier::none) {
inverted_mask |= std::ctype_base::alnum;
is_inverted_exhaustive = false;
}
if ((specs.charset_specifiers &
detail::character_set_specifier::
inverted_whitespace) !=
detail::character_set_specifier::none) {
inverted_mask |= std::ctype_base::space;
}
if ((specs.charset_specifiers &
detail::character_set_specifier::inverted_numbers) !=
detail::character_set_specifier::none) {
inverted_mask |= std::ctype_base::digit;
}

return {mask, is_exhaustive};
return {{mask, is_exhaustive},
{inverted_mask, is_inverted_exhaustive}};
}
#endif // !SCN_DISABLE_LOCALE

Expand Down Expand Up @@ -648,16 +672,18 @@ namespace scn {

read_source_callback cb_wrapper{helper, loc};

const auto [mask, is_mask_exhaustive] =
helper.map_localized_mask();
const auto [m1, m2] = helper.map_localized_mask();
const auto [mask, is_mask_exhaustive] = m1;
const auto [inverted_mask, is_inverted_mask_exhaustive] = m2;
const bool has_any_ascii_literals =
ranges::any_of(helper.specs.charset_literals,
[](auto b) SCN_NOEXCEPT { return b != 0; });
const bool has_any_nonascii_literals =
!helper.nonascii.extra_ranges.empty();

if (is_mask_exhaustive && !has_any_ascii_literals &&
!has_any_nonascii_literals) {
if (is_mask_exhaustive && is_inverted_mask_exhaustive &&
inverted_mask == std::ctype_base::mask{} &&
!has_any_ascii_literals && !has_any_nonascii_literals) {
if (is_inverted) {
auto it = read_until_localized_mask(range, loc, mask);
return check_nonempty(it, range);
Expand All @@ -670,7 +696,9 @@ namespace scn {
return cb_wrapper.on_localized(cp);
};

if (is_mask_exhaustive && mask == std::ctype_base::mask{}) {
if (is_mask_exhaustive && is_inverted_mask_exhaustive &&
mask == std::ctype_base::mask{} &&
inverted_mask == std::ctype_base::mask{}) {
if (is_inverted) {
auto it = read_until_code_point(range, cb);
return check_nonempty(it, range);
Expand All @@ -680,12 +708,14 @@ namespace scn {
}

if (is_inverted) {
auto it = read_until_localized_mask_or_code_point(
range, loc, mask, cb);
auto it =
read_until_localized_mask_or_inverted_mask_or_code_point(
range, loc, mask, inverted_mask, cb);
return check_nonempty(it, range);
}
auto it = read_while_localized_mask_or_code_point(range, loc,
mask, cb);
auto it =
read_while_localized_mask_or_inverted_mask_or_code_point(
range, loc, mask, inverted_mask, cb);
return check_nonempty(it, range);
}
#endif // !SCN_DISABLE_LOCALE
Expand Down

0 comments on commit e56a44c

Please sign in to comment.