From 292b93d60166ef02fd7000f93272b33ef3025d7f Mon Sep 17 00:00:00 2001 From: Elias Kosunen Date: Fri, 29 Sep 2023 00:22:59 +0300 Subject: [PATCH] Remove invalid_encoding errors --- include/scn/detail/error.h | 4 +- include/scn/detail/format_string_parser.h | 8 +- include/scn/detail/unicode.h | 63 +++- include/scn/util/expected_impl.h | 1 + src/scn/impl/algorithms/read.h | 304 +++++++++--------- src/scn/impl/algorithms/read_simple.h | 3 +- src/scn/impl/algorithms/take_width_view.h | 8 +- .../impl/reader/code_unit_and_point_reader.h | 9 +- src/scn/impl/reader/common.h | 16 +- src/scn/impl/reader/float_reader.cpp | 2 +- src/scn/impl/reader/float_reader.h | 33 +- src/scn/impl/reader/string_reader.h | 136 ++++---- src/scn/impl/unicode/unicode.h | 116 +++---- src/scn/impl/unicode/unicode_whitespace.h | 11 +- src/scn/impl/util/text_width.h | 8 +- src/scn/vscan_impl.h | 8 +- .../impl_tests/read_algorithms_test.cpp | 120 +++---- .../impl_tests/string_reader_test.cpp | 8 +- tests/unittests/string_test.cpp | 10 +- tests/unittests/string_view_test.cpp | 13 +- 20 files changed, 426 insertions(+), 455 deletions(-) diff --git a/include/scn/detail/error.h b/include/scn/detail/error.h index 7fc9a5da..1b10bc10 100644 --- a/include/scn/detail/error.h +++ b/include/scn/detail/error.h @@ -46,8 +46,6 @@ namespace scn { /// Scanned value was out of range for the desired type. /// (e.g. `>2^32` for an `uint32_t`) value_out_of_range, - /// Source range has invalid (utf-8 or utf-16) encoding - invalid_encoding, /// The source range emitted an error that cannot be recovered /// from. The library can't use the source range in this state. /// Can only happen when using an istream as the input. @@ -95,7 +93,7 @@ namespace scn { return m_code; } /// Get error message - SCN_NODISCARD constexpr auto msg() const SCN_NOEXCEPT -> const char* + SCN_NODISCARD constexpr auto msg() const SCN_NOEXCEPT->const char* { return m_msg; } diff --git a/include/scn/detail/format_string_parser.h b/include/scn/detail/format_string_parser.h index efe30a0f..d3ae5fbf 100644 --- a/include/scn/detail/format_string_parser.h +++ b/include/scn/detail/format_string_parser.h @@ -649,8 +649,8 @@ namespace scn { if constexpr (sizeof(CharT) == 1) { // UTF-8 - auto cp = - decode_utf8_code_point(std::string_view{&*cp_begin, len}); + auto cp = decode_utf8_code_point_exhaustive( + std::string_view{&*cp_begin, len}); if (SCN_UNLIKELY(cp == invalid_code_point)) { handler.on_error( "Invalid Unicode code point in format string argument"); @@ -660,8 +660,8 @@ namespace scn { } else if constexpr (sizeof(CharT) == 2) { // UTF-16 - auto cp = - decode_utf16_code_point(std::wstring_view{&*cp_begin, len}); + auto cp = decode_utf16_code_point_exhaustive( + std::wstring_view{&*cp_begin, len}); if (SCN_UNLIKELY(cp == invalid_code_point)) { handler.on_error( "Invalid Unicode code point in format string argument"); diff --git a/include/scn/detail/unicode.h b/include/scn/detail/unicode.h index a8f13462..84941e0c 100644 --- a/include/scn/detail/unicode.h +++ b/include/scn/detail/unicode.h @@ -87,7 +87,8 @@ namespace scn { inline constexpr char32_t invalid_code_point = 0x110000; - inline constexpr char32_t decode_utf8_code_point(std::string_view input) + inline constexpr char32_t decode_utf8_code_point_exhaustive( + std::string_view input) { SCN_EXPECT(!input.empty() && input.size() <= 4); @@ -165,8 +166,66 @@ namespace scn { SCN_UNREACHABLE; } + inline constexpr char32_t decode_utf8_code_point_exhaustive_valid( + std::string_view input) + { + SCN_EXPECT(!input.empty() && input.size() <= 4); + + const auto is_trailing_code_unit = [](char ch) { + return static_cast(ch) >> 6 == 0x2; + }; + + if (input.size() == 1) { + SCN_EXPECT(static_cast(input[0]) < 0x80); + return static_cast(input[0]); + } + + if (input.size() == 2) { + SCN_EXPECT((static_cast(input[0]) & 0xe0) == + 0xc0); + SCN_EXPECT(is_trailing_code_unit(input[1])); + + char32_t cp{}; + cp |= (static_cast(input[0]) & 0x1f) << 6; + cp |= (static_cast(input[1]) & 0x3f) << 0; + return cp; + } + + if (input.size() == 3) { + SCN_EXPECT((static_cast(input[0]) & 0xf0) == + 0xe0); + SCN_EXPECT(is_trailing_code_unit(input[1])); + SCN_EXPECT(is_trailing_code_unit(input[2])); + + char32_t cp{}; + cp |= (static_cast(input[0]) & 0x0f) << 12; + cp |= (static_cast(input[1]) & 0x3f) << 6; + cp |= (static_cast(input[2]) & 0x3f) << 0; + return cp; + } + + if (input.size() == 4) { + SCN_EXPECT((static_cast(input[0]) & 0xf8) == + 0xf0); + SCN_EXPECT(static_cast(input[0]) <= 0xf4); + SCN_EXPECT(is_trailing_code_unit(input[1])); + SCN_EXPECT(is_trailing_code_unit(input[2])); + SCN_EXPECT(is_trailing_code_unit(input[3])); + + char32_t cp{}; + cp |= (static_cast(input[0]) & 0x07) << 18; + cp |= (static_cast(input[1]) & 0x3f) << 12; + cp |= (static_cast(input[2]) & 0x3f) << 6; + cp |= (static_cast(input[3]) & 0x3f) << 0; + return cp; + } + + SCN_EXPECT(false); + SCN_UNREACHABLE; + } + template - inline constexpr char32_t decode_utf16_code_point( + inline constexpr char32_t decode_utf16_code_point_exhaustive( std::basic_string_view input) { if constexpr (sizeof(CharT) == 2) { diff --git a/include/scn/util/expected_impl.h b/include/scn/util/expected_impl.h index 8afcc2c2..a3934ab8 100644 --- a/include/scn/util/expected_impl.h +++ b/include/scn/util/expected_impl.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/src/scn/impl/algorithms/read.h b/src/scn/impl/algorithms/read.h index b8b6d2b4..c3b1fce4 100644 --- a/src/scn/impl/algorithms/read.h +++ b/src/scn/impl/algorithms/read.h @@ -35,68 +35,60 @@ namespace scn { namespace impl { template - scan_expected iterator_value_result< simple_borrowed_iterator_t, - contiguous_range_factory>>> - read_code_point_into(Range&& range) + contiguous_range_factory>> { - using rettype = iterator_value_result< - simple_borrowed_iterator_t, - contiguous_range_factory>>; - - if (auto e = eof_check(range); SCN_UNLIKELY(!e)) { - return unexpected(e); - } + SCN_EXPECT(ranges::begin(range) != ranges::end(range)); auto it = ranges::begin(range); const auto len = code_point_length_by_starting_code_unit(*it); - if (SCN_UNLIKELY(!len)) { - return unexpected(len.error()); + + if (SCN_UNLIKELY(len == 0)) { + for (; it != ranges::end(range); ++it) { + if (code_point_length_by_starting_code_unit(*it) != 0) { + break; + } + } + + auto cp_view = make_contiguous_buffer( + ranges::subrange{ranges::begin(range), it}); + return {it, cp_view}; } - if (*len == 1) { + if (len == 1) { ++it; auto cp_view = make_contiguous_buffer( ranges::subrange{ranges::begin(range), it}); - return rettype{it, cp_view}; + return {it, cp_view}; } if constexpr (ranges::sized_range) { auto sz = ranges_polyfill::usize(range); - if (SCN_UNLIKELY(sz < *len)) { - return unexpected_scan_error(scan_error::invalid_encoding, - "Incomplete code point"); + if (SCN_UNLIKELY(sz < len)) { + ranges::advance(it, ranges::end(range)); + } + else { + ranges::advance( + it, + static_cast>(len)); } - ranges::advance( - it, static_cast>(*len)); } else { ++it; size_t i = 1; - for (; i < *len && it != ranges::end(range); ++i, (void)++it) {} - if (SCN_UNLIKELY(i != *len)) { - return unexpected_scan_error(scan_error::invalid_encoding, - "Incomplete code point"); - } + for (; i < len && it != ranges::end(range); ++i, (void)++it) {} } auto cp_view = make_contiguous_buffer( ranges::subrange{ranges::begin(range), it}); - if (SCN_UNLIKELY(!validate_unicode(cp_view.view()))) { - return unexpected_scan_error(scan_error::invalid_encoding, - "Invalid code point"); - } - - return rettype{it, cp_view}; + return {it, cp_view}; } template - scan_expected> read_code_point( - Range&& range) + simple_borrowed_iterator_t read_code_point(Range&& range) { - return read_code_point_into(SCN_FWD(range)) - .transform([](auto&& result) - SCN_NOEXCEPT { return result.iterator; }); + return read_code_point_into(SCN_FWD(range)).iterator; } template @@ -120,56 +112,51 @@ namespace scn { return unexpected(e); } - auto result = read_code_point(rng); - if (SCN_UNLIKELY(!result)) { - return unexpected(result.error()); - } - - it = *result; + it = read_code_point(rng); } return it; } template - scan_expected> - read_exactly_n_width_units(Range&& range, - ranges::range_difference_t count) + simple_borrowed_iterator_t read_exactly_n_width_units( + Range&& range, + ranges::range_difference_t count) { auto it = ranges::begin(range); ranges::range_difference_t acc_width = 0; while (it != ranges::end(range)) { - auto read_result = read_code_point_into( + auto [iter, val] = read_code_point_into( ranges::subrange{it, ranges::end(range)}); - if (SCN_UNLIKELY(!read_result)) { - return unexpected(read_result.error()); + + if (SCN_UNLIKELY(!validate_unicode(val.view()))) { + ++acc_width; + } + else { + acc_width += calculate_valid_text_width(val.view()); } - acc_width += - calculate_valid_text_width(read_result->value.view()); if (acc_width > count) { break; } - it = read_result->iterator; + it = iter; } return it; } template - scan_expected> read_until_code_unit( - Range&& range, - Predicate pred) + simple_borrowed_iterator_t read_until_code_unit(Range&& range, + Predicate pred) { return ranges::find_if(range, pred); } template - scan_expected> read_while_code_unit( - Range&& range, - Predicate pred) + simple_borrowed_iterator_t read_while_code_unit(Range&& range, + Predicate pred) { return ranges::find_if_not(range, pred); } @@ -179,16 +166,13 @@ namespace scn { Range&& range, Predicate pred) { - return read_until_code_unit(range, pred) - .and_then([&](auto it) -> scan_expected< - simple_borrowed_iterator_t> { - if (it == ranges::begin(range)) { - return unexpected_scan_error( - scan_error::invalid_scanned_value, - "read_until1_code_unit: No matching code units"); - } - return it; - }); + auto it = read_until_code_unit(range, pred); + if (it == ranges::begin(range)) { + return unexpected_scan_error( + scan_error::invalid_scanned_value, + "read_until1_code_unit: No matching code units"); + } + return it; } template @@ -196,20 +180,17 @@ namespace scn { Range&& range, Predicate pred) { - return read_while_code_unit(range, pred) - .and_then([&](auto it) -> scan_expected< - simple_borrowed_iterator_t> { - if (it == ranges::begin(range)) { - return unexpected_scan_error( - scan_error::invalid_scanned_value, - "read_while1_code_unit: No matching code units"); - } - return it; - }); + auto it = read_while_code_unit(range, pred); + if (it == ranges::begin(range)) { + return unexpected_scan_error( + scan_error::invalid_scanned_value, + "read_while1_code_unit: No matching code units"); + } + return it; } template - scan_expected> read_until_code_units( + simple_borrowed_iterator_t read_until_code_units( Range&& range, CodeUnits&& needle) { @@ -217,43 +198,45 @@ namespace scn { } template - scan_expected> read_until_code_point( - Range&& range, - Predicate pred) + simple_borrowed_iterator_t read_until_code_point(Range&& range, + Predicate pred) { auto it = ranges::begin(range); while (it != ranges::end(range)) { - const auto result = read_code_point_into( + const auto [iter, value] = read_code_point_into( ranges::subrange{it, ranges::end(range)}); - if (SCN_UNLIKELY(!result)) { - return unexpected(result.error()); - } - const auto cp = - decode_code_point_exhaustive_valid(result->value.view()); - if (pred(cp)) { - break; + if (SCN_UNLIKELY(!validate_unicode(value.view()))) { + if (pred(detail::invalid_code_point)) { + break; + } + } + else { + const auto cp = + decode_code_point_exhaustive_valid(value.view()); + if (pred(cp)) { + break; + } } - it = result->iterator; + it = iter; } return it; } template - scan_expected> read_while_code_point( - Range&& range, - Predicate pred) + simple_borrowed_iterator_t read_while_code_point(Range&& range, + Predicate pred) { return read_until_code_point( SCN_FWD(range), [&](char32_t cp) { return !pred(cp); }); } template - scan_expected> - read_until_classic_space(Range&& range) + simple_borrowed_iterator_t read_until_classic_space( + Range&& range) { if constexpr (ranges::contiguous_range && ranges::sized_range && @@ -271,8 +254,8 @@ namespace scn { } template - scan_expected> - read_while_classic_space(Range&& range) + simple_borrowed_iterator_t read_while_classic_space( + Range&& range) { if constexpr (ranges::contiguous_range && ranges::sized_range && @@ -291,38 +274,42 @@ namespace scn { scan_expected> read_matching_code_unit(Range&& range, detail::char_t ch) { - return read_code_unit(range).and_then( - [&](auto it) - -> scan_expected> { - if (SCN_UNLIKELY(*ranges::begin(range) != - static_cast>(ch))) { - return unexpected_scan_error( - scan_error::invalid_scanned_value, - "read_matching_code_unit: No match"); - } + auto it = read_code_unit(range); - return it; - }); + if (SCN_UNLIKELY(*ranges::begin(range) != + static_cast>(ch))) { + return unexpected_scan_error( + scan_error::invalid_scanned_value, + "read_matching_code_unit: No match"); + } + + return it; } template scan_expected> read_matching_code_point(Range&& range, char32_t cp) { - return read_code_point_into(range).and_then( - [&](auto result) - -> scan_expected> { - auto decoded_cp = - decode_code_point_exhaustive_valid(result.value.view()); - - if (SCN_UNLIKELY(decoded_cp != cp)) { - return unexpected_scan_error( - scan_error::invalid_scanned_value, - "read_matching_code_point: No match"); - } + auto [it, value] = read_code_point_into(range); - return result.iterator; - }); + if (SCN_UNLIKELY(!validate_unicode(value.view()))) { + if (SCN_UNLIKELY(cp != detail::invalid_code_point)) { + return unexpected_scan_error( + scan_error::invalid_scanned_value, + "read_matching_code_point: No match"); + } + } + else { + auto decoded_cp = + decode_code_point_exhaustive_valid(value.view()); + if (SCN_UNLIKELY(decoded_cp != cp)) { + return unexpected_scan_error( + scan_error::invalid_scanned_value, + "read_matching_code_point: No match"); + } + } + + return it; } template @@ -439,28 +426,28 @@ namespace scn { Range&& range, std::string_view str) { - return read_code_unit(range).and_then( - [&](auto it) - -> scan_expected> { - for (auto ch : str) { - if (*ranges::begin(range) == - static_cast>(ch)) { - return it; - } - } + auto it = read_code_unit(range); + if (SCN_UNLIKELY(!it)) { + return unexpected(it.error()); + } - return unexpected_scan_error( - scan_error::invalid_scanned_value, - "read_one_of_code_unit: No match"); - }); + for (auto ch : str) { + if (*ranges::begin(range) == + static_cast>(ch)) { + return *it; + } + } + + return unexpected_scan_error(scan_error::invalid_scanned_value, + "read_one_of_code_unit: No match"); } template - scan_expected> - read_localized_mask_impl(Range&& range, - detail::locale_ref loc, - std::ctype_base::mask mask, - bool read_until) + simple_borrowed_iterator_t read_localized_mask_impl( + Range&& range, + detail::locale_ref loc, + std::ctype_base::mask mask, + bool read_until) { const auto& ctype_facet = get_facet>(loc); @@ -492,51 +479,51 @@ namespace scn { } } else { - return read_until_code_point( - SCN_FWD(range), [&](char32_t cp) { - auto ch = - *encode_code_point_as_wide_character(cp, false); - return ctype_facet.is(mask, ch) == read_until; - }); + return read_until_code_point(SCN_FWD(range), [&](char32_t cp) { + auto ch = *encode_code_point_as_wide_character(cp, false); + return ctype_facet.is(mask, ch) == read_until; + }); } } template - scan_expected> - read_until_localized_mask(Range&& range, - detail::locale_ref loc, - std::ctype_base::mask mask) + simple_borrowed_iterator_t read_until_localized_mask( + Range&& range, + detail::locale_ref loc, + std::ctype_base::mask mask) { return read_localized_mask_impl(SCN_FWD(range), loc, mask, true); } template - scan_expected> - read_while_localized_mask(Range&& range, - detail::locale_ref loc, - std::ctype_base::mask mask) + simple_borrowed_iterator_t read_while_localized_mask( + Range&& range, + detail::locale_ref loc, + std::ctype_base::mask mask) { return read_localized_mask_impl(SCN_FWD(range), loc, mask, false); } template - scan_expected> - read_until_localized_space(Range&& range, detail::locale_ref loc) + simple_borrowed_iterator_t read_until_localized_space( + Range&& range, + detail::locale_ref loc) { return read_until_localized_mask(SCN_FWD(range), loc, std::ctype_base::space); } template - scan_expected> - read_while_localized_space(Range&& range, detail::locale_ref loc) + simple_borrowed_iterator_t read_while_localized_space( + Range&& range, + detail::locale_ref loc) { return read_while_localized_mask(SCN_FWD(range), loc, std::ctype_base::space); } template - scan_expected> + simple_borrowed_iterator_t read_until_localized_mask_or_code_point(Range&& range, detail::locale_ref loc, std::ctype_base::mask mask, @@ -551,7 +538,7 @@ namespace scn { } template - scan_expected> + simple_borrowed_iterator_t read_while_localized_mask_or_code_point(Range&& range, detail::locale_ref loc, std::ctype_base::mask mask, @@ -564,6 +551,7 @@ namespace scn { return pred(cp) || ctype_facet.is(mask, ch); }); } + template simple_borrowed_iterator_t apply_opt( scan_expected&& result, diff --git a/src/scn/impl/algorithms/read_simple.h b/src/scn/impl/algorithms/read_simple.h index eee6c010..b03d6d5e 100644 --- a/src/scn/impl/algorithms/read_simple.h +++ b/src/scn/impl/algorithms/read_simple.h @@ -25,8 +25,7 @@ namespace scn { namespace impl { template - scan_expected> read_all( - Range&& range) + simple_borrowed_iterator_t read_all(Range&& range) { return ranges::next(ranges::begin(range), ranges::end(range)); } diff --git a/src/scn/impl/algorithms/take_width_view.h b/src/scn/impl/algorithms/take_width_view.h index dc10cc75..984bd3cb 100644 --- a/src/scn/impl/algorithms/take_width_view.h +++ b/src/scn/impl/algorithms/take_width_view.h @@ -340,12 +340,8 @@ namespace scn { private: difference_type _get_cp_length_at_current() const { - const auto r = - code_point_length_by_starting_code_unit(*m_current); - if (!r) { - return 0; - } - return static_cast(*r); + return static_cast( + code_point_length_by_starting_code_unit(*m_current)); } difference_type _get_width_at_current_cp_start( diff --git a/src/scn/impl/reader/code_unit_and_point_reader.h b/src/scn/impl/reader/code_unit_and_point_reader.h index ff57bcfd..fd104316 100644 --- a/src/scn/impl/reader/code_unit_and_point_reader.h +++ b/src/scn/impl/reader/code_unit_and_point_reader.h @@ -53,12 +53,9 @@ namespace scn { SourceRange&& range, char32_t& cp) { - return read_code_point_into(SCN_FWD(range)) - .transform([&](auto result) { - cp = decode_code_point_exhaustive_valid( - result.value.view()); - return result.iterator; - }); + auto result = read_code_point_into(SCN_FWD(range)); + cp = decode_code_point_exhaustive_valid(result.value.view()); + return result.iterator; } }; diff --git a/src/scn/impl/reader/common.h b/src/scn/impl/reader/common.h index ba83b016..d81fd0ea 100644 --- a/src/scn/impl/reader/common.h +++ b/src/scn/impl/reader/common.h @@ -44,16 +44,14 @@ namespace scn { bool allow_exhaustion = false) { if (!allow_exhaustion) { - return read_while_classic_space(range).and_then( - [&](auto it) -> scan_expected { - if (auto e = eof_check( - ranges::subrange{it, ranges::end(range)}); - SCN_UNLIKELY(!e)) { - return unexpected(e); - } + auto it = read_while_classic_space(range); + if (auto e = + eof_check(ranges::subrange{it, ranges::end(range)}); + SCN_UNLIKELY(!e)) { + return unexpected(e); + } - return it; - }); + return it; } return read_while_classic_space(SCN_FWD(range)); diff --git a/src/scn/impl/reader/float_reader.cpp b/src/scn/impl/reader/float_reader.cpp index d2d3e20d..b5ebdbad 100644 --- a/src/scn/impl/reader/float_reader.cpp +++ b/src/scn/impl/reader/float_reader.cpp @@ -174,7 +174,7 @@ namespace scn { read_until_classic_space(input.view()); SCN_EXPECT(first_space); input.assign(std::basic_string{ - input.view().begin(), *first_space}); + input.view().begin(), first_space}); } if (this->m_kind == diff --git a/src/scn/impl/reader/float_reader.h b/src/scn/impl/reader/float_reader.h index 87bab8c1..200a7ede 100644 --- a/src/scn/impl/reader/float_reader.h +++ b/src/scn/impl/reader/float_reader.h @@ -142,15 +142,12 @@ namespace scn { -> scan_expected< simple_borrowed_iterator_t> { auto res = read_all(rr); - if (SCN_UNLIKELY(!res)) { - return unexpected(res.error()); - } - if (SCN_UNLIKELY(*res == ranges::begin(r))) { + if (SCN_UNLIKELY(res == ranges::begin(r))) { return unexpected_scan_error( scan_error::invalid_scanned_value, "Invalid float value"); } - return *res; + return res; }; return do_read_source_impl(r, cb, cb); } @@ -291,18 +288,16 @@ namespace scn { } auto payload_beg_it = it; - if (auto r = read_while_code_unit( - ranges::subrange{it, ranges::end(range)}, - [](char_type ch) SCN_NOEXCEPT { - return is_ascii_char(ch) && - ((ch >= '0' && ch <= '9') || - (ch >= 'a' && ch <= 'z') || - (ch >= 'A' && ch <= 'Z') || ch == '_'); - })) { - it = *r; - m_nan_payload_buffer.assign( - ranges::subrange{payload_beg_it, it}); - } + it = read_while_code_unit( + ranges::subrange{it, ranges::end(range)}, + [](char_type ch) SCN_NOEXCEPT { + return is_ascii_char(ch) && + ((ch >= '0' && ch <= '9') || + (ch >= 'a' && ch <= 'z') || + (ch >= 'A' && ch <= 'Z') || ch == '_'); + }); + m_nan_payload_buffer.assign( + ranges::subrange{payload_beg_it, it}); m_kind = float_kind::nan_with_payload; if (auto r = read_matching_code_unit( @@ -393,8 +388,8 @@ namespace scn { } template - scan_expected> - read_regular_float(Range&& range) + scan_expected> read_regular_float( + Range&& range) { const bool allowed_exp = (m_options & allow_scientific) != 0; const bool required_exp = diff --git a/src/scn/impl/reader/string_reader.h b/src/scn/impl/reader/string_reader.h index 9c3aaa46..cb44b3cb 100644 --- a/src/scn/impl/reader/string_reader.h +++ b/src/scn/impl/reader/string_reader.h @@ -35,11 +35,7 @@ namespace scn { std::basic_string& dst) { dst.clear(); - if (!transcode_to_string(src, dst)) { - SCN_UNLIKELY_ATTR - return scan_error(scan_error::invalid_encoding, - "Failed to transcode string value"); - } + transcode_to_string(src, dst); return {}; } @@ -49,11 +45,6 @@ namespace scn { std::basic_string& dest) { if constexpr (std::is_same_v) { - if (SCN_UNLIKELY(!validate_unicode(source.view()))) { - return {scan_error::invalid_encoding, - "Failed to validate string value"}; - } - dest.assign(source.view()); } else { @@ -69,11 +60,6 @@ namespace scn { std::basic_string& dest) { if constexpr (std::is_same_v) { - if (SCN_UNLIKELY(!validate_unicode(source.view()))) { - return {scan_error::invalid_encoding, - "Failed to validate string value"}; - } - if (source.stores_allocated_string()) { dest.assign(SCN_MOVE(source.get_allocated_string())); } @@ -94,11 +80,6 @@ namespace scn { std::basic_string& dest) { if constexpr (std::is_same_v) { - if (SCN_UNLIKELY(!validate_unicode(source.view()))) { - return {scan_error::invalid_encoding, - "Failed to validate string value"}; - } - dest.assign(source.view()); } else { @@ -110,43 +91,41 @@ namespace scn { template auto read_string_impl(Range& range, - scan_expected&& result, + Iterator&& result, std::basic_string& value) -> scan_expected> { - if (SCN_UNLIKELY(!result)) { - return unexpected(result.error()); - } + static_assert( + ranges_std::forward_iterator>); auto src = make_contiguous_buffer( - ranges::subrange{ranges::begin(range), *result}); + ranges::subrange{ranges::begin(range), result}); if (auto e = transcode_if_necessary(SCN_MOVE(src), value); SCN_UNLIKELY(!e)) { return unexpected(e); } - return *result; + return result; } template auto read_string_view_impl(Range& range, - scan_expected&& result, + Iterator&& result, std::basic_string_view& value) -> scan_expected> { - if (SCN_UNLIKELY(!result)) { - return unexpected(result.error()); - } + static_assert( + ranges_std::forward_iterator>); auto src = [&]() { if constexpr (detail::is_specialization_of_v) { return make_contiguous_buffer(ranges::subrange{ - ranges::begin(range).base(), result->base()}); + ranges::begin(range).base(), result.base()}); } else { return make_contiguous_buffer( - ranges::subrange{ranges::begin(range), *result}); + ranges::subrange{ranges::begin(range), result}); } }(); using src_type = decltype(src); @@ -165,17 +144,11 @@ namespace scn { "transcoding)"); } else { - if (SCN_UNLIKELY(!validate_unicode(src.view()))) { - return unexpected_scan_error( - scan_error::invalid_encoding, - "Failed to validate string_view value"); - } - const auto view = src.view(); value = std::basic_string_view( ranges::data(view), ranges_polyfill::usize(view)); - return *result; + return result; } } @@ -335,8 +308,12 @@ namespace scn { const detail::basic_format_specs& specs, std::basic_string& value) { - return read_string_impl( - range, read_source_classic_impl(range, {specs}), value); + auto it = read_source_classic_impl(range, {specs}); + if (SCN_UNLIKELY(!it)) { + return unexpected(it.error()); + } + + return read_string_impl(range, *it, value); } template @@ -346,9 +323,12 @@ namespace scn { const detail::basic_format_specs& specs, std::basic_string& value) { - return read_string_impl( - range, read_source_localized_impl(range, {specs}, loc), - value); + auto it = read_source_localized_impl(range, {specs}, loc); + if (SCN_UNLIKELY(!it)) { + return unexpected(it.error()); + } + + return read_string_impl(range, *it, value); } template @@ -357,8 +337,12 @@ namespace scn { const detail::basic_format_specs& specs, std::basic_string_view& value) { - return read_string_view_impl( - range, read_source_classic_impl(range, {specs}), value); + auto it = read_source_classic_impl(range, {specs}); + if (SCN_UNLIKELY(!it)) { + return unexpected(it.error()); + } + + return read_string_view_impl(range, *it, value); } template @@ -368,9 +352,12 @@ namespace scn { const detail::basic_format_specs& specs, std::basic_string_view& value) { - return read_string_view_impl( - range, read_source_localized_impl(range, {specs}, loc), - value); + auto it = read_source_localized_impl(range, {specs}, loc); + if (SCN_UNLIKELY(!it)) { + return unexpected(it.error()); + } + + return read_string_view_impl(range, *it, value); } private: @@ -599,11 +586,11 @@ namespace scn { }; if (is_inverted) { - return read_until_code_point(range, cb).and_then( - [&](auto it) { return check_nonempty(it, range); }); + auto it = read_until_code_point(range, cb); + return check_nonempty(it, range); } - return read_while_code_point(range, cb).and_then( - [&](auto it) { return check_nonempty(it, range); }); + auto it = read_while_code_point(range, cb); + return check_nonempty(it, range); } const auto cb = [&](SourceCharT ch) { @@ -611,11 +598,11 @@ namespace scn { }; if (is_inverted) { - return read_until_code_unit(range, cb).and_then( - [&](auto it) { return check_nonempty(it, range); }); + auto it = read_until_code_unit(range, cb); + return check_nonempty(it, range); } - return read_while_code_unit(range, cb).and_then( - [&](auto it) { return check_nonempty(it, range); }); + auto it = read_while_code_unit(range, cb); + return check_nonempty(it, range); } template @@ -650,14 +637,11 @@ namespace scn { if (is_mask_exhaustive && !has_any_ascii_literals && !has_any_nonascii_literals) { if (is_inverted) { - return read_until_localized_mask(range, loc, mask) - .and_then([&](auto it) { - return check_nonempty(it, range); - }); + auto it = read_until_localized_mask(range, loc, mask); + return check_nonempty(it, range); } - return read_while_localized_mask(range, loc, mask) - .and_then( - [&](auto it) { return check_nonempty(it, range); }); + auto it = read_while_localized_mask(range, loc, mask); + return check_nonempty(it, range); } const auto cb = [&](char32_t cp) { @@ -666,23 +650,21 @@ namespace scn { if (is_mask_exhaustive && mask == std::ctype_base::mask{}) { if (is_inverted) { - return read_until_code_point(range, cb).and_then( - [&](auto it) { return check_nonempty(it, range); }); + auto it = read_until_code_point(range, cb); + return check_nonempty(it, range); } - return read_while_code_point(range, cb).and_then( - [&](auto it) { return check_nonempty(it, range); }); + auto it = read_while_code_point(range, cb); + return check_nonempty(it, range); } if (is_inverted) { - return read_until_localized_mask_or_code_point(range, loc, - mask, cb) - .and_then( - [&](auto it) { return check_nonempty(it, range); }); - } - return read_while_localized_mask_or_code_point(range, loc, mask, - cb) - .and_then( - [&](auto it) { return check_nonempty(it, range); }); + auto it = read_until_localized_mask_or_code_point( + range, loc, mask, cb); + return check_nonempty(it, range); + } + auto it = read_while_localized_mask_or_code_point(range, loc, + mask, cb); + return check_nonempty(it, range); } template diff --git a/src/scn/impl/unicode/unicode.h b/src/scn/impl/unicode/unicode.h index 553fcf96..60bd41b4 100644 --- a/src/scn/impl/unicode/unicode.h +++ b/src/scn/impl/unicode/unicode.h @@ -105,15 +105,9 @@ namespace scn { } template - scan_expected code_point_length_by_starting_code_unit( - CharT ch) + std::size_t code_point_length_by_starting_code_unit(CharT ch) { - auto len = detail::utf_code_point_length_by_starting_code_unit(ch); - if (SCN_UNLIKELY(len == 0)) { - return unexpected_scan_error(scan_error::invalid_encoding, - "Invalid Unicode code point"); - } - return len; + return detail::utf_code_point_length_by_starting_code_unit(ch); } template @@ -125,8 +119,7 @@ namespace scn { { const auto len = code_point_length_by_starting_code_unit(input[0]); - SCN_EXPECT(len); - SCN_EXPECT(*len == input.size()); + SCN_EXPECT(len == input.size()); } SCN_EXPECT(validate_unicode(input)); @@ -183,11 +176,8 @@ namespace scn { SCN_EXPECT(!input.empty()); SCN_EXPECT(validate_unicode(input)); - const auto len_wrapped = - code_point_length_by_starting_code_unit(input[0]); - SCN_EXPECT(len_wrapped); - const auto len = *len_wrapped; - SCN_ASSUME(len != 0); + const auto len = code_point_length_by_starting_code_unit(input[0]); + SCN_EXPECT(len != 0); constexpr auto enc = get_encoding(); char32_t output{}; @@ -211,21 +201,32 @@ namespace scn { input.begin() + len, static_cast(output)}; } + template + auto get_start_of_next_code_point(std::basic_string_view input) + -> ranges::iterator_t> + { + auto it = input.begin(); + for (; it != input.end(); ++it) { + if (code_point_length_by_starting_code_unit(*it) != 0) { + break; + } + } + + return it; + } + template auto get_next_code_point(std::basic_string_view input) - -> scan_expected iterator_value_result< ranges::iterator_t>, - char32_t>> + char32_t> { SCN_EXPECT(!input.empty()); const auto len = code_point_length_by_starting_code_unit(input[0]); - if (SCN_UNLIKELY(!len)) { - return unexpected(len.error()); - } - if (SCN_UNLIKELY(*len == 0)) { - return unexpected_scan_error(scan_error::invalid_encoding, - "Invalid encoding"); + if (SCN_UNLIKELY(len == 0)) { + return {get_start_of_next_code_point(input), + detail::invalid_code_point}; } constexpr auto enc = get_encoding(); @@ -233,25 +234,24 @@ namespace scn { char32_t output{}; if constexpr (enc == encoding::utf8) { result = simdutf::convert_utf8_to_utf32( - reinterpret_cast(input.data()), *len, &output); + reinterpret_cast(input.data()), len, &output); } else if constexpr (enc == encoding::utf16) { result = simdutf::convert_utf16_to_utf32( - reinterpret_cast(input.data()), *len, + reinterpret_cast(input.data()), len, &output); } else if constexpr (enc == encoding::utf32) { + SCN_EXPECT(len == 1); output = static_cast(input[0]); } if (SCN_UNLIKELY(result != 1)) { - return unexpected_scan_error(scan_error::invalid_encoding, - "Invalid encoding"); + return {get_start_of_next_code_point(input.substr(1)), + detail::invalid_code_point}; } - return iterator_value_result< - ranges::iterator_t>, char32_t>{ - input.begin() + *len, static_cast(output)}; + return {input.begin() + len, output}; } template @@ -278,18 +278,6 @@ namespace scn { } } - template - scan_expected validate_and_count_code_points( - std::basic_string_view input) - { - if (SCN_UNLIKELY(!validate_unicode(input))) { - return unexpected_scan_error(scan_error::invalid_encoding, - "Invalid encoding"); - } - - return count_valid_code_ponts(input); - } - template std::size_t count_valid_transcoded_code_units( std::basic_string_view input) @@ -343,18 +331,6 @@ namespace scn { } } - template - scan_expected validate_and_count_transcoded_code_units( - std::basic_string_view input) - { - if (SCN_UNLIKELY(!validate_unicode(input))) { - return unexpected_scan_error(scan_error::invalid_encoding, - "Invalid encoding"); - } - - return count_valid_transcoded_code_units(input); - } - template span::iterator get_valid_code_points( std::basic_string_view input, @@ -466,17 +442,43 @@ namespace scn { } template - bool transcode_to_string(std::basic_string_view source, + void transcode_invalid_to_string( + std::basic_string_view source, + std::basic_string& dest) + { + auto it = source.begin(); + while (it != source.end()) { + auto [iter, cp] = get_next_code_point( + detail::make_string_view_from_iterators( + it, source.end())); + + if (SCN_UNLIKELY(cp == detail::invalid_code_point)) { + cp = 0xfffd; // Replacement character + } + + auto cp_input = std::basic_string_view{&cp, 1}; + SCN_EXPECT(validate_unicode(cp_input)); + + std::array temp{0}; + auto ret = transcode_valid( + cp_input, span{temp.data(), temp.size()}); + SCN_EXPECT(ret == 1); + + dest.append(temp.data()); + } + } + + template + void transcode_to_string(std::basic_string_view source, std::basic_string& dest) { static_assert(!std::is_same_v); if (SCN_UNLIKELY(!validate_unicode(source))) { - return false; + return transcode_invalid_to_string(source, dest); } transcode_valid_to_string(source, dest); - return true; } template diff --git a/src/scn/impl/unicode/unicode_whitespace.h b/src/scn/impl/unicode/unicode_whitespace.h index ea56fbbb..a23a8f50 100644 --- a/src/scn/impl/unicode/unicode_whitespace.h +++ b/src/scn/impl/unicode/unicode_whitespace.h @@ -27,7 +27,8 @@ namespace scn { constexpr bool is_cp_space(char32_t cp) SCN_NOEXCEPT { // Pattern_White_Space property - return (cp >= 0x09 && cp <= 0x0d) || cp == 0x20 || // ASCII space characters + return (cp >= 0x09 && cp <= 0x0d) || + cp == 0x20 || // ASCII space characters cp == 0x85 || // NEXT LINE (NEL) cp == 0x200e || // LEFT-TO-RIGHT MARK cp == 0x200f || // RIGHT-TO-LEFT MARK @@ -43,12 +44,8 @@ namespace scn { { // TODO: optimize SCN_EXPECT(!str.empty()); - auto cp = get_next_code_point(str); - if (!cp) { - return {ranges::next(str.begin()), false}; - } - - return {cp->iterator, is_cp_space(cp->value)}; + auto res = get_next_code_point(str); + return {res.iterator, is_cp_space(res.value)}; } } // namespace impl diff --git a/src/scn/impl/util/text_width.h b/src/scn/impl/util/text_width.h index bfa0d445..2e7780cd 100644 --- a/src/scn/impl/util/text_width.h +++ b/src/scn/impl/util/text_width.h @@ -118,8 +118,8 @@ namespace scn { set_clocale_classic_guard clocale_guard{LC_CTYPE}; std::wstring winput; - transcode_valid_to_string( - std::u32string_view{&cp, 1}, winput); + transcode_valid_to_string(std::u32string_view{&cp, 1}, + winput); const auto n = ::wcswidth(winput.data(), winput.size()); SCN_ENSURE(n != -1); return static_cast(n); @@ -131,8 +131,8 @@ namespace scn { case text_width_algorithm::code_units: { std::wstring winput; - transcode_valid_to_string( - std::u32string_view{&cp, 1}, winput); + transcode_valid_to_string(std::u32string_view{&cp, 1}, + winput); return winput.size(); } diff --git a/src/scn/vscan_impl.h b/src/scn/vscan_impl.h index 99570650..69cd6678 100644 --- a/src/scn/vscan_impl.h +++ b/src/scn/vscan_impl.h @@ -135,12 +135,8 @@ namespace scn { impl::is_first_char_space( std::basic_string_view{begin, end}); is_space) { - auto ret = impl::read_while_classic_space(ctx.range()); - if (!ret) { - // TODO - return on_error(ret.error()); - } - ctx.advance_to(*ret); + ctx.advance_to( + impl::read_while_classic_space(ctx.range())); return; } diff --git a/tests/unittests/impl_tests/read_algorithms_test.cpp b/tests/unittests/impl_tests/read_algorithms_test.cpp index e9e24d50..f85d7f5f 100644 --- a/tests/unittests/impl_tests/read_algorithms_test.cpp +++ b/tests/unittests/impl_tests/read_algorithms_test.cpp @@ -28,22 +28,18 @@ TEST(ReadAllTest, Contiguous) { auto src = "foo"sv; auto it = scn::impl::read_all(src); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.end()); + EXPECT_EQ(it, src.end()); } TEST(ReadAllTest, NonContiguous) { auto src = scn::erased_range{"foo"sv}; auto it = scn::impl::read_all(src); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.end()); + EXPECT_EQ(it, src.end()); } TEST(ReadAllTest, NonBorrowed) { auto it = scn::impl::read_all(scn::erased_range{"foo"sv}); - ASSERT_TRUE(it); - static_assert(std::is_same_v>); + static_assert(std::is_same_v); } // read_code_unit @@ -155,9 +151,7 @@ TEST(ReadExactlyNCodeUnitsTest, ReadMoreNonBorrowed) TEST(ReadCodePointIntoTest, SingleCodeUnitCodePointFromContiguous) { auto src = "ab"sv; - auto ret = scn::impl::read_code_point_into(src); - ASSERT_TRUE(ret); - auto [it, cp] = *ret; + auto [it, cp] = scn::impl::read_code_point_into(src); EXPECT_EQ(it, src.begin() + 1); EXPECT_EQ(cp.view(), "a"sv); EXPECT_FALSE(cp.stores_allocated_string()); @@ -165,18 +159,14 @@ TEST(ReadCodePointIntoTest, SingleCodeUnitCodePointFromContiguous) TEST(ReadCodePointIntoTest, SingleCodeUnitCodePointFromNonContiguous) { auto src = scn::erased_range{"ab"sv}; - auto ret = scn::impl::read_code_point_into(src); - ASSERT_TRUE(ret); - auto [it, cp] = *ret; + auto [it, cp] = scn::impl::read_code_point_into(src); EXPECT_EQ(it, scn::ranges::next(src.begin())); EXPECT_EQ(cp.view(), "a"sv); EXPECT_TRUE(cp.stores_allocated_string()); } TEST(ReadCodePointIntoTest, SingleCodeUnitCodePointFromNonBorrowed) { - auto ret = scn::impl::read_code_point_into(scn::erased_range{"ab"sv}); - ASSERT_TRUE(ret); - auto [it, cp] = *ret; + auto [it, cp] = scn::impl::read_code_point_into(scn::erased_range{"ab"sv}); static_assert(std::is_same_v); EXPECT_EQ(cp.view(), "a"sv); EXPECT_TRUE(cp.stores_allocated_string()); @@ -184,9 +174,7 @@ TEST(ReadCodePointIntoTest, SingleCodeUnitCodePointFromNonBorrowed) TEST(ReadCodePointIntoTest, MultipleCodeUnitCodePointFromContiguous) { auto src = "äö"sv; - auto ret = scn::impl::read_code_point_into(src); - ASSERT_TRUE(ret); - auto [it, cp] = *ret; + auto [it, cp] = scn::impl::read_code_point_into(src); EXPECT_EQ(it, src.begin() + 2); EXPECT_EQ(cp.view(), "ä"sv); EXPECT_FALSE(cp.stores_allocated_string()); @@ -194,18 +182,14 @@ TEST(ReadCodePointIntoTest, MultipleCodeUnitCodePointFromContiguous) TEST(ReadCodePointIntoTest, MultipleCodeUnitCodePointFromNonContiguous) { auto src = scn::erased_range{"äö"sv}; - auto ret = scn::impl::read_code_point_into(src); - ASSERT_TRUE(ret); - auto [it, cp] = *ret; + auto [it, cp] = scn::impl::read_code_point_into(src); EXPECT_EQ(it, scn::ranges::next(src.begin(), 2)); EXPECT_EQ(cp.view(), "ä"sv); EXPECT_TRUE(cp.stores_allocated_string()); } TEST(ReadCodePointIntoTest, MultipleCodeUnitCodePointFromNonBorrowed) { - auto ret = scn::impl::read_code_point_into(scn::erased_range{"äö"sv}); - ASSERT_TRUE(ret); - auto [it, cp] = *ret; + auto [it, cp] = scn::impl::read_code_point_into(scn::erased_range{"äö"sv}); static_assert(std::is_same_v); EXPECT_EQ(cp.view(), "ä"sv); EXPECT_TRUE(cp.stores_allocated_string()); @@ -285,65 +269,57 @@ TEST(ReadUntilCodeUnit, ReadSomeContiguous) { auto src = "a b"sv; auto it = scn::impl::read_until_code_unit(src, is_literal_space); - ASSERT_TRUE(it); - EXPECT_EQ(**it, ' '); + EXPECT_EQ(*it, ' '); } TEST(ReadUntilCodeUnit, ReadSomeNonContiguous) { auto src = scn::erased_range{"a b"sv}; auto it = scn::impl::read_until_code_unit(src, is_literal_space); - ASSERT_TRUE(it); - EXPECT_EQ(**it, ' '); + EXPECT_EQ(*it, ' '); } TEST(ReadUntilCodeUnit, ReadSomeNonBorrowed) { auto it = scn::impl::read_until_code_unit(scn::erased_range{"a b"sv}, is_literal_space); - ASSERT_TRUE(it); - static_assert(std::is_same_v); + static_assert(std::is_same_v); } TEST(ReadUntilCodeUnit, ReadNoneContiguous) { auto src = " ab"sv; auto it = scn::impl::read_until_code_unit(src, is_literal_space); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.begin()); - EXPECT_EQ(**it, ' '); + EXPECT_EQ(it, src.begin()); + EXPECT_EQ(*it, ' '); } TEST(ReadUntilCodeUnit, ReadNoneNonContiguous) { auto src = scn::erased_range{" ab"sv}; auto it = scn::impl::read_until_code_unit(src, is_literal_space); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.begin()); - EXPECT_EQ(**it, ' '); + EXPECT_EQ(it, src.begin()); + EXPECT_EQ(*it, ' '); } TEST(ReadUntilCodeUnit, ReadNoneNonBorrowed) { auto it = scn::impl::read_until_code_unit(scn::erased_range{" ab"sv}, is_literal_space); - ASSERT_TRUE(it); - static_assert(std::is_same_v); + static_assert(std::is_same_v); } TEST(ReadUntilCodeUnit, ReadAllContiguous) { auto src = "abc"sv; auto it = scn::impl::read_until_code_unit(src, is_literal_space); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.end()); + EXPECT_EQ(it, src.end()); } TEST(ReadUntilCodeUnit, ReadAllNonContiguous) { auto src = scn::erased_range{"abc"sv}; auto it = scn::impl::read_until_code_unit(src, is_literal_space); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.end()); + EXPECT_EQ(it, src.end()); } TEST(ReadUntilCodeUnit, ReadAllNonBorrowed) { auto it = scn::impl::read_until_code_unit(scn::erased_range{"abc"sv}, is_literal_space); - ASSERT_TRUE(it); + static_assert(std::is_same_v); } // read_while_code_unit @@ -357,65 +333,57 @@ TEST(ReadWhileCodeUnit, ReadSomeContiguous) { auto src = "a b"sv; auto it = scn::impl::read_while_code_unit(src, is_not_literal_space); - ASSERT_TRUE(it); - EXPECT_EQ(**it, ' '); + EXPECT_EQ(*it, ' '); } TEST(ReadWhileCodeUnit, ReadSomeNonContiguous) { auto src = scn::erased_range{"a b"sv}; auto it = scn::impl::read_while_code_unit(src, is_not_literal_space); - ASSERT_TRUE(it); - EXPECT_EQ(**it, ' '); + EXPECT_EQ(*it, ' '); } TEST(ReadWhileCodeUnit, ReadSomeNonBorrowed) { auto it = scn::impl::read_while_code_unit(scn::erased_range{"a b"sv}, is_not_literal_space); - ASSERT_TRUE(it); - static_assert(std::is_same_v); + static_assert(std::is_same_v); } TEST(ReadWhileCodeUnit, ReadNoneContiguous) { auto src = " ab"sv; auto it = scn::impl::read_while_code_unit(src, is_not_literal_space); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.begin()); - EXPECT_EQ(**it, ' '); + EXPECT_EQ(it, src.begin()); + EXPECT_EQ(*it, ' '); } TEST(ReadWhileCodeUnit, ReadNoneNonContiguous) { auto src = scn::erased_range{" ab"sv}; auto it = scn::impl::read_while_code_unit(src, is_not_literal_space); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.begin()); - EXPECT_EQ(**it, ' '); + EXPECT_EQ(it, src.begin()); + EXPECT_EQ(*it, ' '); } TEST(ReadWhileCodeUnit, ReadNoneNonBorrowed) { auto it = scn::impl::read_while_code_unit(scn::erased_range{" ab"sv}, is_not_literal_space); - ASSERT_TRUE(it); - static_assert(std::is_same_v); + static_assert(std::is_same_v); } TEST(ReadWhileCodeUnit, ReadAllContiguous) { auto src = "abc"sv; auto it = scn::impl::read_while_code_unit(src, is_not_literal_space); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.end()); + EXPECT_EQ(it, src.end()); } TEST(ReadWhileCodeUnit, ReadAllNonContiguous) { auto src = scn::erased_range{"abc"sv}; auto it = scn::impl::read_while_code_unit(src, is_not_literal_space); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.end()); + EXPECT_EQ(it, src.end()); } TEST(ReadWhileCodeUnit, ReadAllNonBorrowed) { auto it = scn::impl::read_while_code_unit(scn::erased_range{"abc"sv}, is_not_literal_space); - ASSERT_TRUE(it); + static_assert(std::is_same_v); } // read_until1_code_unit @@ -477,63 +445,55 @@ TEST(ReadUntilCodePoint, ReadSomeContiguous) { auto src = "a😊b"sv; auto it = scn::impl::read_until_code_point(src, is_smiling_emoji); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.begin() + 1); + EXPECT_EQ(it, src.begin() + 1); } TEST(ReadUntilCodePoint, ReadSomeNonContiguous) { auto src = scn::erased_range{"a😊b"sv}; auto it = scn::impl::read_until_code_point(src, is_smiling_emoji); - ASSERT_TRUE(it); - EXPECT_EQ(*it, scn::ranges::next(src.begin(), 1)); + EXPECT_EQ(it, scn::ranges::next(src.begin(), 1)); } TEST(ReadUntilCodePoint, ReadSomeNonBorrowed) { auto it = scn::impl::read_until_code_point(scn::erased_range{"a😊b"sv}, is_smiling_emoji); - ASSERT_TRUE(it); - static_assert(std::is_same_v); + static_assert(std::is_same_v); } TEST(ReadUntilCodePoint, ReadNoneContiguous) { auto src = "😊ab"sv; auto it = scn::impl::read_until_code_point(src, is_smiling_emoji); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.begin()); + EXPECT_EQ(it, src.begin()); } TEST(ReadUntilCodePoint, ReadNoneNonContiguous) { auto src = scn::erased_range{"😊ab"sv}; auto it = scn::impl::read_until_code_point(src, is_smiling_emoji); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.begin()); + EXPECT_EQ(it, src.begin()); } TEST(ReadUntilCodePoint, ReadNoneNonBorrowed) { auto it = scn::impl::read_until_code_point(scn::erased_range{"😊ab"sv}, is_smiling_emoji); - ASSERT_TRUE(it); - static_assert(std::is_same_v); + static_assert(std::is_same_v); } TEST(ReadUntilCodePoint, ReadAllContiguous) { auto src = "abc"sv; auto it = scn::impl::read_until_code_point(src, is_smiling_emoji); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.end()); + EXPECT_EQ(it, src.end()); } TEST(ReadUntilCodePoint, ReadAllNonContiguous) { auto src = scn::erased_range{"abc"sv}; auto it = scn::impl::read_until_code_point(src, is_smiling_emoji); - ASSERT_TRUE(it); - EXPECT_EQ(*it, src.end()); + EXPECT_EQ(it, src.end()); } TEST(ReadUntilCodePoint, ReadAllNonBorrowed) { auto it = scn::impl::read_until_code_point(scn::erased_range{"abc"sv}, is_smiling_emoji); - ASSERT_TRUE(it); + static_assert(std::is_same_v); } // read_matching_code_unit diff --git a/tests/unittests/impl_tests/string_reader_test.cpp b/tests/unittests/impl_tests/string_reader_test.cpp index 2ed2b079..26c2b15f 100644 --- a/tests/unittests/impl_tests/string_reader_test.cpp +++ b/tests/unittests/impl_tests/string_reader_test.cpp @@ -132,9 +132,8 @@ struct test_type_pack { narrowed_val = val; } else { - auto r = scn::impl::transcode_to_string( + scn::impl::transcode_to_string( std::basic_string_view(val), narrowed_val); - SCN_EXPECT(r); } if (narrowed_val.size() != expected.size()) { @@ -332,10 +331,7 @@ class StringCharacterSetReaderTest : public testing::Test { SCN_GCC_PUSH SCN_GCC_IGNORE("-Wconversion") - { - auto r = scn::impl::transcode_to_string(f, tmp_specs_str); - SCN_EXPECT(r); - } + scn::impl::transcode_to_string(f, tmp_specs_str); auto widened_sv = std::wstring_view{tmp_specs_str}; SCN_GCC_POP diff --git a/tests/unittests/string_test.cpp b/tests/unittests/string_test.cpp index 953eb49b..68122ee9 100644 --- a/tests/unittests/string_test.cpp +++ b/tests/unittests/string_test.cpp @@ -149,7 +149,11 @@ TEST(StringTest, CharacterSetPresentationWideStringFromNarrowSource) TEST(StringTest, WonkyInput) { - auto result = scn::scan("o \x0f\n\n\xc3", "{:64c}"); - ASSERT_FALSE(result); - EXPECT_EQ(result.error().code(), scn::scan_error::invalid_encoding); + const char source[] = {'o', ' ', '\x0f', '\n', '\n', '\xc3'}; + auto input = std::string_view{source, sizeof(source)}; + + auto result = scn::scan(input, "{:64c}"); + ASSERT_TRUE(result); + EXPECT_TRUE(result->range().empty()); + EXPECT_EQ(result->value(), input); } diff --git a/tests/unittests/string_view_test.cpp b/tests/unittests/string_view_test.cpp index aa0f42ec..d65219d5 100644 --- a/tests/unittests/string_view_test.cpp +++ b/tests/unittests/string_view_test.cpp @@ -91,8 +91,9 @@ TEST(StringViewTest, InvalidUtf8) { auto source = std::string_view{"\x82\xf5"}; auto result = scn::scan(source, "{:64c}"); - ASSERT_FALSE(result); - EXPECT_EQ(result.error(), scn::scan_error::invalid_encoding); + ASSERT_TRUE(result); + EXPECT_TRUE(result->range().empty()); + EXPECT_EQ(result->value(), source); } TEST(StringViewTest, WonkyInput) @@ -113,8 +114,10 @@ TEST(StringViewTest, WonkyInput) TEST(StringViewTest, WonkyInput2) { const char source[] = {'o', ' ', '\x0f', '\n', '\n', '\xc3'}; - auto range = std::string_view{source, sizeof(source)}; + auto input = std::string_view{source, sizeof(source)}; - auto result = scn::scan(range, "{:64c}"); - ASSERT_FALSE(result); + auto result = scn::scan(input, "{:64c}"); + ASSERT_TRUE(result); + EXPECT_TRUE(result->range().empty()); + EXPECT_EQ(result->value(), input); }