Skip to content

Commit

Permalink
Remove invalid_encoding errors
Browse files Browse the repository at this point in the history
  • Loading branch information
eliaskosunen committed Sep 28, 2023
1 parent f28cdd0 commit 292b93d
Show file tree
Hide file tree
Showing 20 changed files with 426 additions and 455 deletions.
4 changes: 1 addition & 3 deletions include/scn/detail/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ namespace scn {
/// Scanned value was out of range for the desired type.
/// (e.g. `>2^32` for an `uint32_t`)
value_out_of_range,
/// Source range has invalid (utf-8 or utf-16) encoding
invalid_encoding,
/// The source range emitted an error that cannot be recovered
/// from. The library can't use the source range in this state.
/// Can only happen when using an istream as the input.
Expand Down Expand Up @@ -95,7 +93,7 @@ namespace scn {
return m_code;
}
/// Get error message
SCN_NODISCARD constexpr auto msg() const SCN_NOEXCEPT -> const char*
SCN_NODISCARD constexpr auto msg() const SCN_NOEXCEPT->const char*
{
return m_msg;
}
Expand Down
8 changes: 4 additions & 4 deletions include/scn/detail/format_string_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -649,8 +649,8 @@ namespace scn {

if constexpr (sizeof(CharT) == 1) {
// UTF-8
auto cp =
decode_utf8_code_point(std::string_view{&*cp_begin, len});
auto cp = decode_utf8_code_point_exhaustive(
std::string_view{&*cp_begin, len});
if (SCN_UNLIKELY(cp == invalid_code_point)) {
handler.on_error(
"Invalid Unicode code point in format string argument");
Expand All @@ -660,8 +660,8 @@ namespace scn {
}
else if constexpr (sizeof(CharT) == 2) {
// UTF-16
auto cp =
decode_utf16_code_point(std::wstring_view{&*cp_begin, len});
auto cp = decode_utf16_code_point_exhaustive(
std::wstring_view{&*cp_begin, len});
if (SCN_UNLIKELY(cp == invalid_code_point)) {
handler.on_error(
"Invalid Unicode code point in format string argument");
Expand Down
63 changes: 61 additions & 2 deletions include/scn/detail/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ namespace scn {

inline constexpr char32_t invalid_code_point = 0x110000;

inline constexpr char32_t decode_utf8_code_point(std::string_view input)
inline constexpr char32_t decode_utf8_code_point_exhaustive(
std::string_view input)
{
SCN_EXPECT(!input.empty() && input.size() <= 4);

Expand Down Expand Up @@ -165,8 +166,66 @@ namespace scn {
SCN_UNREACHABLE;
}

/// Decode one UTF-8 sequence, assumed to be well-formed, into its code
/// point.
///
/// `input` must hold exactly one encoded code point, i.e. between 1 and 4
/// code units. This function has no error return: the shape of the
/// sequence (lead byte pattern matching the length, continuation markers
/// on every following unit, 4-unit lead byte no greater than 0xf4) is
/// stated as a precondition through SCN_EXPECT. Nothing beyond those
/// bit patterns is checked here.
inline constexpr char32_t decode_utf8_code_point_exhaustive_valid(
    std::string_view input)
{
    SCN_EXPECT(!input.empty() && input.size() <= 4);

    // Continuation units have the bit pattern 10xxxxxx.
    const auto is_continuation = [](char unit) {
        return (static_cast<unsigned char>(unit) & 0xc0) == 0x80;
    };

    // Keeps the payload bits selected by `mask` and moves them to their
    // position in the resulting code point. The mask also discards any
    // sign extension coming from a signed `char`.
    const auto payload = [](char unit, char32_t mask,
                            int shift) -> char32_t {
        return (static_cast<char32_t>(unit) & mask) << shift;
    };

    switch (input.size()) {
        case 1: {
            // 0xxxxxxx: the code unit is the code point
            SCN_EXPECT(static_cast<unsigned char>(input[0]) < 0x80);
            return static_cast<char32_t>(input[0]);
        }

        case 2: {
            // 110xxxxx 10xxxxxx
            SCN_EXPECT((static_cast<unsigned char>(input[0]) & 0xe0) ==
                       0xc0);
            SCN_EXPECT(is_continuation(input[1]));

            return payload(input[0], 0x1f, 6) |
                   payload(input[1], 0x3f, 0);
        }

        case 3: {
            // 1110xxxx 10xxxxxx 10xxxxxx
            SCN_EXPECT((static_cast<unsigned char>(input[0]) & 0xf0) ==
                       0xe0);
            SCN_EXPECT(is_continuation(input[1]));
            SCN_EXPECT(is_continuation(input[2]));

            return payload(input[0], 0x0f, 12) |
                   payload(input[1], 0x3f, 6) |
                   payload(input[2], 0x3f, 0);
        }

        case 4: {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            SCN_EXPECT((static_cast<unsigned char>(input[0]) & 0xf8) ==
                       0xf0);
            SCN_EXPECT(static_cast<unsigned char>(input[0]) <= 0xf4);
            SCN_EXPECT(is_continuation(input[1]));
            SCN_EXPECT(is_continuation(input[2]));
            SCN_EXPECT(is_continuation(input[3]));

            return payload(input[0], 0x07, 18) |
                   payload(input[1], 0x3f, 12) |
                   payload(input[2], 0x3f, 6) |
                   payload(input[3], 0x3f, 0);
        }

        default:
            break;
    }

    // Every length permitted by the first precondition returned above.
    SCN_EXPECT(false);
    SCN_UNREACHABLE;
}

template <typename CharT>
inline constexpr char32_t decode_utf16_code_point(
inline constexpr char32_t decode_utf16_code_point_exhaustive(
std::basic_string_view<CharT> input)
{
if constexpr (sizeof(CharT) == 2) {
Expand Down
1 change: 1 addition & 0 deletions include/scn/util/expected_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <array>
#include <cassert>
#include <functional>
#include <memory>
#include <type_traits>
#include <utility>

Expand Down
Loading

0 comments on commit 292b93d

Please sign in to comment.