Skip to content

Commit

Permalink
Fixes to custom format string parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
eliaskosunen committed Oct 23, 2024
1 parent f3b00ec commit cb455ae
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 107 deletions.
30 changes: 20 additions & 10 deletions include/scn/scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -3218,19 +3218,15 @@ class view_interface {
typename = std::enable_if_t<contiguous_iterator<iterator_t<R>>>>
constexpr auto data()
{
return ranges::empty(derived())
? nullptr
: std::addressof(*ranges::begin(derived()));
return detail::to_address(ranges::begin(derived()));
}

template <typename R = D,
typename = std::enable_if_t<
range<const R> && contiguous_iterator<iterator_t<const R>>>>
constexpr auto data() const
{
return ranges::empty(derived())
? nullptr
: std::addressof(*ranges::begin(derived()));
return detail::to_address(ranges::begin(derived()));
}

template <typename R = D,
Expand Down Expand Up @@ -4042,6 +4038,18 @@ inline constexpr char32_t decode_code_point_exhaustive_valid(
}
}

inline constexpr bool is_cp_space(char32_t cp) noexcept
{
    // Membership test for the Unicode Pattern_White_Space property.
    // The contiguous range U+0009..U+000D (TAB, LF, VT, FF, CR) is
    // checked first; the remaining members are isolated code points.
    if (0x09 <= cp && cp <= 0x0d) {
        return true;
    }

    switch (cp) {
        case 0x20:    // SPACE
        case 0x85:    // NEXT LINE (NEL)
        case 0x200e:  // LEFT-TO-RIGHT MARK
        case 0x200f:  // RIGHT-TO-LEFT MARK
        case 0x2028:  // LINE SEPARATOR
        case 0x2029:  // PARAGRAPH SEPARATOR
            return true;
        default:
            return false;
    }
}

} // namespace detail

/////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -4192,6 +4200,7 @@ class basic_scan_buffer<CharT>::forward_iterator {

bool stores_parent() const
{
    // Debug-time precondition: m_begin must be non-null whenever this
    // query is made.
    assert(m_begin);

    // True exactly when m_end is null.
    // NOTE(review): judging by the name, a null m_end presumably means
    // m_begin refers to the parent buffer -- confirm against the member
    // declarations.
    const bool end_is_null = (m_end == nullptr);
    return end_is_null;
}

Expand Down Expand Up @@ -4937,8 +4946,9 @@ class arg_value {
auto& pctx_ref = *static_cast<parse_context_type*>(pctx);
auto& ctx_ref = *static_cast<context_type*>(ctx);

SCN_TRY_ERR(_, s.parse(pctx_ref));
SCN_UNUSED(_);
SCN_TRY_ERR(fmt_it, s.parse(pctx_ref));
pctx_ref.advance_to(fmt_it);

SCN_TRY_ERR(it, s.scan(arg_ref, ctx_ref));
ctx_ref.advance_to(SCN_MOVE(it));

Expand Down Expand Up @@ -7867,7 +7877,7 @@ class format_string_checker {
};

template <typename Source, typename... Args, typename Str>
auto check_format_string(const Str&)
constexpr auto check_format_string(const Str&)
-> std::enable_if_t<!is_compile_string_v<Str>>
{
// TODO: SCN_ENFORCE_COMPILE_STRING?
Expand All @@ -7879,7 +7889,7 @@ auto check_format_string(const Str&)
}

template <typename Source, typename... Args, typename Str>
auto check_format_string(Str format_str)
constexpr auto check_format_string(Str format_str)
-> std::enable_if_t<is_compile_string_v<Str>>
{
using char_type = typename Str::char_type;
Expand Down
4 changes: 2 additions & 2 deletions src/scn/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,15 +193,15 @@ std::string_view::iterator find_classic_space_narrow_fast(
{
return find_classic_impl(
source, [](char ch) { return is_ascii_space(ch); },
[](char32_t cp) { return is_cp_space(cp); });
[](char32_t cp) { return detail::is_cp_space(cp); });
}

std::string_view::iterator find_classic_nonspace_narrow_fast(
std::string_view source)
{
return find_classic_impl(
source, [](char ch) { return !is_ascii_space(ch); },
[](char32_t cp) { return !is_cp_space(cp); });
[](char32_t cp) { return !detail::is_cp_space(cp); });
}

std::string_view::iterator find_nondecimal_digit_narrow_fast(
Expand Down
21 changes: 5 additions & 16 deletions src/scn/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1195,18 +1195,6 @@ constexpr auto get_next_code_point_valid(std::basic_string_view<CharT> input)
detail::decode_code_point_exhaustive_valid(input.substr(0, len))};
}

constexpr bool is_cp_space(char32_t cp) noexcept
{
    // Reports whether `cp` belongs to the Unicode Pattern_White_Space
    // set, split into named groups for readability.

    // U+0009..U+000D plus U+0020: the ASCII space characters
    const bool ascii_space = (cp >= 0x09 && cp <= 0x0d) || cp == 0x20;
    if (ascii_space) {
        return true;
    }

    // U+0085 NEXT LINE (NEL)
    if (cp == 0x85) {
        return true;
    }

    // U+200E LEFT-TO-RIGHT MARK, U+200F RIGHT-TO-LEFT MARK
    const bool directional_mark = cp == 0x200e || cp == 0x200f;
    // U+2028 LINE SEPARATOR, U+2029 PARAGRAPH SEPARATOR
    const bool separator = cp == 0x2028 || cp == 0x2029;
    return directional_mark || separator;
}

template <typename CharT>
struct is_first_char_space_result {
ranges::iterator_t<std::basic_string_view<CharT>> iterator;
Expand All @@ -1221,7 +1209,7 @@ inline constexpr auto is_first_char_space(std::basic_string_view<CharT> str)
// TODO: optimize
SCN_EXPECT(!str.empty());
auto res = get_next_code_point(str);
return {res.iterator, res.value, is_cp_space(res.value)};
return {res.iterator, res.value, detail::is_cp_space(res.value)};
}

inline constexpr scan_expected<wchar_t> encode_code_point_as_wide_character(
Expand Down Expand Up @@ -2107,7 +2095,7 @@ auto read_until_classic_space(Range range) -> ranges::const_iterator_t<Range>

return read_until_code_point(
ranges::subrange{it, range.end()},
[](char32_t cp) noexcept { return is_cp_space(cp); });
[](char32_t cp) noexcept { return detail::is_cp_space(cp); });
}
}

Expand All @@ -2134,8 +2122,9 @@ auto read_while_classic_space(Range range) -> ranges::const_iterator_t<Range>
ranges::advance(it, seg.size());
}

return read_while_code_point(
range, [](char32_t cp) noexcept { return is_cp_space(cp); });
return read_while_code_point(range, [](char32_t cp) noexcept {
return detail::is_cp_space(cp);
});
}
}

Expand Down
195 changes: 116 additions & 79 deletions tests/unittests/custom_type_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,131 +19,168 @@

#include <scn/scan.h>

struct mytype {
int i{}, j{};
// Simple wrapper over an `int`, inherits all its scanning properties
struct integer_wrapper {
int value{};
};

template <>
struct scn::scanner<mytype, char> : scn::scanner<std::string, char> {
struct scn::scanner<integer_wrapper, char> : scn::scanner<int, char> {
template <typename Context>
scn::scan_expected<typename Context::iterator> scan(mytype& val,
scn::scan_expected<typename Context::iterator> scan(integer_wrapper& val,
Context& ctx) const
{
return scn::scan<int, int>(ctx.range(), "{} {}")
.transform([&](auto result) {
std::tie(val.i, val.j) = result.values();
return result.begin();
});
return scn::scanner<int, char>::scan(val.value, ctx);
}
};

struct mytype2 {
char ch{};
};
TEST(CustomTypeTest, IntegerWrapperWithDefaultFormatString)
{
auto result = scn::scan<integer_wrapper>("123", "{}");
ASSERT_TRUE(result);
EXPECT_EQ(*result->begin(), '\0');

template <>
struct scn::scanner<mytype2, char> : scn::scanner<std::string, char> {
template <typename Context>
scn::scan_expected<typename Context::iterator> scan(mytype2& val,
Context& ctx) const
{
return scn::scan<scn::discard<char>, char>(ctx.range(), "{} {}")
.transform([&](auto result) {
std::tie(std::ignore, val.ch) = result.values();
return result.begin();
});
}
};
const auto val = result->value().value;
EXPECT_EQ(val, 123);
}

TEST(CustomTypeTest, IntegerWrapperWithCustomFormatString)
{
auto result = scn::scan<integer_wrapper>("123", "{:x}");
ASSERT_TRUE(result);
EXPECT_EQ(*result->begin(), '\0');

struct mytype3 {
int i{};
const auto val = result->value().value;
EXPECT_EQ(val, 0x123);
}

// Wrapper over a variant,
// with fully custom format string parsing
struct variant_wrapper {
std::variant<int, char, double, std::string> value{};
};

template <>
struct scn::scanner<mytype3> {
template <typename ParseCtx>
constexpr auto parse(ParseCtx& pctx)
-> scan_expected<typename ParseCtx::iterator>
struct scn::scanner<variant_wrapper, char> {
template <typename ParseContext>
constexpr scn::scan_expected<typename ParseContext::iterator> parse(
ParseContext& pctx)
{
return pctx.begin();
if (pctx.begin() == pctx.end() || *pctx.begin() == '}') {
return scn::unexpected(pctx.on_error(
"Invalid format string: format specifier required"));
}

auto it = pctx.begin();
switch (*it) {
case 'i':
format = format_int;
break;
case 'c':
format = format_char;
break;
case 'f':
format = format_double;
break;
case 's':
format = format_string;
break;
default:
return scn::unexpected(pctx.on_error(
"Invalid format string: invalid format specifier"));
}
return ++it;
}

template <typename Context>
auto scan(mytype3& val, Context& ctx) const
-> scan_expected<typename Context::iterator>
scn::scan_expected<typename Context::iterator> scan(variant_wrapper& val,
Context& ctx) const
{
return scn::scan<int>(ctx.range(), "{}").transform([&](auto result) {
val = {result.value()};
return result.begin();
});
switch (format) {
case format_int: {
return scn::scanner<int, char>{}.scan(val.value.emplace<int>(),
ctx);
}
case format_char:
return scn::scanner<char, char>{}.scan(
val.value.emplace<char>(), ctx);
case format_double:
return scn::scanner<double, char>{}.scan(
val.value.emplace<double>(), ctx);
case format_string:
return scn::scanner<std::string, char>{}.scan(
val.value.emplace<std::string>(), ctx);
}

SCN_UNREACHABLE;
}

private:
enum {
format_int,
format_char,
format_double,
format_string,
} format{};
};

TEST(CustomTypeTest, Simple)
TEST(CustomTypeTest, VariantWrapperWithDefaultFormatString)
{
auto result = scn::scan<mytype>("123 456", "{}");
ASSERT_TRUE(result);
EXPECT_EQ(*result->begin(), '\0');

const auto& val = std::get<0>(result->values());
EXPECT_EQ(val.i, 123);
EXPECT_EQ(val.j, 456);
auto result = scn::scan<variant_wrapper>("123", "{}");
ASSERT_FALSE(result);
EXPECT_EQ(result.error().code(), scn::scan_error::invalid_format_string);
}

TEST(CustomTypeTest, DISABLED_CustomFormatString)
TEST(CustomTypeTest, VariantWrapperWithIntegerFormat)
{
auto input = std::string_view{"123 456"};
auto result = scn::scan<mytype>(input, "{:6}");
auto result = scn::scan<variant_wrapper>("123", "{:i}");
ASSERT_TRUE(result);
EXPECT_EQ(result->begin(), input.end() - 1);
EXPECT_EQ(result->value().i, 123);
EXPECT_EQ(result->value().j, 45);
EXPECT_EQ(*result->begin(), '\0');

const auto& val = result->value().value;
ASSERT_TRUE(std::holds_alternative<int>(val));
EXPECT_EQ(std::get<int>(val), 123);
}

TEST(CustomTypeTest, Multiple)
TEST(CustomTypeTest, VariantWrapperWithCharFormat)
{
auto result = scn::scan<mytype, mytype>("12 34 56 78", "{} {}");
auto result = scn::scan<variant_wrapper>("123", "{:c}");
ASSERT_TRUE(result);
EXPECT_EQ(*result->begin(), '\0');
EXPECT_STREQ(result->begin(), "23");

const auto& [a, b] = result->values();
EXPECT_EQ(a.i, 12);
EXPECT_EQ(a.j, 34);
EXPECT_EQ(b.i, 56);
EXPECT_EQ(b.j, 78);
const auto& val = result->value().value;
ASSERT_TRUE(std::holds_alternative<char>(val));
EXPECT_EQ(std::get<char>(val), '1');
}

TEST(CustomTypeTest, Surrounded)
TEST(CustomTypeTest, VariantWrapperWithDoubleFormat)
{
auto result = scn::scan<int, mytype, int>("1 2 3 4", "{} {} {}");
auto result = scn::scan<variant_wrapper>("123", "{:f}");
ASSERT_TRUE(result);
EXPECT_EQ(*result->begin(), '\0');

const auto& [a, val, b] = result->values();
EXPECT_EQ(a, 1);
EXPECT_EQ(val.i, 2);
EXPECT_EQ(val.j, 3);
EXPECT_EQ(b, 4);
const auto& val = result->value().value;
ASSERT_TRUE(std::holds_alternative<double>(val));
EXPECT_DOUBLE_EQ(std::get<double>(val), 123.0);
}

TEST(CustomTypeTest, WhiteSpaceNotSkipped)
TEST(CustomTypeTest, VariantWrapperWithStringFormat)
{
auto result = scn::scan<mytype2>(" abc", "{}");
auto result = scn::scan<variant_wrapper>("123", "{:s}");
ASSERT_TRUE(result);
EXPECT_STREQ(result->range().data(), "bc");
EXPECT_EQ(*result->begin(), '\0');

EXPECT_EQ(result->value().ch, 'a');
const auto& val = result->value().value;
ASSERT_TRUE(std::holds_alternative<std::string>(val));
EXPECT_EQ(std::get<std::string>(val), "123");
}

TEST(CustomTypeTest, OnlyEmptyFormatStringAllowed)
TEST(CustomTypeTest, VariantWrapperInvalidFormat)
{
auto result = scn::scan<mytype3>("42", "{}");
ASSERT_TRUE(result);
EXPECT_EQ(result->value().i, 42);
}
auto result =
scn::scan<variant_wrapper>("123", scn::runtime_format("{:d}"));
ASSERT_FALSE(result);

TEST(CustomTypeTest, OnlyEmptyFormatStringAllowed_Failure)
{
auto result = scn::scan<mytype3>("42", scn::runtime_format("{:d}"));
result = scn::scan<variant_wrapper>("123", scn::runtime_format("{}"));
ASSERT_FALSE(result);
}

0 comments on commit cb455ae

Please sign in to comment.