Skip to content

Commit

Permalink
Add regex presentation_type for strings
Browse files Browse the repository at this point in the history
  • Loading branch information
eliaskosunen committed Dec 1, 2023
1 parent c116f81 commit 91d9027
Show file tree
Hide file tree
Showing 9 changed files with 168 additions and 31 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -468,8 +468,6 @@ jobs:
- std
- Boost
- re2
- pcre
- ctre

steps:
- name: Setup CMake and Ninja
Expand Down
2 changes: 0 additions & 2 deletions cmake/buildflags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ function(get_config_flags flags)
set(regex_flag -DSCN_REGEX_BACKEND=1)
elseif (SCN_REGEX_BACKEND STREQUAL "re2")
set(regex_flag -DSCN_REGEX_BACKEND=2)
elseif (SCN_REGEX_BACKEND STREQUAL "ctre")
set(regex_flag -DSCN_REGEX_BACKEND=3)
endif ()
endif ()
set(${flags}
Expand Down
30 changes: 15 additions & 15 deletions cmake/dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -134,21 +134,21 @@ endif ()

# ctre

if (SCN_REGEX_BACKEND STREQUAL "ctre")
if (SCN_USE_EXTERNAL_REGEX_BACKEND)
find_package(ctre REQUIRED)
else ()
FetchContent_Declare(
ctre
GIT_REPOSITORY https://github.com/hanickadot/compile-time-regular-expressions.git
GIT_TAG v3.8.1
GIT_SHALLOW TRUE
)

list(APPEND SCN_OPTIONAL_DEPENDENCIES "ctre")
endif ()
set(SCN_REGEX_BACKEND_TARGET ctre::ctre)
endif ()
# if (SCN_REGEX_BACKEND STREQUAL "ctre")
# if (SCN_USE_EXTERNAL_REGEX_BACKEND)
# find_package(ctre REQUIRED)
# else ()
# FetchContent_Declare(
# ctre
# GIT_REPOSITORY https://github.com/hanickadot/compile-time-regular-expressions.git
# GIT_TAG v3.8.1
# GIT_SHALLOW TRUE
# )
#
# list(APPEND SCN_OPTIONAL_DEPENDENCIES "ctre")
# endif ()
# set(SCN_REGEX_BACKEND_TARGET ctre::ctre)
# endif ()

# make available

Expand Down
2 changes: 1 addition & 1 deletion cmake/options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ option(SCN_WERROR "Halt compilation in case of a warning" ${SCN_CI})

option(SCN_DISABLE_REGEX "Disable regex support" OFF)
set(SCN_REGEX_BACKEND "std" CACHE STRING "Regex backend to use")
set_property(CACHE SCN_REGEX_BACKEND PROPERTY STRINGS "std" "Boost" "re2" "ctre")
set_property(CACHE SCN_REGEX_BACKEND PROPERTY STRINGS "std" "Boost" "re2")

option(SCN_USE_EXTERNAL_SIMDUTF "Use find_package for simdutf, instead of FetchContent" OFF)
option(SCN_USE_EXTERNAL_FAST_FLOAT "Use find_package for fast_float, instead of FetchContent" OFF)
Expand Down
3 changes: 0 additions & 3 deletions cmake/scn-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ endif ()
if ((@SCN_REGEX_BACKEND@ STREQUAL "re2") AND @SCN_USE_EXTERNAL_REGEX_BACKEND@)
find_dependency(re2 11.0.0)
endif ()
if ((@SCN_REGEX_BACKEND@ STREQUAL "ctre") AND @SCN_USE_EXTERNAL_REGEX_BACKEND@)
find_dependency(ctre)
endif ()

check_required_components(scn)

Expand Down
8 changes: 4 additions & 4 deletions include/scn/detail/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@
#define SCN_REGEX_BACKEND_BOOST 1
// Google RE2
#define SCN_REGEX_BACKEND_RE2 2
// PCRE (Perl Compatible Regular Expressions)
#define SCN_REGEX_BACKEND_PCRE 3
// CTRE (Compile-Time Regular Expressions)
#define SCN_REGEX_BACKEND_CTRE 4
// TODO: PCRE (Perl Compatible Regular Expressions)
// #define SCN_REGEX_BACKEND_PCRE 3
// TODO: CTRE (Compile-Time Regular Expressions)
// #define SCN_REGEX_BACKEND_CTRE 4

// Default to std::regex
#ifndef SCN_REGEX_BACKEND
Expand Down
56 changes: 56 additions & 0 deletions src/scn/impl/reader/regex_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
#pragma once

#include <scn/detail/regex.h>

#if !SCN_DISABLE_REGEX

#include <scn/impl/reader/common.h>

#if SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_STD
Expand All @@ -32,6 +35,57 @@ namespace scn {
SCN_BEGIN_NAMESPACE

namespace impl {
/**
 * Match `pattern` against the beginning of `input`, using the regex backend
 * selected with SCN_REGEX_BACKEND.
 *
 * The std and Boost backends search with `match_continuous`, and the RE2
 * backend uses `RE2::Consume`, so a match is only accepted if it starts at
 * the first character of `input`.
 *
 * \param pattern  Regular expression source, in the syntax of the backend.
 * \param input    Contiguous characters to match against.
 *
 * \return An iterator into `input`, pointing one past the end of the whole
 *         match. On failure:
 *         - `scan_error::invalid_format_string`, if `pattern` could not be
 *           compiled by the backend,
 *         - `scan_error::invalid_scanned_value`, if `pattern` did not match
 *           at the start of `input`.
 */
template <typename CharT>
auto read_regex_string_impl(std::basic_string_view<CharT> pattern,
                            std::basic_string_view<CharT> input)
    -> scan_expected<typename std::basic_string_view<CharT>::iterator>
{
#if SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_STD
    // std::basic_regex reports an ill-formed pattern by throwing
    // std::regex_error. Translate that into the same error value the RE2
    // branch returns, so that a bad pattern never escapes as an exception.
    std::basic_regex<CharT> re{};
    try {
        re = std::basic_regex<CharT>{pattern.data(), pattern.size()};
    }
    catch (const std::regex_error&) {
        return unexpected_scan_error(scan_error::invalid_format_string,
                                     "Failed to parse regular expression");
    }

    std::match_results<const CharT*> matches{};
    bool found = std::regex_search(
        input.data(), input.data() + input.size(), matches, re,
        std::regex_constants::match_continuous);
    if (!found || matches.prefix().matched) {
        return unexpected_scan_error(scan_error::invalid_scanned_value,
                                     "Regular expression didn't match");
    }
    // matches[0] is the whole match; its end is where scanning stopped
    return input.begin() +
           ranges::distance(input.data(), matches[0].second);
#elif SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_BOOST
    // no_except makes Boost.Regex report an ill-formed pattern through
    // status() instead of throwing.
    auto re = boost::basic_regex<CharT>{
        pattern.data(), pattern.size(),
        boost::regex_constants::normal |
            boost::regex_constants::no_except};
    if (re.status() != 0) {
        return unexpected_scan_error(scan_error::invalid_format_string,
                                     "Failed to parse regular expression");
    }

    boost::match_results<const CharT*> matches{};
    bool found = boost::regex_search(
        input.data(), input.data() + input.size(), matches, re,
        boost::regex_constants::match_continuous);
    if (!found || matches.prefix().matched) {
        return unexpected_scan_error(scan_error::invalid_scanned_value,
                                     "Regular expression didn't match");
    }
    // matches[0] is the whole match; its end is where scanning stopped
    return input.begin() +
           ranges::distance(input.data(), matches[0].second);
#elif SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_RE2
    static_assert(std::is_same_v<CharT, char>);
    // RE2::Quiet suppresses error logging; failure is checked with ok()
    auto re = re2::RE2{pattern, RE2::Quiet};
    if (!re.ok()) {
        return unexpected_scan_error(
            scan_error::invalid_format_string,
            "Failed to parse regular expression");
    }
    // Consume advances `new_input` past the matched text, so the distance
    // between the two data pointers is the length of the match.
    auto new_input = input;
    bool found = re2::RE2::Consume(&new_input, re);
    if (!found) {
        return unexpected_scan_error(scan_error::invalid_scanned_value,
                                     "Regular expression didn't match");
    }
    return input.begin() +
           ranges::distance(input.data(), new_input.data());
#else
#error "Unsupported SCN_REGEX_BACKEND"
#endif
}

template <typename CharT>
auto read_regex_matches_impl(std::basic_string_view<CharT> pattern,
std::basic_string_view<CharT> input,
Expand Down Expand Up @@ -192,3 +246,5 @@ namespace scn {

SCN_END_NAMESPACE
} // namespace scn

#endif
67 changes: 66 additions & 1 deletion src/scn/impl/reader/string_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <scn/detail/format_string_parser.h>
#include <scn/impl/algorithms/take_width_view.h>
#include <scn/impl/reader/common.h>
#include <scn/impl/reader/regex_reader.h>
#include <scn/impl/util/ascii_ctype.h>
#include <scn/impl/util/bits.h>

Expand Down Expand Up @@ -126,6 +127,59 @@ namespace scn {
}
};

#if !SCN_DISABLE_REGEX
/**
 * Reads a string value by matching a regular expression against the start
 * of the source range.
 *
 * The end of the match is located with `read_regex_string_impl`, after which
 * the matched portion is handed to `read_string_impl` /
 * `read_string_view_impl` to produce the value.
 */
template <typename SourceCharT>
class regex_string_reader_impl {
public:
    /// Match `pattern` against `range`, and store the result in an owning
    /// string.
    template <typename Range, typename ValueCharT>
    auto read(Range&& range,
              std::basic_string_view<SourceCharT> pattern,
              std::basic_string<ValueCharT>& value)
        -> scan_expected<simple_borrowed_iterator_t<Range>>
    {
        SCN_TRY(match_end, find_match_end(range, pattern));
        return read_string_impl(range, match_end, value);
    }

    /// Match `pattern` against `range`, and store the result in a string
    /// view.
    template <typename Range, typename ValueCharT>
    auto read(Range&& range,
              std::basic_string_view<SourceCharT> pattern,
              std::basic_string_view<ValueCharT>& value)
        -> scan_expected<simple_borrowed_iterator_t<Range>>
    {
        SCN_TRY(match_end, find_match_end(range, pattern));
        return read_string_view_impl(range, match_end, value);
    }

private:
    /// Returns an iterator into `range`, pointing to the end of the regex
    /// match, or an error if the range or character type can't be used
    /// with the regex backend.
    ///
    /// The branches must stay in a single `if constexpr` chain: the last
    /// one calls `ranges::data`, and is only valid for contiguous ranges.
    template <typename Range>
    scan_expected<simple_borrowed_iterator_t<Range>> find_match_end(
        Range&& range,
        std::basic_string_view<SourceCharT> pattern)
    {
        if constexpr (!ranges::contiguous_range<Range>) {
            return unexpected_scan_error(
                scan_error::invalid_scanned_value,
                "Cannot use regex with a non-contiguous source range");
        }
        else if constexpr (!SCN_REGEX_SUPPORTS_WIDE_STRINGS &&
                           !std::is_same_v<SourceCharT, char>) {
            return unexpected_scan_error(
                scan_error::invalid_scanned_value,
                "Regex backend doesn't support wide strings as input");
        }
        else {
            // View the whole source as a string_view for the regex backend
            auto first = ranges::data(range);
            auto last = ranges::data(range) + ranges::size(range);
            auto source_view =
                detail::make_string_view_from_pointers(first, last);

            SCN_TRY(view_match_end,
                    read_regex_string_impl(pattern, source_view));

            // Translate the position in the view back to the source range
            auto match_length =
                ranges::distance(source_view.begin(), view_match_end);
            return ranges::begin(range) + match_length;
        }
    }
};
#endif

template <typename SourceCharT>
class character_reader_impl {
public:
Expand Down Expand Up @@ -434,6 +488,11 @@ namespace scn {
case detail::presentation_type::string_set:
m_type = reader_type::character_set;
break;

case detail::presentation_type::regex:
case detail::presentation_type::regex_escaped:
m_type = reader_type::regex;
break;
}

SCN_CLANG_POP // -Wswitch-enum, -Wcovered-switch-default
Expand Down Expand Up @@ -466,7 +525,7 @@ namespace scn {
}

protected:
enum class reader_type { word, character, character_set };
enum class reader_type { word, character, character_set, regex };

template <typename Range, typename Value>
scan_expected<simple_borrowed_iterator_t<Range>> read_impl(
Expand All @@ -490,6 +549,12 @@ namespace scn {
return character_set_reader_impl<SourceCharT>{}.read(
SCN_FWD(range), specs, value);

#if !SCN_DISABLE_REGEX
case reader_type::regex:
return regex_string_reader_impl<SourceCharT>{}.read(
SCN_FWD(range), specs.charset_string, value);
#endif

default:
SCN_EXPECT(false);
SCN_UNREACHABLE;
Expand Down
29 changes: 26 additions & 3 deletions tests/unittests/regex_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,35 @@

using namespace std::string_view_literals;

// Scanning a std::string with a regex presentation type:
// the value is the text matched by the pattern.
TEST(RegexTest, String)
{
    auto result = scn::scan<std::string>("foobar123", "{:/([a-zA-Z]+)/}");
    ASSERT_TRUE(result);
    EXPECT_EQ(result->value(), "foobar");
    // The pattern only accepts letters, so the source is not exhausted
    EXPECT_FALSE(result->range().empty());
}

// Scanning a std::string_view with a regex presentation type:
// the value is the text matched by the pattern.
TEST(RegexTest, StringView)
{
    auto result =
        scn::scan<std::string_view>("foobar123", "{:/([a-zA-Z]+)/}");
    ASSERT_TRUE(result);
    EXPECT_EQ(result->value(), "foobar");
    // The pattern only accepts letters, so the source is not exhausted
    EXPECT_FALSE(result->range().empty());
}

TEST(RegexTest, Matches)
{
auto r = scn::scan<scn::regex_matches>("foobar", "{:/[a-zA-Z]+/}");
auto r =
scn::scan<scn::regex_matches>("foobar123", "{:/([a-zA-Z]+)([0-9]+)/}");
ASSERT_TRUE(r);
EXPECT_TRUE(r->range().empty());
EXPECT_THAT(r->value().matches,
testing::ElementsAre(testing::Optional(testing::Property(
&scn::regex_matches::match::get, "foobar"sv))));
testing::ElementsAre(
testing::Optional(testing::Property(
&scn::regex_matches::match::get, "foobar123"sv)),
testing::Optional(testing::Property(
&scn::regex_matches::match::get, "foobar"sv)),
testing::Optional(testing::Property(
&scn::regex_matches::match::get, "123"sv))));
}

0 comments on commit 91d9027

Please sign in to comment.