Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more constexpr #81

Merged
merged 3 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ portions of modified code from the Chromium project licensed as follows:

-------------------------------------------------------------------------------

Files config.h, url_utf.cpp, url_utf.h contains portions of modified code from
the ICU project licensed as follows:
Files config.h, url_utf.h contains portions of modified code from the ICU
project licensed as follows:

UNICODE LICENSE V3

Expand Down
21 changes: 11 additions & 10 deletions include/upa/str_arg.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ inline void procfn(StrT&& str) {
#ifndef UPA_STR_ARG_H
#define UPA_STR_ARG_H

#include "config.h"
#include "url_utf.h"
#include <cassert>
#include <cstddef>
Expand Down Expand Up @@ -76,26 +77,26 @@ class str_arg {
>;

// constructors
str_arg(const str_arg&) noexcept = default;
constexpr str_arg(const str_arg&) noexcept = default;

// Constructs from a pointer only: the end of the range is computed as
// s + traits_type::length(s).
str_arg(const CharT* s)
constexpr str_arg(const CharT* s)
: first_(s)
, last_(s + traits_type::length(s))
{}

// Constructs from a pointer and a length; the range is [s, s + length).
// Enabled only for SizeT types accepted by is_size_type_v. The assert rejects
// a negative length (relevant when SizeT is a signed type).
template <typename SizeT, std::enable_if_t<is_size_type_v<SizeT>, int> = 0>
str_arg(const CharT* s, SizeT length)
constexpr str_arg(const CharT* s, SizeT length)
: first_(s)
, last_(s + length)
{ assert(length >= 0); }

// Constructs from a pointer pair; the range is [first, last).
// The assert checks that the range is not reversed.
str_arg(const CharT* first, const CharT* last)
constexpr str_arg(const CharT* first, const CharT* last)
: first_(first)
, last_(last)
{ assert(first <= last); }

// destructor
~str_arg() noexcept = default;
UPA_CONSTEXPR_20 ~str_arg() noexcept = default;

// assignment is not used
str_arg& operator=(const str_arg&) = delete;
Expand Down Expand Up @@ -176,7 +177,7 @@ constexpr bool convertible_to_string_view_v =
// Common str_arg conversion for argument types ArgT that provide data() and
// size(). `type` is the character type CharT of the resulting str_arg.
template<typename CharT, class ArgT>
struct str_arg_char_common {
using type = CharT;
// Builds a str_arg<CharT> from str.data() and str.size().
static str_arg<CharT> to_str_arg(ArgT str) {
static constexpr str_arg<CharT> to_str_arg(ArgT str) {
return { str.data(), str.size() };
}
};
Expand Down Expand Up @@ -234,7 +235,7 @@ struct str_arg_char : detail::str_arg_char_default<StrT> {};
// Specialization for pointers to character types (selected when
// is_char_type_v holds for the pointee with cv/ref qualifiers removed).
// `type` is that unqualified character type.
template<class CharT>
struct str_arg_char<CharT*, std::enable_if_t<is_char_type_v<remove_cvref_t<CharT>>>> {
using type = remove_cvref_t<CharT>;
// Returns s converted implicitly to str_arg<type> (pointer-only construction).
static str_arg<type> to_str_arg(const type* s) {
static constexpr str_arg<type> to_str_arg(const type* s) {
return s;
}
};
Expand All @@ -243,7 +244,7 @@ struct str_arg_char<CharT*, std::enable_if_t<is_char_type_v<remove_cvref_t<CharT
// Specialization for arguments that already are str_arg<CharT>:
// `type` is CharT and the conversion is the identity.
template<class CharT>
struct str_arg_char<str_arg<CharT>> {
using type = CharT;
// Returns the passed str_arg unchanged.
static str_arg<type> to_str_arg(str_arg<type> s) {
static constexpr str_arg<type> to_str_arg(str_arg<type> s) {
return s;
}
};
Expand All @@ -267,7 +268,7 @@ using enable_if_str_arg_t = std::enable_if_t<
// String arguments helper function

// Converts a string argument to str_arg<str_arg_char_t<StrT>> by perfectly
// forwarding it to str_arg_char_s<StrT>::to_str_arg.
template <class StrT>
inline auto make_str_arg(StrT&& str) -> str_arg<str_arg_char_t<StrT>> {
constexpr auto make_str_arg(StrT&& str) -> str_arg<str_arg_char_t<StrT>> {
return str_arg_char_s<StrT>::to_str_arg(std::forward<StrT>(str));
}

Expand Down Expand Up @@ -304,7 +305,7 @@ inline std::string&& make_string(std::string&& str) {
}

// Overload enabled through enable_if_str_arg_to_char8_t<StrT>.
// Returns a string_view built from the data() and length() of the str_arg
// made from str.
template <class StrT, enable_if_str_arg_to_char8_t<StrT> = 0>
inline string_view make_string(StrT&& str) {
constexpr string_view make_string(StrT&& str) {
const auto inp = make_str_arg(std::forward<StrT>(str));
return { inp.data(), inp.length() };
}
Expand Down
18 changes: 9 additions & 9 deletions include/upa/url.h
Original file line number Diff line number Diff line change
Expand Up @@ -907,7 +907,7 @@ class url_parser {
// part start
extern const uint8_t kPartStart[url::PART_COUNT];

inline int port_from_str(const char* first, const char* last) noexcept {
constexpr int port_from_str(const char* first, const char* last) noexcept {
int port = 0;
for (auto it = first; it != last; ++it) {
port = port * 10 + (*it - '0');
Expand All @@ -930,7 +930,7 @@ constexpr bool is_removable_char(CharT ch) noexcept {
}

template <typename CharT>
inline void do_trim(const CharT*& first, const CharT*& last) noexcept {
constexpr void do_trim(const CharT*& first, const CharT*& last) noexcept {
// remove leading C0 controls and space
while (first < last && is_trim_char(*first))
++first;
Expand Down Expand Up @@ -965,7 +965,7 @@ inline void do_remove_whitespace(const CharT*& first, const CharT*& last, simple
// reverse find

template<class InputIt, class T>
inline InputIt find_last(InputIt first, InputIt last, const T& value) {
constexpr InputIt find_last(InputIt first, InputIt last, const T& value) {
for (auto it = last; it > first;) {
--it;
if (*it == value) return it;
Expand Down Expand Up @@ -1023,7 +1023,7 @@ constexpr bool is_normalized_windows_drive(CharT c1, CharT c2) noexcept {

// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
template <typename CharT>
inline bool starts_with_windows_drive(const CharT* pointer, const CharT* last) noexcept {
constexpr bool starts_with_windows_drive(const CharT* pointer, const CharT* last) noexcept {
const auto length = last - pointer;
return
(length == 2 || (length > 2 && detail::is_special_authority_end_char(pointer[2]))) &&
Expand All @@ -1042,7 +1042,7 @@ inline bool starts_with_windows_drive(const CharT* pointer, const CharT* last) n

// Check url's pathname has Windows drive, i.e. starts with "/C:/" or is "/C:"
// see also: detail::starts_with_windows_drive
inline bool pathname_has_windows_os_drive(string_view pathname) noexcept {
constexpr bool pathname_has_windows_os_drive(string_view pathname) noexcept {
return
(pathname.length() == 3 || (pathname.length() > 3 && is_windows_slash(pathname[3]))) &&
is_windows_slash(pathname[0]) &&
Expand Down Expand Up @@ -2319,11 +2319,11 @@ inline void url_parser::parse_path(url_serializer& urls, const CharT* first, con
// path state; includes:
// 1. [ (/,\) - 1, 2, 3, 4 - [ 1 (if first segment), 2 ] ]
// 2. [ 1 ... 4 ]
static const auto escaped_dot = [](const CharT* const pointer) -> bool {
static constexpr auto escaped_dot = [](const CharT* const pointer) constexpr -> bool {
// "%2e" or "%2E"
return pointer[0] == '%' && pointer[1] == '2' && (pointer[2] | 0x20) == 'e';
};
static const auto double_dot = [](const CharT* const pointer, const std::size_t len) -> bool {
static constexpr auto double_dot = [](const CharT* const pointer, const std::size_t len) constexpr -> bool {
switch (len) {
case 2: // ".."
return pointer[0] == '.' && pointer[1] == '.';
Expand All @@ -2336,7 +2336,7 @@ inline void url_parser::parse_path(url_serializer& urls, const CharT* first, con
return false;
}
};
static const auto single_dot = [](const CharT* const pointer, const std::size_t len) -> bool {
static constexpr auto single_dot = [](const CharT* const pointer, const std::size_t len) constexpr -> bool {
switch (len) {
case 1: return pointer[0] == '.';
case 3: return escaped_dot(pointer); // "%2e"
Expand Down Expand Up @@ -3031,7 +3031,7 @@ inline const CharT* is_unc_path(const CharT* first, const CharT* last)
/// @param[in] is_slash function to check char is slash (or backslash)
/// @return true if path contains ".." segment
template <typename CharT, typename IsSlash>
inline bool has_dot_dot_segment(const CharT* first, const CharT* last, IsSlash is_slash) {
constexpr bool has_dot_dot_segment(const CharT* first, const CharT* last, IsSlash is_slash) {
if (last - first >= 2) {
const auto* ptr = first;
const auto* end = last - 1;
Expand Down
14 changes: 7 additions & 7 deletions include/upa/url_percent_encode.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,37 +297,37 @@ inline constexpr code_points_multiset code_points;
// Check char is in predefined set

// Returns the result of indexing cpset with c, i.e. the membership of c in
// the given code point set.
template <typename CharT>
inline bool is_char_in_set(CharT c, const code_point_set& cpset) {
constexpr bool is_char_in_set(CharT c, const code_point_set& cpset) {
return cpset[c];
}

// Returns whether c is in IPV4_CHAR_SET, as reported by code_points.char_in_set.
template <typename CharT>
inline bool is_ipv4_char(CharT c) {
constexpr bool is_ipv4_char(CharT c) {
return code_points.char_in_set(c, IPV4_CHAR_SET);
}

// Returns whether c is in HEX_DIGIT_SET, as reported by code_points.char_in_set.
template <typename CharT>
inline bool is_hex_char(CharT c) {
constexpr bool is_hex_char(CharT c) {
return code_points.char_in_set(c, HEX_DIGIT_SET);
}

// Returns whether c is in SCHEME_SET, as reported by code_points.char_in_set.
template <typename CharT>
inline bool is_scheme_char(CharT c) {
constexpr bool is_scheme_char(CharT c) {
return code_points.char_in_set(c, SCHEME_SET);
}

// Returns whether c is in DOMAIN_FORBIDDEN_SET, as reported by
// code_points.char_in_set.
template <typename CharT>
inline bool is_forbidden_domain_char(CharT c) {
constexpr bool is_forbidden_domain_char(CharT c) {
return code_points.char_in_set(c, DOMAIN_FORBIDDEN_SET);
}

// Returns whether c is in HOST_FORBIDDEN_SET, as reported by
// code_points.char_in_set.
template <typename CharT>
inline bool is_forbidden_host_char(CharT c) {
constexpr bool is_forbidden_host_char(CharT c) {
return code_points.char_in_set(c, HOST_FORBIDDEN_SET);
}

// Returns whether c is in ASCII_DOMAIN_SET, as reported by
// code_points.char_in_set.
template <typename CharT>
inline bool is_ascii_domain_char(CharT c) {
constexpr bool is_ascii_domain_char(CharT c) {
return code_points.char_in_set(c, ASCII_DOMAIN_SET);
}

Expand Down
6 changes: 3 additions & 3 deletions include/upa/url_result.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,11 @@ struct result_value {
T value{};
R result{};

// Constructs from a result only; `value` keeps its default member
// initializer (value-initialized T).
result_value(R res) noexcept
constexpr result_value(R res) noexcept
: result(res) {}
// Constructs from a result and a value, storing both.
result_value(R res, T val) noexcept
constexpr result_value(R res, T val) noexcept
: value(val), result(res) {}
// Implicit conversion to R: yields the stored `result`, ignoring `value`.
[[nodiscard]] operator R() const noexcept {
[[nodiscard]] constexpr operator R() const noexcept {
return result;
}
};
Expand Down
49 changes: 35 additions & 14 deletions include/upa/url_utf.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,25 @@
// Distributed under the BSD-style license that can be
// found in the LICENSE file.
//
// This file contains portions of modified code from the ICU project.
// Copyright (c) 2016-2023 Unicode, Inc.
//

#ifndef UPA_URL_UTF_H
#define UPA_URL_UTF_H

#include "url_result.h"
#include <cstdint> // uint8_t, uint32_t
#include <string>
#include <string_view>


namespace upa {

class url_utf {
public:
template <typename CharT>
static detail::result_value<uint32_t> read_utf_char(const CharT*& first, const CharT* last) noexcept;
static constexpr detail::result_value<uint32_t> read_utf_char(const CharT*& first, const CharT* last) noexcept;

template <typename CharT>
static void read_char_append_utf8(const CharT*& it, const CharT* last, std::string& output);
Expand All @@ -40,13 +44,30 @@ class url_utf {
static int compare_by_code_units(const char* first1, const char* last1, const char* first2, const char* last2) noexcept;
protected:
// low level
static bool read_code_point(const char*& first, const char* last, uint32_t& code_point) noexcept;
static bool read_code_point(const char16_t*& first, const char16_t* last, uint32_t& code_point) noexcept;
static bool read_code_point(const char32_t*& first, const char32_t* last, uint32_t& code_point) noexcept;
static constexpr bool read_code_point(const char*& first, const char* last, uint32_t& code_point) noexcept;
static constexpr bool read_code_point(const char16_t*& first, const char16_t* last, uint32_t& code_point) noexcept;
static constexpr bool read_code_point(const char32_t*& first, const char32_t* last, uint32_t& code_point) noexcept;
private:
const static char kReplacementCharUtf8[];
const static uint8_t k_U8_LEAD3_T1_BITS[16];
const static uint8_t k_U8_LEAD4_T1_BITS[16];
// Replacement character (U+FFFD)
static inline constexpr std::string_view kReplacementCharUtf8{ "\xEF\xBF\xBD" };

// The following two arrays hold the values of the corresponding macros in the
// ICU 74.1 library's include\unicode\utf8.h file.

// Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
// Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
// Lead byte E0..EF bits 3..0 are used as byte index,
// first trail byte bits 7..5 are used as bit index into that byte.
static inline constexpr uint8_t k_U8_LEAD3_T1_BITS[16] = {
0x20, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x10, 0x30, 0x30
};
// Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
// Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
// First trail byte bits 7..4 are used as byte index,
// lead byte F0..F4 bits 2..0 are used as bit index into that byte.
static inline constexpr uint8_t k_U8_LEAD4_T1_BITS[16] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x0F, 0x0F, 0x0F, 0x00, 0x00, 0x00, 0x00
};
};


Expand All @@ -69,9 +90,9 @@ class url_utf {
// and advances `first` to point to the next character.

template <typename CharT>
inline detail::result_value<uint32_t> url_utf::read_utf_char(const CharT*& first, const CharT* last) noexcept {
constexpr detail::result_value<uint32_t> url_utf::read_utf_char(const CharT*& first, const CharT* last) noexcept {
// read_code_point always initializes code_point
uint32_t code_point; // NOLINT(cppcoreguidelines-init-variables)
uint32_t code_point{};
if (read_code_point(first, last, code_point))
return { true, code_point };
return { false, 0xFFFD }; // REPLACEMENT CHARACTER
Expand All @@ -95,7 +116,7 @@ inline void url_utf::read_char_append_utf8(const char*& it, const char* last, st
if (read_code_point(it, last, code_point))
output.append(start, it);
else
output.append(static_cast<const char*>(kReplacementCharUtf8));
output.append(kReplacementCharUtf8);
}

// ------------------------------------------------------------------------
Expand All @@ -110,7 +131,7 @@ inline void url_utf::read_char_append_utf8(const char*& it, const char* last, st

// Modified version of the U8_INTERNAL_NEXT_OR_SUB macro in utf8.h from ICU

inline bool url_utf::read_code_point(const char*& first, const char* last, uint32_t& c) noexcept {
constexpr bool url_utf::read_code_point(const char*& first, const char* last, uint32_t& c) noexcept {
c = static_cast<uint8_t>(*first++);
if (c & 0x80) {
uint8_t tmp = 0;
Expand Down Expand Up @@ -179,15 +200,15 @@ namespace detail {
// Get a supplementary code point value (U+10000..U+10ffff)
// from its lead and trail surrogates.
// Based on U16_GET_SUPPLEMENTARY in utf16.h from ICU
inline uint32_t u16_get_supplementary(uint32_t lead, uint32_t trail) noexcept {
constexpr uint32_t u16_get_supplementary(uint32_t lead, uint32_t trail) noexcept {
// Single precomputed constant combining the shifted lead base (0xd800 << 10),
// the trail base (0xdc00) and the 0x10000 offset, so that one subtraction
// after (lead << 10) + trail yields the code point.
constexpr uint32_t u16_surrogate_offset = (0xd800 << 10UL) + 0xdc00 - 0x10000;
return (lead << 10UL) + trail - u16_surrogate_offset;
}
} // namespace detail

// Modified version of the U16_NEXT_OR_FFFD macro in utf16.h from ICU

inline bool url_utf::read_code_point(const char16_t*& first, const char16_t* last, uint32_t& c) noexcept {
constexpr bool url_utf::read_code_point(const char16_t*& first, const char16_t* last, uint32_t& c) noexcept {
c = *first++;
if (detail::u16_is_surrogate(c)) {
if (detail::u16_is_surrogate_lead(c) && first != last && detail::u16_is_trail(*first)) {
Expand All @@ -201,7 +222,7 @@ inline bool url_utf::read_code_point(const char16_t*& first, const char16_t* las
return true;
}

inline bool url_utf::read_code_point(const char32_t*& first, const char32_t*, uint32_t& c) noexcept {
constexpr bool url_utf::read_code_point(const char32_t*& first, const char32_t*, uint32_t& c) noexcept {
// no conversion
c = *first++;
// don't allow surrogates (U+D800..U+DFFF) and too high values
Expand Down
Loading