From 5e7d5c11477cf7aa3e071503dd7a7c9deda14f91 Mon Sep 17 00:00:00 2001 From: MacroModel Date: Mon, 2 Dec 2024 21:15:21 +0800 Subject: [PATCH 1/7] Improved scanning speed of fast_io sto char16_t --- .../integers/sto/sto_contiguous.h | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/include/fast_io_core_impl/integers/sto/sto_contiguous.h b/include/fast_io_core_impl/integers/sto/sto_contiguous.h index b65e87d0..74142827 100644 --- a/include/fast_io_core_impl/integers/sto/sto_contiguous.h +++ b/include/fast_io_core_impl/integers/sto/sto_contiguous.h @@ -620,22 +620,48 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha { ::std::uint_least64_t val; ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); - + val = ::fast_io::little_endian(val); constexpr ::std::uint_least64_t pow_base_sizeof_u64{ ::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), u64_size_of_c16)}; constexpr ::std::uint_least64_t first_bound{0x7fc67fc67fc67fc6 + 0x0001000100010001 * (10 - base_char_type)}; - if (((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000) [[unlikely]] + if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval) { - break; + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -16}; + + if (!valid_bits) [[unlikely]] + { + goto after_tail; + } + + val <<= 64 - valid_bits; + + ::std::uint_least64_t all_zero{0x0030003000300030}; + + all_zero >>= valid_bits; + + val |= all_zero; + + constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; + + constexpr ::std::uint_least64_t mask{0x000000000000FFFF}; + + val -= 0x0030003000300030; + val = (val * base_char_type) + (val >> 16); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); + res = static_cast(res * ::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), ctrz_cval / 16) + val); + + first += ctrz_cval / 16; + goto after_tail; } constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; constexpr ::std::uint_least64_t mask{0x000000000000FFFF}; - val -= 0x0030003000300030; + val -= 0x0030003000300030; val = (val * base_char_type) + (val >> 16); val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); res = static_cast(res * pow_base_sizeof_u64 + val); @@ -825,6 +851,9 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha res *= base_char_type; res += ch; } + +after_tail: + bool overflow{}; if (first != last) [[likely]] { From 42f2b26e0c537b419386d07cf99fbfeb741f9626 Mon Sep 17 00:00:00 2001 From: MacroModel Date: Mon, 2 Dec 2024 21:53:13 +0800 Subject: [PATCH 2/7] update sto --- .../integers/sto/sto_contiguous.h | 369 ++++++++++++++++-- 1 file changed, 330 insertions(+), 39 deletions(-) diff --git a/include/fast_io_core_impl/integers/sto/sto_contiguous.h b/include/fast_io_core_impl/integers/sto/sto_contiguous.h index 74142827..dccfe9bd 100644 --- a/include/fast_io_core_impl/integers/sto/sto_contiguous.h +++ b/include/fast_io_core_impl/integers/sto/sto_contiguous.h @@ -505,12 +505,91 @@ inline constexpr char_type const *skip_digits(char_type const *first, char_type return first; } +template +inline constexpr parse_result compile_time_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, char_type const *last, T &res) noexcept +{ + using unsigned_char_type = ::std::make_unsigned_t; + using unsigned_type = my_make_unsigned_t<::std::remove_cvref_t>; + constexpr unsigned_char_type base_char_type{base}; + constexpr unsigned_type risky_uint_max{static_cast(-1)}; + constexpr unsigned_type risky_value{risky_uint_max / base}; + constexpr unsigned_char_type risky_digit(risky_uint_max % base); + constexpr bool isspecialbase{base == 2 || base == 4 || base == 16}; + constexpr ::std::size_t max_size{details::cal_max_int_size() - (!isspecialbase)}; + ::std::size_t const diff{static_cast<::std::size_t>(last - first)}; + ::std::size_t mn_val{max_size}; + + if (diff < mn_val) + { + mn_val = diff; + } + + auto first_phase_last{first + mn_val}; + + for (; first != first_phase_last; ++first) + { + unsigned_char_type ch{static_cast(*first)}; + if (char_digit_to_literal(ch)) [[unlikely]] + { + break; + } + res *= base_char_type; + res += ch; + } + + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; +} + +template +inline constexpr ::fast_io::freestanding::array generate_pow_table() noexcept +{ + ::fast_io::freestanding::array tmp{}; + for (::std::size_t i{}; i < Arr_size; i++) + { + tmp[i] = ::fast_io::details::compile_time_pow(static_cast(base), i); + } + return tmp; +} + template #if defined(__SSE4_1__) && __has_cpp_attribute(__gnu__::__cold__) && (defined(__x86_64__) || defined(_M_AMD64)) [[__gnu__::__cold__]] #endif -inline constexpr parse_result -scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, char_type const *last, T &res) noexcept +inline parse_result +runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, char_type const *last, T &res) noexcept { using unsigned_char_type = ::std::make_unsigned_t; using unsigned_type = my_make_unsigned_t<::std::remove_cvref_t>; @@ -538,7 +617,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha { if constexpr (!::fast_io::details::is_ebcdic) { - // Decimal: + // Basic algorithm: // https://github.com/fastfloat/fast_float // Copyright(c) 2021 The fast_float authors // @@ -559,13 +638,43 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); val = ::fast_io::little_endian(val); - + constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), sizeof(::std::uint_least64_t))}; constexpr ::std::uint_least64_t first_bound{0x4646464646464646 + 0x0101010101010101 * (10 - base_char_type)}; - if (((val + first_bound) | (val - 0x3030303030303030)) & 0x8080808080808080) [[unlikely]] + if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x3030303030303030)) & 0x8080808080808080}; cval) { - break; + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; + + if (!valid_bits) [[unlikely]] + { + goto after_tail; + } + + val <<= 64 - valid_bits; + + ::std::uint_least64_t all_zero{0x3030303030303030}; + + all_zero >>= valid_bits; + + val |= all_zero; + constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; + + constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; + constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; + constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; + val -= 0x3030303030303030; + + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + constexpr auto pow_table{generate_pow_table()}; + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + + first += ctrz_cval / (8 * sizeof(char_type)); + goto after_tail; } constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; @@ -575,8 +684,8 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; - val -= 0x3030303030303030; + val -= 0x3030303030303030; val = (val * base_char_type) + (val >> 8); val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; res = static_cast(res * pow_base_sizeof_u64 + val); @@ -590,19 +699,51 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha { ::std::uint_least32_t val; ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - + val = ::fast_io::little_endian(val); constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), sizeof(::std::uint_least32_t))}; constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; - if (!(((val + first_bound) | (val - 0x30303030)) & 0x80808080)) [[likely]] + if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) { + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; + + if (!valid_bits) [[unlikely]] + { + goto after_tail; + } + + val <<= 32 - valid_bits; + + ::std::uint_least32_t all_zero{0x30303030}; + + all_zero >>= valid_bits; + + val |= all_zero; + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; constexpr ::std::uint_least32_t mask{0x000000FF}; + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + constexpr auto pow_table{generate_pow_table()}; + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + + first += ctrz_cval / (8 * sizeof(char_type)); + goto after_tail; + } + else + { + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + + constexpr ::std::uint_least32_t mask{0x000000FF}; + + val -= 0x30303030; val = (val * base_char_type) + (val >> 8); val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); res = static_cast(res * pow_base_sizeof_u32 + val); @@ -652,9 +793,11 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha val -= 0x0030003000300030; val = (val * base_char_type) + (val >> 16); val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); - res = static_cast(res * ::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), ctrz_cval / 16) + val); + constexpr auto pow_table{generate_pow_table()}; + + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); - first += ctrz_cval / 16; + first += ctrz_cval / (8 * sizeof(char_type)); goto after_tail; } constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; @@ -680,21 +823,55 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha { ::std::uint_least64_t val; ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); - + val = ::fast_io::little_endian(val); constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), sizeof(::std::uint_least64_t))}; constexpr ::std::uint_least64_t first_bound1{0x3939393939393939 + 0x0101010101010101 * (16 - base_char_type)}; constexpr ::std::uint_least64_t first_bound2{0x1919191919191919 + 0x0101010101010101 * (16 - base_char_type)}; - if (((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & - ((val + first_bound1) | (val - 0x4040404040404040)) & - ((val + first_bound2) | (val - 0x6060606060606060))) | - ~(((val + 0x3f3f3f3f3f3f3f3f) | (val - 0x4040404040404040)) & - ((val + 0x1f1f1f1f1f1f1f1f) | (val - 0x6060606060606060)))) & - 0x8080808080808080) [[unlikely]] + if (::std::uint_least64_t const cval{((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & + ((val + first_bound1) | (val - 0x4040404040404040)) & + ((val + first_bound2) | (val - 0x6060606060606060))) | + ~(((val + 0x3f3f3f3f3f3f3f3f) | (val - 0x4040404040404040)) & + ((val + 0x1f1f1f1f1f1f1f1f) | (val - 0x6060606060606060)))) & + 0x8080808080808080}; + cval) { - break; + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; + + if (!valid_bits) [[unlikely]] + { + goto after_tail; + } + + val <<= 64 - valid_bits; + + ::std::uint_least64_t all_zero{0x3030303030303030}; + + all_zero >>= valid_bits; + + val |= all_zero; + constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; + + constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; + constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; + constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; + + val -= 0x3030303030303030; + val = (val & 0x0f0f0f0f0f0f0f0f) + ((val & 0x1010101010101010) >> 4) * 9; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + + constexpr auto pow_table{generate_pow_table()}; + + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + + first += ctrz_cval / (8 * sizeof(char_type)); + goto after_tail; } constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; @@ -704,10 +881,9 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; - val -= 0x3030303030303030; + val -= 0x3030303030303030; val = (val & 0x0f0f0f0f0f0f0f0f) + ((val & 0x1010101010101010) >> 4) * 9; - val = (val * base_char_type) + (val >> 8); val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; res = static_cast(res * pow_base_sizeof_u64 + val); @@ -731,20 +907,54 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha constexpr ::std::uint_least32_t first_bound1{0x39393939 + 0x01010101 * (16 - base_char_type)}; constexpr ::std::uint_least32_t first_bound2{0x19191919 + 0x01010101 * (16 - base_char_type)}; - if (!(((((val + 0x46464646) | (val - 0x30303030)) & - ((val + first_bound1) | (val - 0x40404040)) & - ((val + first_bound2) | (val - 0x60606060))) | - ~(((val + 0x3f3f3f3f) | (val - 0x40404040)) & - ((val + 0x1f1f1f1f) | (val - 0x60606060)))) & - 0x80808080)) [[likely]] + if (::std::uint_least32_t const cval{((((val + 0x46464646) | (val - 0x30303030)) & + ((val + first_bound1) | (val - 0x40404040)) & + ((val + first_bound2) | (val - 0x60606060))) | + ~(((val + 0x3f3f3f3f) | (val - 0x40404040)) & + ((val + 0x1f1f1f1f) | (val - 0x60606060)))) & + 0x80808080}; + cval) { + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; + + if (!valid_bits) [[unlikely]] + { + goto after_tail; + } + + val <<= 32 - valid_bits; + + ::std::uint_least32_t all_zero{0x30303030}; + + all_zero >>= valid_bits; + + val |= all_zero; + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; constexpr ::std::uint_least32_t mask{0x000000FF}; - val -= 0x30303030; + val -= 0x30303030; val = (val & 0x0f0f0f0f) + ((val & 0x10101010) >> 4) * 9; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + + constexpr auto pow_table{generate_pow_table()}; + + res = static_cast(res * pow_table[ ctrz_cval / (8 * sizeof(char_type))] + val); + first += ctrz_cval / (8 * sizeof(char_type)); + goto after_tail; + } + else + { + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + + constexpr ::std::uint_least32_t mask{0x000000FF}; + + val -= 0x30303030; + val = (val & 0x0f0f0f0f) + ((val & 0x10101010) >> 4) * 9; val = (val * base_char_type) + (val >> 8); val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); res = static_cast(res * pow_base_sizeof_u32 + val); @@ -768,22 +978,51 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha { ::std::uint_least32_t val; ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - + val = ::fast_io::little_endian(val); constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), sizeof(::std::uint_least32_t))}; constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; - if (((val + first_bound) | (val - 0x30303030)) & 0x80808080) [[unlikely]] + if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[unlikely]] { - break; + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; + + if (!valid_bits) [[unlikely]] + { + goto after_tail; + } + + val <<= 32 - valid_bits; + + ::std::uint_least32_t all_zero{0x30303030}; + + all_zero >>= valid_bits; + + val |= all_zero; + + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + + constexpr ::std::uint_least32_t mask{0x000000FF}; + + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + + constexpr auto pow_table{generate_pow_table()}; + + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + + first += ctrz_cval / (8 * sizeof(char_type)); + goto after_tail; } constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; constexpr ::std::uint_least32_t mask{0x000000FF}; - val -= 0x30303030; + val -= 0x30303030; val = (val * base_char_type) + (val >> 8); val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); res = static_cast(res * pow_base_sizeof_u32 + val); @@ -812,15 +1051,46 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha constexpr ::std::uint_least32_t first_bound1{0x39393939 + 0x01010101 * (16 - base_char_type)}; constexpr ::std::uint_least32_t first_bound2{0x19191919 + 0x01010101 * (16 - base_char_type)}; - if (((((val + 0x46464646) | (val - 0x30303030)) & - ((val + first_bound1) | (val - 0x40404040)) & - ((val + first_bound2) | (val - 0x60606060))) | - ~(((val + 0x3f3f3f3f) | (val - 0x40404040)) & - ((val + 0x1f1f1f1f) | (val - 0x60606060)))) & - 0x80808080) [[unlikely]] + if (::std::uint_least32_t const cval{((((val + 0x46464646) | (val - 0x30303030)) & + ((val + first_bound1) | (val - 0x40404040)) & + ((val + first_bound2) | (val - 0x60606060))) | + ~(((val + 0x3f3f3f3f) | (val - 0x40404040)) & + ((val + 0x1f1f1f1f) | (val - 0x60606060)))) & + 0x80808080}; + cval) { - break; + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; + + if (!valid_bits) [[unlikely]] + { + goto after_tail; + } + + val <<= 32 - valid_bits; + + ::std::uint_least32_t all_zero{0x30303030}; + + all_zero >>= valid_bits; + + val |= all_zero; + + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + + constexpr ::std::uint_least32_t mask{0x000000FF}; + + val -= 0x30303030; + val = (val & 0x0f0f0f0f) + ((val & 0x10101010) >> 4) * 9; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + + constexpr auto pow_table{generate_pow_table()}; + res = static_cast(res * pow_table[ ctrz_cval / (8 * sizeof(char_type))]+ val); + + first += ctrz_cval / (8 * sizeof(char_type)); + goto after_tail; } + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; constexpr ::std::uint_least32_t mask{0x000000FF}; @@ -890,6 +1160,27 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; } +template +#if defined(__SSE4_1__) && __has_cpp_attribute(__gnu__::__cold__) && (defined(__x86_64__) || defined(_M_AMD64)) +[[__gnu__::__cold__]] +#endif +inline constexpr parse_result +scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, char_type const *last, T &res) noexcept +{ +#if __cpp_if_consteval >= 202106L + if !consteval +#else + if (!__builtin_is_constant_evaluated()) +#endif + { + return runtime_scan_int_contiguous_none_simd_space_part_define_impl(first, last, res); + } + else + { + return compile_time_scan_int_contiguous_none_simd_space_part_define_impl(first, last, res); + } +} + inline constexpr parse_code ongoing_parse_code{static_cast(::std::numeric_limits::max())}; template From 0b3a51a03cff47e7bfa1dcfaa14bb25abe2c1270 Mon Sep 17 00:00:00 2001 From: MacroModel Date: Mon, 2 Dec 2024 21:58:44 +0800 Subject: [PATCH 3/7] update --- .../integers/sto/sto_contiguous.h | 470 +++++++++--------- 1 file changed, 232 insertions(+), 238 deletions(-) diff --git a/include/fast_io_core_impl/integers/sto/sto_contiguous.h b/include/fast_io_core_impl/integers/sto/sto_contiguous.h index dccfe9bd..5d360986 100644 --- a/include/fast_io_core_impl/integers/sto/sto_contiguous.h +++ b/include/fast_io_core_impl/integers/sto/sto_contiguous.h @@ -609,74 +609,50 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi auto first_phase_last{first + mn_val}; -#if __cpp_if_consteval >= 202106L - if !consteval -#else - if (!__builtin_is_constant_evaluated()) -#endif + if constexpr (!::fast_io::details::is_ebcdic) { - if constexpr (!::fast_io::details::is_ebcdic) - { - // Basic algorithm: - // https://github.com/fastfloat/fast_float - // Copyright(c) 2021 The fast_float authors - // - // Binary to Hexadecimal: - // Copyright(c) 2024 MacroModel + // Basic algorithm: + // https://github.com/fastfloat/fast_float + // Copyright(c) 2021 The fast_float authors + // + // Binary to Hexadecimal: + // Copyright(c) 2024 MacroModel - if constexpr (sizeof(::std::uint_least32_t) < sizeof(::std::size_t)) + if constexpr (sizeof(::std::uint_least32_t) < sizeof(::std::size_t)) + { + if constexpr (base_char_type <= 10) { - if constexpr (base_char_type <= 10) + if constexpr (sizeof(char_type) == sizeof(char8_t)) { - if constexpr (sizeof(char_type) == sizeof(char8_t)) + if constexpr (max_size >= sizeof(::std::uint_least64_t)) { - if constexpr (max_size >= sizeof(::std::uint_least64_t)) + while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) { - while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) - { - ::std::uint_least64_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); - - val = ::fast_io::little_endian(val); - - constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), sizeof(::std::uint_least64_t))}; - constexpr ::std::uint_least64_t first_bound{0x4646464646464646 + 0x0101010101010101 * (10 - base_char_type)}; - - if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x3030303030303030)) & 0x8080808080808080}; cval) - { - auto const ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -8}; - - if (!valid_bits) [[unlikely]] - { - goto after_tail; - } + ::std::uint_least64_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); - val <<= 64 - valid_bits; + val = ::fast_io::little_endian(val); - ::std::uint_least64_t all_zero{0x3030303030303030}; + constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), sizeof(::std::uint_least64_t))}; + constexpr ::std::uint_least64_t first_bound{0x4646464646464646 + 0x0101010101010101 * (10 - base_char_type)}; - all_zero >>= valid_bits; + if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x3030303030303030)) & 0x8080808080808080}; cval) + { + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; - val |= all_zero; - constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; + if (!valid_bits) [[unlikely]] + { + goto after_tail; + } - constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; - constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; - constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; - val -= 0x3030303030303030; + val <<= 64 - valid_bits; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - constexpr auto pow_table{generate_pow_table()}; - res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + ::std::uint_least64_t all_zero{0x3030303030303030}; - first += ctrz_cval / (8 * sizeof(char_type)); - goto after_tail; - } + all_zero >>= valid_bits; + val |= all_zero; constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; @@ -684,122 +660,126 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; - val -= 0x3030303030303030; + val = (val * base_char_type) + (val >> 8); val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - res = static_cast(res * pow_base_sizeof_u64 + val); - first += sizeof(::std::uint_least64_t); + constexpr auto pow_table{generate_pow_table()}; + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + + first += ctrz_cval / (8 * sizeof(char_type)); + goto after_tail; } + + constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; + + constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; + constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; + constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; + + val -= 0x3030303030303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + res = static_cast(res * pow_base_sizeof_u64 + val); + first += sizeof(::std::uint_least64_t); } + } - if constexpr (max_size >= sizeof(::std::uint_least32_t)) + if constexpr (max_size >= sizeof(::std::uint_least32_t)) + { + if (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) { - if (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) - { - ::std::uint_least32_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); + ::std::uint_least32_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - val = ::fast_io::little_endian(val); + val = ::fast_io::little_endian(val); - constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), sizeof(::std::uint_least32_t))}; - constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; + constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), sizeof(::std::uint_least32_t))}; + constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; - if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) - { - auto const ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -8}; + if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) + { + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; - if (!valid_bits) [[unlikely]] - { - goto after_tail; - } + if (!valid_bits) [[unlikely]] + { + goto after_tail; + } - val <<= 32 - valid_bits; + val <<= 32 - valid_bits; - ::std::uint_least32_t all_zero{0x30303030}; + ::std::uint_least32_t all_zero{0x30303030}; - all_zero >>= valid_bits; + all_zero >>= valid_bits; - val |= all_zero; + val |= all_zero; - constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; - constexpr ::std::uint_least32_t mask{0x000000FF}; + constexpr ::std::uint_least32_t mask{0x000000FF}; - val -= 0x30303030; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); - constexpr auto pow_table{generate_pow_table()}; - res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + constexpr auto pow_table{generate_pow_table()}; + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); - first += ctrz_cval / (8 * sizeof(char_type)); - goto after_tail; - } - else - { - constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + first += ctrz_cval / (8 * sizeof(char_type)); + goto after_tail; + } + else + { + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; - constexpr ::std::uint_least32_t mask{0x000000FF}; + constexpr ::std::uint_least32_t mask{0x000000FF}; - val -= 0x30303030; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); - res = static_cast(res * pow_base_sizeof_u32 + val); - first += sizeof(::std::uint_least32_t); - } + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + res = static_cast(res * pow_base_sizeof_u32 + val); + first += sizeof(::std::uint_least32_t); } } } - else if constexpr (sizeof(char_type) == sizeof(char16_t)) + } + else if constexpr (sizeof(char_type) == sizeof(char16_t)) + { + constexpr ::std::size_t u64_size_of_c16{sizeof(::std::uint_least64_t) / sizeof(char16_t)}; + if constexpr (max_size >= u64_size_of_c16) { - constexpr ::std::size_t u64_size_of_c16{sizeof(::std::uint_least64_t) / sizeof(char16_t)}; - if constexpr (max_size >= u64_size_of_c16) + while (static_cast<::std::size_t>(first_phase_last - first) >= u64_size_of_c16) { - while (static_cast<::std::size_t>(first_phase_last - first) >= u64_size_of_c16) - { - ::std::uint_least64_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); - - val = ::fast_io::little_endian(val); - - constexpr ::std::uint_least64_t pow_base_sizeof_u64{ - ::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), u64_size_of_c16)}; - constexpr ::std::uint_least64_t first_bound{0x7fc67fc67fc67fc6 + 0x0001000100010001 * (10 - base_char_type)}; - - if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval) - { - auto const ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -16}; - - if (!valid_bits) [[unlikely]] - { - goto after_tail; - } + ::std::uint_least64_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); - val <<= 64 - valid_bits; + val = ::fast_io::little_endian(val); - ::std::uint_least64_t all_zero{0x0030003000300030}; + constexpr ::std::uint_least64_t pow_base_sizeof_u64{ + ::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), u64_size_of_c16)}; + constexpr ::std::uint_least64_t first_bound{0x7fc67fc67fc67fc6 + 0x0001000100010001 * (10 - base_char_type)}; - all_zero >>= valid_bits; + if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval) + { + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -16}; - val |= all_zero; + if (!valid_bits) [[unlikely]] + { + goto after_tail; + } - constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; + val <<= 64 - valid_bits; - constexpr ::std::uint_least64_t mask{0x000000000000FFFF}; + ::std::uint_least64_t all_zero{0x0030003000300030}; - val -= 0x0030003000300030; - val = (val * base_char_type) + (val >> 16); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); - constexpr auto pow_table{generate_pow_table()}; + all_zero >>= valid_bits; - res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + val |= all_zero; - first += ctrz_cval / (8 * sizeof(char_type)); - goto after_tail; - } constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; constexpr ::std::uint_least64_t mask{0x000000000000FFFF}; @@ -807,73 +787,66 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi val -= 0x0030003000300030; val = (val * base_char_type) + (val >> 16); val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); - res = static_cast(res * pow_base_sizeof_u64 + val); - first += u64_size_of_c16; + constexpr auto pow_table{generate_pow_table()}; + + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + + first += ctrz_cval / (8 * sizeof(char_type)); + goto after_tail; } + constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; + + constexpr ::std::uint_least64_t mask{0x000000000000FFFF}; + + val -= 0x0030003000300030; + val = (val * base_char_type) + (val >> 16); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); + res = static_cast(res * pow_base_sizeof_u64 + val); + first += u64_size_of_c16; } } } - else if constexpr (base_char_type <= 16) + } + else if constexpr (base_char_type <= 16) + { + if constexpr (sizeof(char_type) == sizeof(char8_t)) { - if constexpr (sizeof(char_type) == sizeof(char8_t)) + if constexpr (max_size >= sizeof(::std::uint_least64_t)) { - if constexpr (max_size >= sizeof(::std::uint_least64_t)) + while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) { - while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) + ::std::uint_least64_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); + + val = ::fast_io::little_endian(val); + + constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), sizeof(::std::uint_least64_t))}; + constexpr ::std::uint_least64_t first_bound1{0x3939393939393939 + 0x0101010101010101 * (16 - base_char_type)}; + constexpr ::std::uint_least64_t first_bound2{0x1919191919191919 + 0x0101010101010101 * (16 - base_char_type)}; + + if (::std::uint_least64_t const cval{((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & + ((val + first_bound1) | (val - 0x4040404040404040)) & + ((val + first_bound2) | (val - 0x6060606060606060))) | + ~(((val + 0x3f3f3f3f3f3f3f3f) | (val - 0x4040404040404040)) & + ((val + 0x1f1f1f1f1f1f1f1f) | (val - 0x6060606060606060)))) & + 0x8080808080808080}; + cval) { - ::std::uint_least64_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); - - val = ::fast_io::little_endian(val); + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; - constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), sizeof(::std::uint_least64_t))}; - constexpr ::std::uint_least64_t first_bound1{0x3939393939393939 + 0x0101010101010101 * (16 - base_char_type)}; - constexpr ::std::uint_least64_t first_bound2{0x1919191919191919 + 0x0101010101010101 * (16 - base_char_type)}; - - if (::std::uint_least64_t const cval{((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & - ((val + first_bound1) | (val - 0x4040404040404040)) & - ((val + first_bound2) | (val - 0x6060606060606060))) | - ~(((val + 0x3f3f3f3f3f3f3f3f) | (val - 0x4040404040404040)) & - ((val + 0x1f1f1f1f1f1f1f1f) | (val - 0x6060606060606060)))) & - 0x8080808080808080}; - cval) + if (!valid_bits) [[unlikely]] { - auto const ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -8}; - - if (!valid_bits) [[unlikely]] - { - goto after_tail; - } - - val <<= 64 - valid_bits; - - ::std::uint_least64_t all_zero{0x3030303030303030}; - - all_zero >>= valid_bits; - - val |= all_zero; - constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; - - constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; - constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; - constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; - - val -= 0x3030303030303030; - val = (val & 0x0f0f0f0f0f0f0f0f) + ((val & 0x1010101010101010) >> 4) * 9; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + goto after_tail; + } - constexpr auto pow_table{generate_pow_table()}; + val <<= 64 - valid_bits; - res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + ::std::uint_least64_t all_zero{0x3030303030303030}; - first += ctrz_cval / (8 * sizeof(char_type)); - goto after_tail; - } + all_zero >>= valid_bits; + val |= all_zero; constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; @@ -886,10 +859,31 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi val = (val & 0x0f0f0f0f0f0f0f0f) + ((val & 0x1010101010101010) >> 4) * 9; val = (val * base_char_type) + (val >> 8); val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - res = static_cast(res * pow_base_sizeof_u64 + val); - first += sizeof(::std::uint_least64_t); + + constexpr auto pow_table{generate_pow_table()}; + + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + + first += ctrz_cval / (8 * sizeof(char_type)); + goto after_tail; } + + constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; + + constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; + constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; + constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; + + val -= 0x3030303030303030; + val = (val & 0x0f0f0f0f0f0f0f0f) + ((val & 0x1010101010101010) >> 4) * 9; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + res = static_cast(res * pow_base_sizeof_u64 + val); + first += sizeof(::std::uint_least64_t); } + } // It seems that it is unnecessary to read 4-bit data in base11-hexadecimal system, // and the time for 4-bit data reading of this algorithm even exceeds 4 times of cyclic reading @@ -963,60 +957,44 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi } } #endif - } } } - else if constexpr (sizeof(::std::uint_least16_t) < sizeof(::std::size_t)) + } + else if constexpr (sizeof(::std::uint_least16_t) < sizeof(::std::size_t)) + { + if constexpr (base_char_type <= 10) { - if constexpr (base_char_type <= 10) + if constexpr (sizeof(char_type) == sizeof(char8_t)) { - if constexpr (sizeof(char_type) == sizeof(char8_t)) + if constexpr (max_size >= sizeof(::std::uint_least32_t)) { - if constexpr (max_size >= sizeof(::std::uint_least32_t)) + while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) { - while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) - { - ::std::uint_least32_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - - val = ::fast_io::little_endian(val); - - constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), sizeof(::std::uint_least32_t))}; - constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; - - if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[unlikely]] - { - auto const ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -8}; + ::std::uint_least32_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - if (!valid_bits) [[unlikely]] - { - goto after_tail; - } + val = ::fast_io::little_endian(val); - val <<= 32 - valid_bits; + constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), sizeof(::std::uint_least32_t))}; + constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; - ::std::uint_least32_t all_zero{0x30303030}; - - all_zero >>= valid_bits; - - val |= all_zero; - - constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[unlikely]] + { + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; - constexpr ::std::uint_least32_t mask{0x000000FF}; + if (!valid_bits) [[unlikely]] + { + goto after_tail; + } - val -= 0x30303030; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + val <<= 32 - valid_bits; - constexpr auto pow_table{generate_pow_table()}; + ::std::uint_least32_t all_zero{0x30303030}; - res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + all_zero >>= valid_bits; - first += ctrz_cval / (8 * sizeof(char_type)); - goto after_tail; - } + val |= all_zero; constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; @@ -1025,18 +1003,34 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi val -= 0x30303030; val = (val * base_char_type) + (val >> 8); val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); - res = static_cast(res * pow_base_sizeof_u32 + val); - first += sizeof(::std::uint_least32_t); + + constexpr auto pow_table{generate_pow_table()}; + + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + + first += ctrz_cval / (8 * sizeof(char_type)); + goto after_tail; } + + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + + constexpr ::std::uint_least32_t mask{0x000000FF}; + + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + res = static_cast(res * pow_base_sizeof_u32 + val); + first += sizeof(::std::uint_least32_t); } } } - else if constexpr (base_char_type <= 16) + } + else if constexpr (base_char_type <= 16) + { + if constexpr (sizeof(char_type) == sizeof(char8_t)) { - if constexpr (sizeof(char_type) == sizeof(char8_t)) - { - // It seems that it is unnecessary to read 4-bit data in base11-hexadecimal system, - // and the time for 4-bit data reading of this algorithm even exceeds 4 times of cyclic reading + // It seems that it is unnecessary to read 4-bit data in base11-hexadecimal system, + // and the time for 4-bit data reading of this algorithm even exceeds 4 times of cyclic reading #if 0 if constexpr (max_size >= sizeof(::std::uint_least32_t)) { @@ -1105,12 +1099,12 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi } } #endif - } } } } } + for (; first != first_phase_last; ++first) { unsigned_char_type ch{static_cast(*first)}; @@ -1122,7 +1116,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi res += ch; } -after_tail: +[[maybe_unused]] after_tail: bool overflow{}; if (first != last) [[likely]] From 469d0f6109f50713f51ecc2ad41e9b3781fa028f Mon Sep 17 00:00:00 2001 From: MacroModel Date: Mon, 2 Dec 2024 22:09:58 +0800 Subject: [PATCH 4/7] Dead mother msvc, who is a piece of shit but makes my code become shit https://github.com/cppfastio/fast_io/actions/runs/12120884733/job/33790588817?pr=1001 --- .../integers/sto/sto_contiguous.h | 1020 +++++++++++------ 1 file changed, 686 insertions(+), 334 deletions(-) diff --git a/include/fast_io_core_impl/integers/sto/sto_contiguous.h b/include/fast_io_core_impl/integers/sto/sto_contiguous.h index 5d360986..bf2eaffd 100644 --- a/include/fast_io_core_impl/integers/sto/sto_contiguous.h +++ b/include/fast_io_core_impl/integers/sto/sto_contiguous.h @@ -505,74 +505,6 @@ inline constexpr char_type const *skip_digits(char_type const *first, char_type return first; } -template -inline constexpr parse_result compile_time_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, char_type const *last, T &res) noexcept -{ - using unsigned_char_type = ::std::make_unsigned_t; - using unsigned_type = my_make_unsigned_t<::std::remove_cvref_t>; - constexpr unsigned_char_type base_char_type{base}; - constexpr unsigned_type risky_uint_max{static_cast(-1)}; - constexpr unsigned_type risky_value{risky_uint_max / base}; - constexpr unsigned_char_type risky_digit(risky_uint_max % base); - constexpr bool isspecialbase{base == 2 || base == 4 || base == 16}; - constexpr ::std::size_t max_size{details::cal_max_int_size() - (!isspecialbase)}; - ::std::size_t const diff{static_cast<::std::size_t>(last - first)}; - ::std::size_t mn_val{max_size}; - - if (diff < mn_val) - { - mn_val = diff; - } - - auto first_phase_last{first + mn_val}; - - for (; first != first_phase_last; ++first) - { - unsigned_char_type ch{static_cast(*first)}; - if (char_digit_to_literal(ch)) [[unlikely]] - { - break; - } - res *= base_char_type; - res += ch; - } - - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; -} - template inline constexpr ::fast_io::freestanding::array generate_pow_table() noexcept { @@ -588,8 +520,8 @@ template #if defined(__SSE4_1__) && __has_cpp_attribute(__gnu__::__cold__) && (defined(__x86_64__) || defined(_M_AMD64)) [[__gnu__::__cold__]] #endif -inline parse_result -runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, char_type const *last, T &res) noexcept +inline constexpr parse_result +scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, char_type const *last, T &res) noexcept { using unsigned_char_type = ::std::make_unsigned_t; using unsigned_type = my_make_unsigned_t<::std::remove_cvref_t>; @@ -608,51 +540,141 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi } auto first_phase_last{first + mn_val}; - - if constexpr (!::fast_io::details::is_ebcdic) +#if __cpp_if_consteval >= 202106L + if !consteval +#else + if (!__builtin_is_constant_evaluated()) +#endif { - // Basic algorithm: - // https://github.com/fastfloat/fast_float - // Copyright(c) 2021 The fast_float authors - // - // Binary to Hexadecimal: - // Copyright(c) 2024 MacroModel - - if constexpr (sizeof(::std::uint_least32_t) < sizeof(::std::size_t)) + if constexpr (!::fast_io::details::is_ebcdic) { - if constexpr (base_char_type <= 10) + // Inspired by: + // https://github.com/fastfloat/fast_float + // Copyright(c) 2021 The fast_float authors + // + // Binary to Hexadecimal: + // Copyright(c) 2024 MacroModel + + if constexpr (sizeof(::std::uint_least32_t) < sizeof(::std::size_t)) { - if constexpr (sizeof(char_type) == sizeof(char8_t)) + if constexpr (base_char_type <= 10) { - if constexpr (max_size >= sizeof(::std::uint_least64_t)) + if constexpr (sizeof(char_type) == sizeof(char8_t)) { - while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) + if constexpr (max_size >= sizeof(::std::uint_least64_t)) { - ::std::uint_least64_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); - - val = ::fast_io::little_endian(val); + while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) + { + ::std::uint_least64_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); - constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), sizeof(::std::uint_least64_t))}; - constexpr ::std::uint_least64_t first_bound{0x4646464646464646 + 0x0101010101010101 * (10 - base_char_type)}; + val = ::fast_io::little_endian(val); - if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x3030303030303030)) & 0x8080808080808080}; cval) - { - auto const ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -8}; + constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), sizeof(::std::uint_least64_t))}; + constexpr ::std::uint_least64_t first_bound{0x4646464646464646 + 0x0101010101010101 * (10 - base_char_type)}; - if (!valid_bits) [[unlikely]] + if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x3030303030303030)) & 0x8080808080808080}; cval) { - goto after_tail; - } + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; + + if (!valid_bits) [[unlikely]] + { + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + } - val <<= 64 - valid_bits; + val <<= 64 - valid_bits; - ::std::uint_least64_t all_zero{0x3030303030303030}; + ::std::uint_least64_t all_zero{0x3030303030303030}; - all_zero >>= valid_bits; + all_zero >>= valid_bits; + + val |= all_zero; + constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; + + constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; + constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; + constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; + val -= 0x3030303030303030; + + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + constexpr auto pow_table{generate_pow_table()}; + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + + first += ctrz_cval / (8 * sizeof(char_type)); + + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + } - val |= all_zero; constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; @@ -660,126 +682,257 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; - val -= 0x3030303030303030; + val -= 0x3030303030303030; val = (val * base_char_type) + (val >> 8); val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - constexpr auto pow_table{generate_pow_table()}; - res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); - - first += ctrz_cval / (8 * sizeof(char_type)); - goto after_tail; + res = static_cast(res * pow_base_sizeof_u64 + val); + first += sizeof(::std::uint_least64_t); } - - constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; - - constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; - constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; - constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; - - val -= 0x3030303030303030; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - res = static_cast(res * pow_base_sizeof_u64 + val); - first += sizeof(::std::uint_least64_t); } - } - if constexpr (max_size >= sizeof(::std::uint_least32_t)) - { - if (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) + if constexpr (max_size >= sizeof(::std::uint_least32_t)) { - ::std::uint_least32_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - - val = ::fast_io::little_endian(val); + if (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) + { + ::std::uint_least32_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), sizeof(::std::uint_least32_t))}; - constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; + val = ::fast_io::little_endian(val); - if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) - { - auto const ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -8}; + constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), sizeof(::std::uint_least32_t))}; + constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; - if (!valid_bits) [[unlikely]] + if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) { - goto after_tail; - } + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; - val <<= 32 - valid_bits; + if (!valid_bits) [[unlikely]] + { + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + } - ::std::uint_least32_t all_zero{0x30303030}; + val <<= 32 - valid_bits; - all_zero >>= valid_bits; + ::std::uint_least32_t all_zero{0x30303030}; - val |= all_zero; + all_zero >>= valid_bits; - constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + val |= all_zero; - constexpr ::std::uint_least32_t mask{0x000000FF}; + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; - val -= 0x30303030; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + constexpr ::std::uint_least32_t mask{0x000000FF}; + + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); - constexpr auto pow_table{generate_pow_table()}; - res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + constexpr auto pow_table{generate_pow_table()}; + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); - first += ctrz_cval / (8 * sizeof(char_type)); - goto after_tail; - } - else - { - constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + first += ctrz_cval / (8 * sizeof(char_type)); - constexpr ::std::uint_least32_t mask{0x000000FF}; - val -= 0x30303030; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); - res = static_cast(res * pow_base_sizeof_u32 + val); - first += sizeof(::std::uint_least32_t); + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + } + else + { + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + + constexpr ::std::uint_least32_t mask{0x000000FF}; + + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + res = static_cast(res * pow_base_sizeof_u32 + val); + first += sizeof(::std::uint_least32_t); + } } } } - } - else if constexpr (sizeof(char_type) == sizeof(char16_t)) - { - constexpr ::std::size_t u64_size_of_c16{sizeof(::std::uint_least64_t) / sizeof(char16_t)}; - if constexpr (max_size >= u64_size_of_c16) + else if constexpr (sizeof(char_type) == sizeof(char16_t)) { - while (static_cast<::std::size_t>(first_phase_last - first) >= u64_size_of_c16) + constexpr ::std::size_t u64_size_of_c16{sizeof(::std::uint_least64_t) / sizeof(char16_t)}; + if constexpr (max_size >= u64_size_of_c16) { - ::std::uint_least64_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); + while (static_cast<::std::size_t>(first_phase_last - first) >= u64_size_of_c16) + { + ::std::uint_least64_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); - val = ::fast_io::little_endian(val); + val = ::fast_io::little_endian(val); - constexpr ::std::uint_least64_t pow_base_sizeof_u64{ - ::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), u64_size_of_c16)}; - constexpr ::std::uint_least64_t first_bound{0x7fc67fc67fc67fc6 + 0x0001000100010001 * (10 - base_char_type)}; + constexpr ::std::uint_least64_t pow_base_sizeof_u64{ + ::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), u64_size_of_c16)}; + constexpr ::std::uint_least64_t first_bound{0x7fc67fc67fc67fc6 + 0x0001000100010001 * (10 - base_char_type)}; - if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval) - { - auto const ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -16}; - - if (!valid_bits) [[unlikely]] + if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval) { - goto after_tail; - } + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -16}; - val <<= 64 - valid_bits; + if (!valid_bits) [[unlikely]] + { + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + } - ::std::uint_least64_t all_zero{0x0030003000300030}; + val <<= 64 - valid_bits; - all_zero >>= valid_bits; + ::std::uint_least64_t all_zero{0x0030003000300030}; - val |= all_zero; + all_zero >>= valid_bits; + val |= all_zero; + + constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; + + constexpr ::std::uint_least64_t mask{0x000000000000FFFF}; + + val -= 0x0030003000300030; + val = (val * base_char_type) + (val >> 16); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); + constexpr auto pow_table{generate_pow_table()}; + + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + + first += ctrz_cval / (8 * sizeof(char_type)); + + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + } constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; constexpr ::std::uint_least64_t mask{0x000000000000FFFF}; @@ -787,66 +940,140 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi val -= 0x0030003000300030; val = (val * base_char_type) + (val >> 16); val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); - constexpr auto pow_table{generate_pow_table()}; - - res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); - - first += ctrz_cval / (8 * sizeof(char_type)); - goto after_tail; + res = static_cast(res * pow_base_sizeof_u64 + val); + first += u64_size_of_c16; } - constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; - - constexpr ::std::uint_least64_t mask{0x000000000000FFFF}; - - val -= 0x0030003000300030; - val = (val * base_char_type) + (val >> 16); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); - res = static_cast(res * pow_base_sizeof_u64 + val); - first += u64_size_of_c16; } } } - } - else if constexpr (base_char_type <= 16) - { - if constexpr (sizeof(char_type) == sizeof(char8_t)) + else if constexpr (base_char_type <= 16) { - if constexpr (max_size >= sizeof(::std::uint_least64_t)) + if constexpr (sizeof(char_type) == sizeof(char8_t)) { - while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) + if constexpr (max_size >= sizeof(::std::uint_least64_t)) { - ::std::uint_least64_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); - - val = ::fast_io::little_endian(val); - - constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), sizeof(::std::uint_least64_t))}; - constexpr ::std::uint_least64_t first_bound1{0x3939393939393939 + 0x0101010101010101 * (16 - base_char_type)}; - constexpr ::std::uint_least64_t first_bound2{0x1919191919191919 + 0x0101010101010101 * (16 - base_char_type)}; - - if (::std::uint_least64_t const cval{((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & - ((val + first_bound1) | (val - 0x4040404040404040)) & - ((val + first_bound2) | (val - 0x6060606060606060))) | - ~(((val + 0x3f3f3f3f3f3f3f3f) | (val - 0x4040404040404040)) & - ((val + 0x1f1f1f1f1f1f1f1f) | (val - 0x6060606060606060)))) & - 0x8080808080808080}; - cval) + while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) { - auto const ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -8}; + ::std::uint_least64_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); + + val = ::fast_io::little_endian(val); - if (!valid_bits) [[unlikely]] + constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), sizeof(::std::uint_least64_t))}; + constexpr ::std::uint_least64_t first_bound1{0x3939393939393939 + 0x0101010101010101 * (16 - base_char_type)}; + constexpr ::std::uint_least64_t first_bound2{0x1919191919191919 + 0x0101010101010101 * (16 - base_char_type)}; + + if (::std::uint_least64_t const cval{((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & + ((val + first_bound1) | (val - 0x4040404040404040)) & + ((val + first_bound2) | (val - 0x6060606060606060))) | + ~(((val + 0x3f3f3f3f3f3f3f3f) | (val - 0x4040404040404040)) & + ((val + 0x1f1f1f1f1f1f1f1f) | (val - 0x6060606060606060)))) & + 0x8080808080808080}; + cval) { - goto after_tail; - } + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; + + if (!valid_bits) [[unlikely]] + { + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + } + + val <<= 64 - valid_bits; + + ::std::uint_least64_t all_zero{0x3030303030303030}; + + all_zero >>= valid_bits; + + val |= all_zero; + constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; + + constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; + constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; + constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; + + val -= 0x3030303030303030; + val = (val & 0x0f0f0f0f0f0f0f0f) + ((val & 0x1010101010101010) >> 4) * 9; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + + constexpr auto pow_table{generate_pow_table()}; - val <<= 64 - valid_bits; + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); - ::std::uint_least64_t all_zero{0x3030303030303030}; + first += ctrz_cval / (8 * sizeof(char_type)); - all_zero >>= valid_bits; + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + } - val |= all_zero; constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; @@ -859,31 +1086,10 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi val = (val & 0x0f0f0f0f0f0f0f0f) + ((val & 0x1010101010101010) >> 4) * 9; val = (val * base_char_type) + (val >> 8); val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - - constexpr auto pow_table{generate_pow_table()}; - - res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); - - first += ctrz_cval / (8 * sizeof(char_type)); - goto after_tail; + res = static_cast(res * pow_base_sizeof_u64 + val); + first += sizeof(::std::uint_least64_t); } - - constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)}; - - constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; - constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; - constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; - - val -= 0x3030303030303030; - val = (val & 0x0f0f0f0f0f0f0f0f) + ((val & 0x1010101010101010) >> 4) * 9; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - res = static_cast(res * pow_base_sizeof_u64 + val); - first += sizeof(::std::uint_least64_t); } - } // It seems that it is unnecessary to read 4-bit data in base11-hexadecimal system, // and the time for 4-bit data reading of this algorithm even exceeds 4 times of cyclic reading @@ -894,7 +1100,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi { ::std::uint_least32_t val; ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - + val = ::fast_io::little_endian(val); constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), sizeof(::std::uint_least32_t))}; @@ -914,7 +1120,40 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi if (!valid_bits) [[unlikely]] { - goto after_tail; + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; } val <<= 32 - valid_bits; @@ -936,12 +1175,46 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi constexpr auto pow_table{generate_pow_table()}; - res = static_cast(res * pow_table[ ctrz_cval / (8 * sizeof(char_type))] + val); + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); first += ctrz_cval / (8 * sizeof(char_type)); - goto after_tail; + + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; } - else + else { constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; @@ -957,80 +1230,147 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi } } #endif + } } } - } - else if constexpr (sizeof(::std::uint_least16_t) < sizeof(::std::size_t)) - { - if constexpr (base_char_type <= 10) + else if constexpr (sizeof(::std::uint_least16_t) < sizeof(::std::size_t)) { - if constexpr (sizeof(char_type) == sizeof(char8_t)) + if constexpr (base_char_type <= 10) { - if constexpr (max_size >= sizeof(::std::uint_least32_t)) + if constexpr (sizeof(char_type) == sizeof(char8_t)) { - while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) + if constexpr (max_size >= sizeof(::std::uint_least32_t)) { - ::std::uint_least32_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - - val = ::fast_io::little_endian(val); + while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) + { + ::std::uint_least32_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), sizeof(::std::uint_least32_t))}; - constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; + val = ::fast_io::little_endian(val); - if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[unlikely]] - { - auto const ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -8}; + constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), sizeof(::std::uint_least32_t))}; + constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; - if (!valid_bits) [[unlikely]] + if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[unlikely]] { - goto after_tail; - } + auto const ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; - val <<= 32 - valid_bits; + if (!valid_bits) [[unlikely]] + { + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + } - ::std::uint_least32_t all_zero{0x30303030}; + val <<= 32 - valid_bits; - all_zero >>= valid_bits; + ::std::uint_least32_t all_zero{0x30303030}; - val |= all_zero; + all_zero >>= valid_bits; - constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + val |= all_zero; - constexpr ::std::uint_least32_t mask{0x000000FF}; + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; - val -= 0x30303030; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + constexpr ::std::uint_least32_t mask{0x000000FF}; - constexpr auto pow_table{generate_pow_table()}; + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); - res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); + constexpr auto pow_table{generate_pow_table()}; - first += ctrz_cval / (8 * sizeof(char_type)); - goto after_tail; - } + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); - constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; + first += ctrz_cval / (8 * sizeof(char_type)); + + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + } + + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; - constexpr ::std::uint_least32_t mask{0x000000FF}; + constexpr ::std::uint_least32_t mask{0x000000FF}; - val -= 0x30303030; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); - res = static_cast(res * pow_base_sizeof_u32 + val); - first += sizeof(::std::uint_least32_t); + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + res = static_cast(res * pow_base_sizeof_u32 + val); + first += sizeof(::std::uint_least32_t); + } } } } - } - else if constexpr (base_char_type <= 16) - { - if constexpr (sizeof(char_type) == sizeof(char8_t)) + else if constexpr (base_char_type <= 16) { - // It seems that it is unnecessary to read 4-bit data in base11-hexadecimal system, - // and the time for 4-bit data reading of this algorithm even exceeds 4 times of cyclic reading + if constexpr (sizeof(char_type) == sizeof(char8_t)) + { + // It seems that it is unnecessary to read 4-bit data in base11-hexadecimal system, + // and the time for 4-bit data reading of this algorithm even exceeds 4 times of cyclic reading #if 0 if constexpr (max_size >= sizeof(::std::uint_least32_t)) { @@ -1038,7 +1378,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi { ::std::uint_least32_t val; ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - + val = ::fast_io::little_endian(val); constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), sizeof(::std::uint_least32_t))}; @@ -1051,7 +1391,7 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi ~(((val + 0x3f3f3f3f) | (val - 0x40404040)) & ((val + 0x1f1f1f1f) | (val - 0x60606060)))) & 0x80808080}; - cval) + cval) { auto const ctrz_cval{::std::countr_zero(cval)}; auto const valid_bits{ctrz_cval & -8}; @@ -1079,10 +1419,45 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); constexpr auto pow_table{generate_pow_table()}; - res = static_cast(res * pow_table[ ctrz_cval / (8 * sizeof(char_type))]+ val); + res = static_cast(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val); first += ctrz_cval / (8 * sizeof(char_type)); - goto after_tail; + + + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; } constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; @@ -1099,12 +1474,12 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi } } #endif + } } } } } - for (; first != first_phase_last; ++first) { unsigned_char_type ch{static_cast(*first)}; @@ -1116,8 +1491,6 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi res += ch; } -[[maybe_unused]] after_tail: - bool overflow{}; if (first != last) [[likely]] { @@ -1154,27 +1527,6 @@ runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *fi return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; } -template -#if defined(__SSE4_1__) && __has_cpp_attribute(__gnu__::__cold__) && (defined(__x86_64__) || defined(_M_AMD64)) -[[__gnu__::__cold__]] -#endif -inline constexpr parse_result -scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, char_type const *last, T &res) noexcept -{ -#if __cpp_if_consteval >= 202106L - if !consteval -#else - if (!__builtin_is_constant_evaluated()) -#endif - { - return runtime_scan_int_contiguous_none_simd_space_part_define_impl(first, last, res); - } - else - { - return compile_time_scan_int_contiguous_none_simd_space_part_define_impl(first, last, res); - } -} - inline constexpr parse_code ongoing_parse_code{static_cast(::std::numeric_limits::max())}; template From 436f0d6ad3580fccbbcebc225a3b1863e2b05ee8 Mon Sep 17 00:00:00 2001 From: MacroModel Date: Mon, 2 Dec 2024 22:19:48 +0800 Subject: [PATCH 5/7] rm shit --- .../integers/sto/sto_contiguous.h | 547 +++--------------- 1 file changed, 69 insertions(+), 478 deletions(-) diff --git a/include/fast_io_core_impl/integers/sto/sto_contiguous.h b/include/fast_io_core_impl/integers/sto/sto_contiguous.h index bf2eaffd..0e023ffb 100644 --- a/include/fast_io_core_impl/integers/sto/sto_contiguous.h +++ b/include/fast_io_core_impl/integers/sto/sto_contiguous.h @@ -505,6 +505,60 @@ inline constexpr char_type const *skip_digits(char_type const *first, char_type return first; } +template +#if __has_cpp_attribute(__gnu__::__always_inline__) +[[__gnu__::__always_inline__]] +#elif __has_cpp_attribute(msvc::forceinline) +[[msvc::forceinline]] +#endif +inline constexpr parse_result +scan_int_contiguous_none_simd_space_part_check_overflow_impl(char_type const *first, char_type const *last, T &res) noexcept +{ + using unsigned_char_type = ::std::make_unsigned_t; + using unsigned_type = my_make_unsigned_t<::std::remove_cvref_t>; + constexpr unsigned_char_type base_char_type{base}; + constexpr unsigned_type risky_uint_max{static_cast(-1)}; + constexpr unsigned_type risky_value{risky_uint_max / base}; + constexpr unsigned_char_type risky_digit(risky_uint_max % base); + constexpr bool isspecialbase{base == 2 || base == 4 || base == 16}; + constexpr ::std::size_t max_size{details::cal_max_int_size() - (!isspecialbase)}; + + bool overflow{}; + if (first != last) [[likely]] + { + unsigned_char_type ch{static_cast(*first)}; + if constexpr (isspecialbase) + { + if (char_is_digit(ch)) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + else + { + if (!char_digit_to_literal(ch)) [[unlikely]] + { + overflow = res > risky_value || (risky_value == res && ch > risky_digit); + if (!overflow) + { + res *= base_char_type; + res += ch; + } + ++first; + if (first != last && char_is_digit(static_cast(*first))) + { + ++first; + first = skip_digits(first, last); + overflow = true; + } + } + } + } + return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; +} + template inline constexpr ::fast_io::freestanding::array generate_pow_table() noexcept { @@ -580,40 +634,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha if (!valid_bits) [[unlikely]] { - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } val <<= 64 - valid_bits; @@ -639,40 +660,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha first += ctrz_cval / (8 * sizeof(char_type)); - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; @@ -710,40 +698,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha if (!valid_bits) [[unlikely]] { - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } val <<= 32 - valid_bits; @@ -768,40 +723,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha first += ctrz_cval / (8 * sizeof(char_type)); - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } else { @@ -841,40 +763,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha if (!valid_bits) [[unlikely]] { - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } val <<= 64 - valid_bits; @@ -898,40 +787,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha first += ctrz_cval / (8 * sizeof(char_type)); - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; @@ -976,40 +832,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha if (!valid_bits) [[unlikely]] { - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } val <<= 64 - valid_bits; @@ -1038,40 +861,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha first += ctrz_cval / (8 * sizeof(char_type)); - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)}; @@ -1120,40 +910,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha if (!valid_bits) [[unlikely]] { - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } val <<= 32 - valid_bits; @@ -1179,40 +936,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha first += ctrz_cval / (8 * sizeof(char_type)); - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } else { @@ -1258,40 +982,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha if (!valid_bits) [[unlikely]] { - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } val <<= 32 - valid_bits; @@ -1316,40 +1007,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha first += ctrz_cval / (8 * sizeof(char_type)); - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; @@ -1398,7 +1056,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha if (!valid_bits) [[unlikely]] { - goto after_tail; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } val <<= 32 - valid_bits; @@ -1423,41 +1081,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha first += ctrz_cval / (8 * sizeof(char_type)); - - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least32_t>(static_cast<::std::uint_least32_t>(base_char_type), 2)}; @@ -1491,40 +1115,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha res += ch; } - bool overflow{}; - if (first != last) [[likely]] - { - unsigned_char_type ch{static_cast(*first)}; - if constexpr (isspecialbase) - { - if (char_is_digit(ch)) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - else - { - if (!char_digit_to_literal(ch)) [[unlikely]] - { - overflow = res > risky_value || (risky_value == res && ch > risky_digit); - if (!overflow) - { - res *= base_char_type; - res += ch; - } - ++first; - if (first != last && char_is_digit(static_cast(*first))) - { - ++first; - first = skip_digits(first, last); - overflow = true; - } - } - } - } - return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))}; + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } inline constexpr parse_code ongoing_parse_code{static_cast(::std::numeric_limits::max())}; From 6b63ba27748d1fcc7a6dd1a84e2181b5acc5fb30 Mon Sep 17 00:00:00 2001 From: MacroModel Date: Mon, 2 Dec 2024 22:23:18 +0800 Subject: [PATCH 6/7] rm unused --- include/fast_io_core_impl/integers/sto/sto_contiguous.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/fast_io_core_impl/integers/sto/sto_contiguous.h b/include/fast_io_core_impl/integers/sto/sto_contiguous.h index 0e023ffb..754a1edb 100644 --- a/include/fast_io_core_impl/integers/sto/sto_contiguous.h +++ b/include/fast_io_core_impl/integers/sto/sto_contiguous.h @@ -521,7 +521,6 @@ scan_int_contiguous_none_simd_space_part_check_overflow_impl(char_type const *fi constexpr unsigned_type risky_value{risky_uint_max / base}; constexpr unsigned_char_type risky_digit(risky_uint_max % base); constexpr bool isspecialbase{base == 2 || base == 4 || base == 16}; - constexpr ::std::size_t max_size{details::cal_max_int_size() - (!isspecialbase)}; bool overflow{}; if (first != last) [[likely]] From f3226d05f5b99b5916841553dfe7606677a69025 Mon Sep 17 00:00:00 2001 From: MacroModel Date: Mon, 2 Dec 2024 22:27:47 +0800 Subject: [PATCH 7/7] rm unused --- include/fast_io_core_impl/integers/sto/sto_contiguous.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/fast_io_core_impl/integers/sto/sto_contiguous.h b/include/fast_io_core_impl/integers/sto/sto_contiguous.h index 754a1edb..f8776575 100644 --- a/include/fast_io_core_impl/integers/sto/sto_contiguous.h +++ b/include/fast_io_core_impl/integers/sto/sto_contiguous.h @@ -579,9 +579,6 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha using unsigned_char_type = ::std::make_unsigned_t; using unsigned_type = my_make_unsigned_t<::std::remove_cvref_t>; constexpr unsigned_char_type base_char_type{base}; - constexpr unsigned_type risky_uint_max{static_cast(-1)}; - constexpr unsigned_type risky_value{risky_uint_max / base}; - constexpr unsigned_char_type risky_digit(risky_uint_max % base); constexpr bool isspecialbase{base == 2 || base == 4 || base == 16}; constexpr ::std::size_t max_size{details::cal_max_int_size() - (!isspecialbase)}; ::std::size_t const diff{static_cast<::std::size_t>(last - first)}; @@ -605,7 +602,8 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha // https://github.com/fastfloat/fast_float // Copyright(c) 2021 The fast_float authors // - // Binary to Hexadecimal: + // Implementation of higher performance (Binary to Hexadecimal): + // Optimize both fixed range and infinite range (suitable for scan) // Copyright(c) 2024 MacroModel if constexpr (sizeof(::std::uint_least32_t) < sizeof(::std::size_t))