From d4cc79c0f1bca7781cf3c7643e1d07131b40ba51 Mon Sep 17 00:00:00 2001 From: MacroModel Date: Tue, 3 Dec 2024 14:20:20 +0800 Subject: [PATCH 1/2] support constexpr sto --- .../integers/sto/sto_contiguous.h | 506 ++++++++++-------- 1 file changed, 275 insertions(+), 231 deletions(-) diff --git a/include/fast_io_core_impl/integers/sto/sto_contiguous.h b/include/fast_io_core_impl/integers/sto/sto_contiguous.h index 93398368..40cdd891 100644 --- a/include/fast_io_core_impl/integers/sto/sto_contiguous.h +++ b/include/fast_io_core_impl/integers/sto/sto_contiguous.h @@ -528,7 +528,7 @@ inline constexpr ::fast_io::freestanding::array pow_table_n{::fast_io::det template inline parse_result -scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, char_type const *last, T &res) noexcept +runtime_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, char_type const *last, T &res) noexcept { using unsigned_char_type = ::std::make_unsigned_t; using unsigned_type = my_make_unsigned_t<::std::remove_cvref_t>; @@ -539,348 +539,341 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha ::std::size_t const diff{static_cast<::std::size_t>(last - first)}; ::std::size_t mn_val{max_size}; - if (diff < mn_val) { mn_val = diff; } auto first_phase_last{first + mn_val}; -#ifdef __cpp_if_consteval - if !consteval -#else - if (!__builtin_is_constant_evaluated()) -#endif + + constexpr bool isebcdic{::fast_io::details::is_ebcdic}; + if constexpr (!isebcdic && (::std::numeric_limits<::std::uint_least64_t>::digits == 64u)) { - constexpr bool isebcdic{::fast_io::details::is_ebcdic}; - if constexpr (!isebcdic && (::std::numeric_limits<::std::uint_least64_t>::digits == 64u)) + if constexpr (sizeof(::std::uint_least32_t) < sizeof(::std::size_t)) { - if constexpr (sizeof(::std::uint_least32_t) < sizeof(::std::size_t)) + if constexpr (base_char_type <= 10) { - if constexpr (base_char_type <= 10) + if constexpr (sizeof(char_type) == sizeof(char8_t)) { - if constexpr (sizeof(char_type) == sizeof(char8_t)) + if constexpr (max_size >= sizeof(::std::uint_least64_t)) { - if constexpr (max_size >= sizeof(::std::uint_least64_t)) - { - constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 2>}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 4>}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 6>}; - constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, sizeof(::std::uint_least64_t)>}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 2>}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 4>}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 6>}; + constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, sizeof(::std::uint_least64_t)>}; - constexpr ::std::uint_least64_t baseval{0x0101010101010101}; - constexpr ::std::uint_least64_t zero_lower_bound{isebcdic ? baseval * 0xF0 : baseval * 0x30}; - constexpr ::std::uint_least64_t first_bound{0x4646464646464646 + baseval * (10 - base_char_type)}; - constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; - constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; - constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; - constexpr ::std::uint_least64_t fullmask{baseval * 0x80}; + constexpr ::std::uint_least64_t baseval{0x0101010101010101}; + constexpr ::std::uint_least64_t zero_lower_bound{isebcdic ? baseval * 0xF0 : baseval * 0x30}; + constexpr ::std::uint_least64_t first_bound{0x4646464646464646 + baseval * (10 - base_char_type)}; + constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; + constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; + constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; + constexpr ::std::uint_least64_t fullmask{baseval * 0x80}; - while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) + while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) + { + ::std::uint_least64_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); + + if constexpr (::std::endian::little != ::std::endian::native) { - ::std::uint_least64_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); + val = ::fast_io::little_endian(val); + } - if constexpr (::std::endian::little != ::std::endian::native) - { - val = ::fast_io::little_endian(val); - } + if (::std::uint_least64_t const cval{((val + first_bound) | (val - zero_lower_bound)) & fullmask}; cval) + { + auto ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; - if (::std::uint_least64_t const cval{((val + first_bound) | (val - zero_lower_bound)) & fullmask}; cval) + if (valid_bits) [[likely]] { - auto ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -8}; + val <<= 64 - valid_bits; - if (valid_bits) [[likely]] - { - val <<= 64 - valid_bits; + ::std::uint_least64_t all_zero{zero_lower_bound}; - ::std::uint_least64_t all_zero{zero_lower_bound}; + all_zero >>= valid_bits; - all_zero >>= valid_bits; + val |= all_zero; + val -= zero_lower_bound; - val |= all_zero; - val -= zero_lower_bound; - - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - ctrz_cval >>= shifter; - res = static_cast(res * ::fast_io::details::pow_table_n.index_unchecked(ctrz_cval) + val); + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + ctrz_cval >>= shifter; + res = static_cast(res * ::fast_io::details::pow_table_n.index_unchecked(ctrz_cval) + val); - first += ctrz_cval; - } + first += ctrz_cval; + } #if defined(_MSC_VER) && !defined(__clang__) - return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); #else - goto nextlabel; + goto nextlabel; #endif - } - - val -= zero_lower_bound; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - res = static_cast(res * pow_base_sizeof_u64 + val); - first += sizeof(::std::uint_least64_t); } + + val -= zero_lower_bound; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + res = static_cast(res * pow_base_sizeof_u64 + val); + first += sizeof(::std::uint_least64_t); } + } + + if constexpr (max_size >= sizeof(::std::uint_least32_t)) + { + constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, sizeof(::std::uint_least32_t)>}; + constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least32_t, base_char_type, 2>}; + constexpr ::std::uint_least32_t mask{0x000000FF}; - if constexpr (max_size >= sizeof(::std::uint_least32_t)) + if (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) { - constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, sizeof(::std::uint_least32_t)>}; - constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; - constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least32_t, base_char_type, 2>}; - constexpr ::std::uint_least32_t mask{0x000000FF}; + ::std::uint_least32_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - if (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) + if constexpr (::std::endian::little != ::std::endian::native) { - ::std::uint_least32_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); + val = ::fast_io::little_endian(val); + } - if constexpr (::std::endian::little != ::std::endian::native) - { - val = ::fast_io::little_endian(val); - } + if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) + { + auto ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; - if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) + if (valid_bits) [[likely]] { - auto ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -8}; - - if (valid_bits) [[likely]] - { - val <<= 32 - valid_bits; + val <<= 32 - valid_bits; - ::std::uint_least32_t all_zero{0x30303030}; + ::std::uint_least32_t all_zero{0x30303030}; - all_zero >>= valid_bits; + all_zero >>= valid_bits; - val |= all_zero; + val |= all_zero; - val -= 0x30303030; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); - ctrz_cval >>= shifter; - res = static_cast(res * ::fast_io::details::pow_table_n.index_unchecked(ctrz_cval) + val); + ctrz_cval >>= shifter; + res = static_cast(res * ::fast_io::details::pow_table_n.index_unchecked(ctrz_cval) + val); - first += ctrz_cval; - } + first += ctrz_cval; + } #if defined(_MSC_VER) && !defined(__clang__) - return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); #else - goto nextlabel; + goto nextlabel; #endif - } - else - { - val -= 0x30303030; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); - res = static_cast(res * pow_base_sizeof_u32 + val); - first += sizeof(::std::uint_least32_t); - } + } + else + { + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + res = static_cast(res * pow_base_sizeof_u32 + val); + first += sizeof(::std::uint_least32_t); } } } - else if constexpr (sizeof(char_type) == sizeof(char16_t)) + } + else if constexpr (sizeof(char_type) == sizeof(char16_t)) + { + constexpr ::std::size_t u64_size_of_c16{sizeof(::std::uint_least64_t) / sizeof(char16_t)}; + constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, u64_size_of_c16>}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 2>}; + constexpr ::std::uint_least64_t mask{0x000000000000FFFF}; + constexpr ::std::uint_least64_t first_bound{0x7fc67fc67fc67fc6 + 0x0001000100010001 * (10 - base)}; + if constexpr (max_size >= u64_size_of_c16) { - constexpr ::std::size_t u64_size_of_c16{sizeof(::std::uint_least64_t) / sizeof(char16_t)}; - constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, u64_size_of_c16>}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 2>}; - constexpr ::std::uint_least64_t mask{0x000000000000FFFF}; - constexpr ::std::uint_least64_t first_bound{0x7fc67fc67fc67fc6 + 0x0001000100010001 * (10 - base)}; - if constexpr (max_size >= u64_size_of_c16) + while (static_cast<::std::size_t>(first_phase_last - first) >= u64_size_of_c16) { - while (static_cast<::std::size_t>(first_phase_last - first) >= u64_size_of_c16) + ::std::uint_least64_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); + + if constexpr (::std::endian::little != ::std::endian::native) { - ::std::uint_least64_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); + val = ::fast_io::little_endian(val); + } - if constexpr (::std::endian::little != ::std::endian::native) - { - val = ::fast_io::little_endian(val); - } + if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval) + { + auto ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -16}; - if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x0030003000300030)) & 0x8000800080008000}; cval) + if (valid_bits) [[likely]] { - auto ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -16}; + val <<= 64 - valid_bits; - if (valid_bits) [[likely]] - { - val <<= 64 - valid_bits; + ::std::uint_least64_t all_zero{0x0030003000300030}; - ::std::uint_least64_t all_zero{0x0030003000300030}; + all_zero >>= valid_bits; - all_zero >>= valid_bits; + val |= all_zero; - val |= all_zero; + val -= 0x0030003000300030; + val = (val * base_char_type) + (val >> 16); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); - val -= 0x0030003000300030; - val = (val * base_char_type) + (val >> 16); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); + ctrz_cval >>= shifter; + res = static_cast(res * ::fast_io::details::pow_table_n.index_unchecked(ctrz_cval) + val); - ctrz_cval >>= shifter; - res = static_cast(res * ::fast_io::details::pow_table_n.index_unchecked(ctrz_cval) + val); - - first += ctrz_cval; - } + first += ctrz_cval; + } #if defined(_MSC_VER) && !defined(__clang__) - return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); #else - goto nextlabel; + goto nextlabel; #endif - } - val -= 0x0030003000300030; - val = (val * base_char_type) + (val >> 16); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); - res = static_cast(res * pow_base_sizeof_u64 + val); - first += u64_size_of_c16; } + val -= 0x0030003000300030; + val = (val * base_char_type) + (val >> 16); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 32) & mask)); + res = static_cast(res * pow_base_sizeof_u64 + val); + first += u64_size_of_c16; } } } - else if constexpr (base_char_type <= 16) + } + else if constexpr (base_char_type <= 16) + { + if constexpr (sizeof(char_type) == sizeof(char8_t)) { - if constexpr (sizeof(char_type) == sizeof(char8_t)) + if constexpr (max_size >= sizeof(::std::uint_least64_t)) { - if constexpr (max_size >= sizeof(::std::uint_least64_t)) + constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 2>}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 4>}; + constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 6>}; + constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, sizeof(::std::uint_least64_t)>}; + constexpr ::std::uint_least64_t first_bound1{0x3939393939393939 + 0x0101010101010101 * (16 - base_char_type)}; + constexpr ::std::uint_least64_t first_bound2{0x1919191919191919 + 0x0101010101010101 * (16 - base_char_type)}; + + constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; + constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; + constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; + while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) { - constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 2>}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 4>}; - constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, 6>}; - constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_pow_n<::std::uint_least64_t, base_char_type, sizeof(::std::uint_least64_t)>}; - constexpr ::std::uint_least64_t first_bound1{0x3939393939393939 + 0x0101010101010101 * (16 - base_char_type)}; - constexpr ::std::uint_least64_t first_bound2{0x1919191919191919 + 0x0101010101010101 * (16 - base_char_type)}; - - constexpr ::std::uint_least64_t mask{0x000000FF000000FF}; - constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)}; - constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)}; - while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t)) + ::std::uint_least64_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); + + if constexpr (::std::endian::little != ::std::endian::native) { - ::std::uint_least64_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t)); + val = ::fast_io::little_endian(val); + } - if constexpr (::std::endian::little != ::std::endian::native) - { - val = ::fast_io::little_endian(val); - } + if (::std::uint_least64_t const cval{((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & + ((val + first_bound1) | (val - 0x4040404040404040)) & + ((val + first_bound2) | (val - 0x6060606060606060))) | + ~(((val + 0x3f3f3f3f3f3f3f3f) | (val - 0x4040404040404040)) & + ((val + 0x1f1f1f1f1f1f1f1f) | (val - 0x6060606060606060)))) & + 0x8080808080808080}; + cval) + { + auto ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; - if (::std::uint_least64_t const cval{((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & - ((val + first_bound1) | (val - 0x4040404040404040)) & - ((val + first_bound2) | (val - 0x6060606060606060))) | - ~(((val + 0x3f3f3f3f3f3f3f3f) | (val - 0x4040404040404040)) & - ((val + 0x1f1f1f1f1f1f1f1f) | (val - 0x6060606060606060)))) & - 0x8080808080808080}; - cval) + if (valid_bits) [[likely]] { - auto ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -8}; - - if (valid_bits) [[likely]] - { - val <<= 64 - valid_bits; + val <<= 64 - valid_bits; - ::std::uint_least64_t all_zero{0x3030303030303030}; + ::std::uint_least64_t all_zero{0x3030303030303030}; - all_zero >>= valid_bits; + all_zero >>= valid_bits; - val |= all_zero; + val |= all_zero; - val -= 0x3030303030303030; - val = (val & 0x0f0f0f0f0f0f0f0f) + ((val & 0x1010101010101010) >> 4) * 9; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + val -= 0x3030303030303030; + val = (val & 0x0f0f0f0f0f0f0f0f) + ((val & 0x1010101010101010) >> 4) * 9; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - ctrz_cval >>= shifter; + ctrz_cval >>= shifter; - res = static_cast(res * ::fast_io::details::pow_table_n.index_unchecked(ctrz_cval) + val); - first += ctrz_cval; - } + res = static_cast(res * ::fast_io::details::pow_table_n.index_unchecked(ctrz_cval) + val); + first += ctrz_cval; + } #if defined(_MSC_VER) && !defined(__clang__) - return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); #else - goto nextlabel; + goto nextlabel; #endif - } - - val -= 0x3030303030303030; - val = (val & 0x0f0f0f0f0f0f0f0f) + ((val & 0x1010101010101010) >> 4) * 9; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - res = static_cast(res * pow_base_sizeof_u64 + val); - first += sizeof(::std::uint_least64_t); } + + val -= 0x3030303030303030; + val = (val & 0x0f0f0f0f0f0f0f0f) + ((val & 0x1010101010101010) >> 4) * 9; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + res = static_cast(res * pow_base_sizeof_u64 + val); + first += sizeof(::std::uint_least64_t); } } } } - else if constexpr (sizeof(::std::uint_least16_t) < sizeof(::std::size_t)) + } + else if constexpr (sizeof(::std::uint_least16_t) < sizeof(::std::size_t)) + { + if constexpr (base_char_type <= 10) { - if constexpr (base_char_type <= 10) + if constexpr (sizeof(char_type) == sizeof(char8_t)) { - if constexpr (sizeof(char_type) == sizeof(char8_t)) + if constexpr (max_size >= sizeof(::std::uint_least32_t)) { - if constexpr (max_size >= sizeof(::std::uint_least32_t)) + constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_pow_n<::std::uint_least32_t, base_char_type, sizeof(::std::uint_least32_t)>}; + constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; + + constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least32_t, base_char_type, 2>}; + constexpr ::std::uint_least32_t mask{0x000000FF}; + while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) { - constexpr ::std::uint_least32_t pow_base_sizeof_u32{::fast_io::details::compile_pow_n<::std::uint_least32_t, base_char_type, sizeof(::std::uint_least32_t)>}; - constexpr ::std::uint_least32_t first_bound{0x46464646 + 0x01010101 * (10 - base_char_type)}; + ::std::uint_least32_t val; + ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); - constexpr ::std::uint_least32_t pow_base_sizeof_base_2{::fast_io::details::compile_pow_n<::std::uint_least32_t, base_char_type, 2>}; - constexpr ::std::uint_least32_t mask{0x000000FF}; - while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least32_t)) - { - ::std::uint_least32_t val; - ::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least32_t)); + val = ::fast_io::little_endian(val); - val = ::fast_io::little_endian(val); + if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[unlikely]] + { + auto ctrz_cval{::std::countr_zero(cval)}; + auto const valid_bits{ctrz_cval & -8}; - if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[unlikely]] + if (valid_bits) [[likely]] { - auto ctrz_cval{::std::countr_zero(cval)}; - auto const valid_bits{ctrz_cval & -8}; + val <<= 32 - valid_bits; - if (valid_bits) [[likely]] - { - val <<= 32 - valid_bits; + ::std::uint_least32_t all_zero{0x30303030}; - ::std::uint_least32_t all_zero{0x30303030}; + all_zero >>= valid_bits; - all_zero >>= valid_bits; + val |= all_zero; - val |= all_zero; - - val -= 0x30303030; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); - ctrz_cval >>= shifter; + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + ctrz_cval >>= shifter; - res = static_cast(res * ::fast_io::details::pow_table_n.index_unchecked(ctrz_cval) + val); + res = static_cast(res * ::fast_io::details::pow_table_n.index_unchecked(ctrz_cval) + val); - first += ctrz_cval; - } + first += ctrz_cval; + } #if defined(_MSC_VER) && !defined(__clang__) - return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); #else - goto nextlabel; + goto nextlabel; #endif - } - - - val -= 0x30303030; - val = (val * base_char_type) + (val >> 8); - val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); - res = static_cast(res * pow_base_sizeof_u32 + val); - first += sizeof(::std::uint_least32_t); } + + val -= 0x30303030; + val = (val * base_char_type) + (val >> 8); + val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask)); + res = static_cast(res * pow_base_sizeof_u32 + val); + first += sizeof(::std::uint_least32_t); } } } } } } + for (; first != first_phase_last; ++first) { unsigned_char_type ch{static_cast(*first)}; @@ -891,12 +884,63 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha res *= base_char_type; res += ch; } + #if !defined(_MSC_VER) || defined(__clang__) [[maybe_unused]] nextlabel:; #endif + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); } +template +inline constexpr parse_result +compile_time_scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, char_type const *last, T &res) noexcept +{ + using unsigned_char_type = ::std::make_unsigned_t; + using unsigned_type = my_make_unsigned_t<::std::remove_cvref_t>; + constexpr char8_t base_char_type{base}; + constexpr bool isspecialbase{base == 2 || base == 4 || base == 16}; + constexpr ::std::size_t max_size{::fast_io::details::max_int_size_result - (!isspecialbase)}; + ::std::size_t const diff{static_cast<::std::size_t>(last - first)}; + ::std::size_t mn_val{max_size}; + + if (diff < mn_val) + { + mn_val = diff; + } + + for (; first != first_phase_last; ++first) + { + unsigned_char_type ch{static_cast(*first)}; + if (char_digit_to_literal(ch)) [[unlikely]] + { + break; + } + res *= base_char_type; + res += ch; + } + + return scan_int_contiguous_none_simd_space_part_check_overflow_impl(first, last, res); +} + +template +inline constexpr parse_result +scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, char_type const *last, T &res) noexcept +{ +#ifdef __cpp_if_consteval + if !consteval +#else + if (!__builtin_is_constant_evaluated()) +#endif + { + return runtime_scan_int_contiguous_none_simd_space_part_define_impl(first, last, res); + } + else + { + return compile_time_scan_int_contiguous_none_simd_space_part_define_impl(first, last, res); + } +} + inline constexpr parse_code ongoing_parse_code{static_cast(::std::numeric_limits::max())}; template From 7d6ca9e4bdf56fc67135497175fb0f7abbdaf5df Mon Sep 17 00:00:00 2001 From: MacroModel Date: Tue, 3 Dec 2024 14:23:12 +0800 Subject: [PATCH 2/2] fix missing --- include/fast_io_core_impl/integers/sto/sto_contiguous.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/fast_io_core_impl/integers/sto/sto_contiguous.h b/include/fast_io_core_impl/integers/sto/sto_contiguous.h index 40cdd891..6fb25ef5 100644 --- a/include/fast_io_core_impl/integers/sto/sto_contiguous.h +++ b/include/fast_io_core_impl/integers/sto/sto_contiguous.h @@ -909,6 +909,8 @@ compile_time_scan_int_contiguous_none_simd_space_part_define_impl(char_type cons mn_val = diff; } + auto first_phase_last{first + mn_val}; + for (; first != first_phase_last; ++first) { unsigned_char_type ch{static_cast(*first)};