Skip to content

Commit

Permalink
Merge pull request #1002 from trcrsired/master
Browse files Browse the repository at this point in the history
Fix around braindeath caused by macromodel
  • Loading branch information
trcrsired authored Dec 2, 2024
2 parents 5329d05 + 26c4217 commit 291ef59
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 58 deletions.
34 changes: 32 additions & 2 deletions include/fast_io_core_impl/freestanding/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,34 @@ struct array
}
// Returns a const reference to the last element, i.e. element[N - 1].
inline constexpr T const &back() const noexcept
{
	// Fold the index into a typed compile-time constant before subscripting;
	// brace-initialisation rejects any narrowing when forming size_type.
	constexpr size_type nm1{N - 1};
	return element[nm1];
}
// Returns a mutable reference to the last element, i.e. element[N - 1].
inline constexpr T &back() noexcept
{
	// Fold the index into a typed compile-time constant before subscripting;
	// brace-initialisation rejects any narrowing when forming size_type.
	constexpr size_type nm1{N - 1};
	return element[nm1];
}

// Const access to the first stored element; no check of any kind is performed here.
inline constexpr T const &front_unchecked() const noexcept
{
	return element[0];
}
// Mutable access to the first stored element; no check of any kind is performed here.
inline constexpr T &front_unchecked() noexcept
{
	return element[0];
}
// Const access to the element at index N - 1; no check of any kind is performed here.
inline constexpr T const &back_unchecked() const noexcept
{
	// The index is a compile-time constant of type size_type.
	constexpr size_type last_index{N - 1};
	return *(element + last_index);
}
// Mutable access to the element at index N - 1; no check of any kind is performed here.
inline constexpr T &back_unchecked() noexcept
{
	// The index is a compile-time constant of type size_type.
	constexpr size_type last_index{N - 1};
	return *(element + last_index);
}

inline constexpr T const &operator[](::std::size_t i) const noexcept
{
return element[i];
Expand All @@ -79,6 +101,14 @@ struct array
{
return element[i];
}
// Const access to the element at position i. The index is used as given:
// this function does not compare it against N.
inline constexpr T const &index_unchecked(::std::size_t i) const noexcept
{
	return *(element + i);
}
// Mutable access to the element at position i. The index is used as given:
// this function does not compare it against N.
inline constexpr T &index_unchecked(::std::size_t i) noexcept
{
	return *(element + i);
}
};

} // namespace fast_io::freestanding
92 changes: 50 additions & 42 deletions include/fast_io_core_impl/integers/sto/sto_contiguous.h
Original file line number Diff line number Diff line change
Expand Up @@ -558,17 +558,26 @@ scan_int_contiguous_none_simd_space_part_check_overflow_impl(char_type const *fi
return {first, (overflow ? (parse_code::overflow) : (parse_code::ok))};
}

// Builds a table of the first n powers of `base`, evaluated in T:
//   result[i] == base^i   for 0 <= i < n   (so result[0] == 1).
// Arithmetic is done in the unsigned type T, so entries that exceed T's range wrap.
// n == 0 yields an empty table; the loop body is never entered in that case.
template <char8_t base, my_unsigned_integral T, ::std::size_t n>
inline constexpr ::fast_io::freestanding::array<T, n> generate_pow_table() noexcept
{
	// Value-initialise so every slot holds a defined value even before it is written.
	::fast_io::freestanding::array<T, n> tmp{};
	T b{1};
	for (::std::size_t i{}; i != n; ++i)
	{
		// Store first, then advance: slot i must hold base^i, because callers
		// multiply the running result by the entry indexed by the number of
		// digits just consumed.
		tmp.index_unchecked(i) = b;
		b *= base;
	}
	return tmp;
}

// Namespace-scope constant holding the result of generate_pow_table<base, T, n>(),
// one instance per (base, T, n) combination, so call sites can index a shared
// table instead of declaring a local copy.
template <char8_t base, my_unsigned_integral T, ::std::size_t n>
inline constexpr ::fast_io::freestanding::array<T, n> pow_table_n =
	::fast_io::details::generate_pow_table<base, T, n>();

template <char8_t base, ::std::integral char_type, my_unsigned_integral T>
#if defined(__SSE4_1__) && __has_cpp_attribute(__gnu__::__cold__) && (defined(__x86_64__) || defined(_M_AMD64))
[[__gnu__::__cold__]]
Expand All @@ -578,12 +587,14 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha
{
using unsigned_char_type = ::std::make_unsigned_t<char_type>;
using unsigned_type = my_make_unsigned_t<::std::remove_cvref_t<T>>;
constexpr unsigned_char_type base_char_type{base};
constexpr char8_t base_char_type{base};
constexpr bool isspecialbase{base == 2 || base == 4 || base == 16};
constexpr ::std::size_t max_size{details::cal_max_int_size<unsigned_type, base>() - (!isspecialbase)};
constexpr ::std::size_t max_size{::fast_io::details::max_int_size_result<unsigned_type, base> - (!isspecialbase)};
constexpr auto shifter{2 + ::std::bit_width(sizeof(char_type))};
::std::size_t const diff{static_cast<::std::size_t>(last - first)};
::std::size_t mn_val{max_size};


if (diff < mn_val)
{
mn_val = diff;
Expand All @@ -596,15 +607,13 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha
if (!__builtin_is_constant_evaluated())
#endif
{
if constexpr (!::fast_io::details::is_ebcdic<char_type>)
constexpr bool isebcdic{::fast_io::details::is_ebcdic<char_type>};
if constexpr (!isebcdic && (::std::numeric_limits<::std::uint_least64_t>::digits == 64u) && false)
{
// Inspired by:
// https://github.com/fastfloat/fast_float
// Copyright(c) 2021 The fast_float authors
//
// Implementation of higher performance (Binary to Hexadecimal):
// Optimize both fixed range and infinite range (suitable for scan)
// Copyright(c) 2024 MacroModel

if constexpr (sizeof(::std::uint_least32_t) < sizeof(::std::size_t))
{
Expand All @@ -614,19 +623,33 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha
{
if constexpr (max_size >= sizeof(::std::uint_least64_t))
{
constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::max_int_size_result<static_cast<::std::uint_least64_t>(base_char_type), 2>};
constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::max_int_size_result<static_cast<::std::uint_least64_t>(base_char_type), 4>};
constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::max_int_size_result<static_cast<::std::uint_least64_t>(base_char_type), 6>};
constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::max_int_size_result<static_cast<::std::uint_least64_t>(base_char_type), sizeof(::std::uint_least64_t)>};


constexpr ::std::uint_least64_t baseval{0x0101010101010101};
constexpr ::std::uint_least64_t zero_lower_bound{isebcdic ? baseval * 0xF0 : baseval * 0x30};
constexpr ::std::uint_least64_t first_bound{0x4646464646464646 + baseval * (10 - base_char_type)};
constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)};
constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)};
constexpr ::std::uint_least64_t mask{0x000000FF000000FF};
constexpr ::std::uint_least64_t fullmask{baseval * 0x80};

while (static_cast<::std::size_t>(first_phase_last - first) >= sizeof(::std::uint_least64_t))
{
::std::uint_least64_t val;
::fast_io::freestanding::my_memcpy(__builtin_addressof(val), first, sizeof(::std::uint_least64_t));

val = ::fast_io::little_endian(val);

constexpr ::std::uint_least64_t pow_base_sizeof_u64{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), sizeof(::std::uint_least64_t))};
constexpr ::std::uint_least64_t first_bound{0x4646464646464646 + 0x0101010101010101 * (10 - base_char_type)};
if constexpr (::std::endian::little != ::std::endian::native)
{
val = ::fast_io::little_endian(val);
}

if (::std::uint_least64_t const cval{((val + first_bound) | (val - 0x3030303030303030)) & 0x8080808080808080}; cval)
if (::std::uint_least64_t const cval{((val + first_bound) | (val - zero_lower_bound)) & fullmask}; cval)
{
auto const ctrz_cval{::std::countr_zero(cval)};
auto ctrz_cval{::std::countr_zero(cval)};
auto const valid_bits{ctrz_cval & -8};

if (!valid_bits) [[unlikely]]
Expand All @@ -636,39 +659,25 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha

val <<= 64 - valid_bits;

::std::uint_least64_t all_zero{0x3030303030303030};
::std::uint_least64_t all_zero{zero_lower_bound};

all_zero >>= valid_bits;

val |= all_zero;
constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)};
constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)};
constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)};

constexpr ::std::uint_least64_t mask{0x000000FF000000FF};
constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)};
constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)};
val -= 0x3030303030303030;
val -= zero_lower_bound;

val = (val * base_char_type) + (val >> 8);
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
constexpr auto pow_table{generate_pow_table<base_char_type, ::std::uint_least64_t, 8>()};
res = static_cast<T>(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val);
ctrz_cval >>= shifter;
res = static_cast<T>(res * ::fast_io::details::pow_table_n<base_char_type, ::std::uint_least64_t, 8>.index_unchecked(ctrz_cval) + val);

first += ctrz_cval / (8 * sizeof(char_type));
first += ctrz_cval;

return scan_int_contiguous_none_simd_space_part_check_overflow_impl<base, char_type, T>(first, last, res);
}

constexpr ::std::uint_least64_t pow_base_sizeof_base_2{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 2)};
constexpr ::std::uint_least64_t pow_base_sizeof_base_4{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 4)};
constexpr ::std::uint_least64_t pow_base_sizeof_base_6{::fast_io::details::compile_time_pow<::std::uint_least64_t>(static_cast<::std::uint_least64_t>(base_char_type), 6)};

constexpr ::std::uint_least64_t mask{0x000000FF000000FF};
constexpr ::std::uint_least64_t mul1{pow_base_sizeof_base_2 + (pow_base_sizeof_base_6 << 32)};
constexpr ::std::uint_least64_t mul2{1 + (pow_base_sizeof_base_4 << 32)};

val -= 0x3030303030303030;
val -= zero_lower_bound;
val = (val * base_char_type) + (val >> 8);
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
res = static_cast<T>(res * pow_base_sizeof_u64 + val);
Expand Down Expand Up @@ -974,7 +983,7 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha

if (::std::uint_least32_t const cval{((val + first_bound) | (val - 0x30303030)) & 0x80808080}; cval) [[unlikely]]
{
auto const ctrz_cval{::std::countr_zero(cval)};
auto ctrz_cval{::std::countr_zero(cval)};
auto const valid_bits{ctrz_cval & -8};

if (!valid_bits) [[unlikely]]
Expand All @@ -997,12 +1006,11 @@ scan_int_contiguous_none_simd_space_part_define_impl(char_type const *first, cha
val -= 0x30303030;
val = (val * base_char_type) + (val >> 8);
val = (((val & mask) * pow_base_sizeof_base_2) + ((val >> 16) & mask));
ctrz_cval >>= shifter;

constexpr auto pow_table{generate_pow_table<base_char_type, ::std::uint_least32_t, 4>()};

res = static_cast<T>(res * pow_table[ctrz_cval / (8 * sizeof(char_type))] + val);
res = static_cast<T>(res * ::fast_io::details::pow_table_n<base_char_type, ::std::uint_least32_t, 4>.index_unchecked(ctrz_cval) + val);

first += ctrz_cval / (8 * sizeof(char_type));
first += ctrz_cval;

return scan_int_contiguous_none_simd_space_part_check_overflow_impl<base, char_type, T>(first, last, res);
}
Expand Down
17 changes: 10 additions & 7 deletions include/fast_io_core_impl/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ struct io_lock_guard

inline
#if __cpp_constexpr >= 201907L
constexpr
constexpr
#endif
~io_lock_guard() noexcept
{
Expand All @@ -75,7 +75,7 @@ struct io_flush_guard

inline
#if __cpp_constexpr >= 201907L
constexpr
constexpr
#endif
~io_flush_guard() noexcept
{
Expand Down Expand Up @@ -166,19 +166,19 @@ concept my_floating_point = ::std::floating_point<T>
|| ::std::same_as<::std::remove_cv_t<T>, __float128>
#endif
#ifdef __STDCPP_BFLOAT16_T__
|| ::std::same_as<::std::remove_cv_t<T>, decltype(0.0bf16)>
|| ::std::same_as<::std::remove_cv_t<T>, decltype(0.0bf16)>
#endif
#ifdef __STDCPP_FLOAT16_T__
|| ::std::same_as<::std::remove_cv_t<T>, _Float16>
|| ::std::same_as<::std::remove_cv_t<T>, _Float16>
#endif
#ifdef __STDCPP_FLOAT32_T__
|| ::std::same_as<::std::remove_cv_t<T>, _Float32>
|| ::std::same_as<::std::remove_cv_t<T>, _Float32>
#endif
#ifdef __STDCPP_FLOAT64_T__
|| ::std::same_as<::std::remove_cv_t<T>, _Float64>
|| ::std::same_as<::std::remove_cv_t<T>, _Float64>
#endif
#ifdef __STDCPP_FLOAT128_T__
|| ::std::same_as<::std::remove_cv_t<T>, _Float128>
|| ::std::same_as<::std::remove_cv_t<T>, _Float128>
#endif
;

Expand Down Expand Up @@ -903,6 +903,9 @@ inline constexpr ::std::size_t cal_max_int_size() noexcept
return i;
}

// Variable-template form of cal_max_int_size<T, base>(): the value is computed
// once per (T, base) pair and can be named without a call expression.
template <my_integral T, char8_t base>
inline constexpr auto max_int_size_result = cal_max_int_size<T, base>();

// static_assert(cal_max_int_size<::std::uint_least64_t,10>()==20);
// static_assert(cal_max_int_size<::std::uint_least32_t,10>()==10);
template <typename char_type, ::std::size_t N>
Expand Down
19 changes: 12 additions & 7 deletions include/fast_io_hosted/process/process/win32.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@ inline win32_user_process_information win32_process_create_impl(void *__restrict
char16_t NtPath[64];
char16_t *RetStr{};
::std::size_t NtPathLen{};
for (char16_t i{65}; i < static_cast<char16_t>(26 + 65); i++)
constexpr char16_t bg{65};
constexpr char16_t ed{bg+26};
for (char16_t i{bg}; i != ed; ++i)
{
DosDevice[0] = i;
if (::fast_io::win32::QueryDosDeviceW(DosDevice, NtPath, 64))
Expand Down Expand Up @@ -278,7 +280,7 @@ inline win32_user_process_information win32_process_create_impl(void *__restrict
auto address_begin{pszFilename};

// change nt path to dos path
auto k32_module{::fast_io::win32::GetModuleHandleA(reinterpret_cast<char const *>(u8"Kernel32.dll"))};
auto k32_module{::fast_io::win32::GetModuleHandleA(reinterpret_cast<char const *>(u8"kernel32.dll"))};
if (k32_module)
{
using QueryDosDeviceA_t = ::std::uint_least32_t (*)(char const *, char *, ::std::uint_least32_t) noexcept;
Expand All @@ -289,14 +291,17 @@ inline win32_user_process_information win32_process_create_impl(void *__restrict
{
if (pszFilename[0] == u8'\\')
{
char DosDevice[4]{0, u8':', 0, 0};
char NtPath[64];
char8_t DosDevice[4]{0, u8':', 0, 0};
constexpr ::std::size_t ntpathsize{64};
char NtPath[ntpathsize];
char *RetStr{};
::std::size_t NtPathLen{};
for (char i{65}; i < static_cast<char>(26 + 65); i++)
constexpr char8_t bg{static_cast<char8_t>(ntpathsize)};
constexpr char8_t ed{bg+26};
for (char8_t i{bg}; i != ed; ++i)
{
DosDevice[0] = i;
if (QueryDosDeviceA_p(DosDevice, NtPath, 64))
*DosDevice = i;
if (QueryDosDeviceA_p(reinterpret_cast<char const*>(DosDevice), NtPath, ntpathsize))
{
NtPathLen = ::fast_io::cstr_len(NtPath);

Expand Down

0 comments on commit 291ef59

Please sign in to comment.