forked from biojppm/c4core
-
Notifications
You must be signed in to change notification settings - Fork 0
/
charconv.hpp
2670 lines (2445 loc) · 97.1 KB
/
charconv.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#ifndef _C4_CHARCONV_HPP_
#define _C4_CHARCONV_HPP_
/** @file charconv.hpp Lightweight generic type-safe wrappers for
* converting individual values to/from strings.
*/
#include "c4/language.hpp"
#include <inttypes.h>
#include <type_traits>
#include <climits>
#include <limits>
#include <utility>
#include "c4/config.hpp"
#include "c4/substr.hpp"
#include "c4/std/std_fwd.hpp"
#include "c4/memory_util.hpp"
#include "c4/szconv.hpp"
#ifndef C4CORE_NO_FAST_FLOAT
# if (C4_CPP >= 17)
# if defined(_MSC_VER)
# if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) // VS2017 and lower do not have these macros
# include <charconv>
# define C4CORE_HAVE_STD_TOCHARS 1
# define C4CORE_HAVE_STD_FROMCHARS 0 // prefer fast_float with MSVC
# define C4CORE_HAVE_FAST_FLOAT 1
# else
# define C4CORE_HAVE_STD_TOCHARS 0
# define C4CORE_HAVE_STD_FROMCHARS 0
# define C4CORE_HAVE_FAST_FLOAT 1
# endif
# else
# if __has_include(<charconv>)
# include <charconv>
# if defined(__cpp_lib_to_chars)
# define C4CORE_HAVE_STD_TOCHARS 1
# define C4CORE_HAVE_STD_FROMCHARS 0 // glibc uses fast_float internally
# define C4CORE_HAVE_FAST_FLOAT 1
# else
# define C4CORE_HAVE_STD_TOCHARS 0
# define C4CORE_HAVE_STD_FROMCHARS 0
# define C4CORE_HAVE_FAST_FLOAT 1
# endif
# else
# define C4CORE_HAVE_STD_TOCHARS 0
# define C4CORE_HAVE_STD_FROMCHARS 0
# define C4CORE_HAVE_FAST_FLOAT 1
# endif
# endif
# else
# define C4CORE_HAVE_STD_TOCHARS 0
# define C4CORE_HAVE_STD_FROMCHARS 0
# define C4CORE_HAVE_FAST_FLOAT 1
# endif
# if C4CORE_HAVE_FAST_FLOAT
C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wsign-conversion")
C4_SUPPRESS_WARNING_GCC("-Warray-bounds")
# if defined(__GNUC__) && __GNUC__ >= 5
C4_SUPPRESS_WARNING_GCC("-Wshift-count-overflow")
# endif
# include "c4/ext/fast_float.hpp"
C4_SUPPRESS_WARNING_GCC_POP
# endif
#elif (C4_CPP >= 17)
# define C4CORE_HAVE_FAST_FLOAT 0
# if defined(_MSC_VER)
# if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) // VS2017 and lower do not have these macros
# include <charconv>
# define C4CORE_HAVE_STD_TOCHARS 1
# define C4CORE_HAVE_STD_FROMCHARS 1
# else
# define C4CORE_HAVE_STD_TOCHARS 0
# define C4CORE_HAVE_STD_FROMCHARS 0
# endif
# else
# if __has_include(<charconv>)
# include <charconv>
# if defined(__cpp_lib_to_chars)
# define C4CORE_HAVE_STD_TOCHARS 1
# define C4CORE_HAVE_STD_FROMCHARS 1 // glibc uses fast_float internally
# else
# define C4CORE_HAVE_STD_TOCHARS 0
# define C4CORE_HAVE_STD_FROMCHARS 0
# endif
# else
# define C4CORE_HAVE_STD_TOCHARS 0
# define C4CORE_HAVE_STD_FROMCHARS 0
# endif
# endif
#else
# define C4CORE_HAVE_STD_TOCHARS 0
# define C4CORE_HAVE_STD_FROMCHARS 0
# define C4CORE_HAVE_FAST_FLOAT 0
#endif
#if !C4CORE_HAVE_STD_FROMCHARS
#include <cstdio>
#endif
#if defined(_MSC_VER)
# pragma warning(push)
# pragma warning(disable: 4996) // snprintf/scanf: this function or variable may be unsafe
# if C4_MSVC_VERSION != C4_MSVC_VERSION_2017
# pragma warning(disable: 4800) //'int': forcing value to bool 'true' or 'false' (performance warning)
# endif
#endif
#if defined(__clang__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wtautological-constant-out-of-range-compare"
# pragma clang diagnostic ignored "-Wformat-nonliteral"
# pragma clang diagnostic ignored "-Wdouble-promotion" // implicit conversion increases floating-point precision
# pragma clang diagnostic ignored "-Wold-style-cast"
#elif defined(__GNUC__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wformat-nonliteral"
# pragma GCC diagnostic ignored "-Wdouble-promotion" // implicit conversion increases floating-point precision
# pragma GCC diagnostic ignored "-Wuseless-cast"
# pragma GCC diagnostic ignored "-Wold-style-cast"
#endif
// C4_NO_UBSAN_IOVRFLW: function attribute that turns off the sanitizer's
// "signed-integer-overflow" check. It expands to the attribute on clang
// and on GCC newer than 7, and to nothing on every other compiler.
#if defined(__clang__)
#define C4_NO_UBSAN_IOVRFLW __attribute__((no_sanitize("signed-integer-overflow")))
#elif defined(__GNUC__)
#if __GNUC__ > 7
#define C4_NO_UBSAN_IOVRFLW __attribute__((no_sanitize("signed-integer-overflow")))
#else
// GCC 7 and older: the attribute is not used
#define C4_NO_UBSAN_IOVRFLW
#endif
#else
// neither clang nor GCC: no attribute
#define C4_NO_UBSAN_IOVRFLW
#endif
namespace c4 {
/** @defgroup doc_charconv Charconv utilities
*
* Lightweight, very fast generic type-safe wrappers for converting
* individual values to/from strings. These are the main generic
* functions:
* - @ref doc_to_chars and its alias @ref doc_xtoa: implemented by calling @ref itoa()/@ref utoa()/@ref ftoa()/@ref dtoa() (or generically @ref xtoa())
* - @ref doc_from_chars and its alias @ref doc_atox: implemented by calling @ref atoi()/@ref atou()/@ref atof()/@ref atod() (or generically @ref atox())
* - @ref to_chars_sub()
* - @ref from_chars_first()
* - @ref xtoa()/@ref atox() are implemented in terms of @ref write_dec()/@ref read_dec() et al (see @ref doc_write/@ref doc_read())
*
* And also some modest brag is in order: these functions are really
* fast: faster even than C++17 `std::to_chars()` and
* `std::from_chars()`, and many dozens of times faster than the
* iostream abominations.
*
* For example, here are some benchmark comparisons for @ref
* doc_from_chars (link leads to the main project README, where these
* results are shown more systematically).
*
* <table>
* <caption id="atox-i64-results">atox,int64_t</caption>
* <tr><th>g++12, linux <th>Visual Studio 2019
* <tr><td> \image html linux-x86_64-gxx12.1-Release-c4core-bm-charconv-atox-mega_bytes_per_second-i64.png <td> \image html windows-x86_64-vs2019-Release-c4core-bm-charconv-atox-mega_bytes_per_second-i64.png
* </table>
*
* <table>
* <caption id="xtoa-i64-results">xtoa,int64_t</caption>
* <tr><th>g++12, linux <th>Visual Studio 2019
* <tr><td> \image html linux-x86_64-gxx12.1-Release-c4core-bm-charconv-xtoa-mega_bytes_per_second-i64.png <td> \image html windows-x86_64-vs2019-Release-c4core-bm-charconv-xtoa-mega_bytes_per_second-i64.png
* </table>
*
* To parse floating point, c4core uses
* [fastfloat](https://github.com/fastfloat/fast_float), which is
* extremely fast, by an even larger factor:
*
* <table>
* <caption id="atox-float-results">atox,float</caption>
* <tr><th>g++12, linux <th>Visual Studio 2019
* <tr><td> \image html linux-x86_64-gxx12.1-Release-c4core-bm-charconv-atof-mega_bytes_per_second-float.png <td> \image html windows-x86_64-vs2019-Release-c4core-bm-charconv-atof-mega_bytes_per_second-float.png
* </table>
*
* @{
*/
#if C4CORE_HAVE_STD_TOCHARS
/** Output format selector for real numbers. In this branch each
 * enumerator holds the underlying value of the corresponding
 * std::chars_format constant.
 * @warning Use only the symbol. Do not rely on the type or naked value of this enum. */
typedef enum : std::underlying_type<std::chars_format>::type {
/** print the real number in floating point format (like %f) */
FTOA_FLOAT = static_cast<std::underlying_type<std::chars_format>::type>(std::chars_format::fixed),
/** print the real number in scientific format (like %e) */
FTOA_SCIENT = static_cast<std::underlying_type<std::chars_format>::type>(std::chars_format::scientific),
/** print the real number in flexible format (like %g) */
FTOA_FLEX = static_cast<std::underlying_type<std::chars_format>::type>(std::chars_format::general),
/** print the real number in hexadecimal format (like %a) */
FTOA_HEXA = static_cast<std::underlying_type<std::chars_format>::type>(std::chars_format::hex),
} RealFormat_e;
#else
/** Output format selector for real numbers. In this branch each
 * enumerator holds one of the characters 'f', 'e', 'g' or 'a'.
 * @warning Use only the symbol. Do not rely on the type or naked value of this enum. */
typedef enum : char {
/** print the real number in floating point format (like %f) */
FTOA_FLOAT = 'f',
/** print the real number in scientific format (like %e) */
FTOA_SCIENT = 'e',
/** print the real number in flexible format (like %g) */
FTOA_FLEX = 'g',
/** print the real number in hexadecimal format (like %a) */
FTOA_HEXA = 'a',
} RealFormat_e;
#endif
/** @cond dev */
/** Trait telling whether T is exactly one of the fixed-width integer
 * typedefs. This cannot be decided from the size alone: in some
 * platforms `int`/`unsigned int` and `long`/`unsigned long` are
 * distinct from every one of int8_t...int64_t / uint8_t...uint64_t. */
template<class T>
struct is_fixed_length
{
    enum : bool {
        /** true if T is int8_t, int16_t, int32_t or int64_t */
        value_i = ((std::is_same<T, int8_t>::value || std::is_same<T, int16_t>::value
                    || std::is_same<T, int32_t>::value || std::is_same<T, int64_t>::value)
                   && std::is_integral<T>::value),
        /** true if T is uint8_t, uint16_t, uint32_t or uint64_t */
        value_u = ((std::is_same<T, uint8_t>::value || std::is_same<T, uint16_t>::value
                    || std::is_same<T, uint32_t>::value || std::is_same<T, uint64_t>::value)
                   && std::is_integral<T>::value),
        /** true if either of the above holds */
        value = (value_u || value_i)
    };
};
/** @endcond */
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
#ifdef _MSC_VER
# pragma warning(push)
#elif defined(__clang__)
# pragma clang diagnostic push
#elif defined(__GNUC__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wconversion"
# if __GNUC__ >= 6
# pragma GCC diagnostic ignored "-Wnull-dereference"
# endif
#endif
/** @cond dev */
namespace detail {
/* python command to get the values below:
def dec(v):
    return str(v)
for bits in (8, 16, 32, 64):
    imin, imax, umax = (-(1 << (bits - 1))), (1 << (bits - 1)) - 1, (1 << bits) - 1
    for vname, v in (("imin", imin), ("imax", imax), ("umax", umax)):
        for f in (bin, oct, dec, hex):
            print(f"{bits}b: {vname}={v} {f.__name__}: len={len(f(v)):2d}: {v} {f(v)}")
*/
// do not use the type as the template argument because in some
// platforms long!=int32 and long!=int64. Just use the numbytes
// which is more generic and spares lengthy SFINAE code.
//
// Primary template: only declared here; it is selected by the size in
// bytes and the signedness of the integer type.
template<size_t num_bytes, bool is_signed> struct charconv_digits_;
// Convenience alias mapping a type T to charconv_digits_ via sizeof(T)
// and std::is_signed<T>.
template<class T> using charconv_digits = charconv_digits_<sizeof(T), std::is_signed<T>::value>;
/** Character-count limits and limit strings for signed 8 bit integers (int8_t). */
template<> struct charconv_digits_<1u, true>
{
    enum : size_t {
        // digits only
        maxdigits_bin_nopfx = 8,   // 10000000
        maxdigits_oct_nopfx = 3,   // 200
        maxdigits_dec_nopfx = 3,   // 128
        maxdigits_hex_nopfx = 2,   // 80
        // sign + radix prefix (none for decimal) + digits
        maxdigits_bin = 1 + 2 + 8, // -0b10000000
        maxdigits_oct = 1 + 2 + 3, // -0o200
        maxdigits_dec = 1 + 3,     // -128
        maxdigits_hex = 1 + 2 + 2, // -0x80
    };
    // magnitude of the minimum value (-128) in each radix: no sign, no prefix
    static constexpr csubstr min_value_dec() noexcept { return csubstr("128"); }
    static constexpr csubstr min_value_hex() noexcept { return csubstr("80"); }
    static constexpr csubstr min_value_oct() noexcept { return csubstr("200"); }
    static constexpr csubstr min_value_bin() noexcept { return csubstr("10000000"); }
    // maximum value in decimal
    static constexpr csubstr max_value_dec() noexcept { return csubstr("127"); }
    // true when str has more than 3 characters, or exactly 3 with a first
    // character above '1' (the maximum is 0o177).
    // NOTE(review): presumably str holds only octal digits, without sign,
    // prefix or leading zeroes -- confirm with the callers.
    static constexpr bool is_oct_overflow(csubstr str) noexcept
    {
        return (str.len > 3) || (str.len == 3 && str[0] > '1');
    }
};
/** Character-count limits and limit strings for unsigned 8 bit integers (uint8_t). */
template<> struct charconv_digits_<1u, false>
{
    enum : size_t {
        // digits only
        maxdigits_bin_nopfx = 8, // 11111111
        maxdigits_oct_nopfx = 3, // 377
        maxdigits_dec_nopfx = 3, // 255
        maxdigits_hex_nopfx = 2, // ff
        // radix prefix (none for decimal) + digits
        maxdigits_bin = 2 + 8,   // 0b11111111
        maxdigits_oct = 2 + 3,   // 0o377
        maxdigits_dec = 3,       // 255
        maxdigits_hex = 2 + 2,   // 0xff
    };
    // maximum value in decimal
    static constexpr csubstr max_value_dec() noexcept { return csubstr("255"); }
    // true when str has more than 3 characters, or exactly 3 with a first
    // character above '3' (the maximum is 0o377).
    // NOTE(review): presumably str holds only octal digits, without
    // prefix or leading zeroes -- confirm with the callers.
    static constexpr bool is_oct_overflow(csubstr str) noexcept
    {
        return (str.len > 3) || (str.len == 3 && str[0] > '3');
    }
};
/** Character-count limits and limit strings for signed 16 bit integers (int16_t). */
template<> struct charconv_digits_<2u, true>
{
    enum : size_t {
        // digits only
        maxdigits_bin_nopfx = 16,   // 1000000000000000
        maxdigits_oct_nopfx = 6,    // 100000
        maxdigits_dec_nopfx = 5,    // 32768
        maxdigits_hex_nopfx = 4,    // 8000
        // sign + radix prefix (none for decimal) + digits
        maxdigits_bin = 1 + 2 + 16, // -0b1000000000000000
        maxdigits_oct = 1 + 2 + 6,  // -0o100000
        maxdigits_dec = 1 + 5,      // -32768
        maxdigits_hex = 1 + 2 + 4,  // -0x8000
    };
    // magnitude of the minimum value (-32768) in each radix: no sign, no prefix
    static constexpr csubstr min_value_dec() noexcept { return csubstr("32768"); }
    static constexpr csubstr min_value_hex() noexcept { return csubstr("8000"); }
    static constexpr csubstr min_value_oct() noexcept { return csubstr("100000"); }
    static constexpr csubstr min_value_bin() noexcept { return csubstr("1000000000000000"); }
    // maximum value in decimal
    static constexpr csubstr max_value_dec() noexcept { return csubstr("32767"); }
    // true when str has 6 or more characters.
    // NOTE(review): presumably str holds only octal digits, without sign,
    // prefix or leading zeroes -- confirm with the callers.
    static constexpr bool is_oct_overflow(csubstr str) noexcept
    {
        return str.len >= 6;
    }
};
/** Character-count limits and limit strings for unsigned 16 bit integers (uint16_t). */
template<> struct charconv_digits_<2u, false> // uint16_t
{
    enum : size_t {
        maxdigits_bin = 2 + 16,   // 65535 0b1111111111111111
        maxdigits_oct = 2 + 6,    // 65535 0o177777
        maxdigits_dec = 5,        // 65535 (five digits: the former value of 6 was off by one)
        maxdigits_hex = 2 + 4,    // 65535 0xffff
        maxdigits_bin_nopfx = 16, // 65535 0b1111111111111111
        maxdigits_oct_nopfx = 6,  // 65535 0o177777
        maxdigits_dec_nopfx = 5,  // 65535 (five digits: the former value of 6 was off by one)
        maxdigits_hex_nopfx = 4,  // 65535 0xffff
    };
    // maximum value in decimal
    static constexpr csubstr max_value_dec() noexcept { return csubstr("65535"); }
    // true when str has more than 6 characters, or exactly 6 with a first
    // character above '1' (the maximum is 0o177777).
    // NOTE(review): presumably str holds only octal digits, without
    // prefix or leading zeroes -- confirm with the callers.
    static constexpr bool is_oct_overflow(csubstr str) noexcept { return !((str.len < 6) || (str.len == 6 && str[0] <= '1')); }
};
/** Character-count limits and limit strings for signed 32 bit integers (int32_t). */
template<> struct charconv_digits_<4u, true>
{
    enum : size_t {
        // digits only
        maxdigits_bin_nopfx = 32,   // 10000000000000000000000000000000
        maxdigits_oct_nopfx = 11,   // 20000000000
        maxdigits_dec_nopfx = 10,   // 2147483648
        maxdigits_hex_nopfx = 8,    // 80000000
        // sign + radix prefix (none for decimal) + digits
        maxdigits_bin = 1 + 2 + 32, // len=35: -0b10000000000000000000000000000000
        maxdigits_oct = 1 + 2 + 11, // len=14: -0o20000000000
        maxdigits_dec = 1 + 10,     // len=11: -2147483648
        maxdigits_hex = 1 + 2 + 8,  // len=11: -0x80000000
    };
    // magnitude of the minimum value (-2147483648) in each radix: no sign, no prefix
    static constexpr csubstr min_value_dec() noexcept { return csubstr("2147483648"); }
    static constexpr csubstr min_value_hex() noexcept { return csubstr("80000000"); }
    static constexpr csubstr min_value_oct() noexcept { return csubstr("20000000000"); }
    static constexpr csubstr min_value_bin() noexcept { return csubstr("10000000000000000000000000000000"); }
    // maximum value in decimal
    static constexpr csubstr max_value_dec() noexcept { return csubstr("2147483647"); }
    // true when str has more than 11 characters, or exactly 11 with a
    // first character above '1' (the maximum is 0o17777777777).
    // NOTE(review): presumably str holds only octal digits, without sign,
    // prefix or leading zeroes -- confirm with the callers.
    static constexpr bool is_oct_overflow(csubstr str) noexcept
    {
        return (str.len > 11) || (str.len == 11 && str[0] > '1');
    }
};
/** Character-count limits and limit strings for unsigned 32 bit integers (uint32_t). */
template<> struct charconv_digits_<4u, false>
{
    enum : size_t {
        // digits only
        maxdigits_bin_nopfx = 32, // 11111111111111111111111111111111
        maxdigits_oct_nopfx = 11, // 37777777777
        maxdigits_dec_nopfx = 10, // 4294967295
        maxdigits_hex_nopfx = 8,  // ffffffff
        // radix prefix (none for decimal) + digits
        maxdigits_bin = 2 + 32,   // len=34: 0b11111111111111111111111111111111
        maxdigits_oct = 2 + 11,   // len=13: 0o37777777777
        maxdigits_dec = 10,       // len=10: 4294967295
        maxdigits_hex = 2 + 8,    // len=10: 0xffffffff
    };
    // maximum value in decimal
    static constexpr csubstr max_value_dec() noexcept { return csubstr("4294967295"); }
    // true when str has more than 11 characters, or exactly 11 with a
    // first character above '3' (the maximum is 0o37777777777).
    // NOTE(review): presumably str holds only octal digits, without
    // prefix or leading zeroes -- confirm with the callers.
    static constexpr bool is_oct_overflow(csubstr str) noexcept
    {
        return (str.len > 11) || (str.len == 11 && str[0] > '3');
    }
};
/** Character-count limits and limit strings for signed 64 bit integers (int64_t). */
template<> struct charconv_digits_<8u, true>
{
    enum : size_t {
        // digits only
        maxdigits_bin_nopfx = 64,   // 1000000000000000000000000000000000000000000000000000000000000000
        maxdigits_oct_nopfx = 22,   // 1000000000000000000000
        maxdigits_dec_nopfx = 19,   // 9223372036854775808
        maxdigits_hex_nopfx = 16,   // 8000000000000000
        // sign + radix prefix (none for decimal) + digits
        maxdigits_bin = 1 + 2 + 64, // len=67: -0b1000000000000000000000000000000000000000000000000000000000000000
        maxdigits_oct = 1 + 2 + 22, // len=25: -0o1000000000000000000000
        maxdigits_dec = 1 + 19,     // len=20: -9223372036854775808
        maxdigits_hex = 1 + 2 + 16, // len=19: -0x8000000000000000
    };
    // magnitude of the minimum value (-9223372036854775808) in each radix: no sign, no prefix
    static constexpr csubstr min_value_dec() noexcept { return csubstr("9223372036854775808"); }
    static constexpr csubstr min_value_hex() noexcept { return csubstr("8000000000000000"); }
    static constexpr csubstr min_value_oct() noexcept { return csubstr("1000000000000000000000"); }
    static constexpr csubstr min_value_bin() noexcept { return csubstr("1000000000000000000000000000000000000000000000000000000000000000"); }
    // maximum value in decimal
    static constexpr csubstr max_value_dec() noexcept { return csubstr("9223372036854775807"); }
    // true when str has 22 or more characters.
    // NOTE(review): presumably str holds only octal digits, without sign,
    // prefix or leading zeroes -- confirm with the callers.
    static constexpr bool is_oct_overflow(csubstr str) noexcept
    {
        return str.len >= 22;
    }
};
/** Character-count limits and limit strings for unsigned 64 bit integers (uint64_t). */
template<> struct charconv_digits_<8u, false>
{
    enum : size_t {
        // digits only
        maxdigits_bin_nopfx = 64, // 1111111111111111111111111111111111111111111111111111111111111111
        maxdigits_oct_nopfx = 22, // 1777777777777777777777
        maxdigits_dec_nopfx = 20, // 18446744073709551615
        maxdigits_hex_nopfx = 16, // ffffffffffffffff
        // radix prefix (none for decimal) + digits
        maxdigits_bin = 2 + 64,   // len=66: 0b1111111111111111111111111111111111111111111111111111111111111111
        maxdigits_oct = 2 + 22,   // len=24: 0o1777777777777777777777
        maxdigits_dec = 20,       // len=20: 18446744073709551615
        maxdigits_hex = 2 + 16,   // len=18: 0xffffffffffffffff
    };
    // maximum value in decimal
    static constexpr csubstr max_value_dec() noexcept { return csubstr("18446744073709551615"); }
    // true when str has more than 22 characters, or exactly 22 with a
    // first character above '1' (the maximum is 0o1777777777777777777777).
    // NOTE(review): presumably str holds only octal digits, without
    // prefix or leading zeroes -- confirm with the callers.
    static constexpr bool is_oct_overflow(csubstr str) noexcept
    {
        return (str.len > 22) || (str.len == 22 && str[0] > '1');
    }
};
} // namespace detail
// Helper macros, undefined below
// Both expect a buffer `buf` (with .str and .len) and a cursor `pos` to
// be visible at the point of use. The character is stored only while pos
// is inside the buffer, but pos is incremented in either case.
// _c4append(c): appends c, cast to char.
#define _c4append(c) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = static_cast<char>(c); } else { ++pos; } }
// _c4appendhex(i): appends hexchars[i]; `hexchars` must also be visible.
#define _c4appendhex(i) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = hexchars[i]; } else { ++pos; } }
/** @endcond */
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
/** @defgroup doc_digits Get number of digits
*
* @note At first sight this code may look heavily branchy and
* therefore inefficient. However, measurements revealed this to be
* the fastest among the alternatives.
*
* @see https://github.com/biojppm/c4core/pull/77
*
* @{
*/
/** Number of decimal digits of a non-negative 8 bit integer.
 * @param v the value; must not be negative (asserted)
 * @return 1, 2 or 3 */
template<class T>
C4_CONSTEXPR14 C4_ALWAYS_INLINE
auto digits_dec(T v) noexcept
    -> typename std::enable_if<sizeof(T) == 1u, unsigned>::type
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    if(v >= 100)
        return 3u;
    if(v >= 10)
        return 2u;
    return 1u;
}
/** Number of decimal digits of a non-negative 16 bit integer.
 * @param v the value; must not be negative (asserted)
 * @return a count between 1 and 5 */
template<class T>
C4_CONSTEXPR14 C4_ALWAYS_INLINE
auto digits_dec(T v) noexcept
    -> typename std::enable_if<sizeof(T) == 2u, unsigned>::type
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    // thresholds are tested from the largest to the smallest
    if(v >= 10000)
        return 5u;
    if(v >= 1000)
        return 4u;
    if(v >= 100)
        return 3u;
    if(v >= 10)
        return 2u;
    return 1u;
}
/** Number of decimal digits of a non-negative 32 bit integer.
 * @param v the value; must not be negative (asserted)
 * @return a count between 1 and 10 */
template<class T>
C4_CONSTEXPR14 C4_ALWAYS_INLINE
auto digits_dec(T v) noexcept
    -> typename std::enable_if<sizeof(T) == 4u, unsigned>::type
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    // thresholds are tested from the largest to the smallest
    if(v >= 1000000000)
        return 10u;
    if(v >= 100000000)
        return 9u;
    if(v >= 10000000)
        return 8u;
    if(v >= 1000000)
        return 7u;
    if(v >= 100000)
        return 6u;
    if(v >= 10000)
        return 5u;
    if(v >= 1000)
        return 4u;
    if(v >= 100)
        return 3u;
    if(v >= 10)
        return 2u;
    return 1u;
}
/** Number of decimal digits of a non-negative 64 bit integer, found by
 * bisecting the range of possible digit counts.
 * @param v the value; must not be negative (asserted)
 * @return a count between 1 and 20
 * @see https://github.com/biojppm/c4core/pull/77#issuecomment-1063753568
 *      (thanks @fargies!!!) */
template<class T>
C4_CONSTEXPR14 C4_ALWAYS_INLINE
auto digits_dec(T v) noexcept
    -> typename std::enable_if<sizeof(T) == 8u, unsigned>::type
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    using U = typename std::make_unsigned<T>::type;
    if(v < 1000000000) // 1 to 9 digits
    {
        if(v < 10000) // 1 to 4 digits
        {
            if(v < 100)
                return (v < 10) ? 1u : 2u;
            return (v < 1000) ? 3u : 4u;
        }
        if(v < 10000000) // 5 to 7 digits
        {
            if(v < 1000000)
                return (v < 100000) ? 5u : 6u;
            return 7u;
        }
        return (v < 100000000) ? 8u : 9u;
    }
    if(v < 100000000000000) // 10 to 14 digits
    {
        if(v < 1000000000000) // 10 to 12 digits
        {
            if(v < 100000000000)
                return (v < 10000000000) ? 10u : 11u;
            return 12u;
        }
        return (v < 10000000000000) ? 13u : 14u;
    }
    if(v < 100000000000000000) // 15 to 17 digits
    {
        if(v < 10000000000000000)
            return (v < 1000000000000000) ? 15u : 16u;
        return 17u;
    }
    // 18 to 20 digits: the 20-digit threshold is compared as unsigned
    if(static_cast<U>(v) >= 10000000000000000000u)
        return 20u;
    return (v < 1000000000000000000) ? 18u : 19u;
}
/** Number of hexadecimal digits needed to encode a non-negative integer.
 * @param v the value; must not be negative (asserted)
 * @return 1 for zero; otherwise one plus a quarter of msb(v), rounded down
 * @note presumably msb() yields the index of the highest set bit --
 *       confirm against its definition */
template<class T>
C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_hex(T v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    using U = typename std::make_unsigned<T>::type;
    if(!v)
        return 1u;
    return 1u + (msb(static_cast<U>(v)) >> 2u);
}
/** Number of binary digits needed to encode a non-negative integer.
 * @param v the value; must not be negative (asserted)
 * @return 1 for zero; otherwise one plus msb(v)
 * @note presumably msb() yields the index of the highest set bit --
 *       confirm against its definition */
template<class T>
C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_bin(T v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    using U = typename std::make_unsigned<T>::type;
    if(!v)
        return 1u;
    return 1u + msb(static_cast<U>(v));
}
/** Number of octal digits needed to encode a non-negative integer.
 *
 * The value is compared against 8, 8^2, 8^3 and 8^4 in turn, and divided
 * by 8^4 (adding four digits to the count) while it is still larger.
 *
 * @param v_ the value; must not be negative (asserted)
 * @return the digit count (1 for zero) */
template<class T>
C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_oct(T v_) noexcept
{
    // TODO: is there a better way?
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v_ >= 0);
    // work on an unsigned type at least as wide as `unsigned`
    using U = typename
        std::conditional<sizeof(T) <= sizeof(unsigned),
                         unsigned,
                         typename std::make_unsigned<T>::type>::type;
    U v = (U) v_; // safe because we require v_ >= 0
    // the locals formerly used names containing a double underscore,
    // which are reserved to the implementation
    unsigned num_digits = 1;
    const unsigned pow8_2 = 64u;
    const unsigned pow8_3 = pow8_2 * 8u;
    const unsigned long pow8_4 = pow8_3 * 8u;
    while(true)
    {
        if(v < 8u)
            return num_digits;
        if(v < pow8_2)
            return num_digits + 1;
        if(v < pow8_3)
            return num_digits + 2;
        if(v < pow8_4)
            return num_digits + 3;
        v /= (U) pow8_4;
        num_digits += 4;
    }
}
/** @} */
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
/** @cond dev */
namespace detail {
// lowercase hexadecimal digit characters, indexed by their value (0 to 15)
C4_INLINE_CONSTEXPR const char hexchars[] = "0123456789abcdef";
// the two-character decimal forms of 00 to 99, concatenated: the two
// digits of n are found at indices 2*n and 2*n+1
C4_INLINE_CONSTEXPR const char digits0099[] =
"0001020304050607080910111213141516171819"
"2021222324252627282930313233343536373839"
"4041424344454647484950515253545556575859"
"6061626364656667686970717273747576777879"
"8081828384858687888990919293949596979899";
} // namespace detail
/** @endcond */
C4_SUPPRESS_WARNING_GCC_PUSH
C4_SUPPRESS_WARNING_GCC("-Warray-bounds") // gcc has false positives here
#if (defined(__GNUC__) && (__GNUC__ >= 7))
C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow") // gcc has false positives here
#endif
/** @defgroup doc_write_unchecked Write with known number of digits
*
* Writes a value without checking the buffer length with regards to
* the required number of digits to encode the value. It is the
* responsibility of the caller to ensure that the provided number of
* digits is enough to write the given value. Notwithstanding the
* name, assertions are liberally performed, so this code is safe.
*
* @{ */
/** Write the decimal digits of @p v into the first @p digits_v
 * characters of @p buf.
 * @param buf destination; must hold at least digits_v characters (asserted)
 * @param v the value; must not be negative (asserted)
 * @param digits_v the number of decimal digits of v, as given by digits_dec() */
template<class T>
C4_HOT C4_ALWAYS_INLINE
void write_dec_unchecked(substr buf, T v, unsigned digits_v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    C4_ASSERT(buf.len >= digits_v);
    C4_XASSERT(digits_v == digits_dec(v));
    // in bm_xtoa: checkoncelog_singlediv_write2
    // peel off two digits at a time from the least significant end,
    // with a single division per pair
    for( ; v >= T(100); )
    {
        const T hundreds = static_cast<T>(v / T(100));
        const auto pair = (v - hundreds * T(100)) << 1u; // offset into digits0099
        v = hundreds;
        digits_v -= 2;
        buf.str[digits_v + 1] = detail::digits0099[pair + 1];
        buf.str[digits_v] = detail::digits0099[pair];
    }
    // one or two digits remain
    if(v < T(10))
    {
        C4_ASSERT(digits_v == 1);
        buf.str[0] = static_cast<char>('0' + v);
        return;
    }
    C4_ASSERT(digits_v == 2);
    const auto pair = v << 1u;
    buf.str[1] = detail::digits0099[pair + 1];
    buf.str[0] = detail::digits0099[pair];
}
/** Write the hexadecimal digits of @p v (lowercase, no prefix) into
 * the first @p digits_v characters of @p buf.
 * @param buf destination; must hold at least digits_v characters (asserted)
 * @param v the value; must not be negative (asserted)
 * @param digits_v the number of hexadecimal digits of v, as given by digits_hex() */
template<class T>
C4_HOT C4_ALWAYS_INLINE
void write_hex_unchecked(substr buf, T v, unsigned digits_v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    C4_ASSERT(buf.len >= digits_v);
    C4_XASSERT(digits_v == digits_hex(v));
    // fill backwards from the last digit; zero still produces one digit
    for(;;)
    {
        --digits_v;
        buf.str[digits_v] = detail::hexchars[v & T(15)];
        v >>= 4;
        if(!v)
            break;
    }
    C4_ASSERT(digits_v == 0);
}
/** Write the octal digits of @p v (no prefix) into the first
 * @p digits_v characters of @p buf.
 * @param buf destination; must hold at least digits_v characters (asserted)
 * @param v the value; must not be negative (asserted)
 * @param digits_v the number of octal digits of v, as given by digits_oct() */
template<class T>
C4_HOT C4_ALWAYS_INLINE
void write_oct_unchecked(substr buf, T v, unsigned digits_v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    C4_ASSERT(buf.len >= digits_v);
    C4_XASSERT(digits_v == digits_oct(v));
    // fill backwards from the last digit; zero still produces one digit
    for(;;)
    {
        --digits_v;
        buf.str[digits_v] = static_cast<char>('0' + (v & T(7)));
        v >>= 3;
        if(!v)
            break;
    }
    C4_ASSERT(digits_v == 0);
}
/** Write the binary digits of @p v (no prefix) into the first
 * @p digits_v characters of @p buf.
 * @param buf destination; must hold at least digits_v characters (asserted)
 * @param v the value; must not be negative (asserted)
 * @param digits_v the number of binary digits of v, as given by digits_bin() */
template<class T>
C4_HOT C4_ALWAYS_INLINE
void write_bin_unchecked(substr buf, T v, unsigned digits_v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    C4_ASSERT(buf.len >= digits_v);
    C4_XASSERT(digits_v == digits_bin(v));
    // fill backwards from the last digit; zero still produces one digit
    for(;;)
    {
        --digits_v;
        buf.str[digits_v] = static_cast<char>('0' + (v & T(1)));
        v >>= 1;
        if(!v)
            break;
    }
    C4_ASSERT(digits_v == 0);
}
/** @} */ // write_unchecked
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
/** @defgroup doc_write Write a value
*
* Writes a value after checking the buffer length: when the buffer is
* too small nothing is written, and the required size is returned.
*
* @{ */
/** Write a non-negative integer in decimal format. This is the lowest
 * level (and the fastest) function for this task.
 * @param buf the destination buffer; may be empty or too small, in
 *        which case nothing is written
 * @param v the value; negative numbers are not accepted (asserted)
 * @return the number of characters required for the buffer
 * @note the resulting string is NOT zero-terminated. */
template<class T>
C4_ALWAYS_INLINE size_t write_dec(substr buf, T v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    const unsigned required = digits_dec(v);
    const bool fits = (buf.len >= required);
    if(C4_LIKELY(fits))
    {
        write_dec_unchecked(buf, v, required);
    }
    return required;
}
/** Write a non-negative integer in hexadecimal format, without the 0x
 * prefix. This is the lowest level (and the fastest) function for
 * this task.
 * @param buf the destination buffer; may be empty or too small, in
 *        which case nothing is written
 * @param v the value; negative numbers are not accepted (asserted)
 * @return the number of characters required for the buffer
 * @note the resulting string is NOT zero-terminated. */
template<class T>
C4_ALWAYS_INLINE size_t write_hex(substr buf, T v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    const unsigned required = digits_hex(v);
    const bool fits = (buf.len >= required);
    if(C4_LIKELY(fits))
    {
        write_hex_unchecked(buf, v, required);
    }
    return required;
}
/** Write a non-negative integer in octal format, without the 0o
 * prefix. This is the lowest level (and the fastest) function for
 * this task.
 * @param buf the destination buffer; may be empty or too small, in
 *        which case nothing is written
 * @param v the value; negative numbers are not accepted (asserted)
 * @return the number of characters required for the buffer
 * @note the resulting string is NOT zero-terminated. */
template<class T>
C4_ALWAYS_INLINE size_t write_oct(substr buf, T v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    const unsigned required = digits_oct(v);
    const bool fits = (buf.len >= required);
    if(C4_LIKELY(fits))
    {
        write_oct_unchecked(buf, v, required);
    }
    return required;
}
/** Write a non-negative integer in binary format, without the 0b
 * prefix. This is the lowest level (and the fastest) function for
 * this task.
 * @param buf the destination buffer; may be empty or too small, in
 *        which case nothing is written
 * @param v the value; negative numbers are not accepted (asserted)
 * @return the number of characters required for the buffer
 * @note the resulting string is NOT zero-terminated. */
template<class T>
C4_ALWAYS_INLINE size_t write_bin(substr buf, T v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    C4_ASSERT(v >= 0);
    const unsigned digits = digits_bin(v);
    C4_ASSERT(digits > 0);
    const bool fits = (buf.len >= digits);
    if(C4_LIKELY(fits))
    {
        write_bin_unchecked(buf, v, digits);
    }
    return digits;
}
/** @cond dev */
namespace detail {

/** pointer to a function writing a number of type U into a buffer and
 * returning the number of characters it requires */
template<class U> using NumberWriter = size_t (*)(substr, U);

/** Write @p v with @p writer, then left-pad the result with '0' up to
 * @p num_digits characters.
 * @return the writer's result when the number is already at least
 *         num_digits wide; num_digits otherwise. In the latter case the
 *         padding is applied only when the writer's result is smaller
 *         than buf.len and num_digits is not larger than buf.len. */
template<class T, NumberWriter<T> writer>
size_t write_num_digits(substr buf, T v, size_t num_digits) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<T>::value);
    const size_t ret = writer(buf, v);
    // the number is wide enough by itself: it prevails
    if(ret >= num_digits)
    {
        return ret;
    }
    // no room for the padded result: just report the width
    if(ret >= buf.len || num_digits > buf.len)
    {
        return num_digits;
    }
    C4_ASSERT(num_digits >= ret);
    const size_t delta = num_digits - ret;
    // shift the digits to the right, then fill the gap with zeroes
    memmove(buf.str + delta, buf.str, ret);
    memset(buf.str, '0', delta);
    return num_digits;
}

} // namespace detail
/** @endcond */
/** same as c4::write_dec(), but pad with zeroes on the left
 * such that the resulting string is @p num_digits wide.
 * If the given number requires more than @p num_digits characters,
 * then the number prevails and no padding is added.
 * @return the number of characters required for the buffer; when this
 * is larger than the buffer size, the padded result was not written. */
template<class T>
C4_ALWAYS_INLINE size_t write_dec(substr buf, T val, size_t num_digits) noexcept
{
// the function pointer selects the unpadded write_dec(substr, T) overload
return detail::write_num_digits<T, &write_dec<T>>(buf, val, num_digits);
}
/** same as c4::write_hex(), but pad with zeroes on the left
 * such that the resulting string is @p num_digits wide.
 * If the given number requires more than @p num_digits characters,
 * then the number prevails and no padding is added.
 * @return the number of characters required for the buffer; when this
 * is larger than the buffer size, the padded result was not written. */
template<class T>
C4_ALWAYS_INLINE size_t write_hex(substr buf, T val, size_t num_digits) noexcept
{
// the function pointer selects the unpadded write_hex(substr, T) overload
return detail::write_num_digits<T, &write_hex<T>>(buf, val, num_digits);
}
/** same as c4::write_bin(), but pad with zeroes on the left
 * such that the resulting string is @p num_digits wide.
 * If the given number requires more than @p num_digits characters,
 * then the number prevails and no padding is added.
 * @return the number of characters required for the buffer; when this
 * is larger than the buffer size, the padded result was not written. */
template<class T>
C4_ALWAYS_INLINE size_t write_bin(substr buf, T val, size_t num_digits) noexcept
{
// the function pointer selects the unpadded write_bin(substr, T) overload
return detail::write_num_digits<T, &write_bin<T>>(buf, val, num_digits);
}
/** same as c4::write_oct(), but pad with zeroes on the left
 * such that the resulting string is @p num_digits wide.
 * If the given number requires more than @p num_digits characters,
 * then the number prevails and no padding is added.
 * @return the number of characters required for the buffer; when this
 * is larger than the buffer size, the padded result was not written. */
template<class T>
C4_ALWAYS_INLINE size_t write_oct(substr buf, T val, size_t num_digits) noexcept
{
// the function pointer selects the unpadded write_oct(substr, T) overload
return detail::write_num_digits<T, &write_oct<T>>(buf, val, num_digits);
}
/** @} */ // write
C4_SUPPRESS_WARNING_GCC_POP
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
C4_SUPPRESS_WARNING_MSVC_PUSH
C4_SUPPRESS_WARNING_MSVC(4365) // '=': conversion from 'int' to 'I', signed/unsigned mismatch
/** @defgroup doc_read Read a value
*
* @{ */
/** Read a decimal integer from a string. This is the lowest-level
 * (and fastest) routine for this task.
 * @note negative numbers are not accepted
 * @note the string must be trimmed; whitespace is not accepted
 * @note the string must not be empty
 * @note overflow is not checked; the value wraps around in a way
 *       similar to the standard C/C++ overflow behavior.
 *       For example, `read_dec<int8_t>("128", &val)` returns true,
 *       but val cannot hold 128 because 127 is the max i8 value;
 *       on typical two's complement targets it wraps to -128.
 * @note when false is returned, @p v holds the value accumulated
 *       from the characters preceding the offending one
 * @see overflows<T>() to find out if a number string overflows a type range
 * @return true if the conversion was successful (no overflow check) */
template<class I>
C4_NO_UBSAN_IOVRFLW
C4_ALWAYS_INLINE bool read_dec(csubstr s, I *C4_RESTRICT v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<I>::value);
    C4_ASSERT(!s.empty());
    *v = 0;
    for(size_t i = 0; i < s.len; ++i)
    {
        const char c = s.str[i];
        // anything outside [0-9] aborts the conversion
        if(C4_UNLIKELY(!(c >= '0' && c <= '9')))
            return false;
        // shift the accumulated value one decimal place and add the digit
        *v = (*v) * I(10) + (I(c) - I('0'));
    }
    return true;
}
/** Read a hexadecimal integer from a string. This is the lowest-level
 * (and fastest) routine for this task.
 * @note negative numbers are not accepted
 * @note a leading 0x or 0X is not accepted
 * @note the string must not be empty
 * @note the string must be trimmed; whitespace is not accepted
 * @note both lowercase and uppercase digits (a-f, A-F) are accepted
 * @note overflow is not checked; the value wraps around in a way
 *       similar to the standard C/C++ overflow behavior.
 *       For example, `read_hex<int8_t>("80", &val)` returns true,
 *       but val cannot hold 0x80 because 7f is the max i8 value;
 *       on typical two's complement targets it wraps to -128.
 * @note when false is returned, @p v holds the value accumulated
 *       from the characters preceding the offending one
 * @see overflows<T>() to find out if a number string overflows a type range
 * @return true if the conversion was successful (no overflow check) */
template<class I>
C4_NO_UBSAN_IOVRFLW
C4_ALWAYS_INLINE bool read_hex(csubstr s, I *C4_RESTRICT v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<I>::value);
    C4_ASSERT(!s.empty());
    *v = 0;
    for(size_t i = 0; i < s.len; ++i)
    {
        const char c = s.str[i];
        // numeric value of the current hexadecimal digit
        I nibble;
        if('0' <= c && c <= '9')
            nibble = I(c) - I('0');
        else if('a' <= c && c <= 'f')
            nibble = I(10) + (I(c) - I('a'));
        else if('A' <= c && c <= 'F')
            nibble = I(10) + (I(c) - I('A'));
        else
            return false; // not a hexadecimal digit
        // shift the accumulated value one hex place and add the digit
        *v = (*v) * I(16) + nibble;
    }
    return true;
}
/** Read a binary integer from a string. This is the lowest-level
 * (and fastest) routine for this task.
 * @note negative numbers are not accepted
 * @note a leading 0b or 0B is not accepted
 * @note the string must not be empty
 * @note the string must be trimmed; whitespace is not accepted
 * @note overflow is not checked; the value wraps around in a way
 *       similar to the standard C/C++ overflow behavior.
 *       For example, `read_bin<int8_t>("10000000", &val)` returns true,
 *       but val cannot hold that value because 1111111 is the max i8
 *       value; on typical two's complement targets it wraps to -128.
 * @note when false is returned, @p v holds the value accumulated
 *       from the preceding characters, shifted left once more
 * @see overflows<T>() to find out if a number string overflows a type range
 * @return true if the conversion was successful (no overflow check) */
template<class I>
C4_NO_UBSAN_IOVRFLW
C4_ALWAYS_INLINE bool read_bin(csubstr s, I *C4_RESTRICT v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<I>::value);
    C4_ASSERT(!s.empty());
    *v = 0;
    for(size_t i = 0; i < s.len; ++i)
    {
        const char c = s.str[i];
        // make room for the incoming bit; note that this happens
        // before the character is validated
        *v <<= 1;
        if(c == '0')
            continue; // the new low bit is already zero
        if(c != '1')
            return false; // not a binary digit
        *v |= 1;
    }
    return true;
}
/** Read an octal integer from a string. This is the lowest-level
 * (and fastest) routine for this task.
 * @note negative numbers are not accepted
 * @note a leading 0o or 0O is not accepted
 * @note the string must not be empty
 * @note the string must be trimmed; whitespace is not accepted
 * @note overflow is not checked; the value wraps around in a way
 *       similar to the standard C/C++ overflow behavior.
 *       For example, `read_oct<int8_t>("200", &val)` returns true,
 *       but val cannot hold that value because 177 is the max i8
 *       value; on typical two's complement targets it wraps to -128.
 * @note when false is returned, @p v holds the value accumulated
 *       from the characters preceding the offending one
 * @see overflows<T>() to find out if a number string overflows a type range
 * @return true if the conversion was successful (no overflow check) */
template<class I>
C4_NO_UBSAN_IOVRFLW
C4_ALWAYS_INLINE bool read_oct(csubstr s, I *C4_RESTRICT v) noexcept
{
    C4_STATIC_ASSERT(std::is_integral<I>::value);
    C4_ASSERT(!s.empty());
    *v = 0;
    for(size_t i = 0; i < s.len; ++i)
    {
        const char c = s.str[i];
        // anything outside [0-7] aborts the conversion
        if(C4_UNLIKELY(!(c >= '0' && c <= '7')))
            return false;
        // shift the accumulated value one octal place and add the digit
        *v = (*v) * I(8) + (I(c) - I('0'));
    }
    return true;
}
/** @} */
C4_SUPPRESS_WARNING_MSVC_POP
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wswitch-default")
/** @cond dev */
namespace detail {
/** copy the characters of @p val into @p buf, starting at position
 * @p pos. The caller must ensure that the buffer has enough room.
 * @return the position in the buffer just after the copied characters */
inline size_t _itoa2buf(substr buf, size_t pos, csubstr val) noexcept
{
    const size_t end_pos = pos + val.len;
    C4_ASSERT(end_pos <= buf.len);
    memcpy(buf.str + pos, val.str, val.len);
    return end_pos;
}
/** same as _itoa2buf(), but first append as many '0' characters as
 * needed for the characters of @p val to span @p num_digits.
 * No zeroes are added when @p val is already at least that wide.
 * @return the position in the buffer just after the copied characters */
inline size_t _itoa2bufwithdigits(substr buf, size_t pos, size_t num_digits, csubstr val) noexcept
{
    // number of padding zeroes that go before the value
    const size_t num_zeroes = (val.len < num_digits) ? (num_digits - val.len) : 0;
    C4_ASSERT(num_zeroes + val.len <= buf.len);
    // _c4append is a helper macro defined elsewhere in this file;
    // presumably it stores the character at pos and advances pos
    for(size_t i = 0; i != num_zeroes; ++i)
        _c4append('0');
    return detail::_itoa2buf(buf, pos, val);
}
template<class I>
C4_NO_INLINE size_t _itoadec2buf(substr buf) noexcept