diff --git a/.cirrus.yml b/.cirrus.yml index 9ae19d8cf29a6..b49c99291eead 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -81,7 +81,7 @@ task: cpu: 2 memory: 5G docker_arguments: - CI_IMAGE_NAME_TAG: ubuntu:lunar + CI_IMAGE_NAME_TAG: ubuntu:23.04 FILE_ENV: "./ci/test/00_setup_env_native_tidy.sh" # For faster CI feedback, immediately schedule the linters << : *CREDITS_TEMPLATE @@ -210,7 +210,7 @@ task: cpu: 6 # Increase CPU and Memory to avoid timeout memory: 24G docker_arguments: - CI_IMAGE_NAME_TAG: ubuntu:lunar + CI_IMAGE_NAME_TAG: ubuntu:23.04 FILE_ENV: "./ci/test/00_setup_env_native_tsan.sh" env: << : *CIRRUS_EPHEMERAL_WORKER_TEMPLATE_ENV diff --git a/build_msvc/bench_bench_navcoin/bench_bench_navcoin.vcxproj b/build_msvc/bench_bench_navcoin/bench_bench_navcoin.vcxproj index bd7f72484a37f..f81f98db6e6e9 100644 --- a/build_msvc/bench_bench_navcoin/bench_bench_navcoin.vcxproj +++ b/build_msvc/bench_bench_navcoin/bench_bench_navcoin.vcxproj @@ -18,6 +18,9 @@ $(IntDir)bench_bech32.obj + + $(IntDir)bench_bech32_mod.obj + $(IntDir)bench_bench.obj diff --git a/ci/test/00_setup_env_i686_multiprocess.sh b/ci/test/00_setup_env_i686_multiprocess.sh index 0032d7ef5312a..c598ee50424bd 100755 --- a/ci/test/00_setup_env_i686_multiprocess.sh +++ b/ci/test/00_setup_env_i686_multiprocess.sh @@ -8,7 +8,7 @@ export LC_ALL=C.UTF-8 export HOST=i686-pc-linux-gnu export CONTAINER_NAME=ci_i686_multiprocess -export CI_IMAGE_NAME_TAG=ubuntu:20.04 +export CI_IMAGE_NAME_TAG="docker.io/amd64/ubuntu:20.04" export PACKAGES="cmake python3 llvm clang g++-multilib" export DEP_OPTS="DEBUG=1 MULTIPROCESS=1" export GOAL="install" diff --git a/ci/test/00_setup_env_mac.sh b/ci/test/00_setup_env_mac.sh index c837c38708bba..e454282b695e7 100755 --- a/ci/test/00_setup_env_mac.sh +++ b/ci/test/00_setup_env_mac.sh @@ -7,12 +7,12 @@ export LC_ALL=C.UTF-8 export CONTAINER_NAME=ci_macos_cross -export CI_IMAGE_NAME_TAG=ubuntu:20.04 # Check that Focal can cross-compile to macos +export 
CI_IMAGE_NAME_TAG=ubuntu:22.04 # Check that Jammy can cross-compile to macos export HOST=x86_64-apple-darwin -export PACKAGES="cmake libz-dev libtinfo5 python3-setuptools xorriso" +export PACKAGES="cmake libz-dev libtinfo5 python3-setuptools xorriso zip" export XCODE_VERSION=12.2 export XCODE_BUILD_ID=12B45b export RUN_UNIT_TESTS=false export RUN_FUNCTIONAL_TESTS=false export GOAL="deploy" -export BITCOIN_CONFIG="--enable-reduce-exports" +export BITCOIN_CONFIG="--enable-reduce-exports LDFLAGS=-Wno-error=unused-command-line-argument" diff --git a/ci/test/00_setup_env_native_nowallet_libbitcoinkernel.sh b/ci/test/00_setup_env_native_nowallet_libbitcoinkernel.sh index b86fa6f321212..73e9ea1fc5799 100755 --- a/ci/test/00_setup_env_native_nowallet_libbitcoinkernel.sh +++ b/ci/test/00_setup_env_native_nowallet_libbitcoinkernel.sh @@ -7,7 +7,7 @@ export LC_ALL=C.UTF-8 export CONTAINER_NAME=ci_native_nowallet_libbitcoinkernel -export CI_IMAGE_NAME_TAG="ubuntu:20.04" +export CI_IMAGE_NAME_TAG="docker.io/ubuntu:20.04" # Use minimum supported python3.8 and clang-8, see doc/dependencies.md export PACKAGES="python3-zmq clang-8 llvm-8 libc++abi-8-dev libc++-8-dev" export DEP_OPTS="NO_WALLET=1 CC=clang-8 CXX='clang++-8 -stdlib=libc++'" diff --git a/doc/bech32-mod-gen-poly.md b/doc/bech32-mod-gen-poly.md new file mode 100644 index 0000000000000..7117937570bbe --- /dev/null +++ b/doc/bech32-mod-gen-poly.md @@ -0,0 +1,287 @@ +# Bech32_mod generator polynomial generation + +## Summary +We made modification to bech32 implementation of Bitcoin so that it worked with 165-character bech32 string perfectly detecting up to 5 errors. + +To accomplish that, we replaced the 6-degree generator polynomial originally used by bech32 by an 8-degree one as [Bitcoin Cash's cashaddr implementation](https://github.com/bitcoin-cash-node/bitcoin-cash-node/blob/master/src/cashaddr.cpp) and [Jamtis](https://gist.github.com/tevador/50160d160d24cfc6c52ae02eb3d17024) of Monero have done. 
+ +In order to find an 8-degree polynomial for our needs, we followed the Jamtis polynomial search procedure which is explained in detail in [this document](https://gist.github.com/tevador/5b3fbbd0877a3412ede07263c6b2663d) with a small modification to meet our requirements. + +Here are the requirements we had: + +1. The generator polynomial should be capable of perfectly detecting up to 5 errors in a 165-character bech32 string. + - We encode 96-byte double public keys into bech32 format. Converting a 96-byte vector that uses all 8 bits of each byte into one that uses only 5 bits per byte, we end up with a 154-byte vector (96 * 8 / 5 = 153.6, rounded up). In addition, an 8-byte checksum, a 2-byte HRP and a 1-byte separator are needed. Putting those together, the resulting bech32 string is 165 characters long. +2. The generator polynomial should have the lowest false-positive error rate for 7 and 8 error cases when the input string is 50 characters long. + +To find a polynomial satisfying the above requirements, we first generated 10 million random degree-8 polynomials, and computed false-positive error rates for them. + +Amongst all, there were two generator polynomials satisfying the first requirement: + +``` +U1PIRGA7 +AJ4RJKVB +``` + +For both of the polynomials, we computed false-positive error rates for 7 and 8 error cases, and we concluded that `U1PIRGA7` performed better and chose it as the generator polynomial. + +## Actual procedure + +### 1.
Generation of random 10-million degree-8 polynomials +We used [gen_crc.py](https://gist.github.com/tevador/5b3fbbd0877a3412ede07263c6b2663d#:~:text=2.1-,gen_crc.py,-The%20gen_crc.py) used in Jamtis search that is shown below: + +```python +# gen_crc.py + +import random + +CHARSET = "0123456789ABCDEFGHIJKLMNOPQRSTUV" + +def gen_to_str(val, degree): + gen_str = "" + for i in range(degree): + gen_str = CHARSET[int(val) % 32] + gen_str + val /= 32 + return gen_str + +def gen_crc(degree, count, seed=None): + random.seed(seed) + for i in range(count): + while True: + r = random.getrandbits(5 * degree) + if (r % 32) != 0: + break + print(gen_to_str(r, degree)) + +gen_crc(8, 10000000, 0x584d52) +``` + +### 2. Calculation of false-positive error rates + +To see the performance of all generated polynomials, we used [crccollide.cpp](https://github.com/sipa/ezbase32/blob/master/crccollide.cpp) that is developed by Bitcoin developers. We compiled it with the default parameters as in: + +```bash +$ g++ ezbase32/crccollide.cpp -o crccollide -lpthread -O3 +``` + +Then we run it with 5 errors and 120-character threshold. + +```bash +$ mkdir results1 +$ parallel -a list.txt ./crccollide {} 5 120 ">" results1/{}.txt +``` + +The execution took approximately 25 days on Core i5-13500 + +```bash +39762158.30s user 2845631.54s system 1975% cpu 599:14:56.86 total +``` + +and generated a huge number of the output files in `results1` directory. + +After removing polynomials with a result below the threshold by: + +```bash +$ find results1 -name "*.txt" -type f -size -2k -delete +``` + +`16,976` polynomials were left in the `results1` directory. + +```bash +$ ls -1 results1 | wc -l +16976 +``` + +Each file in the `results1` directory looked like: + +```bash +... 
+A00C78KL 123 0.000000000000000 0.000000000000000 0.000000000000000 0.000000000000000 0.000000000000000 1.031711484752184 # 100% done +A00C78KL 124 0.000000000000000 0.000000000000000 0.000000000000000 0.000000000000000 0.010575746914933 1.030752602001270 # 100% done +... +``` + +The descriptions of the columns are: +1. Polynomial in base32 notation +1. Input string size +1. False positive error rate for 1-error case +1. False positive error rate for 2-error case +1. False positive error rate for 3-error case +1. False positive error rate for 4-error case +1. False positive error rate for 5-error case +1. False positive error rate for 6-error case + +### 3. Extraction of polynomials satisfying the requirements + +To extract polynomials that can perfectly detect up to 5 errors, we used the `err6-high-perf.py` script below, which is a modified version of Jamtis's [crc_res.py](https://gist.github.com/tevador/5b3fbbd0877a3412ede07263c6b2663d#:~:text=2.3-,crc_res.py,-The%20crc_res.py) script: + +```python +# err6-high-perf.py + +import os +from typing import Optional, Tuple + +def get_rate(filename, num_char) -> Optional[Tuple[str, float]]: + gen = '' + with open(filename) as file: + for line in file: + tokens = line.split() + if len(tokens) == 2 and tokens[1] == "starting": + gen = tokens[0].rstrip(':') + continue + if tokens[0] == gen: + curr_num_char = int(tokens[1]) + if curr_num_char != num_char: + continue + err4 = float(tokens[1 + 4]) + err5 = float(tokens[1 + 5]) + err6 = float(tokens[1 + 6]) + if err4 > 0 or err5 > 0: + return None + return (gen, err6) + return None + +num_char = 165 +dirpath = 'results1' +top_n = 10 + +gens = [] + +for entry in os.listdir(dirpath): + filename = os.path.join(dirpath, entry) + if os.path.isfile(filename): + res = get_rate(filename, num_char) + if res is not None: + gens.append(res) + +gens.sort(key=lambda x: x[1]) + +for gen in gens[:top_n]: + print(f"{gen[0]}") +``` + +This script extracted 2 polynomials.
+ +```bash +$ ./err6-high-perf.py > gens.txt +$ cat gens.txt +U1PIRGA7 +AJ4RJKVB +``` + +Then we built [crccollide.cpp](https://github.com/sipa/ezbase32/blob/master/crccollide.cpp) again with `LENGTH=50` and `ERRORS=4` parameters, and calculated the false-positive error rates of the extracted generators for 7 and 8 error cases: + +```bash +$ g++ ezbase32/crccollide.cpp -o crccollide_50_4 -lpthread -O3 -DLENGTH=50 -DERRORS=4 -DTHREADS=4 +$ mkdir results2 +$ parallel -a gens.txt ./crccollide_50_4 {} ">" results2/{}.txt +``` + +Comparing the results manually, we found that `U1PIRGA7` performs slightly better and selected it as the best-performing generator polynomial. + +### 4. Generation of mod constants +With the below `enc-gen-to-sage-code.py` script, we generated `SageMath` code to define `U1PIRGA7` as `G`: + +```Python +# enc-gen-to-sage-code.py + +import sys + +if len(sys.argv) < 2: + exit(f'Usage: {sys.argv[0]} [8-char-poly]') + +gen = sys.argv[1] + +CHARSET = "0123456789ABCDEFGHIJKLMNOPQRSTUV" +degree = 8 + +def gen_to_str(gen): + gen_str = "" + for i in range(degree): + gen_str = CHARSET[int(gen) % 32] + gen_str + gen /= 32 + return gen_str + +def str_to_gen(s): + acc = 0 + coeffs = [] + for c in s: + acc <<= 5 + i = CHARSET.index(c) + coeffs.append(i) + acc += i + return (acc, coeffs) + +def pf_coeffs(coeffs): + terms = [f'x^{len(coeffs)}'] + for (i,coeff) in enumerate(coeffs): + if i == len(coeffs) - 1: + terms.append(f'c({coeff})') + else: + terms.append(f'c({coeff})*x^{len(coeffs)-i-1}') + term_str = ' + '.join(terms) + return f'G = {term_str}' + +acc_coeffs = str_to_gen(gen) +print(acc_coeffs) + +recovered_gen = gen_to_str(acc_coeffs[0]) +if recovered_gen != gen: + exit(f'Expected recovered generator to be {gen}, but got {recov +ered_gen}') + +print(pf_coeffs(acc_coeffs[1])) +``` + +The output was: + +```bash +$ ./enc-gen-to-sage-code.py U1PIRGA7 +(1032724529479, [30, 1, 25, 18, 27, 16, 10, 7]) +G = x^8 + c(30)*x^7 + c(1)*x^6 + c(25)*x^5 +
c(18)*x^4 + c(27)*x^3 ++ c(16)*x^2 + c(10)*x^1 + c(7) +``` + +Then we embedded the generated `G = ...` line into the `SageMath` script below, which is a modified version of the script in the `bech32.cpp` comment, and ran it to generate `C++` code to compute a modulo by the generator polynomial. + +```python +B = GF(2) # Binary field +BP.<b> = B[] # Polynomials over the binary field +F_mod = b**5 + b**3 + 1 +F.<f> = GF(32, modulus=F_mod, repr='int') # GF(32) definition +FP.<x> = F[] # Polynomials over GF(32) +E_mod = x**2 + F.fetch_int(9)*x + F.fetch_int(23) +E.<e> = F.extension(E_mod) # GF(1024) extension field definition +for p in divisors(E.order() - 1): # Verify e has order 1023. + assert((e**p == 1) == (p % 1023 == 0)) + +c = lambda n: F.fetch_int(n) +G = x^8 + c(30)*x^7 + c(1)*x^6 + c(25)*x^5 + c(18)*x^4 + c(27)*x^3 + c(16)*x^2 + c(10)*x^1 + c(7) + +print(G) # Print out the generator + +mod_consts = [] + +for i in [1,2,4,8,16]: # Print out {1,2,4,8,16}*(g(x) mod x^8), packed in hex integers. + v = 0 + for coef in reversed((F.fetch_int(i)*(G % x**8)).coefficients(sparse=True)): + v = v*32 + coef.integer_representation() + mod_consts.append("0x%x" % v) + +for (i, mod_const) in enumerate(mod_consts): + p = 2**i + s = f' if (c0 & {p}) c ^= {mod_const}; // {{{p}}}k(x) =' + print(s) +``` + +The generated `C++` code was: + +```c++ +if (c0 & 1) c ^= 0xf0732dc147; // {1}k(x) = +if (c0 & 2) c ^= 0xa8b6dfa68e; // {2}k(x) = +if (c0 & 4) c ^= 0x193fabc83c; // {4}k(x) = +if (c0 & 8) c ^= 0x322fd3b451; // {8}k(x) = +if (c0 & 16) c ^= 0x640f37688b; // {16}k(x) = +``` + +We replaced the corresponding part of the `PolyMod` function with this to use `U1PIRGA7` as the generator polynomial.
+ diff --git a/src/Makefile.am b/src/Makefile.am index 0b38e99e7e5f8..bbb978f74fbc7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -154,6 +154,7 @@ BITCOIN_CORE_H = \ banman.h \ base58.h \ bech32.h \ + bech32_mod.h \ blockencodings.h \ blockfilter.h \ blsct/arith/mcl/mcl.h \ @@ -780,6 +781,7 @@ libbitcoin_common_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS) libbitcoin_common_a_SOURCES = \ base58.cpp \ bech32.cpp \ + bech32_mod.cpp \ blsct/arith/elements.cpp \ blsct/double_public_key.cpp \ blsct/public_key.cpp \ diff --git a/src/Makefile.bench.include b/src/Makefile.bench.include index e4dae0b23d1ba..de3e77b43c529 100644 --- a/src/Makefile.bench.include +++ b/src/Makefile.bench.include @@ -15,6 +15,7 @@ bench_bench_navcoin_SOURCES = \ bench/addrman.cpp \ bench/base58.cpp \ bench/bech32.cpp \ + bench/bech32_mod.cpp \ bench/bench.cpp \ bench/bench.h \ bench/bench_bitcoin.cpp \ diff --git a/src/Makefile.test.include b/src/Makefile.test.include index b9a2a85c8508b..69acb20bfc7da 100644 --- a/src/Makefile.test.include +++ b/src/Makefile.test.include @@ -77,6 +77,7 @@ BITCOIN_TESTS =\ test/base58_tests.cpp \ test/base64_tests.cpp \ test/bech32_tests.cpp \ + test/bech32_mod_tests.cpp \ test/bip32_tests.cpp \ test/blockchain_tests.cpp \ test/blockencodings_tests.cpp \ @@ -269,6 +270,7 @@ test_fuzz_fuzz_SOURCES = \ test/fuzz/banman.cpp \ test/fuzz/base_encode_decode.cpp \ test/fuzz/bech32.cpp \ + test/fuzz/bech32_mod.cpp \ test/fuzz/bitdeque.cpp \ test/fuzz/block.cpp \ test/fuzz/block_header.cpp \ diff --git a/src/bech32_mod.cpp b/src/bech32_mod.cpp new file mode 100644 index 0000000000000..ad52acdaeb533 --- /dev/null +++ b/src/bech32_mod.cpp @@ -0,0 +1,246 @@ +// Copyright (c) 2017, 2021 Pieter Wuille +// Copyright (c) 2021-2022 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +#include +#include + +#include +#include +#include +#include + +namespace bech32_mod +{ + +namespace +{ + +typedef std::vector data; + +/** The Bech32 and Bech32m character set for encoding. */ +const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"; + +/** The Bech32 and Bech32m character set for decoding. */ +const int8_t CHARSET_REV[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 15, -1, 10, 17, 21, 20, 26, 30, 7, 5, -1, -1, -1, -1, -1, -1, + -1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1, + 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1, + -1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1, + 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1 +}; + +/* Determine the final constant to use for the specified encoding. */ +uint32_t EncodingConstant(Encoding encoding) { + assert(encoding == Encoding::BECH32 || encoding == Encoding::BECH32M); + return encoding == Encoding::BECH32 ? 1 : 0x2bc830a3; +} + +/** This function will compute what 8 5-bit values to XOR into the last 8 input values, in order to + * make the checksum 0. These 8 values are packed together in a single 40-bit integer. The higher + * bits correspond to earlier values. */ +uint64_t PolyMod(const data& v) +{ + // The input is interpreted as a list of coefficients of a polynomial over F = GF(32), with an + // implicit 1 in front. If the input is [v0,v1,v2,v3,v4], that polynomial is v(x) = + // 1*x^7 + v0*x^6 + v1*x^5 + v2*x^4 + v3*x^3 + v4*x^2 + v5*x + v6. The implicit 1 guarantees that + // [v0,v1,v2,...] has a distinct checksum from [0,v0,v1,v2,...]. + + // The output is a 40-bit integer whose 5-bit groups are the coefficients of the remainder of + // v(x) mod g(x), where g(x) is the Bech32 generator, + // x^8 + {30}*x^7 + {1}x^6 + {25}*x^5 + {18}*x^4 + {27}*x^3 + {16}*x^2 + {10}*x + {7}. 
g(x) was + // selected by searching random degree-8 polynomials for one that guarantees detection of up to 5 + // errors in a 165-character bech32 string. + + // Note that the coefficients are elements of GF(32), here represented as decimal numbers + // between {}. In this finite field, addition is just XOR of the corresponding numbers. For + // example, {27} + {13} = {27 ^ 13} = {22}. Multiplication is more complicated, and requires + // treating the bits of values themselves as coefficients of a polynomial over a smaller field, + // GF(2), and multiplying those polynomials mod a^5 + a^3 + 1. For example, {5} * {26} = + // (a^2 + 1) * (a^4 + a^3 + a) = (a^4 + a^3 + a) * a^2 + (a^4 + a^3 + a) = a^6 + a^5 + a^4 + a + // = a^3 + 1 (mod a^5 + a^3 + 1) = {9}. + + // During the course of the loop below, `c` contains the bitpacked coefficients of the + // polynomial constructed from just the values of v that were processed so far, mod g(x). In + // the above example, `c` initially corresponds to 1 mod g(x), and after processing 2 inputs of + // v, it corresponds to x^2 + v0*x + v1 mod g(x). As 1 mod g(x) = 1, that is the starting value + // for `c`. + + // The process of selecting the generator polynomial is discussed in detail in [bech32-mod-gen-poly.md](../doc/bech32-mod-gen-poly.md) + + uint64_t c = 1; + for (const auto v_i : v) { + // We want to update `c` to correspond to a polynomial with one extra term. If the initial + // value of `c` consists of the coefficients of c(x) = f(x) mod g(x), we modify it to + // correspond to c'(x) = (f(x) * x + v_i) mod g(x), where v_i is the next input to + // process.
Simplifying: + // c'(x) = (f(x) * x + v_i) mod g(x) + // ((f(x) mod g(x)) * x + v_i) mod g(x) + // (c(x) * x + v_i) mod g(x) + // If c(x) = c0*x^7 + c1*x^6 + c2*x^5 + c3*x^4 + c4*x^3 + c5*x^2 + c6*x + c7, we want to compute + // c'(x) = (c0*x^7 + c1*x^6 + c2*x^5 + c3*x^4 + c4*x^3 + c5*x^2 + c6*x + c7) * x + v_i mod g(x) + // = c0*x^8 + c1*x^7 + c2*x^6 + c3*x^5 + c4*x^4 + c5*x^3 + c6*x^2 + c7*x + v_i mod g(x) + // = c0*(x^8 mod g(x)) + c1*x^7 + c2*x^6 + c3*x^5 + c4*x^4 + c5*x^3 + c6*x^2 + c7*x + v_i + // If we call (x^8 mod g(x)) = k(x), this can be written as + // c'(x) = (c1*x^7 + c2*x^6 + c3*x^5 + c4*x^4 + c5*x^3 + c6*x^2 + c7*x + v_i) + c0*k(x) + + // First, determine the value of c0: + uint8_t c0 = c >> 35; + + // Then compute c1*x^7 + c2*x^6 + c3*x^5 + c4*x^4 + c5*x^3 + c6*x^2 + c7*x + v_i: + c = ((c & 0x07ffffffff) << 5) ^ v_i; + + // Finally, for each set bit n in c0, conditionally add {2^n}k(x). These constants can be + // computed using the following Sage code (continuing the code above): + // + // for i in [1,2,4,8,16]: # Print out {1,2,4,8,16}*(g(x) mod x^8), packed in hex integers. + // v = 0 + // for coef in reversed((F.fetch_int(i)*(G % x**8)).coefficients(sparse=True)): + // v = v*32 + coef.integer_representation() + // print("0x%x" % v) + // + if (c0 & 1) c ^= 0xf0732dc147; // {1}k(x) = {30}*x^7 + {1}x^6 + {25}*x^5 + {18}*x^4 + {27}*x^3 + {16}*x^2 + {10}*x + {7} + if (c0 & 2) c ^= 0xa8b6dfa68e; // {2}k(x) + if (c0 & 4) c ^= 0x193fabc83c; // {4}k(x) + if (c0 & 8) c ^= 0x322fd3b451; // {8}k(x) + if (c0 & 16) c ^= 0x640f37688b; // {16}k(x) + } + return c; +} + +/** Convert to lower case. */ +inline unsigned char LowerCase(unsigned char c) +{ + return (c >= 'A' && c <= 'Z') ? (c - 'A') + 'a' : c; +} + +/** Return indices of invalid characters in a Bech32 string. 
*/ +bool CheckCharacters(const std::string& str, std::vector& errors) +{ + bool lower = false, upper = false; + for (size_t i = 0; i < str.size(); ++i) { + unsigned char c{(unsigned char)(str[i])}; + if (c >= 'a' && c <= 'z') { + if (upper) { + errors.push_back(i); + } else { + lower = true; + } + } else if (c >= 'A' && c <= 'Z') { + if (lower) { + errors.push_back(i); + } else { + upper = true; + } + } else if (c < 33 || c > 126) { + errors.push_back(i); + } + } + return errors.empty(); +} + +/** Expand a HRP for use in checksum computation: high 3 bits of each character, a zero separator, then the low 5 bits of each character. */ +data ExpandHRP(const std::string& hrp) +{ + data ret; + ret.reserve(hrp.size() * 2 + 1 + 154 + 8); // expanded hrp (2*len+1) + 154-char data part + 8-char checksum, so callers can append without reallocating + ret.resize(hrp.size() * 2 + 1); + for (size_t i = 0; i < hrp.size(); ++i) { + unsigned char c = hrp[i]; + ret[i] = c >> 5; + ret[i + hrp.size() + 1] = c & 0x1f; + } + ret[hrp.size()] = 0; + return ret; +} + +/** Verify a checksum. */ +Encoding VerifyChecksum(const std::string& hrp, const data& values) +{ + // PolyMod computes what value to xor into the final values to make the checksum 0. However, + // if we required that the checksum was 0, it would be the case that appending a 0 to a valid + // list of values would result in a new valid list. For that reason, Bech32 requires the + // resulting checksum to be 1 instead. In Bech32m, this constant was amended. See + // https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details. + const uint64_t check = PolyMod(Cat(ExpandHRP(hrp), values)); + if (check == EncodingConstant(Encoding::BECH32)) return Encoding::BECH32; + if (check == EncodingConstant(Encoding::BECH32M)) return Encoding::BECH32M; + return Encoding::INVALID; +} + +/** Create a checksum.
*/ +data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values) +{ + auto exp_hrp = ExpandHRP(hrp); + data enc = Cat(std::move(exp_hrp), values); // Reuse the expanded HRP computed above instead of expanding it a second time + enc.resize(enc.size() + 8); // Append 8 zeroes + uint64_t mod = PolyMod(enc) ^ EncodingConstant(encoding); // Determine what to XOR into those 8 zeroes. + data ret(8); + for (size_t i = 0; i < 8; ++i) { + // Convert the 5-bit groups in mod to checksum values (most significant group first). + ret[i] = (mod >> (5 * (7 - i))) & 31; + } + return ret; +} + +} // namespace + +/** Encode a Bech32 or Bech32m string. */ +std::string Encode(Encoding encoding, const std::string& hrp, const data& values) { + // First ensure that the HRP is all lowercase. BIP-173 and BIP350 require an encoder + // to return a lowercase Bech32/Bech32m string, but if given an uppercase HRP, the + // result will always be invalid. + for (const char& c : hrp) assert(c < 'A' || c > 'Z'); + data checksum = CreateChecksum(encoding, hrp, values); + data combined = Cat(values, checksum); + std::string ret = hrp + '1'; + ret.reserve(ret.size() + combined.size()); + for (const auto c : combined) { + ret += CHARSET[c]; + } + return ret; +} + +/** Decode a Bech32 or Bech32m string.
*/ +DecodeResult Decode(const std::string& str) { + std::vector errors; + if (!CheckCharacters(str, errors)) return {}; + size_t pos = str.rfind('1'); + + // double public key bech32 string is 165-byte long and consists of: + // - 2-byte hrp + // - 1-byte separator '1' + // - 154-byte key data (96 bytes * 8 bits / 5 bits = 153.6, rounded up) + // - 8-byte checksum + if (str.size() != 165 // double public key should be encoded to 165-byte bech32 string + || pos == str.npos // separator '1' should be included + || pos == 0 // hrp part should not be empty + || pos + 9 > str.size() // the 8-character checksum must fit after the separator + ) { + return {}; + } + data values(str.size() - 1 - pos); + for (size_t i = 0; i < str.size() - 1 - pos; ++i) { + unsigned char c = str[i + pos + 1]; + int8_t rev = CHARSET_REV[c]; + + if (rev == -1) { + return {}; + } + values[i] = rev; + } + std::string hrp; + for (size_t i = 0; i < pos; ++i) { + hrp += LowerCase(str[i]); + } + Encoding result = VerifyChecksum(hrp, values); + if (result == Encoding::INVALID) return {}; + return {result, std::move(hrp), data(values.begin(), values.end() - 8)}; +} + +} // namespace bech32_mod + diff --git a/src/bech32_mod.h b/src/bech32_mod.h new file mode 100644 index 0000000000000..56f5a1be203d7 --- /dev/null +++ b/src/bech32_mod.h @@ -0,0 +1,50 @@ +// Copyright (c) 2017, 2021 Pieter Wuille +// Copyright (c) 2021 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +// Bech32 and Bech32m are string encoding formats used in newer +// address types. The outputs consist of a human-readable part +// (alphanumeric), a separator character (1), and a base32 data +// section, the last 8 characters of which are a checksum. This +// modified variant is namespaced under bech32_mod. +// +// For more information, see BIP 173, BIP 350 and doc/bech32-mod-gen-poly.md.
+ +#ifndef BITCOIN_BECH32_MOD_H +#define BITCOIN_BECH32_MOD_H + +#include +#include +#include + +namespace bech32_mod +{ + +enum class Encoding { + INVALID, //!< Failed decoding + + BECH32, //!< Bech32 encoding as defined in BIP173 + BECH32M, //!< Bech32m encoding as defined in BIP350 +}; + +/** Encode a Bech32 or Bech32m string. If hrp contains uppercase characters, this will cause an + * assertion error. Encoding must be one of BECH32 or BECH32M. */ +std::string Encode(Encoding encoding, const std::string& hrp, const std::vector& values); + +struct DecodeResult +{ + Encoding encoding; //!< What encoding was detected in the result; Encoding::INVALID if failed. + std::string hrp; //!< The human readable part + std::vector data; //!< The payload (excluding checksum) + + DecodeResult() : encoding(Encoding::INVALID) {} + DecodeResult(Encoding enc, std::string&& h, std::vector&& d) : encoding(enc), hrp(std::move(h)), data(std::move(d)) {} +}; + +/** Decode a Bech32 or Bech32m string. */ +DecodeResult Decode(const std::string& str); + +} // namespace bech32_mod + +#endif // BITCOIN_BECH32_MOD_H diff --git a/src/bench/bech32_mod.cpp b/src/bench/bech32_mod.cpp new file mode 100644 index 0000000000000..a4c8d7e7b731c --- /dev/null +++ b/src/bench/bech32_mod.cpp @@ -0,0 +1,36 @@ +// Copyright (c) 2023 The Navcoin developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +#include + +#include +#include + +#include +#include + + +static void Bech32ModEncode(benchmark::Bench& bench) +{ + std::vector v = ParseHex("c97f5a67ec381b760aeaf67573bc164845ff39a3bb26a1cee401ac67243b48db1a2b3c4d5e6f7890abcdefc97f5a67ec381b760aeaf67573bc164845ff39a3bb26a1cee401ac67243b48db1a2b3c4d5e6f7890abcdefc97f5a67ec381b760aea"); + std::vector tmp; + tmp.reserve(154); // 96 * 8 / 5 = 153.6 + ConvertBits<8, 5, true>([&](unsigned char c) { tmp.push_back(c); }, v.begin(), v.end()); + bench.batch(v.size()).unit("byte").run([&] { + bech32_mod::Encode(bech32_mod::Encoding::BECH32M, "nv", tmp); + }); +} + + +static void Bech32ModDecode(benchmark::Bench& bench) +{ + std::string addr = "nv1d3fqq4j2w384smpjxgm95anexe4rjwzr2pc553t0xduxuc3sd35nvatcxpnn2mjyde45sqrtfapnws3kwfc85sm3v9ekzc2r2ajnquzf09282e62xddykn25x3"; + bench.batch(addr.size()).unit("byte").run([&] { + bech32_mod::Decode(addr); + }); +} + + +BENCHMARK(Bech32ModEncode, benchmark::PriorityLevel::HIGH); +BENCHMARK(Bech32ModDecode, benchmark::PriorityLevel::HIGH); diff --git a/src/test/bech32_mod_tests.cpp b/src/test/bech32_mod_tests.cpp new file mode 100644 index 0000000000000..0d3c99f583cf9 --- /dev/null +++ b/src/test/bech32_mod_tests.cpp @@ -0,0 +1,125 @@ +// Copyright (c) 2017 Pieter Wuille +// Copyright (c) 2021 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include + +#include + +#include +#include +#include +#include + +/** The Bech32 and Bech32m character set for encoding. */ +const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"; + +/** The Bech32 and Bech32m character set for decoding. 
*/ +const int8_t CHARSET_REV[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 15, -1, 10, 17, 21, 20, 26, 30, 7, 5, -1, -1, -1, -1, -1, -1, + -1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1, + 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1, + -1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1, + 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1 +}; +BOOST_AUTO_TEST_SUITE(bech32_mod_tests) + +// randomly embed errors to the given string (only in the data part, never in the hrp, separator or 8-char checksum) +void embed_errors(std::string& s, const size_t num_errors) { + // build a list of indices to change + std::random_device rd; + std::mt19937 gen(rd()); + + // randomly select distinct indices to change + std::set indices; + + auto sep_idx = s.rfind('1'); + while (indices.size() < num_errors) { + std::uniform_int_distribution dist(sep_idx + 1, s.size() - 9); + indices.insert(dist(gen)); + } + + // change characters at the indices (shift each to the next charset symbol so it always differs) + for (auto it = indices.begin(); it != indices.end(); ++it) { + auto from_idx = CHARSET_REV[static_cast(s[*it])]; + auto to_idx = (from_idx + 1) % 32; + s[*it] = CHARSET[to_idx]; + } +} + +std::string gen_random_str(const size_t size) { + static const char charset[] = + "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz"; + const size_t max_index = (sizeof(charset) - 2); // last valid index: sizeof counts the terminating NUL, which must never be picked + std::string s; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dist(0, max_index); + + for (size_t i = 0; i < size; ++i) { + s += charset[dist(gen)]; + } + return s; +} + +size_t test_error_detection( + const size_t num_errors, + const size_t num_tests, + const bool expect_errors +) { + std::string hrp = "nv"; + size_t unexpected_results = 0; + + for (size_t i=0; i dpk_v8(dpk.begin(), dpk.end()); + std::vector dpk_v5; + ConvertBits<8, 5, true>([&](uint8_t c) { dpk_v5.push_back(c); },
dpk_v8.begin(), dpk_v8.end()); + + auto dpk_bech32 = bech32_mod::Encode(encoding, hrp, dpk_v5); + embed_errors(dpk_bech32, num_errors); + + auto res = bech32_mod::Decode(dpk_bech32); + std::vector dpk_v8r; + ConvertBits<5, 8, false>([&](uint8_t c) { dpk_v8r.push_back(c); }, res.data.begin(), res.data.end()); + + if (expect_errors) { + if (dpk_v8r == dpk_v8) ++unexpected_results; + } else { + if (dpk_v8r != dpk_v8) ++unexpected_results; + } + } + } + return unexpected_results; +} + +BOOST_AUTO_TEST_CASE(bech32_mod_test_detecting_errors) +{ + bool failed = false; + + for (size_t num_errors = 0; num_errors <= 5; ++num_errors) { + size_t unexpected_results = + test_error_detection(num_errors, 10000, num_errors > 0); + + if (unexpected_results > 0) { + std::cout << num_errors << "-error cases failed " << unexpected_results << " times" << std::endl; + failed = true; + } + } + BOOST_CHECK(!failed); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/fuzz/bech32_mod.cpp b/src/test/fuzz/bech32_mod.cpp new file mode 100644 index 0000000000000..60ca715bb1ac1 --- /dev/null +++ b/src/test/fuzz/bech32_mod.cpp @@ -0,0 +1,59 @@ +// Copyright (c) 2023 The Navcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +FUZZ_TARGET(bech32_mod) +{ + // create 154-byte buf from the given buffer + std::vector buf(buffer.begin(), buffer.end()); + + if (buf.size() > 154) { + buf.resize(154); + } else if (buf.size() < 154) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dist(0, 255); + + while (buf.size() < 154) { + buf.push_back(dist(gen)); + } + } + + const std::string random_string(buf.begin(), buf.end()); + const auto r1 = bech32_mod::Decode(random_string); + if (r1.hrp.empty()) { + assert(r1.encoding == bech32_mod::Encoding::INVALID); + assert(r1.data.empty()); + } else { + assert(r1.encoding != bech32_mod::Encoding::INVALID); + const std::string reencoded = bech32_mod::Encode(r1.encoding, r1.hrp, r1.data); + assert(CaseInsensitiveEqual(random_string, reencoded)); + } + + // make the buf valid for encoding + buf.resize(96); + std::vector input; + ConvertBits<8, 5, true>([&](unsigned char c) { input.push_back(c); }, buf.begin(), buf.end()); + + for (auto encoding : {bech32_mod::Encoding::BECH32, bech32_mod::Encoding::BECH32M}) { + const std::string encoded = bech32_mod::Encode(encoding, "nv", input); + assert(!encoded.empty()); + const auto r2 = bech32_mod::Decode(encoded); + assert(r2.encoding == encoding); + assert(r2.hrp == "nv"); + assert(r2.data == input); + } +}