diff --git a/constantine.nimble b/constantine.nimble index 31086b2c..e1b03079 100644 --- a/constantine.nimble +++ b/constantine.nimble @@ -496,7 +496,7 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[ ("tests/t_ethereum_evm_precompiles.nim", false), ("tests/t_ethereum_bls_signatures.nim", false), ("tests/t_ethereum_eip2333_bls12381_key_derivation.nim", false), - ("tests/t_ethereum_eip4844_kzg.nim", false), + ("tests/t_ethereum_eip4844_deneb_kzg.nim", false), ] const testDescNvidia: seq[string] = @[ diff --git a/constantine/commitments/kzg_polynomial_commitments.nim b/constantine/commitments/kzg_polynomial_commitments.nim index 7bc4b326..16d09da1 100644 --- a/constantine/commitments/kzg_polynomial_commitments.nim +++ b/constantine/commitments/kzg_polynomial_commitments.nim @@ -190,7 +190,8 @@ func kzg_prove*[N: static int, C: static Curve]( poly: PolynomialEval[N, Fr[C]], domain: PolyDomainEval[N, Fr[C]], challenge: Fr[C], - powers_of_tau: PolynomialEval[N, G1aff[C]]) = + powers_of_tau: PolynomialEval[N, G1aff[C]], + isBitReversedDomain: static bool) = # Note: # The order of inputs in @@ -227,7 +228,7 @@ func kzg_prove*[N: static int, C: static Curve]( # q(x) = (p(x) - p(z)) / (x - z) diffQuotientPolyFr[].differenceQuotientEvalInDomain( - poly, zIndex, invRootsMinusZ[], domain) + poly, uint32 zIndex, invRootsMinusZ[], domain, isBitReversedDomain) freeHeapAligned(invRootsMinusZ) @@ -242,7 +243,7 @@ func kzg_prove*[N: static int, C: static Curve]( var proofJac {.noInit.}: ECP_ShortW_Jac[Fp[C], G1] proofJac.multiScalarMul_vartime(diffQuotientPolyBigInt[], powers_of_tau.evals) proof.affine(proofJac) - + freeHeapAligned(diffQuotientPolyBigInt) diff --git a/constantine/ethereum_eip4844_kzg_polynomial_commitments.nim b/constantine/ethereum_eip4844_kzg_polynomial_commitments.nim index 4dc04619..4530aadc 100644 --- a/constantine/ethereum_eip4844_kzg_polynomial_commitments.nim +++ b/constantine/ethereum_eip4844_kzg_polynomial_commitments.nim @@ -314,7 +314,8 @@ func compute_kzg_proof*( kzg_prove( proof, y, poly[], ctx.domain, - z, ctx.srs_lagrange_g1) + z, ctx.srs_lagrange_g1, + bitreversedDomain = true) discard proof_bytes.serialize_g1_compressed(proof) # cannot fail y_bytes.marshal(y, bigEndian) # cannot fail diff --git a/constantine/math/polynomials/polynomials.nim b/constantine/math/polynomials/polynomials.nim index 29292d34..6e97881a 100644 --- a/constantine/math/polynomials/polynomials.nim +++ b/constantine/math/polynomials/polynomials.nim @@ -40,6 +40,11 @@ type PolyDomainEval*[N: static int, Field] = object ## Metadata for polynomial in Lagrange basis (evaluation form) + ## + ## Note on inverses + ## 1/ωⁱ (mod N) = ωⁿ⁻ⁱ (mod N) + ## Hence in canonical representation rootsOfUnity[(N-i) and (N-1)] contains the inverse of rootsOfUnity[i] + ## This translates into rootsOfUnity[brp((N-brp(i)) and (N-1))] when bit-reversal permuted rootsOfUnity*{.align: 64.}: array[N, Field] invMaxDegree*: Field @@ -141,9 +146,10 @@ func differenceQuotientEvalOffDomain*[N: static int, Field]( func differenceQuotientEvalInDomain*[N: static int, Field]( r: var PolynomialEval[N, Field], poly: PolynomialEval[N, Field], - zIndex: int, + zIndex: uint32, invRootsMinusZ: array[N, Field], - domain: PolyDomainEval[N, Field]) = + domain: PolyDomainEval[N, Field], + isBitReversedDomain: static bool) = ## Compute r(x) = (p(x) - p(z)) / (x - z) ## ## for z = ωⁱ a power of a root of unity @@ -153,9 +159,14 @@ func differenceQuotientEvalInDomain*[N: static int, Field]( ## - rootsOfUnity: ωⁱ ## - invRootsMinusZ: 1/(ωⁱ-z) ## - zIndex: the index of the root of unity power that matches z = ωⁱᵈˣ + + static: + # For powers of 2: x mod N == x and (N-1) + doAssert N.isPowerOf2_vartime() + r.evals[zIndex].setZero() - for i in 0 ..< N: + for i in 0'u32 ..< N: if i == zIndex: # https://dankradfeist.de/ethereum/2021/06/18/pcs-multiproofs.html # section "Dividing when one of the points is zero". @@ -168,31 +179,32 @@ func differenceQuotientEvalInDomain*[N: static int, Field]( # q'ᵢ = -qᵢ * ωⁱ/z # q'idx = ∑ q'ᵢ - # since z is a power of ω, ωⁱ/z = ωⁱ⁻ⁱᵈˣ - # However some protocols use bit-reversal permutation (brp) to store the ωⁱ - # Hence retrieving the data would require roots[brp((brp(i)-brp(index)) mod n)] for those - # But is this fast? There is no single instruction for reversing bits of an integer. - # and the reversal depends on N. - # - https://stackoverflow.com/questions/746171/efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c - # - https://stackoverflow.com/questions/52226858/bit-reversal-algorithm-by-rutkowska - # - https://www.hpl.hp.com/techreports/93/HPL-93-89.pdf - # - https://graphics.stanford.edu/~seander/bithacks.html#BitReverseObvious - # The C version from Stanford's bithacks need log₂(n) loop iterations - # A 254~255-bit multiplication takes 38 cycles, we need 3 brp so at most ~13 cycles per brp - # For small Ethereum KZG, n = 2¹² = 4096, we're already at the breaking point - # even if an iteration takes a single cycle with instruction-level parallelism var ri {.noinit.}: Field - ri.neg(r.evals[i]) # -qᵢ - ri *= domain.rootsOfUnity[(i+N-zIndex) and (N-1)] # -qᵢ * ωⁱ/z (explanation at the bottom) - r.evals[zIndex] += ri # r[zIndex] = ∑ -qᵢ * ωⁱ/z - - # ωⁱ/z computation detail - # from ωⁿ = 1 and z = ωⁱᵈˣ - # hence ωⁿ⁻ⁱᵈˣ = 1/z - # Note if using bit-reversal permutation (BRP): - # BRP maintains the relationship - # that the inverse of ωⁱ is at position n-i (mod n) in the array of roots of unity + ri.neg(r.evals[i]) # -qᵢ + when isBitReversedDomain: + const logN = log2_vartime(uint32 N) + let invZidx = N - reverseBits(uint32 zIndex, logN) + let canonI = reverseBits(i, logN) + let idx = reverseBits((canonI + invZidx) and (N-1), logN) + ri *= domain.rootsOfUnity[idx] # -qᵢ * ωⁱ/z (explanation at the bottom) + else: + ri *= domain.rootsOfUnity[(i+N-zIndex) and (N-1)] # -qᵢ * ωⁱ/z (explanation at the bottom) + r.evals[zIndex] += ri # r[zIndex] = ∑ -qᵢ * ωⁱ/z + + # * 1/z computation detail + # from ωⁿ = 1 and z = ωⁱᵈˣ + # hence ωⁿ⁻ⁱᵈˣ = 1/z + # However our z may be in bit-reversal permuted + # + # * We want ωⁱ/z which translate to ωⁱ*ωⁿ⁻ⁱᵈˣ hence ωⁱ⁺ⁿ⁻ⁱᵈˣ + # with the roots of unity being a cyclic group of order N so we compute i+N-zIndex (mod N) + # + # However some protocols use bit-reversal permutation (brp) to store the ωⁱ + # Hence retrieving the data requires roots[brp((brp(i)-n-brp(idx)) mod n)] for those (note: n = brp(n)) # - # We want ωⁱ/z which translate to ωⁱ*ωⁿ⁻ⁱᵈˣ hence ωⁱ⁺ⁿ⁻ⁱᵈˣ - # with the roots of unity being a cyclic group of order N so we compute i+N-zIndex (mod N) - static: doAssert N.isPowerOf2_vartime() + # For Ethereum: + # A 254~255-bit multiplication takes 11ns / 38 cycles (Fr[BLS12-381]), + # A brp with n = 2¹² = 4096 (for EIP4844) takes about 6ns + # We could also cache either ωⁿ⁻ⁱ or a map i' = brp(n - brp(i)) + # in non-brp order but cache misses are expensive + # and brp can benefits from instruction-level parallelism diff --git a/tests/t_ethereum_eip4844_deneb_kzg.nim b/tests/t_ethereum_eip4844_deneb_kzg.nim index a9b8d48b..2a6ff6a9 100644 --- a/tests/t_ethereum_eip4844_deneb_kzg.nim +++ b/tests/t_ethereum_eip4844_deneb_kzg.nim @@ -149,8 +149,8 @@ block: test "blob_to_kzg_commitment(dst: var array[48, byte], blob: ptr array[4096, byte])": ctx.test_blob_to_kzg_commitment() - # test "compute_kzg_proof(proof: var array[48, byte], y: var array[32, byte], blob: ptr array[4096, byte], z: array[32, byte])": - # ctx.test_compute_kzg_proof() + test "compute_kzg_proof(proof: var array[48, byte], y: var array[32, byte], blob: ptr array[4096, byte], z: array[32, byte])": + ctx.test_compute_kzg_proof() test "verify_kzg_proof(commitment: array[48, byte], z, y: array[32, byte], proof: array[48, byte]) -> bool": ctx.test_verify_kzg_proof() diff --git a/tests/t_ethereum_eip4844_deneb_kzg.nim.cfg b/tests/t_ethereum_eip4844_deneb_kzg.nim.cfg new file mode 100644 index 00000000..b8c53144 --- /dev/null +++ b/tests/t_ethereum_eip4844_deneb_kzg.nim.cfg @@ -0,0 +1,2 @@ +# NimYAML requires ORC instead of ARC for memory management to deal with cycles +--mm:orc