From 9bc2788a85f994e58c777e82cdf382cc08fa95f3 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Fri, 15 Mar 2024 14:41:55 -0400 Subject: [PATCH 01/12] perf(2-chain): optimize varScalarMul --- std/algebra/emulated/sw_emulated/point.go | 2 +- std/algebra/native/sw_bls12377/g1.go | 120 +++++++++++----------- std/algebra/native/sw_bls12377/hints.go | 25 ++--- 3 files changed, 72 insertions(+), 75 deletions(-) diff --git a/std/algebra/emulated/sw_emulated/point.go b/std/algebra/emulated/sw_emulated/point.go index 0ed2f78307..6c73bb046c 100644 --- a/std/algebra/emulated/sw_emulated/point.go +++ b/std/algebra/emulated/sw_emulated/point.go @@ -582,7 +582,7 @@ func (c *Curve[B, S]) scalarMulGLV(Q *AffinePoint[B], s *emulated.Element[S], op // B1 = Q+Φ(Q) // B2 = -Q-Φ(Q) // B3 = Q-Φ(Q) - // B4 = -QΦ(Q) + // B4 = -Q+Φ(Q) // // If we extend this by merging two iterations, we need to look up P and P' // both from {B1, B2, B3, B4} and compute: diff --git a/std/algebra/native/sw_bls12377/g1.go b/std/algebra/native/sw_bls12377/g1.go index c823e69c29..f0173c6fa6 100644 --- a/std/algebra/native/sw_bls12377/g1.go +++ b/std/algebra/native/sw_bls12377/g1.go @@ -186,29 +186,30 @@ func (P *G1Affine) ScalarMul(api frontend.API, Q G1Affine, s interface{}, opts . } } -// varScalarMul sets P = [s] Q and returns P. +// varScalarMul sets P = [s]Q and returns P. It doesn't modify Q nor s. +// It implements an optimized version based on algorithm 1 of [Halo] (see Section 6.2 and appendix C). +// +// ⚠️ The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set. +// (0,0) is not on the curve but we conventionally take it as the +// neutral/infinity point as per the [EVM]. 
+// +// [Halo]: https://eprint.iacr.org/2019/1021.pdf +// [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variable, opts ...algopts.AlgebraOption) *G1Affine { cfg, err := algopts.NewConfig(opts...) if err != nil { panic(err) } - // This method computes [s] Q. We use several methods to reduce the number - // of added constraints - first, instead of classical double-and-add, we use - // the optimized version from https://github.com/zcash/zcash/issues/3924 - // which allows to omit computation of several intermediate values. - // Secondly, we use the GLV scalar multiplication to reduce the number - // iterations in the main loop. There is a small difference though - as - // two-bit select takes three constraints, then it takes as many constraints - // to compute ± Q ± Φ(Q) every iteration instead of selecting the value - // from a precomputed table. However, precomputing the table adds 12 - // additional constraints and thus table-version is more expensive than - // addition-version. var selector frontend.Variable if cfg.CompleteArithmetic { // if Q=(0,0) we assign a dummy (1,1) to Q and continue selector = api.And(api.IsZero(Q.X), api.IsZero(Q.Y)) Q.Select(api, selector, G1Affine{X: 1, Y: 1}, Q) } + + // We use the endomorphism à la GLV to compute [s]Q as + // [s1]Q + [s2]Φ(Q) + // // The context we are working is based on the `outer` curve. However, the // points and the operations on the points are performed on the `inner` // curve of the outer curve. We require some parameters from the inner @@ -218,31 +219,24 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl // the hints allow to decompose the scalar s into s1 and s2 such that // s1 + λ * s2 == s mod r, // where λ is third root of one in 𝔽_r. 
- sd, err := api.Compiler().NewHint(decomposeScalarG1, 3, s) + sd, err := api.Compiler().NewHint(decomposeScalarG1, 2, s) if err != nil { // err is non-nil only for invalid number of inputs panic(err) } s1, s2 := sd[0], sd[1] - // when we split scalar, then s1, s2 < lambda by default. However, to have - // the high 1-2 bits of s1, s2 set, the hint functions compute the - // decomposition for - // s + k*r (for some k) - // instead and omits the last reduction. Thus, to constrain s1 and s2, we - // have to assert that - // s1 + λ * s2 == s + k*r - api.AssertIsEqual(api.Add(s1, api.Mul(s2, cc.lambda)), api.Add(s, api.Mul(cc.fr, sd[2]))) - - // As the decomposed scalars are not fully reduced, then in addition of - // having the high bit set, an overflow bit may also be set. Thus, the total - // number of bits may be one more than the bitlength of λ. - nbits := cc.lambda.BitLen() + 1 + // s1 + λ * s2 == s + api.AssertIsEqual( + api.Add(s1, api.Mul(s2, cc.lambda)), + s, + ) + // For BLS12 λ bitsize is 127 equal to half of r bitsize + nbits := cc.lambda.BitLen() s1bits := api.ToBinary(s1, nbits) s2bits := api.ToBinary(s2, nbits) - var Acc /*accumulator*/, B, B2 /*tmp vars*/ G1Affine // precompute -Q, -Φ(Q), Φ(Q) var tableQ, tablePhiQ [2]G1Affine tableQ[1] = Q @@ -250,45 +244,48 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl cc.phi1(api, &tablePhiQ[1], &Q) tablePhiQ[0].Neg(api, tablePhiQ[1]) - // We now initialize the accumulator. Due to the way the scalar is - // decomposed, either the high bits of s1 or s2 are set and we can use the - // incomplete addition laws. - - // Acc = Q + Φ(Q) = -Φ²(Q) + // we suppose that the first bits of the sub-scalars are 1 and set: + // Acc = Q + Φ(Q) = -Φ²(Q) + var Acc, B G1Affine cc.phi2Neg(api, &Acc, &Q) - // However, we can not directly add step value conditionally as we may get - // to incomplete path of the addition formula. 
We either add or subtract - // step value from [2] Acc (instead of conditionally adding step value to - // Acc): - // Acc = [2] (Q + Φ(Q)) ± Q ± Φ(Q) - // only y coordinate differs for negation, select on that instead. - // first bit - B.X = tableQ[0].X - B.Y = api.Select(s1bits[nbits-1], tableQ[1].Y, tableQ[0].Y) - Acc.DoubleAndAdd(api, &Acc, &B) - B.X = tablePhiQ[0].X - B.Y = api.Select(s2bits[nbits-1], tablePhiQ[1].Y, tablePhiQ[0].Y) - Acc.AddAssign(api, B) - - // second bit - B.X = tableQ[0].X - B.Y = api.Select(s1bits[nbits-2], tableQ[1].Y, tableQ[0].Y) - Acc.DoubleAndAdd(api, &Acc, &B) - B.X = tablePhiQ[0].X - B.Y = api.Select(s2bits[nbits-2], tablePhiQ[1].Y, tablePhiQ[0].Y) - Acc.AddAssign(api, B) + // At each iteration we need to compute: + // [2]Acc ± Q ± Φ(Q). + // We can compute [2]Acc and look up the (precomputed) point B from: + // B1 = +Q + Φ(Q) + B1 := Acc + // B2 = -Q - Φ(Q) + B2 := G1Affine{} + B2.Neg(api, B1) + // B3 = +Q - Φ(Q) + B3 := tableQ[1] + B3.AddAssign(api, tablePhiQ[0]) + // B4 = -Q + Φ(Q) + B4 := G1Affine{} + B4.Neg(api, B3) + // + // Note that half the points are negatives of the other half, + // hence have the same X coordinates. + + // However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen + // that Acc==B or -B. So we add the base point G to it to avoid incomplete + // additions in the loop by forcing Acc to be different than the stored B. + // However now we need at the end to subtract [2^nbits]G (harcoded) from + // the result. 
+ // + // Acc = Q + Φ(Q) + G + points := getCurvePoints() + Acc.AddAssign(api, G1Affine{X: points.G1x, Y: points.G1y}) - B2.X = tablePhiQ[0].X - for i := nbits - 3; i > 0; i-- { - B.X = Q.X - B.Y = api.Select(s1bits[i], tableQ[1].Y, tableQ[0].Y) - B2.Y = api.Select(s2bits[i], tablePhiQ[1].Y, tablePhiQ[0].Y) - B.AddAssign(api, B2) + for i := nbits - 1; i > 0; i-- { + B.X = api.Select(api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X) + B.Y = api.Lookup2(s1bits[i], s2bits[i], B2.Y, B3.Y, B4.Y, B1.Y) + // Acc = [2]Acc + B Acc.DoubleAndAdd(api, &Acc, &B) } // i = 0 + // subtract the Q, R, Φ(Q), Φ(R) if the first bits are 0. // When cfg.CompleteArithmetic is set, we use AddUnified instead of Add. This means // when s=0 then Acc=(0,0) because AddUnified(Q, -Q) = (0,0). if cfg.CompleteArithmetic { @@ -304,6 +301,11 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl Acc.Select(api, s2bits[0], Acc, tablePhiQ[0]) } + // subtract [2^nbits]G since we added G at the beginning + B.X = points.G1m[nbits-1][0] + B.Y = api.Neg(points.G1m[nbits-1][1]) + Acc.AddAssign(api, B) + P.X = Acc.X P.Y = Acc.Y diff --git a/std/algebra/native/sw_bls12377/hints.go b/std/algebra/native/sw_bls12377/hints.go index d34b321af9..cba289a1dc 100644 --- a/std/algebra/native/sw_bls12377/hints.go +++ b/std/algebra/native/sw_bls12377/hints.go @@ -1,6 +1,7 @@ package sw_bls12377 import ( + "fmt" "math/big" "github.com/consensys/gnark-crypto/ecc" @@ -18,23 +19,17 @@ func init() { solver.RegisterHint(GetHints()...) 
} -func decomposeScalarG1(scalarField *big.Int, inputs []*big.Int, res []*big.Int) error { +func decomposeScalarG1(scalarField *big.Int, inputs []*big.Int, outputs []*big.Int) error { + if len(inputs) != 1 { + return fmt.Errorf("expecting one input") + } + if len(outputs) != 2 { + return fmt.Errorf("expecting two outputs") + } cc := getInnerCurveConfig(scalarField) sp := ecc.SplitScalar(inputs[0], cc.glvBasis) - res[0].Set(&(sp[0])) - res[1].Set(&(sp[1])) - one := big.NewInt(1) - // add (lambda+1, lambda) until scalar compostion is over Fr to ensure that - // the high bits are set in decomposition. - for res[0].Cmp(cc.lambda) < 1 && res[1].Cmp(cc.lambda) < 1 { - res[0].Add(res[0], cc.lambda) - res[0].Add(res[0], one) - res[1].Add(res[1], cc.lambda) - } - // figure out how many times we have overflowed - res[2].Mul(res[1], cc.lambda).Add(res[2], res[0]) - res[2].Sub(res[2], inputs[0]) - res[2].Div(res[2], cc.fr) + outputs[0].Set(&(sp[0])) + outputs[1].Set(&(sp[1])) return nil } From 0457871c22efcdfe478eac2943f0f70124d00d7c Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Fri, 15 Mar 2024 15:42:59 -0400 Subject: [PATCH 02/12] perf(2-chain): handle edge cases in varScalarMul --- std/algebra/native/sw_bls12377/g1.go | 12 +++++- std/algebra/native/sw_bls12377/hints.go | 56 ++++++++++++++++++++----- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/std/algebra/native/sw_bls12377/g1.go b/std/algebra/native/sw_bls12377/g1.go index f0173c6fa6..603b662457 100644 --- a/std/algebra/native/sw_bls12377/g1.go +++ b/std/algebra/native/sw_bls12377/g1.go @@ -219,7 +219,7 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl // the hints allow to decompose the scalar s into s1 and s2 such that // s1 + λ * s2 == s mod r, // where λ is third root of one in 𝔽_r. 
- sd, err := api.Compiler().NewHint(decomposeScalarG1, 2, s) + sd, err := api.Compiler().NewHint(decomposeScalarG1Simple, 2, s) if err != nil { // err is non-nil only for invalid number of inputs panic(err) @@ -304,7 +304,15 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl // subtract [2^nbits]G since we added G at the beginning B.X = points.G1m[nbits-1][0] B.Y = api.Neg(points.G1m[nbits-1][1]) - Acc.AddAssign(api, B) + if cfg.CompleteArithmetic { + Acc.AddUnified(api, B) + } else { + Acc.AddAssign(api, B) + } + + if cfg.CompleteArithmetic { + Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc) + } P.X = Acc.X P.Y = Acc.Y diff --git a/std/algebra/native/sw_bls12377/hints.go b/std/algebra/native/sw_bls12377/hints.go index cba289a1dc..d59ef955ef 100644 --- a/std/algebra/native/sw_bls12377/hints.go +++ b/std/algebra/native/sw_bls12377/hints.go @@ -11,6 +11,7 @@ import ( func GetHints() []solver.Hint { return []solver.Hint{ decomposeScalarG1, + decomposeScalarG1Simple, decomposeScalarG2, } } @@ -19,7 +20,7 @@ func init() { solver.RegisterHint(GetHints()...) 
} -func decomposeScalarG1(scalarField *big.Int, inputs []*big.Int, outputs []*big.Int) error { +func decomposeScalarG1Simple(scalarField *big.Int, inputs []*big.Int, outputs []*big.Int) error { if len(inputs) != 1 { return fmt.Errorf("expecting one input") } @@ -34,23 +35,56 @@ func decomposeScalarG1(scalarField *big.Int, inputs []*big.Int, outputs []*big.I return nil } -func decomposeScalarG2(scalarField *big.Int, inputs []*big.Int, res []*big.Int) error { +func decomposeScalarG1(scalarField *big.Int, inputs []*big.Int, outputs []*big.Int) error { + if len(inputs) != 1 { + return fmt.Errorf("expecting one input") + } + if len(outputs) != 3 { + return fmt.Errorf("expecting three outputs") + } cc := getInnerCurveConfig(scalarField) sp := ecc.SplitScalar(inputs[0], cc.glvBasis) - res[0].Set(&(sp[0])) - res[1].Set(&(sp[1])) + outputs[0].Set(&(sp[0])) + outputs[1].Set(&(sp[1])) + one := big.NewInt(1) + // add (lambda+1, lambda) until scalar compostion is over Fr to ensure that + // the high bits are set in decomposition. + for outputs[0].Cmp(cc.lambda) < 1 && outputs[1].Cmp(cc.lambda) < 1 { + outputs[0].Add(outputs[0], cc.lambda) + outputs[0].Add(outputs[0], one) + outputs[1].Add(outputs[1], cc.lambda) + } + // figure out how many times we have overflowed + outputs[2].Mul(outputs[1], cc.lambda).Add(outputs[2], outputs[0]) + outputs[2].Sub(outputs[2], inputs[0]) + outputs[2].Div(outputs[2], cc.fr) + + return nil +} + +func decomposeScalarG2(scalarField *big.Int, inputs []*big.Int, outputs []*big.Int) error { + if len(inputs) != 1 { + return fmt.Errorf("expecting one input") + } + if len(outputs) != 3 { + return fmt.Errorf("expecting three outputs") + } + cc := getInnerCurveConfig(scalarField) + sp := ecc.SplitScalar(inputs[0], cc.glvBasis) + outputs[0].Set(&(sp[0])) + outputs[1].Set(&(sp[1])) one := big.NewInt(1) // add (lambda+1, lambda) until scalar compostion is over Fr to ensure that // the high bits are set in decomposition. 
- for res[0].Cmp(cc.lambda) < 1 && res[1].Cmp(cc.lambda) < 1 { - res[0].Add(res[0], cc.lambda) - res[0].Add(res[0], one) - res[1].Add(res[1], cc.lambda) + for outputs[0].Cmp(cc.lambda) < 1 && outputs[1].Cmp(cc.lambda) < 1 { + outputs[0].Add(outputs[0], cc.lambda) + outputs[0].Add(outputs[0], one) + outputs[1].Add(outputs[1], cc.lambda) } // figure out how many times we have overflowed - res[2].Mul(res[1], cc.lambda).Add(res[2], res[0]) - res[2].Sub(res[2], inputs[0]) - res[2].Div(res[2], cc.fr) + outputs[2].Mul(outputs[1], cc.lambda).Add(outputs[2], outputs[0]) + outputs[2].Sub(outputs[2], inputs[0]) + outputs[2].Div(outputs[2], cc.fr) return nil } From dafaacb76b4d5849b533c13e4e810ddc1ecd2d69 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Fri, 15 Mar 2024 15:44:39 -0400 Subject: [PATCH 03/12] perf(2-chain): optimize folded MSM --- std/algebra/native/sw_bls12377/g1.go | 100 +++++++++++++++++---------- 1 file changed, 63 insertions(+), 37 deletions(-) diff --git a/std/algebra/native/sw_bls12377/g1.go b/std/algebra/native/sw_bls12377/g1.go index 603b662457..07d80feb27 100644 --- a/std/algebra/native/sw_bls12377/g1.go +++ b/std/algebra/native/sw_bls12377/g1.go @@ -578,9 +578,19 @@ func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits [] selector = api.And(api.IsZero(Q.X), api.IsZero(Q.Y)) Q.Select(api, selector, G1Affine{X: 1, Y: 1}, Q) } + + // We use the endomorphism à la GLV to compute [s]Q as + // [s1]Q + [s2]Φ(Q) + // + // The context we are working is based on the `outer` curve. However, the + // points and the operations on the points are performed on the `inner` + // curve of the outer curve. We require some parameters from the inner + // curve. 
cc := getInnerCurveConfig(api.Compiler().Field()) - nbits := cc.lambda.BitLen() + 1 - var Acc /*accumulator*/, B, B2 /*tmp vars*/ G1Affine + + // For BLS12 λ bitsize is 127 equal to half of r bitsize + nbits := cc.lambda.BitLen() + // precompute -Q, -Φ(Q), Φ(Q) var tableQ, tablePhiQ [2]G1Affine tableQ[1] = Q @@ -588,45 +598,48 @@ func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits [] cc.phi1(api, &tablePhiQ[1], &Q) tablePhiQ[0].Neg(api, tablePhiQ[1]) - // We now initialize the accumulator. Due to the way the scalar is - // decomposed, either the high bits of s1 or s2 are set and we can use the - // incomplete addition laws. - - // Acc = Q + Φ(Q) - Acc = tableQ[1] - Acc.AddAssign(api, tablePhiQ[1]) - - // However, we can not directly add step value conditionally as we may get - // to incomplete path of the addition formula. We either add or subtract - // step value from [2] Acc (instead of conditionally adding step value to - // Acc): - // Acc = [2] (Q + Φ(Q)) ± Q ± Φ(Q) - // only y coordinate differs for negation, select on that instead. - B.X = tableQ[0].X - B.Y = api.Select(s1bits[nbits-1], tableQ[1].Y, tableQ[0].Y) - Acc.DoubleAndAdd(api, &Acc, &B) - B.X = tablePhiQ[0].X - B.Y = api.Select(s2bits[nbits-1], tablePhiQ[1].Y, tablePhiQ[0].Y) - Acc.AddAssign(api, B) - - // second bit - B.X = tableQ[0].X - B.Y = api.Select(s1bits[nbits-2], tableQ[1].Y, tableQ[0].Y) - Acc.DoubleAndAdd(api, &Acc, &B) - B.X = tablePhiQ[0].X - B.Y = api.Select(s2bits[nbits-2], tablePhiQ[1].Y, tablePhiQ[0].Y) - Acc.AddAssign(api, B) - - B2.X = tablePhiQ[0].X - for i := nbits - 3; i > 0; i-- { - B.X = Q.X - B.Y = api.Select(s1bits[i], tableQ[1].Y, tableQ[0].Y) - B2.Y = api.Select(s2bits[i], tablePhiQ[1].Y, tablePhiQ[0].Y) - B.AddAssign(api, B2) + // we suppose that the first bits of the sub-scalars are 1 and set: + // Acc = Q + Φ(Q) = -Φ²(Q) + var Acc, B G1Affine + cc.phi2Neg(api, &Acc, &Q) + + // At each iteration we need to compute: + // [2]Acc ± Q ± Φ(Q). 
+ // We can compute [2]Acc and look up the (precomputed) point B from: + // B1 = +Q + Φ(Q) + B1 := Acc + // B2 = -Q - Φ(Q) + B2 := G1Affine{} + B2.Neg(api, B1) + // B3 = +Q - Φ(Q) + B3 := tableQ[1] + B3.AddAssign(api, tablePhiQ[0]) + // B4 = -Q + Φ(Q) + B4 := G1Affine{} + B4.Neg(api, B3) + // + // Note that half the points are negatives of the other half, + // hence have the same X coordinates. + + // However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen + // that Acc==B or -B. So we add the base point G to it to avoid incomplete + // additions in the loop by forcing Acc to be different than the stored B. + // However now we need at the end to subtract [2^nbits]G (harcoded) from + // the result. + // + // Acc = Q + Φ(Q) + G + points := getCurvePoints() + Acc.AddAssign(api, G1Affine{X: points.G1x, Y: points.G1y}) + + for i := nbits - 1; i > 0; i-- { + B.X = api.Select(api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X) + B.Y = api.Lookup2(s1bits[i], s2bits[i], B2.Y, B3.Y, B4.Y, B1.Y) + // Acc = [2]Acc + B Acc.DoubleAndAdd(api, &Acc, &B) } // i = 0 + // subtract the Q, R, Φ(Q), Φ(R) if the first bits are 0. // When cfg.CompleteArithmetic is set, we use AddUnified instead of Add. This means // when s=0 then Acc=(0,0) because AddUnified(Q, -Q) = (0,0). 
if cfg.CompleteArithmetic { @@ -642,6 +655,19 @@ func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits [] Acc.Select(api, s2bits[0], Acc, tablePhiQ[0]) } + // subtract [2^nbits]G since we added G at the beginning + B.X = points.G1m[nbits-1][0] + B.Y = api.Neg(points.G1m[nbits-1][1]) + if cfg.CompleteArithmetic { + Acc.AddUnified(api, B) + } else { + Acc.AddAssign(api, B) + } + + if cfg.CompleteArithmetic { + Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc) + } + P.X = Acc.X P.Y = Acc.Y From beccb36414d9f639baefea8376daed1219e01674 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Mon, 18 Mar 2024 12:15:50 -0400 Subject: [PATCH 04/12] fix: folded MSM scalar decomposition --- std/algebra/native/sw_bls12377/pairing2.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/std/algebra/native/sw_bls12377/pairing2.go b/std/algebra/native/sw_bls12377/pairing2.go index 05b00818af..f977ab916d 100644 --- a/std/algebra/native/sw_bls12377/pairing2.go +++ b/std/algebra/native/sw_bls12377/pairing2.go @@ -175,13 +175,16 @@ func (c *Curve) MultiScalarMul(P []*G1Affine, scalars []*Scalar, opts ...algopts gamma := c.packScalarToVar(scalars[0]) // decompose gamma in the endomorphism eigenvalue basis and bit-decompose the sub-scalars cc := getInnerCurveConfig(c.api.Compiler().Field()) - sd, err := c.api.Compiler().NewHint(decomposeScalarG1, 3, gamma) + sd, err := c.api.Compiler().NewHint(decomposeScalarG1Simple, 2, gamma) if err != nil { panic(err) } gamma1, gamma2 := sd[0], sd[1] - c.api.AssertIsEqual(c.api.Add(gamma1, c.api.Mul(gamma2, cc.lambda)), c.api.Add(gamma, c.api.Mul(cc.fr, sd[2]))) - nbits := cc.lambda.BitLen() + 1 + c.api.AssertIsEqual( + c.api.Add(gamma1, c.api.Mul(gamma2, cc.lambda)), + gamma, + ) + nbits := cc.lambda.BitLen() gamma1Bits := c.api.ToBinary(gamma1, nbits) gamma2Bits := c.api.ToBinary(gamma2, nbits) From 902fc1b18fd8f00c1c45183087e684117384f5ff Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Mon, 
18 Mar 2024 12:54:22 -0400 Subject: [PATCH 05/12] perf: replace dummy G by (0,1) in ScalarMul --- std/algebra/native/sw_bls12377/g1.go | 63 ++++++++++++++-------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/std/algebra/native/sw_bls12377/g1.go b/std/algebra/native/sw_bls12377/g1.go index 07d80feb27..7b5936a8c4 100644 --- a/std/algebra/native/sw_bls12377/g1.go +++ b/std/algebra/native/sw_bls12377/g1.go @@ -268,14 +268,17 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl // hence have the same X coordinates. // However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen - // that Acc==B or -B. So we add the base point G to it to avoid incomplete - // additions in the loop by forcing Acc to be different than the stored B. - // However now we need at the end to subtract [2^nbits]G (harcoded) from - // the result. + // that Acc==B or -B. So we add the point H=(0,1) on BLS12-377 of order 2 + // to it to avoid incomplete additions in the loop by forcing Acc to be + // different than the stored B. Normally, the point H should be "killed + // out" by the first doubling in the loop and the result will remain + // unchanged. However, we are using affine coordinates that do not encode + // the infinity point. Given the affine formulae, doubling (0,1) results in + // (0,-1). Since the loop size N=nbits-1 is even we need to subtract + // [2^N]H = (0,1) from the result at the end. 
// - // Acc = Q + Φ(Q) + G - points := getCurvePoints() - Acc.AddAssign(api, G1Affine{X: points.G1x, Y: points.G1y}) + // Acc = Q + Φ(Q) + H + Acc.AddAssign(api, G1Affine{X: 0, Y: 1}) for i := nbits - 1; i > 0; i-- { B.X = api.Select(api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X) @@ -301,17 +304,13 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl Acc.Select(api, s2bits[0], Acc, tablePhiQ[0]) } - // subtract [2^nbits]G since we added G at the beginning - B.X = points.G1m[nbits-1][0] - B.Y = api.Neg(points.G1m[nbits-1][1]) - if cfg.CompleteArithmetic { - Acc.AddUnified(api, B) - } else { - Acc.AddAssign(api, B) - } - if cfg.CompleteArithmetic { + // subtract [2^N]H = (0,1) since we added H at the beginning + Acc.AddUnified(api, G1Affine{X: 0, Y: -1}) Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc) + } else { + // subtract [2^N]H = (0,1) since we added H at the beginning + Acc.AddAssign(api, G1Affine{X: 0, Y: -1}) } P.X = Acc.X @@ -565,7 +564,7 @@ func (P *G1Affine) jointScalarMulUnsafe(api frontend.API, Q, R G1Affine, s, t fr return P } -// scalarBitsMul computes s * p and returns it where sBits is the bit decomposition of s. It doesn't modify p nor sBits. +// scalarBitsMul computes [s]Q and returns it where sBits is the bit decomposition of s. It doesn't modify Q nor sBits. // The method is similar to varScalarMul. func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits []frontend.Variable, opts ...algopts.AlgebraOption) *G1Affine { cfg, err := algopts.NewConfig(opts...) @@ -622,14 +621,17 @@ func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits [] // hence have the same X coordinates. // However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen - // that Acc==B or -B. So we add the base point G to it to avoid incomplete - // additions in the loop by forcing Acc to be different than the stored B. 
- // However now we need at the end to subtract [2^nbits]G (harcoded) from - // the result. + // that Acc==B or -B. So we add the point H=(0,1) on BLS12-377 of order 2 + // to it to avoid incomplete additions in the loop by forcing Acc to be + // different than the stored B. Normally, the point H should be "killed + // out" by the first doubling in the loop and the result will remain + // unchanged. However, we are using affine coordinates that do not encode + // the infinity point. Given the affine formulae, doubling (0,1) results in + // (0,-1). Since the loop size N=nbits-1 is even we need to subtract + // [2^N]H = (0,1) from the result at the end. // - // Acc = Q + Φ(Q) + G - points := getCurvePoints() - Acc.AddAssign(api, G1Affine{X: points.G1x, Y: points.G1y}) + // Acc = Q + Φ(Q) + H + Acc.AddAssign(api, G1Affine{X: 0, Y: 1}) for i := nbits - 1; i > 0; i-- { B.X = api.Select(api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X) @@ -655,17 +657,14 @@ func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits [] Acc.Select(api, s2bits[0], Acc, tablePhiQ[0]) } - // subtract [2^nbits]G since we added G at the beginning - B.X = points.G1m[nbits-1][0] - B.Y = api.Neg(points.G1m[nbits-1][1]) if cfg.CompleteArithmetic { - Acc.AddUnified(api, B) + // subtract [2^N]H = (0,1) since we added H at the beginning + Acc.AddUnified(api, G1Affine{X: 0, Y: -1}) + Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc) } else { - Acc.AddAssign(api, B) - } + // subtract [2^N]H = (0,1) since we added H at the beginning + Acc.AddAssign(api, G1Affine{X: 0, Y: -1}) - if cfg.CompleteArithmetic { - Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc) } P.X = Acc.X From 92a9d381244d130b0993e1000fc4e26ad09db7a0 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Mon, 18 Mar 2024 15:54:30 -0400 Subject: [PATCH 06/12] perf(bls24): optimize varScalarMul --- std/algebra/native/sw_bls24315/g1.go | 278 +++++++++++------- std/algebra/native/sw_bls24315/g1_test.go | 317 
+++++++++++++++++++++ std/algebra/native/sw_bls24315/hints.go | 21 ++ std/algebra/native/sw_bls24315/pairing2.go | 31 +- std/math/emulated/emparams/emparams.go | 12 +- 5 files changed, 537 insertions(+), 122 deletions(-) diff --git a/std/algebra/native/sw_bls24315/g1.go b/std/algebra/native/sw_bls24315/g1.go index 2f21900e78..baa71256b1 100644 --- a/std/algebra/native/sw_bls24315/g1.go +++ b/std/algebra/native/sw_bls24315/g1.go @@ -158,29 +158,30 @@ func (P *G1Affine) ScalarMul(api frontend.API, Q G1Affine, s interface{}, opts . } } -// varScalarMul sets P = [s] Q and returns P. +// varScalarMul sets P = [s]Q and returns P. It doesn't modify Q nor s. +// It implements an optimized version based on algorithm 1 of [Halo] (see Section 6.2 and appendix C). +// +// ⚠️ The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set. +// (0,0) is not on the curve but we conventionally take it as the +// neutral/infinity point as per the [EVM]. +// +// [Halo]: https://eprint.iacr.org/2019/1021.pdf +// [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variable, opts ...algopts.AlgebraOption) *G1Affine { cfg, err := algopts.NewConfig(opts...) if err != nil { panic(err) } - // This method computes [s] Q. We use several methods to reduce the number - // of added constraints - first, instead of classical double-and-add, we use - // the optimized version from https://github.com/zcash/zcash/issues/3924 - // which allows to omit computation of several intermediate values. - // Secondly, we use the GLV scalar multiplication to reduce the number - // iterations in the main loop. There is a small difference though - as - // two-bit select takes three constraints, then it takes as many constraints - // to compute ± Q ± Φ(Q) every iteration instead of selecting the value - // from a precomputed table. 
However, precomputing the table adds 12 - // additional constraints and thus table-version is more expensive than - // addition-version. var selector frontend.Variable if cfg.CompleteArithmetic { // if Q=(0,0) we assign a dummy (1,1) to Q and continue selector = api.And(api.IsZero(Q.X), api.IsZero(Q.Y)) Q.Select(api, selector, G1Affine{X: 1, Y: 1}, Q) } + + // We use the endomorphism à la GLV to compute [s]Q as + // [s1]Q + [s2]Φ(Q) + // // The context we are working is based on the `outer` curve. However, the // points and the operations on the points are performed on the `inner` // curve of the outer curve. We require some parameters from the inner @@ -190,31 +191,23 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl // the hints allow to decompose the scalar s into s1 and s2 such that // s1 + λ * s2 == s mod r, // where λ is third root of one in 𝔽_r. - sd, err := api.Compiler().NewHint(decomposeScalarG1, 3, s) + sd, err := api.Compiler().NewHint(decomposeScalarG1Simple, 3, s) if err != nil { // err is non-nil only for invalid number of inputs panic(err) } s1, s2 := sd[0], sd[1] - // when we split scalar, then s1, s2 < lambda by default. However, to have - // the high 1-2 bits of s1, s2 set, the hint functions compute the - // decomposition for - // s + k*r (for some k) - // instead and omits the last reduction. Thus, to constrain s1 and s2, we - // have to assert that - // s1 + λ * s2 == s + k*r - api.AssertIsEqual(api.Add(s1, api.Mul(s2, cc.lambda)), api.Add(s, api.Mul(cc.fr, sd[2]))) - - // As the decomposed scalars are not fully reduced, then in addition of - // having the high bit set, an overflow bit may also be set. Thus, the total - // number of bits may be one more than the bitlength of λ. 
- nbits := cc.lambda.BitLen() + 1 + // s1 + λ * s2 == s mod r, + api.AssertIsEqual( + api.Add(s1, api.Mul(s2, cc.lambda)), + api.Add(s, api.Mul(cc.fr, sd[2])), + ) + nbits := 127 s1bits := api.ToBinary(s1, nbits) s2bits := api.ToBinary(s2, nbits) - var Acc /*accumulator*/, B, B2 /*tmp vars*/ G1Affine // precompute -Q, -Φ(Q), Φ(Q) var tableQ, tablePhiQ [2]G1Affine tableQ[1] = Q @@ -222,45 +215,51 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl cc.phi1(api, &tablePhiQ[1], &Q) tablePhiQ[0].Neg(api, tablePhiQ[1]) - // We now initialize the accumulator. Due to the way the scalar is - // decomposed, either the high bits of s1 or s2 are set and we can use the - // incomplete addition laws. - - // Acc = Q + Φ(Q) = -Φ²(Q) + // we suppose that the first bits of the sub-scalars are 1 and set: + // Acc = Q + Φ(Q) = -Φ²(Q) + var Acc, B G1Affine cc.phi2Neg(api, &Acc, &Q) - // However, we can not directly add step value conditionally as we may get - // to incomplete path of the addition formula. We either add or subtract - // step value from [2] Acc (instead of conditionally adding step value to - // Acc): - // Acc = [2] (Q + Φ(Q)) ± Q ± Φ(Q) - // only y coordinate differs for negation, select on that instead. 
- // first bit - B.X = tableQ[0].X - B.Y = api.Select(s1bits[nbits-1], tableQ[1].Y, tableQ[0].Y) - Acc.DoubleAndAdd(api, &Acc, &B) - B.X = tablePhiQ[0].X - B.Y = api.Select(s2bits[nbits-1], tablePhiQ[1].Y, tablePhiQ[0].Y) - Acc.AddAssign(api, B) - - // second bit - B.X = tableQ[0].X - B.Y = api.Select(s1bits[nbits-2], tableQ[1].Y, tableQ[0].Y) - Acc.DoubleAndAdd(api, &Acc, &B) - B.X = tablePhiQ[0].X - B.Y = api.Select(s2bits[nbits-2], tablePhiQ[1].Y, tablePhiQ[0].Y) - Acc.AddAssign(api, B) - - B2.X = tablePhiQ[0].X - for i := nbits - 3; i > 0; i-- { - B.X = Q.X - B.Y = api.Select(s1bits[i], tableQ[1].Y, tableQ[0].Y) - B2.Y = api.Select(s2bits[i], tablePhiQ[1].Y, tablePhiQ[0].Y) - B.AddAssign(api, B2) + // At each iteration we need to compute: + // [2]Acc ± Q ± Φ(Q). + // We can compute [2]Acc and look up the (precomputed) point B from: + // B1 = +Q + Φ(Q) + B1 := Acc + // B2 = -Q - Φ(Q) + B2 := G1Affine{} + B2.Neg(api, B1) + // B3 = +Q - Φ(Q) + B3 := tableQ[1] + B3.AddAssign(api, tablePhiQ[0]) + // B4 = -Q + Φ(Q) + B4 := G1Affine{} + B4.Neg(api, B3) + // + // Note that half the points are negatives of the other half, + // hence have the same X coordinates. + + // However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen + // that Acc==B or -B. So we add the point H=(0,1) on BLS24-315 of order 2 + // to it to avoid incomplete additions in the loop by forcing Acc to be + // different than the stored B. Normally, the point H should be "killed + // out" by the first doubling in the loop and the result will remain + // unchanged. However, we are using affine coordinates that do not encode + // the infinity point. Given the affine formulae, doubling (0,1) results in + // (0,-1). Since the loop size N=nbits-1 is even we need to subtract + // [2^N]H = (0,1) from the result at the end. 
+ // + // Acc = Q + Φ(Q) + H + Acc.AddAssign(api, G1Affine{X: 0, Y: 1}) + + for i := nbits - 1; i > 0; i-- { + B.X = api.Select(api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X) + B.Y = api.Lookup2(s1bits[i], s2bits[i], B2.Y, B3.Y, B4.Y, B1.Y) + // Acc = [2]Acc + B Acc.DoubleAndAdd(api, &Acc, &B) } // i = 0 + // subtract Q, Φ(Q) if the first bits of s1, s2 are 0. // When cfg.CompleteArithmetic is set, we use AddUnified instead of Add. This means // when s=0 then Acc=(0,0) because AddUnified(Q, -Q) = (0,0). if cfg.CompleteArithmetic { @@ -276,6 +275,15 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl Acc.Select(api, s2bits[0], Acc, tablePhiQ[0]) } + if cfg.CompleteArithmetic { + // subtract [2^N]H = (0,1) since we added H at the beginning + Acc.AddUnified(api, G1Affine{X: 0, Y: -1}) + Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc) + } else { + // subtract [2^N]H = (0,1) since we added H at the beginning + Acc.AddAssign(api, G1Affine{X: 0, Y: -1}) + } + P.X = Acc.X P.Y = Acc.Y @@ -424,8 +432,25 @@ func (P *G1Affine) ScalarMulBase(api frontend.API, s frontend.Variable, opts ... return P.ScalarMul(api, generator, s, opts...) } +func (P *G1Affine) jointScalarMul(api frontend.API, Q, R G1Affine, s, t frontend.Variable, opts ...algopts.AlgebraOption) *G1Affine { + cfg, err := algopts.NewConfig(opts...) + if err != nil { + panic(err) + } + if cfg.CompleteArithmetic { + // TODO @yelhousni: optimize + var tmp G1Affine + P.ScalarMul(api, Q, s, opts...) + tmp.ScalarMul(api, R, t, opts...) 
+ P.AddUnified(api, tmp) + } else { + P.jointScalarMulUnsafe(api, Q, R, s, t) + } + return P +} + // P = [s]Q + [t]R using Shamir's trick -func (P *G1Affine) jointScalarMul(api frontend.API, Q, R G1Affine, s, t frontend.Variable) *G1Affine { +func (P *G1Affine) jointScalarMulUnsafe(api frontend.API, Q, R G1Affine, s, t frontend.Variable) *G1Affine { cc := getInnerCurveConfig(api.Compiler().Field()) sd, err := api.Compiler().NewHint(decomposeScalarG1, 3, s) @@ -510,12 +535,30 @@ func (P *G1Affine) jointScalarMul(api frontend.API, Q, R G1Affine, s, t frontend return P } -// scalarBitsMul computes s * p and returns it where sBits is the bit decomposition of s. It doesn't modify p nor sBits. +// scalarBitsMul computes [s]Q and returns it where sBits is the bit decomposition of s. It doesn't modify Q nor sBits. // The method is similar to varScalarMul. -func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits []frontend.Variable) *G1Affine { +func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits []frontend.Variable, opts ...algopts.AlgebraOption) *G1Affine { + cfg, err := algopts.NewConfig(opts...) + if err != nil { + panic(err) + } + var selector frontend.Variable + if cfg.CompleteArithmetic { + // if Q=(0,0) we assign a dummy (1,1) to Q and continue + selector = api.And(api.IsZero(Q.X), api.IsZero(Q.Y)) + Q.Select(api, selector, G1Affine{X: 1, Y: 1}, Q) + } + + // We use the endomorphism à la GLV to compute [s]Q as + // [s1]Q + [s2]Φ(Q) + // + // The context we are working is based on the `outer` curve. However, the + // points and the operations on the points are performed on the `inner` + // curve of the outer curve. We require some parameters from the inner + // curve. 
cc := getInnerCurveConfig(api.Compiler().Field()) - nbits := cc.lambda.BitLen() + 1 - var Acc /*accumulator*/, B, B2 /*tmp vars*/ G1Affine + nbits := 127 + // precompute -Q, -Φ(Q), Φ(Q) var tableQ, tablePhiQ [2]G1Affine tableQ[1] = Q @@ -523,48 +566,75 @@ func (P *G1Affine) scalarBitsMul(api frontend.API, Q G1Affine, s1bits, s2bits [] cc.phi1(api, &tablePhiQ[1], &Q) tablePhiQ[0].Neg(api, tablePhiQ[1]) - // We now initialize the accumulator. Due to the way the scalar is - // decomposed, either the high bits of s1 or s2 are set and we can use the - // incomplete addition laws. - - // Acc = Q + Φ(Q) - Acc = tableQ[1] - Acc.AddAssign(api, tablePhiQ[1]) - - // However, we can not directly add step value conditionally as we may get - // to incomplete path of the addition formula. We either add or subtract - // step value from [2] Acc (instead of conditionally adding step value to - // Acc): - // Acc = [2] (Q + Φ(Q)) ± Q ± Φ(Q) - // only y coordinate differs for negation, select on that instead. - B.X = tableQ[0].X - B.Y = api.Select(s1bits[nbits-1], tableQ[1].Y, tableQ[0].Y) - Acc.DoubleAndAdd(api, &Acc, &B) - B.X = tablePhiQ[0].X - B.Y = api.Select(s2bits[nbits-1], tablePhiQ[1].Y, tablePhiQ[0].Y) - Acc.AddAssign(api, B) - - // second bit - B.X = tableQ[0].X - B.Y = api.Select(s1bits[nbits-2], tableQ[1].Y, tableQ[0].Y) - Acc.DoubleAndAdd(api, &Acc, &B) - B.X = tablePhiQ[0].X - B.Y = api.Select(s2bits[nbits-2], tablePhiQ[1].Y, tablePhiQ[0].Y) - Acc.AddAssign(api, B) - - B2.X = tablePhiQ[0].X - for i := nbits - 3; i > 0; i-- { - B.X = Q.X - B.Y = api.Select(s1bits[i], tableQ[1].Y, tableQ[0].Y) - B2.Y = api.Select(s2bits[i], tablePhiQ[1].Y, tablePhiQ[0].Y) - B.AddAssign(api, B2) + // we suppose that the first bits of the sub-scalars are 1 and set: + // Acc = Q + Φ(Q) = -Φ²(Q) + var Acc, B G1Affine + cc.phi2Neg(api, &Acc, &Q) + + // At each iteration we need to compute: + // [2]Acc ± Q ± Φ(Q). 
+ // We can compute [2]Acc and look up the (precomputed) point B from: + // B1 = +Q + Φ(Q) + B1 := Acc + // B2 = -Q - Φ(Q) + B2 := G1Affine{} + B2.Neg(api, B1) + // B3 = +Q - Φ(Q) + B3 := tableQ[1] + B3.AddAssign(api, tablePhiQ[0]) + // B4 = -Q + Φ(Q) + B4 := G1Affine{} + B4.Neg(api, B3) + // + // Note that half the points are negatives of the other half, + // hence have the same X coordinates. + + // However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen + // that Acc==B or -B. So we add the point H=(0,1) on BLS12-377 of order 2 + // to it to avoid incomplete additions in the loop by forcing Acc to be + // different than the stored B. Normally, the point H should be "killed + // out" by the first doubling in the loop and the result will remain + // unchanged. However, we are using affine coordinates that do not encode + // the infinity point. Given the affine formulae, doubling (0,1) results in + // (0,-1). Since the loop size N=nbits-1 is even we need to subtract + // [2^N]H = (0,1) from the result at the end. + // + // Acc = Q + Φ(Q) + H + Acc.AddAssign(api, G1Affine{X: 0, Y: 1}) + + for i := nbits - 1; i > 0; i-- { + B.X = api.Select(api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X) + B.Y = api.Lookup2(s1bits[i], s2bits[i], B2.Y, B3.Y, B4.Y, B1.Y) + // Acc = [2]Acc + B Acc.DoubleAndAdd(api, &Acc, &B) } - tableQ[0].AddAssign(api, Acc) - Acc.Select(api, s1bits[0], Acc, tableQ[0]) - tablePhiQ[0].AddAssign(api, Acc) - Acc.Select(api, s2bits[0], Acc, tablePhiQ[0]) + // i = 0 + // subtract the Q, R, Φ(Q), Φ(R) if the first bits are 0. + // When cfg.CompleteArithmetic is set, we use AddUnified instead of Add. This means + // when s=0 then Acc=(0,0) because AddUnified(Q, -Q) = (0,0). 
+ if cfg.CompleteArithmetic { + tableQ[0].AddUnified(api, Acc) + Acc.Select(api, s1bits[0], Acc, tableQ[0]) + tablePhiQ[0].AddUnified(api, Acc) + Acc.Select(api, s2bits[0], Acc, tablePhiQ[0]) + Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc) + } else { + tableQ[0].AddAssign(api, Acc) + Acc.Select(api, s1bits[0], Acc, tableQ[0]) + tablePhiQ[0].AddAssign(api, Acc) + Acc.Select(api, s2bits[0], Acc, tablePhiQ[0]) + } + + if cfg.CompleteArithmetic { + // subtract [2^N]H = (0,1) since we added H at the beginning + Acc.AddUnified(api, G1Affine{X: 0, Y: -1}) + Acc.Select(api, selector, G1Affine{X: 0, Y: 0}, Acc) + } else { + // subtract [2^N]H = (0,1) since we added H at the beginning + Acc.AddAssign(api, G1Affine{X: 0, Y: -1}) + + } P.X = Acc.X P.Y = Acc.Y diff --git a/std/algebra/native/sw_bls24315/g1_test.go b/std/algebra/native/sw_bls24315/g1_test.go index 629647bc96..4387f94c05 100644 --- a/std/algebra/native/sw_bls24315/g1_test.go +++ b/std/algebra/native/sw_bls24315/g1_test.go @@ -26,6 +26,7 @@ import ( "github.com/consensys/gnark/frontend" "github.com/consensys/gnark/std/algebra/algopts" "github.com/consensys/gnark/std/math/emulated" + "github.com/consensys/gnark/std/math/emulated/emparams" "github.com/consensys/gnark/test" bls24315 "github.com/consensys/gnark-crypto/ecc/bls24-315" @@ -456,6 +457,114 @@ func TestVarScalarMulBaseG1(t *testing.T) { assert.CheckCircuit(&circuit, test.WithValidAssignment(&witness), test.WithCurves(ecc.BW6_633), test.NoProverChecks()) } +type MultiScalarMulEdgeCasesTest struct { + Points []G1Affine + Scalars []emulated.Element[ScalarField] + Res G1Affine +} + +func (c *MultiScalarMulEdgeCasesTest) Define(api frontend.API) error { + cr, err := NewCurve(api) + if err != nil { + return err + } + ps := make([]*G1Affine, len(c.Points)) + for i := range c.Points { + ps[i] = &c.Points[i] + } + ss := make([]*emulated.Element[ScalarField], len(c.Scalars)) + for i := range c.Scalars { + ss[i] = &c.Scalars[i] + } + res, err := 
cr.MultiScalarMul(ps, ss, algopts.WithCompleteArithmetic()) + if err != nil { + return err + } + cr.AssertIsEqual(res, &c.Res) + return nil +} + +func TestMultiScalarMulEdgeCases(t *testing.T) { + assert := test.NewAssert(t) + nbLen := 5 + P := make([]bls24315.G1Affine, nbLen) + S := make([]fr.Element, nbLen) + for i := 0; i < nbLen; i++ { + S[i].SetRandom() + P[i].ScalarMultiplicationBase(S[i].BigInt(new(big.Int))) + } + var res, infinity bls24315.G1Affine + _, err := res.MultiExp(P, S, ecc.MultiExpConfig{}) + + assert.NoError(err) + cP := make([]G1Affine, len(P)) + cS := make([]emulated.Element[ScalarField], len(S)) + + // s1 * (0,0) + s2 * (0,0) + s3 * (0,0) + s4 * (0,0) + s5 * (0,0) == (0,0) + for i := range cP { + cP[i] = NewG1Affine(infinity) + } + for i := range cS { + cS[i] = NewScalar(S[i]) + } + assignment1 := MultiScalarMulEdgeCasesTest{ + Points: cP, + Scalars: cS, + Res: NewG1Affine(infinity), + } + err = test.IsSolved(&MultiScalarMulEdgeCasesTest{ + Points: make([]G1Affine, nbLen), + Scalars: make([]emulated.Element[ScalarField], nbLen), + }, &assignment1, ecc.BW6_633.ScalarField()) + assert.NoError(err) + + // 0 * P1 + 0 * P2 + 0 * P3 + 0 * P4 + 0 * P5 == (0,0) + for i := range cP { + cP[i] = NewG1Affine(P[i]) + } + for i := range cS { + cS[i] = emulated.ValueOf[emparams.BLS24315Fr](0) + } + assignment2 := MultiScalarMulEdgeCasesTest{ + Points: cP, + Scalars: cS, + Res: NewG1Affine(infinity), + } + err = test.IsSolved(&MultiScalarMulEdgeCasesTest{ + Points: make([]G1Affine, nbLen), + Scalars: make([]emulated.Element[ScalarField], nbLen), + }, &assignment2, ecc.BW6_633.ScalarField()) + assert.NoError(err) + + // s1 * (0,0) + s2 * P2 + s3 * (0,0) + s4 * P4 + 0 * P5 == s2 * P + s4 * P4 + var res3 bls24315.G1Affine + res3.ScalarMultiplication(&P[1], S[1].BigInt(new(big.Int))) + res.ScalarMultiplication(&P[3], S[3].BigInt(new(big.Int))) + res3.Add(&res3, &res) + for i := range cP { + cP[i] = NewG1Affine(P[i]) + } + cP[0].X = infinity.X + cP[0].Y = 
infinity.Y + cP[2].X = infinity.X + cP[2].Y = infinity.Y + for i := range cS { + cS[i] = NewScalar(S[i]) + } + cS[4] = emulated.ValueOf[emparams.BLS24315Fr](0) + + assignment3 := MultiScalarMulEdgeCasesTest{ + Points: cP, + Scalars: cS, + Res: NewG1Affine(res3), + } + err = test.IsSolved(&MultiScalarMulEdgeCasesTest{ + Points: make([]G1Affine, nbLen), + Scalars: make([]emulated.Element[ScalarField], nbLen), + }, &assignment3, ecc.BW6_633.ScalarField()) + assert.NoError(err) +} + type MultiScalarMulTest struct { Points []G1Affine Scalars []emulated.Element[ScalarField] @@ -516,6 +625,61 @@ func TestMultiScalarMul(t *testing.T) { assert.NoError(err) } +type g1JointScalarMulEdgeCases struct { + A, B G1Affine + C G1Affine `gnark:",public"` + R, S frontend.Variable +} + +func (circuit *g1JointScalarMulEdgeCases) Define(api frontend.API) error { + expected1 := G1Affine{} + expected2 := G1Affine{} + expected3 := G1Affine{} + expected4 := G1Affine{} + infinity := G1Affine{X: 0, Y: 0} + expected1.jointScalarMul(api, infinity, infinity, circuit.R, circuit.S, algopts.WithCompleteArithmetic()) + expected2.jointScalarMul(api, circuit.A, circuit.B, big.NewInt(0), big.NewInt(0), algopts.WithCompleteArithmetic()) + expected3.jointScalarMul(api, circuit.A, infinity, circuit.R, circuit.S, algopts.WithCompleteArithmetic()) + expected4.jointScalarMul(api, circuit.A, circuit.B, circuit.R, big.NewInt(0), algopts.WithCompleteArithmetic()) + _expected := G1Affine{} + _expected.ScalarMul(api, circuit.A, circuit.R, algopts.WithCompleteArithmetic()) + expected1.AssertIsEqual(api, infinity) + expected2.AssertIsEqual(api, infinity) + expected3.AssertIsEqual(api, _expected) + expected4.AssertIsEqual(api, _expected) + return nil +} + +func TestJointScalarMulG1EdgeCases(t *testing.T) { + // sample random point + _a := randomPointG1() + _b := randomPointG1() + var a, b, c bls24315.G1Affine + a.FromJacobian(&_a) + b.FromJacobian(&_b) + + // create the cs + var circuit, witness 
g1JointScalarMulEdgeCases + var r, s fr.Element + _, _ = r.SetRandom() + _, _ = s.SetRandom() + witness.R = r.String() + witness.S = s.String() + // assign the inputs + witness.A.Assign(&a) + witness.B.Assign(&b) + // compute the result + var br, bs big.Int + _a.ScalarMultiplication(&_a, r.BigInt(&br)) + _b.ScalarMultiplication(&_b, s.BigInt(&bs)) + _a.AddAssign(&_b) + c.FromJacobian(&_a) + witness.C.Assign(&c) + + assert := test.NewAssert(t) + assert.CheckCircuit(&circuit, test.WithValidAssignment(&witness), test.WithCurves(ecc.BW6_633)) +} + type g1JointScalarMul struct { A, B G1Affine C G1Affine `gnark:",public"` @@ -616,3 +780,156 @@ func randomPointG1() bls24315.G1Jac { return p1 } + +type MultiScalarMulFoldedEdgeCasesTest struct { + Points []G1Affine + Scalars []emulated.Element[ScalarField] + Res G1Affine +} + +func (c *MultiScalarMulFoldedEdgeCasesTest) Define(api frontend.API) error { + cr, err := NewCurve(api) + if err != nil { + return err + } + ps := make([]*G1Affine, len(c.Points)) + for i := range c.Points { + ps[i] = &c.Points[i] + } + ss := make([]*emulated.Element[ScalarField], len(c.Scalars)) + for i := range c.Scalars { + ss[i] = &c.Scalars[i] + } + res, err := cr.MultiScalarMul(ps, ss, algopts.WithFoldingScalarMul(), algopts.WithCompleteArithmetic()) + if err != nil { + return err + } + cr.AssertIsEqual(res, &c.Res) + return nil +} + +func TestMultiScalarMulFoldedEdgeCases(t *testing.T) { + assert := test.NewAssert(t) + nbLen := 5 + P := make([]bls24315.G1Affine, nbLen) + S := make([]fr.Element, nbLen) + S[0].SetOne() + S[1].SetRandom() + S[2].Square(&S[1]) + S[3].Mul(&S[1], &S[2]) + S[4].Mul(&S[1], &S[3]) + for i := 0; i < nbLen; i++ { + P[i].ScalarMultiplicationBase(S[i].BigInt(new(big.Int))) + } + var res, infinity bls24315.G1Affine + _, err := res.MultiExp(P, S, ecc.MultiExpConfig{}) + + assert.NoError(err) + cP := make([]G1Affine, len(P)) + cS := make([]emulated.Element[ScalarField], len(S)) + + // s^0 * (0,0) + s^1 * (0,0) + s^2 * (0,0) + 
s^3 * (0,0) + s^4 * (0,0) == (0,0) + for i := range cP { + cP[i] = NewG1Affine(infinity) + } + // s0 = s + S[0].Set(&S[1]) + for i := range cS { + cS[i] = NewScalar(S[i]) + } + assignment1 := MultiScalarMulFoldedEdgeCasesTest{ + Points: cP, + Scalars: cS, + Res: NewG1Affine(infinity), + } + err = test.IsSolved(&MultiScalarMulFoldedEdgeCasesTest{ + Points: make([]G1Affine, nbLen), + Scalars: make([]emulated.Element[ScalarField], nbLen), + }, &assignment1, ecc.BW6_633.ScalarField()) + assert.NoError(err) + + // 0^0 * P1 + 0 * P2 + 0 * P3 + 0 * P4 + 0 * P5 == P1 + for i := range cP { + cP[i] = NewG1Affine(P[i]) + } + for i := range cS { + cS[i] = emulated.ValueOf[emparams.BLS24315Fr](0) + } + + assignment3 := MultiScalarMulFoldedEdgeCasesTest{ + Points: cP, + Scalars: cS, + Res: NewG1Affine(P[0]), + } + err = test.IsSolved(&MultiScalarMulFoldedEdgeCasesTest{ + Points: make([]G1Affine, nbLen), + Scalars: make([]emulated.Element[ScalarField], nbLen), + }, &assignment3, ecc.BW6_633.ScalarField()) + assert.NoError(err) +} + +type MultiScalarMulFoldedTest struct { + Points []G1Affine + Scalars []emulated.Element[ScalarField] + Res G1Affine +} + +func (c *MultiScalarMulFoldedTest) Define(api frontend.API) error { + cr, err := NewCurve(api) + if err != nil { + return err + } + ps := make([]*G1Affine, len(c.Points)) + for i := range c.Points { + ps[i] = &c.Points[i] + } + ss := make([]*emulated.Element[ScalarField], len(c.Scalars)) + for i := range c.Scalars { + ss[i] = &c.Scalars[i] + } + res, err := cr.MultiScalarMul(ps, ss, algopts.WithFoldingScalarMul()) + if err != nil { + return err + } + cr.AssertIsEqual(res, &c.Res) + return nil +} + +func TestMultiScalarMulFolded(t *testing.T) { + assert := test.NewAssert(t) + nbLen := 4 + P := make([]bls24315.G1Affine, nbLen) + S := make([]fr.Element, nbLen) + // [s^0]P0 + [s^1]P1 + [s^2]P2 + [s^3]P3 = P0 + [s]P1 + [s^2]P2 + [s^3]P3 + S[0].SetOne() + S[1].SetRandom() + S[2].Square(&S[1]) + S[3].Mul(&S[1], &S[2]) + for i := 0; i < 
nbLen; i++ { + P[i].ScalarMultiplicationBase(S[i].BigInt(new(big.Int))) + } + var res bls24315.G1Affine + _, err := res.MultiExp(P, S, ecc.MultiExpConfig{}) + + assert.NoError(err) + cP := make([]G1Affine, len(P)) + for i := range cP { + cP[i] = NewG1Affine(P[i]) + } + cS := make([]emulated.Element[ScalarField], len(S)) + // s0 = s + S[0].Set(&S[1]) + for i := range cS { + cS[i] = NewScalar(S[i]) + } + assignment := MultiScalarMulFoldedTest{ + Points: cP, + Scalars: cS, + Res: NewG1Affine(res), + } + err = test.IsSolved(&MultiScalarMulFoldedTest{ + Points: make([]G1Affine, nbLen), + Scalars: make([]emulated.Element[ScalarField], nbLen), + }, &assignment, ecc.BW6_633.ScalarField()) + assert.NoError(err) +} diff --git a/std/algebra/native/sw_bls24315/hints.go b/std/algebra/native/sw_bls24315/hints.go index 1269ecaca8..0404212674 100644 --- a/std/algebra/native/sw_bls24315/hints.go +++ b/std/algebra/native/sw_bls24315/hints.go @@ -1,6 +1,7 @@ package sw_bls24315 import ( + "fmt" "math/big" "github.com/consensys/gnark-crypto/ecc" @@ -10,6 +11,7 @@ import ( func GetHints() []solver.Hint { return []solver.Hint{ decomposeScalarG1, + decomposeScalarG1Simple, decomposeScalarG2, } } @@ -18,6 +20,25 @@ func init() { solver.RegisterHint(GetHints()...) 
} +func decomposeScalarG1Simple(scalarField *big.Int, inputs []*big.Int, outputs []*big.Int) error { + if len(inputs) != 1 { + return fmt.Errorf("expecting one input") + } + if len(outputs) != 3 { + return fmt.Errorf("expecting three outputs") + } + cc := getInnerCurveConfig(scalarField) + sp := ecc.SplitScalar(inputs[0], cc.glvBasis) + outputs[0].Set(&(sp[0])) + outputs[1].Set(&(sp[1])) + // figure out how many times we have overflowed + outputs[2].Mul(outputs[1], cc.lambda).Add(outputs[2], outputs[0]) + outputs[2].Sub(outputs[2], inputs[0]) + outputs[2].Div(outputs[2], cc.fr) + + return nil +} + func decomposeScalarG1(scalarField *big.Int, inputs []*big.Int, res []*big.Int) error { cc := getInnerCurveConfig(scalarField) sp := ecc.SplitScalar(inputs[0], cc.glvBasis) diff --git a/std/algebra/native/sw_bls24315/pairing2.go b/std/algebra/native/sw_bls24315/pairing2.go index d3830cca42..3bbbc4d041 100644 --- a/std/algebra/native/sw_bls24315/pairing2.go +++ b/std/algebra/native/sw_bls24315/pairing2.go @@ -107,7 +107,7 @@ func (c *Curve) jointScalarMul(P1, P2 *G1Affine, s1, s2 *Scalar, opts ...algopts res := &G1Affine{} varScalar1 := c.packScalarToVar(s1) varScalar2 := c.packScalarToVar(s2) - res.jointScalarMul(c.api, *P1, *P2, varScalar1, varScalar2) + res.jointScalarMul(c.api, *P1, *P2, varScalar1, varScalar2, opts...) return res } @@ -119,7 +119,7 @@ func (c *Curve) ScalarMul(P *G1Affine, s *Scalar, opts ...algopts.AlgebraOption) Y: P.Y, } varScalar := c.packScalarToVar(s) - res.ScalarMul(c.api, *P, varScalar) + res.ScalarMul(c.api, *P, varScalar, opts...) return res } @@ -128,7 +128,7 @@ func (c *Curve) ScalarMul(P *G1Affine, s *Scalar, opts ...algopts.AlgebraOption) func (c *Curve) ScalarMulBase(s *Scalar, opts ...algopts.AlgebraOption) *G1Affine { res := new(G1Affine) varScalar := c.packScalarToVar(s) - res.ScalarMulBase(c.api, varScalar) + res.ScalarMulBase(c.api, varScalar, opts...) 
return res } @@ -146,6 +146,10 @@ func (c *Curve) MultiScalarMul(P []*G1Affine, scalars []*Scalar, opts ...algopts if err != nil { return nil, fmt.Errorf("new config: %w", err) } + addFn := c.Add + if cfg.CompleteArithmetic { + addFn = c.AddUnified + } if !cfg.FoldMulti { if len(P) != len(scalars) { return nil, fmt.Errorf("mismatching points and scalars slice lengths") @@ -160,7 +164,7 @@ func (c *Curve) MultiScalarMul(P []*G1Affine, scalars []*Scalar, opts ...algopts } for i := 1; i < n-1; i += 2 { q := c.jointScalarMul(P[i-1], P[i], scalars[i-1], scalars[i], opts...) - res = c.Add(res, q) + res = addFn(res, q) } return res, nil } else { @@ -171,24 +175,27 @@ func (c *Curve) MultiScalarMul(P []*G1Affine, scalars []*Scalar, opts ...algopts gamma := c.packScalarToVar(scalars[0]) // decompose gamma in the endomorphism eigenvalue basis and bit-decompose the sub-scalars cc := getInnerCurveConfig(c.api.Compiler().Field()) - sd, err := c.api.Compiler().NewHint(decomposeScalarG1, 3, gamma) + sd, err := c.api.Compiler().NewHint(decomposeScalarG1Simple, 3, gamma) if err != nil { panic(err) } gamma1, gamma2 := sd[0], sd[1] - c.api.AssertIsEqual(c.api.Add(gamma1, c.api.Mul(gamma2, cc.lambda)), c.api.Add(gamma, c.api.Mul(cc.fr, sd[2]))) - nbits := cc.lambda.BitLen() + 1 + c.api.AssertIsEqual( + c.api.Add(gamma1, c.api.Mul(gamma2, cc.lambda)), + c.api.Add(gamma, c.api.Mul(cc.fr, sd[2])), + ) + nbits := 127 gamma1Bits := c.api.ToBinary(gamma1, nbits) gamma2Bits := c.api.ToBinary(gamma2, nbits) // points and scalars must be non-zero var res G1Affine - res.scalarBitsMul(c.api, *P[len(P)-1], gamma1Bits, gamma2Bits) + res.scalarBitsMul(c.api, *P[len(P)-1], gamma1Bits, gamma2Bits, opts...) for i := len(P) - 2; i > 0; i-- { - res = *c.Add(P[i], &res) - res.scalarBitsMul(c.api, res, gamma1Bits, gamma2Bits) + res = *addFn(P[i], &res) + res.scalarBitsMul(c.api, res, gamma1Bits, gamma2Bits, opts...) 
} - res = *c.Add(P[0], &res) + res = *addFn(P[0], &res) return &res, nil } } @@ -475,4 +482,4 @@ func (c *Curve) packScalarToVar(s *Scalar) frontend.Variable { } // ScalarField defines the [emulated.FieldParams] implementation on a one limb of the scalar field. -type ScalarField = emparams.BLS12315Fr +type ScalarField = emparams.BLS24315Fr diff --git a/std/math/emulated/emparams/emparams.go b/std/math/emulated/emparams/emparams.go index ebbf7d5d8c..b07fb6e96b 100644 --- a/std/math/emulated/emparams/emparams.go +++ b/std/math/emulated/emparams/emparams.go @@ -254,7 +254,7 @@ type BW6761Fr struct{ sixLimbPrimeField } func (fp BW6761Fr) Modulus() *big.Int { return ecc.BW6_761.ScalarField() } -// BLS12315Fp provides type parametrization for field emulation: +// BLS24315Fp provides type parametrization for field emulation: // - limbs: 5 // - limb width: 64 bits // @@ -264,11 +264,11 @@ func (fp BW6761Fr) Modulus() *big.Int { return ecc.BW6_761.ScalarField() } // 39705142709513438335025689890408969744933502416914749335064285505637884093126342347073617133569 (base 10) // // This is the base field of the BLS24-315 curve. -type BLS12315Fp struct{ fiveLimbPrimeField } +type BLS24315Fp struct{ fiveLimbPrimeField } -func (fp BLS12315Fp) Modulus() *big.Int { return ecc.BLS24_315.BaseField() } +func (fp BLS24315Fp) Modulus() *big.Int { return ecc.BLS24_315.BaseField() } -// BLS12315Fr provides type parametrization for field emulation: +// BLS24315Fr provides type parametrization for field emulation: // - limbs: 4 // - limb width: 64 bits // @@ -278,6 +278,6 @@ func (fp BLS12315Fp) Modulus() *big.Int { return ecc.BLS24_315.BaseField() } // 0x196deac24a9da12b25fc7ec9cf927a98c8c480ece644e36419d0c5fd00c00001 (base 10) // // This is the scalar field of the BLS24-315 curve. 
-type BLS12315Fr struct{ fourLimbPrimeField } +type BLS24315Fr struct{ fourLimbPrimeField } -func (fr BLS12315Fr) Modulus() *big.Int { return ecc.BLS24_315.ScalarField() } +func (fr BLS24315Fr) Modulus() *big.Int { return ecc.BLS24_315.ScalarField() } From 14d47845f51ce912cbb979addba76e1c4419a335 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Mon, 18 Mar 2024 16:27:28 -0400 Subject: [PATCH 07/12] perf(2-chain/bls12): optimize varScalarMul for G2 --- std/algebra/native/sw_bls12377/g2.go | 144 +++++++++++++++------------ 1 file changed, 81 insertions(+), 63 deletions(-) diff --git a/std/algebra/native/sw_bls12377/g2.go b/std/algebra/native/sw_bls12377/g2.go index d096da0e97..3b8e273dc5 100644 --- a/std/algebra/native/sw_bls12377/g2.go +++ b/std/algebra/native/sw_bls12377/g2.go @@ -194,30 +194,32 @@ func (P *g2AffP) ScalarMul(api frontend.API, Q g2AffP, s interface{}, opts ...al } } -// varScalarMul sets P = [s] Q and returns P. +// varScalarMul sets P = [s]Q and returns P. It doesn't modify Q nor s. +// It implements an optimized version based on algorithm 1 of [Halo] (see Section 6.2 and appendix C). +// +// ⚠️ The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set. +// (0,0) is not on the curve but we conventionally take it as the +// neutral/infinity point as per the [EVM]. +// +// [Halo]: https://eprint.iacr.org/2019/1021.pdf +// [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, opts ...algopts.AlgebraOption) *g2AffP { cfg, err := algopts.NewConfig(opts...) if err != nil { panic(err) } - // This method computes [s] Q. We use several methods to reduce the number - // of added constraints - first, instead of classical double-and-add, we use - // the optimized version from https://github.com/zcash/zcash/issues/3924 - // which allows to omit computation of several intermediate values. 
- // Secondly, we use the GLV scalar multiplication to reduce the number - // iterations in the main loop. There is a small difference though - as - // two-bit select takes three constraints, then it takes as many constraints - // to compute ± Q ± Φ(Q) every iteration instead of selecting the value - // from a precomputed table. However, precomputing the table adds 12 - // additional constraints and thus table-version is more expensive than - // addition-version. var selector frontend.Variable + one := fields_bls12377.E2{A0: 1, A1: 0} + zero := fields_bls12377.E2{A0: 0, A1: 0} if cfg.CompleteArithmetic { // if Q=(0,0) we assign a dummy (1,1) to Q and continue selector = api.And(Q.X.IsZero(api), Q.Y.IsZero(api)) - one := fields_bls12377.E2{A0: 1, A1: 0} Q.Select(api, selector, g2AffP{X: one, Y: one}, Q) } + + // We use the endomorphism à la GLV to compute [s]Q as + // [s1]Q + [s2]Φ(Q) + // // The context we are working is based on the `outer` curve. However, the // points and the operations on the points are performed on the `inner` // curve of the outer curve. We require some parameters from the inner @@ -227,31 +229,24 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o // the hints allow to decompose the scalar s into s1 and s2 such that // s1 + λ * s2 == s mod r, // where λ is third root of one in 𝔽_r. - sd, err := api.Compiler().NewHint(decomposeScalarG2, 3, s) + sd, err := api.Compiler().NewHint(decomposeScalarG1Simple, 2, s) if err != nil { // err is non-nil only for invalid number of inputs panic(err) } s1, s2 := sd[0], sd[1] - // when we split scalar, then s1, s2 < lambda by default. However, to have - // the high 1-2 bits of s1, s2 set, the hint functions compute the - // decomposition for - // s + k*r (for some k) - // instead and omits the last reduction. 
Thus, to constrain s1 and s2, we - // have to assert that - // s1 + λ * s2 == s + k*r - api.AssertIsEqual(api.Add(s1, api.Mul(s2, cc.lambda)), api.Add(s, api.Mul(cc.fr, sd[2]))) - - // As the decomposed scalars are not fully reduced, then in addition of - // having the high bit set, an overflow bit may also be set. Thus, the total - // number of bits may be one more than the bitlength of λ. - nbits := cc.lambda.BitLen() + 1 + // s1 + λ * s2 == s + api.AssertIsEqual( + api.Add(s1, api.Mul(s2, cc.lambda)), + s, + ) + // For BLS12 λ bitsize is 127 equal to half of r bitsize + nbits := cc.lambda.BitLen() s1bits := api.ToBinary(s1, nbits) s2bits := api.ToBinary(s2, nbits) - var Acc, B, B1, B2, B3, B4 g2AffP // precompute -Q, -Φ(Q), Φ(Q) var tableQ, tablePhiQ [2]g2AffP tableQ[1] = Q @@ -259,49 +254,53 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o cc.phi2(api, &tablePhiQ[1], &Q) tablePhiQ[0].Neg(api, tablePhiQ[1]) - // We now initialize the accumulator. Due to the way the scalar is - // decomposed, either the high bits of s1 or s2 are set and we can use the - // incomplete addition laws. - - // Acc = Q + Φ(Q) = B1 + // we suppose that the first bits of the sub-scalars are 1 and set: + // Acc = Q + Φ(Q) = -Φ²(Q) + var Acc, B g2AffP cc.phi1Neg(api, &Acc, &Q) - B1 = Acc - - // However, we can not directly add step value conditionally as we may get - // to incomplete path of the addition formula. We either add or subtract - // step value from [2] Acc (instead of conditionally adding step value to - // Acc): - // Acc = [2] (Q + Φ(Q)) ± Q ± Φ(Q) - // only y coordinate differs for negation, select on that instead. 
- B.X = tableQ[0].X - B.Y.Select(api, s1bits[nbits-1], tableQ[1].Y, tableQ[0].Y) - Acc.DoubleAndAdd(api, &Acc, &B) - B.X = tablePhiQ[0].X - B.Y.Select(api, s2bits[nbits-1], tablePhiQ[1].Y, tablePhiQ[0].Y) - Acc.AddAssign(api, B) - - // second bit - B.X = tableQ[0].X - B.Y.Select(api, s1bits[nbits-2], tableQ[1].Y, tableQ[0].Y) - Acc.DoubleAndAdd(api, &Acc, &B) - B.X = tablePhiQ[0].X - B.Y.Select(api, s2bits[nbits-2], tablePhiQ[1].Y, tablePhiQ[0].Y) - Acc.AddAssign(api, B) - - // B2 = -Q-Φ(Q) + + // At each iteration we need to compute: + // [2]Acc ± Q ± Φ(Q). + // We can compute [2]Acc and look up the (precomputed) point B from: + // B1 = +Q + Φ(Q) + B1 := Acc + // B2 = -Q - Φ(Q) + B2 := g2AffP{} B2.Neg(api, B1) - // B3 = Q-Φ(Q) - B3 = tablePhiQ[0] - B3.AddAssign(api, tableQ[1]) - // B4 = -Q+Φ(Q) + // B3 = +Q - Φ(Q) + B3 := tableQ[1] + B3.AddAssign(api, tablePhiQ[0]) + // B4 = -Q + Φ(Q) + B4 := g2AffP{} B4.Neg(api, B3) - for i := nbits - 3; i > 0; i-- { + // + // Note that half the points are negatives of the other half, + // hence have the same X coordinates. + + // However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen + // that Acc==B or -B. So we add the base point G to it to avoid incomplete + // additions in the loop by forcing Acc to be different than the stored B. + // However we need at the end to subtract [2^nbits]G or conditionally + // [2^nbits]Φ²(G) from the result. + // + // Acc = Q + Φ(Q) + G + points := getTwistPoints() + Acc.AddAssign(api, + g2AffP{ + X: fields_bls12377.E2{A0: points.G2x[0], A1: points.G2x[1]}, + Y: fields_bls12377.E2{A0: points.G2y[0], A1: points.G2y[1]}, + }, + ) + + for i := nbits - 1; i > 0; i-- { B.X.Select(api, api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X) B.Y.Lookup2(api, s1bits[i], s2bits[i], B2.Y, B3.Y, B4.Y, B1.Y) + // Acc = [2]Acc + B Acc.DoubleAndAdd(api, &Acc, &B) } // i = 0 + // subtract the Q, R, Φ(Q), Φ(R) if the first bits are 0. // When cfg.CompleteArithmetic is set, we use AddUnified instead of Add. 
This means // when s=0 then Acc=(0,0) because AddUnified(Q, -Q) = (0,0). if cfg.CompleteArithmetic { @@ -309,7 +308,6 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o Acc.Select(api, s1bits[0], Acc, tableQ[0]) tablePhiQ[0].AddUnified(api, Acc) Acc.Select(api, s2bits[0], Acc, tablePhiQ[0]) - zero := fields_bls12377.E2{A0: 0, A1: 0} Acc.Select(api, selector, g2AffP{X: zero, Y: zero}, Acc) } else { tableQ[0].AddAssign(api, Acc) @@ -318,6 +316,26 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o Acc.Select(api, s2bits[0], Acc, tablePhiQ[0]) } + // subtract [2^nbits]G since we added G at the beginning + B.X = fields_bls12377.E2{ + A0: points.G2m[nbits-1][0], + A1: points.G2m[nbits-1][1], + } + B.Y = fields_bls12377.E2{ + A0: points.G2m[nbits-1][2], + A1: points.G2m[nbits-1][3], + } + B.Y.Neg(api, B.Y) + if cfg.CompleteArithmetic { + Acc.AddUnified(api, B) + } else { + Acc.AddAssign(api, B) + } + + if cfg.CompleteArithmetic { + Acc.Select(api, selector, g2AffP{X: zero, Y: zero}, Acc) + } + P.X = Acc.X P.Y = Acc.Y From b97db9907c2e5f5b5ed05143374de61734c26656 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Mon, 18 Mar 2024 16:37:35 -0400 Subject: [PATCH 08/12] perf(2-chain/bls24): optimize varScalarMul for G2 --- std/algebra/native/sw_bls24315/g1.go | 2 +- std/algebra/native/sw_bls24315/g2.go | 179 +++++++++++++++++---------- 2 files changed, 115 insertions(+), 66 deletions(-) diff --git a/std/algebra/native/sw_bls24315/g1.go b/std/algebra/native/sw_bls24315/g1.go index baa71256b1..d57481bd92 100644 --- a/std/algebra/native/sw_bls24315/g1.go +++ b/std/algebra/native/sw_bls24315/g1.go @@ -198,7 +198,7 @@ func (P *G1Affine) varScalarMul(api frontend.API, Q G1Affine, s frontend.Variabl } s1, s2 := sd[0], sd[1] - // s1 + λ * s2 == s mod r, + // s1 + λ * s2 == s mod r api.AssertIsEqual( api.Add(s1, api.Mul(s2, cc.lambda)), api.Add(s, api.Mul(cc.fr, sd[2])), diff --git 
a/std/algebra/native/sw_bls24315/g2.go b/std/algebra/native/sw_bls24315/g2.go index f8d64acf8d..85aa37cf8f 100644 --- a/std/algebra/native/sw_bls24315/g2.go +++ b/std/algebra/native/sw_bls24315/g2.go @@ -167,30 +167,34 @@ func (P *g2AffP) ScalarMul(api frontend.API, Q g2AffP, s interface{}, opts ...al } } -// varScalarMul sets P = [s] Q and returns P. +// varScalarMul sets P = [s]Q and returns P. It doesn't modify Q nor s. +// It implements an optimized version based on algorithm 1 of [Halo] (see Section 6.2 and appendix C). +// +// ⚠️ The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set. +// (0,0) is not on the curve but we conventionally take it as the +// neutral/infinity point as per the [EVM]. +// +// [Halo]: https://eprint.iacr.org/2019/1021.pdf +// [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, opts ...algopts.AlgebraOption) *g2AffP { cfg, err := algopts.NewConfig(opts...) if err != nil { panic(err) } - // This method computes [s] Q. We use several methods to reduce the number - // of added constraints - first, instead of classical double-and-add, we use - // the optimized version from https://github.com/zcash/zcash/issues/3924 - // which allows to omit computation of several intermediate values. - // Secondly, we use the GLV scalar multiplication to reduce the number - // iterations in the main loop. There is a small difference though - as - // two-bit select takes three constraints, then it takes as many constraints - // to compute ± Q ± Φ(Q) every iteration instead of selecting the value - // from a precomputed table. However, precomputing the table adds 12 - // additional constraints and thus table-version is more expensive than - // addition-version. 
var selector frontend.Variable + oneE2 := fields_bls24315.E2{A0: 1, A1: 0} + zeroE2 := fields_bls24315.E2{A0: 0, A1: 0} + zeroE4 := fields_bls24315.E4{B0: zeroE2, B1: zeroE2} + oneE4 := fields_bls24315.E4{B0: oneE2, B1: zeroE2} if cfg.CompleteArithmetic { // if Q=(0,0) we assign a dummy (1,1) to Q and continue selector = api.And(Q.X.IsZero(api), Q.Y.IsZero(api)) - one := fields_bls24315.E4{B0: fields_bls24315.E2{A0: 1, A1: 0}, B1: fields_bls24315.E2{A0: 0, A1: 0}} - Q.Select(api, selector, g2AffP{X: one, Y: one}, Q) + Q.Select(api, selector, g2AffP{X: oneE4, Y: oneE4}, Q) } + + // We use the endomorphism à la GLV to compute [s]Q as + // [s1]Q + [s2]Φ(Q) + // // The context we are working is based on the `outer` curve. However, the // points and the operations on the points are performed on the `inner` // curve of the outer curve. We require some parameters from the inner @@ -200,31 +204,23 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o // the hints allow to decompose the scalar s into s1 and s2 such that // s1 + λ * s2 == s mod r, // where λ is third root of one in 𝔽_r. - sd, err := api.Compiler().NewHint(decomposeScalarG2, 3, s) + sd, err := api.Compiler().NewHint(decomposeScalarG1Simple, 3, s) if err != nil { // err is non-nil only for invalid number of inputs panic(err) } s1, s2 := sd[0], sd[1] - // when we split scalar, then s1, s2 < lambda by default. However, to have - // the high 1-2 bits of s1, s2 set, the hint functions compute the - // decomposition for - // s + k*r (for some k) - // instead and omits the last reduction. Thus, to constrain s1 and s2, we - // have to assert that - // s1 + λ * s2 == s + k*r - api.AssertIsEqual(api.Add(s1, api.Mul(s2, cc.lambda)), api.Add(s, api.Mul(cc.fr, sd[2]))) - - // As the decomposed scalars are not fully reduced, then in addition of - // having the high bit set, an overflow bit may also be set. Thus, the total - // number of bits may be one more than the bitlength of λ. 
- nbits := cc.lambda.BitLen() + 1 + // s1 + λ * s2 == s mod r, + api.AssertIsEqual( + api.Add(s1, api.Mul(s2, cc.lambda)), + api.Add(s, api.Mul(cc.fr, sd[2])), + ) + nbits := 127 s1bits := api.ToBinary(s1, nbits) s2bits := api.ToBinary(s2, nbits) - var Acc, B, B1, B2, B3, B4 g2AffP // precompute -Q, -Φ(Q), Φ(Q) var tableQ, tablePhiQ [2]g2AffP tableQ[1] = Q @@ -232,49 +228,71 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o cc.phi2(api, &tablePhiQ[1], &Q) tablePhiQ[0].Neg(api, tablePhiQ[1]) - // We now initialize the accumulator. Due to the way the scalar is - // decomposed, either the high bits of s1 or s2 are set and we can use the - // incomplete addition laws. - - // Acc = Q + Φ(Q) = B1 + // we suppose that the first bits of the sub-scalars are 1 and set: + // Acc = Q + Φ(Q) = -Φ²(Q) + var Acc, B g2AffP cc.phi1Neg(api, &Acc, &Q) - B1 = Acc - - // However, we can not directly add step value conditionally as we may get - // to incomplete path of the addition formula. We either add or subtract - // step value from [2] Acc (instead of conditionally adding step value to - // Acc): - // Acc = [2] (Q + Φ(Q)) ± Q ± Φ(Q) - // only y coordinate differs for negation, select on that instead. - B.X = tableQ[0].X - B.Y.Select(api, s1bits[nbits-1], tableQ[1].Y, tableQ[0].Y) - Acc.DoubleAndAdd(api, &Acc, &B) - B.X = tablePhiQ[0].X - B.Y.Select(api, s2bits[nbits-1], tablePhiQ[1].Y, tablePhiQ[0].Y) - Acc.AddAssign(api, B) - - // second bit - B.X = tableQ[0].X - B.Y.Select(api, s1bits[nbits-2], tableQ[1].Y, tableQ[0].Y) - Acc.DoubleAndAdd(api, &Acc, &B) - B.X = tablePhiQ[0].X - B.Y.Select(api, s2bits[nbits-2], tablePhiQ[1].Y, tablePhiQ[0].Y) - Acc.AddAssign(api, B) - - // B2 = -Q-Φ(Q) + + // At each iteration we need to compute: + // [2]Acc ± Q ± Φ(Q). 
+ // We can compute [2]Acc and look up the (precomputed) point B from: + // B1 = +Q + Φ(Q) + B1 := Acc + // B2 = -Q - Φ(Q) + B2 := g2AffP{} B2.Neg(api, B1) - // B3 = Q-Φ(Q) - B3 = tablePhiQ[0] - B3.AddAssign(api, tableQ[1]) - // B4 = -Q+Φ(Q) + // B3 = +Q - Φ(Q) + B3 := tableQ[1] + B3.AddAssign(api, tablePhiQ[0]) + // B4 = -Q + Φ(Q) + B4 := g2AffP{} B4.Neg(api, B3) - for i := nbits - 3; i > 0; i-- { + // + // Note that half the points are negatives of the other half, + // hence have the same X coordinates. + + // However when doing doubleAndAdd(Acc, B) as (Acc+B)+Acc it might happen + // that Acc==B or -B. So we add the base point G to it to avoid incomplete + // additions in the loop by forcing Acc to be different than the stored B. + // However we need at the end to subtract [2^nbits]G or conditionally + // [2^nbits]Φ²(G) from the result. + // + // Acc = Q + Φ(Q) + G + points := getTwistPoints() + Acc.AddAssign(api, + g2AffP{ + X: fields_bls24315.E4{ + B0: fields_bls24315.E2{ + A0: points.G2x[0], + A1: points.G2x[1], + }, + B1: fields_bls24315.E2{ + A0: points.G2x[2], + A1: points.G2x[3], + }, + }, + Y: fields_bls24315.E4{ + B0: fields_bls24315.E2{ + A0: points.G2y[0], + A1: points.G2y[1], + }, + B1: fields_bls24315.E2{ + A0: points.G2y[2], + A1: points.G2y[3], + }, + }, + }, + ) + + for i := nbits - 1; i > 0; i-- { B.X.Select(api, api.Xor(s1bits[i], s2bits[i]), B3.X, B2.X) B.Y.Lookup2(api, s1bits[i], s2bits[i], B2.Y, B3.Y, B4.Y, B1.Y) + // Acc = [2]Acc + B Acc.DoubleAndAdd(api, &Acc, &B) } // i = 0 + // subtract Q and Φ(Q) if the first bits are 0. // When cfg.CompleteArithmetic is set, we use AddUnified instead of Add. This means // when s=0 then Acc=(0,0) because AddUnified(Q, -Q) = (0,0).
if cfg.CompleteArithmetic { @@ -282,8 +300,7 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o Acc.Select(api, s1bits[0], Acc, tableQ[0]) tablePhiQ[0].AddUnified(api, Acc) Acc.Select(api, s2bits[0], Acc, tablePhiQ[0]) - zero := fields_bls24315.E4{B0: fields_bls24315.E2{A0: 0, A1: 0}, B1: fields_bls24315.E2{A0: 0, A1: 0}} - Acc.Select(api, selector, g2AffP{X: zero, Y: zero}, Acc) + Acc.Select(api, selector, g2AffP{X: zeroE4, Y: zeroE4}, Acc) } else { tableQ[0].AddAssign(api, Acc) Acc.Select(api, s1bits[0], Acc, tableQ[0]) @@ -291,6 +308,38 @@ func (P *g2AffP) varScalarMul(api frontend.API, Q g2AffP, s frontend.Variable, o Acc.Select(api, s2bits[0], Acc, tablePhiQ[0]) } + // subtract [2^nbits]G since we added G at the beginning + B.X = fields_bls24315.E4{ + B0: fields_bls24315.E2{ + A0: points.G2m[nbits-1][0], + A1: points.G2m[nbits-1][1], + }, + B1: fields_bls24315.E2{ + A0: points.G2m[nbits-1][2], + A1: points.G2m[nbits-1][3], + }, + } + B.Y = fields_bls24315.E4{ + B0: fields_bls24315.E2{ + A0: points.G2m[nbits-1][4], + A1: points.G2m[nbits-1][5], + }, + B1: fields_bls24315.E2{ + A0: points.G2m[nbits-1][6], + A1: points.G2m[nbits-1][7], + }, + } + B.Y.Neg(api, B.Y) + if cfg.CompleteArithmetic { + Acc.AddUnified(api, B) + } else { + Acc.AddAssign(api, B) + } + + if cfg.CompleteArithmetic { + Acc.Select(api, selector, g2AffP{X: zeroE4, Y: zeroE4}, Acc) + } + P.X = Acc.X P.Y = Acc.Y From c15c7bef1c31bef9f1c91fd605f49713f49091fa Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Tue, 19 Mar 2024 13:02:43 -0400 Subject: [PATCH 09/12] perf(kzg): use MSM instead of two SM in CheckOpeningProof --- std/commitments/kzg/verifier.go | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/std/commitments/kzg/verifier.go b/std/commitments/kzg/verifier.go index 25fd482544..74262a4c5c 100644 --- a/std/commitments/kzg/verifier.go +++ b/std/commitments/kzg/verifier.go @@ -424,22 +424,20 @@ func NewVerifier[FR 
emulated.FieldParams, G1El algebra.G1ElementT, G2El algebra. // commitment at point. func (v *Verifier[FR, G1El, G2El, GTEl]) CheckOpeningProof(commitment Commitment[G1El], proof OpeningProof[FR, G1El], point emulated.Element[FR], vk VerifyingKey[G1El, G2El]) error { - claimedValueG1 := v.curve.ScalarMul(&vk.G1, &proof.ClaimedValue) - - // [f(α) - f(a)]G₁ - fminusfaG1 := v.curve.Neg(claimedValueG1) - fminusfaG1 = v.curve.Add(fminusfaG1, &commitment.G1El) - - // [-H(α)]G₁ - negQuotientPoly := v.curve.Neg(&proof.Quotient) + // [f(a)]G₁ + [-a]([H(α)]G₁) = [f(a) - a*H(α)]G₁ + pointNeg := v.scalarApi.Neg(&point) + totalG1, err := v.curve.MultiScalarMul([]*G1El{&vk.G1, &proof.Quotient}, []*emulated.Element[FR]{&proof.ClaimedValue, pointNeg}) + if err != nil { + return fmt.Errorf("check opening proof: %w", err) + } - // [f(α) - f(a) + a*H(α)]G₁ - totalG1 := v.curve.ScalarMul(&proof.Quotient, &point) - totalG1 = v.curve.Add(totalG1, fminusfaG1) + // [f(a) - a*H(α)]G₁ + [-f(α)]G₁ = [f(a) - f(α) - a*H(α)]G₁ + commitmentNeg := v.curve.Neg(&commitment.G1El) + totalG1 = v.curve.Add(totalG1, commitmentNeg) - // e([f(α)-f(a)+aH(α)]G₁], G₂).e([-H(α)]G₁, [α]G₂) == 1 + // e([f(a)-f(α)-a*H(α)]G₁, G₂).e([H(α)]G₁, [α]G₂) == 1 if err := v.pairing.PairingCheck( - []*G1El{totalG1, negQuotientPoly}, + []*G1El{totalG1, &proof.Quotient}, []*G2El{&vk.G2[0], &vk.G2[1]}, ); err != nil { return fmt.Errorf("pairing check: %w", err) From 95c227094de9bd1d19c31a2b1da7eb0b367ff286 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Tue, 19 Mar 2024 17:52:11 -0400 Subject: [PATCH 10/12] perf(2-chain/pairing): replace subs with adds --- std/algebra/native/sw_bls12377/pairing.go | 32 +++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/std/algebra/native/sw_bls12377/pairing.go b/std/algebra/native/sw_bls12377/pairing.go index 2cdfbd9b84..fa9febf7c3 100644 --- a/std/algebra/native/sw_bls12377/pairing.go +++ b/std/algebra/native/sw_bls12377/pairing.go @@ -277,14
+277,14 @@ func doubleAndAddStep(api frontend.API, p1, p2 *g2AffP) (g2AffP, *lineEvaluation d.Sub(api, p1.X, p2.X) l1.DivUnchecked(api, n, d) - // x3 =lambda1**2-p1.x-p2.x - x3.Square(api, l1). - Sub(api, x3, p1.X). - Sub(api, x3, p2.X) + // x3 =lambda1**2-(p1.x+p2.x) + x3.Square(api, l1) + n.Add(api, p1.X, p2.X) + x3.Sub(api, x3, n) - // omit y3 computation + // omit y3 computation - // compute line1 + // compute line1 line1.R0 = l1 line1.R1.Mul(api, l1, p1.X).Sub(api, line1.R1, p1.Y) @@ -294,10 +294,10 @@ func doubleAndAddStep(api frontend.API, p1, p2 *g2AffP) (g2AffP, *lineEvaluation l2.DivUnchecked(api, n, d) l2.Add(api, l2, l1).Neg(api, l2) - // compute x4 = lambda2**2-x1-x3 - x4.Square(api, l2). - Sub(api, x4, p1.X). - Sub(api, x4, x3) + // compute x4 = lambda2**2-(x1+x3) + x4.Square(api, l2) + n.Add(api, p1.X, x3) + x4.Sub(api, x4, n) // compute y4 = lambda2*(x1 - x4)-y1 y4.Sub(api, p1.X, x4). @@ -328,9 +328,9 @@ func doubleStep(api frontend.API, p1 *g2AffP) (g2AffP, *lineEvaluation) { l.DivUnchecked(api, n, d) // xr = lambda**2-2*p1.x - xr.Square(api, l). - Sub(api, xr, p1.X). - Sub(api, xr, p1.X) + xr.Square(api, l) + n.MulByFp(api, p1.X, 2) + xr.Sub(api, xr, n) // yr = lambda*(p.x-xr)-p.y yr.Sub(api, p1.X, xr). @@ -359,9 +359,9 @@ func linesCompute(api frontend.API, p1, p2 *g2AffP) (*lineEvaluation, *lineEvalu l1.DivUnchecked(api, n, d) // x3 =lambda1**2-p1.x-p2.x - x3.Square(api, l1). - Sub(api, x3, p1.X). 
- Sub(api, x3, p2.X) + x3.Square(api, l1) + n.Add(api, p1.X, p2.X) + x3.Sub(api, x3, n) // omit y3 computation // compute line1 From a7b94f7ec94df31b825df2ea5514b6ec9b406264 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Tue, 19 Mar 2024 19:09:18 -0400 Subject: [PATCH 11/12] perf(2-chain/pairing): few small optims --- .../native/fields_bls12377/e12_pairing.go | 39 ++++++++++--------- std/algebra/native/fields_bls12377/e2.go | 4 +- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/std/algebra/native/fields_bls12377/e12_pairing.go b/std/algebra/native/fields_bls12377/e12_pairing.go index 08624038f1..c6f87c4a0a 100644 --- a/std/algebra/native/fields_bls12377/e12_pairing.go +++ b/std/algebra/native/fields_bls12377/e12_pairing.go @@ -22,16 +22,17 @@ func (e *E12) Square034(api frontend.API, x E12) *E12 { c0.B0.Sub(api, x.C0.B0, x.C1.B0) c0.B1.Neg(api, x.C1.B1) - c0.B2 = E2{0, 0} c3.B0 = x.C0.B0 c3.B1.Neg(api, x.C1.B0) c3.B2.Neg(api, x.C1.B1) c2.Mul0By01(api, x.C0.B0, x.C1.B0, x.C1.B1) - c3.MulBy01(api, c0.B0, c0.B1).Add(api, c3, c2) - e.C1.B0.Add(api, c2.B0, c2.B0) - e.C1.B1.Add(api, c2.B1, c2.B1) + c3.MulBy01(api, c0.B0, c0.B1) + c3.B0.Add(api, c3.B0, c2.B0) + c3.B1.Add(api, c3.B1, c2.B1) + e.C1.B0.MulByFp(api, c2.B0, 2) + e.C1.B1.MulByFp(api, c2.B1, 2) e.C0.B0 = c3.B0 e.C0.B1.Add(api, c3.B1, c2.B0) @@ -49,8 +50,7 @@ func (e *E12) MulBy034(api frontend.API, c3, c4 E2) *E12 { b := e.C1 b.MulBy01(api, c3, c4) - - c3.Add(api, E2{A0: 1, A1: 0}, c3) + c3.A0 = api.Add(1, c3.A0) d.Add(api, e.C0, e.C1) d.MulBy01(api, c3, c4) @@ -81,17 +81,19 @@ func Mul034By034(api frontend.API, d3, d4, c3, c4 E2) *[5]E2 { } func Mul01234By034(api frontend.API, x [5]E2, z3, z4 E2) *E12 { - var a, b, z1, z0, one E6 - var zero E2 - zero.SetZero() - one.SetOne() + var a, b, z1, z0 E6 c0 := &E6{B0: x[0], B1: x[1], B2: x[2]} - c1 := &E6{B0: x[3], B1: x[4], B2: zero} - a.Add(api, one, E6{B0: z3, B1: z4, B2: zero}) - b.Add(api, *c0, *c1) - a.Mul(api, a, b) + a.B0.A0 = 
api.Add(z3.A0, 1) + a.B0.A1 = z3.A1 + a.B1 = z4 + a.B2.A0 = 0 + a.B2.A1 = 0 + b.B0.Add(api, c0.B0, x[3]) + b.B1.Add(api, c0.B1, x[4]) + b.B2 = c0.B2 + b.MulBy01(api, a.B0, a.B1) c := *Mul01By01(api, z3, z4, x[3], x[4]) - z1.Sub(api, a, *c0) + z1.Sub(api, b, *c0) z1.Sub(api, z1, c) z0.MulByNonResidue(api, c) z0.Add(api, z0, *c0) @@ -103,12 +105,11 @@ func Mul01234By034(api frontend.API, x [5]E2, z3, z4 E2) *E12 { func (e *E12) MulBy01234(api frontend.API, x [5]E2) *E12 { var a, b, c, z1, z0 E6 - var zero E2 - zero.SetZero() c0 := &E6{B0: x[0], B1: x[1], B2: x[2]} - c1 := &E6{B0: x[3], B1: x[4], B2: zero} a.Add(api, e.C0, e.C1) - b.Add(api, *c0, *c1) + b.B0.Add(api, x[0], x[3]) + b.B1.Add(api, x[1], x[4]) + b.B2 = x[2] a.Mul(api, a, b) b.Mul(api, e.C0, *c0) c = e.C1 diff --git a/std/algebra/native/fields_bls12377/e2.go b/std/algebra/native/fields_bls12377/e2.go index a203843157..fd6f99ecde 100644 --- a/std/algebra/native/fields_bls12377/e2.go +++ b/std/algebra/native/fields_bls12377/e2.go @@ -68,8 +68,8 @@ func (e *E2) Add(api frontend.API, e1, e2 E2) *E2 { // Double e2 elmt func (e *E2) Double(api frontend.API, e1 E2) *E2 { - e.A0 = api.Add(e1.A0, e1.A0) - e.A1 = api.Add(e1.A1, e1.A1) + e.A0 = api.Mul(e1.A0, 2) + e.A1 = api.Mul(e1.A1, 2) return e } From 2d17ac193c92a2fa5907c3cf2cfcfa600670f3b5 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Tue, 19 Mar 2024 22:13:06 -0400 Subject: [PATCH 12/12] chore: update stats --- internal/stats/latest.stats | Bin 2246 -> 2246 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/internal/stats/latest.stats b/internal/stats/latest.stats index 329496c6f7bed17ed35e0d8a5926d410372cf9ec..37f16d35c679bdae9cac75fd13eba33817eed7aa 100644 GIT binary patch delta 69 zcmV-L0J{Ii5yla)4FQwp2Az|%1)P(E2b{C>13v+?rUFd?lMMkh2auov{Q;1mlSlzW blQIQClj{MylZydhlTZehlOG7DlXC?@uvZ!e delta 69 zcmV-L0J{Ii5yla)4FQvG1vCedjsg7vk&ct81w)hQ24Isd2D`H%1Wf_6;sQSblk5dS blZ6ODld1unlSl`alMMl#lSTrZlimlXg60|;