Skip to content

Commit

Permalink
ec-nvidia: switch to 32-bit due to upstream fused-multiply-add-with-c…
Browse files Browse the repository at this point in the history
…arry 64-bit bug
  • Loading branch information
mratsim committed Oct 27, 2024
1 parent 1529eb6 commit bea5c1a
Show file tree
Hide file tree
Showing 13 changed files with 46 additions and 505 deletions.
1 change: 0 additions & 1 deletion constantine.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,6 @@ const testDescNvidia: seq[string] = @[
"tests/gpu/t_nsqr.nim",
"tests/gpu/t_nsqr_rt.nim",
"tests/gpu/t_ec_jac_coords.nim",
"tests/gpu/t_ec_sum_port.nim",
"tests/gpu/t_ec_sum.nim"
]

Expand Down
2 changes: 1 addition & 1 deletion constantine/math_compiler/impl_curves_ops_affine.nim
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ proc isNeutralAff*(asy: Assembler_LLVM, cd: CurveDescriptor, r, a: ValueRef) {.u

let (ri, ai) = llvmParams

let P = asy.asEcPointAff(ed, ai)
let P = asy.asEcPointAff(cd, ai)

asy.store(ri, P.x.isZero() and P.y.isZero())

Expand Down
13 changes: 7 additions & 6 deletions docs/crypto-nvidia_gpus.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ This documentation references useful information for implementing and optimizing

## Integer instruction bug

### Integer FMA with carry-in uint64

We get incorrect result for modular multiplication with 64-bit limbs due to a fused-multiuply-add with carry bug.

- https://gist.github.com/mratsim/a34df1e091925df15c13208df7eda569#file-mul-py
- https://forums.developer.nvidia.com/t/incorrect-result-of-ptx-code/221067

### Integer FMA with carry-in uint32

The instruction integer fused-multiply-add with carry-in may
Expand Down Expand Up @@ -57,12 +64,6 @@ int main() {
}
```
### Integer FMA with carry-in uint64
https://forums.developer.nvidia.com/t/incorrect-result-of-ptx-code/221067
Unfortunately it seems like we're also hit by this on uint64.
## The hidden XMAD instruction
Expand Down
2 changes: 1 addition & 1 deletion tests/gpu/t_ccopy.nim
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,4 @@ let a = Fp[BN254_Snarks].fromHex("0x12345678FF11FFAA00321321CAFECAFE")
let b = Fp[BN254_Snarks].fromHex("0xDEADBEEFDEADBEEFDEADBEEFDEADBEEF")


testName(Fp[BN254_Snarks], 64, a, b)
testName(Fp[BN254_Snarks], 32, a, b)
2 changes: 1 addition & 1 deletion tests/gpu/t_cneg.nim
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@ proc testName[Name: static Algebra](field: type FF[Name], wordSize: int, a: FF[N

let a = Fp[BN254_Snarks].fromHex("0x12345678FF11FFAA00321321CAFECAFE")

testName(Fp[BN254_Snarks], 64, a)
testName(Fp[BN254_Snarks], 32, a)
10 changes: 5 additions & 5 deletions tests/gpu/t_ec_jac_coords.nim
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import
constantine/math/elliptic/ec_shortweierstrass_jacobian,
constantine/platforms/abstractions,
constantine/platforms/llvm/llvm,
constantine/math_compiler/[ir, pub_fields, pub_curves_jacobian, codegen_nvidia, impl_fields_globals],
constantine/math_compiler/[ir, pub_fields, pub_curves_jacobian, codegen_nvidia, impl_fields_globals, impl_curves_ops_jacobian],
# Test utilities
helpers/prng_unsafe

Expand All @@ -28,7 +28,7 @@ template genGetComponent*(asy: Assembler_LLVM, cd: CurveDescriptor, fn: typed):
let rA = asy.asField(r, cd.fd.fieldTy)

let x = fn(ec)
asy.store(rA, x)
rA.store(x)

asy.br.retVoid()
name
Expand Down Expand Up @@ -71,6 +71,6 @@ let y = "0x2beb0d0d6115007676f30bcc462fe814bf81198848f139621a3e9fa454fe8e6a"
let pt = EC_ShortW_Jac[Fp[BN254_Snarks], G1].fromHex(x, y)
echo pt.toHex()

testX(Fp[BN254_Snarks], 64, pt)
testY(Fp[BN254_Snarks], 64, pt)
testZ(Fp[BN254_Snarks], 64, pt)
testX(Fp[BN254_Snarks], 32, pt)
testY(Fp[BN254_Snarks], 32, pt)
testZ(Fp[BN254_Snarks], 32, pt)
6 changes: 3 additions & 3 deletions tests/gpu/t_ec_sum.nim
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,12 @@ let pt2 = EC_ShortW_Jac[Fp[BN254_Snarks], G1].fromHex(x2, y2)
## If `skipFinalSub` is set to `true` in the EC sum implementation
## `S1.prod(Q.z, Z2Z2, skipFinalSub = true)`
## the following fails at iteration 57.
testSum(Fp[BN254_Snarks], 64, pt, pt2)
testSum(Fp[BN254_Snarks], 32, pt, pt2)

var pt2Aff: EC_ShortW_Aff[Fp[BN254_Snarks], G1]
pt2Aff.affine(pt2)

testMixedSum(Fp[BN254_Snarks], 64, pt, pt2Aff)
testMixedSum(Fp[BN254_Snarks], 32, pt, pt2Aff)


## NOTE: While these inputs a, b are the ones that end up causing the
Expand Down Expand Up @@ -115,7 +115,7 @@ block SkipFinalSubIssue:
let y2 = "0x0f53265870f65aa18bded3ccb9c62a4d8b060a32a05a75d455710bce95a991df"
let b = EC_ShortW_Jac[Fp[BN254_Snarks], G1].fromHex(x2, y2)

let nv = initNvAsm(EC_ShortW_Jac[Fp[BN254_Snarks], G1], 64)
let nv = initNvAsm(EC_ShortW_Jac[Fp[BN254_Snarks], G1], 32)
let kernel = nv.compile(genEcMixedSum)

template checkSum(a, b): untyped =
Expand Down
Loading

0 comments on commit bea5c1a

Please sign in to comment.