Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat(gpu): Icicle NTT integration #859

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
31 changes: 19 additions & 12 deletions crypto/src/commitments/kzg.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
use super::traits::IsCommitmentScheme;
use alloc::{borrow::ToOwned, vec::Vec};
use core::{marker::PhantomData, mem};
use lambdaworks_math::gpu::icicle::GpuMSMPoint;
use lambdaworks_math::traits::ByteConversion;
use lambdaworks_math::{
cyclic_group::IsGroup,
elliptic_curve::traits::IsPairing,
errors::DeserializationError,
field::{element::FieldElement, traits::IsPrimeField},
field::{
element::FieldElement,
traits::{IsField, IsPrimeField},
},
msm::pippenger::msm,
polynomial::Polynomial,
traits::{AsBytes, Deserializable},
Expand Down Expand Up @@ -136,12 +141,12 @@ where
}

#[derive(Clone)]
pub struct KateZaveruchaGoldberg<F: IsPrimeField, P: IsPairing> {
pub struct KateZaveruchaGoldberg<F: IsField, P: IsPairing> {
srs: StructuredReferenceString<P::G1Point, P::G2Point>,
phantom: PhantomData<F>,
}

impl<F: IsPrimeField, P: IsPairing> KateZaveruchaGoldberg<F, P> {
impl<F: IsField, P: IsPairing> KateZaveruchaGoldberg<F, P> {
pub fn new(srs: StructuredReferenceString<P::G1Point, P::G2Point>) -> Self {
Self {
srs,
Expand All @@ -150,20 +155,22 @@ impl<F: IsPrimeField, P: IsPairing> KateZaveruchaGoldberg<F, P> {
}
}

impl<const N: usize, F: IsPrimeField<RepresentativeType = UnsignedInteger<N>>, P: IsPairing>
IsCommitmentScheme<F> for KateZaveruchaGoldberg<F, P>
impl<
const N: usize,
F: IsField<BaseType = UnsignedInteger<N>>
+ IsPrimeField<RepresentativeType = UnsignedInteger<N>>,
P: IsPairing,
> IsCommitmentScheme<F> for KateZaveruchaGoldberg<F, P>
where
FieldElement<F>: ByteConversion,
P::G1Point: GpuMSMPoint,
{
type Commitment = P::G1Point;

fn commit(&self, p: &Polynomial<FieldElement<F>>) -> Self::Commitment {
let coefficients: Vec<_> = p
.coefficients
.iter()
.map(|coefficient| coefficient.representative())
.collect();
msm(
&coefficients,
&self.srs.powers_main_group[..coefficients.len()],
&p.coefficients,
&self.srs.powers_main_group[..p.coefficients.len()],
)
.expect("`points` is sliced by `cs`'s length")
}
Expand Down
14 changes: 13 additions & 1 deletion math/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,17 @@ objc = { version = "0.2.7", optional = true }
# cuda
cudarc = { version = "0.9.7", optional = true }

# Icicle integration
icicle-cuda-runtime = { git = "https://github.com/ingonyama-zk/icicle.git", tag = "v1.4.0", optional = true }
icicle-core = { git = "https://github.com/ingonyama-zk/icicle.git", tag = "v1.4.0", optional = true }
icicle-bls12-377 = { git = "https://github.com/ingonyama-zk/icicle.git", tag = "v1.4.0", optional = true}
icicle-bls12-381 = { git = "https://github.com/ingonyama-zk/icicle.git", tag = "v1.4.0", optional = true }
icicle-bn254 = { git = "https://github.com/ingonyama-zk/icicle.git", tag = "v1.4.0", optional = true }

lambdaworks-gpu = { workspace = true, optional = true }

[dev-dependencies]
rand = { version = "0.8.5", default-features = false }
rand = { version = "0.8.5" }
rand_chacha = "0.3.1"
criterion = "0.5.1"
const-random = "0.1.15"
Expand All @@ -45,6 +51,7 @@ lambdaworks-serde-binary = ["dep:serde", "alloc"]
lambdaworks-serde-string = ["dep:serde", "dep:serde_json", "alloc"]
proptest = ["dep:proptest"]
winter_compatibility = ["winter-math", "miden-core"]
icicle = ["dep:icicle-cuda-runtime", "dep:icicle-core", "dep:icicle-bls12-377", "dep:icicle-bls12-381", "dep:icicle-bn254"]

# gpu
metal = [
Expand Down Expand Up @@ -84,6 +91,11 @@ name = "criterion_msm"
harness = false
required-features = ["parallel"]

[[bench]]
name = "criterion_icicle"
harness = false
required-features = ["icicle"]

[[bench]]
name = "criterion_fft"
harness = false
Expand Down
71 changes: 71 additions & 0 deletions math/benches/criterion_icicle.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use lambdaworks_math::{
cyclic_group::IsGroup,
elliptic_curve::{
short_weierstrass::curves::{
bls12_377::curve::BLS12377Curve, bls12_381::curve::BLS12381Curve,
bn_254::curve::BN254Curve,
},
traits::IsEllipticCurve,
},
field::{element::FieldElement, traits::IsField},
};

use lambdaworks_math::gpu::icicle::{
bls12_377::bls12_377_g1_msm, bls12_381::bls12_381_g1_msm, bn254::bn254_g1_msm,
};
use rand::{rngs::StdRng, Rng, SeedableRng};

pub fn generate_cs_and_points<C: IsEllipticCurve>(
msm_size: usize,
) -> (Vec<FieldElement<C::BaseField>>, Vec<C::PointRepresentation>)
where
<C::BaseField as IsField>::BaseType: From<u64>,
{
// We use a seeded rng so the benchmarks are reproducible.
let mut rng = StdRng::seed_from_u64(42);

let g = C::generator();

let cs: Vec<_> = (0..msm_size)
.map(|_| FieldElement::<C::BaseField>::new(rng.gen::<u64>().into()))
.collect();

let points: Vec<_> = (0..msm_size)
.map(|_| g.operate_with_self(rng.gen::<u64>()))
.collect();

(cs, points)
}

pub fn msm_benchmarks_with_size(c: &mut Criterion, msm_size: usize) {
let mut group = c.benchmark_group(format!("MSM benchmarks with size {msm_size}"));

let (cs, points) = generate_cs_and_points::<BLS12381Curve>(msm_size);
group.bench_function("BLS12_381", |bench| {
bench.iter(|| black_box(bls12_381_g1_msm(&cs, &points, None)));
});

let (cs, points) = generate_cs_and_points::<BLS12377Curve>(msm_size);
group.bench_function("BLS12_377", |bench| {
bench.iter(|| black_box(bls12_377_g1_msm(&cs, &points, None)));
});

let (cs, points) = generate_cs_and_points::<BN254Curve>(msm_size);
group.bench_function("BN_254", |bench| {
bench.iter(|| black_box(bn254_g1_msm(&cs, &points, None)));
});
}

pub fn run_benchmarks(c: &mut Criterion) {
let exponents = 1..=18;

for exp in exponents {
let msm_size = 1 << exp;

msm_benchmarks_with_size(c, msm_size);
}
}

criterion_group!(icicle_msm, run_benchmarks);
criterion_main!(icicle_msm);
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ use crate::{
elliptic_curve::short_weierstrass::traits::IsShortWeierstrass, field::element::FieldElement,
};

pub type BLS12377FieldElement = FieldElement<BLS12377PrimeField>;

/// The description of the curve.
#[derive(Clone, Debug)]
pub struct BLS12377Curve;
Expand Down
7 changes: 7 additions & 0 deletions math/src/fft/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ use lambdaworks_gpu::metal::abstractions::errors::MetalError;
#[cfg(feature = "cuda")]
use lambdaworks_gpu::cuda::abstractions::errors::CudaError;

#[cfg(feature = "icicle")]
use icicle_core::error::IcicleError;

#[derive(Debug)]
pub enum FFTError {
RootOfUnityError(u64),
Expand All @@ -17,6 +20,8 @@ pub enum FFTError {
MetalError(MetalError),
#[cfg(feature = "cuda")]
CudaError(CudaError),
#[cfg(feature = "icicle")]
IcicleError(IcicleError),
}

impl Display for FFTError {
Expand All @@ -37,6 +42,8 @@ impl Display for FFTError {
FFTError::CudaError(_) => {
write!(f, "A CUDA related error has ocurred")
}
#[cfg(feature = "icicle")]
FFTError::IcicleError(e) => write!(f, "Icicle GPU backend failure. {:?}", e),
}
}
}
Expand Down
56 changes: 38 additions & 18 deletions math/src/fft/polynomial.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,24 @@ use crate::{
traits::{IsFFTField, RootsConfig},
},
polynomial::Polynomial,
traits::ByteConversion,
gpu::icicle::{IcicleFFT, GpuMSMPoint}
};
use alloc::{vec, vec::Vec};

#[cfg(feature = "cuda")]
use crate::fft::gpu::cuda::polynomial::{evaluate_fft_cuda, interpolate_fft_cuda};
#[cfg(feature = "metal")]
use crate::fft::gpu::metal::polynomial::{evaluate_fft_metal, interpolate_fft_metal};
#[cfg(feature = "icicle")]
use crate::gpu::icicle::{evaluate_fft_icicle, interpolate_fft_icicle};

use super::cpu::{ops, roots_of_unity};

impl<E: IsField> Polynomial<FieldElement<E>> {
impl<E: IsField + IsFFTField + IcicleFFT> Polynomial<FieldElement<E>>
where
FieldElement<E>: ByteConversion,
{
/// Returns `N` evaluations of this polynomial using FFT over a domain in a subfield F of E (so the results
/// are P(w^i), with w being a primitive root of unity).
/// `N = max(self.coeff_len(), domain_size).next_power_of_two() * blowup_factor`.
Expand All @@ -26,7 +33,10 @@ impl<E: IsField> Polynomial<FieldElement<E>> {
poly: &Polynomial<FieldElement<E>>,
blowup_factor: usize,
domain_size: Option<usize>,
) -> Result<Vec<FieldElement<E>>, FFTError> {
) -> Result<Vec<FieldElement<E>>, FFTError>
where
FieldElement<E>: ByteConversion,
{
let domain_size = domain_size.unwrap_or(0);
let len = core::cmp::max(poly.coeff_len(), domain_size).next_power_of_two() * blowup_factor;

Expand All @@ -51,17 +61,20 @@ impl<E: IsField> Polynomial<FieldElement<E>> {
}
}

#[cfg(feature = "cuda")]
#[cfg(feature = "icicle")]
{
// TODO: support multiple fields with CUDA
if F::field_name() == "stark256" {
Ok(evaluate_fft_cuda(&coeffs)?)
if !F::field_name().is_empty() {
Ok(evaluate_fft_icicle::<F, E>(&coeffs)?)
} else {
println!(
"GPU evaluation failed for field {}. Program will fallback to CPU.",
core::any::type_name::<F>()
);
evaluate_fft_cpu::<F, E>(&coeffs)
}
}

#[cfg(all(not(feature = "metal"), not(feature = "cuda")))]
#[cfg(all(not(feature = "metal"), not(feature = "icicle")))]
{
evaluate_fft_cpu::<F, E>(&coeffs)
}
Expand All @@ -76,7 +89,10 @@ impl<E: IsField> Polynomial<FieldElement<E>> {
blowup_factor: usize,
domain_size: Option<usize>,
offset: &FieldElement<F>,
) -> Result<Vec<FieldElement<E>>, FFTError> {
) -> Result<Vec<FieldElement<E>>, FFTError>
where
FieldElement<E>: ByteConversion,
{
let scaled = poly.scale(offset);
Polynomial::evaluate_fft::<F>(&scaled, blowup_factor, domain_size)
}
Expand All @@ -100,16 +116,20 @@ impl<E: IsField> Polynomial<FieldElement<E>> {
}
}

#[cfg(feature = "cuda")]
#[cfg(feature = "icicle")]
{
if !F::field_name().is_empty() {
Ok(interpolate_fft_cuda(fft_evals)?)
Ok(interpolate_fft_icicle::<F, E>(fft_evals)?)
} else {
interpolate_fft_cpu::<F, E>(fft_evals)
println!(
"GPU evaluation failed for field {}. Program will fallback to CPU.",
core::any::type_name::<F>()
);
interpolate_fft_cpu::<F, E>(&fft_evals)
}
}

#[cfg(all(not(feature = "metal"), not(feature = "cuda")))]
#[cfg(all(not(feature = "metal"), not(feature = "icicle")))]
{
interpolate_fft_cpu::<F, E>(fft_evals)
}
Expand All @@ -127,13 +147,13 @@ impl<E: IsField> Polynomial<FieldElement<E>> {
}
}

pub fn compose_fft<F, E>(
poly_1: &Polynomial<FieldElement<E>>,
poly_2: &Polynomial<FieldElement<E>>,
) -> Polynomial<FieldElement<E>>
pub fn compose_fft<F>(
poly_1: &Polynomial<FieldElement<F>>,
poly_2: &Polynomial<FieldElement<F>>,
) -> Polynomial<FieldElement<F>>
where
F: IsFFTField + IsSubFieldOf<E>,
E: IsField,
F: IsFFTField + IcicleFFT,
FieldElement<F>: ByteConversion,
{
let poly_2_evaluations = Polynomial::evaluate_fft::<F>(poly_2, 1, None).unwrap();

Expand Down
Loading
Loading