Skip to content

Commit

Permalink
Merge branch 'zkonduit:main' into feat/msm-gpu-acceleration
Browse files Browse the repository at this point in the history
  • Loading branch information
ElusAegis authored Jan 17, 2025
2 parents b53561d + ee4e1a0 commit 74c0a12
Show file tree
Hide file tree
Showing 8 changed files with 258 additions and 33 deletions.
4 changes: 4 additions & 0 deletions halo2_proofs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ rustdoc-args = ["--cfg", "docsrs", "--html-in-header", "katex-header.html"]
name = "arithmetic"
harness = false

[[bench]]
name = "polyread"
harness = false

[[bench]]
name = "commit_zk"
harness = false
Expand Down
130 changes: 130 additions & 0 deletions halo2_proofs/benches/polyread.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
use std::io::Cursor;

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use halo2_proofs::{poly::Polynomial, SerdeFormat, SerdePrimeField};
use halo2curves::bn256::Fr;
use maybe_rayon::{iter::ParallelIterator, slice::ParallelSlice};
use rand_core::OsRng;

/// Benchmarks the batched parallel `read` with `SerdeFormat::RawBytesUnchecked`,
/// registering one benchmark per batch size.
pub fn parallel_poly_read_benchmark_unchecked(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_poly_read_unchecked");

    // The serialized input does not depend on the batch size, so it is built
    // once instead of being regenerated on every loop iteration.
    let data = setup_random_poly(100_000_000);

    for batch_size in [64, 256, 1024, 4096, 100000, 1000000].iter() {
        group.bench_function(format!("batch_{}", batch_size), |b| {
            b.iter(|| {
                // Read through a borrowing cursor: cloning the whole input
                // inside the timed closure would be measured as part of the read.
                let mut reader = Cursor::new(data.as_slice());
                black_box(
                    read::<_, Fr>(&mut reader, SerdeFormat::RawBytesUnchecked, *batch_size)
                        .unwrap(),
                )
            });
        });
    }

    group.finish();
}

/// Benchmarks the batched parallel `read` with `SerdeFormat::RawBytes`,
/// registering one benchmark per batch size.
pub fn parallel_poly_read_benchmark_checked(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_poly_read_checked");

    // The serialized input does not depend on the batch size, so it is built
    // once instead of being regenerated on every loop iteration.
    let data = setup_random_poly(100_000_000);

    for batch_size in [64, 256, 1024, 4096, 100000, 1000000].iter() {
        group.bench_function(format!("batch_{}", batch_size), |b| {
            b.iter(|| {
                // Read through a borrowing cursor: cloning the whole input
                // inside the timed closure would be measured as part of the read.
                let mut reader = Cursor::new(data.as_slice());
                black_box(read::<_, Fr>(&mut reader, SerdeFormat::RawBytes, *batch_size).unwrap())
            });
        });
    }

    group.finish();
}

/// Benchmarks `read_serial` (the unbatched reader) with `SerdeFormat::RawBytes`
/// on a single input.
pub fn parallel_poly_read_benchmark_checked_serial(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_poly_read_checked_serial");

    let data = setup_random_poly(100_000_000);
    group.bench_function(format!("batch_{}", 100_000_000), |b| {
        b.iter(|| {
            // Read through a borrowing cursor: cloning the whole input inside
            // the timed closure would be measured as part of the read.
            let mut reader = Cursor::new(data.as_slice());
            black_box(read_serial::<_, Fr>(&mut reader, SerdeFormat::RawBytes).unwrap())
        });
    });

    group.finish();
}

// Collect the three benchmark functions into the `benches` group and hand that
// group to `criterion_main!`, which provides the entry point for this bench
// target.
criterion_group!(
    benches,
    parallel_poly_read_benchmark_checked_serial,
    parallel_poly_read_benchmark_checked,
    parallel_poly_read_benchmark_unchecked
);
criterion_main!(benches);

/// Builds the benchmark input: the `SerdeFormat::RawBytes` serialization of
/// `Polynomial::<Fr, usize>::random(n, ..)`, returned as a byte vector.
///
/// Panics if writing the polynomial into the in-memory buffer fails.
fn setup_random_poly(n: usize) -> Vec<u8> {
    let mut entropy = OsRng;
    let poly = Polynomial::<Fr, usize>::random(n, &mut entropy);

    let mut serialized = Vec::new();
    poly.write(&mut serialized, SerdeFormat::RawBytes).unwrap();
    serialized
}

/// Reads a length-prefixed sequence of field elements from `reader`, decoding
/// it in parallel batches.
///
/// The input is a big-endian `u32` element count followed by that many
/// fixed-width element encodings. The payload is split into batches of
/// `batch_size` elements; batches are handed to the parallel iterator, and the
/// elements inside one batch are decoded one after another.
///
/// # Errors
///
/// Returns an error if `batch_size` is zero, if the payload size does not fit
/// in `usize`, if `reader` ends before the announced payload is read, or if
/// any element fails to decode under `format`.
pub fn read<R: std::io::Read, F: SerdePrimeField>(
    reader: &mut R,
    format: SerdeFormat,
    batch_size: usize,
) -> std::io::Result<Vec<F>> {
    // A zero chunk size makes `par_chunks` panic; report it as an error instead.
    if batch_size == 0 {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "batch_size must be non-zero",
        ));
    }

    let mut len_bytes = [0u8; 4];
    reader.read_exact(&mut len_bytes)?;
    let poly_len = u32::from_be_bytes(len_bytes) as usize;

    let repr_len = F::default().to_repr().as_ref().len();
    // The count comes from the input; refuse sizes that would wrap around.
    let byte_len = poly_len.checked_mul(repr_len).ok_or_else(|| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            "polynomial byte length overflows usize",
        )
    })?;

    let mut buffer = vec![0u8; byte_len];
    reader.read_exact(&mut buffer)?;

    // Saturating: an enormous batch size simply means "one batch".
    let batches = buffer
        .par_chunks(repr_len.saturating_mul(batch_size))
        .map(|batch| {
            batch
                .chunks_exact(repr_len)
                .map(|chunk| F::read(&mut std::io::Cursor::new(chunk), format))
                .collect::<std::io::Result<Vec<_>>>()
        })
        .collect::<std::io::Result<Vec<_>>>()?;

    // Stitch the batches back together in order, allocating the output once.
    let mut elements = Vec::with_capacity(poly_len);
    for batch in batches {
        elements.extend(batch);
    }
    Ok(elements)
}

/// Reads a length-prefixed sequence of field elements from `reader` without
/// grouping elements into batches.
///
/// The input is a big-endian `u32` element count followed by that many
/// fixed-width element encodings. Despite the name, decoding goes through
/// `par_chunks_exact`, with one element per chunk.
///
/// # Errors
///
/// Returns an error if the payload size does not fit in `usize`, if `reader`
/// ends before the announced payload is read, or if any element fails to
/// decode under `format`.
pub fn read_serial<R: std::io::Read, F: SerdePrimeField>(
    reader: &mut R,
    format: SerdeFormat,
) -> std::io::Result<Vec<F>> {
    let mut len_bytes = [0u8; 4];
    reader.read_exact(&mut len_bytes)?;
    let poly_len = u32::from_be_bytes(len_bytes) as usize;

    let repr_len = F::default().to_repr().as_ref().len();
    // The count comes from the input; refuse sizes that would wrap around.
    let byte_len = poly_len.checked_mul(repr_len).ok_or_else(|| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            "polynomial byte length overflows usize",
        )
    })?;

    // Pull the whole payload into memory with a single read.
    let mut buffer = vec![0u8; byte_len];
    reader.read_exact(&mut buffer)?;

    // Decode every `repr_len`-byte chunk as one field element.
    buffer
        .par_chunks_exact(repr_len)
        .map(|chunk| F::read(&mut std::io::Cursor::new(chunk), format))
        .collect::<std::io::Result<Vec<_>>>()
}
62 changes: 61 additions & 1 deletion halo2_proofs/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ pub trait SerdeCurveAffine: CurveAffine + SerdeObject {
}
impl<C: CurveAffine + SerdeObject> SerdeCurveAffine for C {}

/// A prime field whose elements can be read and written as bytes according to a [`SerdeFormat`].
pub trait SerdePrimeField: PrimeField + SerdeObject {
/// Reads a field element as bytes from the buffer according to the `format`:
/// - `Processed`: Reads a field element in standard form, with endianness specified by the
Expand Down Expand Up @@ -120,6 +121,7 @@ pub fn unpack(byte: u8, bits: &mut [bool]) {
}
}

#[cfg(not(feature = "parallel-poly-read"))]
/// Reads a vector of polynomials from buffer
pub(crate) fn read_polynomial_vec<R: io::Read, F: SerdePrimeField, B>(
reader: &mut R,
Expand All @@ -129,18 +131,75 @@ pub(crate) fn read_polynomial_vec<R: io::Read, F: SerdePrimeField, B>(
reader.read_exact(&mut len)?;
let len = u32::from_be_bytes(len);

let poly_lens: Result<Vec<_>, _> = (0..len)
.map(|_| {
let mut poly_len = [0u8; 4];
reader.read_exact(&mut poly_len)?;
Ok::<_, std::io::Error>(u32::from_be_bytes(poly_len))
})
.collect();

let _poly_lens = poly_lens?;

(0..len)
.map(|_| Polynomial::<F, B>::read(reader, format))
.collect::<io::Result<Vec<_>>>()
}

#[cfg(feature = "parallel-poly-read")]
/// Reads a vector of polynomials from buffer, decoding the polynomials in parallel.
///
/// Expected layout (every integer is a big-endian `u32`): the number of
/// polynomials, then one length entry per polynomial, then each polynomial's
/// own serialization (a `u32` prefix followed by its field elements).
///
/// All bytes are pulled from `reader` sequentially first; only the decoding of
/// the per-polynomial buffers goes through the parallel iterator.
///
/// # Errors
///
/// Returns an error if `reader` ends early, if a polynomial's byte size does
/// not fit in `usize`, or if decoding any polynomial fails.
pub(crate) fn read_polynomial_vec<R: io::Read, F: SerdePrimeField, B: std::marker::Send>(
    reader: &mut R,
    format: SerdeFormat,
) -> io::Result<Vec<Polynomial<F, B>>> {
    use maybe_rayon::iter::IntoParallelIterator;
    use maybe_rayon::iter::ParallelIterator;

    let mut len = [0u8; 4];
    reader.read_exact(&mut len)?;
    let len = u32::from_be_bytes(len);

    // Read all polynomial lengths first
    let mut poly_lens = Vec::with_capacity(len as usize);
    for _ in 0..len {
        let mut poly_len = [0u8; 4];
        reader.read_exact(&mut poly_len)?;
        poly_lens.push(u32::from_be_bytes(poly_len));
    }

    // The encoded width of a field element is the same for every polynomial,
    // so compute it once rather than on every loop iteration.
    let repr_len = F::default().to_repr().as_ref().len();

    // Pre-read all polynomial data into separate buffers
    let mut poly_buffers = Vec::with_capacity(poly_lens.len());
    for &poly_len in &poly_lens {
        // All field elements plus the polynomial's own prepended `u32`.
        // The lengths come from the input, so guard against wrap-around.
        let buffer_size = (poly_len as usize)
            .checked_mul(repr_len)
            .and_then(|bytes| bytes.checked_add(std::mem::size_of::<u32>()))
            .ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::InvalidData,
                    "polynomial byte length overflows usize",
                )
            })?;
        let mut buffer = vec![0u8; buffer_size];
        reader.read_exact(&mut buffer)?;
        poly_buffers.push(buffer);
    }

    // Process buffers in parallel
    poly_buffers
        .into_par_iter()
        .map(|buffer| {
            let mut cursor = std::io::Cursor::new(buffer);
            Polynomial::<F, B>::read_serial(&mut cursor, format)
        })
        .collect::<io::Result<Vec<_>>>()
}

/// Writes a slice of polynomials to buffer
pub(crate) fn write_polynomial_slice<W: io::Write, F: SerdePrimeField, B>(
slice: &[Polynomial<F, B>],
writer: &mut W,
format: SerdeFormat,
) -> io::Result<()> {
writer.write_all(&(slice.len() as u32).to_be_bytes())?;
// then write each polynomial's len
for poly in slice.iter() {
writer.write_all(&(poly.num_coeffs() as u32).to_be_bytes())?;
}

for poly in slice.iter() {
poly.write(writer, format)?;
}
Expand All @@ -150,5 +209,6 @@ pub(crate) fn write_polynomial_slice<W: io::Write, F: SerdePrimeField, B>(
/// Gets the total number of bytes of a slice of polynomials, assuming all polynomials are the same length
///
/// The total is made up of 4 bytes for the slice length, 4 bytes per polynomial for its
/// entry in the length table, and, for each polynomial, a 4-byte prefix plus `field_len`
/// bytes per element. The per-polynomial element count is taken from the first polynomial
/// (0 if the slice is empty).
pub(crate) fn polynomial_slice_byte_length<F: PrimeField, B>(slice: &[Polynomial<F, B>]) -> usize {
let field_len = F::default().to_repr().as_ref().len();
4 + slice.len() * (4 + field_len * slice.first().map(|poly| poly.len()).unwrap_or(0))
4 + 4 * slice.len()
+ slice.len() * (4 + field_len * slice.first().map(|poly| poly.len()).unwrap_or(0))
}
1 change: 1 addition & 0 deletions halo2_proofs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub mod transcript;
pub mod dev;
mod helpers;
pub use helpers::SerdeFormat;
pub use helpers::SerdePrimeField;

#[cfg(feature = "icicle_gpu")]
#[allow(unsafe_code)]
Expand Down
20 changes: 14 additions & 6 deletions halo2_proofs/src/plonk/mv_lookup/prover.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,12 @@ use group::{
};
use rustc_hash::FxHashMap as HashMap;

use maybe_rayon::prelude::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
use std::{
iter,
ops::{Mul, MulAssign},
};

use maybe_rayon::prelude::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};

#[derive(Debug)]
pub(in crate::plonk) struct Prepared<C: CurveAffine> {
compressed_inputs_expressions: Vec<Polynomial<C::Scalar, LagrangeCoeff>>,
Expand Down Expand Up @@ -219,9 +218,9 @@ impl<F: WithSmallOrderMulGroup<3>> Argument<F> {
}

// commit to m(X)
let start = instant::Instant::now();
let blind = Blind(C::Scalar::ZERO);
let m_commitment = params.commit_lagrange(&m_values, blind).to_affine();
let start = instant::Instant::now();
let m_commitment = params.commit_lagrange(&m_values, blind.clone()).to_affine();
log::trace!("m_commitment {:?}", start.elapsed());

// write commitment of m(X) to transcript
Expand All @@ -242,6 +241,7 @@ impl<C: CurveAffine> Prepared<C> {
vk: &VerifyingKey<C>,
params: &P,
beta: ChallengeBeta<C>,
phi_blinds: &[C::Scalar],
) -> Result<Committed<C>, Error> {
/*
φ_i(X) = f_i(X) + α
Expand Down Expand Up @@ -322,6 +322,12 @@ impl<C: CurveAffine> Prepared<C> {
// Compute the evaluations of the lookup grand sum polynomial
// over our domain, starting with phi[0] = 0
let blinding_factors = vk.cs.blinding_factors();

assert!(
phi_blinds.len() == blinding_factors,
"invalid number of blinding factors"
);

let phi = iter::once(C::Scalar::ZERO)
.chain(log_derivatives_diff)
.scan(C::Scalar::ZERO, |state, cur| {
Expand All @@ -332,7 +338,7 @@ impl<C: CurveAffine> Prepared<C> {
// be a 0
.take(params.n() as usize - blinding_factors)
// Chain random blinding factors.
.chain((0..blinding_factors).map(|_| C::Scalar::ZERO))
.chain(phi_blinds.into_iter().map(|&x| x))
.collect::<Vec<_>>();
assert_eq!(phi.len(), params.n() as usize);
let phi = vk.domain.lagrange_from_vec(phi);
Expand Down Expand Up @@ -400,7 +406,9 @@ impl<C: CurveAffine> Prepared<C> {

let grand_sum_blind = Blind(C::Scalar::ZERO);
let start = instant::Instant::now();
let phi_commitment = params.commit_lagrange(&phi, grand_sum_blind).to_affine();
let phi_commitment = params
.commit_lagrange(&phi, grand_sum_blind.clone())
.to_affine();
log::trace!(" - phi_commitment {:?}", start.elapsed());

// Hash grand sum commitment
Expand Down
8 changes: 7 additions & 1 deletion halo2_proofs/src/plonk/prover.rs
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,7 @@ where
log::trace!("Theta challenge: {:?}", start.elapsed());

let start = Instant::now();

#[cfg(feature = "mv-lookup")]
let lookups: Vec<Vec<lookup::prover::Prepared<Scheme::Curve>>> = instance
.par_iter()
Expand Down Expand Up @@ -564,6 +565,11 @@ where

// preallocate the lookups

#[cfg(feature = "mv-lookup")]
let phi_blinds = (0..pk.vk.cs.blinding_factors())
.map(|_| Scheme::Scalar::random(&mut rng))
.collect::<Vec<_>>();

#[cfg(feature = "mv-lookup")]
let commit_lookups = || -> Result<Vec<Vec<lookup::prover::Committed<Scheme::Curve>>>, _> {
lookups
Expand All @@ -572,7 +578,7 @@ where
// Construct and commit to products for each lookup
let res = lookups
.into_par_iter()
.map(|lookup| lookup.commit_grand_sum(&pk.vk, params, beta))
.map(|lookup| lookup.commit_grand_sum(&pk.vk, params, beta, &phi_blinds))
.collect::<Result<Vec<_>, _>>();

res
Expand Down
Loading

0 comments on commit 74c0a12

Please sign in to comment.