- add some benches
- clean up dependencies - will work on WASM and GM17
parent 6e5cfe211f
commit e775b47d99

.gitignore (vendored)
@@ -1,2 +1,3 @@
 target
 Cargo.lock
+pkg
Cargo.toml

@@ -6,7 +6,10 @@ homepage = "https://github.com/matterinc/bellman"
 license = "MIT/Apache-2.0"
 name = "bellman"
 repository = "https://github.com/matterinc/bellman"
-version = "0.1.2"
+version = "0.1.3"
 
+[lib]
+crate-type = ["cdylib", "lib", "staticlib"]
+
 [dependencies]
 rand = "0.4"
@@ -18,8 +21,6 @@ crossbeam = "0.3"
 pairing = { git = 'https://github.com/matterinc/pairing' }
 byteorder = "1"
 ff = { git = 'https://github.com/matterinc/ff', features = ["derive"] }
-pbr = "1.0.1"
-time = "0.1"
 
 [features]
 default = []
src/domain.rs

@@ -509,3 +509,59 @@ fn parallel_fft_consistency() {
 
     test_consistency::<Bls12, _>(rng);
 }
+
+#[test]
+fn test_field_element_multiplication_bn256() {
+    use rand::{self, Rand};
+    use pairing::bn256::Bn256;
+    use pairing::bn256::Fr;
+    use num_cpus;
+
+    let cpus = num_cpus::get();
+    const SAMPLES: usize = 1 << 27;
+
+    let rng = &mut rand::thread_rng();
+    let v1 = (0..SAMPLES).map(|_| Scalar::<Bn256>(Fr::rand(rng))).collect::<Vec<_>>();
+    let v2 = (0..SAMPLES).map(|_| Scalar::<Bn256>(Fr::rand(rng))).collect::<Vec<_>>();
+
+    let mut v1 = EvaluationDomain::from_coeffs(v1).unwrap();
+    let v2 = EvaluationDomain::from_coeffs(v2).unwrap();
+
+    let pool = Worker::new();
+
+    let start = std::time::Instant::now();
+
+    v1.mul_assign(&pool, &v2);
+
+    let duration_ns = start.elapsed().as_nanos() as f64;
+    println!("Elapsed {} ns for {} samples", duration_ns, SAMPLES);
+    let time_per_sample = duration_ns / (SAMPLES as f64);
+    println!("Tested on {} samples on {} CPUs with {} ns per field element multiplication", SAMPLES, cpus, time_per_sample);
+}
+
+#[test]
+fn test_fft_bn256() {
+    use rand::{self, Rand};
+    use pairing::bn256::Bn256;
+    use pairing::bn256::Fr;
+    use num_cpus;
+
+    let cpus = num_cpus::get();
+    const SAMPLES: usize = 1 << 27;
+
+    let rng = &mut rand::thread_rng();
+    let v1 = (0..SAMPLES).map(|_| Scalar::<Bn256>(Fr::rand(rng))).collect::<Vec<_>>();
+
+    let mut v1 = EvaluationDomain::from_coeffs(v1).unwrap();
+
+    let pool = Worker::new();
+
+    let start = std::time::Instant::now();
+
+    v1.ifft(&pool);
+
+    let duration_ns = start.elapsed().as_nanos() as f64;
+    println!("Elapsed {} ns for {} samples", duration_ns, SAMPLES);
+    let time_per_sample = duration_ns / (SAMPLES as f64);
+    println!("Tested on {} samples on {} CPUs with {} ns per sample for iFFT", SAMPLES, cpus, time_per_sample);
+}
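Both benches above use the same timing recipe: take `std::time::Instant::now()` before the operation and divide `elapsed().as_nanos()` (a `u128`) by the sample count. A minimal sketch of that recipe factored into a reusable helper; `time_it` is illustrative only and not part of this commit:

```rust
use std::time::Instant;

// Illustrative helper (not in this commit): run `f`, print total and
// per-sample wall-clock time, and return whatever `f` produced.
fn time_it<T>(label: &str, samples: usize, f: impl FnOnce() -> T) -> T {
    let start = Instant::now();
    let out = f();
    let total_ns = start.elapsed().as_nanos() as f64;
    println!(
        "{}: {} ns total, {} ns per sample over {} samples",
        label,
        total_ns,
        total_ns / (samples as f64),
        samples
    );
    out
}

fn main() {
    // Trivial workload standing in for mul_assign or ifft on an EvaluationDomain.
    let sum = time_it("u64 additions", 1 << 20, || (0u64..1 << 20).sum::<u64>());
    println!("checksum: {}", sum);
}
```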
src/groth16/generator.rs

@@ -1,9 +1,5 @@
-extern crate time;
-
 use super::super::verbose_flag;
 
-use self::time::PreciseTime;
-
 use rand::Rng;
 
 use std::sync::Arc;
@@ -255,7 +251,9 @@ pub fn generate_parameters<E, C>(
     {
         // Compute powers of tau
         if verbose {eprintln!("computing powers of tau...")};
-        let start = PreciseTime::now();
+
+        let start = std::time::Instant::now();
+
         {
             let powers_of_tau = powers_of_tau.as_mut();
             worker.scope(powers_of_tau.len(), |scope, chunk| {

@@ -272,14 +270,16 @@ pub fn generate_parameters<E, C>(
                 }
             });
         }
-        if verbose {eprintln!("powers of tau stage 1 done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0);};
+        if verbose {eprintln!("powers of tau stage 1 done in {} s", start.elapsed().as_millis() as f64 / 1000.0);};
 
         // coeff = t(x) / delta
         let mut coeff = powers_of_tau.z(&tau);
         coeff.mul_assign(&delta_inverse);
 
         if verbose {eprintln!("computing the H query with multiple threads...")};
-        let start = PreciseTime::now();
+
+        let start = std::time::Instant::now();
+
         // Compute the H query with multiple threads
         worker.scope(h.len(), |scope, chunk| {
             for (h, p) in h.chunks_mut(chunk).zip(powers_of_tau.as_ref().chunks(chunk))

@@ -302,17 +302,18 @@ pub fn generate_parameters<E, C>(
                 });
             }
         });
-        if verbose {eprintln!("computing the H query done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0);};
+        if verbose {eprintln!("computing the H query done in {} s", start.elapsed().as_millis() as f64 / 1000.0);};
     }
 
     if verbose {eprintln!("using inverse FFT to convert powers of tau to Lagrange coefficients...")};
-    let start = PreciseTime::now();
+
+    let start = std::time::Instant::now();
 
     // Use inverse FFT to convert powers of tau to Lagrange coefficients
     powers_of_tau.ifft(&worker);
     let powers_of_tau = powers_of_tau.into_coeffs();
 
-    if verbose {eprintln!("powers of tau stage 2 done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0)};
+    if verbose {eprintln!("powers of tau stage 2 done in {} s", start.elapsed().as_millis() as f64 / 1000.0)};
 
     let mut a = vec![E::G1::zero(); assembly.num_inputs + assembly.num_aux];
     let mut b_g1 = vec![E::G1::zero(); assembly.num_inputs + assembly.num_aux];

@@ -321,7 +322,7 @@ pub fn generate_parameters<E, C>(
     let mut l = vec![E::G1::zero(); assembly.num_aux];
 
     if verbose {eprintln!("evaluating polynomials...")};
-    let start = PreciseTime::now();
+    let start = std::time::Instant::now();
 
     fn eval<E: Engine>(
         // wNAF window tables

@@ -474,7 +475,7 @@ pub fn generate_parameters<E, C>(
         &worker
     );
 
-    if verbose {eprintln!("evaluating polynomials done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0);};
+    if verbose {eprintln!("evaluating polynomials done in {} s", start.elapsed().as_millis() as f64 / 1000.0);};
 
     // Don't allow any elements be unconstrained, so that
     // the L query is always fully dense.
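Every timing change in generator.rs (and in prover.rs below) is the same mechanical swap: the external `time` crate's `PreciseTime::now()` / `start.to(PreciseTime::now()).num_milliseconds()` pair becomes `std::time::Instant::now()` / `start.elapsed().as_millis()` from the standard library, which is what allows the `time` dependency to be dropped from Cargo.toml. A minimal standalone sketch of the new pattern; `expensive_stage` is a stand-in closure, not code from the crate:

```rust
use std::time::Instant;

fn main() {
    let verbose = true;

    // Stand-in for a stage such as computing the powers of tau.
    let expensive_stage = || (0u64..5_000_000).fold(0u64, |acc, x| acc.wrapping_add(x.wrapping_mul(x)));

    let start = Instant::now();
    let result = expensive_stage();

    // Same formatting as the updated generator: whole milliseconds scaled to seconds.
    if verbose {
        eprintln!("stage done in {} s", start.elapsed().as_millis() as f64 / 1000.0);
    }

    println!("{}", result);
}
```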
src/groth16/prover.rs

@@ -1,6 +1,3 @@
-extern crate time;
-use self::time::PreciseTime;
-
 use super::super::verbose_flag;
 
 use rand::Rng;

@@ -173,7 +170,7 @@ impl<E:Engine> PreparedProver<E> {
 
         let vk = params.get_vk(self.assignment.input_assignment.len())?;
 
-        let h_start = PreciseTime::now();
+        let start = std::time::Instant::now();
 
         let h = {
             let mut a = EvaluationDomain::from_coeffs(prover.a)?;

@@ -209,10 +206,9 @@ impl<E:Engine> PreparedProver<E> {
             multiexp(&worker, params.get_h(a.len())?, FullDensity, a)
         };
 
-        let h_end = PreciseTime::now();
-        if verbose {eprintln!("{} seconds for prover for H evaluation", h_start.to(h_end))};
+        if verbose {eprintln!("{} seconds for prover for H evaluation", start.elapsed().as_secs())};
 
-        let points_start = PreciseTime::now();
+        let start = std::time::Instant::now();
 
         // TODO: Check that difference in operations for different chunks is small
 

@@ -283,8 +279,7 @@ impl<E:Engine> PreparedProver<E> {
         g_c.add_assign(&h.wait()?);
         g_c.add_assign(&l.wait()?);
 
-        let points_end = PreciseTime::now();
-        if verbose {eprintln!("{} seconds for prover for point multiplication", points_start.to(points_end))};
+        if verbose {eprintln!("{} seconds for prover for point multiplication", start.elapsed().as_secs())};
 
         Ok(Proof {
             a: g_a.into_affine(),

@@ -437,7 +432,7 @@ pub fn create_proof<E, C, P: ParameterSource<E>>(
 
     let vk = params.get_vk(prover.input_assignment.len())?;
 
-    let h_start = PreciseTime::now();
+    let start = std::time::Instant::now();
 
     let h = {
         let mut a = EvaluationDomain::from_coeffs(prover.a)?;

@@ -473,10 +468,9 @@ pub fn create_proof<E, C, P: ParameterSource<E>>(
         multiexp(&worker, params.get_h(a.len())?, FullDensity, a)
     };
 
-    let h_end = PreciseTime::now();
-    if verbose {eprintln!("{} seconds for prover for H evaluation", h_start.to(h_end))};
+    if verbose {eprintln!("{} seconds for prover for H evaluation", start.elapsed().as_secs())};
 
-    let points_start = PreciseTime::now();
+    let start = std::time::Instant::now();
 
    // TODO: Check that difference in operations for different chunks is small
 

@@ -547,8 +541,7 @@ pub fn create_proof<E, C, P: ParameterSource<E>>(
     g_c.add_assign(&h.wait()?);
     g_c.add_assign(&l.wait()?);
 
-    let points_end = PreciseTime::now();
-    if verbose {eprintln!("{} seconds for prover for point multiplication", points_start.to(points_end))};
+    if verbose {eprintln!("{} seconds for prover for point multiplication", start.elapsed().as_secs())};
 
     Ok(Proof {
         a: g_a.into_affine(),
src/lib.rs

@@ -14,7 +14,6 @@ pub mod multicore;
 mod multiexp;
 pub mod domain;
 pub mod groth16;
-pub mod progress_bar;
 
 use pairing::{Engine};
 use ff::Field;
src/multiexp.rs

@@ -142,6 +142,36 @@ impl DensityTracker {
     }
 }
 
+/// This genius piece of code works in the following way:
+/// - choose `c` - the bit length of the region that one thread works on
+/// - make `2^c - 1` buckets and initialize them with `G = infinity` (that's the equivalent of zero)
+/// - there is no bucket for "zero" because it's not necessary
+/// - go over the pairs `(base, scalar)`
+/// - for each scalar calculate `scalar % 2^c` and add the base (without any multiplications!) to the
+///   corresponding bucket
+/// - at the end each bucket will have an accumulated value that should be multiplied by the corresponding factor
+///   between `1` and `2^c - 1` to get the right value
+/// - here comes the first trick - you don't need to do multiplications at all, just add all the buckets together
+///   starting from the first one `(a + b + c + ...)` and then add to the first sum another sum of the form
+///   `(b + c + d + ...)`, and then the third one `(c + d + ...)`; that results in the proper prefactor in front of every
+///   accumulator, without any multiplication operations at all
+/// - that's of course not enough, so spawn the next thread
+/// - this thread works with the same bit width `c`, but skips the lower bits completely, so it actually takes values
+///   of the form `(scalar >> c) % 2^c` and works on the next region
+/// - spawn more threads until you exhaust the whole bit length
+/// - you will get roughly `[bitlength / c] + 1` accumulators
+/// - double the highest accumulator enough times, add to the next one, double the result, add the next accumulator, continue
+///
+/// Demo why it works:
+/// ```
+/// a * G + b * H = (a_2 * (2^c)^2 + a_1 * (2^c)^1 + a_0) * G + (b_2 * (2^c)^2 + b_1 * (2^c)^1 + b_0) * H
+/// ```
+/// - make buckets over `0`-labeled coefficients
+/// - make buckets over `1`-labeled coefficients
+/// - make buckets over `2`-labeled coefficients
+/// - accumulators over each set of buckets will have an implicit factor of `(2^c)^i`, so before summing them up
+///   the "higher" accumulators must be doubled `c` times
+///
 fn multiexp_inner<Q, D, G, S>(
     pool: &Worker,
     bases: S,

@@ -195,7 +225,7 @@ fn multiexp_inner<Q, D, G, S>(
             } else {
                 // Place multiplication into the bucket: Separate s * P as
                 // (s/2^c) * P + (s mod 2^c) P
-                // First multiplication is c bits less, do one can do it,
+                // First multiplication is c bits less, so one can do it,
                 // sum results from different buckets and double it c times,
                 // then add with (s mod 2^c) P parts
                 let mut exp = exp;

@@ -317,3 +347,34 @@ fn test_with_bls12() {
 
     assert_eq!(naive, fast);
 }
+
+#[test]
+fn test_speed_with_bn256() {
+    use rand::{self, Rand};
+    use pairing::bn256::Bn256;
+    use num_cpus;
+
+    let cpus = num_cpus::get();
+    const SAMPLES: usize = 1 << 22;
+
+    let rng = &mut rand::thread_rng();
+    let v = Arc::new((0..SAMPLES).map(|_| <Bn256 as ScalarEngine>::Fr::rand(rng).into_repr()).collect::<Vec<_>>());
+    let g = Arc::new((0..SAMPLES).map(|_| <Bn256 as Engine>::G1::rand(rng).into_affine()).collect::<Vec<_>>());
+
+    let pool = Worker::new();
+
+    let start = std::time::Instant::now();
+
+    let _fast = multiexp(
+        &pool,
+        (g, 0),
+        FullDensity,
+        v
+    ).wait().unwrap();
+
+    let duration_ns = start.elapsed().as_nanos() as f64;
+    println!("Elapsed {} ns for {} samples", duration_ns, SAMPLES);
+    let time_per_sample = duration_ns / (SAMPLES as f64);
+    println!("Tested on {} samples on {} CPUs with {} ns per multiplication", SAMPLES, cpus, time_per_sample);
+}
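The doc comment added above describes the standard bucket (Pippenger-style) multi-exponentiation argument: within one `c`-bit window, bases are only ever added into buckets, and the weights `1..2^c - 1` are recovered by summing running suffix sums instead of multiplying. The sketch below replays that argument with plain `u64`s standing in for curve points so the identity can be checked with `assert_eq!`; the helper name `bucket_window_sum` and all constants are illustrative and not part of bellman:

```rust
// Integer model of one window of the bucket method described above.
// "Points" are u64s and "point addition" is +, which keeps the weighting
// argument visible without any curve arithmetic.
fn bucket_window_sum(scalars: &[u64], points: &[u64], c: u32, window: u32) -> u64 {
    let mask = (1u64 << c) - 1;
    // 2^c - 1 buckets; index 0 of this Vec stands for digit value 1.
    let mut buckets = vec![0u64; (1usize << c) - 1];
    for (&s, &p) in scalars.iter().zip(points) {
        let digit = (s >> (c * window)) & mask;
        if digit != 0 {
            buckets[(digit - 1) as usize] += p; // additions only, no multiplications
        }
    }
    // Suffix-sum trick: the running sum visits the bucket for digit d exactly d
    // times, so `acc` ends up equal to sum(d * bucket[d]) without multiplying.
    let mut running = 0u64;
    let mut acc = 0u64;
    for b in buckets.iter().rev() {
        running += b;
        acc += running;
    }
    acc
}

fn main() {
    let c = 4u32;
    let scalars = [0x3A7u64, 0x1F2, 0x0B4, 0xFFF];
    let points = [3u64, 5, 7, 11];

    // Combine windows exactly as the doc comment says: double the highest
    // accumulator c times, add the next one, and so on.
    let windows = (64 + c - 1) / c;
    let mut total = 0u64;
    for w in (0..windows).rev() {
        total <<= c; // "double c times" in the integer model
        total += bucket_window_sum(&scalars, &points, c, w);
    }

    let expected: u64 = scalars.iter().zip(&points).map(|(s, p)| s * p).sum();
    assert_eq!(total, expected);
    println!("multi-'exponentiation' check passed: {}", total);
}
```

Replacing `u64` addition with group addition (and the shift with repeated doubling) gives exactly the procedure the comment, and `multiexp_inner` below it, describe.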
src/progress_bar.rs

@@ -1,146 +0,0 @@
-extern crate time;
-
-use std::io::{Write};
-use std::sync::{
-    mpsc::{channel, Sender, Receiver},
-    Arc,
-    atomic::{AtomicUsize, Ordering}
-};
-use self::time::precise_time_ns;
-use std::time::Duration;
-
-static UPDATE_INTERVAL: u64 = 1000_000 * 1000; // ms
-
-pub struct MultiBar {
-    n_workers: u64,
-
-    total: u64,
-    cur: u64,
-
-    prev: u64,
-    prev_time: u64,
-
-    total_elapsed: u64,
-
-    step: Arc<AtomicUsize>,
-    tx: Sender<u64>,
-    rx: Receiver<u64>,
-}
-
-pub struct ProgressBar {
-    //chunk: u64,
-    acc: u64,
-    step: Arc<AtomicUsize>,
-    tx: Option<Sender<u64>>,
-}
-
-/// Simple efficient thread-safe progress indicator
-/// It follows the interface of [https://github.com/a8m/pb](https://github.com/a8m/pb)
-impl MultiBar {
-
-    /// Create a new MultiBar for stdout
-    pub fn new() -> Self {
-        let (tx, rx) = channel();
-        Self{
-            n_workers: 0,
-            total: 0,
-            cur: 0,
-            prev: 0,
-            prev_time: precise_time_ns(),
-            total_elapsed: 0,
-            step: Arc::new(AtomicUsize::new(1)),
-            tx,
-            rx,
-        }
-    }
-
-    // Create a ProgressBar for a process of `total` steps
-    pub fn create_bar(&mut self, chunk: u64) -> ProgressBar {
-        self.n_workers += 1;
-        self.total += chunk;
-        //println!("step 0 of {}", chunk);
-        ProgressBar{
-            //chunk,
-            acc: 0,
-            tx: Some(Sender::clone(&self.tx)),
-            step: Arc::clone(&self.step),
-        }
-    }
-
-    /// Start listening for updates from ProgressBars in different threads
-    pub fn listen(&mut self) {
-        //println!("");
-        for d in &self.rx {
-            if d == 0 {
-                self.n_workers -= 1;
-            }
-            if self.n_workers == 0 {
-                break;
-            }
-
-            self.cur += d;
-            let processed = self.cur - self.prev;
-            if processed > self.step.load(Ordering::Acquire) as u64 * self.n_workers {
-                let now = time::precise_time_ns();
-                let elapsed = now - self.prev_time;
-
-                if elapsed > UPDATE_INTERVAL {
-                    self.prev = self.cur;
-                    self.prev_time = now;
-                    self.total_elapsed += elapsed;
-
-                    print!("\rprocessed {:2}%: {} of {}.", self.cur * 100 / self.total, self.cur, self.total);
-
-                    let r = Duration::from_nanos((self.total - self.cur) * self.total_elapsed / self.cur).as_secs();
-                    print!(" Remaining estimated: {} h {} min {} s", r / 3600, r % 3600 / 60, r % 60);
-
-                    let new_step = (self.cur * UPDATE_INTERVAL / self.total_elapsed) / self.n_workers;
-                    self.step.store(new_step as usize, Ordering::Release);
-
-                    std::io::stdout().flush().unwrap();
-                }
-            }
-
-        }
-        println!("\rdone ");
-    }
-}
-
-impl ProgressBar {
-
-    /// Increment progress by `d` steps
-    pub fn add(&mut self, d: u64) {
-        self.acc += d;
-        if self.acc > (self.step.load(Ordering::Relaxed) as u64) {
-            if let Some(tx) = &self.tx {
-                tx.send(self.acc).unwrap();
-            }
-            self.acc = 0;
-        }
-    }
-
-    /// Finish the process
-    pub fn finish(&mut self) {
-        let tx = self.tx.take().unwrap();
-        tx.send(0).unwrap();
-        drop(tx);
-    }
-}
-
-#[test]
-fn test_progress_display() {
-
-    let mut mb = MultiBar::new();
-
-    for _j in 1..=0 {
-        let mut pb = mb.create_bar(3600000);
-        std::thread::spawn(move || {
-            for _i in 0..3600000 {
-                std::thread::sleep(Duration::from_millis(1));
-                pb.add(1);
-            }
-            pb.finish();
-        });
-    };
-    //mb.listen();
-}