- add some benches

- cleanup dependencies - will work on WASM and GM17
2019-01-23 21:02:11 +03:00 · 2019-01-23 21:02:11 +03:00 · e775b47d99
commit e775b47d99
parent 6e5cfe211f
8 changed files with 144 additions and 178 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,3 @@
 target
 Cargo.lock
 pkg
--- a/Cargo.toml
+++ b/Cargo.toml
@ -6,7 +6,10 @@ homepage = "https://github.com/matterinc/bellman"
 license = "MIT/Apache-2.0"
 name = "bellman"
 repository = "https://github.com/matterinc/bellman"
-version = "0.1.2"
+version = "0.1.3"
 [lib]
 crate-type = ["cdylib", "lib", "staticlib"]
 [dependencies]
 rand = "0.4"
@ -18,8 +21,6 @@ crossbeam = "0.3"
 pairing = { git = 'https://github.com/matterinc/pairing' }
 byteorder = "1"
 ff = { git = 'https://github.com/matterinc/ff', features = ["derive"] }
 pbr = "1.0.1"
 time = "0.1"
 [features]
 default = []
--- a/src/domain.rs
+++ b/src/domain.rs
@ -509,3 +509,59 @@ fn parallel_fft_consistency() {
    test_consistency::<Bls12, _>(rng);
 }
 /// Benchmark-style test: times one `mul_assign` call between two evaluation
 /// domains of `SAMPLES` random BN256 scalars each and prints the total and the
 /// per-sample duration. It makes no assertion about the product itself.
 #[test]
 fn test_field_element_multiplication_bn256() {
    use rand::{self, Rand};
    use pairing::bn256::{Bn256, Fr};
    use num_cpus;
    const SAMPLES: usize = 1 << 27;
    let cpus = num_cpus::get();
    let rng = &mut rand::thread_rng();
    // Build both operands up front so that input generation is not timed.
    let lhs: Vec<_> = (0..SAMPLES).map(|_| Scalar::<Bn256>(Fr::rand(rng))).collect();
    let rhs: Vec<_> = (0..SAMPLES).map(|_| Scalar::<Bn256>(Fr::rand(rng))).collect();
    let mut lhs = EvaluationDomain::from_coeffs(lhs).unwrap();
    let rhs = EvaluationDomain::from_coeffs(rhs).unwrap();
    let pool = Worker::new();
    // Only the multiplication sits between the two clock reads.
    let timer = std::time::Instant::now();
    lhs.mul_assign(&pool, &rhs);
    let elapsed_ns = timer.elapsed().as_nanos() as f64;
    println!("Elapsed {} ns for {} samples", elapsed_ns, SAMPLES);
    let ns_per_sample = elapsed_ns / (SAMPLES as f64);
    println!("Tested on {} samples on {} CPUs with {} ns per field element multiplication", SAMPLES, cpus, ns_per_sample);
 }
 /// Benchmark-style test: times a single `ifft` call over an evaluation domain
 /// of `SAMPLES` random BN256 scalars and prints the total and the per-sample
 /// duration. It makes no assertion about the transformed values.
 #[test]
 fn test_fft_bn256() {
    use rand::{self, Rand};
    use pairing::bn256::Bn256;
    use pairing::bn256::Fr;
    use num_cpus;
    let cpus = num_cpus::get();
    const SAMPLES: usize = 1 << 27;
    let rng = &mut rand::thread_rng();
    // Build the input up front so that input generation is not timed.
    let v1 = (0..SAMPLES).map(|_| Scalar::<Bn256>(Fr::rand(rng))).collect::<Vec<_>>();
    let mut v1 = EvaluationDomain::from_coeffs(v1).unwrap();
    let pool = Worker::new();
    // Only the inverse FFT sits between the two clock reads.
    let start = std::time::Instant::now();
    v1.ifft(&pool);
    let duration_ns = start.elapsed().as_nanos() as f64;
    println!("Elapsed {} ns for {} samples", duration_ns, SAMPLES);
    let time_per_sample = duration_ns/(SAMPLES as f64);
    // The label names the operation actually measured (the inverse FFT), not
    // the field multiplication benchmarked by the sibling test.
    println!("Tested on {} samples on {} CPUs with {} ns per sample for inverse FFT", SAMPLES, cpus, time_per_sample);
 }
--- a/src/groth16/generator.rs
+++ b/src/groth16/generator.rs
@ -1,9 +1,5 @@
 extern crate time;
 use super::super::verbose_flag;
 use self::time::PreciseTime;
 use rand::Rng;
 use std::sync::Arc;
@ -255,7 +251,9 @@ pub fn generate_parameters<E, C>(
    {
        // Compute powers of tau
        if verbose {eprintln!("computing powers of tau...")};
-        let start = PreciseTime::now();
+
        let start = std::time::Instant::now();
        {
            let powers_of_tau = powers_of_tau.as_mut();
            worker.scope(powers_of_tau.len(), |scope, chunk| {
@ -272,14 +270,16 @@ pub fn generate_parameters<E, C>(
                }
            });
        }
-        if verbose {eprintln!("powers of tau stage 1 done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0);};
+        if verbose {eprintln!("powers of tau stage 1 done in {} s", start.elapsed().as_millis() as f64 / 1000.0);};
        // coeff = t(x) / delta
        let mut coeff = powers_of_tau.z(&tau);
        coeff.mul_assign(&delta_inverse);
        if verbose {eprintln!("computing the H query with multiple threads...")};
-        let start = PreciseTime::now();
+
        let start = std::time::Instant::now();
        // Compute the H query with multiple threads
        worker.scope(h.len(), |scope, chunk| {
            for (h, p) in h.chunks_mut(chunk).zip(powers_of_tau.as_ref().chunks(chunk))
@ -302,17 +302,18 @@ pub fn generate_parameters<E, C>(
                });
            }
        });
-        if verbose {eprintln!("computing the H query done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0);};
+        if verbose {eprintln!("computing the H query done in {} s", start.elapsed().as_millis() as f64 / 1000.0);};
    }
    if verbose {eprintln!("using inverse FFT to convert powers of tau to Lagrange coefficients...")};
-    let start = PreciseTime::now();
+    
    let start = std::time::Instant::now();
    // Use inverse FFT to convert powers of tau to Lagrange coefficients
    powers_of_tau.ifft(&worker);
    let powers_of_tau = powers_of_tau.into_coeffs();
-    if verbose {eprintln!("powers of tau stage 2 done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0)};
+    if verbose {eprintln!("powers of tau stage 2 done in {} s", start.elapsed().as_millis() as f64 / 1000.0)};
    let mut a = vec![E::G1::zero(); assembly.num_inputs + assembly.num_aux];
    let mut b_g1 = vec![E::G1::zero(); assembly.num_inputs + assembly.num_aux];
@ -321,7 +322,7 @@ pub fn generate_parameters<E, C>(
    let mut l = vec![E::G1::zero(); assembly.num_aux];
    if verbose {eprintln!("evaluating polynomials...")};
-    let start = PreciseTime::now();
+    let start = std::time::Instant::now();
    fn eval<E: Engine>(
        // wNAF window tables
@ -474,7 +475,7 @@ pub fn generate_parameters<E, C>(
        &worker
    );
-    if verbose {eprintln!("evaluating polynomials done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0);};
+    if verbose {eprintln!("evaluating polynomials done in {} s", start.elapsed().as_millis() as f64 / 1000.0);};
    // Don't allow any elements be unconstrained, so that
    // the L query is always fully dense.
--- a/src/groth16/prover.rs
+++ b/src/groth16/prover.rs
@ -1,6 +1,3 @@
 extern crate time;
 use self::time::PreciseTime;
 use super::super::verbose_flag;
 use rand::Rng;
@ -173,7 +170,7 @@ impl<E:Engine> PreparedProver<E> {
        let vk = params.get_vk(self.assignment.input_assignment.len())?;
-        let h_start = PreciseTime::now();
+        let start = std::time::Instant::now();
        let h = {
            let mut a = EvaluationDomain::from_coeffs(prover.a)?;
@ -209,10 +206,9 @@ impl<E:Engine> PreparedProver<E> {
            multiexp(&worker, params.get_h(a.len())?, FullDensity, a)
        };
-        let h_end = PreciseTime::now();
+        if verbose {eprintln!("{} seconds for prover for H evaluation", start.elapsed().as_secs())};
        if verbose {eprintln!("{} seconds for prover for H evaluation", h_start.to(h_end))};
-        let points_start = PreciseTime::now();
+        let start = std::time::Instant::now();
        // TODO: Check that difference in operations for different chunks is small
@ -283,8 +279,7 @@ impl<E:Engine> PreparedProver<E> {
        g_c.add_assign(&h.wait()?);
        g_c.add_assign(&l.wait()?);
-        let points_end = PreciseTime::now();
+        if verbose {eprintln!("{} seconds for prover for point multiplication", start.elapsed().as_secs())};
        if verbose {eprintln!("{} seconds for prover for point multiplication", points_start.to(points_end))};
        Ok(Proof {
            a: g_a.into_affine(),
@ -437,7 +432,7 @@ pub fn create_proof<E, C, P: ParameterSource<E>>(
    let vk = params.get_vk(prover.input_assignment.len())?;
-    let h_start = PreciseTime::now();
+    let start = std::time::Instant::now();
    let h = {
        let mut a = EvaluationDomain::from_coeffs(prover.a)?;
@ -473,10 +468,9 @@ pub fn create_proof<E, C, P: ParameterSource<E>>(
        multiexp(&worker, params.get_h(a.len())?, FullDensity, a)
    };
-    let h_end = PreciseTime::now();
+    if verbose {eprintln!("{} seconds for prover for H evaluation", start.elapsed().as_secs())};
    if verbose {eprintln!("{} seconds for prover for H evaluation", h_start.to(h_end))};
-    let points_start = PreciseTime::now();
+    let start = std::time::Instant::now();
    // TODO: Check that difference in operations for different chunks is small
@ -547,8 +541,7 @@ pub fn create_proof<E, C, P: ParameterSource<E>>(
    g_c.add_assign(&h.wait()?);
    g_c.add_assign(&l.wait()?);
-    let points_end = PreciseTime::now();
+    if verbose {eprintln!("{} seconds for prover for point multiplication", start.elapsed().as_secs())};
    if verbose {eprintln!("{} seconds for prover for point multiplication", points_start.to(points_end))};
    Ok(Proof {
        a: g_a.into_affine(),
--- a/src/lib.rs
+++ b/src/lib.rs
@ -14,7 +14,6 @@ pub mod multicore;
 mod multiexp;
 pub mod domain;
 pub mod groth16;
 pub mod progress_bar;
 use pairing::{Engine};
 use ff::Field;
--- a/src/multiexp.rs
+++ b/src/multiexp.rs
@ -142,6 +142,36 @@ impl DensityTracker {
    }
 }
 /// This ingenious piece of code works in the following way:
 /// - choose `c` - the bit length of the region that one thread works on
 /// - make `2^c - 1` buckets and initialize them with `G = infinity` (that's equivalent of zero)
 /// - there is no bucket for "zero" because it's not necessary
 /// - go over the pairs `(base, scalar)`
 /// - for each scalar calculate `scalar % 2^c` and add the base (without any multiplications!) to the 
 /// corresponding bucket
 /// - at the end each bucket will have an accumulated value that should be multiplied by the corresponding factor
 /// between `1` and `2^c - 1` to get the right value
 /// - here comes the first trick - you don't need to do multiplications at all, just add all the buckets together
 /// starting from the first one `(a + b + c + ...)` and then add to the first sum another sum of the form
 /// `(b + c + d + ...)`, and then the third one `(c + d + ...)`, that will result in the proper prefactor in front of every
 /// accumulator, without any multiplication operations at all
 /// - that's of course not enough, so spawn the next thread
 /// - this thread works with the same bit width `c`, but SKIPS the lower bits completely, so it actually takes values
 /// in the form `(scalar >> c) % 2^c`, so works on the next region
 /// - spawn more threads until you exhaust all the bit length
 /// - you will get roughly `[bitlength / c] + 1` accumulators
 /// - double the highest accumulator enough times, add to the next one, double the result, add the next accumulator, continue
 /// 
 /// Demo why it works:
 /// ```text
 ///     a * G + b * H = (a_2 * (2^c)^2 + a_1 * (2^c)^1 + a_0) * G + (b_2 * (2^c)^2 + b_1 * (2^c)^1 + b_0) * H
 /// ```
 /// - make buckets over `0` labeled coefficients
 /// - make buckets over `1` labeled coefficients
 /// - make buckets over `2` labeled coefficients
 /// - accumulators over each set of buckets will have an implicit factor of `(2^c)^i`, so before summing them up
 /// "higher" accumulators must be doubled `c` times
 ///
 fn multiexp_inner<Q, D, G, S>(
    pool: &Worker,
    bases: S,
@ -195,7 +225,7 @@ fn multiexp_inner<Q, D, G, S>(
                    } else {
                        // Place multiplication into the bucket: Separate s * P as 
                        // (s/2^c) * P + (s mod 2^c) P
-                        // First multiplication is c bits less, do one can do it,
+                        // First multiplication is c bits less, so one can do it,
                        // sum results from different buckets and double it c times,
                        // then add with (s mod 2^c) P parts
                        let mut exp = exp;
@ -317,3 +347,34 @@ fn test_with_bls12() {
    assert_eq!(naive, fast);
 }
 /// Benchmark-style test: times one `multiexp` call over `SAMPLES` random
 /// BN256 scalar/G1-point pairs and prints the total and the per-sample
 /// duration. The result is discarded; nothing is asserted about it.
 #[test]
 fn test_speed_with_bn256() {
    use rand::{self, Rand};
    use pairing::bn256::Bn256;
    use num_cpus;
    const SAMPLES: usize = 1 << 22;
    let cpus = num_cpus::get();
    let rng = &mut rand::thread_rng();
    // Scalars are drawn first, then the bases, both from the same RNG.
    let scalars: Vec<_> = (0..SAMPLES)
        .map(|_| <Bn256 as ScalarEngine>::Fr::rand(rng).into_repr())
        .collect();
    let bases: Vec<_> = (0..SAMPLES)
        .map(|_| <Bn256 as Engine>::G1::rand(rng).into_affine())
        .collect();
    let scalars = Arc::new(scalars);
    let bases = Arc::new(bases);
    let pool = Worker::new();
    // The timed region covers both scheduling the work and waiting for it.
    let timer = std::time::Instant::now();
    let pending = multiexp(&pool, (bases, 0), FullDensity, scalars);
    let _fast = pending.wait().unwrap();
    let elapsed_ns = timer.elapsed().as_nanos() as f64;
    println!("Elapsed {} ns for {} samples", elapsed_ns, SAMPLES);
    let ns_per_sample = elapsed_ns / (SAMPLES as f64);
    println!("Tested on {} samples on {} CPUs with {} ns per multiplication", SAMPLES, cpus, ns_per_sample);
 }
--- a/src/progress_bar.rs
+++ b/src/progress_bar.rs
@ -1,146 +0,0 @@
 extern crate time;
 use std::io::{Write};
 use std::sync::{
    mpsc::{channel, Sender, Receiver}, 
    Arc, 
    atomic::{AtomicUsize, Ordering}
 };
 use self::time::precise_time_ns;
 use std::time::Duration;
 static UPDATE_INTERVAL: u64 = 1000_000 * 1000; // nanoseconds (1000 ms); compared against `precise_time_ns()` differences in `MultiBar::listen`
 /// Receiving side of the progress indicator: collects increments sent by the
 /// `ProgressBar`s it created and prints aggregated progress in `listen`.
 pub struct MultiBar {
    /// Bars handed out by `create_bar` that have not yet sent their finish marker (`0`).
    n_workers:  u64,
    /// Sum of the `chunk` sizes of all bars created so far.
    total:      u64,
    /// Sum of all increments received so far.
    cur:        u64,
    /// Value of `cur` at the last printed update.
    prev:       u64,
    /// `precise_time_ns()` reading taken at construction or at the last printed update.
    prev_time:  u64,
    /// Nanoseconds accumulated across printed updates.
    total_elapsed: u64,
    /// Reporting threshold shared with every bar; re-tuned by `listen` after each print.
    step:       Arc<AtomicUsize>,
    /// Sender that is cloned into every bar created by `create_bar`.
    tx:         Sender<u64>,
    /// Receiving end drained by `listen`.
    rx:         Receiver<u64>,
 }
 /// Per-worker handle that buffers progress locally and forwards it over a
 /// channel once the buffered amount exceeds the shared `step`.
 pub struct ProgressBar {
    //chunk:  u64,
    /// Increments accumulated by `add` since the last message was sent.
    acc:    u64,
    /// Reporting threshold shared with the `MultiBar` that created this bar.
    step:   Arc<AtomicUsize>,
    /// Channel to the `MultiBar`; `finish` takes it out, leaving `None`.
    tx:     Option<Sender<u64>>,
 }
 /// Simple efficient thread-safe progress indicator
 /// It follows the interface of [https://github.com/a8m/pb](https://github.com/a8m/pb)
 impl MultiBar {
    /// Create a new MultiBar for stdout
    pub fn new() -> Self {
        let (tx, rx) = channel();
        Self{
            n_workers:  0,
            total:      0,
            cur:        0,
            prev:       0,
            prev_time:  precise_time_ns(),
            total_elapsed: 0,
            step:       Arc::new(AtomicUsize::new(1)),
            tx, 
            rx,
        }
    }
    // Create a ProgressBar for a process of `total` steps
    pub fn create_bar(&mut self, chunk: u64) -> ProgressBar {
        self.n_workers += 1;
        self.total += chunk;
        //println!("step 0 of {}", chunk);
        ProgressBar{
            //chunk,
            acc:    0,
            tx:     Some(Sender::clone(&self.tx)),
            step:   Arc::clone(&self.step),
        }
    }
    /// Start listening for updates from ProgressBars in different threads
    pub fn listen(&mut self) {
        //println!("");
        for d in &self.rx {
            if d == 0 {
                self.n_workers -= 1;
            }
            if self.n_workers == 0 { 
                break; 
            }
            self.cur += d;
            let processed = self.cur - self.prev;
            if processed > self.step.load(Ordering::Acquire) as u64 * self.n_workers {
                let now = time::precise_time_ns();
                let elapsed = now - self.prev_time;
                if elapsed > UPDATE_INTERVAL {
                    self.prev = self.cur;
                    self.prev_time = now;
                    self.total_elapsed += elapsed;
                    print!("\rprocessed {:2}%: {} of {}.", self.cur * 100 / self.total, self.cur, self.total);
                    let r = Duration::from_nanos((self.total - self.cur) * self.total_elapsed / self.cur).as_secs();
                    print!(" Remaining estimated: {} h {} min {} s", r / 3600, r % 3600 / 60, r % 60);
                    let new_step = (self.cur * UPDATE_INTERVAL / self.total_elapsed) / self.n_workers;
                    self.step.store(new_step as usize, Ordering::Release);
                    std::io::stdout().flush().unwrap();
                }
            }
        }
        println!("\rdone                                                                   ");
    }
 }
 impl ProgressBar {
    /// Increment progress by `d` steps.
    ///
    /// The increment is buffered in `acc`; a message is sent only once the
    /// buffered amount exceeds the shared `step`, after which the buffer is
    /// reset.
    pub fn add(&mut self, d: u64) {
        self.acc += d;
        let threshold = self.step.load(Ordering::Relaxed) as u64;
        if self.acc <= threshold {
            return;
        }
        if let Some(sender) = self.tx.as_ref() {
            sender.send(self.acc).unwrap();
        }
        self.acc = 0;
    }
    /// Finish the process: sends the `0` finish marker and drops the sender.
    ///
    /// Whatever is still buffered in `acc` is not sent. Panics if the sender
    /// has already been taken, i.e. on a second call.
    pub fn finish(&mut self) {
        let sender = self.tx.take().unwrap();
        sender.send(0).unwrap();
        drop(sender);
    }
 }
 /// Manual smoke test for the progress display. The range `1..=0` is empty, so
 /// no worker thread is spawned and the `listen` call stays commented out; as
 /// written the test only constructs a `MultiBar`.
 #[test]
 fn test_progress_display() {
    let mut multi_bar = MultiBar::new();
    for _worker in 1..=0 {
        let mut bar = multi_bar.create_bar(3600000);
        std::thread::spawn(move || {
            for _tick in 0..3600000 {
                std::thread::sleep(Duration::from_millis(1));
                bar.add(1);
            }
            bar.finish();
        });
    }
    //multi_bar.listen();
 }