implement prefetch for nightly
This commit is contained in:
parent
2d9f5528d4
commit
07ca2e83b6
@ -25,6 +25,7 @@ byteorder = "1"
|
|||||||
futures-cpupool = {version = "0.1", optional = true}
|
futures-cpupool = {version = "0.1", optional = true}
|
||||||
num_cpus = {version = "1", optional = true}
|
num_cpus = {version = "1", optional = true}
|
||||||
crossbeam = {version = "0.7.1", optional = true}
|
crossbeam = {version = "0.7.1", optional = true}
|
||||||
|
prefetch = {version = "0.2", optional = true}
|
||||||
|
|
||||||
web-sys = {version = "0.3.17", optional = true, features = ["console", "Performance", "Window"]}
|
web-sys = {version = "0.3.17", optional = true, features = ["console", "Performance", "Window"]}
|
||||||
|
|
||||||
@ -33,8 +34,10 @@ blake2-rfc = {version = "0.2.18", optional = true}
|
|||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["multicore"]
|
default = ["multicore"]
|
||||||
|
#default = ["multicore", "nightly"]
|
||||||
#default = ["wasm"]
|
#default = ["wasm"]
|
||||||
multicore = ["futures-cpupool", "num_cpus", "crossbeam"]
|
multicore = ["futures-cpupool", "num_cpus", "crossbeam"]
|
||||||
sonic = ["tiny-keccak", "blake2-rfc"]
|
sonic = ["tiny-keccak", "blake2-rfc"]
|
||||||
gm17 = []
|
gm17 = []
|
||||||
wasm = ["web-sys"]
|
wasm = ["web-sys"]
|
||||||
|
nightly = ["prefetch"]
|
||||||
|
105
src/multiexp.rs
105
src/multiexp.rs
@ -17,6 +17,8 @@ use super::worker::Worker;
|
|||||||
|
|
||||||
use super::SynthesisError;
|
use super::SynthesisError;
|
||||||
|
|
||||||
|
use cfg_if;
|
||||||
|
|
||||||
/// This genius piece of code works in the following way:
|
/// This genius piece of code works in the following way:
|
||||||
/// - choose `c` - the bit length of the region that one thread works on
|
/// - choose `c` - the bit length of the region that one thread works on
|
||||||
/// - make `2^c - 1` buckets and initialize them with `G = infinity` (that's equivalent of zero)
|
/// - make `2^c - 1` buckets and initialize them with `G = infinity` (that's equivalent of zero)
|
||||||
@ -47,6 +49,7 @@ use super::SynthesisError;
|
|||||||
/// - accumulators over each set of buckets will have an implicit factor of `(2^c)^i`, so before summing them up
|
/// - accumulators over each set of buckets will have an implicit factor of `(2^c)^i`, so before summing them up
|
||||||
/// "higher" accumulators must be doubled `c` times
|
/// "higher" accumulators must be doubled `c` times
|
||||||
///
|
///
|
||||||
|
#[cfg(not(feature = "nightly"))]
|
||||||
fn multiexp_inner<Q, D, G, S>(
|
fn multiexp_inner<Q, D, G, S>(
|
||||||
pool: &Worker,
|
pool: &Worker,
|
||||||
bases: S,
|
bases: S,
|
||||||
@ -56,7 +59,7 @@ fn multiexp_inner<Q, D, G, S>(
|
|||||||
mut skip: u32,
|
mut skip: u32,
|
||||||
c: u32,
|
c: u32,
|
||||||
handle_trivial: bool
|
handle_trivial: bool
|
||||||
) -> Box<Future<Item=<G as CurveAffine>::Projective, Error=SynthesisError>>
|
) -> Box<dyn Future<Item=<G as CurveAffine>::Projective, Error=SynthesisError>>
|
||||||
where for<'a> &'a Q: QueryDensity,
|
where for<'a> &'a Q: QueryDensity,
|
||||||
D: Send + Sync + 'static + Clone + AsRef<Q>,
|
D: Send + Sync + 'static + Clone + AsRef<Q>,
|
||||||
G: CurveAffine,
|
G: CurveAffine,
|
||||||
@ -153,6 +156,53 @@ fn multiexp_inner<Q, D, G, S>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
cfg_if! {
|
||||||
|
if #[cfg(feature = "nightly")] {
|
||||||
|
#[inline(always)]
|
||||||
|
fn multiexp_inner_impl<Q, D, G, S>(
|
||||||
|
pool: &Worker,
|
||||||
|
bases: S,
|
||||||
|
density_map: D,
|
||||||
|
exponents: Arc<Vec<<G::Scalar as PrimeField>::Repr>>,
|
||||||
|
skip: u32,
|
||||||
|
c: u32,
|
||||||
|
handle_trivial: bool
|
||||||
|
) -> Box<dyn Future<Item=<G as CurveAffine>::Projective, Error=SynthesisError>>
|
||||||
|
where for<'a> &'a Q: QueryDensity,
|
||||||
|
D: Send + Sync + 'static + Clone + AsRef<Q>,
|
||||||
|
G: CurveAffine,
|
||||||
|
S: SourceBuilder<G>
|
||||||
|
{
|
||||||
|
multiexp_inner_with_prefetch(pool, bases, density_map, exponents, skip, c, handle_trivial)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
#[inline(always)]
|
||||||
|
fn multiexp_inner_impl<Q, D, G, S>(
|
||||||
|
pool: &Worker,
|
||||||
|
bases: S,
|
||||||
|
density_map: D,
|
||||||
|
exponents: Arc<Vec<<G::Scalar as PrimeField>::Repr>>,
|
||||||
|
skip: u32,
|
||||||
|
c: u32,
|
||||||
|
handle_trivial: bool
|
||||||
|
) -> Box<dyn Future<Item=<G as CurveAffine>::Projective, Error=SynthesisError>>
|
||||||
|
where for<'a> &'a Q: QueryDensity,
|
||||||
|
D: Send + Sync + 'static + Clone + AsRef<Q>,
|
||||||
|
G: CurveAffine,
|
||||||
|
S: SourceBuilder<G>
|
||||||
|
{
|
||||||
|
multiexp_inner(pool, bases, density_map, exponents, skip, c, handle_trivial)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#[cfg(feature = "nightly")]
|
||||||
|
extern crate prefetch;
|
||||||
|
|
||||||
|
#[cfg(feature = "nightly")]
|
||||||
fn multiexp_inner_with_prefetch<Q, D, G, S>(
|
fn multiexp_inner_with_prefetch<Q, D, G, S>(
|
||||||
pool: &Worker,
|
pool: &Worker,
|
||||||
bases: S,
|
bases: S,
|
||||||
@ -161,12 +211,13 @@ fn multiexp_inner_with_prefetch<Q, D, G, S>(
|
|||||||
mut skip: u32,
|
mut skip: u32,
|
||||||
c: u32,
|
c: u32,
|
||||||
handle_trivial: bool
|
handle_trivial: bool
|
||||||
) -> Box<Future<Item=<G as CurveAffine>::Projective, Error=SynthesisError>>
|
) -> Box<dyn Future<Item=<G as CurveAffine>::Projective, Error=SynthesisError>>
|
||||||
where for<'a> &'a Q: QueryDensity,
|
where for<'a> &'a Q: QueryDensity,
|
||||||
D: Send + Sync + 'static + Clone + AsRef<Q>,
|
D: Send + Sync + 'static + Clone + AsRef<Q>,
|
||||||
G: CurveAffine,
|
G: CurveAffine,
|
||||||
S: SourceBuilder<G>
|
S: SourceBuilder<G>
|
||||||
{
|
{
|
||||||
|
use prefetch::prefetch::*;
|
||||||
// Perform this region of the multiexp
|
// Perform this region of the multiexp
|
||||||
let this = {
|
let this = {
|
||||||
let bases = bases.clone();
|
let bases = bases.clone();
|
||||||
@ -191,12 +242,23 @@ fn multiexp_inner_with_prefetch<Q, D, G, S>(
|
|||||||
let one = <G::Engine as ScalarEngine>::Fr::one().into_repr();
|
let one = <G::Engine as ScalarEngine>::Fr::one().into_repr();
|
||||||
let padding = Arc::new(vec![zero]);
|
let padding = Arc::new(vec![zero]);
|
||||||
|
|
||||||
|
let mask = 1 << c;
|
||||||
|
|
||||||
// Sort the bases into buckets
|
// Sort the bases into buckets
|
||||||
for ((&exp, &next_exp), density) in exponents.iter()
|
for ((&exp, &next_exp), density) in exponents.iter()
|
||||||
.zip(exponents.iter().skip(1).chain(padding.iter()))
|
.zip(exponents.iter().skip(1).chain(padding.iter()))
|
||||||
.zip(density_map.as_ref().iter()) {
|
.zip(density_map.as_ref().iter()) {
|
||||||
// no matter what happens - prefetch next bucket
|
// no matter what happens - prefetch next bucket
|
||||||
|
if next_exp != zero && next_exp != one {
|
||||||
|
let mut next_exp = next_exp;
|
||||||
|
next_exp.shr(skip);
|
||||||
|
let next_exp = next_exp.as_ref()[0] % mask;
|
||||||
|
if next_exp != 0 {
|
||||||
|
let p: *const <G as CurveAffine>::Projective = &buckets[(next_exp - 1) as usize];
|
||||||
|
prefetch::<Write, High, Data, _>(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
// Go over density and exponents
|
// Go over density and exponents
|
||||||
if density {
|
if density {
|
||||||
if exp == zero {
|
if exp == zero {
|
||||||
@ -215,7 +277,7 @@ fn multiexp_inner_with_prefetch<Q, D, G, S>(
|
|||||||
// then add with (s mod 2^c) P parts
|
// then add with (s mod 2^c) P parts
|
||||||
let mut exp = exp;
|
let mut exp = exp;
|
||||||
exp.shr(skip);
|
exp.shr(skip);
|
||||||
let exp = exp.as_ref()[0] % (1 << c);
|
let exp = exp.as_ref()[0] % mask;
|
||||||
|
|
||||||
if exp != 0 {
|
if exp != 0 {
|
||||||
bases.add_assign_mixed(&mut buckets[(exp - 1) as usize])?;
|
bases.add_assign_mixed(&mut buckets[(exp - 1) as usize])?;
|
||||||
@ -249,7 +311,7 @@ fn multiexp_inner_with_prefetch<Q, D, G, S>(
|
|||||||
// There's another region more significant. Calculate and join it with
|
// There's another region more significant. Calculate and join it with
|
||||||
// this region recursively.
|
// this region recursively.
|
||||||
Box::new(
|
Box::new(
|
||||||
this.join(multiexp_inner(pool, bases, density_map, exponents, skip, c, false))
|
this.join(multiexp_inner_with_prefetch(pool, bases, density_map, exponents, skip, c, false))
|
||||||
.map(move |(this, mut higher)| {
|
.map(move |(this, mut higher)| {
|
||||||
for _ in 0..c {
|
for _ in 0..c {
|
||||||
higher.double();
|
higher.double();
|
||||||
@ -270,7 +332,7 @@ pub fn multiexp<Q, D, G, S>(
|
|||||||
bases: S,
|
bases: S,
|
||||||
density_map: D,
|
density_map: D,
|
||||||
exponents: Arc<Vec<<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr>>
|
exponents: Arc<Vec<<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr>>
|
||||||
) -> Box<Future<Item=<G as CurveAffine>::Projective, Error=SynthesisError>>
|
) -> Box<dyn Future<Item=<G as CurveAffine>::Projective, Error=SynthesisError>>
|
||||||
where for<'a> &'a Q: QueryDensity,
|
where for<'a> &'a Q: QueryDensity,
|
||||||
D: Send + Sync + 'static + Clone + AsRef<Q>,
|
D: Send + Sync + 'static + Clone + AsRef<Q>,
|
||||||
G: CurveAffine,
|
G: CurveAffine,
|
||||||
@ -289,7 +351,7 @@ pub fn multiexp<Q, D, G, S>(
|
|||||||
assert!(query_size == exponents.len());
|
assert!(query_size == exponents.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
multiexp_inner(pool, bases, density_map, exponents, 0, c, true)
|
multiexp_inner_impl(pool, bases, density_map, exponents, 0, c, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -525,4 +587,33 @@ fn test_dense_multiexp() {
|
|||||||
println!("{} ns for sparse for {} samples", duration_ns, SAMPLES);
|
println!("{} ns for sparse for {} samples", duration_ns, SAMPLES);
|
||||||
|
|
||||||
assert_eq!(dense, sparse);
|
assert_eq!(dense, sparse);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Rough wall-clock benchmark of `multiexp` over `SAMPLES` random inputs.
///
/// Generates `SAMPLES` (2^22) random `Bn256` scalars and G1 affine points from
/// a fixed-seed `XorShiftRng`, runs one `multiexp` with `FullDensity`, and
/// prints the elapsed time in milliseconds. The result itself is discarded;
/// the test only fails if the multiexp future resolves to an error.
#[test]
fn test_bench_sparse_multiexp() {
    use rand::{XorShiftRng, SeedableRng, Rand, Rng};
    use crate::pairing::bn256::Bn256;
    use num_cpus;

    const SAMPLES: usize = 1 << 22;
    // Fixed seed so every run times the same inputs.
    let rng = &mut XorShiftRng::from_seed([0x3dbe6259, 0x8d313d76, 0x3237db17, 0xe5bc0654]);

    let v = (0..SAMPLES).map(|_| <Bn256 as ScalarEngine>::Fr::rand(rng).into_repr()).collect::<Vec<_>>();
    let g = (0..SAMPLES).map(|_| <Bn256 as Engine>::G1::rand(rng).into_affine()).collect::<Vec<_>>();

    println!("Done generating test points and scalars");

    let pool = Worker::new();
    // Start timing only after input generation and pool creation.
    let start = std::time::Instant::now();

    let _sparse = multiexp(
        &pool,
        (Arc::new(g), 0),
        FullDensity,
        Arc::new(v)
    ).wait().unwrap();

    let duration_ns = start.elapsed().as_nanos() as f64;
    // 1 ms = 1_000_000 ns; dividing by 1000 would report microseconds under an "ms" label.
    println!("{} ms for sparse for {} samples", duration_ns / 1_000_000.0f64, SAMPLES);
}
|
Loading…
Reference in New Issue
Block a user