Parallel FFTs.
This commit is contained in:
parent
669853de99
commit
bb73258ce9
@ -44,6 +44,9 @@ macro_rules! curve_impl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Group<$engine> for $name {
|
impl Group<$engine> for $name {
|
||||||
|
fn group_zero(e: &$engine) -> $name {
|
||||||
|
$name::zero(e)
|
||||||
|
}
|
||||||
fn group_mul_assign(&mut self, e: &$engine, scalar: &$scalarfield) {
|
fn group_mul_assign(&mut self, e: &$engine, scalar: &$scalarfield) {
|
||||||
self.mul_assign(e, scalar);
|
self.mul_assign(e, scalar);
|
||||||
}
|
}
|
||||||
|
@ -97,6 +97,9 @@ fp_impl!(
|
|||||||
);
|
);
|
||||||
|
|
||||||
impl Group<Bls381> for Fr {
|
impl Group<Bls381> for Fr {
|
||||||
|
fn group_zero(_: &Bls381) -> Fr {
|
||||||
|
Fr::zero()
|
||||||
|
}
|
||||||
fn group_mul_assign(&mut self, e: &Bls381, scalar: &Fr) {
|
fn group_mul_assign(&mut self, e: &Bls381, scalar: &Fr) {
|
||||||
self.mul_assign(e, scalar);
|
self.mul_assign(e, scalar);
|
||||||
}
|
}
|
||||||
|
@ -9,7 +9,7 @@ use super::{Cow, Convert};
|
|||||||
|
|
||||||
pub mod bls381;
|
pub mod bls381;
|
||||||
|
|
||||||
pub trait Engine: Sized + Clone
|
pub trait Engine: Sized + Clone + Send + Sync
|
||||||
{
|
{
|
||||||
type Fq: PrimeField<Self>;
|
type Fq: PrimeField<Self>;
|
||||||
type Fr: SnarkField<Self>;
|
type Fr: SnarkField<Self>;
|
||||||
@ -46,8 +46,9 @@ pub trait Engine: Sized + Clone
|
|||||||
fn batch_baseexp<G: Curve<Self>, S: AsRef<[Self::Fr]>>(&self, table: &WindowTable<Self, G, Vec<G>>, scalars: S) -> Vec<G::Affine>;
|
fn batch_baseexp<G: Curve<Self>, S: AsRef<[Self::Fr]>>(&self, table: &WindowTable<Self, G, Vec<G>>, scalars: S) -> Vec<G::Affine>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait Group<E: Engine>: Copy
|
pub trait Group<E: Engine>: Copy + Send + Sync + Sized
|
||||||
{
|
{
|
||||||
|
fn group_zero(&E) -> Self;
|
||||||
fn group_mul_assign(&mut self, &E, scalar: &E::Fr);
|
fn group_mul_assign(&mut self, &E, scalar: &E::Fr);
|
||||||
fn group_add_assign(&mut self, &E, other: &Self);
|
fn group_add_assign(&mut self, &E, other: &Self);
|
||||||
fn group_sub_assign(&mut self, &E, other: &Self);
|
fn group_sub_assign(&mut self, &E, other: &Self);
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
use curves::{Engine, Field, SnarkField, PrimeField, Group};
|
use curves::{Engine, Field, SnarkField, PrimeField, Group};
|
||||||
|
use crossbeam;
|
||||||
|
use num_cpus;
|
||||||
|
|
||||||
pub struct EvaluationDomain<E: Engine> {
|
pub struct EvaluationDomain<E: Engine> {
|
||||||
pub m: u64,
|
pub m: u64,
|
||||||
@ -49,19 +51,36 @@ impl<E: Engine> EvaluationDomain<E> {
|
|||||||
pub fn ifft<T: Group<E>>(&self, e: &E, v: &mut [T])
|
pub fn ifft<T: Group<E>>(&self, e: &E, v: &mut [T])
|
||||||
{
|
{
|
||||||
assert!(v.len() == self.m as usize);
|
assert!(v.len() == self.m as usize);
|
||||||
self._fft(e, v, &self.omegainv);
|
parallel_fft(e, v, &self.omegainv, self.exp);
|
||||||
|
|
||||||
|
let chunk = (v.len() / num_cpus::get()) + 1;
|
||||||
|
|
||||||
|
crossbeam::scope(|scope| {
|
||||||
|
for v in v.chunks_mut(chunk) {
|
||||||
|
scope.spawn(move || {
|
||||||
for v in v {
|
for v in v {
|
||||||
v.group_mul_assign(e, &self.minv);
|
v.group_mul_assign(e, &self.minv);
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn mul_coset(&self, e: &E, v: &mut [E::Fr], g: &E::Fr)
|
fn mul_coset(&self, e: &E, v: &mut [E::Fr], g: &E::Fr)
|
||||||
{
|
{
|
||||||
let mut u = *g;
|
let chunk = (v.len() / num_cpus::get()) + 1;
|
||||||
for v in v.iter_mut().skip(1) {
|
|
||||||
|
crossbeam::scope(|scope| {
|
||||||
|
for (i, v) in v.chunks_mut(chunk).enumerate() {
|
||||||
|
scope.spawn(move || {
|
||||||
|
let mut u = g.pow(e, &[(i * chunk) as u64]);
|
||||||
|
for v in v.iter_mut() {
|
||||||
v.mul_assign(e, &u);
|
v.mul_assign(e, &u);
|
||||||
u.mul_assign(e, g);
|
u.mul_assign(e, g);
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn coset_fft(&self, e: &E, v: &mut [E::Fr])
|
pub fn coset_fft(&self, e: &E, v: &mut [E::Fr])
|
||||||
@ -79,18 +98,76 @@ impl<E: Engine> EvaluationDomain<E> {
|
|||||||
pub fn divide_by_z_on_coset(&self, e: &E, v: &mut [E::Fr])
|
pub fn divide_by_z_on_coset(&self, e: &E, v: &mut [E::Fr])
|
||||||
{
|
{
|
||||||
let i = self.z(e, &E::Fr::multiplicative_generator(e)).inverse(e).unwrap();
|
let i = self.z(e, &E::Fr::multiplicative_generator(e)).inverse(e).unwrap();
|
||||||
|
|
||||||
|
let chunk = (v.len() / num_cpus::get()) + 1;
|
||||||
|
|
||||||
|
crossbeam::scope(|scope| {
|
||||||
|
for v in v.chunks_mut(chunk) {
|
||||||
|
scope.spawn(move || {
|
||||||
for v in v {
|
for v in v {
|
||||||
v.mul_assign(e, &i);
|
v.mul_assign(e, &i);
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn fft<T: Group<E>>(&self, e: &E, a: &mut [T])
|
pub fn fft<T: Group<E>>(&self, e: &E, a: &mut [T])
|
||||||
{
|
{
|
||||||
self._fft(e, a, &self.omega);
|
parallel_fft(e, a, &self.omega, self.exp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parallel_fft<E: Engine, T: Group<E>>(e: &E, a: &mut [T], omega: &E::Fr, log_n: u64)
|
||||||
|
{
|
||||||
|
let log_cpus = get_log_cpus();
|
||||||
|
let num_cpus = 1 << log_cpus;
|
||||||
|
|
||||||
|
if log_n < log_cpus {
|
||||||
|
serial_fft(e, a, omega, log_n)
|
||||||
|
} else {
|
||||||
|
// Shuffle
|
||||||
|
let log_new_n = log_n - log_cpus;
|
||||||
|
let mut tmp = vec![vec![T::group_zero(e); 1 << log_new_n]; num_cpus];
|
||||||
|
let omega_num_cpus = omega.pow(e, &[num_cpus as u64]);
|
||||||
|
|
||||||
|
crossbeam::scope(|scope| {
|
||||||
|
let a = &*a;
|
||||||
|
|
||||||
|
for (j, tmp) in tmp.iter_mut().enumerate() {
|
||||||
|
scope.spawn(move || {
|
||||||
|
let omega_j = omega.pow(e, &[j as u64]);
|
||||||
|
let omega_step = omega.pow(e, &[(j as u64) << log_new_n]);
|
||||||
|
|
||||||
|
let mut elt = E::Fr::one(e);
|
||||||
|
for i in 0..(1 << log_new_n) {
|
||||||
|
for s in 0..num_cpus {
|
||||||
|
let idx = (i + (s << log_new_n)) % (1 << log_n);
|
||||||
|
let mut t = a[idx];
|
||||||
|
t.group_mul_assign(e, &elt);
|
||||||
|
tmp[i].group_add_assign(e, &t);
|
||||||
|
elt.mul_assign(e, &omega_step);
|
||||||
|
}
|
||||||
|
elt.mul_assign(e, &omega_j);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn _fft<T: Group<E>>(&self, e: &E, a: &mut [T], omega: &E::Fr)
|
serial_fft(e, tmp, &omega_num_cpus, log_new_n);
|
||||||
{
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// TODO: parallelize
|
||||||
|
// Unshuffle
|
||||||
|
for i in 0..num_cpus {
|
||||||
|
for j in 0..(1 << log_new_n) {
|
||||||
|
a[(j << log_cpus) + i] = tmp[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serial_fft<E: Engine, T: Group<E>>(e: &E, a: &mut [T], omega: &E::Fr, log_n: u64)
|
||||||
|
{
|
||||||
fn bitreverse(mut n: usize, l: u64) -> usize {
|
fn bitreverse(mut n: usize, l: u64) -> usize {
|
||||||
let mut r = 0;
|
let mut r = 0;
|
||||||
for _ in 0..l {
|
for _ in 0..l {
|
||||||
@ -100,8 +177,11 @@ impl<E: Engine> EvaluationDomain<E> {
|
|||||||
r
|
r
|
||||||
}
|
}
|
||||||
|
|
||||||
for k in 0..(self.m as usize) {
|
let n = a.len();
|
||||||
let rk = bitreverse(k, self.exp);
|
assert_eq!(n, 1 << log_n);
|
||||||
|
|
||||||
|
for k in 0..n {
|
||||||
|
let rk = bitreverse(k, log_n);
|
||||||
if k < rk {
|
if k < rk {
|
||||||
let tmp1 = a[rk];
|
let tmp1 = a[rk];
|
||||||
let tmp2 = a[k];
|
let tmp2 = a[k];
|
||||||
@ -111,11 +191,11 @@ impl<E: Engine> EvaluationDomain<E> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut m = 1;
|
let mut m = 1;
|
||||||
for _ in 0..self.exp {
|
for _ in 0..log_n {
|
||||||
let w_m = omega.pow(e, &[(self.m / (2*m)) as u64]);
|
let w_m = omega.pow(e, &[(n / (2*m)) as u64]);
|
||||||
|
|
||||||
let mut k = 0;
|
let mut k = 0;
|
||||||
while k < self.m {
|
while k < n {
|
||||||
let mut w = E::Fr::one(e);
|
let mut w = E::Fr::one(e);
|
||||||
for j in 0..m {
|
for j in 0..m {
|
||||||
let mut t = a[(k+j+m) as usize];
|
let mut t = a[(k+j+m) as usize];
|
||||||
@ -132,7 +212,6 @@ impl<E: Engine> EvaluationDomain<E> {
|
|||||||
|
|
||||||
m *= 2;
|
m *= 2;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test multiplying various (low degree) polynomials together and
|
// Test multiplying various (low degree) polynomials together and
|
||||||
@ -190,3 +269,32 @@ fn polynomial_arith() {
|
|||||||
|
|
||||||
test_mul(e, rng);
|
test_mul(e, rng);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn get_log_cpus() -> u64 {
|
||||||
|
let num = num_cpus::get();
|
||||||
|
log2_floor(num)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `floor(log2(num))`, i.e. the zero-based index of the highest set
/// bit of `num`.
///
/// # Panics
///
/// Panics if `num` is zero.
fn log2_floor(num: usize) -> u64 {
    assert!(num > 0);

    // Width of `usize` in bits on the target platform.
    let bits = (::std::mem::size_of::<usize>() * 8) as u32;

    // The highest set bit sits `leading_zeros` positions below the top bit.
    // Counting zeros instead of probing `1 << (pow + 1)` avoids the shift
    // overflow the loop-based form hits once `num` has its top bit set.
    u64::from(bits - 1 - num.leading_zeros())
}
|
||||||
|
|
||||||
|
#[test]
fn test_log2_floor() {
    // (input, expected floor(log2(input))) pairs covering 1 through 8.
    let cases: [(usize, u64); 8] = [
        (1, 0),
        (2, 1),
        (3, 1),
        (4, 2),
        (5, 2),
        (6, 2),
        (7, 2),
        (8, 3),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(log2_floor(input), expected);
    }
}
|
||||||
|
Loading…
Reference in New Issue
Block a user