core, eth, trie: bloom filter for trie node dedup during fast sync (#19489)

* core, eth, trie: bloom filter for trie node dedup during fast sync

* eth/downloader, trie: address review comments

* core, ethdb, trie: restart fast-sync bloom construction now and again

* eth/downloader: initialize fast sync bloom on startup

* eth: reenable eth/62 until we properly remove it
This commit is contained in:
Péter Szilágyi 2019-05-13 15:28:01 +03:00 committed by GitHub
parent 40cdcf8c47
commit 9effd64290
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
46 changed files with 2668 additions and 57 deletions

@ -35,6 +35,7 @@ import (
"github.com/ethereum/go-ethereum/eth/downloader" "github.com/ethereum/go-ethereum/eth/downloader"
"github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/event"
"github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie"
"gopkg.in/urfave/cli.v1" "gopkg.in/urfave/cli.v1"
) )
@ -375,11 +376,16 @@ func copyDb(ctx *cli.Context) error {
defer stack.Close() defer stack.Close()
chain, chainDb := utils.MakeChain(ctx, stack) chain, chainDb := utils.MakeChain(ctx, stack)
syncmode := *utils.GlobalTextMarshaler(ctx, utils.SyncModeFlag.Name).(*downloader.SyncMode) syncMode := *utils.GlobalTextMarshaler(ctx, utils.SyncModeFlag.Name).(*downloader.SyncMode)
dl := downloader.New(syncmode, 0, chainDb, new(event.TypeMux), chain, nil, nil)
var syncBloom *trie.SyncBloom
if syncMode == downloader.FastSync {
syncBloom = trie.NewSyncBloom(uint64(ctx.GlobalInt(utils.CacheFlag.Name)/2), chainDb)
}
dl := downloader.New(0, chainDb, syncBloom, new(event.TypeMux), chain, nil, nil)
// Create a source peer to satisfy downloader requests from // Create a source peer to satisfy downloader requests from
db, err := rawdb.NewLevelDBDatabase(ctx.Args().First(), ctx.GlobalInt(utils.CacheFlag.Name), 256, "") db, err := rawdb.NewLevelDBDatabase(ctx.Args().First(), ctx.GlobalInt(utils.CacheFlag.Name)/2, 256, "")
if err != nil { if err != nil {
return err return err
} }
@ -395,7 +401,7 @@ func copyDb(ctx *cli.Context) error {
start := time.Now() start := time.Now()
currentHeader := hc.CurrentHeader() currentHeader := hc.CurrentHeader()
if err = dl.Synchronise("local", currentHeader.Hash(), hc.GetTd(currentHeader.Hash(), currentHeader.Number.Uint64()), syncmode); err != nil { if err = dl.Synchronise("local", currentHeader.Hash(), hc.GetTd(currentHeader.Hash(), currentHeader.Number.Uint64()), syncMode); err != nil {
return err return err
} }
for dl.Synchronising() { for dl.Synchronising() {

@ -67,6 +67,13 @@ func (t *table) NewIterator() ethdb.Iterator {
return t.NewIteratorWithPrefix(nil) return t.NewIteratorWithPrefix(nil)
} }
// NewIteratorWithStart creates a binary-alphabetical iterator over a subset of
// database content starting at a particular initial key (or after, if it does
// not exist).
func (t *table) NewIteratorWithStart(start []byte) ethdb.Iterator {
return t.db.NewIteratorWithStart(start)
}
// NewIteratorWithPrefix creates a binary-alphabetical iterator over a subset // NewIteratorWithPrefix creates a binary-alphabetical iterator over a subset
// of database content with a particular key prefix. // of database content with a particular key prefix.
func (t *table) NewIteratorWithPrefix(prefix []byte) ethdb.Iterator { func (t *table) NewIteratorWithPrefix(prefix []byte) ethdb.Iterator {

@ -26,7 +26,7 @@ import (
) )
// NewStateSync create a new state trie download scheduler. // NewStateSync create a new state trie download scheduler.
func NewStateSync(root common.Hash, database ethdb.Reader) *trie.Sync { func NewStateSync(root common.Hash, database ethdb.Reader, bloom *trie.SyncBloom) *trie.Sync {
var syncer *trie.Sync var syncer *trie.Sync
callback := func(leaf []byte, parent common.Hash) error { callback := func(leaf []byte, parent common.Hash) error {
var obj Account var obj Account
@ -37,6 +37,6 @@ func NewStateSync(root common.Hash, database ethdb.Reader) *trie.Sync {
syncer.AddRawEntry(common.BytesToHash(obj.CodeHash), 64, parent) syncer.AddRawEntry(common.BytesToHash(obj.CodeHash), 64, parent)
return nil return nil
} }
syncer = trie.NewSync(root, database, callback) syncer = trie.NewSync(root, database, callback, bloom)
return syncer return syncer
} }

@ -25,6 +25,7 @@ import (
"github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/ethdb/memorydb"
"github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie"
) )
@ -125,7 +126,7 @@ func checkStateConsistency(db ethdb.Database, root common.Hash) error {
// Tests that an empty state is not scheduled for syncing. // Tests that an empty state is not scheduled for syncing.
func TestEmptyStateSync(t *testing.T) { func TestEmptyStateSync(t *testing.T) {
empty := common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") empty := common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
if req := NewStateSync(empty, rawdb.NewMemoryDatabase()).Missing(1); len(req) != 0 { if req := NewStateSync(empty, rawdb.NewMemoryDatabase(), trie.NewSyncBloom(1, memorydb.New())).Missing(1); len(req) != 0 {
t.Errorf("content requested for empty state: %v", req) t.Errorf("content requested for empty state: %v", req)
} }
} }
@ -141,7 +142,7 @@ func testIterativeStateSync(t *testing.T, batch int) {
// Create a destination state and sync with the scheduler // Create a destination state and sync with the scheduler
dstDb := rawdb.NewMemoryDatabase() dstDb := rawdb.NewMemoryDatabase()
sched := NewStateSync(srcRoot, dstDb) sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
queue := append([]common.Hash{}, sched.Missing(batch)...) queue := append([]common.Hash{}, sched.Missing(batch)...)
for len(queue) > 0 { for len(queue) > 0 {
@ -173,7 +174,7 @@ func TestIterativeDelayedStateSync(t *testing.T) {
// Create a destination state and sync with the scheduler // Create a destination state and sync with the scheduler
dstDb := rawdb.NewMemoryDatabase() dstDb := rawdb.NewMemoryDatabase()
sched := NewStateSync(srcRoot, dstDb) sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
queue := append([]common.Hash{}, sched.Missing(0)...) queue := append([]common.Hash{}, sched.Missing(0)...)
for len(queue) > 0 { for len(queue) > 0 {
@ -210,7 +211,7 @@ func testIterativeRandomStateSync(t *testing.T, batch int) {
// Create a destination state and sync with the scheduler // Create a destination state and sync with the scheduler
dstDb := rawdb.NewMemoryDatabase() dstDb := rawdb.NewMemoryDatabase()
sched := NewStateSync(srcRoot, dstDb) sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
queue := make(map[common.Hash]struct{}) queue := make(map[common.Hash]struct{})
for _, hash := range sched.Missing(batch) { for _, hash := range sched.Missing(batch) {
@ -250,7 +251,7 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) {
// Create a destination state and sync with the scheduler // Create a destination state and sync with the scheduler
dstDb := rawdb.NewMemoryDatabase() dstDb := rawdb.NewMemoryDatabase()
sched := NewStateSync(srcRoot, dstDb) sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
queue := make(map[common.Hash]struct{}) queue := make(map[common.Hash]struct{})
for _, hash := range sched.Missing(0) { for _, hash := range sched.Missing(0) {
@ -297,7 +298,7 @@ func TestIncompleteStateSync(t *testing.T) {
// Create a destination state and sync with the scheduler // Create a destination state and sync with the scheduler
dstDb := rawdb.NewMemoryDatabase() dstDb := rawdb.NewMemoryDatabase()
sched := NewStateSync(srcRoot, dstDb) sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
added := []common.Hash{} added := []common.Hash{}
queue := append([]common.Hash{}, sched.Missing(1)...) queue := append([]common.Hash{}, sched.Missing(1)...)

@ -190,10 +190,11 @@ func New(ctx *node.ServiceContext, config *Config) (*Ethereum, error) {
} }
eth.txPool = core.NewTxPool(config.TxPool, chainConfig, eth.blockchain) eth.txPool = core.NewTxPool(config.TxPool, chainConfig, eth.blockchain)
if eth.protocolManager, err = NewProtocolManager(chainConfig, config.SyncMode, config.NetworkId, eth.eventMux, eth.txPool, eth.engine, eth.blockchain, chainDb, config.Whitelist); err != nil { // Permit the downloader to use the trie cache allowance during fast sync
cacheLimit := cacheConfig.TrieCleanLimit + cacheConfig.TrieDirtyLimit
if eth.protocolManager, err = NewProtocolManager(chainConfig, config.SyncMode, config.NetworkId, eth.eventMux, eth.txPool, eth.engine, eth.blockchain, chainDb, cacheLimit, config.Whitelist); err != nil {
return nil, err return nil, err
} }
eth.miner = miner.New(eth, &config.Miner, chainConfig, eth.EventMux(), eth.engine, eth.isLocalBlock) eth.miner = miner.New(eth, &config.Miner, chainConfig, eth.EventMux(), eth.engine, eth.isLocalBlock)
eth.miner.SetExtra(makeExtraData(config.Miner.ExtraData)) eth.miner.SetExtra(makeExtraData(config.Miner.ExtraData))

@ -34,6 +34,7 @@ import (
"github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/metrics"
"github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/params"
"github.com/ethereum/go-ethereum/trie"
) )
var ( var (
@ -104,7 +105,9 @@ type Downloader struct {
genesis uint64 // Genesis block number to limit sync to (e.g. light client CHT) genesis uint64 // Genesis block number to limit sync to (e.g. light client CHT)
queue *queue // Scheduler for selecting the hashes to download queue *queue // Scheduler for selecting the hashes to download
peers *peerSet // Set of active peers from which download can proceed peers *peerSet // Set of active peers from which download can proceed
stateDB ethdb.Database
stateDB ethdb.Database // Database to state sync into (and deduplicate via)
stateBloom *trie.SyncBloom // Bloom filter for fast trie node existence checks
rttEstimate uint64 // Round trip time to target for download requests rttEstimate uint64 // Round trip time to target for download requests
rttConfidence uint64 // Confidence in the estimated RTT (unit: millionths to allow atomic ops) rttConfidence uint64 // Confidence in the estimated RTT (unit: millionths to allow atomic ops)
@ -207,13 +210,13 @@ type BlockChain interface {
} }
// New creates a new downloader to fetch hashes and blocks from remote peers. // New creates a new downloader to fetch hashes and blocks from remote peers.
func New(mode SyncMode, checkpoint uint64, stateDb ethdb.Database, mux *event.TypeMux, chain BlockChain, lightchain LightChain, dropPeer peerDropFn) *Downloader { func New(checkpoint uint64, stateDb ethdb.Database, stateBloom *trie.SyncBloom, mux *event.TypeMux, chain BlockChain, lightchain LightChain, dropPeer peerDropFn) *Downloader {
if lightchain == nil { if lightchain == nil {
lightchain = chain lightchain = chain
} }
dl := &Downloader{ dl := &Downloader{
mode: mode,
stateDB: stateDb, stateDB: stateDb,
stateBloom: stateBloom,
mux: mux, mux: mux,
checkpoint: checkpoint, checkpoint: checkpoint,
queue: newQueue(), queue: newQueue(),
@ -255,13 +258,15 @@ func (d *Downloader) Progress() ethereum.SyncProgress {
defer d.syncStatsLock.RUnlock() defer d.syncStatsLock.RUnlock()
current := uint64(0) current := uint64(0)
switch d.mode { switch {
case FullSync: case d.blockchain != nil && d.mode == FullSync:
current = d.blockchain.CurrentBlock().NumberU64() current = d.blockchain.CurrentBlock().NumberU64()
case FastSync: case d.blockchain != nil && d.mode == FastSync:
current = d.blockchain.CurrentFastBlock().NumberU64() current = d.blockchain.CurrentFastBlock().NumberU64()
case LightSync: case d.lightchain != nil:
current = d.lightchain.CurrentHeader().Number.Uint64() current = d.lightchain.CurrentHeader().Number.Uint64()
default:
log.Error("Unknown downloader chain/mode combo", "light", d.lightchain != nil, "full", d.blockchain != nil, "mode", d.mode)
} }
return ethereum.SyncProgress{ return ethereum.SyncProgress{
StartingBlock: d.syncStatsChainOrigin, StartingBlock: d.syncStatsChainOrigin,
@ -363,6 +368,12 @@ func (d *Downloader) synchronise(id string, hash common.Hash, td *big.Int, mode
if atomic.CompareAndSwapInt32(&d.notified, 0, 1) { if atomic.CompareAndSwapInt32(&d.notified, 0, 1) {
log.Info("Block synchronisation started") log.Info("Block synchronisation started")
} }
// If we are already full syncing, but have a fast-sync bloom filter laying
// around, make sure it does't use memory any more. This is a special case
// when the user attempts to fast sync a new empty network.
if mode == FullSync && d.stateBloom != nil {
d.stateBloom.Close()
}
// Reset the queue, peer set and wake channels to clean any internal leftover state // Reset the queue, peer set and wake channels to clean any internal leftover state
d.queue.Reset() d.queue.Reset()
d.peers.Reset() d.peers.Reset()
@ -1662,6 +1673,8 @@ func (d *Downloader) commitFastSyncData(results []*fetchResult, stateSync *state
func (d *Downloader) commitPivotBlock(result *fetchResult) error { func (d *Downloader) commitPivotBlock(result *fetchResult) error {
block := types.NewBlockWithHeader(result.Header).WithBody(result.Transactions, result.Uncles) block := types.NewBlockWithHeader(result.Header).WithBody(result.Transactions, result.Uncles)
log.Debug("Committing fast sync pivot as new head", "number", block.Number(), "hash", block.Hash()) log.Debug("Committing fast sync pivot as new head", "number", block.Number(), "hash", block.Hash())
// Commit the pivot block as the new head, will require full sync from here on
if _, err := d.blockchain.InsertReceiptChain([]*types.Block{block}, []types.Receipts{result.Receipts}); err != nil { if _, err := d.blockchain.InsertReceiptChain([]*types.Block{block}, []types.Receipts{result.Receipts}); err != nil {
return err return err
} }
@ -1669,6 +1682,15 @@ func (d *Downloader) commitPivotBlock(result *fetchResult) error {
return err return err
} }
atomic.StoreInt32(&d.committed, 1) atomic.StoreInt32(&d.committed, 1)
// If we had a bloom filter for the state sync, deallocate it now. Note, we only
// deallocate internally, but keep the empty wrapper. This ensures that if we do
// a rollback after committing the pivot and restarting fast sync, we don't end
// up using a nil bloom. Empty bloom is fine, it just returns that it does not
// have the info we need, so reach down to the database instead.
if d.stateBloom != nil {
d.stateBloom.Close()
}
return nil return nil
} }

@ -75,7 +75,7 @@ func newTester() *downloadTester {
tester.stateDb = rawdb.NewMemoryDatabase() tester.stateDb = rawdb.NewMemoryDatabase()
tester.stateDb.Put(testGenesis.Root().Bytes(), []byte{0x00}) tester.stateDb.Put(testGenesis.Root().Bytes(), []byte{0x00})
tester.downloader = New(FullSync, 0, tester.stateDb, new(event.TypeMux), tester, nil, tester.dropPeer) tester.downloader = New(0, tester.stateDb, trie.NewSyncBloom(1, tester.stateDb), new(event.TypeMux), tester, nil, tester.dropPeer)
return tester return tester
} }

@ -59,6 +59,7 @@ type stateSyncStats struct {
// syncState starts downloading state with the given root hash. // syncState starts downloading state with the given root hash.
func (d *Downloader) syncState(root common.Hash) *stateSync { func (d *Downloader) syncState(root common.Hash) *stateSync {
// Create the state sync
s := newStateSync(d, root) s := newStateSync(d, root)
select { select {
case d.stateSyncStart <- s: case d.stateSyncStart <- s:
@ -239,7 +240,7 @@ type stateTask struct {
func newStateSync(d *Downloader, root common.Hash) *stateSync { func newStateSync(d *Downloader, root common.Hash) *stateSync {
return &stateSync{ return &stateSync{
d: d, d: d,
sched: state.NewStateSync(root, d.stateDB), sched: state.NewStateSync(root, d.stateDB, d.stateBloom),
keccak: sha3.NewLegacyKeccak256(), keccak: sha3.NewLegacyKeccak256(),
tasks: make(map[common.Hash]*stateTask), tasks: make(map[common.Hash]*stateTask),
deliver: make(chan *stateReq), deliver: make(chan *stateReq),

@ -39,6 +39,7 @@ import (
"github.com/ethereum/go-ethereum/p2p/enode" "github.com/ethereum/go-ethereum/p2p/enode"
"github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/params"
"github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
) )
const ( const (
@ -105,7 +106,7 @@ type ProtocolManager struct {
// NewProtocolManager returns a new Ethereum sub protocol manager. The Ethereum sub protocol manages peers capable // NewProtocolManager returns a new Ethereum sub protocol manager. The Ethereum sub protocol manages peers capable
// with the Ethereum network. // with the Ethereum network.
func NewProtocolManager(config *params.ChainConfig, mode downloader.SyncMode, networkID uint64, mux *event.TypeMux, txpool txPool, engine consensus.Engine, blockchain *core.BlockChain, chaindb ethdb.Database, whitelist map[uint64]common.Hash) (*ProtocolManager, error) { func NewProtocolManager(config *params.ChainConfig, mode downloader.SyncMode, networkID uint64, mux *event.TypeMux, txpool txPool, engine consensus.Engine, blockchain *core.BlockChain, chaindb ethdb.Database, cacheLimit int, whitelist map[uint64]common.Hash) (*ProtocolManager, error) {
// Create the protocol manager with the base fields // Create the protocol manager with the base fields
manager := &ProtocolManager{ manager := &ProtocolManager{
networkID: networkID, networkID: networkID,
@ -120,12 +121,8 @@ func NewProtocolManager(config *params.ChainConfig, mode downloader.SyncMode, ne
txsyncCh: make(chan *txsync), txsyncCh: make(chan *txsync),
quitSync: make(chan struct{}), quitSync: make(chan struct{}),
} }
// Figure out whether to allow fast sync or not // If fast sync was requested and our database is empty, grant it
if mode == downloader.FastSync && blockchain.CurrentBlock().NumberU64() > 0 { if mode == downloader.FastSync && blockchain.CurrentBlock().NumberU64() == 0 {
log.Warn("Blockchain not empty, fast sync disabled")
mode = downloader.FullSync
}
if mode == downloader.FastSync {
manager.fastSync = uint32(1) manager.fastSync = uint32(1)
} }
// If we have trusted checkpoints, enforce them on the chain // If we have trusted checkpoints, enforce them on the chain
@ -137,7 +134,8 @@ func NewProtocolManager(config *params.ChainConfig, mode downloader.SyncMode, ne
manager.SubProtocols = make([]p2p.Protocol, 0, len(ProtocolVersions)) manager.SubProtocols = make([]p2p.Protocol, 0, len(ProtocolVersions))
for i, version := range ProtocolVersions { for i, version := range ProtocolVersions {
// Skip protocol version if incompatible with the mode of operation // Skip protocol version if incompatible with the mode of operation
if mode == downloader.FastSync && version < eth63 { // TODO(karalabe): hard-drop eth/62 from the code base
if atomic.LoadUint32(&manager.fastSync) == 1 && version < eth63 {
continue continue
} }
// Compatible; initialise the sub-protocol // Compatible; initialise the sub-protocol
@ -171,9 +169,16 @@ func NewProtocolManager(config *params.ChainConfig, mode downloader.SyncMode, ne
if len(manager.SubProtocols) == 0 { if len(manager.SubProtocols) == 0 {
return nil, errIncompatibleConfig return nil, errIncompatibleConfig
} }
// Construct the different synchronisation mechanisms // Construct the downloader (long sync) and its backing state bloom if fast
manager.downloader = downloader.New(mode, manager.checkpointNumber, chaindb, manager.eventMux, blockchain, nil, manager.removePeer) // sync is requested. The downloader is responsible for deallocating the state
// bloom when it's done.
var stateBloom *trie.SyncBloom
if atomic.LoadUint32(&manager.fastSync) == 1 {
stateBloom = trie.NewSyncBloom(uint64(cacheLimit), chaindb)
}
manager.downloader = downloader.New(manager.checkpointNumber, chaindb, stateBloom, manager.eventMux, blockchain, nil, manager.removePeer)
// Construct the fetcher (short sync)
validator := func(header *types.Header) error { validator := func(header *types.Header) error {
return engine.VerifyHeader(blockchain, header, true) return engine.VerifyHeader(blockchain, header, true)
} }

@ -528,7 +528,7 @@ func testCheckpointChallenge(t *testing.T, syncmode downloader.SyncMode, checkpo
if err != nil { if err != nil {
t.Fatalf("failed to create new blockchain: %v", err) t.Fatalf("failed to create new blockchain: %v", err)
} }
pm, err := NewProtocolManager(config, syncmode, DefaultConfig.NetworkId, new(event.TypeMux), new(testTxPool), ethash.NewFaker(), blockchain, db, nil) pm, err := NewProtocolManager(config, syncmode, DefaultConfig.NetworkId, new(event.TypeMux), new(testTxPool), ethash.NewFaker(), blockchain, db, 1, nil)
if err != nil { if err != nil {
t.Fatalf("failed to start test protocol manager: %v", err) t.Fatalf("failed to start test protocol manager: %v", err)
} }
@ -615,7 +615,7 @@ func testBroadcastBlock(t *testing.T, totalPeers, broadcastExpected int) {
if err != nil { if err != nil {
t.Fatalf("failed to create new blockchain: %v", err) t.Fatalf("failed to create new blockchain: %v", err)
} }
pm, err := NewProtocolManager(config, downloader.FullSync, DefaultConfig.NetworkId, evmux, new(testTxPool), pow, blockchain, db, nil) pm, err := NewProtocolManager(config, downloader.FullSync, DefaultConfig.NetworkId, evmux, new(testTxPool), pow, blockchain, db, 1, nil)
if err != nil { if err != nil {
t.Fatalf("failed to start test protocol manager: %v", err) t.Fatalf("failed to start test protocol manager: %v", err)
} }

@ -66,8 +66,7 @@ func newTestProtocolManager(mode downloader.SyncMode, blocks int, generator func
if _, err := blockchain.InsertChain(chain); err != nil { if _, err := blockchain.InsertChain(chain); err != nil {
panic(err) panic(err)
} }
pm, err := NewProtocolManager(gspec.Config, mode, DefaultConfig.NetworkId, evmux, &testTxPool{added: newtx}, engine, blockchain, db, 1, nil)
pm, err := NewProtocolManager(gspec.Config, mode, DefaultConfig.NetworkId, evmux, &testTxPool{added: newtx}, engine, blockchain, db, nil)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }

@ -55,6 +55,11 @@ type Iteratee interface {
// contained within the key-value database. // contained within the key-value database.
NewIterator() Iterator NewIterator() Iterator
// NewIteratorWithStart creates a binary-alphabetical iterator over a subset of
// database content starting at a particular initial key (or after, if it does
// not exist).
NewIteratorWithStart(start []byte) Iterator
// NewIteratorWithPrefix creates a binary-alphabetical iterator over a subset // NewIteratorWithPrefix creates a binary-alphabetical iterator over a subset
// of database content with a particular key prefix. // of database content with a particular key prefix.
NewIteratorWithPrefix(prefix []byte) Iterator NewIteratorWithPrefix(prefix []byte) Iterator

@ -175,7 +175,14 @@ func (db *Database) NewBatch() ethdb.Batch {
// NewIterator creates a binary-alphabetical iterator over the entire keyspace // NewIterator creates a binary-alphabetical iterator over the entire keyspace
// contained within the leveldb database. // contained within the leveldb database.
func (db *Database) NewIterator() ethdb.Iterator { func (db *Database) NewIterator() ethdb.Iterator {
return db.NewIteratorWithPrefix(nil) return db.db.NewIterator(new(util.Range), nil)
}
// NewIteratorWithStart creates a binary-alphabetical iterator over a subset of
// database content starting at a particular initial key (or after, if it does
// not exist).
func (db *Database) NewIteratorWithStart(start []byte) ethdb.Iterator {
return db.db.NewIterator(&util.Range{Start: start}, nil)
} }
// NewIteratorWithPrefix creates a binary-alphabetical iterator over a subset // NewIteratorWithPrefix creates a binary-alphabetical iterator over a subset

@ -132,7 +132,36 @@ func (db *Database) NewBatch() ethdb.Batch {
// NewIterator creates a binary-alphabetical iterator over the entire keyspace // NewIterator creates a binary-alphabetical iterator over the entire keyspace
// contained within the memory database. // contained within the memory database.
func (db *Database) NewIterator() ethdb.Iterator { func (db *Database) NewIterator() ethdb.Iterator {
return db.NewIteratorWithPrefix(nil) return db.NewIteratorWithStart(nil)
}
// NewIteratorWithStart creates a binary-alphabetical iterator over a subset of
// database content starting at a particular initial key (or after, if it does
// not exist).
func (db *Database) NewIteratorWithStart(start []byte) ethdb.Iterator {
db.lock.RLock()
defer db.lock.RUnlock()
var (
st = string(start)
keys = make([]string, 0, len(db.db))
values = make([][]byte, 0, len(db.db))
)
// Collect the keys from the memory database corresponding to the given start
for key := range db.db {
if key >= st {
keys = append(keys, key)
}
}
// Sort the items and retrieve the associated values
sort.Strings(keys)
for _, key := range keys {
values = append(values, db.db[key])
}
return &iterator{
keys: keys,
values: values,
}
} }
// NewIteratorWithPrefix creates a binary-alphabetical iterator over a subset // NewIteratorWithPrefix creates a binary-alphabetical iterator over a subset

@ -179,7 +179,7 @@ func NewProtocolManager(
if cht, ok := params.TrustedCheckpoints[blockchain.Genesis().Hash()]; ok { if cht, ok := params.TrustedCheckpoints[blockchain.Genesis().Hash()]; ok {
checkpoint = (cht.SectionIndex+1)*params.CHTFrequency - 1 checkpoint = (cht.SectionIndex+1)*params.CHTFrequency - 1
} }
manager.downloader = downloader.New(downloader.LightSync, checkpoint, chainDb, manager.eventMux, nil, blockchain, removePeer) manager.downloader = downloader.New(checkpoint, chainDb, nil, manager.eventMux, nil, blockchain, removePeer)
manager.peers.notify((*downloaderPeerNotify)(manager)) manager.peers.notify((*downloaderPeerNotify)(manager))
manager.fetcher = newLightFetcher(manager) manager.fetcher = newLightFetcher(manager)
} }

@ -76,15 +76,17 @@ type Sync struct {
membatch *syncMemBatch // Memory buffer to avoid frequent database writes membatch *syncMemBatch // Memory buffer to avoid frequent database writes
requests map[common.Hash]*request // Pending requests pertaining to a key hash requests map[common.Hash]*request // Pending requests pertaining to a key hash
queue *prque.Prque // Priority queue with the pending requests queue *prque.Prque // Priority queue with the pending requests
bloom *SyncBloom // Bloom filter for fast node existence checks
} }
// NewSync creates a new trie data download scheduler. // NewSync creates a new trie data download scheduler.
func NewSync(root common.Hash, database ethdb.Reader, callback LeafCallback) *Sync { func NewSync(root common.Hash, database ethdb.Reader, callback LeafCallback, bloom *SyncBloom) *Sync {
ts := &Sync{ ts := &Sync{
database: database, database: database,
membatch: newSyncMemBatch(), membatch: newSyncMemBatch(),
requests: make(map[common.Hash]*request), requests: make(map[common.Hash]*request),
queue: prque.New(nil), queue: prque.New(nil),
bloom: bloom,
} }
ts.AddSubTrie(root, 0, common.Hash{}, callback) ts.AddSubTrie(root, 0, common.Hash{}, callback)
return ts return ts
@ -99,10 +101,14 @@ func (s *Sync) AddSubTrie(root common.Hash, depth int, parent common.Hash, callb
if _, ok := s.membatch.batch[root]; ok { if _, ok := s.membatch.batch[root]; ok {
return return
} }
key := root.Bytes() if s.bloom.Contains(root[:]) {
blob, _ := s.database.Get(key) // Bloom filter says this might be a duplicate, double check
if local, err := decodeNode(key, blob); local != nil && err == nil { blob, _ := s.database.Get(root[:])
return if local, err := decodeNode(root[:], blob); local != nil && err == nil {
return
}
// False positive, bump fault meter
bloomFaultMeter.Mark(1)
} }
// Assemble the new sub-trie sync request // Assemble the new sub-trie sync request
req := &request{ req := &request{
@ -134,8 +140,13 @@ func (s *Sync) AddRawEntry(hash common.Hash, depth int, parent common.Hash) {
if _, ok := s.membatch.batch[hash]; ok { if _, ok := s.membatch.batch[hash]; ok {
return return
} }
if ok, _ := s.database.Has(hash.Bytes()); ok { if s.bloom.Contains(hash[:]) {
return // Bloom filter says this might be a duplicate, double check
if ok, _ := s.database.Has(hash[:]); ok {
return
}
// False positive, bump fault meter
bloomFaultMeter.Mark(1)
} }
// Assemble the new sub-trie sync request // Assemble the new sub-trie sync request
req := &request{ req := &request{
@ -219,8 +230,9 @@ func (s *Sync) Commit(dbw ethdb.Writer) (int, error) {
if err := dbw.Put(key[:], s.membatch.batch[key]); err != nil { if err := dbw.Put(key[:], s.membatch.batch[key]); err != nil {
return i, err return i, err
} }
s.bloom.Add(key[:])
} }
written := len(s.membatch.order) written := len(s.membatch.order) // TODO(karalabe): could an order change improve write performance?
// Drop the membatch data and return // Drop the membatch data and return
s.membatch = newSyncMemBatch() s.membatch = newSyncMemBatch()
@ -292,8 +304,13 @@ func (s *Sync) children(req *request, object node) ([]*request, error) {
if _, ok := s.membatch.batch[hash]; ok { if _, ok := s.membatch.batch[hash]; ok {
continue continue
} }
if ok, _ := s.database.Has(node); ok { if s.bloom.Contains(node) {
continue // Bloom filter says this might be a duplicate, double check
if ok, _ := s.database.Has(node); ok {
continue
}
// False positive, bump fault meter
bloomFaultMeter.Mark(1)
} }
// Locally unknown node, schedule for retrieval // Locally unknown node, schedule for retrieval
requests = append(requests, &request{ requests = append(requests, &request{

207
trie/sync_bloom.go Normal file

@ -0,0 +1,207 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"encoding/binary"
"fmt"
"math"
"sync"
"sync/atomic"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
"github.com/steakknife/bloomfilter"
)
var (
bloomAddMeter = metrics.NewRegisteredMeter("trie/bloom/add", nil)
bloomLoadMeter = metrics.NewRegisteredMeter("trie/bloom/load", nil)
bloomTestMeter = metrics.NewRegisteredMeter("trie/bloom/test", nil)
bloomMissMeter = metrics.NewRegisteredMeter("trie/bloom/miss", nil)
bloomFaultMeter = metrics.NewRegisteredMeter("trie/bloom/fault", nil)
bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil)
)
// syncBloomHasher is a wrapper around a byte blob to satisfy the interface API
// requirements of the bloom library used. It's used to convert a trie hash into
// a 64 bit mini hash.
type syncBloomHasher []byte
func (f syncBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") }
func (f syncBloomHasher) Sum(b []byte) []byte { panic("not implemented") }
func (f syncBloomHasher) Reset() { panic("not implemented") }
func (f syncBloomHasher) BlockSize() int { panic("not implemented") }
func (f syncBloomHasher) Size() int { return 8 }
func (f syncBloomHasher) Sum64() uint64 { return binary.BigEndian.Uint64(f) }
// SyncBloom is a bloom filter used during fast sync to quickly decide if a trie
// node already exists on disk or not. It self populates from the provided disk
// database on creation in a background thread and will only start returning live
// results once that's finished.
type SyncBloom struct {
bloom *bloomfilter.Filter
inited uint32
closer sync.Once
closed uint32
pend sync.WaitGroup
}
// NewSyncBloom creates a new bloom filter of the given size (in megabytes) and
// initializes it from the database. The bloom is hard coded to use 3 filters.
func NewSyncBloom(memory uint64, database ethdb.Iteratee) *SyncBloom {
// Create the bloom filter to track known trie nodes
bloom, err := bloomfilter.New(memory*1024*1024*8, 3)
if err != nil {
panic(fmt.Sprintf("failed to create bloom: %v", err)) // Can't happen, here for sanity
}
log.Info("Allocated fast sync bloom", "size", common.StorageSize(memory*1024*1024))
// Assemble the fast sync bloom and init it from previous sessions
b := &SyncBloom{
bloom: bloom,
}
b.pend.Add(2)
go func() {
defer b.pend.Done()
b.init(database)
}()
go func() {
defer b.pend.Done()
b.meter()
}()
return b
}
// init iterates over the database, pushing every trie hash into the bloom filter.
func (b *SyncBloom) init(database ethdb.Iteratee) {
// Iterate over the database, but restart every now and again to avoid holding
// a persistent snapshot since fast sync can push a ton of data concurrently,
// bloating the disk.
//
// Note, this is fine, because everything inserted into leveldb by fast sync is
// also pushed into the bloom directly, so we're not missing anything when the
// iterator is swapped out for a new one.
it := database.NewIterator()
var (
start = time.Now()
swap = time.Now()
)
for it.Next() && atomic.LoadUint32(&b.closed) == 0 {
// If the database entry is a trie node, add it to the bloom
if key := it.Key(); len(key) == common.HashLength {
b.bloom.Add(syncBloomHasher(key))
bloomLoadMeter.Mark(1)
}
// If enough time elapsed since the last iterator swap, restart
if time.Since(swap) > 8*time.Second {
key := common.CopyBytes(it.Key())
it.Release()
it = database.NewIteratorWithStart(key)
log.Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", time.Since(start))
swap = time.Now()
}
}
it.Release()
// Mark the bloom filter inited and return
log.Info("Initialized fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", time.Since(start))
atomic.StoreUint32(&b.inited, 1)
}
// meter periodically recalculates the false positive error rate of the bloom
// filter and reports it in a metric.
func (b *SyncBloom) meter() {
for {
// Report the current error ration. No floats, lame, scale it up.
bloomErrorGauge.Update(int64(b.errorRate() * 100000))
// Wait one second, but check termination more frequently
for i := 0; i < 10; i++ {
if atomic.LoadUint32(&b.closed) == 1 {
return
}
time.Sleep(100 * time.Millisecond)
}
}
}
// Close terminates any background initializer still running and releases all the
// memory allocated for the bloom.
func (b *SyncBloom) Close() error {
b.closer.Do(func() {
// Ensure the initializer is stopped
atomic.StoreUint32(&b.closed, 1)
b.pend.Wait()
// Wipe the bloom, but mark it "uninited" just in case someone attempts an access
log.Info("Deallocated fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate())
atomic.StoreUint32(&b.inited, 0)
b.bloom = nil
})
return nil
}
// Add inserts a new trie node hash into the bloom filter.
func (b *SyncBloom) Add(hash []byte) {
if atomic.LoadUint32(&b.closed) == 1 {
return
}
b.bloom.Add(syncBloomHasher(hash))
bloomAddMeter.Mark(1)
}
// Contains tests if the bloom filter contains the given hash:
// - false: the bloom definitely does not contain hash
// - true: the bloom maybe contains hash
//
// While the bloom is being initialized, any query will return true.
func (b *SyncBloom) Contains(hash []byte) bool {
bloomTestMeter.Mark(1)
if atomic.LoadUint32(&b.inited) == 0 {
// We didn't load all the trie nodes from the previous run of Geth yet. As
// such, we can't say for sure if a hash is not present for anything. Until
// the init is done, we're faking "possible presence" for everything.
return true
}
// Bloom initialized, check the real one and report any successful misses
maybe := b.bloom.Contains(syncBloomHasher(hash))
if !maybe {
bloomMissMeter.Mark(1)
}
return maybe
}
// errorRate calculates the probability of a random containment test returning a
// false positive.
//
// We're calculating it ourselves because the bloom library we used missed a
// parentheses in the formula and calculates it wrong. And it's discontinued...
func (b *SyncBloom) errorRate() float64 {
k := float64(b.bloom.K())
n := float64(b.bloom.N())
m := float64(b.bloom.M())
return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k)
}

@ -94,7 +94,7 @@ func TestEmptySync(t *testing.T) {
emptyB, _ := New(emptyRoot, dbB) emptyB, _ := New(emptyRoot, dbB)
for i, trie := range []*Trie{emptyA, emptyB} { for i, trie := range []*Trie{emptyA, emptyB} {
if req := NewSync(trie.Hash(), memorydb.New(), nil).Missing(1); len(req) != 0 { if req := NewSync(trie.Hash(), memorydb.New(), nil, NewSyncBloom(1, memorydb.New())).Missing(1); len(req) != 0 {
t.Errorf("test %d: content requested for empty trie: %v", i, req) t.Errorf("test %d: content requested for empty trie: %v", i, req)
} }
} }
@ -112,7 +112,7 @@ func testIterativeSync(t *testing.T, batch int) {
// Create a destination trie and sync with the scheduler // Create a destination trie and sync with the scheduler
diskdb := memorydb.New() diskdb := memorydb.New()
triedb := NewDatabase(diskdb) triedb := NewDatabase(diskdb)
sched := NewSync(srcTrie.Hash(), diskdb, nil) sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb))
queue := append([]common.Hash{}, sched.Missing(batch)...) queue := append([]common.Hash{}, sched.Missing(batch)...)
for len(queue) > 0 { for len(queue) > 0 {
@ -145,7 +145,7 @@ func TestIterativeDelayedSync(t *testing.T) {
// Create a destination trie and sync with the scheduler // Create a destination trie and sync with the scheduler
diskdb := memorydb.New() diskdb := memorydb.New()
triedb := NewDatabase(diskdb) triedb := NewDatabase(diskdb)
sched := NewSync(srcTrie.Hash(), diskdb, nil) sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb))
queue := append([]common.Hash{}, sched.Missing(10000)...) queue := append([]common.Hash{}, sched.Missing(10000)...)
for len(queue) > 0 { for len(queue) > 0 {
@ -183,7 +183,7 @@ func testIterativeRandomSync(t *testing.T, batch int) {
// Create a destination trie and sync with the scheduler // Create a destination trie and sync with the scheduler
diskdb := memorydb.New() diskdb := memorydb.New()
triedb := NewDatabase(diskdb) triedb := NewDatabase(diskdb)
sched := NewSync(srcTrie.Hash(), diskdb, nil) sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb))
queue := make(map[common.Hash]struct{}) queue := make(map[common.Hash]struct{})
for _, hash := range sched.Missing(batch) { for _, hash := range sched.Missing(batch) {
@ -224,7 +224,7 @@ func TestIterativeRandomDelayedSync(t *testing.T) {
// Create a destination trie and sync with the scheduler // Create a destination trie and sync with the scheduler
diskdb := memorydb.New() diskdb := memorydb.New()
triedb := NewDatabase(diskdb) triedb := NewDatabase(diskdb)
sched := NewSync(srcTrie.Hash(), diskdb, nil) sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb))
queue := make(map[common.Hash]struct{}) queue := make(map[common.Hash]struct{})
for _, hash := range sched.Missing(10000) { for _, hash := range sched.Missing(10000) {
@ -271,7 +271,7 @@ func TestDuplicateAvoidanceSync(t *testing.T) {
// Create a destination trie and sync with the scheduler // Create a destination trie and sync with the scheduler
diskdb := memorydb.New() diskdb := memorydb.New()
triedb := NewDatabase(diskdb) triedb := NewDatabase(diskdb)
sched := NewSync(srcTrie.Hash(), diskdb, nil) sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb))
queue := append([]common.Hash{}, sched.Missing(0)...) queue := append([]common.Hash{}, sched.Missing(0)...)
requested := make(map[common.Hash]struct{}) requested := make(map[common.Hash]struct{})
@ -311,7 +311,7 @@ func TestIncompleteSync(t *testing.T) {
// Create a destination trie and sync with the scheduler // Create a destination trie and sync with the scheduler
diskdb := memorydb.New() diskdb := memorydb.New()
triedb := NewDatabase(diskdb) triedb := NewDatabase(diskdb)
sched := NewSync(srcTrie.Hash(), diskdb, nil) sched := NewSync(srcTrie.Hash(), diskdb, nil, NewSyncBloom(1, diskdb))
var added []common.Hash var added []common.Hash
queue := append([]common.Hash{}, sched.Missing(1)...) queue := append([]common.Hash{}, sched.Missing(1)...)

@ -0,0 +1,8 @@
The MIT License (MIT)
Copyright © 2014, 2015 Barry Allard
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

123
vendor/github.com/steakknife/bloomfilter/README.md generated vendored Normal file

@ -0,0 +1,123 @@
**Important**: Zeroth, [consider](https://bdupras.github.io/filter-tutorial/) if a [Cuckoo filter](https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) could be [right for your use-case](https://github.com/seiflotfy/cuckoofilter).
[![GoDoc](https://godoc.org/github.com/steakknife/bloomfilter?status.png)](https://godoc.org/github.com/steakknife/bloomfilter) [![travis](https://img.shields.io/travis/steakknife/bloomfilter.svg)](https://travis-ci.org/steakknife/bloomfilter)
# Face-meltingly fast, thread-safe, marshalable, unionable, probability- and optimal-size-calculating Bloom filter in go
Copyright © 2014-2016,2018 Barry Allard
[MIT license](MIT-LICENSE.txt)
## WTF is a bloom filter
**TL;DR: **Probabilistic, extra lookup table to track a set of elements kept elsewhere to reduce expensive, unnecessary set element retrieval and/or iterator operations **when an element is not present in the set.** It's a classic time-storage tradeoff algoritm.
### Properties
#### [See wikipedia](https://en.wikipedia.org/wiki/Bloom_filter) for algorithm details
|Impact|What|Description|
|---|---|---|
|Good|No false negatives|know for certain if a given element is definitely NOT in the set|
|Bad|False positives|uncertain if a given element is in the set|
|Bad|Theoretical potential for hash collisions|in very large systems and/or badly hash.Hash64-conforming implementations|
|Bad|Add only|Cannot remove an element, it would destroy information about other elements|
|Good|Constant storage|uses only a fixed amount of memory|
## Naming conventions
(Similar to algorithm)
|Variable/function|Description|Range|
|---|---|---|
|m/M()|number of bits in the bloom filter (memory representation is about m/8 bytes in size)|>=2|
|n/N()|number of elements present|>=0|
|k/K()|number of keys to use (keys are kept private to user code but are de/serialized to Marshal and file I/O)|>=0|
|maxN|maximum capacity of intended structure|>0|
|p|maximum allowed probability of collision (for computing m and k for optimal sizing)|>0..<1|
- Memory representation should be exactly `24 + 8*(k + (m+63)/64) + unsafe.Sizeof(RWMutex)` bytes.
- Serialized (`BinaryMarshaler`) representation should be exactly `72 + 8*(k + (m+63)/64)` bytes. (Disk format is less due to compression.)
## Binary serialization format
All values in Little-endian format
|Offset|Offset (Hex)|Length (bytes)|Name|Type|
|---|---|---|---|---|
|0|00|8|k|`uint64`|
|8|08|8|n|`uint64`|
|16|10|8|m|`uint64`|
|24|18|k|(keys)|`[k]uint64`|
|24+8*k|...|(m+63)/64|(bloom filter)|`[(m+63)/64]uint64`|
|24+8\*k+8\*((m+63)/64)|...|48|(SHA384 of all previous fields, hashed in order)|`[48]byte`|
- `bloomfilter.Filter` conforms to `encoding.BinaryMarshaler` and `encoding.BinaryUnmarshaler'
## Usage
```go
import "github.com/steakknife/bloomfilter"
const (
maxElements = 100000
probCollide = 0.0000001
)
bf, err := bloomfilter.NewOptimal(maxElements, probCollide)
if err != nil {
panic(err)
}
someValue := ... // must conform to hash.Hash64
bf.Add(someValue)
if bf.Contains(someValue) { // probably true, could be false
// whatever
}
anotherValue := ... // must also conform to hash.Hash64
if bf.Contains(anotherValue) {
panic("This should never happen")
}
err := bf.WriteFile("1.bf.gz") // saves this BF to a file
if err != nil {
panic(err)
}
bf2, err := bloomfilter.ReadFile("1.bf.gz") // read the BF to another var
if err != nil {
panic(err)
}
```
## Design
Where possible, branch-free operations are used to avoid deep pipeline / execution unit stalls on branch-misses.
## Get
go get -u github.com/steakknife/bloomfilter # master is always stable
## Source
- On the web: [https://github.com/steakknife/bloomfilter](https://github.com/steakknife/bloomfilter)
- Git: `git clone https://github.com/steakknife/bloomfilter`
## Contact
- [Feedback](mailto:barry.allard@gmail.com)
- [Issues](https://github.com/steakknife/bloomfilter/issues)
## License
[MIT license](MIT-LICENSE.txt)
Copyright © 2014-2016 Barry Allard

@ -0,0 +1,87 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import (
"bytes"
"crypto/sha512"
"encoding/binary"
)
// conforms to encoding.BinaryMarshaler
// marshalled binary layout (Little Endian):
//
// k 1 uint64
// n 1 uint64
// m 1 uint64
// keys [k]uint64
// bits [(m+63)/64]uint64
// hash sha384 (384 bits == 48 bytes)
//
// size = (3 + k + (m+63)/64) * 8 bytes
//
func (f *Filter) marshal() (buf *bytes.Buffer,
hash [sha512.Size384]byte,
err error,
) {
f.lock.RLock()
defer f.lock.RUnlock()
debug("write bf k=%d n=%d m=%d\n", f.K(), f.n, f.m)
buf = new(bytes.Buffer)
err = binary.Write(buf, binary.LittleEndian, f.K())
if err != nil {
return nil, hash, err
}
err = binary.Write(buf, binary.LittleEndian, f.n)
if err != nil {
return nil, hash, err
}
err = binary.Write(buf, binary.LittleEndian, f.m)
if err != nil {
return nil, hash, err
}
err = binary.Write(buf, binary.LittleEndian, f.keys)
if err != nil {
return nil, hash, err
}
err = binary.Write(buf, binary.LittleEndian, f.bits)
if err != nil {
return nil, hash, err
}
hash = sha512.Sum384(buf.Bytes())
err = binary.Write(buf, binary.LittleEndian, hash)
return buf, hash, err
}
// MarshalBinary converts a Filter into []bytes
func (f *Filter) MarshalBinary() (data []byte, err error) {
buf, hash, err := f.marshal()
if err != nil {
return nil, err
}
debug(
"bloomfilter.MarshalBinary: Successfully wrote %d byte(s), sha384 %v",
buf.Len(), hash,
)
data = buf.Bytes()
return data, nil
}

@ -0,0 +1,111 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import (
"bytes"
"crypto/hmac"
"crypto/sha512"
"encoding/binary"
"io"
)
func unmarshalBinaryHeader(r io.Reader) (k, n, m uint64, err error) {
err = binary.Read(r, binary.LittleEndian, &k)
if err != nil {
return k, n, m, err
}
if k < KMin {
return k, n, m, errK()
}
err = binary.Read(r, binary.LittleEndian, &n)
if err != nil {
return k, n, m, err
}
err = binary.Read(r, binary.LittleEndian, &m)
if err != nil {
return k, n, m, err
}
if m < MMin {
return k, n, m, errM()
}
debug("read bf k=%d n=%d m=%d\n", k, n, m)
return k, n, m, err
}
func unmarshalBinaryBits(r io.Reader, m uint64) (bits []uint64, err error) {
bits, err = newBits(m)
if err != nil {
return bits, err
}
err = binary.Read(r, binary.LittleEndian, bits)
return bits, err
}
func unmarshalBinaryKeys(r io.Reader, k uint64) (keys []uint64, err error) {
keys = make([]uint64, k)
err = binary.Read(r, binary.LittleEndian, keys)
return keys, err
}
func checkBinaryHash(r io.Reader, data []byte) (err error) {
expectedHash := make([]byte, sha512.Size384)
err = binary.Read(r, binary.LittleEndian, expectedHash)
if err != nil {
return err
}
actualHash := sha512.Sum384(data[:len(data)-sha512.Size384])
if !hmac.Equal(expectedHash, actualHash[:]) {
debug("bloomfilter.UnmarshalBinary() sha384 hash failed:"+
" actual %v expected %v", actualHash, expectedHash)
return errHash()
}
debug("bloomfilter.UnmarshalBinary() successfully read"+
" %d byte(s), sha384 %v", len(data), actualHash)
return nil
}
// UnmarshalBinary converts []bytes into a Filter
// conforms to encoding.BinaryUnmarshaler
func (f *Filter) UnmarshalBinary(data []byte) (err error) {
f.lock.Lock()
defer f.lock.Unlock()
buf := bytes.NewBuffer(data)
var k uint64
k, f.n, f.m, err = unmarshalBinaryHeader(buf)
if err != nil {
return err
}
f.keys, err = unmarshalBinaryKeys(buf, k)
if err != nil {
return err
}
f.bits, err = unmarshalBinaryBits(buf, f.m)
if err != nil {
return err
}
return checkBinaryHash(buf, data)
}

123
vendor/github.com/steakknife/bloomfilter/bloomfilter.go generated vendored Normal file

@ -0,0 +1,123 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import (
"hash"
"sync"
)
// Filter is an opaque Bloom filter type
type Filter struct {
lock sync.RWMutex
bits []uint64
keys []uint64
m uint64 // number of bits the "bits" field should recognize
n uint64 // number of inserted elements
}
// Hashable -> hashes
func (f *Filter) hash(v hash.Hash64) []uint64 {
rawHash := v.Sum64()
n := len(f.keys)
hashes := make([]uint64, n)
for i := 0; i < n; i++ {
hashes[i] = rawHash ^ f.keys[i]
}
return hashes
}
// M is the size of Bloom filter, in bits
func (f *Filter) M() uint64 {
return f.m
}
// K is the count of keys
func (f *Filter) K() uint64 {
return uint64(len(f.keys))
}
// Add a hashable item, v, to the filter
func (f *Filter) Add(v hash.Hash64) {
f.lock.Lock()
defer f.lock.Unlock()
for _, i := range f.hash(v) {
// f.setBit(i)
i %= f.m
f.bits[i>>6] |= 1 << uint(i&0x3f)
}
f.n++
}
// Contains tests if f contains v
// false: f definitely does not contain value v
// true: f maybe contains value v
func (f *Filter) Contains(v hash.Hash64) bool {
f.lock.RLock()
defer f.lock.RUnlock()
r := uint64(1)
for _, i := range f.hash(v) {
// r |= f.getBit(k)
i %= f.m
r &= (f.bits[i>>6] >> uint(i&0x3f)) & 1
}
return uint64ToBool(r)
}
// Copy f to a new Bloom filter
func (f *Filter) Copy() (*Filter, error) {
f.lock.RLock()
defer f.lock.RUnlock()
out, err := f.NewCompatible()
if err != nil {
return nil, err
}
copy(out.bits, f.bits)
out.n = f.n
return out, nil
}
// UnionInPlace merges Bloom filter f2 into f
func (f *Filter) UnionInPlace(f2 *Filter) error {
if !f.IsCompatible(f2) {
return errIncompatibleBloomFilters()
}
f.lock.Lock()
defer f.lock.Unlock()
for i, bitword := range f2.bits {
f.bits[i] |= bitword
}
return nil
}
// Union merges f2 and f2 into a new Filter out
func (f *Filter) Union(f2 *Filter) (out *Filter, err error) {
if !f.IsCompatible(f2) {
return nil, errIncompatibleBloomFilters()
}
f.lock.RLock()
defer f.lock.RUnlock()
out, err = f.NewCompatible()
if err != nil {
return nil, err
}
for i, bitword := range f2.bits {
out.bits[i] = f.bits[i] | bitword
}
return out, nil
}

@ -0,0 +1,29 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import (
"encoding"
"encoding/gob"
"io"
)
// compile-time conformance tests
var (
_ encoding.BinaryMarshaler = (*Filter)(nil)
_ encoding.BinaryUnmarshaler = (*Filter)(nil)
_ encoding.TextMarshaler = (*Filter)(nil)
_ encoding.TextUnmarshaler = (*Filter)(nil)
_ io.ReaderFrom = (*Filter)(nil)
_ io.WriterTo = (*Filter)(nil)
_ gob.GobDecoder = (*Filter)(nil)
_ gob.GobEncoder = (*Filter)(nil)
)

37
vendor/github.com/steakknife/bloomfilter/debug.go generated vendored Normal file

@ -0,0 +1,37 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import (
"log"
"os"
)
const debugVar = "GOLANG_STEAKKNIFE_BLOOMFILTER_DEBUG"
// EnableDebugging permits debug() logging of details to stderr
func EnableDebugging() {
err := os.Setenv(debugVar, "1")
if err != nil {
panic("Unable to Setenv " + debugVar)
}
}
func debugging() bool {
return os.Getenv(debugVar) != ""
}
// debug printing when debugging() is true
func debug(format string, a ...interface{}) {
if debugging() {
log.Printf(format, a...)
}
}

34
vendor/github.com/steakknife/bloomfilter/errors.go generated vendored Normal file

@ -0,0 +1,34 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import "fmt"
func errHash() error {
return fmt.Errorf(
"Hash mismatch, the Bloom filter is probably corrupt")
}
func errK() error {
return fmt.Errorf(
"keys must have length %d or greater", KMin)
}
func errM() error {
return fmt.Errorf(
"m (number of bits in the Bloom filter) must be >= %d", MMin)
}
func errUniqueKeys() error {
return fmt.Errorf(
"Bloom filter keys must be unique")
}
func errIncompatibleBloomFilters() error {
return fmt.Errorf(
"Cannot perform union on two incompatible Bloom filters")
}

105
vendor/github.com/steakknife/bloomfilter/fileio.go generated vendored Normal file

@ -0,0 +1,105 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import (
"compress/gzip"
"io"
"io/ioutil"
"os"
)
// ReadFrom r and overwrite f with new Bloom filter data
func (f *Filter) ReadFrom(r io.Reader) (n int64, err error) {
f2, n, err := ReadFrom(r)
if err != nil {
return -1, err
}
f.lock.Lock()
defer f.lock.Unlock()
f.m = f2.m
f.n = f2.n
f.bits = f2.bits
f.keys = f2.keys
return n, nil
}
// ReadFrom Reader r into a lossless-compressed Bloom filter f
func ReadFrom(r io.Reader) (f *Filter, n int64, err error) {
rawR, err := gzip.NewReader(r)
if err != nil {
return nil, -1, err
}
defer func() {
err = rawR.Close()
}()
content, err := ioutil.ReadAll(rawR)
if err != nil {
return nil, -1, err
}
f = new(Filter)
n = int64(len(content))
err = f.UnmarshalBinary(content)
if err != nil {
return nil, -1, err
}
return f, n, nil
}
// ReadFile from filename into a lossless-compressed Bloom Filter f
// Suggested file extension: .bf.gz
func ReadFile(filename string) (f *Filter, n int64, err error) {
r, err := os.Open(filename)
if err != nil {
return nil, -1, err
}
defer func() {
err = r.Close()
}()
return ReadFrom(r)
}
// WriteTo a Writer w from lossless-compressed Bloom Filter f
func (f *Filter) WriteTo(w io.Writer) (n int64, err error) {
f.lock.RLock()
defer f.lock.RUnlock()
rawW := gzip.NewWriter(w)
defer func() {
err = rawW.Close()
}()
content, err := f.MarshalBinary()
if err != nil {
return -1, err
}
intN, err := rawW.Write(content)
n = int64(intN)
return n, err
}
// WriteFile filename from a a lossless-compressed Bloom Filter f
// Suggested file extension: .bf.gz
func (f *Filter) WriteFile(filename string) (n int64, err error) {
w, err := os.Create(filename)
if err != nil {
return -1, err
}
defer func() {
err = w.Close()
}()
return f.WriteTo(w)
}

23
vendor/github.com/steakknife/bloomfilter/gob.go generated vendored Normal file

@ -0,0 +1,23 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import _ "encoding/gob" // make sure gob is available
// GobDecode conforms to interface gob.GobDecoder
func (f *Filter) GobDecode(data []byte) error {
return f.UnmarshalBinary(data)
}
// GobEncode conforms to interface gob.GobEncoder
func (f *Filter) GobEncode() ([]byte, error) {
return f.MarshalBinary()
}

@ -0,0 +1,41 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import "unsafe"
func uint64ToBool(x uint64) bool {
return *(*bool)(unsafe.Pointer(&x)) // #nosec
}
// returns 0 if equal, does not compare len(b0) with len(b1)
func noBranchCompareUint64s(b0, b1 []uint64) uint64 {
r := uint64(0)
for i, b0i := range b0 {
r |= b0i ^ b1[i]
}
return r
}
// IsCompatible is true if f and f2 can be Union()ed together
func (f *Filter) IsCompatible(f2 *Filter) bool {
f.lock.RLock()
defer f.lock.RUnlock()
f.lock.RLock()
defer f2.lock.RUnlock()
// 0 is true, non-0 is false
compat := f.M() ^ f2.M()
compat |= f.K() ^ f2.K()
compat |= noBranchCompareUint64s(f.keys, f2.keys)
return uint64ToBool(^compat)
}

134
vendor/github.com/steakknife/bloomfilter/new.go generated vendored Normal file

@ -0,0 +1,134 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import (
"crypto/rand"
"encoding/binary"
"log"
)
const (
// MMin is the minimum Bloom filter bits count
MMin = 2
// KMin is the minimum number of keys
KMin = 1
// Uint64Bytes is the number of bytes in type uint64
Uint64Bytes = 8
)
// New Filter with CSPRNG keys
//
// m is the size of the Bloom filter, in bits, >= 2
//
// k is the number of random keys, >= 1
func New(m, k uint64) (*Filter, error) {
return NewWithKeys(m, newRandKeys(k))
}
func newRandKeys(k uint64) []uint64 {
keys := make([]uint64, k)
err := binary.Read(rand.Reader, binary.LittleEndian, keys)
if err != nil {
log.Panicf(
"Cannot read %d bytes from CSRPNG crypto/rand.Read (err=%v)",
Uint64Bytes, err,
)
}
return keys
}
// NewCompatible Filter compatible with f
func (f *Filter) NewCompatible() (*Filter, error) {
return NewWithKeys(f.m, f.keys)
}
// NewOptimal Bloom filter with random CSPRNG keys
func NewOptimal(maxN uint64, p float64) (*Filter, error) {
m := OptimalM(maxN, p)
k := OptimalK(m, maxN)
debug("New optimal bloom filter ::"+
" requested max elements (n):%d,"+
" probability of collision (p):%1.10f "+
"-> recommends -> bits (m): %d (%f GiB), "+
"number of keys (k): %d",
maxN, p, m, float64(m)/(gigabitsPerGiB), k)
return New(m, k)
}
// UniqueKeys is true if all keys are unique
func UniqueKeys(keys []uint64) bool {
for j := 0; j < len(keys)-1; j++ {
elem := keys[j]
for i := 1; i < j; i++ {
if keys[i] == elem {
return false
}
}
}
return true
}
// NewWithKeys creates a new Filter from user-supplied origKeys
func NewWithKeys(m uint64, origKeys []uint64) (f *Filter, err error) {
bits, err := newBits(m)
if err != nil {
return nil, err
}
keys, err := newKeysCopy(origKeys)
if err != nil {
return nil, err
}
return &Filter{
m: m,
n: 0,
bits: bits,
keys: keys,
}, nil
}
func newBits(m uint64) ([]uint64, error) {
if m < MMin {
return nil, errM()
}
return make([]uint64, (m+63)/64), nil
}
func newKeysBlank(k uint64) ([]uint64, error) {
if k < KMin {
return nil, errK()
}
return make([]uint64, k), nil
}
func newKeysCopy(origKeys []uint64) (keys []uint64, err error) {
if !UniqueKeys(origKeys) {
return nil, errUniqueKeys()
}
keys, err = newKeysBlank(uint64(len(origKeys)))
if err != nil {
return keys, err
}
copy(keys, origKeys)
return keys, err
}
func newWithKeysAndBits(m uint64, keys []uint64, bits []uint64, n uint64) (
f *Filter, err error,
) {
f, err = NewWithKeys(m, keys)
if err != nil {
return nil, err
}
copy(f.bits, bits)
f.n = n
return f, nil
}

28
vendor/github.com/steakknife/bloomfilter/optimal.go generated vendored Normal file

@ -0,0 +1,28 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import "math"
const gigabitsPerGiB float64 = 8.0 * 1024 * 1024 * 1024
// OptimalK calculates the optimal k value for creating a new Bloom filter
// maxn is the maximum anticipated number of elements
func OptimalK(m, maxN uint64) uint64 {
return uint64(math.Ceil(float64(m) * math.Ln2 / float64(maxN)))
}
// OptimalM calculates the optimal m value for creating a new Bloom filter
// p is the desired false positive probability
// optimal m = ceiling( - n * ln(p) / ln(2)**2 )
func OptimalM(maxN uint64, p float64) uint64 {
return uint64(math.Ceil(-float64(maxN) * math.Log(p) / (math.Ln2 * math.Ln2)))
}

43
vendor/github.com/steakknife/bloomfilter/statistics.go generated vendored Normal file

@ -0,0 +1,43 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import (
"math"
"github.com/steakknife/hamming"
)
// PreciseFilledRatio is an exhaustive count # of 1's
func (f *Filter) PreciseFilledRatio() float64 {
f.lock.RLock()
defer f.lock.RUnlock()
return float64(hamming.CountBitsUint64s(f.bits)) / float64(f.M())
}
// N is how many elements have been inserted
// (actually, how many Add()s have been performed?)
func (f *Filter) N() uint64 {
f.lock.RLock()
defer f.lock.RUnlock()
return f.n
}
// FalsePosititveProbability is the upper-bound probability of false positives
// (1 - exp(-k*(n+0.5)/(m-1))) ** k
func (f *Filter) FalsePosititveProbability() float64 {
k := float64(f.K())
n := float64(f.N())
m := float64(f.M())
return math.Pow(1.0-math.Exp(-k)*(n+0.5)/(m-1), k)
}

@ -0,0 +1,49 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import "fmt"
// MarshalText conforms to encoding.TextMarshaler
func (f *Filter) MarshalText() (text []byte, err error) {
f.lock.RLock()
defer f.lock.RUnlock()
s := fmt.Sprintln("k")
s += fmt.Sprintln(f.K())
s += fmt.Sprintln("n")
s += fmt.Sprintln(f.n)
s += fmt.Sprintln("m")
s += fmt.Sprintln(f.m)
s += fmt.Sprintln("keys")
for key := range f.keys {
s += fmt.Sprintf(keyFormat, key) + nl()
}
s += fmt.Sprintln("bits")
for w := range f.bits {
s += fmt.Sprintf(bitsFormat, w) + nl()
}
_, hash, err := f.marshal()
if err != nil {
return nil, err
}
s += fmt.Sprintln("sha384")
for b := range hash {
s += fmt.Sprintf("%02x", b)
}
s += nl()
text = []byte(s)
return text, nil
}

@ -0,0 +1,150 @@
// Package bloomfilter is face-meltingly fast, thread-safe,
// marshalable, unionable, probability- and
// optimal-size-calculating Bloom filter in go
//
// https://github.com/steakknife/bloomfilter
//
// Copyright © 2014, 2015, 2018 Barry Allard
//
// MIT license
//
package bloomfilter
import (
"bytes"
"crypto/hmac"
"crypto/sha512"
"fmt"
"io"
)
const (
keyFormat = "%016x"
bitsFormat = "%016x"
)
func nl() string {
return fmt.Sprintln()
}
func unmarshalTextHeader(r io.Reader) (k, n, m uint64, err error) {
format := "k" + nl() + "%d" + nl()
format += "n" + nl() + "%d" + nl()
format += "m" + nl() + "%d" + nl()
format += "keys" + nl()
_, err = fmt.Fscanf(r, format, k, n, m)
return k, n, m, err
}
func unmarshalTextKeys(r io.Reader, keys []uint64) (err error) {
for i := range keys {
_, err = fmt.Fscanf(r, keyFormat, keys[i])
if err != nil {
return err
}
}
return nil
}
func unmarshalTextBits(r io.Reader, bits []uint64) (err error) {
_, err = fmt.Fscanf(r, "bits")
if err != nil {
return err
}
for i := range bits {
_, err = fmt.Fscanf(r, bitsFormat, bits[i])
if err != nil {
return err
}
}
return nil
}
func unmarshalAndCheckTextHash(r io.Reader, f *Filter) (err error) {
_, err = fmt.Fscanf(r, "sha384")
if err != nil {
return err
}
actualHash := [sha512.Size384]byte{}
for i := range actualHash {
_, err = fmt.Fscanf(r, "%02x", actualHash[i])
if err != nil {
return err
}
}
_, expectedHash, err := f.marshal()
if err != nil {
return err
}
if !hmac.Equal(expectedHash[:], actualHash[:]) {
return errHash()
}
return nil
}
// UnmarshalText conforms to TextUnmarshaler
func UnmarshalText(text []byte) (f *Filter, err error) {
r := bytes.NewBuffer(text)
k, n, m, err := unmarshalTextHeader(r)
if err != nil {
return nil, err
}
keys, err := newKeysBlank(k)
if err != nil {
return nil, err
}
err = unmarshalTextKeys(r, keys)
if err != nil {
return nil, err
}
bits, err := newBits(m)
if err != nil {
return nil, err
}
err = unmarshalTextBits(r, bits)
if err != nil {
return nil, err
}
f, err = newWithKeysAndBits(m, keys, bits, n)
if err != nil {
return nil, err
}
err = unmarshalAndCheckTextHash(r, f)
if err != nil {
return nil, err
}
return f, nil
}
// UnmarshalText method overwrites f with data decoded from text
func (f *Filter) UnmarshalText(text []byte) error {
f.lock.Lock()
defer f.lock.Unlock()
f2, err := UnmarshalText(text)
if err != nil {
return err
}
f.m = f2.m
f.n = f2.n
copy(f.bits, f2.bits)
copy(f.keys, f2.keys)
return nil
}

8
vendor/github.com/steakknife/hamming/MIT-LICENSE.txt generated vendored Normal file

@ -0,0 +1,8 @@
The MIT License (MIT)
Copyright © 2014, 2015, 2016 Barry Allard
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

82
vendor/github.com/steakknife/hamming/README.md generated vendored Normal file

@ -0,0 +1,82 @@
[![GoDoc](https://godoc.org/github.com/steakknife/hamming?status.png)](https://godoc.org/github.com/steakknife/hamming) [![Build Status](https://travis-ci.org/steakknife/hamming.svg?branch=master)](https://travis-ci.org/steakknife/hamming)
# hamming distance calculations in Go
Copyright © 2014, 2015, 2016, 2018 Barry Allard
[MIT license](MIT-LICENSE.txt)
## Performance
```
$ go test -bench=.
BenchmarkCountBitsInt8PopCnt-4 300000000 4.30 ns/op
BenchmarkCountBitsInt16PopCnt-4 300000000 3.83 ns/op
BenchmarkCountBitsInt32PopCnt-4 300000000 3.64 ns/op
BenchmarkCountBitsInt64PopCnt-4 500000000 3.60 ns/op
BenchmarkCountBitsIntPopCnt-4 300000000 5.72 ns/op
BenchmarkCountBitsUint8PopCnt-4 1000000000 2.98 ns/op
BenchmarkCountBitsUint16PopCnt-4 500000000 3.23 ns/op
BenchmarkCountBitsUint32PopCnt-4 500000000 3.00 ns/op
BenchmarkCountBitsUint64PopCnt-4 1000000000 2.94 ns/op
BenchmarkCountBitsUintPopCnt-4 300000000 5.04 ns/op
BenchmarkCountBitsBytePopCnt-4 300000000 3.99 ns/op
BenchmarkCountBitsRunePopCnt-4 300000000 3.83 ns/op
BenchmarkCountBitsInt8-4 2000000000 0.74 ns/op
BenchmarkCountBitsInt16-4 2000000000 1.54 ns/op
BenchmarkCountBitsInt32-4 1000000000 2.63 ns/op
BenchmarkCountBitsInt64-4 1000000000 2.56 ns/op
BenchmarkCountBitsInt-4 200000000 7.23 ns/op
BenchmarkCountBitsUint16-4 2000000000 1.51 ns/op
BenchmarkCountBitsUint32-4 500000000 4.00 ns/op
BenchmarkCountBitsUint64-4 1000000000 2.64 ns/op
BenchmarkCountBitsUint64Alt-4 200000000 7.60 ns/op
BenchmarkCountBitsUint-4 300000000 5.48 ns/op
BenchmarkCountBitsUintReference-4 100000000 19.2 ns/op
BenchmarkCountBitsByte-4 2000000000 0.75 ns/op
BenchmarkCountBitsByteAlt-4 1000000000 2.37 ns/op
BenchmarkCountBitsRune-4 500000000 2.85 ns/op
PASS
ok _/Users/bmf/Projects/hamming 58.305s
$
```
## Usage
```go
import 'github.com/steakknife/hamming'
// ...
// hamming distance between values
hamming.Byte(0xFF, 0x00) // 8
hamming.Byte(0x00, 0x00) // 0
// just count bits in a byte
hamming.CountBitsByte(0xA5), // 4
```
See help in the [docs](https://godoc.org/github.com/steakknife/hamming)
## Get
go get -u github.com/steakknife/hamming # master is always stable
## Source
- On the web: https://github.com/steakknife/hamming
- Git: `git clone https://github.com/steakknife/hamming`
## Contact
- [Feedback](mailto:barry.allard@gmail.com)
- [Issues](https://github.com/steakknife/hamming/issues)
## License
[MIT license](MIT-LICENSE.txt)
Copyright © 2014, 2015, 2016 Barry Allard

35
vendor/github.com/steakknife/hamming/doc.go generated vendored Normal file

@ -0,0 +1,35 @@
//
// Package hamming distance calculations in Go
//
// https://github.com/steakknife/hamming
//
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
//
// MIT license
//
//
// Usage
//
// For functions named CountBits.+s?. The plural forms are for slices.
// The CountBits.+ forms are Population Count only, where the bare-type
// forms are Hamming distance (number of bits different) between two values.
//
// Optimized assembly .+PopCnt forms are available on amd64, and operate just
// like the regular forms (Must check and guard on HasPopCnt() first before
// trying to call .+PopCnt functions).
//
// import 'github.com/steakknife/hamming'
//
// // ...
//
// // hamming distance between values
// hamming.Byte(0xFF, 0x00) // 8
// hamming.Byte(0x00, 0x00) // 0
//
// // just count bits in a byte
// hamming.CountBitsByte(0xA5), // 4
//
// Got rune? use int32
// Got uint8? use byte
//
package hamming

70
vendor/github.com/steakknife/hamming/hamming.go generated vendored Normal file

@ -0,0 +1,70 @@
//
// Package hamming distance calculations in Go
//
// https://github.com/steakknife/hamming
//
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
//
// MIT license
//
package hamming
// Int8 hamming distance of two int8's
func Int8(x, y int8) int {
return CountBitsInt8(x ^ y)
}
// Int16 hamming distance of two int16's
func Int16(x, y int16) int {
return CountBitsInt16(x ^ y)
}
// Int32 hamming distance of two int32's
func Int32(x, y int32) int {
return CountBitsInt32(x ^ y)
}
// Int64 hamming distance of two int64's
func Int64(x, y int64) int {
return CountBitsInt64(x ^ y)
}
// Int hamming distance of two ints
func Int(x, y int) int {
return CountBitsInt(x ^ y)
}
// Uint8 hamming distance of two uint8's
func Uint8(x, y uint8) int {
return CountBitsUint8(x ^ y)
}
// Uint16 hamming distance of two uint16's
func Uint16(x, y uint16) int {
return CountBitsUint16(x ^ y)
}
// Uint32 hamming distance of two uint32's
func Uint32(x, y uint32) int {
return CountBitsUint32(x ^ y)
}
// Uint64 hamming distance of two uint64's
func Uint64(x, y uint64) int {
return CountBitsUint64(x ^ y)
}
// Uint hamming distance of two uint's
func Uint(x, y uint) int {
return CountBitsUint(x ^ y)
}
// Byte hamming distance of two bytes
func Byte(x, y byte) int {
return CountBitsByte(x ^ y)
}
// Rune hamming distance of two runes
func Rune(x, y rune) int {
return CountBitsRune(x ^ y)
}

65
vendor/github.com/steakknife/hamming/popcnt_amd64.go generated vendored Normal file

@ -0,0 +1,65 @@
//
// Package hamming distance calculations in Go
//
// https://github.com/steakknife/hamming
//
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
//
// MIT license
//
package hamming
import "strconv"
// HasPopCnt returns true if *PopCnt functions are callable
func HasPopCnt() (ret bool)
// CountBitsInt8PopCnt count 1's in x
func CountBitsInt8PopCnt(x int8) (ret int)
// CountBitsInt16PopCnt count 1's in x
func CountBitsInt16PopCnt(x int16) (ret int)
// CountBitsInt32PopCnt count 1's in x
func CountBitsInt32PopCnt(x int32) (ret int)
// CountBitsInt64PopCnt count 1's in x
func CountBitsInt64PopCnt(x int64) (ret int)
// CountBitsIntPopCnt count 1's in x
func CountBitsIntPopCnt(x int) int {
if strconv.IntSize == 64 {
return CountBitsInt64PopCnt(int64(x))
} else if strconv.IntSize == 32 {
return CountBitsInt32PopCnt(int32(x))
}
panic("strconv.IntSize must be 32 or 64")
}
// CountBitsUint8PopCnt count 1's in x
func CountBitsUint8PopCnt(x uint8) (ret int)
// CountBitsUint16PopCnt count 1's in x
func CountBitsUint16PopCnt(x uint16) (ret int)
// CountBitsUint32PopCnt count 1's in x
func CountBitsUint32PopCnt(x uint32) (ret int)
// CountBitsUint64PopCnt count 1's in x
func CountBitsUint64PopCnt(x uint64) (ret int)
// CountBitsUintPopCnt count 1's in x
func CountBitsUintPopCnt(x uint) int {
if strconv.IntSize == 64 {
return CountBitsUint64PopCnt(uint64(x))
} else if strconv.IntSize == 32 {
return CountBitsUint32PopCnt(uint32(x))
}
panic("strconv.IntSize must be 32 or 64")
}
// CountBitsBytePopCnt count 1's in x
func CountBitsBytePopCnt(x byte) (ret int)
// CountBitsRunePopCnt count 1's in x
func CountBitsRunePopCnt(x rune) (ret int)

64
vendor/github.com/steakknife/hamming/popcnt_amd64.s generated vendored Normal file

@ -0,0 +1,64 @@
//
// hamming distance calculations in Go
//
// https://github.com/steakknife/hamming
//
// Copyright © 2014, 2015, 2016 Barry Allard
//
// MIT license
//
#include "textflag.h"
TEXT ·CountBitsInt8PopCnt(SB),NOSPLIT,$0
JMP ·CountBitsBytePopCnt(SB)
TEXT ·CountBitsInt16PopCnt(SB),NOSPLIT,$0
JMP ·CountBitsUint16PopCnt(SB)
TEXT ·CountBitsInt32PopCnt(SB),NOSPLIT,$0
JMP ·CountBitsUint32PopCnt(SB)
TEXT ·CountBitsInt64PopCnt(SB),NOSPLIT,$0
JMP ·CountBitsUint64PopCnt(SB)
TEXT ·CountBitsBytePopCnt(SB),NOSPLIT,$0
JMP ·CountBitsUint8PopCnt(SB)
TEXT ·CountBitsRunePopCnt(SB),NOSPLIT,$0
JMP ·CountBitsUint32PopCnt(SB)
TEXT ·CountBitsUint8PopCnt(SB),NOSPLIT,$0
XORQ AX, AX
MOVB x+0(FP), AX
POPCNTQ AX, AX
MOVQ AX, ret+8(FP)
RET
TEXT ·CountBitsUint16PopCnt(SB),NOSPLIT,$0
XORQ AX, AX
MOVW x+0(FP), AX
POPCNTQ AX, AX
MOVQ AX, ret+8(FP)
RET
TEXT ·CountBitsUint32PopCnt(SB),NOSPLIT,$0
XORQ AX, AX
MOVL x+0(FP), AX
POPCNTQ AX, AX
MOVQ AX, ret+8(FP)
RET
TEXT ·CountBitsUint64PopCnt(SB),NOSPLIT,$0
POPCNTQ x+0(FP), AX
MOVQ AX, ret+8(FP)
RET
// func hasPopCnt() (ret bool)
TEXT ·HasPopCnt(SB),NOSPLIT,$0
MOVL $1, AX
CPUID
SHRL $23, CX // bit 23: Advanced Bit Manipulation Bit (ABM) -> POPCNTQ
ANDL $1, CX
MOVB CX, ret+0(FP)
RET

134
vendor/github.com/steakknife/hamming/popcount.go generated vendored Normal file

@ -0,0 +1,134 @@
//
// Package hamming distance calculations in Go
//
// https://github.com/steakknife/hamming
//
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
//
// MIT license
//
package hamming
import "strconv"
// References: check out Hacker's Delight, about p. 70
func table() [256]uint8 {
return [256]uint8{
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
}
}
// CountBitsByteAlt table-less, branch-free implementation
func CountBitsByteAlt(x byte) int {
x = (x & 0x55) + ((x >> 1) & 0x55)
x = (x & 0x33) + ((x >> 2) & 0x33)
return int((x & 0x0f) + ((x >> 4) & 0x0f))
}
// CountBitsInt8 count 1's in x
func CountBitsInt8(x int8) int { return CountBitsByte(byte(x)) }
// CountBitsInt16 count 1's in x
func CountBitsInt16(x int16) int { return CountBitsUint16(uint16(x)) }
// CountBitsInt32 count 1's in x
func CountBitsInt32(x int32) int { return CountBitsUint32(uint32(x)) }
// CountBitsInt64 count 1's in x
func CountBitsInt64(x int64) int { return CountBitsUint64(uint64(x)) }
// CountBitsInt count 1's in x
func CountBitsInt(x int) int { return CountBitsUint(uint(x)) }
// CountBitsByte count 1's in x
func CountBitsByte(x byte) int { return CountBitsUint8(x) }
// CountBitsRune count 1's in x
func CountBitsRune(x rune) int { return CountBitsInt32(x) }
// CountBitsUint8 count 1's in x
func CountBitsUint8(x uint8) int { return int(table()[x]) }
// CountBitsUint16 count 1's in x
func CountBitsUint16(x uint16) int {
return int(table()[x&0xFF] + table()[(x>>8)&0xFF])
}
const (
m1d uint32 = 0x55555555
m2d = 0x33333333
m4d = 0x0f0f0f0f
)
// CountBitsUint32 count 1's in x
func CountBitsUint32(x uint32) int {
x -= ((x >> 1) & m1d)
x = (x & m2d) + ((x >> 2) & m2d)
x = (x + (x >> 4)) & m4d
x += x >> 8
x += x >> 16
return int(x & 0x3f)
}
const (
m1q uint64 = 0x5555555555555555
m2q = 0x3333333333333333
m4q = 0x0f0f0f0f0f0f0f0f
hq = 0x0101010101010101
)
// CountBitsUint64 count 1's in x
func CountBitsUint64(x uint64) int {
// put count of each 2 bits into those 2 bits
x -= (x >> 1) & m1q
// put count of each 4 bits into those 4 bits
x = (x & m2q) + ((x >> 2) & m2q)
// put count of each 8 bits into those 8 bits
x = (x + (x >> 4)) & m4q
// returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
return int((x * hq) >> 56)
}
// CountBitsUint64Alt count 1's in x
func CountBitsUint64Alt(x uint64) int {
return CountBitsUint32(uint32(x>>32)) + CountBitsUint32(uint32(x))
}
// CountBitsUintReference count 1's in x
func CountBitsUintReference(x uint) int {
c := 0
for x != 0 {
x &= x - 1
c++
}
return c
}
// CountBitsUint count 1's in x
func CountBitsUint(x uint) int {
if strconv.IntSize == 64 {
return CountBitsUint64(uint64(x))
} else if strconv.IntSize == 32 {
return CountBitsUint32(uint32(x))
}
panic("strconv.IntSize must be 32 or 64 bits")
}

123
vendor/github.com/steakknife/hamming/popcount_slices.go generated vendored Normal file

@ -0,0 +1,123 @@
//
// Package hamming distance calculations in Go
//
// https://github.com/steakknife/hamming
//
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
//
// MIT license
//
package hamming
// CountBitsInt8s count 1's in b
func CountBitsInt8s(b []int8) int {
c := 0
for _, x := range b {
c += CountBitsInt8(x)
}
return c
}
// CountBitsInt16s count 1's in b
func CountBitsInt16s(b []int16) int {
c := 0
for _, x := range b {
c += CountBitsInt16(x)
}
return c
}
// CountBitsInt32s count 1's in b
func CountBitsInt32s(b []int32) int {
c := 0
for _, x := range b {
c += CountBitsInt32(x)
}
return c
}
// CountBitsInt64s count 1's in b
func CountBitsInt64s(b []int64) int {
c := 0
for _, x := range b {
c += CountBitsInt64(x)
}
return c
}
// CountBitsInts count 1's in b
func CountBitsInts(b []int) int {
c := 0
for _, x := range b {
c += CountBitsInt(x)
}
return c
}
// CountBitsUint8s count 1's in b
func CountBitsUint8s(b []uint8) int {
c := 0
for _, x := range b {
c += CountBitsUint8(x)
}
return c
}
// CountBitsUint16s count 1's in b
func CountBitsUint16s(b []uint16) int {
c := 0
for _, x := range b {
c += CountBitsUint16(x)
}
return c
}
// CountBitsUint32s count 1's in b
func CountBitsUint32s(b []uint32) int {
c := 0
for _, x := range b {
c += CountBitsUint32(x)
}
return c
}
// CountBitsUint64s count 1's in b
func CountBitsUint64s(b []uint64) int {
c := 0
for _, x := range b {
c += CountBitsUint64(x)
}
return c
}
// CountBitsUints count 1's in b
func CountBitsUints(b []uint) int {
c := 0
for _, x := range b {
c += CountBitsUint(x)
}
return c
}
// CountBitsBytes count 1's in b
func CountBitsBytes(b []byte) int {
c := 0
for _, x := range b {
c += CountBitsByte(x)
}
return c
}
// CountBitsRunes count 1's in b
func CountBitsRunes(b []rune) int {
c := 0
for _, x := range b {
c += CountBitsRune(x)
}
return c
}
// CountBitsString count 1's in s
func CountBitsString(s string) int {
return CountBitsBytes([]byte(s))
}

@ -0,0 +1,72 @@
//
// Package hamming distance calculations in Go
//
// https://github.com/steakknife/hamming
//
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
//
// MIT license
//
package hamming
import (
"strconv"
"unsafe"
)
// CountBitsInt8sPopCnt count 1's in x
func CountBitsInt8sPopCnt(x []int8) (ret int)
// CountBitsInt16sPopCnt count 1's in x
func CountBitsInt16sPopCnt(x []int16) (ret int)
// CountBitsInt32sPopCnt count 1's in x
func CountBitsInt32sPopCnt(x []int32) (ret int)
// CountBitsInt64sPopCnt count 1's in x
func CountBitsInt64sPopCnt(x []int64) (ret int)
// CountBitsIntsPopCnt count 1's in x
func CountBitsIntsPopCnt(x []int) int {
if strconv.IntSize == 64 {
y := (*[]int64)(unsafe.Pointer(&x)) // #nosec G103
return CountBitsInt64sPopCnt(*y)
} else if strconv.IntSize == 32 {
y := (*[]int32)(unsafe.Pointer(&x)) // #nosec G103
return CountBitsInt32sPopCnt(*y)
}
panic("strconv.IntSize must be 32 or 64 bits")
}
// CountBitsUint8sPopCnt count 1's in x
func CountBitsUint8sPopCnt(x []uint8) (ret int)
// CountBitsUint16sPopCnt count 1's in x
func CountBitsUint16sPopCnt(x []uint16) (ret int)
// CountBitsUint32sPopCnt count 1's in x
func CountBitsUint32sPopCnt(x []uint32) (ret int)
// CountBitsUint64sPopCnt count 1's in x
func CountBitsUint64sPopCnt(x []uint64) (ret int)
// CountBitsUintsPopCnt count 1's in x
func CountBitsUintsPopCnt(x []uint) int {
if strconv.IntSize == 64 {
y := (*[]uint64)(unsafe.Pointer(&x)) // #nosec G103
return CountBitsUint64sPopCnt(*y)
} else if strconv.IntSize == 32 {
y := (*[]uint32)(unsafe.Pointer(&x)) // #nosec G103
return CountBitsUint32sPopCnt(*y)
}
panic("strconv.IntSize must be 32 or 64 bits")
}
// CountBitsBytesPopCnt count 1's in x
func CountBitsBytesPopCnt(x []byte) (ret int)
// CountBitsRunesPopCnt count 1's in x
func CountBitsRunesPopCnt(x []rune) (ret int)
// CountBitsStringPopCnt count 1's in s
func CountBitsStringPopCnt(s string) (ret int)

@ -0,0 +1,370 @@
//
// hamming distance calculations in Go
//
// https://github.com/steakknife/hamming
//
// Copyright © 2014, 2015, 2016 Barry Allard
//
// MIT license
//
#include "textflag.h"
// type SliceHeader struct {
// Data uintptr 0
// Len int 8
// Cap int 16
// }
// 0 x.Data
// 8 x.Len
// 16 x.Cap
// 24 ret
// type StringHeader struct {
// Data uintptr 0
// Len int 8
// }
// 0 x.Data
// 8 x.Len
// 16 ret
// func CountBitsInt8sPopCnt(x []int8) (ret int)
TEXT ·CountBitsInt8sPopCnt(SB),NOSPLIT,$0
JMP ·CountBitsUint8sPopCnt(SB)
// func CountBitsInt16sPopCnt(x []int16) (ret int)
TEXT ·CountBitsInt16sPopCnt(SB),NOSPLIT,$0
JMP ·CountBitsUint16sPopCnt(SB)
// func CountBitsInt32sPopCnt(x []int32) (ret int)
TEXT ·CountBitsInt32sPopCnt(SB),NOSPLIT,$0
JMP ·CountBitsUint32sPopCnt(SB)
// func CountBitsInt64sPopCnt(x []int64) (ret int)
TEXT ·CountBitsInt64sPopCnt(SB),NOSPLIT,$0
JMP ·CountBitsUint64sPopCnt(SB)
// func CountBitsUint8sPopCnt(x []uint8) (ret int)
TEXT ·CountBitsUint8sPopCnt(SB),NOSPLIT,$0
XORQ AX, AX // ret = 0
MOVQ x+8(FP), CX // x.Len -> CX
test_negative_slice_len:
MOVQ CX, BX // x.Len < 0 ---> x.Len[63] != 0
SHRQ $63, BX
JNZ done
MOVQ x+0(FP), DI // x.Data -> DI
CMPQ CX, $32 // x.Len >= 32
JL unrolled_loop_skip
unrolled_loop_setup:
XORQ R9, R9
XORQ BX, BX
XORQ DX, DX
unrolled_loop: // 4 unrolled loops of POPCNTQ (4 quad words at a time)
SUBQ $32, CX
POPCNTQ 0(DI), R10
ADDQ R10, R9
POPCNTQ 8(DI), R11
ADDQ R11, AX
POPCNTQ 16(DI), R12
ADDQ R12, BX
POPCNTQ 24(DI), R13
ADDQ R13, DX
ADDQ $32, DI
CMPQ CX, $32 // x.Len >= 32
JGE unrolled_loop
unrolled_loop_done:
ADDQ R9, AX
ADDQ BX, DX
ADDQ DX, AX
XORQ BX, BX
unrolled_loop_skip:
CMPQ CX, $0
JZ done
XORQ DX, DX
remainder_loop:
MOVB 0(DI), DL
POPCNTQ DX, BX
ADDQ BX, AX
INCQ DI
DECQ CX
JNZ remainder_loop
done:
MOVQ AX, ret+24(FP)
RET
// func CountBitsUint16sPopCnt(x []uint16) (ret int)
TEXT ·CountBitsUint16sPopCnt(SB),NOSPLIT,$0
XORQ AX, AX // ret = 0
MOVQ x+8(FP), CX // x.Len -> CX
test_negative_slice_len:
MOVQ CX, BX // x.Len*2 < 0 ---> x.Len[63:62] != 0
SHLQ $1, CX
SHRQ $62, BX
JNZ done
MOVQ x+0(FP), DI // x.Data -> DI
CMPQ CX, $32 // x.Len*2 >= 32
JL unrolled_loop_skip
unrolled_loop_setup:
XORQ R9, R9
XORQ BX, BX
XORQ DX, DX
unrolled_loop: // 4 unrolled loops of POPCNTQ (4 quad words at a time)
SUBQ $32, CX
POPCNTQ 0(DI), R10
ADDQ R10, R9
POPCNTQ 8(DI), R11
ADDQ R11, AX
POPCNTQ 16(DI), R12
ADDQ R12, BX
POPCNTQ 24(DI), R13
ADDQ R13, DX
ADDQ $32, DI
CMPQ CX, $32 // x.Len*2 >= 32
JGE unrolled_loop
unrolled_loop_done:
ADDQ R9, AX
ADDQ BX, DX
ADDQ DX, AX
XORQ BX, BX
unrolled_loop_skip:
CMPQ CX, $0
JZ done
XORQ DX, DX
remainder_loop:
MOVW 0(DI), DX
POPCNTQ DX, BX
ADDQ BX, AX
ADDQ $2, DI
SUBQ $2, CX
JNZ remainder_loop
done:
MOVQ AX, ret+24(FP)
RET
// func CountBitsUint32sPopCnt(x []uint32) (ret int)
TEXT ·CountBitsUint32sPopCnt(SB),NOSPLIT,$0
XORQ AX, AX // ret = 0
MOVQ x+8(FP), CX // x.Len -> CX
MOVQ CX, BX
MOVQ x+0(FP), DI // x.Data -> DI
test_negative_slice_len:
SHLQ $2, CX // x.Len*4 < 0 ---> x.Len[63:61] != 0
SHRQ $61, BX
JNZ done
CMPQ CX, $32 // x.Len*4 >= 32
JL unrolled_loop_skip
unrolled_loop_setup:
XORQ R9, R9
XORQ BX, BX
XORQ DX, DX
unrolled_loop: // 4 unrolled loops of POPCNTQ (4 quad words at a time)
SUBQ $32, CX
POPCNTQ 0(DI), R10 // r9 += popcntq(QW DI+0)
ADDQ R10, R9
POPCNTQ 8(DI), R11 // ax += popcntq(QW DI+8)
ADDQ R11, AX
POPCNTQ 16(DI), R12 // bx += popcntq(QW DI+16)
ADDQ R12, BX
POPCNTQ 24(DI), R13 // dx += popcntq(QW DI+24)
ADDQ R13, DX
ADDQ $32, DI
CMPQ CX, $32 // x.Len*4 >= 32
JGE unrolled_loop
unrolled_loop_done:
ADDQ R9, AX // ax = (ax + r9) + (bx + dx)
ADDQ BX, DX
ADDQ DX, AX
XORQ BX, BX
unrolled_loop_skip:
CMPQ CX, $0
JZ done
XORQ DX, DX
remainder_loop:
MOVB (DI), DX // ax += popcnt(DB 0(DI))
POPCNTQ DX, BX
ADDQ BX, AX
INCQ DI
DECQ CX
JNZ remainder_loop
done:
MOVQ AX, ret+24(FP)
RET
// func CountBitsUint64sPopCnt(x []uint64) (ret int)
TEXT ·CountBitsUint64sPopCnt(SB),NOSPLIT,$0
XORQ AX, AX // ret = 0
MOVQ x+8(FP), CX // x.Len -> CX
test_negative_slice_len:
MOVQ CX, BX // x.Len*8 < 0 ---> x.Len[63:60] != 0
SHLQ $3, CX
SHRQ $60, BX
JNZ done
MOVQ x+0(FP), DI // x.Data -> DI
CMPQ CX, $32 // x.Len*8 >= 32
JL unrolled_loop_skip
unrolled_loop_setup:
XORQ R9, R9
XORQ BX, BX
XORQ DX, DX
unrolled_loop: // 4 unrolled loops of POPCNTQ (4 quad words at a time)
SUBQ $32, CX
POPCNTQ 0(DI), R10
ADDQ R10, R9
POPCNTQ 8(DI), R11
ADDQ R11, AX
POPCNTQ 16(DI), R12
ADDQ R12, BX
POPCNTQ 24(DI), R13
ADDQ R13, DX
ADDQ $32, DI
CMPQ CX, $32 // x.Len*4 >= 32
JGE unrolled_loop
unrolled_loop_done:
ADDQ R9, AX
ADDQ BX, DX
ADDQ DX, AX
XORQ BX, BX
unrolled_loop_skip:
CMPQ CX, $0
JZ done
XORQ DX, DX
remainder_loop:
MOVQ 0(DI), DX
POPCNTQ DX, BX
ADDQ BX, AX
ADDQ $8, DI
SUBQ $8, CX
JNZ remainder_loop
done:
MOVQ AX, ret+24(FP)
RET
// func CountBitsBytesPopCnt(x []byte) (ret int)
TEXT ·CountBitsBytesPopCnt(SB),NOSPLIT,$0
JMP ·CountBitsUint8sPopCnt(SB)
// func CountBitsRunesPopCnt(x []rune) (ret int)
TEXT ·CountBitsRunesPopCnt(SB),NOSPLIT,$0
JMP ·CountBitsUint32sPopCnt(SB)
// func CountBitsStringPopCnt(s string) (ret int)
TEXT ·CountBitsStringPopCnt(SB),NOSPLIT,$0
XORQ AX, AX // ret = 0
MOVQ x+8(FP), CX // x.Len -> CX
test_negative_slice_len:
MOVQ CX, BX // x.Len < 0 ---> x.Len[63] != 0
SHRQ $63, BX
JNZ done
MOVQ x+0(FP), DI // x.Data -> DI
CMPQ CX, $32 // x.Len >= 32
JL unrolled_loop_skip
unrolled_loop_setup:
XORQ R9, R9
XORQ BX, BX
XORQ DX, DX
unrolled_loop: // 4 unrolled loops of POPCNTQ (4 quad words at a time)
SUBQ $32, CX
POPCNTQ 0(DI), R10
ADDQ R10, R9
POPCNTQ 8(DI), R11
ADDQ R11, AX
POPCNTQ 16(DI), R12
ADDQ R12, BX
POPCNTQ 24(DI), R13
ADDQ R13, DX
ADDQ $32, DI
CMPQ CX, $32 // x.Len >= 32
JGE unrolled_loop
unrolled_loop_done:
ADDQ R9, AX
ADDQ BX, DX
ADDQ DX, AX
XORQ BX, BX
unrolled_loop_skip:
CMPQ CX, $0
JZ done
XORQ DX, DX
remainder_loop:
MOVB 0(DI), DL
POPCNTQ DX, BX
ADDQ BX, AX
INCQ DI
DECQ CX
JNZ remainder_loop
done:
MOVQ AX, ret+16(FP)
RET

@ -0,0 +1,144 @@
//
// Package hamming distance calculations in Go
//
// https://github.com/steakknife/hamming
//
// Copyright © 2014, 2015, 2016, 2018 Barry Allard
//
// MIT license
//
package hamming
// Int8s hamming distance of two int8 buffers, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Int8s(b0, b1 []int8) int {
d := 0
for i, x := range b0 {
d += Int8(x, b1[i])
}
return d
}
// Int16s hamming distance of two int16 buffers, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Int16s(b0, b1 []int16) int {
d := 0
for i, x := range b0 {
d += Int16(x, b1[i])
}
return d
}
// Int32s hamming distance of two int32 buffers, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Int32s(b0, b1 []int32) int {
d := 0
for i, x := range b0 {
d += Int32(x, b1[i])
}
return d
}
// Int64s hamming distance of two int64 buffers, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Int64s(b0, b1 []int64) int {
d := 0
for i, x := range b0 {
d += Int64(x, b1[i])
}
return d
}
// Ints hamming distance of two int buffers, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Ints(b0, b1 []int) int {
d := 0
for i, x := range b0 {
d += Int(x, b1[i])
}
return d
}
// Uint8s hamming distance of two uint8 buffers, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Uint8s(b0, b1 []uint8) int {
d := 0
for i, x := range b0 {
d += Uint8(x, b1[i])
}
return d
}
// Uint16s hamming distance of two uint16 buffers, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Uint16s(b0, b1 []uint16) int {
d := 0
for i, x := range b0 {
d += Uint16(x, b1[i])
}
return d
}
// Uint32s hamming distance of two uint32 buffers, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Uint32s(b0, b1 []uint32) int {
d := 0
for i, x := range b0 {
d += Uint32(x, b1[i])
}
return d
}
// Uint64s hamming distance of two uint64 buffers, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Uint64s(b0, b1 []uint64) int {
d := 0
for i, x := range b0 {
d += Uint64(x, b1[i])
}
return d
}
// Uints hamming distance of two uint buffers, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Uints(b0, b1 []uint) int {
d := 0
for i, x := range b0 {
d += Uint(x, b1[i])
}
return d
}
// Bytes hamming distance of two byte buffers, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Bytes(b0, b1 []byte) int {
d := 0
for i, x := range b0 {
d += Byte(x, b1[i])
}
return d
}
// Runes hamming distance of two rune buffers, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Runes(b0, b1 []rune) int {
d := 0
for i, x := range b0 {
d += Rune(x, b1[i])
}
return d
}
// Strings hamming distance of two strings, of which the size of b0
// is used for both (panics if b1 < b0, does not compare b1 beyond length of b0)
func Strings(b0, b1 string) int {
return Runes(runes(b0), runes(b1))
}
// runize string
func runes(s string) (r []rune) {
for _, ch := range s {
r = append(r, ch)
}
return
}

12
vendor/vendor.json vendored

@ -436,6 +436,18 @@
"revision": "8537d3370df43a30a3d450c023783d2e43432b89", "revision": "8537d3370df43a30a3d450c023783d2e43432b89",
"revisionTime": "2019-03-16T09:03:35Z" "revisionTime": "2019-03-16T09:03:35Z"
}, },
{
"checksumSHA1": "1mI7DMaBgFAsU0aCrW5yCKyAPdM=",
"path": "github.com/steakknife/bloomfilter",
"revision": "6819c0d2a57025e1700378ee59b7382d71987f14",
"revisionTime": "2018-09-22T17:46:46Z"
},
{
"checksumSHA1": "uuF97bplG/+iQ/nfNSQGZOmTKBE=",
"path": "github.com/steakknife/hamming",
"revision": "c99c65617cd3d686aea8365fe563d6542f01d940",
"revisionTime": "2018-09-06T05:59:17Z"
},
{ {
"checksumSHA1": "mGbTYZ8dHVTiPTTJu3ktp+84pPI=", "checksumSHA1": "mGbTYZ8dHVTiPTTJu3ktp+84pPI=",
"path": "github.com/stretchr/testify/assert", "path": "github.com/stretchr/testify/assert",