From da977e9cdcca9ebb969f531401bdf2b467930928 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 15 Feb 2022 21:15:13 +0800 Subject: [PATCH] core, ethdb, tests, trie: implement NewBatchWithSize API for batcher (#24392) This PR adds an addtional API called `NewBatchWithSize` for db batcher. It turns out that leveldb batch memory allocation is super inefficient. The main reason is the allocation step of leveldb Batch is too small when the batch size is large. It can take a few second to build a leveldb batch with 100MB size. Luckily, leveldb also offers another API called MakeBatch which can pre-allocate the memory area. So if the approximate size of batch is known in advance, this API can be used in this case. It's needed in new state scheme PR which needs to commit a batch of trie nodes in a single batch. Implement the feature in a seperate PR. --- core/rawdb/table.go | 4 ++++ ethdb/batch.go | 3 +++ ethdb/leveldb/leveldb.go | 8 ++++++++ ethdb/memorydb/memorydb.go | 7 +++++++ tests/fuzzers/bn256/bn256_fuzz.go | 1 + tests/fuzzers/stacktrie/trie_fuzzer.go | 1 + trie/iterator_test.go | 4 ++++ trie/trie_test.go | 1 + 8 files changed, 29 insertions(+) diff --git a/core/rawdb/table.go b/core/rawdb/table.go index bdb6bbf56..f3d769fc5 100644 --- a/core/rawdb/table.go +++ b/core/rawdb/table.go @@ -189,6 +189,10 @@ func (t *table) DiffStore() ethdb.KeyValueStore { func (t *table) SetDiffStore(diff ethdb.KeyValueStore) { panic("not implement") } +// NewBatchWithSize creates a write-only database batch with pre-allocated buffer. +func (t *table) NewBatchWithSize(size int) ethdb.Batch { + return &tableBatch{t.db.NewBatchWithSize(size), t.prefix} +} // tableBatch is a wrapper around a database batch that prefixes each key access // with a pre-configured string. diff --git a/ethdb/batch.go b/ethdb/batch.go index 135369331..541f40c83 100644 --- a/ethdb/batch.go +++ b/ethdb/batch.go @@ -43,6 +43,9 @@ type Batcher interface { // NewBatch creates a write-only database that buffers changes to its host db // until a final write is called. NewBatch() Batch + + // NewBatchWithSize creates a write-only database batch with pre-allocated buffer. + NewBatchWithSize(size int) Batch } // HookedBatch wraps an arbitrary batch where each operation may be hooked into diff --git a/ethdb/leveldb/leveldb.go b/ethdb/leveldb/leveldb.go index 9a782dedb..cb348ea28 100644 --- a/ethdb/leveldb/leveldb.go +++ b/ethdb/leveldb/leveldb.go @@ -213,6 +213,14 @@ func (db *Database) NewBatch() ethdb.Batch { } } +// NewBatchWithSize creates a write-only database batch with pre-allocated buffer. +func (db *Database) NewBatchWithSize(size int) ethdb.Batch { + return &batch{ + db: db.db, + b: leveldb.MakeBatch(size), + } +} + // NewIterator creates a binary-alphabetical iterator over a subset // of database content with a particular key prefix, starting at a particular // initial key (or after, if it does not exist). diff --git a/ethdb/memorydb/memorydb.go b/ethdb/memorydb/memorydb.go index 78181e860..7c8d655f4 100644 --- a/ethdb/memorydb/memorydb.go +++ b/ethdb/memorydb/memorydb.go @@ -129,6 +129,13 @@ func (db *Database) NewBatch() ethdb.Batch { } } +// NewBatchWithSize creates a write-only database batch with pre-allocated buffer. +func (db *Database) NewBatchWithSize(size int) ethdb.Batch { + return &batch{ + db: db, + } +} + // NewIterator creates a binary-alphabetical iterator over a subset // of database content with a particular key prefix, starting at a particular // initial key (or after, if it does not exist). diff --git a/tests/fuzzers/bn256/bn256_fuzz.go b/tests/fuzzers/bn256/bn256_fuzz.go index 030ac19b3..11fd9e18d 100644 --- a/tests/fuzzers/bn256/bn256_fuzz.go +++ b/tests/fuzzers/bn256/bn256_fuzz.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be found // in the LICENSE file. +//go:build gofuzz // +build gofuzz package bn256 diff --git a/tests/fuzzers/stacktrie/trie_fuzzer.go b/tests/fuzzers/stacktrie/trie_fuzzer.go index e73ef4851..6beec7c80 100644 --- a/tests/fuzzers/stacktrie/trie_fuzzer.go +++ b/tests/fuzzers/stacktrie/trie_fuzzer.go @@ -66,6 +66,7 @@ func (s *spongeDb) Has(key []byte) (bool, error) { panic("implement func (s *spongeDb) Get(key []byte) ([]byte, error) { return nil, errors.New("no such elem") } func (s *spongeDb) Delete(key []byte) error { panic("implement me") } func (s *spongeDb) NewBatch() ethdb.Batch { return &spongeBatch{s} } +func (s *spongeDb) NewBatchWithSize(size int) ethdb.Batch { return &spongeBatch{s} } func (s *spongeDb) Stat(property string) (string, error) { panic("implement me") } func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") } func (s *spongeDb) Close() error { return nil } diff --git a/trie/iterator_test.go b/trie/iterator_test.go index 1ebd6a113..871a5f3fc 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -483,6 +483,10 @@ func (l *loggingDb) NewBatch() ethdb.Batch { return l.backend.NewBatch() } +func (l *loggingDb) NewBatchWithSize(size int) ethdb.Batch { + return l.backend.NewBatchWithSize(size) +} + func (l *loggingDb) NewIterator(prefix []byte, start []byte) ethdb.Iterator { fmt.Printf("NewIterator\n") return l.backend.NewIterator(prefix, start) diff --git a/trie/trie_test.go b/trie/trie_test.go index 20944bf8f..63aed333d 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -676,6 +676,7 @@ func (s *spongeDb) Has(key []byte) (bool, error) { panic("implement func (s *spongeDb) Get(key []byte) ([]byte, error) { return nil, errors.New("no such elem") } func (s *spongeDb) Delete(key []byte) error { panic("implement me") } func (s *spongeDb) NewBatch() ethdb.Batch { return &spongeBatch{s} } +func (s *spongeDb) NewBatchWithSize(size int) ethdb.Batch { return &spongeBatch{s} } func (s *spongeDb) Stat(property string) (string, error) { panic("implement me") } func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") } func (s *spongeDb) Close() error { return nil }