core, ethdb, tests, trie: implement NewBatchWithSize API for batcher (#24392)

This PR adds an addtional API called `NewBatchWithSize` for db
batcher. It turns out that leveldb batch memory allocation is
super inefficient. The main reason is the allocation step of
leveldb Batch is too small when the batch size is large. It can
take a few second to build a leveldb batch with 100MB size.

Luckily, leveldb also offers another API called MakeBatch which can
pre-allocate the memory area. So if the approximate size of batch is
known in advance, this API can be used in this case.

It's needed in new state scheme PR which needs to commit a batch of
trie nodes in a single batch. Implement the feature in a seperate PR.
This commit is contained in:
rjl493456442 2022-02-15 21:15:13 +08:00 committed by GitHub
parent 2056e596f2
commit 4d086430bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 30 additions and 0 deletions

@ -172,6 +172,11 @@ func (t *table) NewBatch() ethdb.Batch {
return &tableBatch{t.db.NewBatch(), t.prefix} return &tableBatch{t.db.NewBatch(), t.prefix}
} }
// NewBatchWithSize creates a write-only database batch with pre-allocated buffer.
func (t *table) NewBatchWithSize(size int) ethdb.Batch {
return &tableBatch{t.db.NewBatchWithSize(size), t.prefix}
}
// tableBatch is a wrapper around a database batch that prefixes each key access // tableBatch is a wrapper around a database batch that prefixes each key access
// with a pre-configured string. // with a pre-configured string.
type tableBatch struct { type tableBatch struct {

@ -43,6 +43,9 @@ type Batcher interface {
// NewBatch creates a write-only database that buffers changes to its host db // NewBatch creates a write-only database that buffers changes to its host db
// until a final write is called. // until a final write is called.
NewBatch() Batch NewBatch() Batch
// NewBatchWithSize creates a write-only database batch with pre-allocated buffer.
NewBatchWithSize(size int) Batch
} }
// HookedBatch wraps an arbitrary batch where each operation may be hooked into // HookedBatch wraps an arbitrary batch where each operation may be hooked into

@ -213,6 +213,14 @@ func (db *Database) NewBatch() ethdb.Batch {
} }
} }
// NewBatchWithSize creates a write-only database batch with pre-allocated buffer.
func (db *Database) NewBatchWithSize(size int) ethdb.Batch {
return &batch{
db: db.db,
b: leveldb.MakeBatch(size),
}
}
// NewIterator creates a binary-alphabetical iterator over a subset // NewIterator creates a binary-alphabetical iterator over a subset
// of database content with a particular key prefix, starting at a particular // of database content with a particular key prefix, starting at a particular
// initial key (or after, if it does not exist). // initial key (or after, if it does not exist).

@ -129,6 +129,13 @@ func (db *Database) NewBatch() ethdb.Batch {
} }
} }
// NewBatchWithSize creates a write-only database batch with pre-allocated buffer.
func (db *Database) NewBatchWithSize(size int) ethdb.Batch {
return &batch{
db: db,
}
}
// NewIterator creates a binary-alphabetical iterator over a subset // NewIterator creates a binary-alphabetical iterator over a subset
// of database content with a particular key prefix, starting at a particular // of database content with a particular key prefix, starting at a particular
// initial key (or after, if it does not exist). // initial key (or after, if it does not exist).

@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be found // Use of this source code is governed by a BSD-style license that can be found
// in the LICENSE file. // in the LICENSE file.
//go:build gofuzz
// +build gofuzz // +build gofuzz
package bn256 package bn256

@ -66,6 +66,7 @@ func (s *spongeDb) Has(key []byte) (bool, error) { panic("implement
func (s *spongeDb) Get(key []byte) ([]byte, error) { return nil, errors.New("no such elem") } func (s *spongeDb) Get(key []byte) ([]byte, error) { return nil, errors.New("no such elem") }
func (s *spongeDb) Delete(key []byte) error { panic("implement me") } func (s *spongeDb) Delete(key []byte) error { panic("implement me") }
func (s *spongeDb) NewBatch() ethdb.Batch { return &spongeBatch{s} } func (s *spongeDb) NewBatch() ethdb.Batch { return &spongeBatch{s} }
func (s *spongeDb) NewBatchWithSize(size int) ethdb.Batch { return &spongeBatch{s} }
func (s *spongeDb) Stat(property string) (string, error) { panic("implement me") } func (s *spongeDb) Stat(property string) (string, error) { panic("implement me") }
func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") } func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") }
func (s *spongeDb) Close() error { return nil } func (s *spongeDb) Close() error { return nil }

@ -470,6 +470,10 @@ func (l *loggingDb) NewBatch() ethdb.Batch {
return l.backend.NewBatch() return l.backend.NewBatch()
} }
func (l *loggingDb) NewBatchWithSize(size int) ethdb.Batch {
return l.backend.NewBatchWithSize(size)
}
func (l *loggingDb) NewIterator(prefix []byte, start []byte) ethdb.Iterator { func (l *loggingDb) NewIterator(prefix []byte, start []byte) ethdb.Iterator {
fmt.Printf("NewIterator\n") fmt.Printf("NewIterator\n")
return l.backend.NewIterator(prefix, start) return l.backend.NewIterator(prefix, start)

@ -675,6 +675,7 @@ func (s *spongeDb) Has(key []byte) (bool, error) { panic("implement
func (s *spongeDb) Get(key []byte) ([]byte, error) { return nil, errors.New("no such elem") } func (s *spongeDb) Get(key []byte) ([]byte, error) { return nil, errors.New("no such elem") }
func (s *spongeDb) Delete(key []byte) error { panic("implement me") } func (s *spongeDb) Delete(key []byte) error { panic("implement me") }
func (s *spongeDb) NewBatch() ethdb.Batch { return &spongeBatch{s} } func (s *spongeDb) NewBatch() ethdb.Batch { return &spongeBatch{s} }
func (s *spongeDb) NewBatchWithSize(size int) ethdb.Batch { return &spongeBatch{s} }
func (s *spongeDb) Stat(property string) (string, error) { panic("implement me") } func (s *spongeDb) Stat(property string) (string, error) { panic("implement me") }
func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") } func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") }
func (s *spongeDb) Close() error { return nil } func (s *spongeDb) Close() error { return nil }