From 542df8898ef6d718647058c129069804bc463ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Tue, 6 Aug 2019 13:40:28 +0300 Subject: [PATCH] core: initial version of state snapshots --- accounts/abi/bind/backends/simulated.go | 6 +- cmd/evm/runner.go | 4 +- cmd/geth/chaincmd.go | 2 +- core/blockchain.go | 56 ++-- core/chain_makers.go | 2 +- core/genesis.go | 4 +- core/rawdb/accessors_snapshot.go | 99 +++++++ core/rawdb/database.go | 8 + core/rawdb/schema.go | 23 +- core/state/snapshot/account.go | 54 ++++ core/state/snapshot/difflayer.go | 337 +++++++++++++++++++++++ core/state/snapshot/difflayer_journal.go | 140 ++++++++++ core/state/snapshot/disklayer.go | 115 ++++++++ core/state/snapshot/generate.go | 212 ++++++++++++++ core/state/snapshot/generate_test.go | 111 ++++++++ core/state/snapshot/snapshot.go | 244 ++++++++++++++++ core/state/snapshot/snapshot_test.go | 17 ++ core/state/snapshot/sort.go | 62 +++++ core/state/state_object.go | 59 +++- core/state/statedb.go | 129 +++++++-- core/state_prefetcher.go | 2 + core/vm/runtime/runtime.go | 4 +- core/vm/runtime/runtime_test.go | 4 +- eth/api_test.go | 6 +- eth/api_tracer.go | 6 +- eth/handler_test.go | 2 +- light/odr_test.go | 4 +- light/trie.go | 2 +- tests/state_test_util.go | 4 +- trie/iterator.go | 2 + 30 files changed, 1635 insertions(+), 85 deletions(-) create mode 100644 core/rawdb/accessors_snapshot.go create mode 100644 core/state/snapshot/account.go create mode 100644 core/state/snapshot/difflayer.go create mode 100644 core/state/snapshot/difflayer_journal.go create mode 100644 core/state/snapshot/disklayer.go create mode 100644 core/state/snapshot/generate.go create mode 100644 core/state/snapshot/generate_test.go create mode 100644 core/state/snapshot/snapshot.go create mode 100644 core/state/snapshot/snapshot_test.go create mode 100644 core/state/snapshot/sort.go diff --git a/accounts/abi/bind/backends/simulated.go b/accounts/abi/bind/backends/simulated.go index 
f7f3dec839..2dbc593569 100644 --- a/accounts/abi/bind/backends/simulated.go +++ b/accounts/abi/bind/backends/simulated.go @@ -124,7 +124,7 @@ func (b *SimulatedBackend) rollback() { statedb, _ := b.blockchain.State() b.pendingBlock = blocks[0] - b.pendingState, _ = state.New(b.pendingBlock.Root(), statedb.Database()) + b.pendingState, _ = state.New(b.pendingBlock.Root(), statedb.Database(), nil) } // stateByBlockNumber retrieves a state by a given blocknumber. @@ -480,7 +480,7 @@ func (b *SimulatedBackend) SendTransaction(ctx context.Context, tx *types.Transa statedb, _ := b.blockchain.State() b.pendingBlock = blocks[0] - b.pendingState, _ = state.New(b.pendingBlock.Root(), statedb.Database()) + b.pendingState, _ = state.New(b.pendingBlock.Root(), statedb.Database(), nil) return nil } @@ -593,7 +593,7 @@ func (b *SimulatedBackend) AdjustTime(adjustment time.Duration) error { statedb, _ := b.blockchain.State() b.pendingBlock = blocks[0] - b.pendingState, _ = state.New(b.pendingBlock.Root(), statedb.Database()) + b.pendingState, _ = state.New(b.pendingBlock.Root(), statedb.Database(), nil) return nil } diff --git a/cmd/evm/runner.go b/cmd/evm/runner.go index da301ff5ee..0a9c19f5bc 100644 --- a/cmd/evm/runner.go +++ b/cmd/evm/runner.go @@ -129,10 +129,10 @@ func runCmd(ctx *cli.Context) error { genesisConfig = gen db := rawdb.NewMemoryDatabase() genesis := gen.ToBlock(db) - statedb, _ = state.New(genesis.Root(), state.NewDatabase(db)) + statedb, _ = state.New(genesis.Root(), state.NewDatabase(db), nil) chainConfig = gen.Config } else { - statedb, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) genesisConfig = new(core.Genesis) } if ctx.GlobalString(SenderFlag.Name) != "" { diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 5b176b6da1..9d4835a16c 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -544,7 +544,7 @@ func dump(ctx 
*cli.Context) error { fmt.Println("{}") utils.Fatalf("block not found") } else { - state, err := state.New(block.Root(), state.NewDatabase(chainDb)) + state, err := state.New(block.Root(), state.NewDatabase(chainDb), nil) if err != nil { utils.Fatalf("could not create new state: %v", err) } diff --git a/core/blockchain.go b/core/blockchain.go index d7fcbd5e31..676a72c779 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -34,6 +34,7 @@ import ( "github.com/ethereum/go-ethereum/consensus" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" + "github.com/ethereum/go-ethereum/core/state/snapshot" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/ethdb" @@ -61,6 +62,10 @@ var ( storageUpdateTimer = metrics.NewRegisteredTimer("chain/storage/updates", nil) storageCommitTimer = metrics.NewRegisteredTimer("chain/storage/commits", nil) + snapshotAccountReadTimer = metrics.NewRegisteredTimer("chain/snapshot/accountreads", nil) + snapshotStorageReadTimer = metrics.NewRegisteredTimer("chain/snapshot/storagereads", nil) + snapshotCommitTimer = metrics.NewRegisteredTimer("chain/snapshot/commits", nil) + blockInsertTimer = metrics.NewRegisteredTimer("chain/inserts", nil) blockValidationTimer = metrics.NewRegisteredTimer("chain/validation", nil) blockExecutionTimer = metrics.NewRegisteredTimer("chain/execution", nil) @@ -135,9 +140,10 @@ type BlockChain struct { chainConfig *params.ChainConfig // Chain & network configuration cacheConfig *CacheConfig // Cache configuration for pruning - db ethdb.Database // Low level persistent database to store final content in - triegc *prque.Prque // Priority queue mapping block numbers to tries to gc - gcproc time.Duration // Accumulates canonical block processing for trie dumping + db ethdb.Database // Low level persistent database to store final content in + snaps *snapshot.SnapshotTree // Snapshot tree for fast trie leaf 
access + triegc *prque.Prque // Priority queue mapping block numbers to tries to gc + gcproc time.Duration // Accumulates canonical block processing for trie dumping hc *HeaderChain rmLogsFeed event.Feed @@ -293,6 +299,11 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par } } } + // Load any existing snapshot, regenerating it if loading failed + head := bc.CurrentBlock() + if bc.snaps, err = snapshot.New(bc.db, "snapshot.rlp", head.NumberU64(), head.Root()); err != nil { + return nil, err + } // Take ownership of this particular state go bc.update() return bc, nil @@ -339,7 +350,7 @@ func (bc *BlockChain) loadLastState() error { return bc.Reset() } // Make sure the state associated with the block is available - if _, err := state.New(currentBlock.Root(), bc.stateCache); err != nil { + if _, err := state.New(currentBlock.Root(), bc.stateCache, bc.snaps); err != nil { // Dangling block without a state associated, init from scratch log.Warn("Head state missing, repairing chain", "number", currentBlock.Number(), "hash", currentBlock.Hash()) if err := bc.repair(&currentBlock); err != nil { @@ -401,7 +412,7 @@ func (bc *BlockChain) SetHead(head uint64) error { if newHeadBlock == nil { newHeadBlock = bc.genesisBlock } else { - if _, err := state.New(newHeadBlock.Root(), bc.stateCache); err != nil { + if _, err := state.New(newHeadBlock.Root(), bc.stateCache, bc.snaps); err != nil { // Rewound state missing, rolled back to before pivot, reset to genesis newHeadBlock = bc.genesisBlock } @@ -524,7 +535,7 @@ func (bc *BlockChain) State() (*state.StateDB, error) { // StateAt returns a new mutable state based on a particular point in time. func (bc *BlockChain) StateAt(root common.Hash) (*state.StateDB, error) { - return state.New(root, bc.stateCache) + return state.New(root, bc.stateCache, bc.snaps) } // StateCache returns the caching database underpinning the blockchain instance.
@@ -576,7 +587,7 @@ func (bc *BlockChain) ResetWithGenesisBlock(genesis *types.Block) error { func (bc *BlockChain) repair(head **types.Block) error { for { // Abort if we've rewound to a head block that does have associated state - if _, err := state.New((*head).Root(), bc.stateCache); err == nil { + if _, err := state.New((*head).Root(), bc.stateCache, bc.snaps); err == nil { log.Info("Rewound blockchain to past state", "number", (*head).Number(), "hash", (*head).Hash()) return nil } @@ -839,6 +850,10 @@ func (bc *BlockChain) Stop() { bc.wg.Wait() + // Ensure that the entirety of the state snapshot is journalled to disk. + if err := bc.snaps.Journal(bc.CurrentBlock().Root()); err != nil { + log.Error("Failed to journal state snapshot", "err", err) + } // Ensure the state of a recent block is also stored to disk before exiting. // We're writing three different states to catch different restart scenarios: // - HEAD: So we don't need to reprocess any blocks in the general case @@ -1647,7 +1662,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er if parent == nil { parent = bc.GetHeader(block.ParentHash(), block.NumberU64()-1) } - statedb, err := state.New(parent.Root, bc.stateCache) + statedb, err := state.New(parent.Root, bc.stateCache, bc.snaps) if err != nil { return it.index, err } @@ -1656,9 +1671,9 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er var followupInterrupt uint32 if !bc.cacheConfig.TrieCleanNoPrefetch { if followup, err := it.peek(); followup != nil && err == nil { - throwaway, _ := state.New(parent.Root, bc.stateCache) + throwaway, _ := state.New(parent.Root, bc.stateCache, bc.snaps) go func(start time.Time, followup *types.Block, throwaway *state.StateDB, interrupt *uint32) { - bc.prefetcher.Prefetch(followup, throwaway, bc.vmConfig, interrupt) + bc.prefetcher.Prefetch(followup, throwaway, bc.vmConfig, &followupInterrupt) blockPrefetchExecuteTimer.Update(time.Since(start)) if 
atomic.LoadUint32(interrupt) == 1 { @@ -1676,14 +1691,16 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er return it.index, err } // Update the metrics touched during block processing - accountReadTimer.Update(statedb.AccountReads) // Account reads are complete, we can mark them - storageReadTimer.Update(statedb.StorageReads) // Storage reads are complete, we can mark them - accountUpdateTimer.Update(statedb.AccountUpdates) // Account updates are complete, we can mark them - storageUpdateTimer.Update(statedb.StorageUpdates) // Storage updates are complete, we can mark them + accountReadTimer.Update(statedb.AccountReads) // Account reads are complete, we can mark them + storageReadTimer.Update(statedb.StorageReads) // Storage reads are complete, we can mark them + accountUpdateTimer.Update(statedb.AccountUpdates) // Account updates are complete, we can mark them + storageUpdateTimer.Update(statedb.StorageUpdates) // Storage updates are complete, we can mark them + snapshotAccountReadTimer.Update(statedb.SnapshotAccountReads) // Account reads are complete, we can mark them + snapshotStorageReadTimer.Update(statedb.SnapshotStorageReads) // Storage reads are complete, we can mark them triehash := statedb.AccountHashes + statedb.StorageHashes // Save to not double count in validation - trieproc := statedb.AccountReads + statedb.AccountUpdates - trieproc += statedb.StorageReads + statedb.StorageUpdates + trieproc := statedb.SnapshotAccountReads + statedb.AccountReads + statedb.AccountUpdates + trieproc += statedb.SnapshotStorageReads + statedb.StorageReads + statedb.StorageUpdates blockExecutionTimer.Update(time.Since(substart) - trieproc - triehash) @@ -1712,10 +1729,11 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er atomic.StoreUint32(&followupInterrupt, 1) // Update the metrics touched during block commit - accountCommitTimer.Update(statedb.AccountCommits) // Account commits are complete, we can 
mark them - storageCommitTimer.Update(statedb.StorageCommits) // Storage commits are complete, we can mark them + accountCommitTimer.Update(statedb.AccountCommits) // Account commits are complete, we can mark them + storageCommitTimer.Update(statedb.StorageCommits) // Storage commits are complete, we can mark them + snapshotCommitTimer.Update(statedb.SnapshotCommits) // Snapshot commits are complete, we can mark them - blockWriteTimer.Update(time.Since(substart) - statedb.AccountCommits - statedb.StorageCommits) + blockWriteTimer.Update(time.Since(substart) - statedb.AccountCommits - statedb.StorageCommits - statedb.SnapshotCommits) blockInsertTimer.UpdateSince(start) switch status { diff --git a/core/chain_makers.go b/core/chain_makers.go index fc4f7d182d..6524087d4e 100644 --- a/core/chain_makers.go +++ b/core/chain_makers.go @@ -228,7 +228,7 @@ func GenerateChain(config *params.ChainConfig, parent *types.Block, engine conse return nil, nil } for i := 0; i < n; i++ { - statedb, err := state.New(parent.Root(), state.NewDatabase(db)) + statedb, err := state.New(parent.Root(), state.NewDatabase(db), nil) if err != nil { panic(err) } diff --git a/core/genesis.go b/core/genesis.go index 92e654da83..06d347f736 100644 --- a/core/genesis.go +++ b/core/genesis.go @@ -178,7 +178,7 @@ func SetupGenesisBlockWithOverride(db ethdb.Database, genesis *Genesis, override // We have the genesis block in database(perhaps in ancient database) // but the corresponding state is missing. 
header := rawdb.ReadHeader(db, stored, 0) - if _, err := state.New(header.Root, state.NewDatabaseWithCache(db, 0)); err != nil { + if _, err := state.New(header.Root, state.NewDatabaseWithCache(db, 0), nil); err != nil { if genesis == nil { genesis = DefaultGenesisBlock() } @@ -259,7 +259,7 @@ func (g *Genesis) ToBlock(db ethdb.Database) *types.Block { if db == nil { db = rawdb.NewMemoryDatabase() } - statedb, _ := state.New(common.Hash{}, state.NewDatabase(db)) + statedb, _ := state.New(common.Hash{}, state.NewDatabase(db), nil) for addr, account := range g.Alloc { statedb.AddBalance(addr, account.Balance) statedb.SetCode(addr, account.Code) diff --git a/core/rawdb/accessors_snapshot.go b/core/rawdb/accessors_snapshot.go new file mode 100644 index 0000000000..9989e6b50e --- /dev/null +++ b/core/rawdb/accessors_snapshot.go @@ -0,0 +1,99 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package rawdb + +import ( + "encoding/binary" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" +) + +// ReadSnapshotBlock retrieves the number and root of the block whose state is +// contained in the persisted snapshot.
+func ReadSnapshotBlock(db ethdb.KeyValueReader) (uint64, common.Hash) { + data, _ := db.Get(snapshotBlockKey) + if len(data) != 8+common.HashLength { + return 0, common.Hash{} + } + return binary.BigEndian.Uint64(data[:8]), common.BytesToHash(data[8:]) +} + +// WriteSnapshotBlock stores the number and root of the block whose state is +// contained in the persisted snapshot. +func WriteSnapshotBlock(db ethdb.KeyValueWriter, number uint64, root common.Hash) { + if err := db.Put(snapshotBlockKey, append(encodeBlockNumber(number), root.Bytes()...)); err != nil { + log.Crit("Failed to store snapsnot block's number and root", "err", err) + } +} + +// DeleteSnapshotBlock deletes the number and root of the block whose state is +// contained in the persisted snapshot. Since snapshots are not immutable, this +// method can be used during updates, so a crash or failure will mark the entire +// snapshot invalid. +func DeleteSnapshotBlock(db ethdb.KeyValueWriter) { + if err := db.Delete(snapshotBlockKey); err != nil { + log.Crit("Failed to remove snapsnot block's number and hash", "err", err) + } +} + +// ReadAccountSnapshot retrieves the snapshot entry of an account trie leaf. +func ReadAccountSnapshot(db ethdb.KeyValueReader, hash common.Hash) []byte { + data, _ := db.Get(accountSnapshotKey(hash)) + return data +} + +// WriteAccountSnapshot stores the snapshot entry of an account trie leaf. +func WriteAccountSnapshot(db ethdb.KeyValueWriter, hash common.Hash, entry []byte) { + if err := db.Put(accountSnapshotKey(hash), entry); err != nil { + log.Crit("Failed to store account snapshot", "err", err) + } +} + +// DeleteAccountSnapshot removes the snapshot entry of an account trie leaf. +func DeleteAccountSnapshot(db ethdb.KeyValueWriter, hash common.Hash) { + if err := db.Delete(accountSnapshotKey(hash)); err != nil { + log.Crit("Failed to delete account snapshot", "err", err) + } +} + +// ReadStorageSnapshot retrieves the snapshot entry of a storage trie leaf.
+func ReadStorageSnapshot(db ethdb.KeyValueReader, accountHash, storageHash common.Hash) []byte { + data, _ := db.Get(storageSnapshotKey(accountHash, storageHash)) + return data +} + +// WriteStorageSnapshot stores the snapshot entry of a storage trie leaf. +func WriteStorageSnapshot(db ethdb.KeyValueWriter, accountHash, storageHash common.Hash, entry []byte) { + if err := db.Put(storageSnapshotKey(accountHash, storageHash), entry); err != nil { + log.Crit("Failed to store storage snapshot", "err", err) + } +} + +// DeleteStorageSnapshot removes the snapshot entry of a storage trie leaf. +func DeleteStorageSnapshot(db ethdb.KeyValueWriter, accountHash, storageHash common.Hash) { + if err := db.Delete(storageSnapshotKey(accountHash, storageHash)); err != nil { + log.Crit("Failed to delete storage snapshot", "err", err) + } +} + +// IterateStorageSnapshots returns an iterator for walking the entire storage +// space of a specific account. +func IterateStorageSnapshots(db ethdb.Iteratee, accountHash common.Hash) ethdb.Iterator { + return db.NewIteratorWithPrefix(storageSnapshotsKey(accountHash)) +} diff --git a/core/rawdb/database.go b/core/rawdb/database.go index 838c084359..7abd07359f 100644 --- a/core/rawdb/database.go +++ b/core/rawdb/database.go @@ -239,6 +239,8 @@ func InspectDatabase(db ethdb.Database) error { hashNumPairing common.StorageSize trieSize common.StorageSize txlookupSize common.StorageSize + accountSnapSize common.StorageSize + storageSnapSize common.StorageSize preimageSize common.StorageSize bloomBitsSize common.StorageSize cliqueSnapsSize common.StorageSize @@ -280,6 +282,10 @@ func InspectDatabase(db ethdb.Database) error { receiptSize += size case bytes.HasPrefix(key, txLookupPrefix) && len(key) == (len(txLookupPrefix)+common.HashLength): txlookupSize += size + case bytes.HasPrefix(key, StateSnapshotPrefix) && len(key) == (len(StateSnapshotPrefix)+common.HashLength): + accountSnapSize += size + case bytes.HasPrefix(key, StateSnapshotPrefix)
&& len(key) == (len(StateSnapshotPrefix)+2*common.HashLength): + storageSnapSize += size case bytes.HasPrefix(key, preimagePrefix) && len(key) == (len(preimagePrefix)+common.HashLength): preimageSize += size case bytes.HasPrefix(key, bloomBitsPrefix) && len(key) == (len(bloomBitsPrefix)+10+common.HashLength): @@ -331,6 +337,8 @@ func InspectDatabase(db ethdb.Database) error { {"Key-Value store", "Bloombit index", bloomBitsSize.String()}, {"Key-Value store", "Trie nodes", trieSize.String()}, {"Key-Value store", "Trie preimages", preimageSize.String()}, + {"Key-Value store", "Account snapshot", accountSnapSize.String()}, + {"Key-Value store", "Storage snapshot", storageSnapSize.String()}, {"Key-Value store", "Clique snapshots", cliqueSnapsSize.String()}, {"Key-Value store", "Singleton metadata", metadata.String()}, {"Ancient store", "Headers", ancientHeaders.String()}, diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index a44a2c99f9..8e611246a1 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -41,6 +41,9 @@ var ( // fastTrieProgressKey tracks the number of trie entries imported during fast sync. fastTrieProgressKey = []byte("TrieSync") + // snapshotBlockKey tracks the number and hash of the last snapshot. + snapshotBlockKey = []byte("SnapshotBlock") + // Data item prefixes (use single byte to avoid mixing data types, avoid `i`, used for indexes). 
headerPrefix = []byte("h") // headerPrefix + num (uint64 big endian) + hash -> header headerTDSuffix = []byte("t") // headerPrefix + num (uint64 big endian) + hash + headerTDSuffix -> td @@ -50,8 +53,9 @@ var ( blockBodyPrefix = []byte("b") // blockBodyPrefix + num (uint64 big endian) + hash -> block body blockReceiptsPrefix = []byte("r") // blockReceiptsPrefix + num (uint64 big endian) + hash -> block receipts - txLookupPrefix = []byte("l") // txLookupPrefix + hash -> transaction/receipt lookup metadata - bloomBitsPrefix = []byte("B") // bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash -> bloom bits + txLookupPrefix = []byte("l") // txLookupPrefix + hash -> transaction/receipt lookup metadata + bloomBitsPrefix = []byte("B") // bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash -> bloom bits + StateSnapshotPrefix = []byte("s") // StateSnapshotPrefix + account hash [+ storage hash] -> account/storage trie value preimagePrefix = []byte("secure-key-") // preimagePrefix + hash -> preimage configPrefix = []byte("ethereum-config-") // config prefix for the db @@ -145,6 +149,21 @@ func txLookupKey(hash common.Hash) []byte { return append(txLookupPrefix, hash.Bytes()...) } +// accountSnapshotKey = StateSnapshotPrefix + hash +func accountSnapshotKey(hash common.Hash) []byte { + return append(StateSnapshotPrefix, hash.Bytes()...) +} + +// storageSnapshotKey = StateSnapshotPrefix + account hash + storage hash +func storageSnapshotKey(accountHash, storageHash common.Hash) []byte { + return append(append(StateSnapshotPrefix, accountHash.Bytes()...), storageHash.Bytes()...) +} + +// storageSnapshotsKey = StateSnapshotPrefix + account hash (the common prefix of every storage snapshot key of that account) +func storageSnapshotsKey(accountHash common.Hash) []byte { + return append(StateSnapshotPrefix, accountHash.Bytes()...)
+} + // bloomBitsKey = bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash func bloomBitsKey(bit uint, section uint64, hash common.Hash) []byte { key := append(append(bloomBitsPrefix, make([]byte, 10)...), hash.Bytes()...) diff --git a/core/state/snapshot/account.go b/core/state/snapshot/account.go new file mode 100644 index 0000000000..1068dc2a01 --- /dev/null +++ b/core/state/snapshot/account.go @@ -0,0 +1,54 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + "math/big" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" +) + +// Account is a slim version of a state.Account, where the root and code hash +// are replaced with a nil byte slice for empty accounts. +type Account struct { + Nonce uint64 + Balance *big.Int + Root []byte + CodeHash []byte +} + +// AccountRLP converts a state.Account content into a slim snapshot version RLP +// encoded.
+func AccountRLP(nonce uint64, balance *big.Int, root common.Hash, codehash []byte) []byte { + slim := Account{ + Nonce: nonce, + Balance: balance, + } + if root != emptyRoot { + slim.Root = root[:] + } + if !bytes.Equal(codehash, emptyCode[:]) { + slim.CodeHash = codehash + } + data, err := rlp.EncodeToBytes(slim) + if err != nil { + panic(err) + } + return data +} diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go new file mode 100644 index 0000000000..f163feb561 --- /dev/null +++ b/core/state/snapshot/difflayer.go @@ -0,0 +1,337 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "fmt" + "sort" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" +) + +// diffLayer represents a collection of modifications made to a state snapshot +// after running a block on top. It contains one sorted list for the account trie +// and one list for each storage trie. +// +// The goal of a diff layer is to act as a journal, tracking recent modifications +// made to the state, that have not yet graduated into a semi-immutable state.
+type diffLayer struct { + parent snapshot // Parent snapshot modified by this one, never nil + memory uint64 // Approximate guess as to how much memory we use + + number uint64 // Block number to which this snapshot diff belongs to + root common.Hash // Root hash to which this snapshot diff belongs to + + accountList []common.Hash // List of accounts for iteration, might not be sorted yet (lazy) + accountSorted bool // Flag whether the account list has already been sorted or not + accountData map[common.Hash][]byte // Keyed accounts for direct retrieval (nil means deleted) + storageList map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account + storageSorted map[common.Hash]bool // Flag whether the storage slot list has already been sorted or not + storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrieval, one per account (nil means deleted) + + lock sync.RWMutex +} + +// newDiffLayer creates a new diff on top of an existing snapshot, whether that's a low +// level persistent database or a hierarchical diff already.
+func newDiffLayer(parent snapshot, number uint64, root common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + // Create the new layer with some pre-allocated data segments + dl := &diffLayer{ + parent: parent, + number: number, + root: root, + accountData: accounts, + storageData: storage, + } + // Fill the account hashes and sort them for the iterator + accountList := make([]common.Hash, 0, len(accounts)) + for hash, data := range accounts { + accountList = append(accountList, hash) + dl.memory += uint64(len(data)) + } + sort.Sort(hashes(accountList)) + dl.accountList = accountList + dl.accountSorted = true + + dl.memory += uint64(len(dl.accountList) * common.HashLength) + + // Fill the storage hashes and sort them for the iterator + dl.storageList = make(map[common.Hash][]common.Hash, len(storage)) + dl.storageSorted = make(map[common.Hash]bool, len(storage)) + + for accountHash, slots := range storage { + // If the slots are nil, sanity check that it's a deleted account + if slots == nil { + // Ensure that the account was just marked as deleted + if account, ok := accounts[accountHash]; account != nil || !ok { + panic(fmt.Sprintf("storage in %#x nil, but account conflicts (%#x, exists: %v)", accountHash, account, ok)) + } + // Everything ok, store the deletion mark and continue + dl.storageList[accountHash] = nil + continue + } + // Storage slots are not nil so entire contract was not deleted, ensure the + // account was just updated. 
+ if account, ok := accounts[accountHash]; account == nil || !ok { + log.Error(fmt.Sprintf("storage in %#x exists, but account nil (exists: %v)", accountHash, ok)) + //panic(fmt.Sprintf("storage in %#x exists, but account nil (exists: %v)", accountHash, ok)) + } + // Fill the storage hashes for this account and sort them for the iterator + storageList := make([]common.Hash, 0, len(slots)) + for storageHash, data := range slots { + storageList = append(storageList, storageHash) + dl.memory += uint64(len(data)) + } + sort.Sort(hashes(storageList)) + dl.storageList[accountHash] = storageList + dl.storageSorted[accountHash] = true + + dl.memory += uint64(len(storageList) * common.HashLength) + } + dl.memory += uint64(len(dl.storageList) * common.HashLength) + + return dl +} + +// Info returns the block number and root hash for which this snapshot was made. +func (dl *diffLayer) Info() (uint64, common.Hash) { + return dl.number, dl.root +} + +// Account directly retrieves the account associated with a particular hash in +// the snapshot slim data format. +func (dl *diffLayer) Account(hash common.Hash) *Account { + data := dl.AccountRLP(hash) + if len(data) == 0 { // can be both nil and []byte{} + return nil + } + account := new(Account) + if err := rlp.DecodeBytes(data, account); err != nil { + panic(err) + } + return account +} + +// AccountRLP directly retrieves the account RLP associated with a particular +// hash in the snapshot slim data format. +func (dl *diffLayer) AccountRLP(hash common.Hash) []byte { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the account is known locally, return it. Note, a nil account means it was + // deleted, and is a different notion than an unknown account! + if data, ok := dl.accountData[hash]; ok { + return data + } + // Account unknown to this diff, resolve from parent + return dl.parent.AccountRLP(hash) +} + +// Storage directly retrieves the storage data associated with a particular hash, +// within a particular account. 
If the slot is unknown to this diff, its parent +// is consulted. +func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) []byte { + dl.lock.RLock() + defer dl.lock.RUnlock() + + // If the account is known locally, try to resolve the slot locally. Note, a nil + // account means it was deleted, and is a different notion than an unknown account! + if storage, ok := dl.storageData[accountHash]; ok { + if storage == nil { + return nil + } + if data, ok := storage[storageHash]; ok { + return data + } + } + // Account - or slot within - unknown to this diff, resolve from parent + return dl.parent.Storage(accountHash, storageHash) +} + +// Update creates a new layer on top of the existing snapshot diff tree with +// the specified data items. +func (dl *diffLayer) Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + return newDiffLayer(dl, dl.number+1, blockRoot, accounts, storage) +} + +// Cap traverses downwards the diff tree until the number of allowed layers is +// crossed. All diffs beyond the permitted number are flattened downwards. If +// the layer limit is reached, memory cap is also enforced (but not before). The +// block numbers for the disk layer and first diff layer are returned for GC. +func (dl *diffLayer) Cap(layers int, memory uint64) (uint64, uint64) { + // Dive until we run out of layers or reach the persistent database + if layers > 2 { + // If we still have diff layers below, recurse + if parent, ok := dl.parent.(*diffLayer); ok { + return parent.Cap(layers-1, memory) + } + // Diff stack too shallow, return block numbers without modifications + return dl.parent.(*diskLayer).number, dl.number + } + // We're out of layers, flatten anything below, stopping if it's the disk or if + // the memory limit is not yet exceeded.
+ switch parent := dl.parent.(type) { + case *diskLayer: + return parent.number, dl.number + case *diffLayer: + dl.lock.Lock() + defer dl.lock.Unlock() + + dl.parent = parent.flatten() + if dl.parent.(*diffLayer).memory < memory { + diskNumber, _ := parent.parent.Info() + return diskNumber, parent.number + } + default: + panic(fmt.Sprintf("unknown data layer: %T", parent)) + } + // If the bottommost layer is larger than our memory cap, persist to disk + var ( + parent = dl.parent.(*diffLayer) + base = parent.parent.(*diskLayer) + batch = base.db.NewBatch() + ) + parent.lock.RLock() + defer parent.lock.RUnlock() + + // Start by temporarilly deleting the current snapshot block marker. This + // ensures that in the case of a crash, the entire snapshot is invalidated. + rawdb.DeleteSnapshotBlock(batch) + + // Push all the accounts into the database + for hash, data := range parent.accountData { + if len(data) > 0 { + // Account was updated, push to disk + rawdb.WriteAccountSnapshot(batch, hash, data) + base.cache.Set(string(hash[:]), data) + + if batch.ValueSize() > ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + log.Crit("Failed to write account snapshot", "err", err) + } + batch.Reset() + } + } else { + // Account was deleted, remove all storage slots too + rawdb.DeleteAccountSnapshot(batch, hash) + base.cache.Set(string(hash[:]), nil) + + it := rawdb.IterateStorageSnapshots(base.db, hash) + for it.Next() { + if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator + batch.Delete(key) + base.cache.Delete(string(key[1:])) + } + } + it.Release() + } + } + // Push all the storage slots into the database + for accountHash, storage := range parent.storageData { + for storageHash, data := range storage { + if len(data) > 0 { + rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data) + base.cache.Set(string(append(accountHash[:], storageHash[:]...)), data) + } else { + rawdb.DeleteStorageSnapshot(batch, 
accountHash, storageHash) + base.cache.Set(string(append(accountHash[:], storageHash[:]...)), nil) + } + } + if batch.ValueSize() > ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + log.Crit("Failed to write storage snapshot", "err", err) + } + batch.Reset() + } + } + // Update the snapshot block marker and write any remainder data + base.number, base.root = parent.number, parent.root + + rawdb.WriteSnapshotBlock(batch, base.number, base.root) + if err := batch.Write(); err != nil { + log.Crit("Failed to write leftover snapshot", "err", err) + } + dl.parent = base + + return base.number, dl.number +} + +// flatten pushes all data from this point downwards, flattening everything into +// a single diff at the bottom. Since usually the lowermost diff is the largest, +// the flattening bulds up from there in reverse. +func (dl *diffLayer) flatten() snapshot { + // If the parent is not diff, we're the first in line, return unmodified + parent, ok := dl.parent.(*diffLayer) + if !ok { + return dl + } + // Parent is a diff, flatten it first (note, apart from weird corned cases, + // flatten will realistically only ever merge 1 layer, so there's no need to + // be smarter about grouping flattens together). + parent = parent.flatten().(*diffLayer) + + // Overwrite all the updated accounts blindly, merge the sorted list + for hash, data := range dl.accountData { + parent.accountData[hash] = data + } + parent.accountList = append(parent.accountList, dl.accountList...) // TODO(karalabe): dedup!! 
+ parent.accountSorted = false + + // Overwrite all the updates storage slots (individually) + for accountHash, storage := range dl.storageData { + // If storage didn't exist (or was deleted) in the parent; or if the storage + // was freshly deleted in the child, overwrite blindly + if parent.storageData[accountHash] == nil || storage == nil { + parent.storageList[accountHash] = dl.storageList[accountHash] + parent.storageData[accountHash] = storage + continue + } + // Storage exists in both parent and child, merge the slots + comboData := parent.storageData[accountHash] + for storageHash, data := range storage { + comboData[storageHash] = data + } + parent.storageData[accountHash] = comboData + parent.storageList[accountHash] = append(parent.storageList[accountHash], dl.storageList[accountHash]...) // TODO(karalabe): dedup!! + parent.storageSorted[accountHash] = false + } + // Return the combo parent + parent.number = dl.number + parent.root = dl.root + parent.memory += dl.memory + return parent +} + +// Journal commits an entire diff hierarchy to disk into a single journal file. +// This is meant to be used during shutdown to persist the snapshot without +// flattening everything down (bad for reorgs). +func (dl *diffLayer) Journal() error { + dl.lock.RLock() + defer dl.lock.RUnlock() + + writer, err := dl.journal() + if err != nil { + return err + } + writer.Close() + return nil +} diff --git a/core/state/snapshot/difflayer_journal.go b/core/state/snapshot/difflayer_journal.go new file mode 100644 index 0000000000..844ee88592 --- /dev/null +++ b/core/state/snapshot/difflayer_journal.go @@ -0,0 +1,140 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "fmt" + "io" + "os" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" +) + +// journalAccount is an account entry in a diffLayer's disk journal. +type journalAccount struct { + Hash common.Hash + Blob []byte +} + +// journalStorage is an account's storage map in a diffLayer's disk journal. +type journalStorage struct { + Hash common.Hash + Keys []common.Hash + Vals [][]byte +} + +// loadDiffLayer reads the next sections of a snapshot journal, reconstructing a new +// diff and verifying that it can be linked to the requested parent. 
+func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { + // Read the next diff journal entry + var ( + number uint64 + root common.Hash + ) + if err := r.Decode(&number); err != nil { + // The first read may fail with EOF, marking the end of the journal + if err == io.EOF { + return parent, nil + } + return nil, fmt.Errorf("load diff number: %v", err) + } + if err := r.Decode(&root); err != nil { + return nil, fmt.Errorf("load diff root: %v", err) + } + var accounts []journalAccount + if err := r.Decode(&accounts); err != nil { + return nil, fmt.Errorf("load diff accounts: %v", err) + } + accountData := make(map[common.Hash][]byte) + for _, entry := range accounts { + accountData[entry.Hash] = entry.Blob + } + var storage []journalStorage + if err := r.Decode(&storage); err != nil { + return nil, fmt.Errorf("load diff storage: %v", err) + } + storageData := make(map[common.Hash]map[common.Hash][]byte) + for _, entry := range storage { + slots := make(map[common.Hash][]byte) + for i, key := range entry.Keys { + slots[key] = entry.Vals[i] + } + storageData[entry.Hash] = slots + } + // Validate the block number to avoid state corruption + if parent, ok := parent.(*diffLayer); ok { + if number != parent.number+1 { + return nil, fmt.Errorf("snapshot chain broken: block #%d after #%d", number, parent.number) + } + } + return loadDiffLayer(newDiffLayer(parent, number, root, accountData, storageData), r) +} + +// journal is the internal version of Journal that also returns the journal file +// so subsequent layers know where to write to. 
+func (dl *diffLayer) journal() (io.WriteCloser, error) { + // If we've reached the bottom, open the journal + var writer io.WriteCloser + if parent, ok := dl.parent.(*diskLayer); ok { + file, err := os.Create(parent.journal) + if err != nil { + return nil, err + } + writer = file + } + // If we haven't reached the bottom yet, journal the parent first + if writer == nil { + file, err := dl.parent.(*diffLayer).journal() + if err != nil { + return nil, err + } + writer = file + } + // Everything below was journalled, persist this layer too + if err := rlp.Encode(writer, dl.number); err != nil { + writer.Close() + return nil, err + } + if err := rlp.Encode(writer, dl.root); err != nil { + writer.Close() + return nil, err + } + accounts := make([]journalAccount, 0, len(dl.accountData)) + for hash, blob := range dl.accountData { + accounts = append(accounts, journalAccount{Hash: hash, Blob: blob}) + } + if err := rlp.Encode(writer, accounts); err != nil { + writer.Close() + return nil, err + } + storage := make([]journalStorage, 0, len(dl.storageData)) + for hash, slots := range dl.storageData { + keys := make([]common.Hash, 0, len(slots)) + vals := make([][]byte, 0, len(slots)) + for key, val := range slots { + keys = append(keys, key) + vals = append(vals, val) + } + storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals}) + } + if err := rlp.Encode(writer, storage); err != nil { + writer.Close() + return nil, err + } + return writer, nil +} diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go new file mode 100644 index 0000000000..0406d298fb --- /dev/null +++ b/core/state/snapshot/disklayer.go @@ -0,0 +1,115 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "github.com/allegro/bigcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/rlp" +) + +// diskLayer is a low level persistent snapshot built on top of a key-value store. +type diskLayer struct { + journal string // Path of the snapshot journal to use on shutdown + db ethdb.KeyValueStore // Key-value store containing the base snapshot + cache *bigcache.BigCache // Cache to avoid hitting the disk for direct access + + number uint64 // Block number of the base snapshot + root common.Hash // Root hash of the base snapshot +} + +// Info returns the block number and root hash for which this snapshot was made. +func (dl *diskLayer) Info() (uint64, common.Hash) { + return dl.number, dl.root +} + +// Account directly retrieves the account associated with a particular hash in +// the snapshot slim data format. 
+func (dl *diskLayer) Account(hash common.Hash) *Account { + data := dl.AccountRLP(hash) + if len(data) == 0 { // can be both nil and []byte{} + return nil + } + account := new(Account) + if err := rlp.DecodeBytes(data, account); err != nil { + panic(err) + } + return account +} + +// AccountRLP directly retrieves the account RLP associated with a particular +// hash in the snapshot slim data format. +func (dl *diskLayer) AccountRLP(hash common.Hash) []byte { + key := string(hash[:]) + + // Try to retrieve the account from the memory cache + if blob, err := dl.cache.Get(key); err == nil { + snapshotCleanHitMeter.Mark(1) + snapshotCleanReadMeter.Mark(int64(len(blob))) + return blob + } + // Cache doesn't contain account, pull from disk and cache for later + blob := rawdb.ReadAccountSnapshot(dl.db, hash) + dl.cache.Set(key, blob) + + snapshotCleanMissMeter.Mark(1) + snapshotCleanWriteMeter.Mark(int64(len(blob))) + + return blob +} + +// Storage directly retrieves the storage data associated with a particular hash, +// within a particular account. +func (dl *diskLayer) Storage(accountHash, storageHash common.Hash) []byte { + key := string(append(accountHash[:], storageHash[:]...)) + + // Try to retrieve the storage slot from the memory cache + if blob, err := dl.cache.Get(key); err == nil { + snapshotCleanHitMeter.Mark(1) + snapshotCleanReadMeter.Mark(int64(len(blob))) + return blob + } + // Cache doesn't contain storage slot, pull from disk and cache for later + blob := rawdb.ReadStorageSnapshot(dl.db, accountHash, storageHash) + dl.cache.Set(key, blob) + + snapshotCleanMissMeter.Mark(1) + snapshotCleanWriteMeter.Mark(int64(len(blob))) + + return blob +} + +// Update creates a new layer on top of the existing snapshot diff tree with +// the specified data items. Note, the maps are retained by the method to avoid +// copying everything. 
+func (dl *diskLayer) Update(blockHash common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer { + return newDiffLayer(dl, dl.number+1, blockHash, accounts, storage) +} + +// Cap traverses downwards the diff tree until the number of allowed layers are +// crossed. All diffs beyond the permitted number are flattened downwards. +func (dl *diskLayer) Cap(layers int, memory uint64) (uint64, uint64) { + return dl.number, dl.number +} + +// Journal commits an entire diff hierarchy to disk into a single journal file. +func (dl *diskLayer) Journal() error { + // There's no journalling a disk layer + return nil +} diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go new file mode 100644 index 0000000000..0d451fe50d --- /dev/null +++ b/core/state/snapshot/generate.go @@ -0,0 +1,212 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
+ +package snapshot + +import ( + "bytes" + "fmt" + "math/big" + "time" + + "github.com/allegro/bigcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +var ( + // emptyRoot is the known root hash of an empty trie. + emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") + + // emptyCode is the known hash of the empty EVM bytecode. + emptyCode = crypto.Keccak256Hash(nil) +) + +// wipeSnapshot iterates over the entire key-value database and deletes all the +// data associated with the snapshot (accounts, storage, metadata). After all is +// done, the snapshot range of the database is compacted to free up unused data +// blocks. +func wipeSnapshot(db ethdb.KeyValueStore) error { + // Batch deletions together to avoid holding an iterator for too long + var ( + batch = db.NewBatch() + items int + ) + // Iterate over the snapshot key-range and delete all of them + log.Info("Deleting previous snapshot leftovers") + start, logged := time.Now(), time.Now() + + it := db.NewIteratorWithStart(rawdb.StateSnapshotPrefix) + for it.Next() { + // Skip any keys with the correct prefix but wrong lenth (trie nodes) + key := it.Key() + if !bytes.HasPrefix(key, rawdb.StateSnapshotPrefix) { + break + } + if len(key) != len(rawdb.StateSnapshotPrefix)+common.HashLength && len(key) != len(rawdb.StateSnapshotPrefix)+2*common.HashLength { + continue + } + // Delete the key and periodically recreate the batch and iterator + batch.Delete(key) + items++ + + if items%10000 == 0 { + // Batch too large (or iterator too long lived, flush and recreate) + it.Release() + if err := batch.Write(); err != nil { + return err + } + batch.Reset() + it = db.NewIteratorWithStart(key) + + if time.Since(logged) > 
8*time.Second { + log.Info("Deleting previous snapshot leftovers", "wiped", items, "elapsed", time.Since(start)) + logged = time.Now() + } + } + } + it.Release() + + rawdb.DeleteSnapshotBlock(batch) + if err := batch.Write(); err != nil { + return err + } + log.Info("Deleted previous snapshot leftovers", "wiped", items, "elapsed", time.Since(start)) + + // Compact the snapshot section of the database to get rid of unused space + log.Info("Compacting snapshot area in database") + start = time.Now() + + end := common.CopyBytes(rawdb.StateSnapshotPrefix) + end[len(end)-1]++ + + if err := db.Compact(rawdb.StateSnapshotPrefix, end); err != nil { + return err + } + log.Info("Compacted snapshot area in database", "elapsed", time.Since(start)) + + return nil +} + +// generateSnapshot regenerates a brand new snapshot based on an existing state database and head block. +func generateSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, headRoot common.Hash) (snapshot, error) { + // Wipe any previously existing snapshot from the database + if err := wipeSnapshot(db); err != nil { + return nil, err + } + // Iterate the entire storage trie and re-generate the state snapshot + var ( + accountCount int + storageCount int + storageNodes int + accountSize common.StorageSize + storageSize common.StorageSize + logged time.Time + ) + batch := db.NewBatch() + triedb := trie.NewDatabase(db) + + accTrie, err := trie.NewSecure(headRoot, triedb) + if err != nil { + return nil, err + } + accIt := trie.NewIterator(accTrie.NodeIterator(nil)) + for accIt.Next() { + var ( + curStorageCount int + curStorageNodes int + curAccountSize common.StorageSize + curStorageSize common.StorageSize + ) + var acc struct { + Nonce uint64 + Balance *big.Int + Root common.Hash + CodeHash []byte + } + if err := rlp.DecodeBytes(accIt.Value, &acc); err != nil { + return nil, err + } + data := AccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) + curAccountSize += common.StorageSize(1 + 
common.HashLength + len(data)) + + rawdb.WriteAccountSnapshot(batch, common.BytesToHash(accIt.Key), data) + if batch.ValueSize() > ethdb.IdealBatchSize { + batch.Write() + batch.Reset() + } + if acc.Root != emptyRoot { + storeTrie, err := trie.NewSecure(acc.Root, triedb) + if err != nil { + return nil, err + } + storeIt := trie.NewIterator(storeTrie.NodeIterator(nil)) + for storeIt.Next() { + curStorageSize += common.StorageSize(1 + 2*common.HashLength + len(storeIt.Value)) + curStorageCount++ + + rawdb.WriteStorageSnapshot(batch, common.BytesToHash(accIt.Key), common.BytesToHash(storeIt.Key), storeIt.Value) + if batch.ValueSize() > ethdb.IdealBatchSize { + batch.Write() + batch.Reset() + } + } + curStorageNodes = storeIt.Nodes + } + accountCount++ + storageCount += curStorageCount + accountSize += curAccountSize + storageSize += curStorageSize + storageNodes += curStorageNodes + + if time.Since(logged) > 8*time.Second { + fmt.Printf("%#x: %9s + %9s (%6d slots, %6d nodes), total %9s (%d accs, %d nodes) + %9s (%d slots, %d nodes)\n", accIt.Key, curAccountSize.TerminalString(), curStorageSize.TerminalString(), curStorageCount, curStorageNodes, accountSize.TerminalString(), accountCount, accIt.Nodes, storageSize.TerminalString(), storageCount, storageNodes) + logged = time.Now() + } + } + fmt.Printf("Totals: %9s (%d accs, %d nodes) + %9s (%d slots, %d nodes)\n", accountSize.TerminalString(), accountCount, accIt.Nodes, storageSize.TerminalString(), storageCount, storageNodes) + + // Update the snapshot block marker and write any remainder data + rawdb.WriteSnapshotBlock(batch, headNumber, headRoot) + batch.Write() + batch.Reset() + + // Compact the snapshot section of the database to get rid of unused space + log.Info("Compacting snapshot in chain database") + if err := db.Compact([]byte{'s'}, []byte{'s' + 1}); err != nil { + return nil, err + } + // New snapshot generated, construct a brand new base layer + cache, _ := bigcache.NewBigCache(bigcache.Config{ // 
TODO(karalabe): dedup + Shards: 1024, + LifeWindow: time.Hour, + MaxEntriesInWindow: 512 * 1024, + MaxEntrySize: 512, + HardMaxCacheSize: 512, + }) + return &diskLayer{ + journal: journal, + db: db, + cache: cache, + number: headNumber, + root: headRoot, + }, nil +} diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go new file mode 100644 index 0000000000..1206445c58 --- /dev/null +++ b/core/state/snapshot/generate_test.go @@ -0,0 +1,111 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "math/rand" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb/memorydb" +) + +// randomHash generates a random blob of data and returns it as a hash. +func randomHash() common.Hash { + var hash common.Hash + if n, err := rand.Read(hash[:]); n != common.HashLength || err != nil { + panic(err) + } + return hash +} + +// Tests that given a database with random data content, all parts of a snapshot +// can be crrectly wiped without touching anything else. 
+func TestWipe(t *testing.T) { + // Create a database with some random snapshot data + db := memorydb.New() + + for i := 0; i < 128; i++ { + account := randomHash() + rawdb.WriteAccountSnapshot(db, account, randomHash().Bytes()) + for j := 0; j < 1024; j++ { + rawdb.WriteStorageSnapshot(db, account, randomHash(), randomHash().Bytes()) + } + } + rawdb.WriteSnapshotBlock(db, 123, randomHash()) + + // Add some random non-snapshot data too to make wiping harder + for i := 0; i < 65536; i++ { + // Generate a key that's the wrong length for a state snapshot item + var keysize int + for keysize == 0 || keysize == 32 || keysize == 64 { + keysize = 8 + rand.Intn(64) // +8 to ensure we will "never" randomize duplicates + } + // Randomize the suffix, dedup and inject it under the snapshot namespace + keysuffix := make([]byte, keysize) + rand.Read(keysuffix) + db.Put(append(rawdb.StateSnapshotPrefix, keysuffix...), randomHash().Bytes()) + } + // Sanity check that all the keys are present + var items int + + it := db.NewIteratorWithPrefix(rawdb.StateSnapshotPrefix) + defer it.Release() + + for it.Next() { + key := it.Key() + if len(key) == len(rawdb.StateSnapshotPrefix)+32 || len(key) == len(rawdb.StateSnapshotPrefix)+64 { + items++ + } + } + if items != 128+128*1024 { + t.Fatalf("snapshot size mismatch: have %d, want %d", items, 128+128*1024) + } + if number, hash := rawdb.ReadSnapshotBlock(db); number != 123 || hash == (common.Hash{}) { + t.Errorf("snapshot block marker mismatch: have #%d [%#x], want #%d []", number, hash, 123) + } + // Wipe all snapshot entries from the database + if err := wipeSnapshot(db); err != nil { + t.Fatalf("failed to wipe snapshot: %v", err) + } + // Iterate over the database end ensure no snapshot information remains + it = db.NewIteratorWithPrefix(rawdb.StateSnapshotPrefix) + defer it.Release() + + for it.Next() { + key := it.Key() + if len(key) == len(rawdb.StateSnapshotPrefix)+32 || len(key) == len(rawdb.StateSnapshotPrefix)+64 { + 
t.Errorf("snapshot entry remained after wipe: %x", key) + } + } + if number, hash := rawdb.ReadSnapshotBlock(db); number != 0 || hash != (common.Hash{}) { + t.Errorf("snapshot block marker remained after wipe: #%d [%#x]", number, hash) + } + // Iterate over the database and ensure miscellaneous items are present + items = 0 + + it = db.NewIterator() + defer it.Release() + + for it.Next() { + items++ + } + if items != 65536 { + t.Fatalf("misc item count mismatch: have %d, want %d", items, 65536) + } +} diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go new file mode 100644 index 0000000000..6d4df96da8 --- /dev/null +++ b/core/state/snapshot/snapshot.go @@ -0,0 +1,244 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// Package snapshot implements a journalled, dynamic state dump. 
+package snapshot + +import ( + "errors" + "fmt" + "os" + "sync" + "time" + + "github.com/allegro/bigcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/rlp" +) + +var ( + snapshotCleanHitMeter = metrics.NewRegisteredMeter("state/snapshot/clean/hit", nil) + snapshotCleanMissMeter = metrics.NewRegisteredMeter("state/snapshot/clean/miss", nil) + snapshotCleanReadMeter = metrics.NewRegisteredMeter("state/snapshot/clean/read", nil) + snapshotCleanWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/write", nil) +) + +// Snapshot represents the functionality supported by a snapshot storage layer. +type Snapshot interface { + // Info returns the block number and root hash for which this snapshot was made. + Info() (uint64, common.Hash) + + // Account directly retrieves the account associated with a particular hash in + // the snapshot slim data format. + Account(hash common.Hash) *Account + + // AccountRLP directly retrieves the account RLP associated with a particular + // hash in the snapshot slim data format. + AccountRLP(hash common.Hash) []byte + + // Storage directly retrieves the storage data associated with a particular hash, + // within a particular account. + Storage(accountHash, storageHash common.Hash) []byte +} + +// snapshot is the internal version of the snapshot data layer that supports some +// additional methods compared to the public API. +type snapshot interface { + Snapshot + + // Update creates a new layer on top of the existing snapshot diff tree with + // the specified data items. Note, the maps are retained by the method to avoid + // copying everything. 
+ Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer + + // Cap traverses downwards the diff tree until the number of allowed layers are + // crossed. All diffs beyond the permitted number are flattened downwards. The + // block numbers for the disk layer and first diff layer are returned for GC. + Cap(layers int, memory uint64) (uint64, uint64) + + // Journal commits an entire diff hierarchy to disk into a single journal file. + // This is meant to be used during shutdown to persist the snapshot without + // flattening everything down (bad for reorgs). + Journal() error +} + +// SnapshotTree is an Ethereum state snapshot tree. It consists of one persistent +// base layer backed by a key-value store, on top of which arbitrarilly many in- +// memory diff layers are topped. The memory diffs can form a tree with branching, +// but the disk layer is singleton and common to all. If a reorg goes deeper than +// the disk layer, everything needs to be deleted. +// +// The goal of a state snapshot is twofold: to allow direct access to account and +// storage data to avoid expensive multi-level trie lookups; and to allow sorted, +// cheap iteration of the account/storage tries for sync aid. +type SnapshotTree struct { + layers map[common.Hash]snapshot // Collection of all known layers // TODO(karalabe): split Clique overlaps + lock sync.RWMutex +} + +// New attempts to load an already existing snapshot from a persistent key-value +// store (with a number of memory layers from a journal), ensuring that the head +// of the snapshot matches the expected one. +// +// If the snapshot is missing or inconsistent, the entirety is deleted and will +// be reconstructed from scratch based on the tries in the key-value store. 
+func New(db ethdb.KeyValueStore, journal string, headNumber uint64, headRoot common.Hash) (*SnapshotTree, error) { + // Attempt to load a previously persisted snapshot + head, err := loadSnapshot(db, journal, headNumber, headRoot) + if err != nil { + log.Warn("Failed to load snapshot, regenerating", "err", err) + if head, err = generateSnapshot(db, journal, headNumber, headRoot); err != nil { + return nil, err + } + } + // Existing snapshot loaded or one regenerated, seed all the layers + snap := &SnapshotTree{ + layers: make(map[common.Hash]snapshot), + } + for head != nil { + _, root := head.Info() + snap.layers[root] = head + + switch self := head.(type) { + case *diffLayer: + head = self.parent + case *diskLayer: + head = nil + default: + panic(fmt.Sprintf("unknown data layer: %T", self)) + } + } + return snap, nil +} + +// Snapshot retrieves a snapshot belonging to the given block root, or nil if no +// snapshot is maintained for that block. +func (st *SnapshotTree) Snapshot(blockRoot common.Hash) Snapshot { + st.lock.RLock() + defer st.lock.RUnlock() + + return st.layers[blockRoot] +} + +// Update adds a new snapshot into the tree, if that can be linked to an existing +// old parent. It is disallowed to insert a disk layer (the origin of all). +func (st *SnapshotTree) Update(blockRoot common.Hash, parentRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error { + // Generate a new snapshot on top of the parent + parent := st.Snapshot(parentRoot).(snapshot) + if parent == nil { + return fmt.Errorf("parent [%#x] snapshot missing", parentRoot) + } + snap := parent.Update(blockRoot, accounts, storage) + + // Save the new snapshot for later + st.lock.Lock() + defer st.lock.Unlock() + + st.layers[snap.root] = snap + return nil +} + +// Cap traverses downwards the snapshot tree from a head block hash until the +// number of allowed layers are crossed. All layers beyond the permitted number +// are flattened downwards. 
+func (st *SnapshotTree) Cap(blockRoot common.Hash, layers int, memory uint64) error { + // Retrieve the head snapshot to cap from + snap := st.Snapshot(blockRoot).(snapshot) + if snap == nil { + return fmt.Errorf("snapshot [%#x] missing", blockRoot) + } + // Run the internal capping and discard all stale layers + st.lock.Lock() + defer st.lock.Unlock() + + diskNumber, diffNumber := snap.Cap(layers, memory) + for root, snap := range st.layers { + if number, _ := snap.Info(); number != diskNumber && number < diffNumber { + delete(st.layers, root) + } + } + return nil +} + +// Journal commits an entire diff hierarchy to disk into a single journal file. +// This is meant to be used during shutdown to persist the snapshot without +// flattening everything down (bad for reorgs). +func (st *SnapshotTree) Journal(blockRoot common.Hash) error { + // Retrieve the head snapshot to journal from + snap := st.Snapshot(blockRoot).(snapshot) + if snap == nil { + return fmt.Errorf("snapshot [%#x] missing", blockRoot) + } + // Run the journaling + st.lock.Lock() + defer st.lock.Unlock() + + return snap.Journal() +} + +// loadSnapshot loads a pre-existing state snapshot backed by a key-value store. +func loadSnapshot(db ethdb.KeyValueStore, journal string, headNumber uint64, headRoot common.Hash) (snapshot, error) { + // Retrieve the block number and hash of the snapshot, failing if no snapshot + // is present in the database (or crashed mid-update). 
+ number, root := rawdb.ReadSnapshotBlock(db) + if root == (common.Hash{}) { + return nil, errors.New("missing or corrupted snapshot") + } + cache, _ := bigcache.NewBigCache(bigcache.Config{ // TODO(karalabe): dedup + Shards: 1024, + LifeWindow: time.Hour, + MaxEntriesInWindow: 512 * 1024, + MaxEntrySize: 512, + HardMaxCacheSize: 512, + }) + base := &diskLayer{ + journal: journal, + db: db, + cache: cache, + number: number, + root: root, + } + // Load all the snapshot diffs from the journal, failing if their chain is broken + // or does not lead from the disk snapshot to the specified head. + if _, err := os.Stat(journal); os.IsNotExist(err) { + // Journal doesn't exist, don't worry if it's not supposed to + if number != headNumber || root != headRoot { + return nil, fmt.Errorf("snapshot journal missing, head does't match snapshot: #%d [%#x] vs. #%d [%#x]", + headNumber, headRoot, number, root) + } + return base, nil + } + file, err := os.Open(journal) + if err != nil { + return nil, err + } + snapshot, err := loadDiffLayer(base, rlp.NewStream(file, 0)) + if err != nil { + return nil, err + } + // Entire snapshot journal loaded, sanity check the head and return + // Journal doesn't exist, don't worry if it's not supposed to + number, root = snapshot.Info() + if number != headNumber || root != headRoot { + return nil, fmt.Errorf("head does't match snapshot: #%d [%#x] vs. #%d [%#x]", + headNumber, headRoot, number, root) + } + return snapshot, nil +} diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go new file mode 100644 index 0000000000..903bd4a6f6 --- /dev/null +++ b/core/state/snapshot/snapshot_test.go @@ -0,0 +1,17 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot diff --git a/core/state/snapshot/sort.go b/core/state/snapshot/sort.go new file mode 100644 index 0000000000..04729c60b2 --- /dev/null +++ b/core/state/snapshot/sort.go @@ -0,0 +1,62 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "bytes" + + "github.com/ethereum/go-ethereum/common" +) + +// hashes is a helper to implement sort.Interface. +type hashes []common.Hash + +// Len is the number of elements in the collection.
+func (hs hashes) Len() int { return len(hs) } + +// Less reports whether the element with index i should sort before the element +// with index j. +func (hs hashes) Less(i, j int) bool { return bytes.Compare(hs[i][:], hs[j][:]) < 0 } + +// Swap swaps the elements with indexes i and j. +func (hs hashes) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } + +// merge combines two sorted lists of hashes into a combo sorted one. +func merge(a, b []common.Hash) []common.Hash { + result := make([]common.Hash, len(a)+len(b)) + + i := 0 + for len(a) > 0 && len(b) > 0 { + if bytes.Compare(a[0][:], b[0][:]) < 0 { + result[i] = a[0] + a = a[1:] + } else { + result[i] = b[0] + b = b[1:] + } + i++ + } + for j := 0; j < len(a); j++ { + result[i] = a[j] + i++ + } + for j := 0; j < len(b); j++ { + result[i] = b[j] + i++ + } + return result +} diff --git a/core/state/state_object.go b/core/state/state_object.go index 667d5ec02e..98be566716 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -195,15 +195,26 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has if value, cached := s.originStorage[key]; cached { return value } - // Track the amount of time wasted on reading the storage trie - if metrics.EnabledExpensive { - defer func(start time.Time) { s.db.StorageReads += time.Since(start) }(time.Now()) - } - // Otherwise load the value from the database - enc, err := s.getTrie(db).TryGet(key[:]) - if err != nil { - s.setError(err) - return common.Hash{} + // If no live objects are available, attempt to use snapshots + var ( + enc []byte + err error + ) + if s.db.snap != nil { + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.SnapshotStorageReads += time.Since(start) }(time.Now()) + } + enc = s.db.snap.Storage(s.addrHash, crypto.Keccak256Hash(key[:])) + } else { + // Track the amount of time wasted on reading the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageReads += 
time.Since(start) }(time.Now()) + } + // Otherwise load the value from the database + if enc, err = s.getTrie(db).TryGet(key[:]); err != nil { + s.setError(err) + return common.Hash{} + } } var value common.Hash if len(enc) > 0 { @@ -283,6 +294,23 @@ func (s *stateObject) updateTrie(db Database) Trie { if metrics.EnabledExpensive { defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now()) } + // Retrieve the snapshot storage map for the object + var storage map[common.Hash][]byte + if s.db.snap != nil { + // Retrieve the old storage map, if available + s.db.snapLock.RLock() + storage = s.db.snapStorage[s.addrHash] + s.db.snapLock.RUnlock() + + // If no old storage map was available, create a new one + if storage == nil { + storage = make(map[common.Hash][]byte) + + s.db.snapLock.Lock() + s.db.snapStorage[s.addrHash] = storage + s.db.snapLock.Unlock() + } + } // Insert all the pending updates into the trie tr := s.getTrie(db) for key, value := range s.pendingStorage { @@ -292,13 +320,18 @@ func (s *stateObject) updateTrie(db Database) Trie { } s.originStorage[key] = value + var v []byte if (value == common.Hash{}) { s.setError(tr.TryDelete(key[:])) - continue + } else { + // Encoding []byte cannot fail, ok to ignore the error. + v, _ = rlp.EncodeToBytes(common.TrimLeftZeroes(value[:])) + s.setError(tr.TryUpdate(key[:], v)) + } + // If state snapshotting is active, cache the data til commit + if storage != nil { + storage[crypto.Keccak256Hash(key[:])] = v // v will be nil if value is 0x00 } - // Encoding []byte cannot fail, ok to ignore the error. 
- v, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(value[:])) - s.setError(tr.TryUpdate(key[:], v)) } if len(s.pendingStorage) > 0 { s.pendingStorage = make(Storage) diff --git a/core/state/statedb.go b/core/state/statedb.go index 5d40f59c65..0fb1095ce2 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -22,9 +22,11 @@ import ( "fmt" "math/big" "sort" + "sync" "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/state/snapshot" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/log" @@ -66,6 +68,12 @@ type StateDB struct { db Database trie Trie + snaps *snapshot.SnapshotTree + snap snapshot.Snapshot + snapAccounts map[common.Hash][]byte + snapStorage map[common.Hash]map[common.Hash][]byte + snapLock sync.RWMutex // Lock for the concurrent storage updaters + // This map holds 'live' objects, which will get modified while processing a state transition. stateObjects map[common.Address]*stateObject stateObjectsPending map[common.Address]struct{} // State objects finalized but not yet written to the trie @@ -95,32 +103,43 @@ type StateDB struct { nextRevisionId int // Measurements gathered during execution for debugging purposes - AccountReads time.Duration - AccountHashes time.Duration - AccountUpdates time.Duration - AccountCommits time.Duration - StorageReads time.Duration - StorageHashes time.Duration - StorageUpdates time.Duration - StorageCommits time.Duration + AccountReads time.Duration + AccountHashes time.Duration + AccountUpdates time.Duration + AccountCommits time.Duration + StorageReads time.Duration + StorageHashes time.Duration + StorageUpdates time.Duration + StorageCommits time.Duration + SnapshotAccountReads time.Duration + SnapshotStorageReads time.Duration + SnapshotCommits time.Duration } // Create a new state from a given trie. 
-func New(root common.Hash, db Database) (*StateDB, error) { +func New(root common.Hash, db Database, snaps *snapshot.SnapshotTree) (*StateDB, error) { tr, err := db.OpenTrie(root) if err != nil { return nil, err } - return &StateDB{ + sdb := &StateDB{ db: db, trie: tr, + snaps: snaps, stateObjects: make(map[common.Address]*stateObject), stateObjectsPending: make(map[common.Address]struct{}), stateObjectsDirty: make(map[common.Address]struct{}), logs: make(map[common.Hash][]*types.Log), preimages: make(map[common.Hash][]byte), journal: newJournal(), - }, nil + } + if sdb.snaps != nil { + if sdb.snap = sdb.snaps.Snapshot(root); sdb.snap != nil { + sdb.snapAccounts = make(map[common.Hash][]byte) + sdb.snapStorage = make(map[common.Hash]map[common.Hash][]byte) + } + } + return sdb, nil } // setError remembers the first non-nil error it is called with. @@ -152,6 +171,14 @@ func (s *StateDB) Reset(root common.Hash) error { s.logSize = 0 s.preimages = make(map[common.Hash][]byte) s.clearJournalAndRefund() + + if s.snaps != nil { + s.snapAccounts, s.snapStorage = nil, nil + if s.snap = s.snaps.Snapshot(root); s.snap != nil { + s.snapAccounts = make(map[common.Hash][]byte) + s.snapStorage = make(map[common.Hash]map[common.Hash][]byte) + } + } return nil } @@ -438,6 +465,11 @@ func (s *StateDB) updateStateObject(obj *stateObject) { panic(fmt.Errorf("can't encode object at %x: %v", addr[:], err)) } s.setError(s.trie.TryUpdate(addr[:], data)) + + // If state snapshotting is active, cache the data til commit + if s.snap != nil { + s.snapAccounts[obj.addrHash] = snapshot.AccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash) + } } // deleteStateObject removes the given object from the state trie. 
@@ -449,6 +481,14 @@ func (s *StateDB) deleteStateObject(obj *stateObject) { // Delete the account from the trie addr := obj.Address() s.setError(s.trie.TryDelete(addr[:])) + + // If state snapshotting is active, cache the data til commit + if s.snap != nil { + s.snapLock.Lock() + s.snapAccounts[obj.addrHash] = nil // We need to maintain account deletions explicitly + s.snapStorage[obj.addrHash] = nil // We need to maintain storage deletions explicitly + s.snapLock.Unlock() + } } // getStateObject retrieves a state object given by the address, returning nil if @@ -470,20 +510,38 @@ func (s *StateDB) getDeletedStateObject(addr common.Address) *stateObject { if obj := s.stateObjects[addr]; obj != nil { return obj } - // Track the amount of time wasted on loading the object from the database - if metrics.EnabledExpensive { - defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now()) - } - // Load the object from the database - enc, err := s.trie.TryGet(addr[:]) - if len(enc) == 0 { - s.setError(err) - return nil - } + // If no live objects are available, attempt to use snapshots var data Account - if err := rlp.DecodeBytes(enc, &data); err != nil { - log.Error("Failed to decode state object", "addr", addr, "err", err) - return nil + if s.snap != nil { + if metrics.EnabledExpensive { + defer func(start time.Time) { s.SnapshotAccountReads += time.Since(start) }(time.Now()) + } + acc := s.snap.Account(crypto.Keccak256Hash(addr[:])) + if acc == nil { + return nil + } + data.Nonce, data.Balance, data.CodeHash = acc.Nonce, acc.Balance, acc.CodeHash + if len(data.CodeHash) == 0 { + data.CodeHash = emptyCodeHash + } + data.Root = common.BytesToHash(acc.Root) + if data.Root == (common.Hash{}) { + data.Root = emptyRoot + } + } else { + // Snapshot unavailable, fall back to the trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now()) + } + enc, err := s.trie.TryGet(addr[:]) + if len(enc) == 0 { + 
s.setError(err) + return nil + } + if err := rlp.DecodeBytes(enc, &data); err != nil { + log.Error("Failed to decode state object", "addr", addr, "err", err) + return nil + } } // Insert into the live set obj := newObject(s, addr, data) @@ -748,13 +806,14 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { s.stateObjectsDirty = make(map[common.Address]struct{}) } // Write the account trie changes, measuing the amount of wasted time + var start time.Time if metrics.EnabledExpensive { - defer func(start time.Time) { s.AccountCommits += time.Since(start) }(time.Now()) + start = time.Now() } // The onleaf func is called _serially_, so we can reuse the same account // for unmarshalling every time. var account Account - return s.trie.Commit(func(leaf []byte, parent common.Hash) error { + root, err := s.trie.Commit(func(leaf []byte, parent common.Hash) error { if err := rlp.DecodeBytes(leaf, &account); err != nil { return nil } @@ -767,4 +826,22 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { } return nil }) + if metrics.EnabledExpensive { + s.AccountCommits += time.Since(start) + } + // If snapshotting is enabled, update the snapshot tree with this new version + if s.snap != nil { + if metrics.EnabledExpensive { + defer func(start time.Time) { s.SnapshotCommits += time.Since(start) }(time.Now()) + } + _, parentRoot := s.snap.Info() + if err := s.snaps.Update(root, parentRoot, s.snapAccounts, s.snapStorage); err != nil { + log.Warn("Failed to update snapshot tree", "from", parentRoot, "to", root, "err", err) + } + if err := s.snaps.Cap(root, 16, 4*1024*1024); err != nil { + log.Warn("Failed to cap snapshot tree", "root", root, "layers", 16, "memory", 4*1024*1024, "err", err) + } + s.snap, s.snapAccounts, s.snapStorage = nil, nil, nil + } + return root, err } diff --git a/core/state_prefetcher.go b/core/state_prefetcher.go index cb85a05b57..bb5db4ced1 100644 --- a/core/state_prefetcher.go +++ b/core/state_prefetcher.go @@ 
-65,6 +65,8 @@ func (p *statePrefetcher) Prefetch(block *types.Block, statedb *state.StateDB, c return // Ugh, something went horribly wrong, bail out } } + // All transactions processed, finalize the block to force loading written-only trie paths + statedb.Finalise(true) // TODO(karalabe): should we run this on interrupt too? } // precacheTransaction attempts to apply a transaction to the given state database diff --git a/core/vm/runtime/runtime.go b/core/vm/runtime/runtime.go index dd5dba66f0..9cb492786b 100644 --- a/core/vm/runtime/runtime.go +++ b/core/vm/runtime/runtime.go @@ -99,7 +99,7 @@ func Execute(code, input []byte, cfg *Config) ([]byte, *state.StateDB, error) { setDefaults(cfg) if cfg.State == nil { - cfg.State, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + cfg.State, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) } var ( address = common.BytesToAddress([]byte("contract")) @@ -129,7 +129,7 @@ func Create(input []byte, cfg *Config) ([]byte, common.Address, uint64, error) { setDefaults(cfg) if cfg.State == nil { - cfg.State, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + cfg.State, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) } var ( vmenv = NewEnv(cfg) diff --git a/core/vm/runtime/runtime_test.go b/core/vm/runtime/runtime_test.go index f2d05118ce..fb07d69d09 100644 --- a/core/vm/runtime/runtime_test.go +++ b/core/vm/runtime/runtime_test.go @@ -98,7 +98,7 @@ func TestExecute(t *testing.T) { } func TestCall(t *testing.T) { - state, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + state, _ := state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) address := common.HexToAddress("0x0a") state.SetCode(address, []byte{ byte(vm.PUSH1), 10, @@ -154,7 +154,7 @@ func BenchmarkCall(b *testing.B) { } func benchmarkEVM_Create(bench *testing.B, code string) { var ( - statedb, _ = 
state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + statedb, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) sender = common.BytesToAddress([]byte("sender")) receiver = common.BytesToAddress([]byte("receiver")) ) diff --git a/eth/api_test.go b/eth/api_test.go index 1e7c489c32..ab846db3ea 100644 --- a/eth/api_test.go +++ b/eth/api_test.go @@ -64,7 +64,7 @@ func (h resultHash) Less(i, j int) bool { return bytes.Compare(h[i].Bytes(), h[j func TestAccountRange(t *testing.T) { var ( statedb = state.NewDatabase(rawdb.NewMemoryDatabase()) - state, _ = state.New(common.Hash{}, statedb) + state, _ = state.New(common.Hash{}, statedb, nil) addrs = [AccountRangeMaxResults * 2]common.Address{} m = map[common.Address]bool{} ) @@ -162,7 +162,7 @@ func TestAccountRange(t *testing.T) { func TestEmptyAccountRange(t *testing.T) { var ( statedb = state.NewDatabase(rawdb.NewMemoryDatabase()) - state, _ = state.New(common.Hash{}, statedb) + state, _ = state.New(common.Hash{}, statedb, nil) ) state.Commit(true) @@ -188,7 +188,7 @@ func TestEmptyAccountRange(t *testing.T) { func TestStorageRangeAt(t *testing.T) { // Create a state where account 0x010000... has a few storage entries. 
var ( - state, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase())) + state, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil) addr = common.Address{0x01} keys = []common.Hash{ // hashes of Keys of storage common.HexToHash("340dd630ad21bf010b4e676dbfa9ba9a02175262d1fa356232cfde6cb5b47ef2"), diff --git a/eth/api_tracer.go b/eth/api_tracer.go index ce211cbd99..560f460445 100644 --- a/eth/api_tracer.go +++ b/eth/api_tracer.go @@ -155,7 +155,7 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl return nil, fmt.Errorf("parent block #%d not found", number-1) } } - statedb, err := state.New(start.Root(), database) + statedb, err := state.New(start.Root(), database, nil) if err != nil { // If the starting state is missing, allow some number of blocks to be reexecuted reexec := defaultTraceReexec @@ -168,7 +168,7 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl if start == nil { break } - if statedb, err = state.New(start.Root(), database); err == nil { + if statedb, err = state.New(start.Root(), database, nil); err == nil { break } } @@ -648,7 +648,7 @@ func (api *PrivateDebugAPI) computeStateDB(block *types.Block, reexec uint64) (* if block == nil { break } - if statedb, err = state.New(block.Root(), database); err == nil { + if statedb, err = state.New(block.Root(), database, nil); err == nil { break } } diff --git a/eth/handler_test.go b/eth/handler_test.go index 4a4e1f9559..670fd2b14c 100644 --- a/eth/handler_test.go +++ b/eth/handler_test.go @@ -349,7 +349,7 @@ func testGetNodeData(t *testing.T, protocol int) { } accounts := []common.Address{testBank, acc1Addr, acc2Addr} for i := uint64(0); i <= pm.blockchain.CurrentBlock().NumberU64(); i++ { - trie, _ := state.New(pm.blockchain.GetBlockByNumber(i).Root(), state.NewDatabase(statedb)) + trie, _ := state.New(pm.blockchain.GetBlockByNumber(i).Root(), state.NewDatabase(statedb), nil) for j, acc := range 
accounts { state, _ := pm.blockchain.State() diff --git a/light/odr_test.go b/light/odr_test.go index debd5544c3..9149c02fc2 100644 --- a/light/odr_test.go +++ b/light/odr_test.go @@ -149,7 +149,7 @@ func odrAccounts(ctx context.Context, db ethdb.Database, bc *core.BlockChain, lc st = NewState(ctx, header, lc.Odr()) } else { header := bc.GetHeaderByHash(bhash) - st, _ = state.New(header.Root, state.NewDatabase(db)) + st, _ = state.New(header.Root, state.NewDatabase(db), nil) } var res []byte @@ -189,7 +189,7 @@ func odrContractCall(ctx context.Context, db ethdb.Database, bc *core.BlockChain } else { chain = bc header = bc.GetHeaderByHash(bhash) - st, _ = state.New(header.Root, state.NewDatabase(db)) + st, _ = state.New(header.Root, state.NewDatabase(db), nil) } // Perform read-only call. diff --git a/light/trie.go b/light/trie.go index e512bf6f95..0d69e74e21 100644 --- a/light/trie.go +++ b/light/trie.go @@ -30,7 +30,7 @@ import ( ) func NewState(ctx context.Context, head *types.Header, odr OdrBackend) *state.StateDB { - state, _ := state.New(head.Root, NewStateDatabase(ctx, head, odr)) + state, _ := state.New(head.Root, NewStateDatabase(ctx, head, odr), nil) return state } diff --git a/tests/state_test_util.go b/tests/state_test_util.go index 59ebcb6e1e..a10d044cd0 100644 --- a/tests/state_test_util.go +++ b/tests/state_test_util.go @@ -206,7 +206,7 @@ func (t *StateTest) gasLimit(subtest StateSubtest) uint64 { func MakePreState(db ethdb.Database, accounts core.GenesisAlloc) *state.StateDB { sdb := state.NewDatabase(db) - statedb, _ := state.New(common.Hash{}, sdb) + statedb, _ := state.New(common.Hash{}, sdb, nil) for addr, a := range accounts { statedb.SetCode(addr, a.Code) statedb.SetNonce(addr, a.Nonce) @@ -217,7 +217,7 @@ func MakePreState(db ethdb.Database, accounts core.GenesisAlloc) *state.StateDB } // Commit and re-open to start with a clean state. 
root, _ := statedb.Commit(false) - statedb, _ = state.New(root, sdb) + statedb, _ = state.New(root, sdb, nil) return statedb } diff --git a/trie/iterator.go b/trie/iterator.go index bb4025d8f3..88189c5420 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -29,6 +29,7 @@ import ( type Iterator struct { nodeIt NodeIterator + Nodes int // Number of nodes iterated over Key []byte // Current data key on which the iterator is positioned on Value []byte // Current data value on which the iterator is positioned on Err error @@ -46,6 +47,7 @@ func NewIterator(it NodeIterator) *Iterator { // Next moves the iterator forward one key-value entry. func (it *Iterator) Next() bool { for it.nodeIt.Next(true) { + it.Nodes++ if it.nodeIt.Leaf() { it.Key = it.nodeIt.LeafKey() it.Value = it.nodeIt.LeafBlob()