cmd/geth: add hash2path & trie get tools

This commit is contained in:
Fynn 2023-09-25 11:16:36 +08:00
parent 528d97b541
commit b8bad314ed
4 changed files with 537 additions and 0 deletions

@ -19,6 +19,7 @@ package main
import (
"bytes"
"fmt"
"math"
"os"
"os/signal"
"path/filepath"
@ -72,6 +73,10 @@ Remove blockchain and state databases`,
// no legacy stored receipts for bsc
// dbMigrateFreezerCmd,
dbCheckStateContentCmd,
dbHbss2PbssCmd,
dbPruneHashTrieCmd,
dbTrieGetCmd,
dbTrieDeleteCmd,
},
}
dbInspectCmd = &cli.Command{
@ -94,6 +99,54 @@ Remove blockchain and state databases`,
For each trie node encountered, it checks that the key corresponds to the keccak256(value). If this is not true, this indicates
a data corruption.`,
}
dbHbss2PbssCmd = &cli.Command{
Action: hbss2pbss,
Name: "hbss-to-pbss",
ArgsUsage: "<jobnum (optional)>",
Flags: []cli.Flag{
utils.DataDirFlag,
utils.SyncModeFlag,
},
Usage: "Convert Hash-Base to Path-Base trie node.",
Description: `This command iterates the entire trie node database and convert the hash-base node to path-base node.`,
}
dbTrieGetCmd = &cli.Command{
Action: dbTrieGet,
Name: "trie-get",
Usage: "Show the value of a trie node path key",
ArgsUsage: "[trie owner] <path-base key>",
Flags: []cli.Flag{
utils.DataDirFlag,
utils.SyncModeFlag,
utils.MainnetFlag,
utils.StateSchemeFlag,
},
Description: "This command looks up the specified trie node key from the database.",
}
dbTrieDeleteCmd = &cli.Command{
Action: dbTrieDelete,
Name: "trie-delete",
Usage: "delete the specify trie node",
ArgsUsage: "[trie owner] <hash-base key> | <path-base key>",
Flags: []cli.Flag{
utils.DataDirFlag,
utils.SyncModeFlag,
utils.MainnetFlag,
utils.StateSchemeFlag,
},
Description: "This command delete the specify trie node from the database.",
}
dbPruneHashTrieCmd = &cli.Command{
Action: pruneHashTrie,
Name: "prune-hash-trie",
ArgsUsage: "",
Flags: []cli.Flag{
utils.DataDirFlag,
utils.SyncModeFlag,
},
Usage: "[Caution]Prune all the hash trie node in diskdb",
Description: `This command iterates the entrie kv in leveldb and delete all the hash trie node.`,
}
dbStatCmd = &cli.Command{
Action: dbStats,
Name: "stats",
@ -433,6 +486,133 @@ func dbGet(ctx *cli.Context) error {
return nil
}
// dbTrieGet shows the value of a given database key
func dbTrieGet(ctx *cli.Context) error {
if ctx.NArg() < 1 || ctx.NArg() > 2 {
return fmt.Errorf("required arguments: %v", ctx.Command.ArgsUsage)
}
stack, _ := makeConfigNode(ctx)
defer stack.Close()
db := utils.MakeChainDatabase(ctx, stack, false, false)
defer db.Close()
scheme := ctx.String(utils.StateSchemeFlag.Name)
if scheme == "" {
scheme = rawdb.HashScheme
}
if scheme == rawdb.PathScheme {
var (
pathKey []byte
owner []byte
err error
)
if ctx.NArg() == 1 {
pathKey, err = hexutil.Decode(ctx.Args().Get(0))
if err != nil {
log.Info("Could not decode the value", "error", err)
return err
}
nodeVal, hash := rawdb.ReadAccountTrieNode(db, pathKey)
log.Info("TrieGet result ", "PathKey", common.Bytes2Hex(pathKey), "Hash: ", hash, "node: ", trie.NodeString(hash.Bytes(), nodeVal))
} else if ctx.NArg() == 2 {
owner, err = hexutil.Decode(ctx.Args().Get(0))
if err != nil {
log.Info("Could not decode the value", "error", err)
return err
}
pathKey, err = hexutil.Decode(ctx.Args().Get(1))
if err != nil {
log.Info("Could not decode the value", "error", err)
return err
}
nodeVal, hash := rawdb.ReadStorageTrieNode(db, common.BytesToHash(owner), pathKey)
log.Info("TrieGet result ", "PathKey: ", common.Bytes2Hex(pathKey), "Owner: ", common.BytesToHash(owner), "Hash: ", hash, "node: ", trie.NodeString(hash.Bytes(), nodeVal))
}
} else if scheme == rawdb.HashScheme {
if ctx.NArg() == 1 {
hashKey, err := hexutil.Decode(ctx.Args().Get(0))
if err != nil {
log.Info("Could not decode the value", "error", err)
return err
}
val, err := db.Get(hashKey)
if err != nil {
log.Error("db get failed, ", "error: ", err)
return err
}
log.Info("TrieGet result ", "HashKey: ", common.BytesToHash(hashKey), "node: ", trie.NodeString(hashKey, val))
} else {
log.Error("args too much")
}
}
return nil
}
// dbTrieDelete delete the trienode of a given database key
func dbTrieDelete(ctx *cli.Context) error {
if ctx.NArg() < 1 || ctx.NArg() > 2 {
return fmt.Errorf("required arguments: %v", ctx.Command.ArgsUsage)
}
stack, _ := makeConfigNode(ctx)
defer stack.Close()
db := utils.MakeChainDatabase(ctx, stack, false, false)
defer db.Close()
scheme := ctx.String(utils.StateSchemeFlag.Name)
if scheme == "" {
scheme = rawdb.HashScheme
}
if scheme == rawdb.PathScheme {
var (
pathKey []byte
owner []byte
err error
)
if ctx.NArg() == 1 {
pathKey, err = hexutil.Decode(ctx.Args().Get(0))
if err != nil {
log.Info("Could not decode the value", "error", err)
return err
}
rawdb.DeleteAccountTrieNode(db, pathKey)
} else if ctx.NArg() == 2 {
owner, err = hexutil.Decode(ctx.Args().Get(0))
if err != nil {
log.Info("Could not decode the value", "error", err)
return err
}
pathKey, err = hexutil.Decode(ctx.Args().Get(1))
if err != nil {
log.Info("Could not decode the value", "error", err)
return err
}
rawdb.DeleteStorageTrieNode(db, common.BytesToHash(owner), pathKey)
}
} else if scheme == rawdb.HashScheme {
if ctx.NArg() == 1 {
hashKey, err := hexutil.Decode(ctx.Args().Get(0))
if err != nil {
log.Info("Could not decode the value", "error", err)
return err
}
err = db.Delete(hashKey)
if err != nil {
log.Error("db delete failed", "err", err)
return err
}
} else {
log.Error("args too much")
}
}
return nil
}
// dbDelete deletes a key from the database
func dbDelete(ctx *cli.Context) error {
if ctx.NArg() != 1 {
@ -743,3 +923,90 @@ func showMetaData(ctx *cli.Context) error {
table.Render()
return nil
}
func hbss2pbss(ctx *cli.Context) error {
if ctx.NArg() > 1 {
return fmt.Errorf("required arguments: %v", ctx.Command.ArgsUsage)
}
var jobnum uint64
var err error
if ctx.NArg() == 1 {
jobnum, err = strconv.ParseUint(ctx.Args().Get(0), 10, 64)
if err != nil {
return fmt.Errorf("failed to Parse jobnum, Args[1]: %v, err: %v", ctx.Args().Get(1), err)
}
} else {
// by default
jobnum = 1000
}
stack, _ := makeConfigNode(ctx)
defer stack.Close()
db := utils.MakeChainDatabase(ctx, stack, false, false)
db.Sync()
defer db.Close()
config := trie.HashDefaults
triedb := trie.NewDatabase(db, config)
triedb.Cap(0)
log.Info("hbss2pbss triedb", "scheme", triedb.Scheme())
defer triedb.Close()
headerHash := rawdb.ReadHeadHeaderHash(db)
blockNumber := rawdb.ReadHeaderNumber(db, headerHash)
if blockNumber == nil {
log.Error("read header number failed.")
return fmt.Errorf("read header number failed")
}
log.Info("hbss2pbss converting", "HeaderHash: ", headerHash.String(), ", blockNumber: ", *blockNumber)
var headerBlockHash common.Hash
var trieRootHash common.Hash
if *blockNumber != math.MaxUint64 {
headerBlockHash = rawdb.ReadCanonicalHash(db, *blockNumber)
if headerBlockHash == (common.Hash{}) {
return fmt.Errorf("ReadHeadBlockHash empty hash")
}
blockHeader := rawdb.ReadHeader(db, headerBlockHash, *blockNumber)
trieRootHash = blockHeader.Root
fmt.Println("Canonical Hash: ", headerBlockHash.String(), ", TrieRootHash: ", trieRootHash.String())
}
if (trieRootHash == common.Hash{}) {
log.Error("Empty root hash")
return fmt.Errorf("Empty root hash.")
}
id := trie.StateTrieID(trieRootHash)
theTrie, err := trie.New(id, triedb)
if err != nil {
log.Error("fail to new trie tree", "err", err, "rootHash", err, trieRootHash.String())
return err
}
h2p, err := trie.NewHbss2Pbss(theTrie, triedb, trieRootHash, *blockNumber, jobnum)
if err != nil {
log.Error("fail to new hash2pbss", "err", err, "rootHash", err, trieRootHash.String())
return err
}
h2p.Run()
return nil
}
func pruneHashTrie(ctx *cli.Context) error {
if ctx.NArg() != 0 {
return fmt.Errorf("required none argument")
}
stack, _ := makeConfigNode(ctx)
defer stack.Close()
db := utils.MakeChainDatabase(ctx, stack, false, false)
defer db.Close()
return rawdb.PruneHashTrieNodeInDataBase(db)
}

@ -581,6 +581,28 @@ func AncientInspect(db ethdb.Database) error {
return nil
}
func PruneHashTrieNodeInDataBase(db ethdb.Database) error {
it := db.NewIterator([]byte{}, []byte{})
defer it.Release()
total_num := 0
for it.Next() {
var key = it.Key()
switch {
case IsLegacyTrieNode(key, it.Value()):
db.Delete(key)
total_num++
if total_num%100000 == 0 {
log.Info("Pruning ", "Complete progress: ", total_num, "hash-base trie nodes")
}
default:
continue
}
}
log.Info("Pruning ", "Complete progress", total_num, "hash-base trie nodes")
return nil
}
// InspectDatabase traverses the entire database and checks the size
// of all different categories of data.
func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error {

243
trie/hbss2pbss.go Normal file

@ -0,0 +1,243 @@
package trie
import (
"bytes"
"errors"
"fmt"
"runtime"
"sync"
"sync/atomic"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie/trienode"
)
type Hbss2Pbss struct {
trie *Trie // traverse trie
db *Database
blocknum uint64
root node // root of triedb
stateRootHash common.Hash
concurrentQueue chan struct{}
totalNum uint64
wg sync.WaitGroup
}
const (
DEFAULT_TRIEDBCACHE_SIZE = 1024 * 1024 * 1024
)
// NewHbss2Pbss return a hash2Path obj
func NewHbss2Pbss(tr *Trie, db *Database, stateRootHash common.Hash, blocknum uint64, jobnum uint64) (*Hbss2Pbss, error) {
if tr == nil {
return nil, errors.New("trie is nil")
}
if tr.root == nil {
return nil, errors.New("trie root is nil")
}
ins := &Hbss2Pbss{
trie: tr,
blocknum: blocknum,
db: db,
stateRootHash: stateRootHash,
root: tr.root,
concurrentQueue: make(chan struct{}, jobnum),
wg: sync.WaitGroup{},
}
return ins, nil
}
func (t *Trie) resloveWithoutTrack(n node, prefix []byte) (node, error) {
if n, ok := n.(hashNode); ok {
blob, err := t.reader.node(prefix, common.BytesToHash(n))
if err != nil {
return nil, err
}
return mustDecodeNode(n, blob), nil
}
return n, nil
}
func (h2p *Hbss2Pbss) writeNode(pathKey []byte, n *trienode.Node, owner common.Hash) {
if owner == (common.Hash{}) {
rawdb.WriteAccountTrieNode(h2p.db.diskdb, pathKey, n.Blob)
log.Debug("WriteNodes account node, ", "path: ", common.Bytes2Hex(pathKey), "Hash: ", n.Hash, "BlobHash: ", crypto.Keccak256Hash(n.Blob))
} else {
rawdb.WriteStorageTrieNode(h2p.db.diskdb, owner, pathKey, n.Blob)
log.Debug("WriteNodes storage node, ", "path: ", common.Bytes2Hex(pathKey), "owner: ", owner.String(), "Hash: ", n.Hash, "BlobHash: ", crypto.Keccak256Hash(n.Blob))
}
}
// Run statistics, external call
func (h2p *Hbss2Pbss) Run() {
log.Debug("Find Account Trie Tree, rootHash: ", h2p.trie.Hash().String(), "BlockNum: ", h2p.blocknum)
h2p.ConcurrentTraversal(h2p.trie, h2p.root, []byte{})
h2p.wg.Wait()
log.Info("Total complete: %v, go routines Num: %v, h2p concurrentQueue: %v\n", h2p.totalNum, runtime.NumGoroutine(), len(h2p.concurrentQueue))
rawdb.WritePersistentStateID(h2p.db.diskdb, h2p.blocknum)
rawdb.WriteStateID(h2p.db.diskdb, h2p.stateRootHash, h2p.blocknum)
}
func (h2p *Hbss2Pbss) SubConcurrentTraversal(theTrie *Trie, theNode node, path []byte) {
h2p.concurrentQueue <- struct{}{}
h2p.ConcurrentTraversal(theTrie, theNode, path)
<-h2p.concurrentQueue
h2p.wg.Done()
}
func (h2p *Hbss2Pbss) ConcurrentTraversal(theTrie *Trie, theNode node, path []byte) {
total_num := uint64(0)
// nil node
if theNode == nil {
return
}
switch current := (theNode).(type) {
case *shortNode:
collapsed := current.copy()
collapsed.Key = hexToCompact(current.Key)
var hash, _ = current.cache()
h2p.writeNode(path, trienode.New(common.BytesToHash(hash), nodeToBytes(collapsed)), theTrie.owner)
h2p.ConcurrentTraversal(theTrie, current.Val, append(path, current.Key...))
case *fullNode:
// copy from trie/Committer (*committer).commit
collapsed := current.copy()
var hash, _ = collapsed.cache()
collapsed.Children = h2p.commitChildren(path, current)
nodebytes := nodeToBytes(collapsed)
if common.BytesToHash(hash) != common.BytesToHash(crypto.Keccak256(nodebytes)) {
log.Error("Hash is inconsistent, hash: ", common.BytesToHash(hash), "node hash: ", common.BytesToHash(crypto.Keccak256(nodebytes)), "node: ", collapsed.fstring(""))
panic("hash inconsistent.")
}
h2p.writeNode(path, trienode.New(common.BytesToHash(hash), nodeToBytes(collapsed)), theTrie.owner)
for idx, child := range current.Children {
if child == nil {
continue
}
childPath := append(path, byte(idx))
if len(h2p.concurrentQueue)*2 < cap(h2p.concurrentQueue) {
h2p.wg.Add(1)
dst := make([]byte, len(childPath))
copy(dst, childPath)
go h2p.SubConcurrentTraversal(theTrie, child, dst)
} else {
h2p.ConcurrentTraversal(theTrie, child, childPath)
}
}
case hashNode:
n, err := theTrie.resloveWithoutTrack(current, path)
if err != nil {
log.Error("Resolve HashNode", "error", err, "TrieRoot", theTrie.Hash(), "Path", path)
return
}
h2p.ConcurrentTraversal(theTrie, n, path)
total_num = atomic.AddUint64(&h2p.totalNum, 1)
if total_num%100000 == 0 {
log.Info("Converting ", "Complete progress", total_num, "go routines Num", runtime.NumGoroutine(), "h2p concurrentQueue", len(h2p.concurrentQueue))
}
return
case valueNode:
if !hasTerm(path) {
log.Info("ValueNode miss path term", "path", common.Bytes2Hex(path))
break
}
var account types.StateAccount
if err := rlp.Decode(bytes.NewReader(current), &account); err != nil {
// log.Info("Rlp decode account failed.", "err", err)
break
}
if account.Root == (common.Hash{}) || account.Root == types.EmptyRootHash {
// log.Info("Not a storage trie.", "account", common.BytesToHash(path).String())
break
}
ownerAddress := common.BytesToHash(hexToCompact(path))
tr, err := New(StorageTrieID(h2p.stateRootHash, ownerAddress, account.Root), h2p.db)
if err != nil {
log.Error("New Storage trie error", "err", err, "root", account.Root.String(), "owner", ownerAddress.String())
break
}
log.Debug("Find Contract Trie Tree", "rootHash: ", tr.Hash().String(), "")
h2p.wg.Add(1)
go h2p.SubConcurrentTraversal(tr, tr.root, []byte{})
default:
panic(errors.New("Invalid node type to traverse."))
}
}
// copy from trie/Commiter (*committer).commit
func (h2p *Hbss2Pbss) commitChildren(path []byte, n *fullNode) [17]node {
var children [17]node
for i := 0; i < 16; i++ {
child := n.Children[i]
if child == nil {
continue
}
// If it's the hashed child, save the hash value directly.
// Note: it's impossible that the child in range [0, 15]
// is a valueNode.
if hn, ok := child.(hashNode); ok {
children[i] = hn
continue
}
children[i] = h2p.commit(append(path, byte(i)), child)
}
// For the 17th child, it's possible the type is valuenode.
if n.Children[16] != nil {
children[16] = n.Children[16]
}
return children
}
// commit collapses a node down into a hash node and returns it.
func (h2p *Hbss2Pbss) commit(path []byte, n node) node {
// if this path is clean, use available cached data
hash, dirty := n.cache()
if hash != nil && !dirty {
return hash
}
// Commit children, then parent, and remove the dirty flag.
switch cn := n.(type) {
case *shortNode:
// Commit child
collapsed := cn.copy()
// If the child is fullNode, recursively commit,
// otherwise it can only be hashNode or valueNode.
if _, ok := cn.Val.(*fullNode); ok {
collapsed.Val = h2p.commit(append(path, cn.Key...), cn.Val)
}
// The key needs to be copied, since we're adding it to the
// modified nodeset.
collapsed.Key = hexToCompact(cn.Key)
return collapsed
case *fullNode:
hashedKids := h2p.commitChildren(path, cn)
collapsed := cn.copy()
collapsed.Children = hashedKids
return collapsed
case hashNode:
return cn
default:
// nil, valuenode shouldn't be committed
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
}
}

@ -112,6 +112,11 @@ func (n rawNode) EncodeRLP(w io.Writer) error {
return err
}
func NodeString(hash, buf []byte) string {
node := mustDecodeNode(hash, buf)
return node.fstring("NodeString: ")
}
// mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered.
func mustDecodeNode(hash, buf []byte) node {
n, err := decodeNode(hash, buf)