Skip to content
This repository was archived by the owner on Feb 18, 2025. It is now read-only.

trie: refactor tracer #581

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 17 additions & 33 deletions trie/committer.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ var committerPool = sync.Pool{
}

// newCommitter creates a new committer or picks one from the pool.
func newCommitter(owner common.Hash, tracer *tracer, collectLeaf bool) *committer {
func newCommitter(nodes *NodeSet, tracer *tracer, collectLeaf bool) *committer {
return &committer{
nodes: NewNodeSet(owner),
nodes: nodes,
tracer: tracer,
collectLeaf: collectLeaf,
}
Expand All @@ -74,20 +74,6 @@ func (c *committer) Commit(n node) (hashNode, *NodeSet, error) {
if err != nil {
return nil, nil, err
}
// Some nodes can be deleted from trie which can't be captured by committer
// itself. Iterate all deleted nodes tracked by tracer and marked them as
// deleted only if they are present in database previously.
for _, path := range c.tracer.deleteList() {
// There are a few possibilities for this scenario(the node is deleted
// but not present in database previously), for example the node was
// embedded in the parent and now deleted from the trie. In this case
// it's noop from database's perspective.
val := c.tracer.getPrev(path)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hm that's interested, so they don't need to track the deleted nodes any more from trie in tracer, and mark it deleted 🤔

if len(val) == 0 {
continue
}
c.nodes.markDeleted(path, val)
}
return h.(hashNode), c.nodes, nil
}

Expand Down Expand Up @@ -119,12 +105,6 @@ func (c *committer) commit(path []byte, n node) (node, error) {
if hn, ok := hashedNode.(hashNode); ok {
return hn, nil
}
// The short node now is embedded in its parent. Mark the node as
// deleted if it's present in database previously. It's equivalent
// as deletion from database's perspective.
if prev := c.tracer.getPrev(path); len(prev) != 0 {
c.nodes.markDeleted(path, prev)
}
return collapsed, nil
case *fullNode:
hashedKids, err := c.commitChildren(path, cn)
Expand All @@ -138,12 +118,6 @@ func (c *committer) commit(path []byte, n node) (node, error) {
if hn, ok := hashedNode.(hashNode); ok {
return hn, nil
}
// The short node now is embedded in its parent. Mark the node as
// deleted if it's present in database previously. It's equivalent
// as deletion from database's perspective.
if prev := c.tracer.getPrev(path); len(prev) != 0 {
c.nodes.markDeleted(path, prev)
}
return collapsed, nil
case hashNode:
return cn, nil
Expand Down Expand Up @@ -196,22 +170,32 @@ func (c *committer) store(path []byte, n node) node {
// usually is leaf node). But small value(less than 32bytes) is not
// our target(leaves in account trie only).
if hash == nil {
// The node is embedded in its parent, in other words, this node
// will not be stored in the database independently, mark it as
// deleted only if the node was existent in database before.
prev, ok := c.tracer.accessList[string(path)]
if ok {
c.nodes.addNode(path, &nodeWithPrev{&memoryNode{}, prev})
}
return n
}
// We have the hash already, estimate the RLP encoding-size of the node.
// The size is used for mem tracking, does not need to be exact
var (
size = estimateSize(n)
nhash = common.BytesToHash(hash)
mnode = &memoryNode{
hash: nhash,
node: simplifyNode(n),
size: uint16(size),
node = &nodeWithPrev{
&memoryNode{
nhash,
uint16(size),
simplifyNode(n),
},
c.tracer.accessList[string(path)],
}
)

// Collect the dirty node to nodeset for return.
c.nodes.markUpdated(path, mnode, c.tracer.getPrev(path))
c.nodes.addNode(path, node)
// Collect the corresponding leaf node if it's required. We don't check
// full node since it's impossible to store value in fullNode. The key
// length of leaves should be exactly same.
Expand Down
30 changes: 22 additions & 8 deletions trie/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -782,17 +782,31 @@ func (db *Database) Update(nodes *MergedNodeSet) error {
defer db.lock.Unlock()
// Insert dirty nodes into the database. In the same tree, it must be
// ensured that children are inserted first, then parent so that children
// can be linked with their parent correctly. The order of writing between
// different tries(account trie, storage tries) is not required.
for owner, subset := range nodes.sets {
for _, path := range subset.updates.order {
n, ok := subset.updates.nodes[path]
if !ok {
return fmt.Errorf("missing node %x %v", owner, path)
// can be linked with their parent correctly.
//
// Note, the storage tries must be flushed before the account trie to
// retain the invariant that children go into the dirty cache first.
var order []common.Hash
for owner := range nodes.sets {
if owner == (common.Hash{}) {
continue
}
order = append(order, owner)
}
if _, ok := nodes.sets[common.Hash{}]; ok {
order = append(order, common.Hash{})
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, the trie with owner empty representing the account trie is pushed to the back of the array so that all the storage dirty nodes are flushed first.
Reference: ethereum/go-ethereum@5758d1f

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add this like comment in the source code too ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a comment like this above

 	// Note, the storage tries must be flushed before the account trie to
	// retain the invariant that children go into the dirty cache first.

Do you think this is clear enough?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the trie with owner empty representing the account trie is pushed to the back of the array so that all the storage dirty nodes are flushed first. I think u can add those blocks for showing why we use common.Hash{} here.

}
for _, owner := range order {
subset := nodes.sets[owner]
subset.forEachWithOrder(func(path string, n *memoryNode) {
if n.isDeleted() {
return // ignore deletion
}
db.insert(n.hash, int(n.size), n.node)
}
})
}
// Link up the account trie and storage trie if the node points
// to an account trie leaf.
if set, present := nodes.sets[common.Hash{}]; present {
for _, leaf := range set.leaves {
// Looping node leaf, then reference the leaf node to the root node
Expand Down
100 changes: 52 additions & 48 deletions trie/nodeset.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package trie
import (
"fmt"
"reflect"
"sort"
"strings"

"github.com/ethereum/go-ethereum/common"
Expand All @@ -42,8 +43,13 @@ var memoryNodeSize = int(reflect.TypeOf(memoryNode{}).Size())

// memorySize returns the total memory size used by this node.
// nolint:unused
func (n *memoryNode) memorySize(key int) int {
return int(n.size) + memoryNodeSize + key
func (n *memoryNode) memorySize(pathlen int) int {
return int(n.size) + memoryNodeSize + pathlen
}

// isDeleted returns the indicator if the node is marked as deleted.
func (n *memoryNode) isDeleted() bool {
return n.hash == (common.Hash{})
}

// rlp returns the raw rlp encoded blob of the cached trie node, either directly
Expand Down Expand Up @@ -89,21 +95,19 @@ func (n *nodeWithPrev) memorySize(key int) int {
return n.memoryNode.memorySize(key) + len(n.prev)
}

// nodesWithOrder represents a collection of dirty nodes which includes
// newly-inserted and updated nodes. The modification order of all nodes
// is represented by order list.
type nodesWithOrder struct {
order []string // the path list of dirty nodes, sort by insertion order
nodes map[string]*nodeWithPrev // the map of dirty nodes, keyed by node path
}

// NodeSet contains all dirty nodes collected during the commit operation
// Each node is keyed by path. It's not the thread-safe to use.
type NodeSet struct {
owner common.Hash // the identifier of the trie
updates *nodesWithOrder // the set of updated nodes(newly inserted, updated)
deletes map[string][]byte // the map of deleted nodes, keyed by node
leaves []*leaf // the list of dirty leaves
owner common.Hash // the identifier of the trie
leaves []*leaf // the list of dirty leaves
updates int // the count of updated and inserted nodes
deletes int // the count of deleted nodes

// The set of all dirty nodes. Dirty nodes include newly inserted nodes,
// deleted nodes and updated nodes. The original value of the newly
// inserted node must be nil, and the original value of the other two
// types must be non-nil.
nodes map[string]*nodeWithPrev
}

// NewNodeSet initializes an empty node set to be used for tracking dirty nodes
Expand All @@ -112,35 +116,32 @@ type NodeSet struct {
func NewNodeSet(owner common.Hash) *NodeSet {
return &NodeSet{
owner: owner,
updates: &nodesWithOrder{
nodes: make(map[string]*nodeWithPrev),
},
deletes: make(map[string][]byte),
nodes: make(map[string]*nodeWithPrev),
}
}

// NewNodeSetWithDeletion initializes the nodeset with provided deletion set.
func NewNodeSetWithDeletion(owner common.Hash, paths [][]byte, prev [][]byte) *NodeSet {
set := NewNodeSet(owner)
for i, path := range paths {
set.markDeleted(path, prev[i])
// forEachWithOrder iterates the dirty nodes with the order from bottom to top,
// right to left, nodes with the longest path will be iterated first.
func (set *NodeSet) forEachWithOrder(callback func(path string, n *memoryNode)) {
var paths sort.StringSlice
for path := range set.nodes {
paths = append(paths, path)
}
return set
}

// markUpdated marks the node as dirty(newly-inserted or updated) with provided
// node path, node object along with its previous value.
func (set *NodeSet) markUpdated(path []byte, node *memoryNode, prev []byte) {
set.updates.order = append(set.updates.order, string(path))
set.updates.nodes[string(path)] = &nodeWithPrev{
memoryNode: node,
prev: prev,
// Bottom-up, longest path first
sort.Sort(sort.Reverse(paths))
for _, path := range paths {
callback(path, set.nodes[path].unwrap())
}
}

// markDeleted marks the node as deleted with provided path and previous value.
func (set *NodeSet) markDeleted(path []byte, prev []byte) {
set.deletes[string(path)] = prev
// addNode adds the provided dirty node into set.
func (set *NodeSet) addNode(path []byte, n *nodeWithPrev) {
if n.isDeleted() {
set.deletes += 1
} else {
set.updates += 1
}
set.nodes[string(path)] = n
}

// addLeaf collects the provided leaf node into set.
Expand All @@ -150,13 +151,13 @@ func (set *NodeSet) addLeaf(leaf *leaf) {

// Size returns the number of updated and deleted nodes contained in the set.
func (set *NodeSet) Size() (int, int) {
return len(set.updates.order), len(set.deletes)
return set.updates, set.deletes
}

// Hashes returns the hashes of all updated nodes.
func (set *NodeSet) Hashes() []common.Hash {
var ret []common.Hash
for _, node := range set.updates.nodes {
for _, node := range set.nodes {
ret = append(ret, node.hash)
}
return ret
Expand All @@ -166,19 +167,22 @@ func (set *NodeSet) Hashes() []common.Hash {
func (set *NodeSet) Summary() string {
var out = new(strings.Builder)
fmt.Fprintf(out, "nodeset owner: %v\n", set.owner)
if set.updates != nil {
for _, key := range set.updates.order {
updated := set.updates.nodes[key]
if updated.prev != nil {
fmt.Fprintf(out, " [*]: %x -> %v prev: %x\n", key, updated.hash, updated.prev)
} else {
fmt.Fprintf(out, " [+]: %x -> %v\n", key, updated.hash)
if set.nodes != nil {
for path, n := range set.nodes {
// Deletion
if n.isDeleted() {
fmt.Fprintf(out, " [-]: %x prev: %x\n", path, n.prev)
continue
}
// Insertion
if len(n.prev) == 0 {
fmt.Fprintf(out, " [+]: %x -> %v\n", path, n.hash)
continue
}
// Update
fmt.Fprintf(out, " [*]: %x -> %v prev: %x\n", path, n.hash, n.prev)
}
}
for k, n := range set.deletes {
fmt.Fprintf(out, " [-]: %x -> %x\n", k, n)
}
for _, n := range set.leaves {
fmt.Fprintf(out, "[leaf]: %v\n", n)
}
Expand Down
2 changes: 1 addition & 1 deletion trie/proof.go
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ func VerifyRangeProof(rootHash common.Hash, firstKey []byte, lastKey []byte, key
}
// Rebuild the trie with the leaf stream, the shape of trie
// should be same with the original one.
tr := &Trie{root: root, reader: newEmptyReader()}
tr := &Trie{root: root, reader: newEmptyReader(), tracer: newTracer()}
if empty {
tr.root = nil
}
Expand Down
Loading
Loading