core/rawdb: fix cornercase shutdown behaviour in freezer (#26485)

This PR does a few things. 
It fixes a shutdown-order flaw in the chain freezer. Previously, the chain freezer would shut down the freezer backend first, and only then signal the background loop to exit. This could lead to a scenario where the freezer tries to fsync already-closed files, which is an error condition that could lead to exit via log.Crit. 

It also makes the printout more detailed when truncating 'dangling' items, by showing the exact number instead of approximate MB.

This PR also adds calls to fsync files before closing them, and also makes the `db inspect` command slightly more robust.
This commit is contained in:
Martin Holst Swende 2023-01-16 03:57:27 -05:00 committed by GitHub
parent 450d771bee
commit 0b53b29078
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 63 additions and 31 deletions

@ -551,16 +551,8 @@ func freezerInspect(ctx *cli.Context) error {
return err return err
} }
stack, _ := makeConfigNode(ctx) stack, _ := makeConfigNode(ctx)
defer stack.Close() ancient := stack.ResolveAncient("chaindata", ctx.String(utils.AncientFlag.Name))
stack.Close()
db := utils.MakeChainDatabase(ctx, stack, true)
defer db.Close()
ancient, err := db.AncientDatadir()
if err != nil {
log.Info("Failed to retrieve ancient root", "err", err)
return err
}
return rawdb.InspectFreezerTable(ancient, freezer, table, start, end) return rawdb.InspectFreezerTable(ancient, freezer, table, start, end)
} }

@ -70,14 +70,13 @@ func newChainFreezer(datadir string, namespace string, readonly bool, maxTableSi
// Close closes the chain freezer instance and terminates the background thread. // Close closes the chain freezer instance and terminates the background thread.
func (f *chainFreezer) Close() error { func (f *chainFreezer) Close() error {
err := f.Freezer.Close()
select { select {
case <-f.quit: case <-f.quit:
default: default:
close(f.quit) close(f.quit)
} }
f.wg.Wait() f.wg.Wait()
return err return f.Freezer.Close()
} }
// freeze is a background thread that periodically checks the blockchain for any // freeze is a background thread that periodically checks the blockchain for any

@ -229,6 +229,7 @@ func (t *freezerTable) repair() error {
lastIndex indexEntry lastIndex indexEntry
contentSize int64 contentSize int64
contentExp int64 contentExp int64
verbose bool
) )
// Read index zero, determine what file is the earliest // Read index zero, determine what file is the earliest
// and what item offset to use // and what item offset to use
@ -272,9 +273,10 @@ func (t *freezerTable) repair() error {
// Keep truncating both files until they come in sync // Keep truncating both files until they come in sync
contentExp = int64(lastIndex.offset) contentExp = int64(lastIndex.offset)
for contentExp != contentSize { for contentExp != contentSize {
verbose = true
// Truncate the head file to the last offset pointer // Truncate the head file to the last offset pointer
if contentExp < contentSize { if contentExp < contentSize {
t.logger.Warn("Truncating dangling head", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize)) t.logger.Warn("Truncating dangling head", "indexed", contentExp, "stored", contentSize)
if err := truncateFreezerFile(t.head, contentExp); err != nil { if err := truncateFreezerFile(t.head, contentExp); err != nil {
return err return err
} }
@ -282,7 +284,7 @@ func (t *freezerTable) repair() error {
} }
// Truncate the index to point within the head file // Truncate the index to point within the head file
if contentExp > contentSize { if contentExp > contentSize {
t.logger.Warn("Truncating dangling indexes", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize)) t.logger.Warn("Truncating dangling indexes", "indexes", offsetsSize/indexEntrySize, "indexed", contentExp, "stored", contentSize)
if err := truncateFreezerFile(t.index, offsetsSize-indexEntrySize); err != nil { if err := truncateFreezerFile(t.index, offsetsSize-indexEntrySize); err != nil {
return err return err
} }
@ -343,7 +345,11 @@ func (t *freezerTable) repair() error {
if err := t.preopen(); err != nil { if err := t.preopen(); err != nil {
return err return err
} }
t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes)) if verbose {
t.logger.Info("Chain freezer table opened", "items", t.items, "size", t.headBytes)
} else {
t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes))
}
return nil return nil
} }
@ -553,21 +559,31 @@ func (t *freezerTable) Close() error {
defer t.lock.Unlock() defer t.lock.Unlock()
var errs []error var errs []error
if err := t.index.Close(); err != nil { doClose := func(f *os.File, sync bool, close bool) {
errs = append(errs, err) if sync && !t.readonly {
} if err := f.Sync(); err != nil {
t.index = nil errs = append(errs, err)
}
if err := t.meta.Close(); err != nil { }
errs = append(errs, err) if close {
} if err := f.Close(); err != nil {
t.meta = nil errs = append(errs, err)
}
for _, f := range t.files {
if err := f.Close(); err != nil {
errs = append(errs, err)
} }
} }
// Trying to fsync a file opened in rdonly causes "Access denied"
// error on Windows.
doClose(t.index, true, true)
doClose(t.meta, true, true)
// The preopened non-head data-files are all opened in readonly.
// The head is opened in rw-mode, so we sync it here - but since it's also
// part of t.files, it will be closed in the loop below.
doClose(t.head, true, false) // sync but do not close
for _, f := range t.files {
doClose(f, false, true) // close but do not sync
}
t.index = nil
t.meta = nil
t.head = nil t.head = nil
if errs != nil { if errs != nil {
@ -724,7 +740,7 @@ func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []i
defer t.lock.RUnlock() defer t.lock.RUnlock()
// Ensure the table and the item are accessible // Ensure the table and the item are accessible
if t.index == nil || t.head == nil { if t.index == nil || t.head == nil || t.meta == nil {
return nil, nil, errClosed return nil, nil, errClosed
} }
var ( var (
@ -872,7 +888,9 @@ func (t *freezerTable) advanceHead() error {
func (t *freezerTable) Sync() error { func (t *freezerTable) Sync() error {
t.lock.Lock() t.lock.Lock()
defer t.lock.Unlock() defer t.lock.Unlock()
if t.index == nil || t.head == nil || t.meta == nil {
return errClosed
}
var err error var err error
trackError := func(e error) { trackError := func(e error) {
if e != nil && err == nil { if e != nil && err == nil {
@ -903,7 +921,8 @@ func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) {
fmt.Fprintf(w, "Failed to decode freezer table %v\n", err) fmt.Fprintf(w, "Failed to decode freezer table %v\n", err)
return return
} }
fmt.Fprintf(w, "Version %d deleted %d, hidden %d\n", meta.Version, atomic.LoadUint64(&t.itemOffset), atomic.LoadUint64(&t.itemHidden)) fmt.Fprintf(w, "Version %d count %d, deleted %d, hidden %d\n", meta.Version,
atomic.LoadUint64(&t.items), atomic.LoadUint64(&t.itemOffset), atomic.LoadUint64(&t.itemHidden))
buf := make([]byte, indexEntrySize) buf := make([]byte, indexEntrySize)

@ -407,3 +407,25 @@ func TestRenameWindows(t *testing.T) {
t.Errorf("unexpected file contents. Got %v\n", buf) t.Errorf("unexpected file contents. Got %v\n", buf)
} }
} }
// TestFreezerCloseSync verifies that calling Sync on a freezer that has
// already been closed reports an error rather than succeeding.
func TestFreezerCloseSync(t *testing.T) {
	t.Parallel()
	f, _ := newFreezerForTesting(t, map[string]bool{"a": true, "b": true})
	defer f.Close()

	// Close first, then sync. This mimics the behaviour if the node is shut
	// down just as the chain freezer is writing:
	// 1: thread-1: chain freezer writes, via freezeRange (holds lock)
	// 2: thread-2: Close called, waits for write to finish
	// 3: thread-1: finishes writing, releases lock
	// 4: thread-2: obtains lock, completes Close()
	// 5: thread-1: calls f.Sync()
	if err := f.Close(); err != nil {
		t.Fatal(err)
	}
	err := f.Sync()
	if err == nil {
		t.Fatalf("want error, have nil")
	}
	// The expected text holds two "closed" entries; presumably one per table
	// ("a" and "b") created above — confirm against Freezer.Sync.
	if have, want := err.Error(), "[closed closed]"; have != want {
		// Arguments are passed in the same order as the format verbs so the
		// failure message labels the values correctly.
		t.Fatalf("want %v, have %v", want, have)
	}
}