core/rawdb: fix cornercase shutdown behaviour in freezer (#26485)

This PR does a few things. 
It fixes a shutdown-order flaw in the chainfreezer. Previously, the chain-freezer would shutdown the freezer backend first, and then signal for the loop to exit. This can lead to a scenario where the freezer tries to fsync closed files, which is an error-conditon that could lead to exit via log.Crit. 

It also makes the printout more detailed when truncating 'dangling' items, by showing the exact number instead of approximate MB.

This PR also adds calls to fsync files before closing them, and also makes the `db inspect` command slightly more robust.
This commit is contained in:
Martin Holst Swende 2023-01-16 03:57:27 -05:00 committed by GitHub
parent 450d771bee
commit 0b53b29078
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 63 additions and 31 deletions

@ -551,16 +551,8 @@ func freezerInspect(ctx *cli.Context) error {
return err
}
stack, _ := makeConfigNode(ctx)
defer stack.Close()
db := utils.MakeChainDatabase(ctx, stack, true)
defer db.Close()
ancient, err := db.AncientDatadir()
if err != nil {
log.Info("Failed to retrieve ancient root", "err", err)
return err
}
ancient := stack.ResolveAncient("chaindata", ctx.String(utils.AncientFlag.Name))
stack.Close()
return rawdb.InspectFreezerTable(ancient, freezer, table, start, end)
}

@ -70,14 +70,13 @@ func newChainFreezer(datadir string, namespace string, readonly bool, maxTableSi
// Close closes the chain freezer instance and terminates the background thread.
func (f *chainFreezer) Close() error {
err := f.Freezer.Close()
select {
case <-f.quit:
default:
close(f.quit)
}
f.wg.Wait()
return err
return f.Freezer.Close()
}
// freeze is a background thread that periodically checks the blockchain for any

@ -229,6 +229,7 @@ func (t *freezerTable) repair() error {
lastIndex indexEntry
contentSize int64
contentExp int64
verbose bool
)
// Read index zero, determine what file is the earliest
// and what item offset to use
@ -272,9 +273,10 @@ func (t *freezerTable) repair() error {
// Keep truncating both files until they come in sync
contentExp = int64(lastIndex.offset)
for contentExp != contentSize {
verbose = true
// Truncate the head file to the last offset pointer
if contentExp < contentSize {
t.logger.Warn("Truncating dangling head", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize))
t.logger.Warn("Truncating dangling head", "indexed", contentExp, "stored", contentSize)
if err := truncateFreezerFile(t.head, contentExp); err != nil {
return err
}
@ -282,7 +284,7 @@ func (t *freezerTable) repair() error {
}
// Truncate the index to point within the head file
if contentExp > contentSize {
t.logger.Warn("Truncating dangling indexes", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize))
t.logger.Warn("Truncating dangling indexes", "indexes", offsetsSize/indexEntrySize, "indexed", contentExp, "stored", contentSize)
if err := truncateFreezerFile(t.index, offsetsSize-indexEntrySize); err != nil {
return err
}
@ -343,7 +345,11 @@ func (t *freezerTable) repair() error {
if err := t.preopen(); err != nil {
return err
}
t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes))
if verbose {
t.logger.Info("Chain freezer table opened", "items", t.items, "size", t.headBytes)
} else {
t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes))
}
return nil
}
@ -553,21 +559,31 @@ func (t *freezerTable) Close() error {
defer t.lock.Unlock()
var errs []error
if err := t.index.Close(); err != nil {
errs = append(errs, err)
}
t.index = nil
if err := t.meta.Close(); err != nil {
errs = append(errs, err)
}
t.meta = nil
for _, f := range t.files {
if err := f.Close(); err != nil {
errs = append(errs, err)
doClose := func(f *os.File, sync bool, close bool) {
if sync && !t.readonly {
if err := f.Sync(); err != nil {
errs = append(errs, err)
}
}
if close {
if err := f.Close(); err != nil {
errs = append(errs, err)
}
}
}
// Trying to fsync a file opened in rdonly causes "Access denied"
// error on Windows.
doClose(t.index, true, true)
doClose(t.meta, true, true)
// The preopened non-head data-files are all opened in readonly.
// The head is opened in rw-mode, so we sync it here - but since it's also
// part of t.files, it will be closed in the loop below.
doClose(t.head, true, false) // sync but do not close
for _, f := range t.files {
doClose(f, false, true) // close but do not sync
}
t.index = nil
t.meta = nil
t.head = nil
if errs != nil {
@ -724,7 +740,7 @@ func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []i
defer t.lock.RUnlock()
// Ensure the table and the item are accessible
if t.index == nil || t.head == nil {
if t.index == nil || t.head == nil || t.meta == nil {
return nil, nil, errClosed
}
var (
@ -872,7 +888,9 @@ func (t *freezerTable) advanceHead() error {
func (t *freezerTable) Sync() error {
t.lock.Lock()
defer t.lock.Unlock()
if t.index == nil || t.head == nil || t.meta == nil {
return errClosed
}
var err error
trackError := func(e error) {
if e != nil && err == nil {
@ -903,7 +921,8 @@ func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) {
fmt.Fprintf(w, "Failed to decode freezer table %v\n", err)
return
}
fmt.Fprintf(w, "Version %d deleted %d, hidden %d\n", meta.Version, atomic.LoadUint64(&t.itemOffset), atomic.LoadUint64(&t.itemHidden))
fmt.Fprintf(w, "Version %d count %d, deleted %d, hidden %d\n", meta.Version,
atomic.LoadUint64(&t.items), atomic.LoadUint64(&t.itemOffset), atomic.LoadUint64(&t.itemHidden))
buf := make([]byte, indexEntrySize)

@ -407,3 +407,25 @@ func TestRenameWindows(t *testing.T) {
t.Errorf("unexpected file contents. Got %v\n", buf)
}
}
func TestFreezerCloseSync(t *testing.T) {
t.Parallel()
f, _ := newFreezerForTesting(t, map[string]bool{"a": true, "b": true})
defer f.Close()
// Now, close and sync. This mimics the behaviour if the node is shut down,
// just as the chain freezer is writing.
// 1: thread-1: chain treezer writes, via freezeRange (holds lock)
// 2: thread-2: Close called, waits for write to finish
// 3: thread-1: finishes writing, releases lock
// 4: thread-2: obtains lock, completes Close()
// 5: thread-1: calls f.Sync()
if err := f.Close(); err != nil {
t.Fatal(err)
}
if err := f.Sync(); err == nil {
t.Fatalf("want error, have nil")
} else if have, want := err.Error(), "[closed closed]"; have != want {
t.Fatalf("want %v, have %v", have, want)
}
}