From bf16a39876cb78b0341cf03450ac82a6bc6840e9 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 17 Apr 2024 13:52:08 +0800 Subject: [PATCH] ethdb/pebble: print warning log if pebble performance degrades (#29478) --- ethdb/pebble/pebble.go | 43 +++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 0109ca977..aa339bf2d 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -48,6 +48,9 @@ const ( // numLevels is the level number of pebble sst files numLevels = 7 + // degradationWarnInterval specifies how often warning should be printed if the + // leveldb database cannot keep up with requested writes. + degradationWarnInterval = time.Minute ) // Database is a persistent key-value store based on the pebble storage engine. @@ -79,14 +82,16 @@ type Database struct { log log.Logger // Contextual logger tracking the database path - activeComp int // Current number of active compactions - compStartTime time.Time // The start time of the earliest currently-active compaction - compTime atomic.Int64 // Total time spent in compaction in ns - level0Comp atomic.Uint32 // Total number of level-zero compactions - nonLevel0Comp atomic.Uint32 // Total number of non level-zero compactions - writeDelayStartTime time.Time // The start time of the latest write stall - writeDelayCount atomic.Int64 // Total number of write stall counts - writeDelayTime atomic.Int64 // Total time spent in write stalls + activeComp int // Current number of active compactions + compStartTime time.Time // The start time of the earliest currently-active compaction + compTime atomic.Int64 // Total time spent in compaction in ns + level0Comp atomic.Uint32 // Total number of level-zero compactions + nonLevel0Comp atomic.Uint32 // Total number of non level-zero compactions + + writeStalled atomic.Bool // Flag whether the write is stalled + writeDelayStartTime time.Time // The start time of the latest write stall + writeDelayCount atomic.Int64 // Total number of write stall counts + writeDelayTime atomic.Int64 // Total time spent in write stalls writeOptions *pebble.WriteOptions } @@ -115,10 +120,13 @@ func (d *Database) onCompactionEnd(info pebble.CompactionInfo) { func (d *Database) onWriteStallBegin(b pebble.WriteStallBeginInfo) { d.writeDelayStartTime = time.Now() + d.writeDelayCount.Add(1) + d.writeStalled.Store(true) } func (d *Database) onWriteStallEnd() { d.writeDelayTime.Add(int64(time.Since(d.writeDelayStartTime))) + d.writeStalled.Store(false) } // panicLogger is just a noop logger to disable Pebble's internal logger. @@ -461,13 +469,15 @@ func (d *Database) meter(refresh time.Duration, namespace string) { // Create storage and warning log tracer for write delay. var ( - compTimes [2]int64 - writeDelayTimes [2]int64 - writeDelayCounts [2]int64 - compWrites [2]int64 - compReads [2]int64 + compTimes [2]int64 + compWrites [2]int64 + compReads [2]int64 nWrites [2]int64 + + writeDelayTimes [2]int64 + writeDelayCounts [2]int64 + lastWriteStallReport time.Time ) // Iterate ad infinitum and collect the stats @@ -507,6 +517,13 @@ func (d *Database) meter(refresh time.Duration, namespace string) { if d.writeDelayMeter != nil { d.writeDelayMeter.Mark(writeDelayTimes[i%2] - writeDelayTimes[(i-1)%2]) } + // Print a warning log if writing has been stalled for a while. The log will + // be printed per minute to avoid overwhelming users. + if d.writeStalled.Load() && writeDelayCounts[i%2] == writeDelayCounts[(i-1)%2] && + time.Now().After(lastWriteStallReport.Add(degradationWarnInterval)) { + d.log.Warn("Database compacting, degraded performance") + lastWriteStallReport = time.Now() + } if d.compTimeMeter != nil { d.compTimeMeter.Mark(compTimes[i%2] - compTimes[(i-1)%2]) }