ethdb/pebble: print warning log if pebble performance degrades (#29478)
commit bf16a39876 (parent 2c8720016d)
@@ -48,6 +48,9 @@ const (

     // numLevels is the level number of pebble sst files
     numLevels = 7
+    // degradationWarnInterval specifies how often warning should be printed if the
+    // leveldb database cannot keep up with requested writes.
+    degradationWarnInterval = time.Minute
 )

 // Database is a persistent key-value store based on the pebble storage engine.
@@ -79,14 +82,16 @@ type Database struct {

     log log.Logger // Contextual logger tracking the database path

     activeComp    int           // Current number of active compactions
     compStartTime time.Time     // The start time of the earliest currently-active compaction
     compTime      atomic.Int64  // Total time spent in compaction in ns
     level0Comp    atomic.Uint32 // Total number of level-zero compactions
     nonLevel0Comp atomic.Uint32 // Total number of non level-zero compactions
-    writeDelayStartTime time.Time    // The start time of the latest write stall
-    writeDelayCount     atomic.Int64 // Total number of write stall counts
-    writeDelayTime      atomic.Int64 // Total time spent in write stalls
+
+    writeStalled        atomic.Bool  // Flag whether the write is stalled
+    writeDelayStartTime time.Time    // The start time of the latest write stall
+    writeDelayCount     atomic.Int64 // Total number of write stall counts
+    writeDelayTime      atomic.Int64 // Total time spent in write stalls

     writeOptions *pebble.WriteOptions
 }
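The new writeStalled field is an atomic.Bool because it is toggled from Pebble's event-listener callbacks (see the next hunk) while being read from the metering goroutine. A minimal standalone sketch of that pattern, not taken from the diff, with hypothetical names:

package example

import (
    "fmt"
    "sync/atomic"
    "time"
)

// demo: one goroutine (standing in for Pebble's stall callbacks) toggles the flag,
// another (standing in for the metering loop) samples it concurrently.
func demo() {
    var writeStalled atomic.Bool

    go func() {
        writeStalled.Store(true) // what onWriteStallBegin does
        time.Sleep(50 * time.Millisecond)
        writeStalled.Store(false) // what onWriteStallEnd does
    }()

    for i := 0; i < 3; i++ {
        fmt.Println("write stalled:", writeStalled.Load())
        time.Sleep(30 * time.Millisecond)
    }
}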
@@ -115,10 +120,13 @@ func (d *Database) onCompactionEnd(info pebble.CompactionInfo) {

 func (d *Database) onWriteStallBegin(b pebble.WriteStallBeginInfo) {
     d.writeDelayStartTime = time.Now()
+    d.writeDelayCount.Add(1)
+    d.writeStalled.Store(true)
 }

 func (d *Database) onWriteStallEnd() {
     d.writeDelayTime.Add(int64(time.Since(d.writeDelayStartTime)))
+    d.writeStalled.Store(false)
 }

 // panicLogger is just a noop logger to disable Pebble's internal logger.
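These callbacks only fire if they are registered with Pebble when the store is opened; that wiring sits outside the hunks shown here. A minimal sketch of the registration, assuming a pebble release whose Options.EventListener field is a *pebble.EventListener; the wrapper type and option values below are placeholders, not the geth code:

package example

import (
    "log"

    "github.com/cockroachdb/pebble"
)

// store is a hypothetical wrapper standing in for ethdb/pebble.Database.
type store struct{ db *pebble.DB }

func (s *store) onWriteStallBegin(pebble.WriteStallBeginInfo) { log.Println("write stall began") }
func (s *store) onWriteStallEnd()                             { log.Println("write stall ended") }

func open(path string) (*store, error) {
    s := new(store)
    opt := &pebble.Options{
        // The real code also configures caches, file handles, compaction levels, etc.
        EventListener: &pebble.EventListener{
            WriteStallBegin: s.onWriteStallBegin,
            WriteStallEnd:   s.onWriteStallEnd,
        },
    }
    db, err := pebble.Open(path, opt)
    if err != nil {
        return nil, err
    }
    s.db = db
    return s, nil
}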
@@ -461,13 +469,15 @@ func (d *Database) meter(refresh time.Duration, namespace string) {

     // Create storage and warning log tracer for write delay.
     var (
-        compTimes        [2]int64
-        writeDelayTimes  [2]int64
-        writeDelayCounts [2]int64
-        compWrites       [2]int64
-        compReads        [2]int64
+        compTimes  [2]int64
+        compWrites [2]int64
+        compReads  [2]int64

         nWrites [2]int64
+
+        writeDelayTimes      [2]int64
+        writeDelayCounts     [2]int64
+        lastWriteStallReport time.Time
     )

     // Iterate ad infinitum and collect the stats
@@ -507,6 +517,13 @@ func (d *Database) meter(refresh time.Duration, namespace string) {
         if d.writeDelayMeter != nil {
             d.writeDelayMeter.Mark(writeDelayTimes[i%2] - writeDelayTimes[(i-1)%2])
         }
+        // Print a warning log if writing has been stalled for a while. The log will
+        // be printed per minute to avoid overwhelming users.
+        if d.writeStalled.Load() && writeDelayCounts[i%2] == writeDelayCounts[(i-1)%2] &&
+            time.Now().After(lastWriteStallReport.Add(degradationWarnInterval)) {
+            d.log.Warn("Database compacting, degraded performance")
+            lastWriteStallReport = time.Now()
+        }
         if d.compTimeMeter != nil {
             d.compTimeMeter.Mark(compTimes[i%2] - compTimes[(i-1)%2])
         }
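The guard above warns only when the stall flag is set and the stall counter has not moved since the previous sample, i.e. the same stall has persisted for a full metrics interval; lastWriteStallReport then limits the warning to once per degradationWarnInterval. A standalone sketch of that rate-limiting pattern, with hypothetical names rather than the geth code:

package main

import (
    "log"
    "time"
)

const degradationWarnInterval = time.Minute

func main() {
    var (
        lastWriteStallReport time.Time
        stalled              = true // pretend the write path is currently stalled
    )
    for i := 0; i < 3; i++ {
        // Warn at most once per degradationWarnInterval while the stall persists.
        if stalled && time.Now().After(lastWriteStallReport.Add(degradationWarnInterval)) {
            log.Println("Database compacting, degraded performance")
            lastWriteStallReport = time.Now()
        }
        time.Sleep(10 * time.Millisecond)
    }
}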