metrics: improve reading Go runtime metrics (#25886)

This changes how we read performance metrics from the Go runtime. Instead of using runtime.ReadMemStats, we now rely on the API provided by package runtime/metrics. runtime/metrics provides more accurate information. For example, the new interface has better reporting of memory use. In my testing, the reported value of held memory more accurately reflects the usage reported by the OS. The semantics of metrics system/memory/allocs and system/memory/frees have changed to report amounts in bytes. ReadMemStats only reported the count of allocations in number-of-objects. This is imprecise: 'tiny objects' are not counted because the runtime allocates them in batches; and certain improvements in allocation behavior, such as struct size optimizations, will be less visible when the number of allocs doesn't change. Changing allocation reports to be in bytes makes it appear in graphs that lots more is being allocated. I don't think that's a problem because this metric is primarily interesting for geth developers. The metric system/memory/pauses has been changed to report statistical values from the histogram provided by the runtime. Its name in influxdb has changed from geth.system/memory/pauses.meter to geth.system/memory/pauses.histogram. We also have a new histogram metric, system/cpu/schedlatency, reporting the Go scheduler latency.
2022-11-11 13:16:13 +01:00 · 2022-11-11 13:16:13 +01:00 · c539bda166
commit c539bda166
parent 62c973eba6
11 changed files with 598 additions and 427 deletions
--- a/metrics/influxdb/influxdb.go
+++ b/metrics/influxdb/influxdb.go
@ -160,26 +160,27 @@ func (r *reporter) send() error {
 			})
 		case metrics.Histogram:
 			ms := metric.Snapshot()
 			if ms.Count() > 0 {
-				ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999})
+				ps := ms.Percentiles([]float64{0.25, 0.5, 0.75, 0.95, 0.99, 0.999, 0.9999})
-				pts = append(pts, client.Point{
+				fields := map[string]interface{}{
 					Measurement: fmt.Sprintf("%s%s.histogram", namespace, name),
 					Tags:        r.tags,
 					Fields: map[string]interface{}{
 					"count":    ms.Count(),
 					"max":      ms.Max(),
 					"mean":     ms.Mean(),
 					"min":      ms.Min(),
 					"stddev":   ms.StdDev(),
 					"variance": ms.Variance(),
-						"p50":      ps[0],
+					"p25":      ps[0],
-						"p75":      ps[1],
+					"p50":      ps[1],
-						"p95":      ps[2],
+					"p75":      ps[2],
-						"p99":      ps[3],
+					"p95":      ps[3],
-						"p999":     ps[4],
+					"p99":      ps[4],
-						"p9999":    ps[5],
+					"p999":     ps[5],
-					},
+					"p9999":    ps[6],
 				}
 				pts = append(pts, client.Point{
 					Measurement: fmt.Sprintf("%s%s.histogram", namespace, name),
 					Tags:        r.tags,
 					Fields:      fields,
 					Time:        now,
 				})
 			}
--- a/metrics/metrics.go
+++ b/metrics/metrics.go
@ -7,7 +7,8 @@ package metrics
 import (
 	"os"
-	"runtime"
+	"runtime/metrics"
 	"runtime/pprof"
 	"strings"
 	"time"
@ -54,24 +55,92 @@ func init() {
 	}
 }
-// CollectProcessMetrics periodically collects various metrics about the running
+var threadCreateProfile = pprof.Lookup("threadcreate")
-// process.
+
 // runtimeStats holds one reading of the Go runtime metrics. readRuntimeStats
 // fills each field from the runtime/metrics sample named in the comment above it.
 type runtimeStats struct {
 	// "/gc/pauses:seconds"
 	GCPauses     *metrics.Float64Histogram
 	// "/gc/heap/allocs:bytes"
 	GCAllocBytes uint64
 	// "/gc/heap/frees:bytes"
 	GCFreedBytes uint64
 	// "/memory/classes/total:bytes"
 	MemTotal     uint64
 	// "/memory/classes/heap/objects:bytes"
 	HeapObjects  uint64
 	// "/memory/classes/heap/free:bytes"
 	HeapFree     uint64
 	// "/memory/classes/heap/released:bytes"
 	HeapReleased uint64
 	// "/memory/classes/heap/unused:bytes"
 	HeapUnused   uint64
 	// "/sched/goroutines:goroutines"
 	Goroutines   uint64
 	// "/sched/latencies:seconds"
 	SchedLatency *metrics.Float64Histogram
 }
 // runtimeSamples is the list of runtime/metrics samples that readRuntimeStats
 // passes to metrics.Read. Every name listed here has a matching case in the
 // switch inside readRuntimeStats; the two lists have to be kept in sync.
 var runtimeSamples = []metrics.Sample{
 	{Name: "/gc/pauses:seconds"}, // histogram
 	{Name: "/gc/heap/allocs:bytes"},
 	{Name: "/gc/heap/frees:bytes"},
 	{Name: "/memory/classes/total:bytes"},
 	{Name: "/memory/classes/heap/objects:bytes"},
 	{Name: "/memory/classes/heap/free:bytes"},
 	{Name: "/memory/classes/heap/released:bytes"},
 	{Name: "/memory/classes/heap/unused:bytes"},
 	{Name: "/sched/goroutines:goroutines"},
 	{Name: "/sched/latencies:seconds"}, // histogram
 }
 // readRuntimeStats refreshes the package-level runtimeSamples via metrics.Read
 // and copies every sample that has a valid value into the matching field of v.
 // Fields whose sample is reported as KindBad are left untouched.
 func readRuntimeStats(v *runtimeStats) {
 	metrics.Read(runtimeSamples)
 	for i := range runtimeSamples {
 		name, value := runtimeSamples[i].Name, runtimeSamples[i].Value
 		// Metrics that the running Go version doesn't know come back as KindBad.
 		// Calling an accessor on such a value panics, so they are skipped.
 		if value.Kind() == metrics.KindBad {
 			continue
 		}
 		switch name {
 		// Histograms.
 		case "/gc/pauses:seconds":
 			v.GCPauses = value.Float64Histogram()
 		case "/sched/latencies:seconds":
 			v.SchedLatency = value.Float64Histogram()
 		// Plain counters and gauges.
 		case "/gc/heap/allocs:bytes":
 			v.GCAllocBytes = value.Uint64()
 		case "/gc/heap/frees:bytes":
 			v.GCFreedBytes = value.Uint64()
 		case "/memory/classes/total:bytes":
 			v.MemTotal = value.Uint64()
 		case "/memory/classes/heap/objects:bytes":
 			v.HeapObjects = value.Uint64()
 		case "/memory/classes/heap/free:bytes":
 			v.HeapFree = value.Uint64()
 		case "/memory/classes/heap/released:bytes":
 			v.HeapReleased = value.Uint64()
 		case "/memory/classes/heap/unused:bytes":
 			v.HeapUnused = value.Uint64()
 		case "/sched/goroutines:goroutines":
 			v.Goroutines = value.Uint64()
 		}
 	}
 }
 // CollectProcessMetrics periodically collects various metrics about the running process.
 func CollectProcessMetrics(refresh time.Duration) {
 	// Short circuit if the metrics system is disabled
 	if !Enabled {
 		return
 	}
 	refreshFreq := int64(refresh / time.Second)
 	// Create the various data collectors
-	cpuStats := make([]*CPUStats, 2)
+	var (
-	memstats := make([]*runtime.MemStats, 2)
+		cpustats  = make([]CPUStats, 2)
-	diskstats := make([]*DiskStats, 2)
+		diskstats = make([]DiskStats, 2)
-	for i := 0; i < len(memstats); i++ {
+		rstats    = make([]runtimeStats, 2)
-		cpuStats[i] = new(CPUStats)
+	)
-		memstats[i] = new(runtime.MemStats)
+
-		diskstats[i] = new(DiskStats)
+	// This scale factor is used for the runtime's time metrics. It's useful to convert to
-	}
+	// ns here because the runtime gives times in float seconds, but runtimeHistogram can
 	// only provide integers for the minimum and maximum values.
 	const secondsToNs = float64(time.Second)
 	// Define the various metrics to collect
 	var (
 		cpuSysLoad            = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry)
@ -79,13 +148,13 @@ func CollectProcessMetrics(refresh time.Duration) {
 		cpuProcLoad           = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry)
 		cpuThreads            = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry)
 		cpuGoroutines         = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry)
-
+		cpuSchedLatency       = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", secondsToNs, nil)
-		memPauses = GetOrRegisterMeter("system/memory/pauses", DefaultRegistry)
+		memPauses             = getOrRegisterRuntimeHistogram("system/memory/pauses", secondsToNs, nil)
 		memAllocs             = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry)
 		memFrees              = GetOrRegisterMeter("system/memory/frees", DefaultRegistry)
-		memHeld   = GetOrRegisterGauge("system/memory/held", DefaultRegistry)
+		memTotal              = GetOrRegisterGauge("system/memory/held", DefaultRegistry)
-		memUsed   = GetOrRegisterGauge("system/memory/used", DefaultRegistry)
+		heapUsed              = GetOrRegisterGauge("system/memory/used", DefaultRegistry)
-
+		heapObjects           = GetOrRegisterGauge("system/memory/objects", DefaultRegistry)
 		diskReads             = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry)
 		diskReadBytes         = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry)
 		diskReadBytesCounter  = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry)
@ -93,34 +162,43 @@ func CollectProcessMetrics(refresh time.Duration) {
 		diskWriteBytes        = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry)
 		diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry)
 	)
 	// Iterate loading the different stats and updating the meters
 	for i := 1; ; i++ {
 		location1 := i % 2
 		location2 := (i - 1) % 2
-		ReadCPUStats(cpuStats[location1])
+	// Iterate loading the different stats and updating the meters.
-		cpuSysLoad.Update((cpuStats[location1].GlobalTime - cpuStats[location2].GlobalTime) / refreshFreq)
+	now, prev := 0, 1
-		cpuSysWait.Update((cpuStats[location1].GlobalWait - cpuStats[location2].GlobalWait) / refreshFreq)
+	for ; ; now, prev = prev, now {
-		cpuProcLoad.Update((cpuStats[location1].LocalTime - cpuStats[location2].LocalTime) / refreshFreq)
+		// CPU
 		ReadCPUStats(&cpustats[now])
 		cpuSysLoad.Update((cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / refreshFreq)
 		cpuSysWait.Update((cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / refreshFreq)
 		cpuProcLoad.Update((cpustats[now].LocalTime - cpustats[prev].LocalTime) / refreshFreq)
 		// Threads
 		cpuThreads.Update(int64(threadCreateProfile.Count()))
 		cpuGoroutines.Update(int64(runtime.NumGoroutine()))
-		runtime.ReadMemStats(memstats[location1])
+		// Go runtime metrics
-		memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs))
+		readRuntimeStats(&rstats[now])
 		memAllocs.Mark(int64(memstats[location1].Mallocs - memstats[location2].Mallocs))
 		memFrees.Mark(int64(memstats[location1].Frees - memstats[location2].Frees))
 		memHeld.Update(int64(memstats[location1].HeapSys - memstats[location1].HeapReleased))
 		memUsed.Update(int64(memstats[location1].Alloc))
-		if ReadDiskStats(diskstats[location1]) == nil {
+		cpuGoroutines.Update(int64(rstats[now].Goroutines))
-			diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount)
+		cpuSchedLatency.update(rstats[now].SchedLatency)
-			diskReadBytes.Mark(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes)
+		memPauses.update(rstats[now].GCPauses)
 			diskWrites.Mark(diskstats[location1].WriteCount - diskstats[location2].WriteCount)
 			diskWriteBytes.Mark(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes)
-			diskReadBytesCounter.Inc(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes)
+		memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes))
-			diskWriteBytesCounter.Inc(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes)
+		memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes))
 		memTotal.Update(int64(rstats[now].MemTotal))
 		heapUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapUnused - rstats[now].HeapFree - rstats[now].HeapReleased))
 		heapObjects.Update(int64(rstats[now].HeapObjects))
 		// Disk
 		if ReadDiskStats(&diskstats[now]) == nil {
 			diskReads.Mark(diskstats[now].ReadCount - diskstats[prev].ReadCount)
 			diskReadBytes.Mark(diskstats[now].ReadBytes - diskstats[prev].ReadBytes)
 			diskWrites.Mark(diskstats[now].WriteCount - diskstats[prev].WriteCount)
 			diskWriteBytes.Mark(diskstats[now].WriteBytes - diskstats[prev].WriteBytes)
 			diskReadBytesCounter.Inc(diskstats[now].ReadBytes - diskstats[prev].ReadBytes)
 			diskWriteBytesCounter.Inc(diskstats[now].WriteBytes - diskstats[prev].WriteBytes)
 		}
 		time.Sleep(refresh)
 	}
 }
--- a/metrics/metrics_test.go
+++ b/metrics/metrics_test.go
@ -2,8 +2,6 @@ package metrics
 import (
 	"fmt"
 	"io"
 	"log"
 	"sync"
 	"testing"
 	"time"
@ -11,11 +9,11 @@ import (
 const FANOUT = 128
-// Stop the compiler from complaining during debugging.
+func TestReadRuntimeValues(t *testing.T) {
-var (
+	var v runtimeStats
-	_ = io.Discard
+	readRuntimeStats(&v)
-	_ = log.LstdFlags
+	t.Logf("%+v", v)
-)
+}
 func BenchmarkMetrics(b *testing.B) {
 	r := NewRegistry()
@ -26,7 +24,6 @@ func BenchmarkMetrics(b *testing.B) {
 	m := NewRegisteredMeter("meter", r)
 	t := NewRegisteredTimer("timer", r)
 	RegisterDebugGCStats(r)
 	RegisterRuntimeMemStats(r)
 	b.ResetTimer()
 	ch := make(chan bool)
@ -48,24 +45,6 @@ func BenchmarkMetrics(b *testing.B) {
 		}()
 	//*/
 	wgR := &sync.WaitGroup{}
 	//*
 	wgR.Add(1)
 	go func() {
 		defer wgR.Done()
 		//log.Println("go CaptureRuntimeMemStats")
 		for {
 			select {
 			case <-ch:
 				//log.Println("done CaptureRuntimeMemStats")
 				return
 			default:
 				CaptureRuntimeMemStatsOnce(r)
 			}
 		}
 	}()
 	//*/
 	wgW := &sync.WaitGroup{}
 	/*
 		wgW.Add(1)
@ -104,7 +83,6 @@ func BenchmarkMetrics(b *testing.B) {
 	wg.Wait()
 	close(ch)
 	wgD.Wait()
 	wgR.Wait()
 	wgW.Wait()
 }
--- a/metrics/runtime.go
+++ b/metrics/runtime.go
@ -1,212 +0,0 @@
 package metrics
 import (
 	"runtime"
 	"runtime/pprof"
 	"time"
 )
 // Package-level state shared by RegisterRuntimeMemStats and
 // CaptureRuntimeMemStatsOnce.
 var (
 	// memStats is the buffer that runtime.ReadMemStats fills on every capture.
 	memStats       runtime.MemStats
 	// runtimeMetrics holds the metric objects created by RegisterRuntimeMemStats
 	// and updated by CaptureRuntimeMemStatsOnce.
 	runtimeMetrics struct {
 		MemStats struct {
 			Alloc         Gauge
 			BuckHashSys   Gauge
 			DebugGC       Gauge
 			EnableGC      Gauge
 			Frees         Gauge
 			HeapAlloc     Gauge
 			HeapIdle      Gauge
 			HeapInuse     Gauge
 			HeapObjects   Gauge
 			HeapReleased  Gauge
 			HeapSys       Gauge
 			LastGC        Gauge
 			Lookups       Gauge
 			Mallocs       Gauge
 			MCacheInuse   Gauge
 			MCacheSys     Gauge
 			MSpanInuse    Gauge
 			MSpanSys      Gauge
 			NextGC        Gauge
 			NumGC         Gauge
 			GCCPUFraction GaugeFloat64
 			PauseNs       Histogram
 			PauseTotalNs  Gauge
 			StackInuse    Gauge
 			StackSys      Gauge
 			Sys           Gauge
 			TotalAlloc    Gauge
 		}
 		NumCgoCall   Gauge
 		NumGoroutine Gauge
 		NumThread    Gauge
 		ReadMemStats Timer
 	}
 	// The counters below remember the values seen by the previous capture, so
 	// that CaptureRuntimeMemStatsOnce can report differences between captures.
 	frees       uint64
 	lookups     uint64
 	mallocs     uint64
 	numGC       uint32
 	numCgoCalls int64
 	// threadCreateProfile is the "threadcreate" pprof profile; its Count is
 	// reported as the NumThread gauge.
 	threadCreateProfile = pprof.Lookup("threadcreate")
 )
 // CaptureRuntimeMemStats calls CaptureRuntimeMemStatsOnce with r every time the
 // interval d elapses. The loop never ends, so the function is meant to be
 // started in its own goroutine.
 func CaptureRuntimeMemStats(r Registry, d time.Duration) {
 	ticks := time.Tick(d)
 	for {
 		<-ticks
 		CaptureRuntimeMemStatsOnce(r)
 	}
 }
 // CaptureRuntimeMemStatsOnce captures new values for the Go runtime statistics
 // exported in runtime.MemStats.  This is designed to be called in a background
 // goroutine.  Giving a registry which has not been given to
 // RegisterRuntimeMemStats will panic.
 //
 // NOTE(review): the body never references r; all values are written to the
 // package-level runtimeMetrics objects, so what matters is that
 // RegisterRuntimeMemStats has been called beforehand.
 //
 // Be very careful with this because runtime.ReadMemStats calls the C
 // functions runtime·semacquire(&runtime·worldsema) and runtime·stoptheworld()
 // and that last one does what it says on the tin.
 func CaptureRuntimeMemStatsOnce(r Registry) {
 	// Time the ReadMemStats call itself and record it in the ReadMemStats timer.
 	t := time.Now()
 	runtime.ReadMemStats(&memStats) // This takes 50-200us.
 	runtimeMetrics.ReadMemStats.UpdateSince(t)
 	runtimeMetrics.MemStats.Alloc.Update(int64(memStats.Alloc))
 	runtimeMetrics.MemStats.BuckHashSys.Update(int64(memStats.BuckHashSys))
 	// Boolean fields are reported as 0/1 gauges.
 	if memStats.DebugGC {
 		runtimeMetrics.MemStats.DebugGC.Update(1)
 	} else {
 		runtimeMetrics.MemStats.DebugGC.Update(0)
 	}
 	if memStats.EnableGC {
 		runtimeMetrics.MemStats.EnableGC.Update(1)
 	} else {
 		runtimeMetrics.MemStats.EnableGC.Update(0)
 	}
 	// Frees, Lookups, Mallocs and NumGC are reported as the difference to the
 	// value remembered from the previous capture.
 	runtimeMetrics.MemStats.Frees.Update(int64(memStats.Frees - frees))
 	runtimeMetrics.MemStats.HeapAlloc.Update(int64(memStats.HeapAlloc))
 	runtimeMetrics.MemStats.HeapIdle.Update(int64(memStats.HeapIdle))
 	runtimeMetrics.MemStats.HeapInuse.Update(int64(memStats.HeapInuse))
 	runtimeMetrics.MemStats.HeapObjects.Update(int64(memStats.HeapObjects))
 	runtimeMetrics.MemStats.HeapReleased.Update(int64(memStats.HeapReleased))
 	runtimeMetrics.MemStats.HeapSys.Update(int64(memStats.HeapSys))
 	runtimeMetrics.MemStats.LastGC.Update(int64(memStats.LastGC))
 	runtimeMetrics.MemStats.Lookups.Update(int64(memStats.Lookups - lookups))
 	runtimeMetrics.MemStats.Mallocs.Update(int64(memStats.Mallocs - mallocs))
 	runtimeMetrics.MemStats.MCacheInuse.Update(int64(memStats.MCacheInuse))
 	runtimeMetrics.MemStats.MCacheSys.Update(int64(memStats.MCacheSys))
 	runtimeMetrics.MemStats.MSpanInuse.Update(int64(memStats.MSpanInuse))
 	runtimeMetrics.MemStats.MSpanSys.Update(int64(memStats.MSpanSys))
 	runtimeMetrics.MemStats.NextGC.Update(int64(memStats.NextGC))
 	runtimeMetrics.MemStats.NumGC.Update(int64(memStats.NumGC - numGC))
 	runtimeMetrics.MemStats.GCCPUFraction.Update(gcCPUFraction(&memStats))
 	// <https://code.google.com/p/go/source/browse/src/pkg/runtime/mgc0.c>
 	// PauseNs is read as a ring buffer: i is the slot derived from the GC count
 	// of the previous capture, ii the slot derived from the current GC count.
 	i := numGC % uint32(len(memStats.PauseNs))
 	ii := memStats.NumGC % uint32(len(memStats.PauseNs))
 	if memStats.NumGC-numGC >= uint32(len(memStats.PauseNs)) {
 		// At least a full buffer of GCs happened since the last capture, so
 		// every slot is recorded once; older pauses have been overwritten.
 		for i = 0; i < uint32(len(memStats.PauseNs)); i++ {
 			runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i]))
 		}
 	} else {
 		if i > ii {
 			// The new entries wrap around the end of the buffer: record the
 			// tail first, then continue from slot zero.
 			for ; i < uint32(len(memStats.PauseNs)); i++ {
 				runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i]))
 			}
 			i = 0
 		}
 		for ; i < ii; i++ {
 			runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i]))
 		}
 	}
 	// Remember the current counters for the next capture.
 	frees = memStats.Frees
 	lookups = memStats.Lookups
 	mallocs = memStats.Mallocs
 	numGC = memStats.NumGC
 	runtimeMetrics.MemStats.PauseTotalNs.Update(int64(memStats.PauseTotalNs))
 	runtimeMetrics.MemStats.StackInuse.Update(int64(memStats.StackInuse))
 	runtimeMetrics.MemStats.StackSys.Update(int64(memStats.StackSys))
 	runtimeMetrics.MemStats.Sys.Update(int64(memStats.Sys))
 	runtimeMetrics.MemStats.TotalAlloc.Update(int64(memStats.TotalAlloc))
 	// The cgo call count is also reported as a difference between captures.
 	currentNumCgoCalls := numCgoCall()
 	runtimeMetrics.NumCgoCall.Update(currentNumCgoCalls - numCgoCalls)
 	numCgoCalls = currentNumCgoCalls
 	runtimeMetrics.NumGoroutine.Update(int64(runtime.NumGoroutine()))
 	runtimeMetrics.NumThread.Update(int64(threadCreateProfile.Count()))
 }
 // RegisterRuntimeMemStats creates the runtimeMetrics for the Go runtime
 // statistics exported in runtime and specifically runtime.MemStats, and
 // registers them with r.  The runtimeMetrics are named by their
 // fully-qualified Go symbols, i.e. runtime.MemStats.Alloc.
 //
 // Every call replaces the package-level metric objects with fresh ones. The
 // return values of r.Register are not checked.
 func RegisterRuntimeMemStats(r Registry) {
 	// Step 1: create the metric objects.
 	runtimeMetrics.MemStats.Alloc = NewGauge()
 	runtimeMetrics.MemStats.BuckHashSys = NewGauge()
 	runtimeMetrics.MemStats.DebugGC = NewGauge()
 	runtimeMetrics.MemStats.EnableGC = NewGauge()
 	runtimeMetrics.MemStats.Frees = NewGauge()
 	runtimeMetrics.MemStats.HeapAlloc = NewGauge()
 	runtimeMetrics.MemStats.HeapIdle = NewGauge()
 	runtimeMetrics.MemStats.HeapInuse = NewGauge()
 	runtimeMetrics.MemStats.HeapObjects = NewGauge()
 	runtimeMetrics.MemStats.HeapReleased = NewGauge()
 	runtimeMetrics.MemStats.HeapSys = NewGauge()
 	runtimeMetrics.MemStats.LastGC = NewGauge()
 	runtimeMetrics.MemStats.Lookups = NewGauge()
 	runtimeMetrics.MemStats.Mallocs = NewGauge()
 	runtimeMetrics.MemStats.MCacheInuse = NewGauge()
 	runtimeMetrics.MemStats.MCacheSys = NewGauge()
 	runtimeMetrics.MemStats.MSpanInuse = NewGauge()
 	runtimeMetrics.MemStats.MSpanSys = NewGauge()
 	runtimeMetrics.MemStats.NextGC = NewGauge()
 	runtimeMetrics.MemStats.NumGC = NewGauge()
 	runtimeMetrics.MemStats.GCCPUFraction = NewGaugeFloat64()
 	runtimeMetrics.MemStats.PauseNs = NewHistogram(NewExpDecaySample(1028, 0.015))
 	runtimeMetrics.MemStats.PauseTotalNs = NewGauge()
 	runtimeMetrics.MemStats.StackInuse = NewGauge()
 	runtimeMetrics.MemStats.StackSys = NewGauge()
 	runtimeMetrics.MemStats.Sys = NewGauge()
 	runtimeMetrics.MemStats.TotalAlloc = NewGauge()
 	runtimeMetrics.NumCgoCall = NewGauge()
 	runtimeMetrics.NumGoroutine = NewGauge()
 	runtimeMetrics.NumThread = NewGauge()
 	runtimeMetrics.ReadMemStats = NewTimer()
 	// Step 2: register each object under its fully-qualified name.
 	r.Register("runtime.MemStats.Alloc", runtimeMetrics.MemStats.Alloc)
 	r.Register("runtime.MemStats.BuckHashSys", runtimeMetrics.MemStats.BuckHashSys)
 	r.Register("runtime.MemStats.DebugGC", runtimeMetrics.MemStats.DebugGC)
 	r.Register("runtime.MemStats.EnableGC", runtimeMetrics.MemStats.EnableGC)
 	r.Register("runtime.MemStats.Frees", runtimeMetrics.MemStats.Frees)
 	r.Register("runtime.MemStats.HeapAlloc", runtimeMetrics.MemStats.HeapAlloc)
 	r.Register("runtime.MemStats.HeapIdle", runtimeMetrics.MemStats.HeapIdle)
 	r.Register("runtime.MemStats.HeapInuse", runtimeMetrics.MemStats.HeapInuse)
 	r.Register("runtime.MemStats.HeapObjects", runtimeMetrics.MemStats.HeapObjects)
 	r.Register("runtime.MemStats.HeapReleased", runtimeMetrics.MemStats.HeapReleased)
 	r.Register("runtime.MemStats.HeapSys", runtimeMetrics.MemStats.HeapSys)
 	r.Register("runtime.MemStats.LastGC", runtimeMetrics.MemStats.LastGC)
 	r.Register("runtime.MemStats.Lookups", runtimeMetrics.MemStats.Lookups)
 	r.Register("runtime.MemStats.Mallocs", runtimeMetrics.MemStats.Mallocs)
 	r.Register("runtime.MemStats.MCacheInuse", runtimeMetrics.MemStats.MCacheInuse)
 	r.Register("runtime.MemStats.MCacheSys", runtimeMetrics.MemStats.MCacheSys)
 	r.Register("runtime.MemStats.MSpanInuse", runtimeMetrics.MemStats.MSpanInuse)
 	r.Register("runtime.MemStats.MSpanSys", runtimeMetrics.MemStats.MSpanSys)
 	r.Register("runtime.MemStats.NextGC", runtimeMetrics.MemStats.NextGC)
 	r.Register("runtime.MemStats.NumGC", runtimeMetrics.MemStats.NumGC)
 	r.Register("runtime.MemStats.GCCPUFraction", runtimeMetrics.MemStats.GCCPUFraction)
 	r.Register("runtime.MemStats.PauseNs", runtimeMetrics.MemStats.PauseNs)
 	r.Register("runtime.MemStats.PauseTotalNs", runtimeMetrics.MemStats.PauseTotalNs)
 	r.Register("runtime.MemStats.StackInuse", runtimeMetrics.MemStats.StackInuse)
 	r.Register("runtime.MemStats.StackSys", runtimeMetrics.MemStats.StackSys)
 	r.Register("runtime.MemStats.Sys", runtimeMetrics.MemStats.Sys)
 	r.Register("runtime.MemStats.TotalAlloc", runtimeMetrics.MemStats.TotalAlloc)
 	r.Register("runtime.NumCgoCall", runtimeMetrics.NumCgoCall)
 	r.Register("runtime.NumGoroutine", runtimeMetrics.NumGoroutine)
 	r.Register("runtime.NumThread", runtimeMetrics.NumThread)
 	r.Register("runtime.ReadMemStats", runtimeMetrics.ReadMemStats)
 }
--- a/metrics/runtime_cgo.go
+++ b/metrics/runtime_cgo.go
@ -1,10 +0,0 @@
 //go:build cgo && !appengine && !js
 // +build cgo,!appengine,!js
 package metrics
 import "runtime"
 // numCgoCall returns runtime.NumCgoCall(). This variant is compiled when the
 // build constraint at the top of this file (cgo && !appengine && !js) holds.
 func numCgoCall() int64 {
 	return runtime.NumCgoCall()
 }
--- a/metrics/runtime_gccpufraction.go
+++ b/metrics/runtime_gccpufraction.go
@ -1,10 +0,0 @@
 //go:build go1.5
 // +build go1.5
 package metrics
 import "runtime"
 // gcCPUFraction returns the GCCPUFraction field of memStats. This variant is
 // compiled under the go1.5 build constraint at the top of this file.
 func gcCPUFraction(memStats *runtime.MemStats) float64 {
 	return memStats.GCCPUFraction
 }
--- a/metrics/runtime_no_cgo.go
+++ b/metrics/runtime_no_cgo.go
@ -1,8 +0,0 @@
 //go:build !cgo || appengine || js
 // +build !cgo appengine js
 package metrics
 // numCgoCall always returns 0. This variant is compiled when the build
 // constraint at the top of this file (!cgo || appengine || js) holds.
 func numCgoCall() int64 {
 	return 0
 }
--- a/metrics/runtime_no_gccpufraction.go
+++ b/metrics/runtime_no_gccpufraction.go
@ -1,10 +0,0 @@
 //go:build !go1.5
 // +build !go1.5
 package metrics
 import "runtime"
 // gcCPUFraction always returns 0 and ignores memStats. This variant is compiled
 // under the !go1.5 build constraint at the top of this file.
 func gcCPUFraction(memStats *runtime.MemStats) float64 {
 	return 0
 }
--- a/metrics/runtime_test.go
+++ b/metrics/runtime_test.go
@ -1,88 +0,0 @@
 package metrics
 import (
 	"runtime"
 	"testing"
 	"time"
 )
 // BenchmarkRuntimeMemStats measures the cost of one CaptureRuntimeMemStatsOnce
 // call against a freshly registered registry.
 func BenchmarkRuntimeMemStats(b *testing.B) {
 	r := NewRegistry()
 	RegisterRuntimeMemStats(r)
 	// Exclude registry setup from the measurement.
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		CaptureRuntimeMemStatsOnce(r)
 	}
 }
 // TestRuntimeMemStats checks that the PauseNs histogram gains one sample per
 // garbage collection between captures, including the case where more GCs run
 // between two captures than the check below expects to be recorded.
 func TestRuntimeMemStats(t *testing.T) {
 	r := NewRegistry()
 	RegisterRuntimeMemStats(r)
 	CaptureRuntimeMemStatsOnce(r)
 	zero := runtimeMetrics.MemStats.PauseNs.Count() // Get a "zero" since GC may have run before these tests.
 	// One GC: exactly one new pause sample.
 	runtime.GC()
 	CaptureRuntimeMemStatsOnce(r)
 	if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 1 {
 		t.Fatal(count - zero)
 	}
 	// Two more GCs: three samples in total.
 	runtime.GC()
 	runtime.GC()
 	CaptureRuntimeMemStatsOnce(r)
 	if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 3 {
 		t.Fatal(count - zero)
 	}
 	// 256 GCs between captures: all of them are still expected to be counted.
 	for i := 0; i < 256; i++ {
 		runtime.GC()
 	}
 	CaptureRuntimeMemStatsOnce(r)
 	if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 259 {
 		t.Fatal(count - zero)
 	}
 	// 257 GCs between captures: only 256 new samples are expected.
 	for i := 0; i < 257; i++ {
 		runtime.GC()
 	}
 	CaptureRuntimeMemStatsOnce(r)
 	if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 515 { // We lost one because there were too many GCs between captures.
 		t.Fatal(count - zero)
 	}
 }
 // TestRuntimeMemStatsNumThread checks that the NumThread gauge reports at least
 // one thread after a capture.
 func TestRuntimeMemStatsNumThread(t *testing.T) {
 	r := NewRegistry()
 	RegisterRuntimeMemStats(r)
 	CaptureRuntimeMemStatsOnce(r)
 	if value := runtimeMetrics.NumThread.Value(); value < 1 {
 		t.Fatalf("got NumThread: %d, wanted at least 1", value)
 	}
 }
 // TestRuntimeMemStatsBlocking logs how many loop iterations a busy goroutine
 // completes while runtime.ReadMemStats runs, and how many it completes during a
 // time.Sleep of the same duration. It only logs the two numbers and makes no
 // assertion about them. It is skipped when GOMAXPROCS is below 2.
 func TestRuntimeMemStatsBlocking(t *testing.T) {
 	if g := runtime.GOMAXPROCS(0); g < 2 {
 		t.Skipf("skipping TestRuntimeMemStatsBlocking with GOMAXPROCS=%d\n", g)
 	}
 	ch := make(chan int)
 	// First run: the counter goroutine spins while ReadMemStats executes.
 	go testRuntimeMemStatsBlocking(ch)
 	var memStats runtime.MemStats
 	t0 := time.Now()
 	runtime.ReadMemStats(&memStats)
 	t1 := time.Now()
 	t.Log("i++ during runtime.ReadMemStats:", <-ch)
 	// Second run: the counter goroutine spins while this goroutine sleeps for
 	// as long as ReadMemStats took.
 	go testRuntimeMemStatsBlocking(ch)
 	d := t1.Sub(t0)
 	t.Log(d)
 	time.Sleep(d)
 	t.Log("i++ during time.Sleep:", <-ch)
 }
 // testRuntimeMemStatsBlocking spins, counting loop iterations, until a receiver
 // is ready on ch. It then sends the number of iterations completed so far and
 // returns.
 func testRuntimeMemStatsBlocking(ch chan int) {
 	for spins := 0; ; spins++ {
 		select {
 		case ch <- spins:
 			return
 		default:
 			// Nobody is receiving yet; count this iteration and try again.
 		}
 	}
 }
--- a/metrics/runtimehistogram.go
+++ b/metrics/runtimehistogram.go
@ -0,0 +1,319 @@
 package metrics
 import (
 	"math"
 	"runtime/metrics"
 	"sort"
 	"sync/atomic"
 )
 // getOrRegisterRuntimeHistogram returns the runtimeHistogram registered under
 // name in r, creating it with the given scale factor if it doesn't exist yet.
 // A nil r selects DefaultRegistry. The function panics if name is already
 // registered with a metric that is not a *runtimeHistogram.
 func getOrRegisterRuntimeHistogram(name string, scale float64, r Registry) *runtimeHistogram {
 	registry := r
 	if registry == nil {
 		registry = DefaultRegistry
 	}
 	metric := registry.GetOrRegister(name, func() Histogram {
 		return newRuntimeHistogram(scale)
 	})
 	return metric.(*runtimeHistogram)
 }
 // runtimeHistogram adapts a runtime/metrics histogram to the Histogram
 // interface. The latest reading is kept in v as a *runtimeHistogramSnapshot, and
 // all statistics are answered from that snapshot. Bucket boundaries are
 // multiplied by scaleFactor when a reading is stored.
 type runtimeHistogram struct {
 	v           atomic.Value
 	scaleFactor float64
 }
 // newRuntimeHistogram creates a histogram that scales bucket boundaries by scale.
 func newRuntimeHistogram(scale float64) *runtimeHistogram {
 	hist := &runtimeHistogram{scaleFactor: scale}
 	// Store an empty reading right away so that load always finds a snapshot.
 	hist.update(new(metrics.Float64Histogram))
 	return hist
 }
 // update replaces the stored snapshot with a scaled copy of mh. The counts and
 // buckets are copied, so mh may be modified by the caller afterwards.
 func (h *runtimeHistogram) update(mh *metrics.Float64Histogram) {
 	if mh == nil {
 		// A nil reading shows up when the current Go version doesn't provide the
 		// requested metric. Ignoring it here keeps the callers free of nil checks.
 		return
 	}
 	snapshot := &runtimeHistogramSnapshot{
 		Counts:  make([]uint64, len(mh.Counts)),
 		Buckets: make([]float64, len(mh.Buckets)),
 	}
 	copy(snapshot.Counts, mh.Counts)
 	for i, edge := range mh.Buckets {
 		snapshot.Buckets[i] = edge * h.scaleFactor
 	}
 	h.v.Store(snapshot)
 }
 // load returns the most recently stored snapshot.
 func (h *runtimeHistogram) load() *runtimeHistogramSnapshot {
 	current := h.v.Load()
 	return current.(*runtimeHistogramSnapshot)
 }
 // Clear is not supported and panics.
 func (h *runtimeHistogram) Clear() {
 	panic("runtimeHistogram does not support Clear")
 }
 // Update is not supported and panics; readings are supplied through update.
 func (h *runtimeHistogram) Update(int64) {
 	panic("runtimeHistogram does not support Update")
 }
 // Sample returns a NilSample, the histogram has no underlying sample.
 func (h *runtimeHistogram) Sample() Sample {
 	return NilSample{}
 }
 // Snapshot returns a non-changing copy of the histogram.
 func (h *runtimeHistogram) Snapshot() Histogram {
 	return h.load()
 }
 // Count returns the sample count.
 func (h *runtimeHistogram) Count() int64 {
 	return h.load().Count()
 }
 // Mean returns an approximation of the mean.
 func (h *runtimeHistogram) Mean() float64 {
 	return h.load().Mean()
 }
 // StdDev approximates the standard deviation of the histogram.
 func (h *runtimeHistogram) StdDev() float64 {
 	return h.load().StdDev()
 }
 // Variance approximates the variance of the histogram.
 func (h *runtimeHistogram) Variance() float64 {
 	return h.load().Variance()
 }
 // Percentile computes the p'th percentile value.
 func (h *runtimeHistogram) Percentile(p float64) float64 {
 	return h.load().Percentile(p)
 }
 // Percentiles computes all requested percentile values.
 func (h *runtimeHistogram) Percentiles(ps []float64) []float64 {
 	return h.load().Percentiles(ps)
 }
 // Max returns the highest sample value.
 func (h *runtimeHistogram) Max() int64 {
 	return h.load().Max()
 }
 // Min returns the lowest sample value.
 func (h *runtimeHistogram) Min() int64 {
 	return h.load().Min()
 }
 // Sum returns the sum of all sample values.
 func (h *runtimeHistogram) Sum() int64 {
 	return h.load().Sum()
 }
 // runtimeHistogramSnapshot is a reading of a runtime/metrics histogram that
 // implements the read side of the Histogram interface. It shares the layout of
 // metrics.Float64Histogram: Counts[i] is the number of samples in the bucket
 // bounded by Buckets[i] and Buckets[i+1].
 type runtimeHistogramSnapshot metrics.Float64Histogram
 // Clear is not supported and panics.
 func (h *runtimeHistogramSnapshot) Clear() {
 	panic("runtimeHistogram does not support Clear")
 }
 // Update is not supported and panics.
 func (h *runtimeHistogramSnapshot) Update(int64) {
 	panic("runtimeHistogram does not support Update")
 }
 // Sample returns a NilSample.
 func (h *runtimeHistogramSnapshot) Sample() Sample {
 	return NilSample{}
 }
 // Snapshot returns the receiver itself.
 func (h *runtimeHistogramSnapshot) Snapshot() Histogram {
 	return h
 }
 // Count returns the total number of samples, i.e. the sum over all bucket counts.
 func (h *runtimeHistogramSnapshot) Count() int64 {
 	total := int64(0)
 	for i := range h.Counts {
 		total += int64(h.Counts[i])
 	}
 	return total
 }
 // Mean returns an approximation of the mean. A histogram without any buckets
 // has a mean of zero.
 func (h *runtimeHistogramSnapshot) Mean() float64 {
 	if len(h.Counts) > 0 {
 		avg, _ := h.mean()
 		return avg
 	}
 	return 0
 }
 // mean computes the approximate mean and also the total sample count. Every
 // sample is assumed to sit at the midpoint of its bucket.
 //
 // When the histogram holds no samples (for example before the runtime has
 // recorded the first event), both results are zero. Dividing by the zero count
 // would otherwise produce NaN.
 func (h *runtimeHistogramSnapshot) mean() (mean, totalCount float64) {
 	var sum float64
 	for i, c := range h.Counts {
 		if c == 0 {
 			// Empty buckets contribute nothing. Skipping them also keeps a
 			// non-finite midpoint from turning the sum into NaN.
 			continue
 		}
 		sum += h.midpoint(i) * float64(c)
 		totalCount += float64(c)
 	}
 	if totalCount == 0 {
 		return 0, 0
 	}
 	return sum / totalCount, totalCount
 }
 // midpoint returns the value used to represent all samples of the given bucket:
 // the middle between its two edges, or the finite edge when the other one is
 // infinite.
 func (h *runtimeHistogramSnapshot) midpoint(bucket int) float64 {
 	high, low := h.Buckets[bucket+1], h.Buckets[bucket]
 	switch {
 	case math.IsInf(high, 1):
 		// The highest bucket can be open-ended (+Inf), holding all samples > low.
 		// An infinite range has no middle, so the lower bound stands in for it.
 		return low
 	case math.IsInf(low, -1):
 		// Likewise the lowest bucket can start at -Inf, holding all samples
 		// < high. Its upper bound is used instead.
 		return high
 	default:
 		return (low + high) / 2
 	}
 }
 // StdDev approximates the standard deviation of the histogram as the square
 // root of Variance.
 func (h *runtimeHistogramSnapshot) StdDev() float64 {
 	variance := h.Variance()
 	return math.Sqrt(variance)
 }
 // Variance approximates the sample variance of the histogram, treating every
 // sample as if it sat at the midpoint of its bucket. The result is zero when
 // there are fewer than two samples.
 func (h *runtimeHistogramSnapshot) Variance() float64 {
 	if len(h.Counts) == 0 {
 		return 0
 	}
 	avg, n := h.mean()
 	if n <= 1 {
 		// Zero or one samples have no spread.
 		return 0
 	}
 	var squares float64
 	for i := range h.Counts {
 		delta := h.midpoint(i) - avg
 		squares += float64(h.Counts[i]) * (delta * delta)
 	}
 	// Divide by n-1: this is the sample variance.
 	return squares / (n - 1)
 }
 // Percentile computes the p'th percentile value, where p is a fraction between
 // 0 and 1. Values of p outside that range are clamped to it, exactly as
 // Percentiles does. Without the clamp a negative p could select the lowest
 // bucket even when it is empty, and its lower edge may be -Inf.
 func (h *runtimeHistogramSnapshot) Percentile(p float64) float64 {
 	p = math.Max(0, math.Min(1.0, p))
 	// The threshold is the number of samples that lie below the percentile.
 	values := [1]float64{float64(h.Count()) * p}
 	h.computePercentiles(values[:])
 	return values[0]
 }
 // Percentiles computes all requested percentile values. Each element of ps is a
 // fraction that gets clamped to [0, 1]. The results are returned in the same
 // order as ps.
 func (h *runtimeHistogramSnapshot) Percentiles(ps []float64) []float64 {
 	var (
 		total  = float64(h.Count())
 		result = make([]float64, len(ps))
 		order  = make([]int, len(ps))
 	)
 	// Turn each percentile into a sample-count threshold and remember where it
 	// came from, because the computation below needs the thresholds sorted.
 	for i := range ps {
 		clamped := math.Max(0, math.Min(1.0, ps[i]))
 		result[i] = total * clamped
 		order[i] = i
 	}
 	sort.Sort(floatsAscendingKeepingIndex{result, order})
 	// computePercentiles overwrites the thresholds with the percentile values.
 	h.computePercentiles(result)
 	// Undo the sorting so that result[i] belongs to ps[i] again.
 	sort.Sort(floatsByIndex{result, order})
 	return result
 }
 // computePercentiles replaces each entry of thresh with a percentile value.
 //
 // On entry, thresh holds sample-count thresholds in ascending order. Each entry
 // is overwritten with the lower edge of the first non-empty bucket at which the
 // running sample count exceeds the threshold.
 //
 // Thresholds that the running count never exceeds (for example the 100th
 // percentile, whose threshold equals the total count) are set to the lower edge
 // of the last non-empty bucket. Without this they would be returned unchanged,
 // i.e. as a sample count rather than a sample value. If the histogram holds no
 // samples, all entries are set to zero.
 func (h *runtimeHistogramSnapshot) computePercentiles(thresh []float64) {
 	var (
 		totalCount float64
 		lastEdge   float64 // lower edge of the last non-empty bucket seen so far
 	)
 	for i, count := range h.Counts {
 		if count == 0 {
 			// An empty bucket can't contain any percentile.
 			continue
 		}
 		totalCount += float64(count)
 		lastEdge = h.Buckets[i]
 		for len(thresh) > 0 && thresh[0] < totalCount {
 			thresh[0] = lastEdge
 			thresh = thresh[1:]
 		}
 		if len(thresh) == 0 {
 			return
 		}
 	}
 	// Whatever is left lies at or beyond the total sample count.
 	for i := range thresh {
 		thresh[i] = lastEdge
 	}
 }
 // Note: runtime/metrics.Float64Histogram is a collection of float64s, but the methods
 // below need to return int64 to satisfy the interface. The histogram provided by runtime
 // also doesn't keep track of individual samples, so results are approximated.
 // Max returns an upper bound for the highest sample value: the upper edge of
 // the last non-empty bucket, rounded up. If that edge is +Inf, the lower edge
 // of the bucket is used instead. An empty histogram yields zero.
 func (h *runtimeHistogramSnapshot) Max() int64 {
 	// Walk down from the top until a bucket with samples is found.
 	i := len(h.Counts) - 1
 	for i >= 0 && h.Counts[i] == 0 {
 		i--
 	}
 	if i < 0 {
 		return 0
 	}
 	upper := h.Buckets[i+1]
 	if math.IsInf(upper, 1) {
 		upper = h.Buckets[i]
 	}
 	return int64(math.Ceil(upper))
 }
 // Min returns a lower bound for the lowest sample value: the lower edge of the
 // first non-empty bucket, rounded down. An empty histogram yields zero.
 //
 // The lowest bucket can be open-ended, with a lower edge of -Inf. In that case
 // the upper edge of the bucket is used instead, mirroring how Max and midpoint
 // treat infinite edges. Converting -Inf to int64 has no defined result.
 func (h *runtimeHistogramSnapshot) Min() int64 {
 	for i, count := range h.Counts {
 		if count == 0 {
 			continue
 		}
 		edge := h.Buckets[i]
 		if math.IsInf(edge, -1) {
 			edge = h.Buckets[i+1]
 		}
 		return int64(math.Floor(edge))
 	}
 	return 0
 }
 // Sum returns an approximation of the sum of all sample values, rounded up.
 // Every sample is counted with the lower edge of its bucket.
 //
 // Empty buckets are skipped. The lowest bucket can have a lower edge of -Inf,
 // and -Inf multiplied by a zero count is NaN, which would poison the whole sum.
 // If such an open-ended bucket does hold samples, its upper edge is used
 // instead so that the result stays finite.
 func (h *runtimeHistogramSnapshot) Sum() int64 {
 	var sum float64
 	for i, c := range h.Counts {
 		if c == 0 {
 			continue
 		}
 		edge := h.Buckets[i]
 		if math.IsInf(edge, -1) {
 			edge = h.Buckets[i+1]
 		}
 		sum += edge * float64(c)
 	}
 	return int64(math.Ceil(sum))
 }
 // floatsAscendingKeepingIndex implements sort.Interface. It orders values
 // ascending and applies every swap to indexes as well, so that indexes[k] keeps
 // describing values[k] after sorting.
 type floatsAscendingKeepingIndex struct {
 	values  []float64
 	indexes []int
 }
 // Len returns the number of values.
 func (s floatsAscendingKeepingIndex) Len() int {
 	n := len(s.values)
 	return n
 }
 // Less reports whether the value at i is smaller than the value at j.
 func (s floatsAscendingKeepingIndex) Less(i, j int) bool {
 	a, b := s.values[i], s.values[j]
 	return a < b
 }
 // Swap exchanges positions i and j in both slices.
 func (s floatsAscendingKeepingIndex) Swap(i, j int) {
 	vi, vj := s.values[i], s.values[j]
 	s.values[i], s.values[j] = vj, vi
 	xi, xj := s.indexes[i], s.indexes[j]
 	s.indexes[i], s.indexes[j] = xj, xi
 }
 // floatsByIndex implements sort.Interface. It orders by indexes ascending and
 // applies every swap to values as well, so that values[k] stays paired with
 // indexes[k] after sorting.
 type floatsByIndex struct {
 	values  []float64
 	indexes []int
 }
 // Len returns the number of values.
 func (s floatsByIndex) Len() int {
 	n := len(s.values)
 	return n
 }
 // Less reports whether the index at i is smaller than the index at j.
 func (s floatsByIndex) Less(i, j int) bool {
 	a, b := s.indexes[i], s.indexes[j]
 	return a < b
 }
 // Swap exchanges positions i and j in both slices.
 func (s floatsByIndex) Swap(i, j int) {
 	vi, vj := s.values[i], s.values[j]
 	s.values[i], s.values[j] = vj, vi
 	xi, xj := s.indexes[i], s.indexes[j]
 	s.indexes[i], s.indexes[j] = xj, xi
 }
--- a/metrics/runtimehistogram_test.go
+++ b/metrics/runtimehistogram_test.go
@ -0,0 +1,133 @@
 package metrics
 import (
 	"fmt"
 	"math"
 	"reflect"
 	"runtime/metrics"
 	"testing"
 )
 // Compile-time assertion that *runtimeHistogram satisfies the Histogram interface.
 var _ Histogram = (*runtimeHistogram)(nil)
 // runtimeHistogramTest is one table entry for TestRuntimeHistogramStats: an input
 // histogram together with the statistics expected from its snapshot.
 type runtimeHistogramTest struct {
 	// h is the input histogram that gets converted to a runtimeHistogramSnapshot.
 	h metrics.Float64Histogram
 	// The remaining fields hold the expected result of the snapshot method with the
 	// same name.
 	Count       int64
 	Min         int64
 	Max         int64
 	Sum         int64
 	Mean        float64
 	Variance    float64
 	StdDev      float64
 	Percentiles []float64 // expected values for percentiles .5 .8 .9 .99 .995, in that order
 }
 // TestRuntimeHistogramStats feeds a table of histograms through
 // runtimeHistogramSnapshot and compares the result of every statistical method
 // with the expectation recorded in the table.
 func TestRuntimeHistogramStats(t *testing.T) {
 	// Allowed deviation for the floating-point statistics.
 	const tolerance = 0.0001
 	cases := []runtimeHistogramTest{
 		{
 			h: metrics.Float64Histogram{
 				Counts:  []uint64{},
 				Buckets: []float64{},
 			},
 			Count:       0,
 			Min:         0,
 			Max:         0,
 			Sum:         0,
 			Mean:        0,
 			Variance:    0,
 			StdDev:      0,
 			Percentiles: []float64{0, 0, 0, 0, 0},
 		},
 		{
 			// This checks the case where the highest bucket is +Inf.
 			h: metrics.Float64Histogram{
 				Counts:  []uint64{0, 1, 2},
 				Buckets: []float64{0, 0.5, 1, math.Inf(1)},
 			},
 			Count:       3,
 			Min:         0,
 			Max:         1,
 			Sum:         3,
 			Mean:        0.9166666,
 			Variance:    0.020833,
 			StdDev:      0.144433,
 			Percentiles: []float64{1, 1, 1, 1, 1},
 		},
 		{
 			h: metrics.Float64Histogram{
 				Counts:  []uint64{8, 6, 3, 1},
 				Buckets: []float64{12, 16, 18, 24, 25},
 			},
 			Count:       18,
 			Min:         12,
 			Max:         25,
 			Sum:         270,
 			Mean:        16.75,
 			Variance:    10.3015,
 			StdDev:      3.2096,
 			Percentiles: []float64{16, 18, 18, 24, 24},
 		},
 	}
 	for idx := range cases {
 		tc := cases[idx]
 		// Each table entry runs as a subtest named after its position.
 		t.Run(fmt.Sprint(idx), func(t *testing.T) {
 			snap := runtimeHistogramSnapshot(tc.h)
 			if got, want := snap.Count(), tc.Count; got != want {
 				t.Errorf("Count() = %v, want %v", got, want)
 			}
 			if got, want := snap.Min(), tc.Min; got != want {
 				t.Errorf("Min() = %v, want %v", got, want)
 			}
 			if got, want := snap.Max(), tc.Max; got != want {
 				t.Errorf("Max() = %v, want %v", got, want)
 			}
 			if got, want := snap.Sum(), tc.Sum; got != want {
 				t.Errorf("Sum() = %v, want %v", got, want)
 			}
 			if got, want := snap.Mean(), tc.Mean; !approxEqual(got, want, tolerance) {
 				t.Errorf("Mean() = %v, want %v", got, want)
 			}
 			if got, want := snap.Variance(), tc.Variance; !approxEqual(got, want, tolerance) {
 				t.Errorf("Variance() = %v, want %v", got, want)
 			}
 			if got, want := snap.StdDev(), tc.StdDev; !approxEqual(got, want, tolerance) {
 				t.Errorf("StdDev() = %v, want %v", got, want)
 			}
 			// Percentile results are compared exactly, not within the tolerance.
 			ps := []float64{.5, .8, .9, .99, .995}
 			if got, want := snap.Percentiles(ps), tc.Percentiles; !reflect.DeepEqual(got, want) {
 				t.Errorf("Percentiles(%v) = %v, want %v", ps, got, want)
 			}
 		})
 	}
 }
 // approxEqual reports whether x and y are equal for test purposes. Two infinities of
 // the same sign, or two NaNs, count as equal; otherwise the absolute difference must
 // be strictly smaller than ε.
 func approxEqual(x, y, ε float64) bool {
 	switch {
 	case math.IsInf(x, -1) && math.IsInf(y, -1),
 		math.IsInf(x, 1) && math.IsInf(y, 1),
 		math.IsNaN(x) && math.IsNaN(y):
 		return true
 	}
 	return math.Abs(x-y) < ε
 }
 // TestRuntimeHistogramStatsPercentileOrder requests percentiles in unsorted order,
 // including a duplicate, and checks that the results come back in the order they
 // were requested.
 func TestRuntimeHistogramStatsPercentileOrder(t *testing.T) {
 	snapshot := runtimeHistogramSnapshot{
 		Counts:  []uint64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
 		Buckets: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
 	}
 	want := []float64{10, 2, 5, 1, 2}
 	got := snapshot.Percentiles([]float64{1, 0.2, 0.5, 0.1, 0.2})
 	if reflect.DeepEqual(got, want) {
 		return
 	}
 	t.Fatal("wrong result:", got)
 }