metrics: improve reading Go runtime metrics (#25886)

This changes how we read performance metrics from the Go runtime. Instead
of using runtime.ReadMemStats, we now rely on the API provided by package
runtime/metrics.

runtime/metrics provides more accurate information. For example, the new
interface has better reporting of memory use: in my testing, the reported value
of held memory tracks the usage reported by the OS much more closely.
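
For reference, reading values through runtime/metrics looks roughly like this
(a minimal sketch for illustration, assuming "fmt" and "runtime/metrics" are
imported; it is not the exact code added in this change):

    samples := []metrics.Sample{
        {Name: "/memory/classes/total:bytes"},
        {Name: "/gc/pauses:seconds"}, // histogram-valued
    }
    metrics.Read(samples)
    fmt.Println("total memory:", samples[0].Value.Uint64())
    fmt.Println("GC pause buckets:", len(samples[1].Value.Float64Histogram().Buckets))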

The semantics of the metrics system/memory/allocs and system/memory/frees have
changed: they now report amounts in bytes. ReadMemStats could only report the
number of allocated objects, which is imprecise: 'tiny objects' are not counted
individually because the runtime allocates them in batches, and certain
improvements in allocation behavior, such as struct size optimizations, are
less visible when the number of allocs doesn't change.
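
To illustrate the difference in what is being counted, here is a sketch (again
assuming the relevant packages are imported; not code from this change):

    var m runtime.MemStats
    runtime.ReadMemStats(&m)
    fmt.Println("objects allocated:", m.Mallocs) // old basis: cumulative object count

    s := []metrics.Sample{{Name: "/gc/heap/allocs:bytes"}}
    metrics.Read(s)
    fmt.Println("bytes allocated:", s[0].Value.Uint64()) // new basis: cumulative bytes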

Reporting allocations in bytes makes it look in graphs like a lot more is being
allocated. I don't think that's a problem because this metric is primarily
interesting for geth developers.

The metric system/memory/pauses has been changed to report statistical
values from the histogram provided by the runtime. Its name in influxdb has
changed from geth.system/memory/pauses.meter to
geth.system/memory/pauses.histogram.
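For each histogram, the reporter now writes the summary fields count, min, max,
mean, stddev, variance and the percentiles p25, p50, p75, p95, p99, p999 and
p9999 (see the reporter change below).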

We also have a new histogram metric, system/cpu/schedlatency, reporting the
Go scheduler latency.
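
Code that wants to consume these histograms can fetch them from the metrics
registry like any other histogram; the values are in nanoseconds because of the
scale factor applied during collection. A rough sketch, assuming the geth
metrics package is imported as 'metrics':

    h := metrics.DefaultRegistry.Get("system/cpu/schedlatency").(metrics.Histogram)
    fmt.Println("p95 scheduler latency (ns):", h.Percentile(0.95))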
Felix Lange 2022-11-11 13:16:13 +01:00 committed by GitHub
parent 62c973eba6
commit c539bda166
11 changed files with 598 additions and 427 deletions

@@ -160,26 +160,27 @@ func (r *reporter) send() error {
})
case metrics.Histogram:
ms := metric.Snapshot()
if ms.Count() > 0 {
ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999})
pts = append(pts, client.Point{
Measurement: fmt.Sprintf("%s%s.histogram", namespace, name),
Tags: r.tags,
Fields: map[string]interface{}{
ps := ms.Percentiles([]float64{0.25, 0.5, 0.75, 0.95, 0.99, 0.999, 0.9999})
fields := map[string]interface{}{
"count": ms.Count(),
"max": ms.Max(),
"mean": ms.Mean(),
"min": ms.Min(),
"stddev": ms.StdDev(),
"variance": ms.Variance(),
"p50": ps[0],
"p75": ps[1],
"p95": ps[2],
"p99": ps[3],
"p999": ps[4],
"p9999": ps[5],
},
"p25": ps[0],
"p50": ps[1],
"p75": ps[2],
"p95": ps[3],
"p99": ps[4],
"p999": ps[5],
"p9999": ps[6],
}
pts = append(pts, client.Point{
Measurement: fmt.Sprintf("%s%s.histogram", namespace, name),
Tags: r.tags,
Fields: fields,
Time: now,
})
}

@@ -7,7 +7,8 @@ package metrics
import (
"os"
"runtime"
"runtime/metrics"
"runtime/pprof"
"strings"
"time"
@@ -54,24 +55,92 @@ func init() {
}
}
// CollectProcessMetrics periodically collects various metrics about the running
// process.
var threadCreateProfile = pprof.Lookup("threadcreate")
type runtimeStats struct {
GCPauses *metrics.Float64Histogram
GCAllocBytes uint64
GCFreedBytes uint64
MemTotal uint64
HeapObjects uint64
HeapFree uint64
HeapReleased uint64
HeapUnused uint64
Goroutines uint64
SchedLatency *metrics.Float64Histogram
}
var runtimeSamples = []metrics.Sample{
{Name: "/gc/pauses:seconds"}, // histogram
{Name: "/gc/heap/allocs:bytes"},
{Name: "/gc/heap/frees:bytes"},
{Name: "/memory/classes/total:bytes"},
{Name: "/memory/classes/heap/objects:bytes"},
{Name: "/memory/classes/heap/free:bytes"},
{Name: "/memory/classes/heap/released:bytes"},
{Name: "/memory/classes/heap/unused:bytes"},
{Name: "/sched/goroutines:goroutines"},
{Name: "/sched/latencies:seconds"}, // histogram
}
func readRuntimeStats(v *runtimeStats) {
metrics.Read(runtimeSamples)
for _, s := range runtimeSamples {
// Skip invalid/unknown metrics. This is needed because some metrics
// are unavailable in older Go versions, and attempting to read a 'bad'
// metric panics.
if s.Value.Kind() == metrics.KindBad {
continue
}
switch s.Name {
case "/gc/pauses:seconds":
v.GCPauses = s.Value.Float64Histogram()
case "/gc/heap/allocs:bytes":
v.GCAllocBytes = s.Value.Uint64()
case "/gc/heap/frees:bytes":
v.GCFreedBytes = s.Value.Uint64()
case "/memory/classes/total:bytes":
v.MemTotal = s.Value.Uint64()
case "/memory/classes/heap/objects:bytes":
v.HeapObjects = s.Value.Uint64()
case "/memory/classes/heap/free:bytes":
v.HeapFree = s.Value.Uint64()
case "/memory/classes/heap/released:bytes":
v.HeapReleased = s.Value.Uint64()
case "/memory/classes/heap/unused:bytes":
v.HeapUnused = s.Value.Uint64()
case "/sched/goroutines:goroutines":
v.Goroutines = s.Value.Uint64()
case "/sched/latencies:seconds":
v.SchedLatency = s.Value.Float64Histogram()
}
}
}
// CollectProcessMetrics periodically collects various metrics about the running process.
func CollectProcessMetrics(refresh time.Duration) {
// Short circuit if the metrics system is disabled
if !Enabled {
return
}
refreshFreq := int64(refresh / time.Second)
// Create the various data collectors
cpuStats := make([]*CPUStats, 2)
memstats := make([]*runtime.MemStats, 2)
diskstats := make([]*DiskStats, 2)
for i := 0; i < len(memstats); i++ {
cpuStats[i] = new(CPUStats)
memstats[i] = new(runtime.MemStats)
diskstats[i] = new(DiskStats)
}
var (
cpustats = make([]CPUStats, 2)
diskstats = make([]DiskStats, 2)
rstats = make([]runtimeStats, 2)
)
// This scale factor is used for the runtime's time metrics. It's useful to convert to
// ns here because the runtime gives times in float seconds, but runtimeHistogram can
// only provide integers for the minimum and maximum values.
const secondsToNs = float64(time.Second)
// Define the various metrics to collect
var (
cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry)
@@ -79,13 +148,13 @@ func CollectProcessMetrics(refresh time.Duration) {
cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry)
cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry)
cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry)
memPauses = GetOrRegisterMeter("system/memory/pauses", DefaultRegistry)
cpuSchedLatency = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", secondsToNs, nil)
memPauses = getOrRegisterRuntimeHistogram("system/memory/pauses", secondsToNs, nil)
memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry)
memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry)
memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry)
memUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry)
memTotal = GetOrRegisterGauge("system/memory/held", DefaultRegistry)
heapUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry)
heapObjects = GetOrRegisterGauge("system/memory/objects", DefaultRegistry)
diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry)
diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry)
diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry)
@@ -93,34 +162,43 @@ func CollectProcessMetrics(refresh time.Duration) {
diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry)
diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry)
)
// Iterate loading the different stats and updating the meters
for i := 1; ; i++ {
location1 := i % 2
location2 := (i - 1) % 2
ReadCPUStats(cpuStats[location1])
cpuSysLoad.Update((cpuStats[location1].GlobalTime - cpuStats[location2].GlobalTime) / refreshFreq)
cpuSysWait.Update((cpuStats[location1].GlobalWait - cpuStats[location2].GlobalWait) / refreshFreq)
cpuProcLoad.Update((cpuStats[location1].LocalTime - cpuStats[location2].LocalTime) / refreshFreq)
// Iterate loading the different stats and updating the meters.
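// Stats are kept in two slots: 'now' and 'prev' index into the two-element
// slices above and are swapped every iteration, so each meter can be updated
// with the delta since the previous refresh.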
now, prev := 0, 1
for ; ; now, prev = prev, now {
// CPU
ReadCPUStats(&cpustats[now])
cpuSysLoad.Update((cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / refreshFreq)
cpuSysWait.Update((cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / refreshFreq)
cpuProcLoad.Update((cpustats[now].LocalTime - cpustats[prev].LocalTime) / refreshFreq)
// Threads
cpuThreads.Update(int64(threadCreateProfile.Count()))
cpuGoroutines.Update(int64(runtime.NumGoroutine()))
runtime.ReadMemStats(memstats[location1])
memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs))
memAllocs.Mark(int64(memstats[location1].Mallocs - memstats[location2].Mallocs))
memFrees.Mark(int64(memstats[location1].Frees - memstats[location2].Frees))
memHeld.Update(int64(memstats[location1].HeapSys - memstats[location1].HeapReleased))
memUsed.Update(int64(memstats[location1].Alloc))
// Go runtime metrics
readRuntimeStats(&rstats[now])
if ReadDiskStats(diskstats[location1]) == nil {
diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount)
diskReadBytes.Mark(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes)
diskWrites.Mark(diskstats[location1].WriteCount - diskstats[location2].WriteCount)
diskWriteBytes.Mark(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes)
cpuGoroutines.Update(int64(rstats[now].Goroutines))
cpuSchedLatency.update(rstats[now].SchedLatency)
memPauses.update(rstats[now].GCPauses)
diskReadBytesCounter.Inc(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes)
diskWriteBytesCounter.Inc(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes)
memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes))
memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes))
memTotal.Update(int64(rstats[now].MemTotal))
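// Approximate in-use memory as everything the runtime has mapped, minus
// heap space that is unused, free, or already released back to the OS.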
heapUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapUnused - rstats[now].HeapFree - rstats[now].HeapReleased))
heapObjects.Update(int64(rstats[now].HeapObjects))
// Disk
if ReadDiskStats(&diskstats[now]) == nil {
diskReads.Mark(diskstats[now].ReadCount - diskstats[prev].ReadCount)
diskReadBytes.Mark(diskstats[now].ReadBytes - diskstats[prev].ReadBytes)
diskWrites.Mark(diskstats[now].WriteCount - diskstats[prev].WriteCount)
diskWriteBytes.Mark(diskstats[now].WriteBytes - diskstats[prev].WriteBytes)
diskReadBytesCounter.Inc(diskstats[now].ReadBytes - diskstats[prev].ReadBytes)
diskWriteBytesCounter.Inc(diskstats[now].WriteBytes - diskstats[prev].WriteBytes)
}
time.Sleep(refresh)
}
}

@@ -2,8 +2,6 @@ package metrics
import (
"fmt"
"io"
"log"
"sync"
"testing"
"time"
@@ -11,11 +9,11 @@ import (
const FANOUT = 128
// Stop the compiler from complaining during debugging.
var (
_ = io.Discard
_ = log.LstdFlags
)
func TestReadRuntimeValues(t *testing.T) {
var v runtimeStats
readRuntimeStats(&v)
t.Logf("%+v", v)
}
func BenchmarkMetrics(b *testing.B) {
r := NewRegistry()
@@ -26,7 +24,6 @@ func BenchmarkMetrics(b *testing.B) {
m := NewRegisteredMeter("meter", r)
t := NewRegisteredTimer("timer", r)
RegisterDebugGCStats(r)
RegisterRuntimeMemStats(r)
b.ResetTimer()
ch := make(chan bool)
@@ -48,24 +45,6 @@ func BenchmarkMetrics(b *testing.B) {
}()
//*/
wgR := &sync.WaitGroup{}
//*
wgR.Add(1)
go func() {
defer wgR.Done()
//log.Println("go CaptureRuntimeMemStats")
for {
select {
case <-ch:
//log.Println("done CaptureRuntimeMemStats")
return
default:
CaptureRuntimeMemStatsOnce(r)
}
}
}()
//*/
wgW := &sync.WaitGroup{}
/*
wgW.Add(1)
@@ -104,7 +83,6 @@ func BenchmarkMetrics(b *testing.B) {
wg.Wait()
close(ch)
wgD.Wait()
wgR.Wait()
wgW.Wait()
}

@@ -1,212 +0,0 @@
package metrics
import (
"runtime"
"runtime/pprof"
"time"
)
var (
memStats runtime.MemStats
runtimeMetrics struct {
MemStats struct {
Alloc Gauge
BuckHashSys Gauge
DebugGC Gauge
EnableGC Gauge
Frees Gauge
HeapAlloc Gauge
HeapIdle Gauge
HeapInuse Gauge
HeapObjects Gauge
HeapReleased Gauge
HeapSys Gauge
LastGC Gauge
Lookups Gauge
Mallocs Gauge
MCacheInuse Gauge
MCacheSys Gauge
MSpanInuse Gauge
MSpanSys Gauge
NextGC Gauge
NumGC Gauge
GCCPUFraction GaugeFloat64
PauseNs Histogram
PauseTotalNs Gauge
StackInuse Gauge
StackSys Gauge
Sys Gauge
TotalAlloc Gauge
}
NumCgoCall Gauge
NumGoroutine Gauge
NumThread Gauge
ReadMemStats Timer
}
frees uint64
lookups uint64
mallocs uint64
numGC uint32
numCgoCalls int64
threadCreateProfile = pprof.Lookup("threadcreate")
)
// Capture new values for the Go runtime statistics exported in
// runtime.MemStats. This is designed to be called as a goroutine.
func CaptureRuntimeMemStats(r Registry, d time.Duration) {
for range time.Tick(d) {
CaptureRuntimeMemStatsOnce(r)
}
}
// Capture new values for the Go runtime statistics exported in
// runtime.MemStats. This is designed to be called in a background
// goroutine. Giving a registry which has not been given to
// RegisterRuntimeMemStats will panic.
//
// Be very careful with this because runtime.ReadMemStats calls the C
// functions runtime·semacquire(&runtime·worldsema) and runtime·stoptheworld()
// and that last one does what it says on the tin.
func CaptureRuntimeMemStatsOnce(r Registry) {
t := time.Now()
runtime.ReadMemStats(&memStats) // This takes 50-200us.
runtimeMetrics.ReadMemStats.UpdateSince(t)
runtimeMetrics.MemStats.Alloc.Update(int64(memStats.Alloc))
runtimeMetrics.MemStats.BuckHashSys.Update(int64(memStats.BuckHashSys))
if memStats.DebugGC {
runtimeMetrics.MemStats.DebugGC.Update(1)
} else {
runtimeMetrics.MemStats.DebugGC.Update(0)
}
if memStats.EnableGC {
runtimeMetrics.MemStats.EnableGC.Update(1)
} else {
runtimeMetrics.MemStats.EnableGC.Update(0)
}
runtimeMetrics.MemStats.Frees.Update(int64(memStats.Frees - frees))
runtimeMetrics.MemStats.HeapAlloc.Update(int64(memStats.HeapAlloc))
runtimeMetrics.MemStats.HeapIdle.Update(int64(memStats.HeapIdle))
runtimeMetrics.MemStats.HeapInuse.Update(int64(memStats.HeapInuse))
runtimeMetrics.MemStats.HeapObjects.Update(int64(memStats.HeapObjects))
runtimeMetrics.MemStats.HeapReleased.Update(int64(memStats.HeapReleased))
runtimeMetrics.MemStats.HeapSys.Update(int64(memStats.HeapSys))
runtimeMetrics.MemStats.LastGC.Update(int64(memStats.LastGC))
runtimeMetrics.MemStats.Lookups.Update(int64(memStats.Lookups - lookups))
runtimeMetrics.MemStats.Mallocs.Update(int64(memStats.Mallocs - mallocs))
runtimeMetrics.MemStats.MCacheInuse.Update(int64(memStats.MCacheInuse))
runtimeMetrics.MemStats.MCacheSys.Update(int64(memStats.MCacheSys))
runtimeMetrics.MemStats.MSpanInuse.Update(int64(memStats.MSpanInuse))
runtimeMetrics.MemStats.MSpanSys.Update(int64(memStats.MSpanSys))
runtimeMetrics.MemStats.NextGC.Update(int64(memStats.NextGC))
runtimeMetrics.MemStats.NumGC.Update(int64(memStats.NumGC - numGC))
runtimeMetrics.MemStats.GCCPUFraction.Update(gcCPUFraction(&memStats))
// <https://code.google.com/p/go/source/browse/src/pkg/runtime/mgc0.c>
i := numGC % uint32(len(memStats.PauseNs))
ii := memStats.NumGC % uint32(len(memStats.PauseNs))
if memStats.NumGC-numGC >= uint32(len(memStats.PauseNs)) {
for i = 0; i < uint32(len(memStats.PauseNs)); i++ {
runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i]))
}
} else {
if i > ii {
for ; i < uint32(len(memStats.PauseNs)); i++ {
runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i]))
}
i = 0
}
for ; i < ii; i++ {
runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i]))
}
}
frees = memStats.Frees
lookups = memStats.Lookups
mallocs = memStats.Mallocs
numGC = memStats.NumGC
runtimeMetrics.MemStats.PauseTotalNs.Update(int64(memStats.PauseTotalNs))
runtimeMetrics.MemStats.StackInuse.Update(int64(memStats.StackInuse))
runtimeMetrics.MemStats.StackSys.Update(int64(memStats.StackSys))
runtimeMetrics.MemStats.Sys.Update(int64(memStats.Sys))
runtimeMetrics.MemStats.TotalAlloc.Update(int64(memStats.TotalAlloc))
currentNumCgoCalls := numCgoCall()
runtimeMetrics.NumCgoCall.Update(currentNumCgoCalls - numCgoCalls)
numCgoCalls = currentNumCgoCalls
runtimeMetrics.NumGoroutine.Update(int64(runtime.NumGoroutine()))
runtimeMetrics.NumThread.Update(int64(threadCreateProfile.Count()))
}
// Register runtimeMetrics for the Go runtime statistics exported in runtime and
// specifically runtime.MemStats. The runtimeMetrics are named by their
// fully-qualified Go symbols, i.e. runtime.MemStats.Alloc.
func RegisterRuntimeMemStats(r Registry) {
runtimeMetrics.MemStats.Alloc = NewGauge()
runtimeMetrics.MemStats.BuckHashSys = NewGauge()
runtimeMetrics.MemStats.DebugGC = NewGauge()
runtimeMetrics.MemStats.EnableGC = NewGauge()
runtimeMetrics.MemStats.Frees = NewGauge()
runtimeMetrics.MemStats.HeapAlloc = NewGauge()
runtimeMetrics.MemStats.HeapIdle = NewGauge()
runtimeMetrics.MemStats.HeapInuse = NewGauge()
runtimeMetrics.MemStats.HeapObjects = NewGauge()
runtimeMetrics.MemStats.HeapReleased = NewGauge()
runtimeMetrics.MemStats.HeapSys = NewGauge()
runtimeMetrics.MemStats.LastGC = NewGauge()
runtimeMetrics.MemStats.Lookups = NewGauge()
runtimeMetrics.MemStats.Mallocs = NewGauge()
runtimeMetrics.MemStats.MCacheInuse = NewGauge()
runtimeMetrics.MemStats.MCacheSys = NewGauge()
runtimeMetrics.MemStats.MSpanInuse = NewGauge()
runtimeMetrics.MemStats.MSpanSys = NewGauge()
runtimeMetrics.MemStats.NextGC = NewGauge()
runtimeMetrics.MemStats.NumGC = NewGauge()
runtimeMetrics.MemStats.GCCPUFraction = NewGaugeFloat64()
runtimeMetrics.MemStats.PauseNs = NewHistogram(NewExpDecaySample(1028, 0.015))
runtimeMetrics.MemStats.PauseTotalNs = NewGauge()
runtimeMetrics.MemStats.StackInuse = NewGauge()
runtimeMetrics.MemStats.StackSys = NewGauge()
runtimeMetrics.MemStats.Sys = NewGauge()
runtimeMetrics.MemStats.TotalAlloc = NewGauge()
runtimeMetrics.NumCgoCall = NewGauge()
runtimeMetrics.NumGoroutine = NewGauge()
runtimeMetrics.NumThread = NewGauge()
runtimeMetrics.ReadMemStats = NewTimer()
r.Register("runtime.MemStats.Alloc", runtimeMetrics.MemStats.Alloc)
r.Register("runtime.MemStats.BuckHashSys", runtimeMetrics.MemStats.BuckHashSys)
r.Register("runtime.MemStats.DebugGC", runtimeMetrics.MemStats.DebugGC)
r.Register("runtime.MemStats.EnableGC", runtimeMetrics.MemStats.EnableGC)
r.Register("runtime.MemStats.Frees", runtimeMetrics.MemStats.Frees)
r.Register("runtime.MemStats.HeapAlloc", runtimeMetrics.MemStats.HeapAlloc)
r.Register("runtime.MemStats.HeapIdle", runtimeMetrics.MemStats.HeapIdle)
r.Register("runtime.MemStats.HeapInuse", runtimeMetrics.MemStats.HeapInuse)
r.Register("runtime.MemStats.HeapObjects", runtimeMetrics.MemStats.HeapObjects)
r.Register("runtime.MemStats.HeapReleased", runtimeMetrics.MemStats.HeapReleased)
r.Register("runtime.MemStats.HeapSys", runtimeMetrics.MemStats.HeapSys)
r.Register("runtime.MemStats.LastGC", runtimeMetrics.MemStats.LastGC)
r.Register("runtime.MemStats.Lookups", runtimeMetrics.MemStats.Lookups)
r.Register("runtime.MemStats.Mallocs", runtimeMetrics.MemStats.Mallocs)
r.Register("runtime.MemStats.MCacheInuse", runtimeMetrics.MemStats.MCacheInuse)
r.Register("runtime.MemStats.MCacheSys", runtimeMetrics.MemStats.MCacheSys)
r.Register("runtime.MemStats.MSpanInuse", runtimeMetrics.MemStats.MSpanInuse)
r.Register("runtime.MemStats.MSpanSys", runtimeMetrics.MemStats.MSpanSys)
r.Register("runtime.MemStats.NextGC", runtimeMetrics.MemStats.NextGC)
r.Register("runtime.MemStats.NumGC", runtimeMetrics.MemStats.NumGC)
r.Register("runtime.MemStats.GCCPUFraction", runtimeMetrics.MemStats.GCCPUFraction)
r.Register("runtime.MemStats.PauseNs", runtimeMetrics.MemStats.PauseNs)
r.Register("runtime.MemStats.PauseTotalNs", runtimeMetrics.MemStats.PauseTotalNs)
r.Register("runtime.MemStats.StackInuse", runtimeMetrics.MemStats.StackInuse)
r.Register("runtime.MemStats.StackSys", runtimeMetrics.MemStats.StackSys)
r.Register("runtime.MemStats.Sys", runtimeMetrics.MemStats.Sys)
r.Register("runtime.MemStats.TotalAlloc", runtimeMetrics.MemStats.TotalAlloc)
r.Register("runtime.NumCgoCall", runtimeMetrics.NumCgoCall)
r.Register("runtime.NumGoroutine", runtimeMetrics.NumGoroutine)
r.Register("runtime.NumThread", runtimeMetrics.NumThread)
r.Register("runtime.ReadMemStats", runtimeMetrics.ReadMemStats)
}

@@ -1,10 +0,0 @@
//go:build cgo && !appengine && !js
// +build cgo,!appengine,!js
package metrics
import "runtime"
func numCgoCall() int64 {
return runtime.NumCgoCall()
}

@@ -1,10 +0,0 @@
//go:build go1.5
// +build go1.5
package metrics
import "runtime"
func gcCPUFraction(memStats *runtime.MemStats) float64 {
return memStats.GCCPUFraction
}

@@ -1,8 +0,0 @@
//go:build !cgo || appengine || js
// +build !cgo appengine js
package metrics
func numCgoCall() int64 {
return 0
}

@@ -1,10 +0,0 @@
//go:build !go1.5
// +build !go1.5
package metrics
import "runtime"
func gcCPUFraction(memStats *runtime.MemStats) float64 {
return 0
}

@@ -1,88 +0,0 @@
package metrics
import (
"runtime"
"testing"
"time"
)
func BenchmarkRuntimeMemStats(b *testing.B) {
r := NewRegistry()
RegisterRuntimeMemStats(r)
b.ResetTimer()
for i := 0; i < b.N; i++ {
CaptureRuntimeMemStatsOnce(r)
}
}
func TestRuntimeMemStats(t *testing.T) {
r := NewRegistry()
RegisterRuntimeMemStats(r)
CaptureRuntimeMemStatsOnce(r)
zero := runtimeMetrics.MemStats.PauseNs.Count() // Get a "zero" since GC may have run before these tests.
runtime.GC()
CaptureRuntimeMemStatsOnce(r)
if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 1 {
t.Fatal(count - zero)
}
runtime.GC()
runtime.GC()
CaptureRuntimeMemStatsOnce(r)
if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 3 {
t.Fatal(count - zero)
}
for i := 0; i < 256; i++ {
runtime.GC()
}
CaptureRuntimeMemStatsOnce(r)
if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 259 {
t.Fatal(count - zero)
}
for i := 0; i < 257; i++ {
runtime.GC()
}
CaptureRuntimeMemStatsOnce(r)
if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 515 { // We lost one because there were too many GCs between captures.
t.Fatal(count - zero)
}
}
func TestRuntimeMemStatsNumThread(t *testing.T) {
r := NewRegistry()
RegisterRuntimeMemStats(r)
CaptureRuntimeMemStatsOnce(r)
if value := runtimeMetrics.NumThread.Value(); value < 1 {
t.Fatalf("got NumThread: %d, wanted at least 1", value)
}
}
func TestRuntimeMemStatsBlocking(t *testing.T) {
if g := runtime.GOMAXPROCS(0); g < 2 {
t.Skipf("skipping TestRuntimeMemStatsBlocking with GOMAXPROCS=%d\n", g)
}
ch := make(chan int)
go testRuntimeMemStatsBlocking(ch)
var memStats runtime.MemStats
t0 := time.Now()
runtime.ReadMemStats(&memStats)
t1 := time.Now()
t.Log("i++ during runtime.ReadMemStats:", <-ch)
go testRuntimeMemStatsBlocking(ch)
d := t1.Sub(t0)
t.Log(d)
time.Sleep(d)
t.Log("i++ during time.Sleep:", <-ch)
}
func testRuntimeMemStatsBlocking(ch chan int) {
i := 0
for {
select {
case ch <- i:
return
default:
i++
}
}
}

metrics/runtimehistogram.go (new file, 319 lines)

@@ -0,0 +1,319 @@
package metrics
import (
"math"
"runtime/metrics"
"sort"
"sync/atomic"
)
func getOrRegisterRuntimeHistogram(name string, scale float64, r Registry) *runtimeHistogram {
if r == nil {
r = DefaultRegistry
}
constructor := func() Histogram { return newRuntimeHistogram(scale) }
return r.GetOrRegister(name, constructor).(*runtimeHistogram)
}
// runtimeHistogram wraps a runtime/metrics histogram.
type runtimeHistogram struct {
v atomic.Value
scaleFactor float64
}
func newRuntimeHistogram(scale float64) *runtimeHistogram {
h := &runtimeHistogram{scaleFactor: scale}
h.update(&metrics.Float64Histogram{})
return h
}
func (h *runtimeHistogram) update(mh *metrics.Float64Histogram) {
if mh == nil {
// The update value can be nil if the current Go version doesn't support a
// requested metric. It's just easier to handle nil here than putting
// conditionals everywhere.
return
}
s := runtimeHistogramSnapshot{
Counts: make([]uint64, len(mh.Counts)),
Buckets: make([]float64, len(mh.Buckets)),
}
copy(s.Counts, mh.Counts)
copy(s.Buckets, mh.Buckets)
for i, b := range s.Buckets {
s.Buckets[i] = b * h.scaleFactor
}
h.v.Store(&s)
}
func (h *runtimeHistogram) load() *runtimeHistogramSnapshot {
return h.v.Load().(*runtimeHistogramSnapshot)
}
func (h *runtimeHistogram) Clear() {
panic("runtimeHistogram does not support Clear")
}
func (h *runtimeHistogram) Update(int64) {
panic("runtimeHistogram does not support Update")
}
func (h *runtimeHistogram) Sample() Sample {
return NilSample{}
}
// Snapshot returns a non-changing copy of the histogram.
func (h *runtimeHistogram) Snapshot() Histogram {
return h.load()
}
// Count returns the sample count.
func (h *runtimeHistogram) Count() int64 {
return h.load().Count()
}
// Mean returns an approximation of the mean.
func (h *runtimeHistogram) Mean() float64 {
return h.load().Mean()
}
// StdDev approximates the standard deviation of the histogram.
func (h *runtimeHistogram) StdDev() float64 {
return h.load().StdDev()
}
// Variance approximates the variance of the histogram.
func (h *runtimeHistogram) Variance() float64 {
return h.load().Variance()
}
// Percentile computes the p'th percentile value.
func (h *runtimeHistogram) Percentile(p float64) float64 {
return h.load().Percentile(p)
}
// Percentiles computes all requested percentile values.
func (h *runtimeHistogram) Percentiles(ps []float64) []float64 {
return h.load().Percentiles(ps)
}
// Max returns the highest sample value.
func (h *runtimeHistogram) Max() int64 {
return h.load().Max()
}
// Min returns the lowest sample value.
func (h *runtimeHistogram) Min() int64 {
return h.load().Min()
}
// Sum returns the sum of all sample values.
func (h *runtimeHistogram) Sum() int64 {
return h.load().Sum()
}
type runtimeHistogramSnapshot metrics.Float64Histogram
func (h *runtimeHistogramSnapshot) Clear() {
panic("runtimeHistogram does not support Clear")
}
func (h *runtimeHistogramSnapshot) Update(int64) {
panic("runtimeHistogram does not support Update")
}
func (h *runtimeHistogramSnapshot) Sample() Sample {
return NilSample{}
}
func (h *runtimeHistogramSnapshot) Snapshot() Histogram {
return h
}
// Count returns the sample count.
func (h *runtimeHistogramSnapshot) Count() int64 {
var count int64
for _, c := range h.Counts {
count += int64(c)
}
return count
}
// Mean returns an approximation of the mean.
func (h *runtimeHistogramSnapshot) Mean() float64 {
if len(h.Counts) == 0 {
return 0
}
mean, _ := h.mean()
return mean
}
// mean computes the mean and also the total sample count.
func (h *runtimeHistogramSnapshot) mean() (mean, totalCount float64) {
var sum float64
for i, c := range h.Counts {
midpoint := h.midpoint(i)
sum += midpoint * float64(c)
totalCount += float64(c)
}
return sum / totalCount, totalCount
}
func (h *runtimeHistogramSnapshot) midpoint(bucket int) float64 {
high := h.Buckets[bucket+1]
low := h.Buckets[bucket]
if math.IsInf(high, 1) {
// The edge of the highest bucket can be +Inf, and it's supposed to mean that this
// bucket contains all remaining samples > low. We can't get the middle of an
// infinite range, so just return the lower bound of this bucket instead.
return low
}
if math.IsInf(low, -1) {
// Similarly, we can get -Inf in the left edge of the lowest bucket,
// and it means the bucket contains all remaining values < high.
return high
}
return (low + high) / 2
}
// StdDev approximates the standard deviation of the histogram.
func (h *runtimeHistogramSnapshot) StdDev() float64 {
return math.Sqrt(h.Variance())
}
// Variance approximates the variance of the histogram.
func (h *runtimeHistogramSnapshot) Variance() float64 {
if len(h.Counts) == 0 {
return 0
}
mean, totalCount := h.mean()
if totalCount <= 1 {
// There is no variance when there are zero or one items.
return 0
}
var sum float64
for i, c := range h.Counts {
midpoint := h.midpoint(i)
d := midpoint - mean
sum += float64(c) * (d * d)
}
return sum / (totalCount - 1)
}
// Percentile computes the p'th percentile value.
func (h *runtimeHistogramSnapshot) Percentile(p float64) float64 {
threshold := float64(h.Count()) * p
values := [1]float64{threshold}
h.computePercentiles(values[:])
return values[0]
}
// Percentiles computes all requested percentile values.
func (h *runtimeHistogramSnapshot) Percentiles(ps []float64) []float64 {
// Compute threshold values. We need these to be sorted
// for the percentile computation, but restore the original
// order later, so keep the indexes as well.
count := float64(h.Count())
thresholds := make([]float64, len(ps))
indexes := make([]int, len(ps))
for i, percentile := range ps {
thresholds[i] = count * math.Max(0, math.Min(1.0, percentile))
indexes[i] = i
}
sort.Sort(floatsAscendingKeepingIndex{thresholds, indexes})
// Now compute. The result is stored back into the thresholds slice.
h.computePercentiles(thresholds)
// Put the result back into the requested order.
sort.Sort(floatsByIndex{thresholds, indexes})
return thresholds
}
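// computePercentiles resolves each value in thresh (a sorted list of
// sample-count thresholds) to the lower bound of the first bucket at which the
// cumulative sample count exceeds that threshold. Thresholds at or beyond the
// total count are left unchanged. For example, with Counts {1, 1, ..., 1} over
// Buckets {0, 1, ..., 10}, a threshold of 5 resolves to the boundary value 5.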
func (h *runtimeHistogramSnapshot) computePercentiles(thresh []float64) {
var totalCount float64
for i, count := range h.Counts {
totalCount += float64(count)
for len(thresh) > 0 && thresh[0] < totalCount {
thresh[0] = h.Buckets[i]
thresh = thresh[1:]
}
if len(thresh) == 0 {
return
}
}
}
// Note: runtime/metrics.Float64Histogram is a collection of float64s, but the methods
// below need to return int64 to satisfy the interface. The histogram provided by runtime
// also doesn't keep track of individual samples, so results are approximated.
// Max returns the highest sample value.
func (h *runtimeHistogramSnapshot) Max() int64 {
for i := len(h.Counts) - 1; i >= 0; i-- {
count := h.Counts[i]
if count > 0 {
edge := h.Buckets[i+1]
if math.IsInf(edge, 1) {
edge = h.Buckets[i]
}
return int64(math.Ceil(edge))
}
}
return 0
}
// Min returns the lowest sample value.
func (h *runtimeHistogramSnapshot) Min() int64 {
for i, count := range h.Counts {
if count > 0 {
return int64(math.Floor(h.Buckets[i]))
}
}
return 0
}
// Sum returns the sum of all sample values.
func (h *runtimeHistogramSnapshot) Sum() int64 {
var sum float64
for i := range h.Counts {
sum += h.Buckets[i] * float64(h.Counts[i])
}
return int64(math.Ceil(sum))
}
type floatsAscendingKeepingIndex struct {
values []float64
indexes []int
}
func (s floatsAscendingKeepingIndex) Len() int {
return len(s.values)
}
func (s floatsAscendingKeepingIndex) Less(i, j int) bool {
return s.values[i] < s.values[j]
}
func (s floatsAscendingKeepingIndex) Swap(i, j int) {
s.values[i], s.values[j] = s.values[j], s.values[i]
s.indexes[i], s.indexes[j] = s.indexes[j], s.indexes[i]
}
type floatsByIndex struct {
values []float64
indexes []int
}
func (s floatsByIndex) Len() int {
return len(s.values)
}
func (s floatsByIndex) Less(i, j int) bool {
return s.indexes[i] < s.indexes[j]
}
func (s floatsByIndex) Swap(i, j int) {
s.values[i], s.values[j] = s.values[j], s.values[i]
s.indexes[i], s.indexes[j] = s.indexes[j], s.indexes[i]
}

@@ -0,0 +1,133 @@
package metrics
import (
"fmt"
"math"
"reflect"
"runtime/metrics"
"testing"
)
var _ Histogram = (*runtimeHistogram)(nil)
type runtimeHistogramTest struct {
h metrics.Float64Histogram
Count int64
Min int64
Max int64
Sum int64
Mean float64
Variance float64
StdDev float64
Percentiles []float64 // .5 .8 .9 .99 .995
}
// This test checks the results of statistical functions implemented
// by runtimeHistogramSnapshot.
func TestRuntimeHistogramStats(t *testing.T) {
tests := []runtimeHistogramTest{
0: {
h: metrics.Float64Histogram{
Counts: []uint64{},
Buckets: []float64{},
},
Count: 0,
Max: 0,
Min: 0,
Sum: 0,
Mean: 0,
Variance: 0,
StdDev: 0,
Percentiles: []float64{0, 0, 0, 0, 0},
},
1: {
// This checks the case where the highest bucket is +Inf.
h: metrics.Float64Histogram{
Counts: []uint64{0, 1, 2},
Buckets: []float64{0, 0.5, 1, math.Inf(1)},
},
Count: 3,
Max: 1,
Min: 0,
Sum: 3,
Mean: 0.9166666,
Percentiles: []float64{1, 1, 1, 1, 1},
Variance: 0.020833,
StdDev: 0.144433,
},
2: {
h: metrics.Float64Histogram{
Counts: []uint64{8, 6, 3, 1},
Buckets: []float64{12, 16, 18, 24, 25},
},
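// The expected values below follow from the bucket midpoints 14, 17, 21 and
// 24.5: Mean = (8*14 + 6*17 + 3*21 + 1*24.5) / 18 = 16.75. Sum uses the lower
// bucket bounds: 8*12 + 6*16 + 3*18 + 1*24 = 270.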
Count: 18,
Max: 25,
Min: 12,
Sum: 270,
Mean: 16.75,
Variance: 10.3015,
StdDev: 3.2096,
Percentiles: []float64{16, 18, 18, 24, 24},
},
}
for i, test := range tests {
t.Run(fmt.Sprint(i), func(t *testing.T) {
s := runtimeHistogramSnapshot(test.h)
if v := s.Count(); v != test.Count {
t.Errorf("Count() = %v, want %v", v, test.Count)
}
if v := s.Min(); v != test.Min {
t.Errorf("Min() = %v, want %v", v, test.Min)
}
if v := s.Max(); v != test.Max {
t.Errorf("Max() = %v, want %v", v, test.Max)
}
if v := s.Sum(); v != test.Sum {
t.Errorf("Sum() = %v, want %v", v, test.Sum)
}
if v := s.Mean(); !approxEqual(v, test.Mean, 0.0001) {
t.Errorf("Mean() = %v, want %v", v, test.Mean)
}
if v := s.Variance(); !approxEqual(v, test.Variance, 0.0001) {
t.Errorf("Variance() = %v, want %v", v, test.Variance)
}
if v := s.StdDev(); !approxEqual(v, test.StdDev, 0.0001) {
t.Errorf("StdDev() = %v, want %v", v, test.StdDev)
}
ps := []float64{.5, .8, .9, .99, .995}
if v := s.Percentiles(ps); !reflect.DeepEqual(v, test.Percentiles) {
t.Errorf("Percentiles(%v) = %v, want %v", ps, v, test.Percentiles)
}
})
}
}
func approxEqual(x, y, ε float64) bool {
if math.IsInf(x, -1) && math.IsInf(y, -1) {
return true
}
if math.IsInf(x, 1) && math.IsInf(y, 1) {
return true
}
if math.IsNaN(x) && math.IsNaN(y) {
return true
}
return math.Abs(x-y) < ε
}
// This test verifies that requesting Percentiles in unsorted order
// returns them in the requested order.
func TestRuntimeHistogramStatsPercentileOrder(t *testing.T) {
p := runtimeHistogramSnapshot{
Counts: []uint64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
Buckets: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
}
result := p.Percentiles([]float64{1, 0.2, 0.5, 0.1, 0.2})
expected := []float64{10, 2, 5, 1, 2}
if !reflect.DeepEqual(result, expected) {
t.Fatal("wrong result:", result)
}
}