go-ethereum/metrics/opentsdb.go
Martin HS 9045b79bc2
metrics, cmd/geth: change init-process of metrics (#30814)
This PR modifies how the metrics library handles `Enabled`: previously,
the package `init` decided whether to serve real metrics or just
dummy-types.

This has several drawbacks: 
- During pkg init, we need to determine whether metrics are enabled or
not. So we first hacked in a check if certain geth-specific
commandline-flags were enabled. Then we added a similar check for
geth-env-vars. Then we almost added a very elaborate check for
toml-config-file, plus toml parsing.

- Using "real" types and dummy types interchangeably means that
everything is hidden behind interfaces. This has a performance penalty,
and also it just adds a lot of code.

This PR removes the interface stuff, uses concrete types, and allows for
the setting of Enabled to happen later. It is still assumed that
`metrics.Enable()` is invoked early on.

The somewhat 'heavy' operations, such as ticking meters and exp-decay,
now checks the enable-flag to prevent resource leak.

The change may be large, but it's mostly pretty trivial, and from the
last time I gutted the metrics, I ensured that we have fairly good test
coverage.

---------

Co-authored-by: Felix Lange <fjl@twurst.com>
2024-12-10 13:27:29 +01:00

129 lines
5.9 KiB
Go

package metrics
import (
"bufio"
"fmt"
"io"
"log"
"net"
"os"
"strings"
"time"
)
var shortHostName = ""
// OpenTSDBConfig provides a container with configuration parameters for
// the OpenTSDB exporter
type OpenTSDBConfig struct {
Addr *net.TCPAddr // Network address to connect to
Registry Registry // Registry to be exported
FlushInterval time.Duration // Flush interval
DurationUnit time.Duration // Time conversion unit for durations
Prefix string // Prefix to be prepended to metric names
}
// OpenTSDB is a blocking exporter function which reports metrics in r
// to a TSDB server located at addr, flushing them every d duration
// and prepending metric names with prefix.
func OpenTSDB(r Registry, d time.Duration, prefix string, addr *net.TCPAddr) {
OpenTSDBWithConfig(OpenTSDBConfig{
Addr: addr,
Registry: r,
FlushInterval: d,
DurationUnit: time.Nanosecond,
Prefix: prefix,
})
}
// OpenTSDBWithConfig is a blocking exporter function just like OpenTSDB,
// but it takes a OpenTSDBConfig instead.
func OpenTSDBWithConfig(c OpenTSDBConfig) {
for range time.Tick(c.FlushInterval) {
if err := openTSDB(&c); nil != err {
log.Println(err)
}
}
}
func getShortHostname() string {
if shortHostName == "" {
host, _ := os.Hostname()
if index := strings.Index(host, "."); index > 0 {
shortHostName = host[:index]
} else {
shortHostName = host
}
}
return shortHostName
}
// writeRegistry writes the registry-metrics on the opentsb format.
func (c *OpenTSDBConfig) writeRegistry(w io.Writer, now int64, shortHostname string) {
du := float64(c.DurationUnit)
c.Registry.Each(func(name string, i interface{}) {
switch metric := i.(type) {
case *Counter:
fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, metric.Snapshot().Count(), shortHostname)
case *CounterFloat64:
fmt.Fprintf(w, "put %s.%s.count %d %f host=%s\n", c.Prefix, name, now, metric.Snapshot().Count(), shortHostname)
case *Gauge:
fmt.Fprintf(w, "put %s.%s.value %d %d host=%s\n", c.Prefix, name, now, metric.Snapshot().Value(), shortHostname)
case *GaugeFloat64:
fmt.Fprintf(w, "put %s.%s.value %d %f host=%s\n", c.Prefix, name, now, metric.Snapshot().Value(), shortHostname)
case *GaugeInfo:
fmt.Fprintf(w, "put %s.%s.value %d %s host=%s\n", c.Prefix, name, now, metric.Snapshot().Value().String(), shortHostname)
case Histogram:
h := metric.Snapshot()
ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999})
fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, h.Count(), shortHostname)
fmt.Fprintf(w, "put %s.%s.min %d %d host=%s\n", c.Prefix, name, now, h.Min(), shortHostname)
fmt.Fprintf(w, "put %s.%s.max %d %d host=%s\n", c.Prefix, name, now, h.Max(), shortHostname)
fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, h.Mean(), shortHostname)
fmt.Fprintf(w, "put %s.%s.std-dev %d %.2f host=%s\n", c.Prefix, name, now, h.StdDev(), shortHostname)
fmt.Fprintf(w, "put %s.%s.50-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[0], shortHostname)
fmt.Fprintf(w, "put %s.%s.75-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[1], shortHostname)
fmt.Fprintf(w, "put %s.%s.95-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[2], shortHostname)
fmt.Fprintf(w, "put %s.%s.99-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[3], shortHostname)
fmt.Fprintf(w, "put %s.%s.999-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[4], shortHostname)
case *Meter:
m := metric.Snapshot()
fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, m.Count(), shortHostname)
fmt.Fprintf(w, "put %s.%s.one-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate1(), shortHostname)
fmt.Fprintf(w, "put %s.%s.five-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate5(), shortHostname)
fmt.Fprintf(w, "put %s.%s.fifteen-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate15(), shortHostname)
fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, m.RateMean(), shortHostname)
case *Timer:
t := metric.Snapshot()
ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999})
fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, t.Count(), shortHostname)
fmt.Fprintf(w, "put %s.%s.min %d %d host=%s\n", c.Prefix, name, now, t.Min()/int64(du), shortHostname)
fmt.Fprintf(w, "put %s.%s.max %d %d host=%s\n", c.Prefix, name, now, t.Max()/int64(du), shortHostname)
fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, t.Mean()/du, shortHostname)
fmt.Fprintf(w, "put %s.%s.std-dev %d %.2f host=%s\n", c.Prefix, name, now, t.StdDev()/du, shortHostname)
fmt.Fprintf(w, "put %s.%s.50-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[0]/du, shortHostname)
fmt.Fprintf(w, "put %s.%s.75-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[1]/du, shortHostname)
fmt.Fprintf(w, "put %s.%s.95-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[2]/du, shortHostname)
fmt.Fprintf(w, "put %s.%s.99-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[3]/du, shortHostname)
fmt.Fprintf(w, "put %s.%s.999-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[4]/du, shortHostname)
fmt.Fprintf(w, "put %s.%s.one-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate1(), shortHostname)
fmt.Fprintf(w, "put %s.%s.five-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate5(), shortHostname)
fmt.Fprintf(w, "put %s.%s.fifteen-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate15(), shortHostname)
fmt.Fprintf(w, "put %s.%s.mean-rate %d %.2f host=%s\n", c.Prefix, name, now, t.RateMean(), shortHostname)
}
})
}
func openTSDB(c *OpenTSDBConfig) error {
conn, err := net.DialTCP("tcp", nil, c.Addr)
if nil != err {
return err
}
defer conn.Close()
w := bufio.NewWriter(conn)
c.writeRegistry(w, time.Now().Unix(), getShortHostname())
w.Flush()
return nil
}