9045b79bc2
This PR modifies how the metrics library handles `Enabled`. Previously, the package `init` decided whether to serve real metrics or just dummy types. This has several drawbacks:
- During package init, we need to determine whether metrics are enabled or not. So we first hacked in a check for whether certain geth-specific command-line flags were enabled. Then we added a similar check for geth environment variables. Then we almost added a very elaborate check for a TOML config file, plus TOML parsing.
- Using "real" types and dummy types interchangeably means that everything is hidden behind interfaces. This has a performance penalty, and it also adds a lot of code.
This PR removes the interface stuff, uses concrete types, and allows the setting of Enabled to happen later. It is still assumed that `metrics.Enable()` is invoked early on. The somewhat 'heavy' operations, such as ticking meters and exp-decay, now check the enable flag to prevent resource leaks. The change may be large, but it's mostly pretty trivial, and since the last time I gutted the metrics, I have ensured that we have fairly good test coverage. --------- Co-authored-by: Felix Lange <fjl@twurst.com>
129 lines
5.9 KiB
Go
129 lines
5.9 KiB
Go
package metrics
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"net"
|
|
"os"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// shortHostName caches the value computed by getShortHostname. It starts out
// empty and is filled in lazily on the first call to that function.
var shortHostName = ""
|
|
|
|
// OpenTSDBConfig provides a container with configuration parameters for
|
|
// the OpenTSDB exporter
|
|
type OpenTSDBConfig struct {
|
|
Addr *net.TCPAddr // Network address to connect to
|
|
Registry Registry // Registry to be exported
|
|
FlushInterval time.Duration // Flush interval
|
|
DurationUnit time.Duration // Time conversion unit for durations
|
|
Prefix string // Prefix to be prepended to metric names
|
|
}
|
|
|
|
// OpenTSDB is a blocking exporter function which reports metrics in r
|
|
// to a TSDB server located at addr, flushing them every d duration
|
|
// and prepending metric names with prefix.
|
|
func OpenTSDB(r Registry, d time.Duration, prefix string, addr *net.TCPAddr) {
|
|
OpenTSDBWithConfig(OpenTSDBConfig{
|
|
Addr: addr,
|
|
Registry: r,
|
|
FlushInterval: d,
|
|
DurationUnit: time.Nanosecond,
|
|
Prefix: prefix,
|
|
})
|
|
}
|
|
|
|
// OpenTSDBWithConfig is a blocking exporter function just like OpenTSDB,
|
|
// but it takes a OpenTSDBConfig instead.
|
|
func OpenTSDBWithConfig(c OpenTSDBConfig) {
|
|
for range time.Tick(c.FlushInterval) {
|
|
if err := openTSDB(&c); nil != err {
|
|
log.Println(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func getShortHostname() string {
|
|
if shortHostName == "" {
|
|
host, _ := os.Hostname()
|
|
if index := strings.Index(host, "."); index > 0 {
|
|
shortHostName = host[:index]
|
|
} else {
|
|
shortHostName = host
|
|
}
|
|
}
|
|
return shortHostName
|
|
}
|
|
|
|
// writeRegistry writes the registry-metrics on the opentsb format.
|
|
func (c *OpenTSDBConfig) writeRegistry(w io.Writer, now int64, shortHostname string) {
|
|
du := float64(c.DurationUnit)
|
|
|
|
c.Registry.Each(func(name string, i interface{}) {
|
|
switch metric := i.(type) {
|
|
case *Counter:
|
|
fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, metric.Snapshot().Count(), shortHostname)
|
|
case *CounterFloat64:
|
|
fmt.Fprintf(w, "put %s.%s.count %d %f host=%s\n", c.Prefix, name, now, metric.Snapshot().Count(), shortHostname)
|
|
case *Gauge:
|
|
fmt.Fprintf(w, "put %s.%s.value %d %d host=%s\n", c.Prefix, name, now, metric.Snapshot().Value(), shortHostname)
|
|
case *GaugeFloat64:
|
|
fmt.Fprintf(w, "put %s.%s.value %d %f host=%s\n", c.Prefix, name, now, metric.Snapshot().Value(), shortHostname)
|
|
case *GaugeInfo:
|
|
fmt.Fprintf(w, "put %s.%s.value %d %s host=%s\n", c.Prefix, name, now, metric.Snapshot().Value().String(), shortHostname)
|
|
case Histogram:
|
|
h := metric.Snapshot()
|
|
ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999})
|
|
fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, h.Count(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.min %d %d host=%s\n", c.Prefix, name, now, h.Min(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.max %d %d host=%s\n", c.Prefix, name, now, h.Max(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, h.Mean(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.std-dev %d %.2f host=%s\n", c.Prefix, name, now, h.StdDev(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.50-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[0], shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.75-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[1], shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.95-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[2], shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.99-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[3], shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.999-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[4], shortHostname)
|
|
case *Meter:
|
|
m := metric.Snapshot()
|
|
fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, m.Count(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.one-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate1(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.five-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate5(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.fifteen-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate15(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, m.RateMean(), shortHostname)
|
|
case *Timer:
|
|
t := metric.Snapshot()
|
|
ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999})
|
|
fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, t.Count(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.min %d %d host=%s\n", c.Prefix, name, now, t.Min()/int64(du), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.max %d %d host=%s\n", c.Prefix, name, now, t.Max()/int64(du), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, t.Mean()/du, shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.std-dev %d %.2f host=%s\n", c.Prefix, name, now, t.StdDev()/du, shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.50-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[0]/du, shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.75-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[1]/du, shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.95-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[2]/du, shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.99-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[3]/du, shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.999-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[4]/du, shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.one-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate1(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.five-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate5(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.fifteen-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate15(), shortHostname)
|
|
fmt.Fprintf(w, "put %s.%s.mean-rate %d %.2f host=%s\n", c.Prefix, name, now, t.RateMean(), shortHostname)
|
|
}
|
|
})
|
|
}
|
|
|
|
func openTSDB(c *OpenTSDBConfig) error {
|
|
conn, err := net.DialTCP("tcp", nil, c.Addr)
|
|
if nil != err {
|
|
return err
|
|
}
|
|
defer conn.Close()
|
|
w := bufio.NewWriter(conn)
|
|
c.writeRegistry(w, time.Now().Unix(), getShortHostname())
|
|
w.Flush()
|
|
return nil
|
|
}
|