diff --git a/eth/handler.go b/eth/handler.go index 6a8487649e..9d6a8c4225 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -38,6 +38,7 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/p2p" ) @@ -424,6 +425,13 @@ func (h *handler) runSnapExtension(peer *snap.Peer, handler snap.Handler) error defer h.peerWG.Done() if err := h.peers.registerSnapExtension(peer); err != nil { + if metrics.Enabled { + if peer.Inbound() { + snap.IngressRegistrationErrorMeter.Mark(1) + } else { + snap.EgressRegistrationErrorMeter.Mark(1) + } + } peer.Log().Warn("Snapshot extension registration failed", "err", err) return err } diff --git a/eth/protocols/eth/handshake.go b/eth/protocols/eth/handshake.go index 9a2769fa0d..ea16a85b1e 100644 --- a/eth/protocols/eth/handshake.go +++ b/eth/protocols/eth/handshake.go @@ -17,12 +17,14 @@ package eth import ( + "errors" "fmt" "math/big" "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/forkid" + "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/p2p" ) @@ -59,9 +61,11 @@ func (p *Peer) Handshake(network uint64, td *big.Int, head common.Hash, genesis select { case err := <-errc: if err != nil { + markError(p, err) return err } case <-timeout.C: + markError(p, p2p.DiscReadTimeout) return p2p.DiscReadTimeout } } @@ -105,3 +109,25 @@ func (p *Peer) readStatus(network uint64, status *StatusPacket, genesis common.H } return nil } + +// markError registers the error with the corresponding metric. 
+func markError(p *Peer, err error) {
+	if !metrics.Enabled {
+		return
+	}
+	m := meters.get(p.Inbound())
+	switch { // errors.Is, not errors.Unwrap: the bare p2p.DiscReadTimeout unwraps to nil
+	case errors.Is(err, errNetworkIDMismatch):
+		m.networkIDMismatch.Mark(1)
+	case errors.Is(err, errProtocolVersionMismatch):
+		m.protocolVersionMismatch.Mark(1)
+	case errors.Is(err, errGenesisMismatch):
+		m.genesisMismatch.Mark(1)
+	case errors.Is(err, errForkIDRejected):
+		m.forkidRejected.Mark(1)
+	case errors.Is(err, p2p.DiscReadTimeout):
+		m.timeoutError.Mark(1)
+	default:
+		m.peerError.Mark(1)
+	}
+}
diff --git a/eth/protocols/eth/metrics.go b/eth/protocols/eth/metrics.go
new file mode 100644
index 0000000000..5e0aee39f8
--- /dev/null
+++ b/eth/protocols/eth/metrics.go
@@ -0,0 +1,81 @@
+// Copyright 2023 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package eth
+
+import "github.com/ethereum/go-ethereum/metrics"
+
+// meters stores ingress and egress handshake meters.
+var meters bidirectionalMeters
+
+// bidirectionalMeters stores ingress and egress handshake meters.
+type bidirectionalMeters struct {
+	ingress *hsMeters
+	egress  *hsMeters
+}
+
+// get returns the corresponding meter depending if ingress or egress is
+// desired.
+func (h *bidirectionalMeters) get(ingress bool) *hsMeters { + if ingress { + return h.ingress + } + return h.egress +} + +// hsMeters is a collection of meters which track metrics related to the +// eth subprotocol handshake. +type hsMeters struct { + // peerError measures the number of errors related to incorrect peer + // behaviour, such as invalid message code, size, encoding, etc. + peerError metrics.Meter + + // timeoutError measures the number of timeouts. + timeoutError metrics.Meter + + // networkIDMismatch measures the number of network id mismatch errors. + networkIDMismatch metrics.Meter + + // protocolVersionMismatch measures the number of differing protocol + // versions. + protocolVersionMismatch metrics.Meter + + // genesisMismatch measures the number of differing genesises. + genesisMismatch metrics.Meter + + // forkidRejected measures the number of differing forkids. + forkidRejected metrics.Meter +} + +// newHandshakeMeters registers and returns handshake meters for the given +// base. 
+func newHandshakeMeters(base string) *hsMeters { + return &hsMeters{ + peerError: metrics.NewRegisteredMeter(base+"error/peer", nil), + timeoutError: metrics.NewRegisteredMeter(base+"error/timeout", nil), + networkIDMismatch: metrics.NewRegisteredMeter(base+"error/network", nil), + protocolVersionMismatch: metrics.NewRegisteredMeter(base+"error/version", nil), + genesisMismatch: metrics.NewRegisteredMeter(base+"error/genesis", nil), + forkidRejected: metrics.NewRegisteredMeter(base+"error/forkid", nil), + } +} + +func init() { + meters = bidirectionalMeters{ + ingress: newHandshakeMeters("eth/protocols/eth/ingress/handshake/"), + egress: newHandshakeMeters("eth/protocols/eth/egress/handshake/"), + } +} diff --git a/eth/protocols/snap/metrics.go b/eth/protocols/snap/metrics.go new file mode 100644 index 0000000000..a9f35ca447 --- /dev/null +++ b/eth/protocols/snap/metrics.go @@ -0,0 +1,29 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
+ +package snap + +import ( + metrics "github.com/ethereum/go-ethereum/metrics" +) + +var ( + ingressRegistrationErrorName = "eth/protocols/snap/ingress/registration/error" + egressRegistrationErrorName = "eth/protocols/snap/egress/registration/error" + + IngressRegistrationErrorMeter = metrics.NewRegisteredMeter(ingressRegistrationErrorName, nil) + EgressRegistrationErrorMeter = metrics.NewRegisteredMeter(egressRegistrationErrorName, nil) +) diff --git a/p2p/dial.go b/p2p/dial.go index 134e6e2eae..5e4ab1d50d 100644 --- a/p2p/dial.go +++ b/p2p/dial.go @@ -521,13 +521,14 @@ func (t *dialTask) resolve(d *dialScheduler) bool { // dial performs the actual connection attempt. func (t *dialTask) dial(d *dialScheduler, dest *enode.Node) error { + dialMeter.Mark(1) fd, err := d.dialer.Dial(d.ctx, t.dest) if err != nil { d.log.Trace("Dial error", "id", t.dest.ID(), "addr", nodeAddr(t.dest), "conn", t.flags, "err", cleanupDialErr(err)) + dialConnectionError.Mark(1) return &dialError{err} } - mfd := newMeteredConn(fd, false, &net.TCPAddr{IP: dest.IP(), Port: dest.TCP()}) - return d.setupFunc(mfd, t.flags, dest) + return d.setupFunc(newMeteredConn(fd), t.flags, dest) } func (t *dialTask) String() string { diff --git a/p2p/discover/metrics.go b/p2p/discover/metrics.go index bf1a2fa2b8..da8e9cb817 100644 --- a/p2p/discover/metrics.go +++ b/p2p/discover/metrics.go @@ -17,6 +17,7 @@ package discover import ( + "fmt" "net" "github.com/ethereum/go-ethereum/metrics" @@ -32,10 +33,17 @@ const ( ) var ( + bucketsCounter []metrics.Counter ingressTrafficMeter = metrics.NewRegisteredMeter(ingressMeterName, nil) egressTrafficMeter = metrics.NewRegisteredMeter(egressMeterName, nil) ) +func init() { + for i := 0; i < nBuckets; i++ { + bucketsCounter = append(bucketsCounter, metrics.NewRegisteredCounter(fmt.Sprintf("%s/bucket/%d/count", moduleName, i), nil)) + } +} + // meteredConn is a wrapper around a net.UDPConn that meters both the // inbound and outbound network traffic. 
type meteredUdpConn struct { diff --git a/p2p/discover/table.go b/p2p/discover/table.go index 1397348aec..b601f233d0 100644 --- a/p2p/discover/table.go +++ b/p2p/discover/table.go @@ -34,6 +34,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/p2p/enode" "github.com/ethereum/go-ethereum/p2p/netutil" ) @@ -80,7 +81,8 @@ type Table struct { closeReq chan struct{} closed chan struct{} - nodeAddedHook func(*node) // for testing + nodeAddedHook func(*bucket, *node) + nodeRemovedHook func(*bucket, *node) } // transport is implemented by the UDP transports. @@ -98,6 +100,7 @@ type bucket struct { entries []*node // live entries, sorted by time of last contact replacements []*node // recently seen nodes to be used if revalidation fails ips netutil.DistinctNetSet + index int } func newTable(t transport, db *enode.DB, cfg Config) (*Table, error) { @@ -119,7 +122,8 @@ func newTable(t transport, db *enode.DB, cfg Config) (*Table, error) { } for i := range tab.buckets { tab.buckets[i] = &bucket{ - ips: netutil.DistinctNetSet{Subnet: bucketSubnet, Limit: bucketIPLimit}, + index: i, + ips: netutil.DistinctNetSet{Subnet: bucketSubnet, Limit: bucketIPLimit}, } } tab.seedRand() @@ -128,6 +132,22 @@ func newTable(t transport, db *enode.DB, cfg Config) (*Table, error) { return tab, nil } +func newMeteredTable(t transport, db *enode.DB, cfg Config) (*Table, error) { + tab, err := newTable(t, db, cfg) + if err != nil { + return nil, err + } + if metrics.Enabled { + tab.nodeAddedHook = func(b *bucket, n *node) { + bucketsCounter[b.index].Inc(1) + } + tab.nodeRemovedHook = func(b *bucket, n *node) { + bucketsCounter[b.index].Dec(1) + } + } + return tab, nil +} + // Nodes returns all nodes contained in the table. 
func (tab *Table) Nodes() []*enode.Node { if !tab.isInitDone() { @@ -495,7 +515,7 @@ func (tab *Table) addSeenNode(n *node) { n.addedAt = time.Now() if tab.nodeAddedHook != nil { - tab.nodeAddedHook(n) + tab.nodeAddedHook(b, n) } } @@ -539,7 +559,7 @@ func (tab *Table) addVerifiedNode(n *node) { n.addedAt = time.Now() if tab.nodeAddedHook != nil { - tab.nodeAddedHook(n) + tab.nodeAddedHook(b, n) } } @@ -638,8 +658,16 @@ func (tab *Table) bumpInBucket(b *bucket, n *node) bool { } func (tab *Table) deleteInBucket(b *bucket, n *node) { + // Check if the node is actually in the bucket so the removed hook + // isn't called multiple times for the same node. + if !contains(b.entries, n.ID()) { + return + } b.entries = deleteNode(b.entries, n) tab.removeIP(b, n.IP()) + if tab.nodeRemovedHook != nil { + tab.nodeRemovedHook(b, n) + } } func contains(ns []*node, id enode.ID) bool { diff --git a/p2p/discover/v4_udp.go b/p2p/discover/v4_udp.go index ab335e2ced..988f16b01d 100644 --- a/p2p/discover/v4_udp.go +++ b/p2p/discover/v4_udp.go @@ -142,7 +142,7 @@ func ListenV4(c UDPConn, ln *enode.LocalNode, cfg Config) (*UDPv4, error) { log: cfg.Log, } - tab, err := newTable(t, ln.Database(), cfg) + tab, err := newMeteredTable(t, ln.Database(), cfg) if err != nil { return nil, err } diff --git a/p2p/discover/v4_udp_test.go b/p2p/discover/v4_udp_test.go index 21f0d75172..5add9cefa1 100644 --- a/p2p/discover/v4_udp_test.go +++ b/p2p/discover/v4_udp_test.go @@ -394,7 +394,7 @@ func TestUDPv4_pingMatchIP(t *testing.T) { func TestUDPv4_successfulPing(t *testing.T) { test := newUDPTest(t) added := make(chan *node, 1) - test.table.nodeAddedHook = func(n *node) { added <- n } + test.table.nodeAddedHook = func(b *bucket, n *node) { added <- n } defer test.close() // The remote side sends a ping packet to initiate the exchange. 
diff --git a/p2p/discover/v5_udp.go b/p2p/discover/v5_udp.go index 7bed9dbcfd..6ba7a90618 100644 --- a/p2p/discover/v5_udp.go +++ b/p2p/discover/v5_udp.go @@ -174,7 +174,7 @@ func newUDPv5(conn UDPConn, ln *enode.LocalNode, cfg Config) (*UDPv5, error) { cancelCloseCtx: cancelCloseCtx, } t.talk = newTalkSystem(t) - tab, err := newTable(t, t.db, cfg) + tab, err := newMeteredTable(t, t.db, cfg) if err != nil { return nil, err } diff --git a/p2p/metrics.go b/p2p/metrics.go index 1bb505cdfb..a6e36b91a8 100644 --- a/p2p/metrics.go +++ b/p2p/metrics.go @@ -19,30 +19,86 @@ package p2p import ( + "errors" "net" "github.com/ethereum/go-ethereum/metrics" ) const ( + // HandleHistName is the prefix of the per-packet serving time histograms. + HandleHistName = "p2p/handle" + // ingressMeterName is the prefix of the per-packet inbound metrics. ingressMeterName = "p2p/ingress" // egressMeterName is the prefix of the per-packet outbound metrics. egressMeterName = "p2p/egress" - - // HandleHistName is the prefix of the per-packet serving time histograms. 
- HandleHistName = "p2p/handle" ) var ( - ingressConnectMeter = metrics.NewRegisteredMeter("p2p/serves", nil) - ingressTrafficMeter = metrics.NewRegisteredMeter(ingressMeterName, nil) - egressConnectMeter = metrics.NewRegisteredMeter("p2p/dials", nil) - egressTrafficMeter = metrics.NewRegisteredMeter(egressMeterName, nil) - activePeerGauge = metrics.NewRegisteredGauge("p2p/peers", nil) + activePeerGauge metrics.Gauge = metrics.NilGauge{} + + ingressTrafficMeter = metrics.NewRegisteredMeter("p2p/ingress", nil) + egressTrafficMeter = metrics.NewRegisteredMeter("p2p/egress", nil) + + // general ingress/egress connection meters + serveMeter metrics.Meter = metrics.NilMeter{} + serveSuccessMeter metrics.Meter = metrics.NilMeter{} + dialMeter metrics.Meter = metrics.NilMeter{} + dialSuccessMeter metrics.Meter = metrics.NilMeter{} + dialConnectionError metrics.Meter = metrics.NilMeter{} + + // handshake error meters + dialTooManyPeers = metrics.NewRegisteredMeter("p2p/dials/error/saturated", nil) + dialAlreadyConnected = metrics.NewRegisteredMeter("p2p/dials/error/known", nil) + dialSelf = metrics.NewRegisteredMeter("p2p/dials/error/self", nil) + dialUselessPeer = metrics.NewRegisteredMeter("p2p/dials/error/useless", nil) + dialUnexpectedIdentity = metrics.NewRegisteredMeter("p2p/dials/error/id/unexpected", nil) + dialEncHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/enc", nil) + dialProtoHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/proto", nil) ) +func init() { + if !metrics.Enabled { + return + } + + activePeerGauge = metrics.NewRegisteredGauge("p2p/peers", nil) + serveMeter = metrics.NewRegisteredMeter("p2p/serves", nil) + serveSuccessMeter = metrics.NewRegisteredMeter("p2p/serves/success", nil) + dialMeter = metrics.NewRegisteredMeter("p2p/dials", nil) + dialSuccessMeter = metrics.NewRegisteredMeter("p2p/dials/success", nil) + dialConnectionError = metrics.NewRegisteredMeter("p2p/dials/error/connection", nil) +} + +// 
markDialError matches errors that occur while setting up a dial connection
+// to the corresponding meter.
+func markDialError(err error) {
+	if !metrics.Enabled {
+		return
+	}
+	if err2 := errors.Unwrap(err); err2 != nil {
+		err = err2
+	}
+	switch err {
+	case DiscTooManyPeers:
+		dialTooManyPeers.Mark(1)
+	case DiscAlreadyConnected:
+		dialAlreadyConnected.Mark(1)
+	case DiscSelf:
+		dialSelf.Mark(1)
+	case DiscUselessPeer:
+		dialUselessPeer.Mark(1)
+	case DiscUnexpectedIdentity:
+		dialUnexpectedIdentity.Mark(1)
+	case errEncHandshakeError:
+		dialEncHandshakeError.Mark(1)
+	case errProtoHandshakeError:
+		dialProtoHandshakeError.Mark(1)
+	}
+}
+
 // meteredConn is a wrapper around a net.Conn that meters both the
 // inbound and outbound network traffic.
 type meteredConn struct {
@@ -52,18 +108,10 @@ type meteredConn struct {
-// newMeteredConn creates a new metered connection, bumps the ingress or egress
-// connection meter and also increases the metered peer count. If the metrics
-// system is disabled, function returns the original connection.
-func newMeteredConn(conn net.Conn, ingress bool, addr *net.TCPAddr) net.Conn {
-	// Short circuit if metrics are disabled
+// newMeteredConn wraps conn in a meteredConn, which meters its inbound and
+// outbound traffic. Connection and peer counts are tracked elsewhere. If the
+// metrics system is disabled, the function returns the original connection.
+func newMeteredConn(conn net.Conn) net.Conn {
 	if !metrics.Enabled {
 		return conn
 	}
-	// Bump the connection counters and wrap the connection
-	if ingress {
-		ingressConnectMeter.Mark(1)
-	} else {
-		egressConnectMeter.Mark(1)
-	}
-	activePeerGauge.Inc(1)
 	return &meteredConn{Conn: conn}
 }
 
@@ -82,13 +130,3 @@ func (c *meteredConn) Write(b []byte) (n int, err error) {
 	egressTrafficMeter.Mark(int64(n))
 	return n, err
 }
-
-// Close delegates a close operation to the underlying connection, unregisters
-// 
-func (c *meteredConn) Close() error { - err := c.Conn.Close() - if err == nil { - activePeerGauge.Dec(1) - } - return err -} diff --git a/p2p/server.go b/p2p/server.go index bdfeb74c67..8c417635e6 100644 --- a/p2p/server.go +++ b/p2p/server.go @@ -64,7 +64,11 @@ const ( frameWriteTimeout = 20 * time.Second ) -var errServerStopped = errors.New("server stopped") +var ( + errServerStopped = errors.New("server stopped") + errEncHandshakeError = errors.New("rlpx enc error") + errProtoHandshakeError = errors.New("rlpx proto error") +) // Config holds Server options. type Config struct { @@ -772,7 +776,11 @@ running: srv.dialsched.peerAdded(c) if p.Inbound() { inboundCount++ + serveSuccessMeter.Mark(1) + } else { + dialSuccessMeter.Mark(1) } + activePeerGauge.Inc(1) } c.cont <- err @@ -785,6 +793,7 @@ running: if pd.Inbound() { inboundCount-- } + activePeerGauge.Dec(1) } } @@ -894,11 +903,8 @@ func (srv *Server) listenLoop() { continue } if remoteIP != nil { - var addr *net.TCPAddr - if tcp, ok := fd.RemoteAddr().(*net.TCPAddr); ok { - addr = tcp - } - fd = newMeteredConn(fd, true, addr) + fd = newMeteredConn(fd) + serveMeter.Mark(1) srv.log.Trace("Accepted connection", "addr", fd.RemoteAddr()) } go func() { @@ -939,6 +945,9 @@ func (srv *Server) SetupConn(fd net.Conn, flags connFlag, dialDest *enode.Node) err := srv.setupConn(c, flags, dialDest) if err != nil { + if !c.is(inboundConn) { + markDialError(err) + } c.close(err) } return err @@ -957,7 +966,7 @@ func (srv *Server) setupConn(c *conn, flags connFlag, dialDest *enode.Node) erro if dialDest != nil { dialPubkey := new(ecdsa.PublicKey) if err := dialDest.Load((*enode.Secp256k1)(dialPubkey)); err != nil { - err = errors.New("dial destination doesn't have a secp256k1 public key") + err = fmt.Errorf("%w: dial destination doesn't have a secp256k1 public key", errEncHandshakeError) srv.log.Trace("Setting up connection failed", "addr", c.fd.RemoteAddr(), "conn", c.flags, "err", err) return err } @@ -967,7 +976,7 @@ func 
(srv *Server) setupConn(c *conn, flags connFlag, dialDest *enode.Node) erro remotePubkey, err := c.doEncHandshake(srv.PrivateKey) if err != nil { srv.log.Trace("Failed RLPx handshake", "addr", c.fd.RemoteAddr(), "conn", c.flags, "err", err) - return err + return fmt.Errorf("%w: %v", errEncHandshakeError, err) } if dialDest != nil { c.node = dialDest @@ -985,7 +994,7 @@ func (srv *Server) setupConn(c *conn, flags connFlag, dialDest *enode.Node) erro phs, err := c.doProtoHandshake(srv.ourHandshake) if err != nil { clog.Trace("Failed p2p handshake", "err", err) - return err + return fmt.Errorf("%w: %v", errProtoHandshakeError, err) } if id := c.node.ID(); !bytes.Equal(crypto.Keccak256(phs.ID), id[:]) { clog.Trace("Wrong devp2p handshake identity", "phsid", hex.EncodeToString(phs.ID)) diff --git a/p2p/server_test.go b/p2p/server_test.go index f6f5700c5e..c8bf4c941c 100644 --- a/p2p/server_test.go +++ b/p2p/server_test.go @@ -370,8 +370,6 @@ func TestServerSetupConn(t *testing.T) { clientkey, srvkey = newkey(), newkey() clientpub = &clientkey.PublicKey srvpub = &srvkey.PublicKey - fooErr = errors.New("foo") - readErr = errors.New("read error") ) tests := []struct { dontstart bool @@ -389,10 +387,10 @@ func TestServerSetupConn(t *testing.T) { wantCloseErr: errServerStopped, }, { - tt: &setupTransport{pubkey: clientpub, encHandshakeErr: readErr}, + tt: &setupTransport{pubkey: clientpub, encHandshakeErr: errEncHandshakeError}, flags: inboundConn, wantCalls: "doEncHandshake,close,", - wantCloseErr: readErr, + wantCloseErr: errEncHandshakeError, }, { tt: &setupTransport{pubkey: clientpub, phs: protoHandshake{ID: randomID().Bytes()}}, @@ -402,11 +400,11 @@ func TestServerSetupConn(t *testing.T) { wantCloseErr: DiscUnexpectedIdentity, }, { - tt: &setupTransport{pubkey: clientpub, protoHandshakeErr: fooErr}, + tt: &setupTransport{pubkey: clientpub, protoHandshakeErr: errProtoHandshakeError}, dialDest: enode.NewV4(clientpub, nil, 0, 0), flags: dynDialedConn, wantCalls: 
"doEncHandshake,doProtoHandshake,close,", - wantCloseErr: fooErr, + wantCloseErr: errProtoHandshakeError, }, { tt: &setupTransport{pubkey: srvpub, phs: protoHandshake{ID: crypto.FromECDSAPub(srvpub)[1:]}},