7473c93668
We don't have a UDP protocol which specifies any messages that will be 4KB. Aside from being implemented for months and a necessity for encryption and piggy-backing packets, 1280 bytes is ideal, and means this TODO can be completed! Why 1280 bytes? * It's less than the default MTU for most WAN/LAN networks. That means fewer fragmented datagrams (esp. on well-connected networks). * Fragmented datagrams and dropped packets suck and add latency while the OS waits for a dropped fragment that will never arrive (blocking readLoop()) * Most of our packets are < 1280 bytes. * 1280 bytes is the minimum datagram size and MTU for IPv6 -- on IPv6, a datagram < 1280 bytes will *never* be fragmented. UDP datagrams are dropped. A lot! And fragmented datagrams are worse. If a datagram has a 30% chance of being dropped, then a datagram split into two fragments has roughly a 51% chance of being dropped (1 - 0.7^2), since losing either fragment loses the whole datagram. More importantly, we have signed packets and can't do anything with a packet unless we receive the entire datagram because the signature can't be verified. The same is true when we have encrypted packets. So the solution here to picking an ideal buffer size for receiving datagrams is a number under 1400 bytes. And the lower-bound value for IPv6 of 1280 bytes makes it a non-decision. On IPv4 most ISPs and 3G/4G/LTE networks have an MTU just over 1400 -- and *never* over 1500. Never -- that means packets over 1500 (in reality: ~1450) bytes are fragmented. And probably dropped a lot. Just to prove the point, here are pings sending non-fragmented packets over wifi/ISP, and a second set of pings via cell-phone tethering. 
It's important to note that, if *any* router between my system and the EC2 node has a lower MTU, the message would not go through: On wifi w/normal ISP: localhost:Debug $ ping -D -s 1450 52.6.250.242 PING 52.6.250.242 (52.6.250.242): 1450 data bytes 1458 bytes from 52.6.250.242: icmp_seq=0 ttl=42 time=104.831 ms 1458 bytes from 52.6.250.242: icmp_seq=1 ttl=42 time=119.004 ms ^C --- 52.6.250.242 ping statistics --- 2 packets transmitted, 2 packets received, 0.0% packet loss round-trip min/avg/max/stddev = 104.831/111.918/119.004/7.087 ms localhost:Debug $ ping -D -s 1480 52.6.250.242 PING 52.6.250.242 (52.6.250.242): 1480 data bytes ping: sendto: Message too long ping: sendto: Message too long Request timeout for icmp_seq 0 ping: sendto: Message too long Request timeout for icmp_seq 1 Tethering to O2: localhost:Debug $ ping -D -s 1480 52.6.250.242 PING 52.6.250.242 (52.6.250.242): 1480 data bytes ping: sendto: Message too long ping: sendto: Message too long Request timeout for icmp_seq 0 ^C --- 52.6.250.242 ping statistics --- 2 packets transmitted, 0 packets received, 100.0% packet loss localhost:Debug $ ping -D -s 1450 52.6.250.242 PING 52.6.250.242 (52.6.250.242): 1450 data bytes 1458 bytes from 52.6.250.242: icmp_seq=0 ttl=42 time=107.844 ms 1458 bytes from 52.6.250.242: icmp_seq=1 ttl=42 time=105.127 ms 1458 bytes from 52.6.250.242: icmp_seq=2 ttl=42 time=120.483 ms 1458 bytes from 52.6.250.242: icmp_seq=3 ttl=42 time=102.136 ms
532 lines
14 KiB
Go
532 lines
14 KiB
Go
package discover
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/ecdsa"
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"time"
|
|
|
|
"github.com/ethereum/go-ethereum/crypto"
|
|
"github.com/ethereum/go-ethereum/logger"
|
|
"github.com/ethereum/go-ethereum/logger/glog"
|
|
"github.com/ethereum/go-ethereum/p2p/nat"
|
|
"github.com/ethereum/go-ethereum/rlp"
|
|
)
|
|
|
|
// Version is the discovery protocol version. It is placed in the Version
// field of outgoing ping packets, and incoming pings that carry a different
// value are rejected with errBadVersion.
const Version = 4
|
|
|
|
// Errors returned by the UDP discovery layer.
var (
	// Packet validation failures.
	errPacketTooSmall = errors.New("too small")        // datagram shorter than the frame header plus type byte
	errBadHash        = errors.New("bad hash")         // leading hash does not match the rest of the datagram
	errExpired        = errors.New("expired")          // packet expiration timestamp is in the past
	errBadVersion     = errors.New("version mismatch") // ping carried a Version other than ours

	// Request/reply bookkeeping failures.
	errUnsolicitedReply = errors.New("unsolicited reply") // pong/neighbors without a matching pending request
	errUnknownNode      = errors.New("unknown node")      // findnode from a node that is not in the database
	errTimeout          = errors.New("RPC timeout")       // no (complete) reply before the deadline
	errClosed           = errors.New("socket closed")     // transport is shutting down
)
|
|
|
|
// Timeouts
const (
	// respTimeout is how long loop keeps a pending reply matcher alive
	// before failing it with errTimeout.
	respTimeout = 500 * time.Millisecond

	// sendTimeout is not referenced in this part of the file.
	// NOTE(review): presumably a write deadline for outgoing packets — confirm.
	sendTimeout = 500 * time.Millisecond

	// expiration is added to the current time to produce the Expiration
	// field of every outgoing packet.
	expiration = 20 * time.Second

	// refreshInterval is the period of the ticker in loop that triggers
	// a table refresh.
	refreshInterval = time.Hour
)
|
|
|
|
// RPC packet types. The value is the single type byte that precedes the
// RLP payload inside a packet.
const (
	// pingPacket starts at 1 because zero is 'reserved'.
	pingPacket = iota + 1
	// pongPacket is the reply to pingPacket.
	pongPacket
	// findnodePacket asks for nodes close to a target.
	findnodePacket
	// neighborsPacket is the reply to findnodePacket.
	neighborsPacket
)
|
|
|
|
// RPC request structures
|
|
type (
|
|
ping struct {
|
|
Version uint
|
|
From, To rpcEndpoint
|
|
Expiration uint64
|
|
}
|
|
|
|
// pong is the reply to ping.
|
|
pong struct {
|
|
// This field should mirror the UDP envelope address
|
|
// of the ping packet, which provides a way to discover the
|
|
// the external address (after NAT).
|
|
To rpcEndpoint
|
|
|
|
ReplyTok []byte // This contains the hash of the ping packet.
|
|
Expiration uint64 // Absolute timestamp at which the packet becomes invalid.
|
|
}
|
|
|
|
// findnode is a query for nodes close to the given target.
|
|
findnode struct {
|
|
Target NodeID // doesn't need to be an actual public key
|
|
Expiration uint64
|
|
}
|
|
|
|
// reply to findnode
|
|
neighbors struct {
|
|
Nodes []rpcNode
|
|
Expiration uint64
|
|
}
|
|
|
|
rpcNode struct {
|
|
IP net.IP // len 4 for IPv4 or 16 for IPv6
|
|
UDP uint16 // for discovery protocol
|
|
TCP uint16 // for RLPx protocol
|
|
ID NodeID
|
|
}
|
|
|
|
rpcEndpoint struct {
|
|
IP net.IP // len 4 for IPv4 or 16 for IPv6
|
|
UDP uint16 // for discovery protocol
|
|
TCP uint16 // for RLPx protocol
|
|
}
|
|
)
|
|
|
|
func makeEndpoint(addr *net.UDPAddr, tcpPort uint16) rpcEndpoint {
|
|
ip := addr.IP.To4()
|
|
if ip == nil {
|
|
ip = addr.IP.To16()
|
|
}
|
|
return rpcEndpoint{IP: ip, UDP: uint16(addr.Port), TCP: tcpPort}
|
|
}
|
|
|
|
func nodeFromRPC(rn rpcNode) (n *Node, valid bool) {
|
|
// TODO: don't accept localhost, LAN addresses from internet hosts
|
|
// TODO: check public key is on secp256k1 curve
|
|
if rn.IP.IsMulticast() || rn.IP.IsUnspecified() || rn.UDP == 0 {
|
|
return nil, false
|
|
}
|
|
return newNode(rn.ID, rn.IP, rn.UDP, rn.TCP), true
|
|
}
|
|
|
|
func nodeToRPC(n *Node) rpcNode {
|
|
return rpcNode{ID: n.ID, IP: n.IP, UDP: n.UDP, TCP: n.TCP}
|
|
}
|
|
|
|
type packet interface {
|
|
handle(t *udp, from *net.UDPAddr, fromID NodeID, mac []byte) error
|
|
}
|
|
|
|
// conn is the subset of socket operations the udp transport needs.
// ListenUDP passes the connection returned by net.ListenUDP as a conn.
type conn interface {
	// ReadFromUDP reads one datagram into b and reports its sender.
	ReadFromUDP(b []byte) (n int, addr *net.UDPAddr, err error)
	// WriteToUDP sends b as one datagram to addr.
	WriteToUDP(b []byte, addr *net.UDPAddr) (n int, err error)
	// Close shuts the socket down.
	Close() error
	// LocalAddr returns the address the socket is bound to; newUDP
	// requires it to be a *net.UDPAddr.
	LocalAddr() net.Addr
}
|
|
|
|
// udp implements the RPC protocol.
|
|
type udp struct {
|
|
conn conn
|
|
priv *ecdsa.PrivateKey
|
|
ourEndpoint rpcEndpoint
|
|
|
|
addpending chan *pending
|
|
gotreply chan reply
|
|
|
|
closing chan struct{}
|
|
nat nat.Interface
|
|
|
|
*Table
|
|
}
|
|
|
|
// pending represents a pending reply.
|
|
//
|
|
// some implementations of the protocol wish to send more than one
|
|
// reply packet to findnode. in general, any neighbors packet cannot
|
|
// be matched up with a specific findnode packet.
|
|
//
|
|
// our implementation handles this by storing a callback function for
|
|
// each pending reply. incoming packets from a node are dispatched
|
|
// to all the callback functions for that node.
|
|
type pending struct {
|
|
// these fields must match in the reply.
|
|
from NodeID
|
|
ptype byte
|
|
|
|
// time when the request must complete
|
|
deadline time.Time
|
|
|
|
// callback is called when a matching reply arrives. if it returns
|
|
// true, the callback is removed from the pending reply queue.
|
|
// if it returns false, the reply is considered incomplete and
|
|
// the callback will be invoked again for the next matching reply.
|
|
callback func(resp interface{}) (done bool)
|
|
|
|
// errc receives nil when the callback indicates completion or an
|
|
// error if no further reply is received within the timeout.
|
|
errc chan<- error
|
|
}
|
|
|
|
type reply struct {
|
|
from NodeID
|
|
ptype byte
|
|
data interface{}
|
|
// loop indicates whether there was
|
|
// a matching request by sending on this channel.
|
|
matched chan<- bool
|
|
}
|
|
|
|
// ListenUDP returns a new table that listens for UDP packets on laddr.
|
|
func ListenUDP(priv *ecdsa.PrivateKey, laddr string, natm nat.Interface, nodeDBPath string) (*Table, error) {
|
|
addr, err := net.ResolveUDPAddr("udp", laddr)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
conn, err := net.ListenUDP("udp", addr)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tab, _ := newUDP(priv, conn, natm, nodeDBPath)
|
|
glog.V(logger.Info).Infoln("Listening,", tab.self)
|
|
return tab, nil
|
|
}
|
|
|
|
func newUDP(priv *ecdsa.PrivateKey, c conn, natm nat.Interface, nodeDBPath string) (*Table, *udp) {
|
|
udp := &udp{
|
|
conn: c,
|
|
priv: priv,
|
|
closing: make(chan struct{}),
|
|
gotreply: make(chan reply),
|
|
addpending: make(chan *pending),
|
|
}
|
|
realaddr := c.LocalAddr().(*net.UDPAddr)
|
|
if natm != nil {
|
|
if !realaddr.IP.IsLoopback() {
|
|
go nat.Map(natm, udp.closing, "udp", realaddr.Port, realaddr.Port, "ethereum discovery")
|
|
}
|
|
// TODO: react to external IP changes over time.
|
|
if ext, err := natm.ExternalIP(); err == nil {
|
|
realaddr = &net.UDPAddr{IP: ext, Port: realaddr.Port}
|
|
}
|
|
}
|
|
// TODO: separate TCP port
|
|
udp.ourEndpoint = makeEndpoint(realaddr, uint16(realaddr.Port))
|
|
udp.Table = newTable(udp, PubkeyID(&priv.PublicKey), realaddr, nodeDBPath)
|
|
go udp.loop()
|
|
go udp.readLoop()
|
|
return udp.Table, udp
|
|
}
|
|
|
|
func (t *udp) close() {
|
|
close(t.closing)
|
|
t.conn.Close()
|
|
// TODO: wait for the loops to end.
|
|
}
|
|
|
|
// ping sends a ping message to the given node and waits for a reply.
|
|
func (t *udp) ping(toid NodeID, toaddr *net.UDPAddr) error {
|
|
// TODO: maybe check for ReplyTo field in callback to measure RTT
|
|
errc := t.pending(toid, pongPacket, func(interface{}) bool { return true })
|
|
t.send(toaddr, pingPacket, ping{
|
|
Version: Version,
|
|
From: t.ourEndpoint,
|
|
To: makeEndpoint(toaddr, 0), // TODO: maybe use known TCP port from DB
|
|
Expiration: uint64(time.Now().Add(expiration).Unix()),
|
|
})
|
|
return <-errc
|
|
}
|
|
|
|
func (t *udp) waitping(from NodeID) error {
|
|
return <-t.pending(from, pingPacket, func(interface{}) bool { return true })
|
|
}
|
|
|
|
// findnode sends a findnode request to the given node and waits until
|
|
// the node has sent up to k neighbors.
|
|
func (t *udp) findnode(toid NodeID, toaddr *net.UDPAddr, target NodeID) ([]*Node, error) {
|
|
nodes := make([]*Node, 0, bucketSize)
|
|
nreceived := 0
|
|
errc := t.pending(toid, neighborsPacket, func(r interface{}) bool {
|
|
reply := r.(*neighbors)
|
|
for _, rn := range reply.Nodes {
|
|
nreceived++
|
|
if n, valid := nodeFromRPC(rn); valid {
|
|
nodes = append(nodes, n)
|
|
}
|
|
}
|
|
return nreceived >= bucketSize
|
|
})
|
|
t.send(toaddr, findnodePacket, findnode{
|
|
Target: target,
|
|
Expiration: uint64(time.Now().Add(expiration).Unix()),
|
|
})
|
|
err := <-errc
|
|
return nodes, err
|
|
}
|
|
|
|
// pending adds a reply callback to the pending reply queue.
|
|
// see the documentation of type pending for a detailed explanation.
|
|
func (t *udp) pending(id NodeID, ptype byte, callback func(interface{}) bool) <-chan error {
|
|
ch := make(chan error, 1)
|
|
p := &pending{from: id, ptype: ptype, callback: callback, errc: ch}
|
|
select {
|
|
case t.addpending <- p:
|
|
// loop will handle it
|
|
case <-t.closing:
|
|
ch <- errClosed
|
|
}
|
|
return ch
|
|
}
|
|
|
|
func (t *udp) handleReply(from NodeID, ptype byte, req packet) bool {
|
|
matched := make(chan bool)
|
|
select {
|
|
case t.gotreply <- reply{from, ptype, req, matched}:
|
|
// loop will handle it
|
|
return <-matched
|
|
case <-t.closing:
|
|
return false
|
|
}
|
|
}
|
|
|
|
// loop runs in its own goroutin. it keeps track of
|
|
// the refresh timer and the pending reply queue.
|
|
func (t *udp) loop() {
|
|
var (
|
|
pending []*pending
|
|
nextDeadline time.Time
|
|
timeout = time.NewTimer(0)
|
|
refresh = time.NewTicker(refreshInterval)
|
|
)
|
|
<-timeout.C // ignore first timeout
|
|
defer refresh.Stop()
|
|
defer timeout.Stop()
|
|
|
|
rearmTimeout := func() {
|
|
now := time.Now()
|
|
if len(pending) == 0 || now.Before(nextDeadline) {
|
|
return
|
|
}
|
|
nextDeadline = pending[0].deadline
|
|
timeout.Reset(nextDeadline.Sub(now))
|
|
}
|
|
|
|
for {
|
|
select {
|
|
case <-refresh.C:
|
|
go t.refresh()
|
|
|
|
case <-t.closing:
|
|
for _, p := range pending {
|
|
p.errc <- errClosed
|
|
}
|
|
pending = nil
|
|
return
|
|
|
|
case p := <-t.addpending:
|
|
p.deadline = time.Now().Add(respTimeout)
|
|
pending = append(pending, p)
|
|
rearmTimeout()
|
|
|
|
case r := <-t.gotreply:
|
|
var matched bool
|
|
for i := 0; i < len(pending); i++ {
|
|
if p := pending[i]; p.from == r.from && p.ptype == r.ptype {
|
|
matched = true
|
|
if p.callback(r.data) {
|
|
// callback indicates the request is done, remove it.
|
|
p.errc <- nil
|
|
copy(pending[i:], pending[i+1:])
|
|
pending = pending[:len(pending)-1]
|
|
i--
|
|
}
|
|
}
|
|
}
|
|
r.matched <- matched
|
|
|
|
case now := <-timeout.C:
|
|
// notify and remove callbacks whose deadline is in the past.
|
|
i := 0
|
|
for ; i < len(pending) && now.After(pending[i].deadline); i++ {
|
|
pending[i].errc <- errTimeout
|
|
}
|
|
if i > 0 {
|
|
copy(pending, pending[i:])
|
|
pending = pending[:len(pending)-i]
|
|
}
|
|
rearmTimeout()
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sizes of the fixed-length frame that precedes the type byte and RLP
// payload of every packet.
const (
	// macSize is the length in bytes of the hash at the front of a packet.
	macSize = 256 / 8
	// sigSize is the length in bytes of the signature that follows the hash.
	sigSize = 520 / 8
	// headSize is the space of packet frame data: hash plus signature.
	headSize = macSize + sigSize
)
|
|
|
|
// headSpace is a block of headSize zero bytes. encodePacket writes it at the
// start of every packet as a placeholder that is later overwritten with the
// hash and the signature.
var headSpace = make([]byte, headSize)
|
|
|
|
func (t *udp) send(toaddr *net.UDPAddr, ptype byte, req interface{}) error {
|
|
packet, err := encodePacket(t.priv, ptype, req)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
glog.V(logger.Detail).Infof(">>> %v %T\n", toaddr, req)
|
|
if _, err = t.conn.WriteToUDP(packet, toaddr); err != nil {
|
|
glog.V(logger.Detail).Infoln("UDP send failed:", err)
|
|
}
|
|
return err
|
|
}
|
|
|
|
func encodePacket(priv *ecdsa.PrivateKey, ptype byte, req interface{}) ([]byte, error) {
|
|
b := new(bytes.Buffer)
|
|
b.Write(headSpace)
|
|
b.WriteByte(ptype)
|
|
if err := rlp.Encode(b, req); err != nil {
|
|
glog.V(logger.Error).Infoln("error encoding packet:", err)
|
|
return nil, err
|
|
}
|
|
packet := b.Bytes()
|
|
sig, err := crypto.Sign(crypto.Sha3(packet[headSize:]), priv)
|
|
if err != nil {
|
|
glog.V(logger.Error).Infoln("could not sign packet:", err)
|
|
return nil, err
|
|
}
|
|
copy(packet[macSize:], sig)
|
|
// add the hash to the front. Note: this doesn't protect the
|
|
// packet in any way. Our public key will be part of this hash in
|
|
// The future.
|
|
copy(packet, crypto.Sha3(packet[macSize:]))
|
|
return packet, nil
|
|
}
|
|
|
|
// readLoop runs in its own goroutine. it handles incoming UDP packets.
|
|
func (t *udp) readLoop() {
|
|
defer t.conn.Close()
|
|
buf := make([]byte, 1280)
|
|
for {
|
|
nbytes, from, err := t.conn.ReadFromUDP(buf)
|
|
if err != nil {
|
|
return
|
|
}
|
|
t.handlePacket(from, buf[:nbytes])
|
|
}
|
|
}
|
|
|
|
func (t *udp) handlePacket(from *net.UDPAddr, buf []byte) error {
|
|
packet, fromID, hash, err := decodePacket(buf)
|
|
if err != nil {
|
|
glog.V(logger.Debug).Infof("Bad packet from %v: %v\n", from, err)
|
|
return err
|
|
}
|
|
status := "ok"
|
|
if err = packet.handle(t, from, fromID, hash); err != nil {
|
|
status = err.Error()
|
|
}
|
|
glog.V(logger.Detail).Infof("<<< %v %T: %s\n", from, packet, status)
|
|
return err
|
|
}
|
|
|
|
func decodePacket(buf []byte) (packet, NodeID, []byte, error) {
|
|
if len(buf) < headSize+1 {
|
|
return nil, NodeID{}, nil, errPacketTooSmall
|
|
}
|
|
hash, sig, sigdata := buf[:macSize], buf[macSize:headSize], buf[headSize:]
|
|
shouldhash := crypto.Sha3(buf[macSize:])
|
|
if !bytes.Equal(hash, shouldhash) {
|
|
return nil, NodeID{}, nil, errBadHash
|
|
}
|
|
fromID, err := recoverNodeID(crypto.Sha3(buf[headSize:]), sig)
|
|
if err != nil {
|
|
return nil, NodeID{}, hash, err
|
|
}
|
|
var req packet
|
|
switch ptype := sigdata[0]; ptype {
|
|
case pingPacket:
|
|
req = new(ping)
|
|
case pongPacket:
|
|
req = new(pong)
|
|
case findnodePacket:
|
|
req = new(findnode)
|
|
case neighborsPacket:
|
|
req = new(neighbors)
|
|
default:
|
|
return nil, fromID, hash, fmt.Errorf("unknown type: %d", ptype)
|
|
}
|
|
err = rlp.DecodeBytes(sigdata[1:], req)
|
|
return req, fromID, hash, err
|
|
}
|
|
|
|
func (req *ping) handle(t *udp, from *net.UDPAddr, fromID NodeID, mac []byte) error {
|
|
if expired(req.Expiration) {
|
|
return errExpired
|
|
}
|
|
if req.Version != Version {
|
|
return errBadVersion
|
|
}
|
|
t.send(from, pongPacket, pong{
|
|
To: makeEndpoint(from, req.From.TCP),
|
|
ReplyTok: mac,
|
|
Expiration: uint64(time.Now().Add(expiration).Unix()),
|
|
})
|
|
if !t.handleReply(fromID, pingPacket, req) {
|
|
// Note: we're ignoring the provided IP address right now
|
|
go t.bond(true, fromID, from, req.From.TCP)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (req *pong) handle(t *udp, from *net.UDPAddr, fromID NodeID, mac []byte) error {
|
|
if expired(req.Expiration) {
|
|
return errExpired
|
|
}
|
|
if !t.handleReply(fromID, pongPacket, req) {
|
|
return errUnsolicitedReply
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (req *findnode) handle(t *udp, from *net.UDPAddr, fromID NodeID, mac []byte) error {
|
|
if expired(req.Expiration) {
|
|
return errExpired
|
|
}
|
|
if t.db.node(fromID) == nil {
|
|
// No bond exists, we don't process the packet. This prevents
|
|
// an attack vector where the discovery protocol could be used
|
|
// to amplify traffic in a DDOS attack. A malicious actor
|
|
// would send a findnode request with the IP address and UDP
|
|
// port of the target as the source address. The recipient of
|
|
// the findnode packet would then send a neighbors packet
|
|
// (which is a much bigger packet than findnode) to the victim.
|
|
return errUnknownNode
|
|
}
|
|
target := crypto.Sha3Hash(req.Target[:])
|
|
t.mutex.Lock()
|
|
closest := t.closest(target, bucketSize).entries
|
|
t.mutex.Unlock()
|
|
|
|
// TODO: this conversion could use a cached version of the slice
|
|
closestrpc := make([]rpcNode, len(closest))
|
|
for i, n := range closest {
|
|
closestrpc[i] = nodeToRPC(n)
|
|
}
|
|
t.send(from, neighborsPacket, neighbors{
|
|
Nodes: closestrpc,
|
|
Expiration: uint64(time.Now().Add(expiration).Unix()),
|
|
})
|
|
return nil
|
|
}
|
|
|
|
func (req *neighbors) handle(t *udp, from *net.UDPAddr, fromID NodeID, mac []byte) error {
|
|
if expired(req.Expiration) {
|
|
return errExpired
|
|
}
|
|
if !t.handleReply(fromID, neighborsPacket, req) {
|
|
return errUnsolicitedReply
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// expired reports whether ts, a Unix timestamp in seconds, lies before the
// current time.
func expired(ts uint64) bool {
	deadline := time.Unix(int64(ts), 0)
	return time.Now().After(deadline)
}
|