From 94e4aa6ea9aabb5bf6244d9b38607b336703af98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Fri, 5 Jun 2015 11:53:46 +0300 Subject: [PATCH 1/2] eth/downloader: log hard timeouts and reset capacity --- eth/downloader/downloader.go | 1 + eth/downloader/peer.go | 3 +++ 2 files changed, 4 insertions(+) diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index d8dbef726d..626f47b7be 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -460,6 +460,7 @@ out: // 3) Amount and availability. if peer := d.peers.Peer(pid); peer != nil { peer.Demote() + glog.V(logger.Detail).Infof("%s: block delivery timeout", peer) } } // After removing bad peers make sure we actually have sufficient peer left to keep downloading diff --git a/eth/downloader/peer.go b/eth/downloader/peer.go index 43b50079b4..5fbc64648f 100644 --- a/eth/downloader/peer.go +++ b/eth/downloader/peer.go @@ -87,6 +87,9 @@ func (p *peer) SetIdle() { scale := 2.0 if time.Since(p.started) > blockSoftTTL { scale = 0.5 + if time.Since(p.started) > blockHardTTL { + scale = 1 / float64(MaxBlockFetch) // reduces capacity to 1 + } } for { // Calculate the new download bandwidth allowance From 328ef60b856f75bf664fb103bc54674d962bef2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Fri, 5 Jun 2015 12:37:48 +0300 Subject: [PATCH 2/2] eth/downloader: differentiate stale and nonexistent deliveries --- eth/downloader/downloader.go | 50 ++++++++++++++++++++++++------------ eth/downloader/queue.go | 15 ++++++++--- 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index 626f47b7be..cc1ceb6377 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -422,28 +422,46 @@ out: // in a reasonable time frame, ignore it's message. if peer := d.peers.Peer(blockPack.peerId); peer != nil { // Deliver the received chunk of blocks, and demote in case of errors - if err := d.queue.Deliver(blockPack.peerId, blockPack.blocks); err != nil { - if err == ErrInvalidChain { - // The hash chain is invalid (blocks are not ordered properly), abort - return err + err := d.queue.Deliver(blockPack.peerId, blockPack.blocks) + switch err { + case nil: + // If no blocks were delivered, demote the peer (need the delivery above) + if len(blockPack.blocks) == 0 { + peer.Demote() + peer.SetIdle() + glog.V(logger.Detail).Infof("%s: no blocks delivered", peer) + break } - // Peer did deliver, but some blocks were off, penalize + // All was successful, promote the peer + peer.Promote() + peer.SetIdle() + glog.V(logger.Detail).Infof("%s: delivered %d blocks", peer, len(blockPack.blocks)) + + case ErrInvalidChain: + // The hash chain is invalid (blocks are not ordered properly), abort + return err + + case errNoFetchesPending: + // Peer probably timed out with its delivery but came through + // in the end, demote, but allow to to pull from this peer. peer.Demote() peer.SetIdle() - glog.V(logger.Detail).Infof("%s: block delivery failed: %v", peer, err) - break - } - // If no blocks were delivered, demote the peer (above code is needed to mark the packet done!) - if len(blockPack.blocks) == 0 { + glog.V(logger.Detail).Infof("%s: out of bound delivery", peer) + + case errStaleDelivery: + // Delivered something completely else than requested, usually + // caused by a timeout and delivery during a new sync cycle. + // Don't set it to idle as the original request should still be + // in flight. + peer.Demote() + glog.V(logger.Detail).Infof("%s: stale delivery", peer) + + default: + // Peer did something semi-useful, demote but keep it around peer.Demote() peer.SetIdle() - glog.V(logger.Detail).Infof("%s: no blocks delivered", peer) - break + glog.V(logger.Detail).Infof("%s: delivery partially failed: %v", peer, err) } - // All was successful, promote the peer - peer.Promote() - peer.SetIdle() - glog.V(logger.Detail).Infof("%s: delivered %d blocks", peer, len(blockPack.blocks)) } case <-ticker.C: // Check for bad peers. Bad peers may indicate a peer not responding diff --git a/eth/downloader/queue.go b/eth/downloader/queue.go index 02fa667f19..671ffe51bb 100644 --- a/eth/downloader/queue.go +++ b/eth/downloader/queue.go @@ -20,6 +20,11 @@ const ( blockCacheLimit = 8 * MaxBlockFetch // Maximum number of blocks to cache before throttling the download ) +var ( + errNoFetchesPending = errors.New("no fetches pending") + errStaleDelivery = errors.New("stale delivery") +) + // fetchRequest is a currently running block retrieval operation. type fetchRequest struct { Peer *peer // Peer to which the request was sent @@ -293,7 +298,7 @@ func (q *queue) Deliver(id string, blocks []*types.Block) (err error) { // Short circuit if the blocks were never requested request := q.pendPool[id] if request == nil { - return errors.New("no fetches pending") + return errNoFetchesPending } delete(q.pendPool, id) @@ -309,7 +314,7 @@ func (q *queue) Deliver(id string, blocks []*types.Block) (err error) { // Skip any blocks that were not requested hash := block.Hash() if _, ok := request.Hashes[hash]; !ok { - errs = append(errs, fmt.Errorf("non-requested block %v", hash)) + errs = append(errs, fmt.Errorf("non-requested block %x", hash)) continue } // If a requested block falls out of the range, the hash chain is invalid @@ -326,11 +331,15 @@ func (q *queue) Deliver(id string, blocks []*types.Block) (err error) { delete(q.hashPool, hash) q.blockPool[hash] = int(block.NumberU64()) } - // Return all failed fetches to the queue + // Return all failed or missing fetches to the queue for hash, index := range request.Hashes { q.hashQueue.Push(hash, float32(index)) } + // If none of the blocks were good, it's a stale delivery if len(errs) != 0 { + if len(errs) == len(blocks) { + return errStaleDelivery + } return fmt.Errorf("multiple failures: %v", errs) } return nil