Stream subscriptions (#18355)

* swarm/network: eachBin now starts at kaddepth for nn

* swarm/network: fix Kademlia.EachBin

* swarm/network: fix kademlia.EachBin

* swarm/network: correct EachBin implementation according to requirements

* swarm/network: less addresses simplified tests

* swarm: calc kad depth outside loop in EachBin test

* swarm/network: removed printResults

* swarm/network: cleanup imports

* swarm/network: remove kademlia.EachBin; fix RequestSubscriptions and add unit test

* swarm/network/stream: address PR comments

* swarm/network/stream: package-wide subscriptionFunc

* swarm/network/stream: refactor to kad.EachConn
This commit is contained in:
holisticode 2019-01-11 09:08:09 -05:00 committed by Viktor Trón
parent d5cad488be
commit 88168ff5c5
5 changed files with 234 additions and 320 deletions

@ -356,37 +356,6 @@ func (k *Kademlia) Off(p *Peer) {
}
}
// EachBin is a two level nested iterator
// The outer iterator returns all bins that have known peers, in order from shallowest to deepest
// The inner iterator returns all peers per bin returned by the outer iterator, in no defined order
// TODO the po returned by the inner iterator is not reliable. However, it is not being used in this method
func (k *Kademlia) EachBin(base []byte, pof pot.Pof, o int, eachBinFunc func(conn *Peer, po int) bool) {
k.lock.RLock()
defer k.lock.RUnlock()
var startPo int
var endPo int
kadDepth := depthForPot(k.conns, k.NeighbourhoodSize, k.base)
k.conns.EachBin(base, Pof, o, func(po, size int, f func(func(val pot.Val, i int) bool) bool) bool {
if startPo > 0 && endPo != k.MaxProxDisplay {
startPo = endPo + 1
}
if po < kadDepth {
endPo = po
} else {
endPo = k.MaxProxDisplay
}
for bin := startPo; bin <= endPo; bin++ {
f(func(val pot.Val, _ int) bool {
return eachBinFunc(val.(*Peer), bin)
})
}
return true
})
}
// EachConn is an iterator with args (base, po, f) applies f to each live peer
// that has proximity order po or less as measured from the base
// if base is nil, kademlia base address is used

@ -1,4 +1,4 @@
// Copyright 2017 The go-ethereum Authors
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify

@ -106,43 +106,6 @@ func TestSyncingViaGlobalSync(t *testing.T) {
}
}
func TestSyncingViaDirectSubscribe(t *testing.T) {
if runtime.GOOS == "darwin" && os.Getenv("TRAVIS") == "true" {
t.Skip("Flaky on mac on travis")
}
//if nodes/chunks have been provided via commandline,
//run the tests with these values
if *nodes != 0 && *chunks != 0 {
log.Info(fmt.Sprintf("Running test with %d chunks and %d nodes...", *chunks, *nodes))
err := testSyncingViaDirectSubscribe(t, *chunks, *nodes)
if err != nil {
t.Fatal(err)
}
} else {
var nodeCnt []int
var chnkCnt []int
//if the `longrunning` flag has been provided
//run more test combinations
if *longrunning {
chnkCnt = []int{1, 8, 32, 256, 1024}
nodeCnt = []int{32, 16}
} else {
//default test
chnkCnt = []int{4, 32}
nodeCnt = []int{32, 16}
}
for _, chnk := range chnkCnt {
for _, n := range nodeCnt {
log.Info(fmt.Sprintf("Long running test with %d chunks and %d nodes...", chnk, n))
err := testSyncingViaDirectSubscribe(t, chnk, n)
if err != nil {
t.Fatal(err)
}
}
}
}
}
var simServiceMap = map[string]simulation.ServiceFunc{
"streamer": streamerFunc,
}
@ -323,235 +286,6 @@ func runSim(conf *synctestConfig, ctx context.Context, sim *simulation.Simulatio
})
}
/*
The test generates the given number of chunks
For every chunk generated, the nearest node addresses
are identified, we verify that the nodes closer to the
chunk addresses actually do have the chunks in their local stores.
The test loads a snapshot file to construct the swarm network,
assuming that the snapshot file identifies a healthy
kademlia network. The snapshot should have 'streamer' in its service list.
*/
func testSyncingViaDirectSubscribe(t *testing.T, chunkCount int, nodeCount int) error {
sim := simulation.New(map[string]simulation.ServiceFunc{
"streamer": func(ctx *adapters.ServiceContext, bucket *sync.Map) (s node.Service, cleanup func(), err error) {
n := ctx.Config.Node()
addr := network.NewAddr(n)
store, datadir, err := createTestLocalStorageForID(n.ID(), addr)
if err != nil {
return nil, nil, err
}
bucket.Store(bucketKeyStore, store)
localStore := store.(*storage.LocalStore)
netStore, err := storage.NewNetStore(localStore, nil)
if err != nil {
return nil, nil, err
}
kad := network.NewKademlia(addr.Over(), network.NewKadParams())
delivery := NewDelivery(kad, netStore)
netStore.NewNetFetcherFunc = network.NewFetcherFactory(dummyRequestFromPeers, true).New
r := NewRegistry(addr.ID(), delivery, netStore, state.NewInmemoryStore(), &RegistryOptions{
Retrieval: RetrievalDisabled,
Syncing: SyncingRegisterOnly,
}, nil)
bucket.Store(bucketKeyRegistry, r)
fileStore := storage.NewFileStore(netStore, storage.NewFileStoreParams())
bucket.Store(bucketKeyFileStore, fileStore)
cleanup = func() {
os.RemoveAll(datadir)
netStore.Close()
r.Close()
}
return r, cleanup, nil
},
})
defer sim.Close()
ctx, cancelSimRun := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancelSimRun()
conf := &synctestConfig{}
//map of discover ID to indexes of chunks expected at that ID
conf.idToChunksMap = make(map[enode.ID][]int)
//map of overlay address to discover ID
conf.addrToIDMap = make(map[string]enode.ID)
//array where the generated chunk hashes will be stored
conf.hashes = make([]storage.Address, 0)
err := sim.UploadSnapshot(fmt.Sprintf("testing/snapshot_%d.json", nodeCount))
if err != nil {
return err
}
if _, err := sim.WaitTillHealthy(ctx); err != nil {
return err
}
disconnections := sim.PeerEvents(
context.Background(),
sim.NodeIDs(),
simulation.NewPeerEventsFilter().Drop(),
)
var disconnected atomic.Value
go func() {
for d := range disconnections {
if d.Error != nil {
log.Error("peer drop", "node", d.NodeID, "peer", d.PeerID)
disconnected.Store(true)
}
}
}()
result := sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) error {
nodeIDs := sim.UpNodeIDs()
for _, n := range nodeIDs {
//get the kademlia overlay address from this ID
a := n.Bytes()
//append it to the array of all overlay addresses
conf.addrs = append(conf.addrs, a)
//the proximity calculation is on overlay addr,
//the p2p/simulations check func triggers on enode.ID,
//so we need to know which overlay addr maps to which nodeID
conf.addrToIDMap[string(a)] = n
}
var subscriptionCount int
filter := simulation.NewPeerEventsFilter().ReceivedMessages().Protocol("stream").MsgCode(4)
eventC := sim.PeerEvents(ctx, nodeIDs, filter)
for j, node := range nodeIDs {
log.Trace(fmt.Sprintf("Start syncing subscriptions: %d", j))
//start syncing!
item, ok := sim.NodeItem(node, bucketKeyRegistry)
if !ok {
return fmt.Errorf("No registry")
}
registry := item.(*Registry)
var cnt int
cnt, err = startSyncing(registry, conf)
if err != nil {
return err
}
//increment the number of subscriptions we need to wait for
//by the count returned from startSyncing (SYNC subscriptions)
subscriptionCount += cnt
}
for e := range eventC {
if e.Error != nil {
return e.Error
}
subscriptionCount--
if subscriptionCount == 0 {
break
}
}
//select a random node for upload
node := sim.Net.GetRandomUpNode()
item, ok := sim.NodeItem(node.ID(), bucketKeyStore)
if !ok {
return fmt.Errorf("No localstore")
}
lstore := item.(*storage.LocalStore)
hashes, err := uploadFileToSingleNodeStore(node.ID(), chunkCount, lstore)
if err != nil {
return err
}
conf.hashes = append(conf.hashes, hashes...)
mapKeysToNodes(conf)
if _, err := sim.WaitTillHealthy(ctx); err != nil {
return err
}
var globalStore mock.GlobalStorer
if *useMockStore {
globalStore = mockmem.NewGlobalStore()
}
// File retrieval check is repeated until all uploaded files are retrieved from all nodes
// or until the timeout is reached.
REPEAT:
for {
for _, id := range nodeIDs {
//for each expected chunk, check if it is in the local store
localChunks := conf.idToChunksMap[id]
for _, ch := range localChunks {
//get the real chunk by the index in the index array
chunk := conf.hashes[ch]
log.Trace(fmt.Sprintf("node has chunk: %s:", chunk))
//check if the expected chunk is indeed in the localstore
var err error
if *useMockStore {
//use the globalStore if the mockStore should be used; in that case,
//the complete localStore stack is bypassed for getting the chunk
_, err = globalStore.Get(common.BytesToAddress(id.Bytes()), chunk)
} else {
//use the actual localstore
item, ok := sim.NodeItem(id, bucketKeyStore)
if !ok {
return fmt.Errorf("Error accessing localstore")
}
lstore := item.(*storage.LocalStore)
_, err = lstore.Get(ctx, chunk)
}
if err != nil {
log.Debug(fmt.Sprintf("Chunk %s NOT found for id %s", chunk, id))
// Do not get crazy with logging the warn message
time.Sleep(500 * time.Millisecond)
continue REPEAT
}
log.Debug(fmt.Sprintf("Chunk %s IS FOUND for id %s", chunk, id))
}
}
return nil
}
})
if result.Error != nil {
return result.Error
}
if yes, ok := disconnected.Load().(bool); ok && yes {
t.Fatal("disconnect events received")
}
log.Info("Simulation ended")
return nil
}
//the server func to start syncing
//issues `RequestSubscriptionMsg` to peers, based on po, by iterating over
//the kademlia's `EachBin` function.
//returns the number of subscriptions requested
func startSyncing(r *Registry, conf *synctestConfig) (int, error) {
var err error
kad := r.delivery.kad
subCnt := 0
//iterate over each bin and solicit needed subscription to bins
kad.EachBin(r.addr[:], pof, 0, func(conn *network.Peer, po int) bool {
//identify begin and start index of the bin(s) we want to subscribe to
subCnt++
err = r.RequestSubscription(conf.addrToIDMap[string(conn.Address())], NewStream("SYNC", FormatSyncBinKey(uint8(po)), true), NewRange(0, 0), High)
if err != nil {
log.Error(fmt.Sprintf("Error in RequestSubsciption! %v", err))
return false
}
return true
})
return subCnt, nil
}
//map chunk keys to addresses which are responsible
func mapKeysToNodes(conf *synctestConfig) {
nodemap := make(map[string][]int)

@ -33,7 +33,6 @@ import (
"github.com/ethereum/go-ethereum/swarm/log"
"github.com/ethereum/go-ethereum/swarm/network"
"github.com/ethereum/go-ethereum/swarm/network/stream/intervals"
"github.com/ethereum/go-ethereum/swarm/pot"
"github.com/ethereum/go-ethereum/swarm/state"
"github.com/ethereum/go-ethereum/swarm/storage"
)
@ -73,6 +72,11 @@ const (
RetrievalEnabled
)
// subscriptionFunc is used to determine what to do in order to perform subscriptions
// usually we would start to really subscribe to nodes, but for tests other functionality may be needed
// (see TestRequestPeerSubscriptions in streamer_test.go)
var subscriptionFunc func(r *Registry, p *network.Peer, bin uint8, subs map[enode.ID]map[Stream]struct{}) bool = doRequestSubscription
// Registry registry for outgoing and incoming streamer constructors
type Registry struct {
addr enode.ID
@ -88,9 +92,9 @@ type Registry struct {
intervalsStore state.Store
autoRetrieval bool // automatically subscribe to retrieve request stream
maxPeerServers int
balance protocols.Balance // implements protocols.Balance, for accounting
prices protocols.Prices // implements protocols.Prices, provides prices to accounting
spec *protocols.Spec // this protocol's spec
spec *protocols.Spec //this protocol's spec
balance protocols.Balance //implements protocols.Balance, for accounting
prices protocols.Prices //implements protocols.Prices, provides prices to accounting
}
// RegistryOptions holds optional values for NewRegistry constructor.
@ -125,6 +129,7 @@ func NewRegistry(localID enode.ID, delivery *Delivery, syncChunkStore storage.Sy
maxPeerServers: options.MaxPeerServers,
balance: balance,
}
streamer.setupSpec()
streamer.api = NewAPI(streamer)
@ -467,24 +472,8 @@ func (r *Registry) updateSyncing() {
}
r.peersMu.RUnlock()
// request subscriptions for all nodes and bins
kad.EachBin(r.addr[:], pot.DefaultPof(256), 0, func(p *network.Peer, bin int) bool {
log.Debug(fmt.Sprintf("Requesting subscription by: registry %s from peer %s for bin: %d", r.addr, p.ID(), bin))
// bin is always less then 256 and it is safe to convert it to type uint8
stream := NewStream("SYNC", FormatSyncBinKey(uint8(bin)), true)
if streams, ok := subs[p.ID()]; ok {
// delete live and history streams from the map, so that it won't be removed with a Quit request
delete(streams, stream)
delete(streams, getHistoryStream(stream))
}
err := r.RequestSubscription(p.ID(), stream, NewRange(0, 0), High)
if err != nil {
log.Debug("Request subscription", "err", err, "peer", p.ID(), "stream", stream)
return false
}
return true
})
// start requesting subscriptions from peers
r.requestPeerSubscriptions(kad, subs)
// remove SYNC servers that do not need to be subscribed
for id, streams := range subs {
@ -505,6 +494,66 @@ func (r *Registry) updateSyncing() {
}
}
// requestPeerSubscriptions calls on each live peer in the kademlia table
// and sends a `RequestSubscription` to peers according to their bin
// and their relationship with kademlia's depth.
// Also check `TestRequestPeerSubscriptions` in order to understand the
// expected behavior.
// The function expects:
// * the kademlia
// * a map of subscriptions
// * the actual function to subscribe
// (in case of the test, it doesn't do real subscriptions)
func (r *Registry) requestPeerSubscriptions(kad *network.Kademlia, subs map[enode.ID]map[Stream]struct{}) {
var startPo int
var endPo int
var ok bool
// kademlia's depth
kadDepth := kad.NeighbourhoodDepth()
// request subscriptions for all nodes and bins
// nil as base takes the node's base; we need to pass 255 as `EachConn` runs
// from deepest bins backwards
kad.EachConn(nil, 255, func(p *network.Peer, po int) bool {
//if the peer's bin is shallower than the kademlia depth,
//only the peer's bin should be subscribed
if po < kadDepth {
startPo = po
endPo = po
} else {
//if the peer's bin is equal or deeper than the kademlia depth,
//each bin from the depth up to k.MaxProxDisplay should be subscribed
startPo = kadDepth
endPo = kad.MaxProxDisplay
}
for bin := startPo; bin <= endPo; bin++ {
//do the actual subscription
ok = subscriptionFunc(r, p, uint8(bin), subs)
}
return ok
})
}
// doRequestSubscription sends the actual RequestSubscription to the peer
func doRequestSubscription(r *Registry, p *network.Peer, bin uint8, subs map[enode.ID]map[Stream]struct{}) bool {
log.Debug("Requesting subscription by registry:", "registry", r.addr, "peer", p.ID(), "bin", bin)
// bin is always less then 256 and it is safe to convert it to type uint8
stream := NewStream("SYNC", FormatSyncBinKey(bin), true)
if streams, ok := subs[p.ID()]; ok {
// delete live and history streams from the map, so that it won't be removed with a Quit request
delete(streams, stream)
delete(streams, getHistoryStream(stream))
}
err := r.RequestSubscription(p.ID(), stream, NewRange(0, 0), High)
if err != nil {
log.Debug("Request subscription", "err", err, "peer", p.ID(), "stream", stream)
return false
}
return true
}
func (r *Registry) runProtocol(p *p2p.Peer, rw p2p.MsgReadWriter) error {
peer := protocols.NewPeer(p, rw, r.spec)
bp := network.NewBzzPeer(peer)

@ -20,11 +20,16 @@ import (
"bytes"
"context"
"errors"
"fmt"
"strconv"
"testing"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/p2p/enode"
p2ptest "github.com/ethereum/go-ethereum/p2p/testing"
"github.com/ethereum/go-ethereum/swarm/network"
"golang.org/x/crypto/sha3"
)
@ -952,3 +957,160 @@ func TestHasPriceImplementation(t *testing.T) {
t.Fatal("No prices set for chunk delivery msg")
}
}
/*
TestRequestPeerSubscriptions is a unit test for stream's pull sync subscriptions.
The test does:
* assign each connected peer to a bin map
* build up a known kademlia in advance
* run the EachConn function, which returns supposed subscription bins
* store all supposed bins per peer in a map
* check that all peers have the expected subscriptions
This kad table and its peers are copied from network.TestKademliaCase1,
it represents an edge case but for the purpose of testing the
syncing subscriptions it is just fine.
Addresses used in this test are discovered as part of the simulation network
in higher level tests for streaming. They were generated randomly.
The resulting kademlia looks like this:
=========================================================================
Fri Dec 21 20:02:39 UTC 2018 KΛÐΞMLIΛ hive: queen's address: 7efef1
population: 12 (12), MinProxBinSize: 2, MinBinSize: 2, MaxBinSize: 4
000 2 8196 835f | 2 8196 (0) 835f (0)
001 2 2690 28f0 | 2 2690 (0) 28f0 (0)
002 2 4d72 4a45 | 2 4d72 (0) 4a45 (0)
003 1 646e | 1 646e (0)
004 3 769c 76d1 7656 | 3 769c (0) 76d1 (0) 7656 (0)
============ DEPTH: 5 ==========================================
005 1 7a48 | 1 7a48 (0)
006 1 7cbd | 1 7cbd (0)
007 0 | 0
008 0 | 0
009 0 | 0
010 0 | 0
011 0 | 0
012 0 | 0
013 0 | 0
014 0 | 0
015 0 | 0
=========================================================================
*/
func TestRequestPeerSubscriptions(t *testing.T) {
// the pivot address; this is the actual kademlia node
pivotAddr := "7efef1c41d77f843ad167be95f6660567eb8a4a59f39240000cce2e0d65baf8e"
// a map of bin number to addresses from the given kademlia
binMap := make(map[int][]string)
binMap[0] = []string{
"835fbbf1d16ba7347b6e2fc552d6e982148d29c624ea20383850df3c810fa8fc",
"81968a2d8fb39114342ee1da85254ec51e0608d7f0f6997c2a8354c260a71009",
}
binMap[1] = []string{
"28f0bc1b44658548d6e05dd16d4c2fe77f1da5d48b6774bc4263b045725d0c19",
"2690a910c33ee37b91eb6c4e0731d1d345e2dc3b46d308503a6e85bbc242c69e",
}
binMap[2] = []string{
"4a45f1fc63e1a9cb9dfa44c98da2f3d20c2923e5d75ff60b2db9d1bdb0c54d51",
"4d72a04ddeb851a68cd197ef9a92a3e2ff01fbbff638e64929dd1a9c2e150112",
}
binMap[3] = []string{
"646e9540c84f6a2f9cf6585d45a4c219573b4fd1b64a3c9a1386fc5cf98c0d4d",
}
binMap[4] = []string{
"7656caccdc79cd8d7ce66d415cc96a718e8271c62fb35746bfc2b49faf3eebf3",
"76d1e83c71ca246d042e37ff1db181f2776265fbcfdc890ce230bfa617c9c2f0",
"769ce86aa90b518b7ed382f9fdacfbed93574e18dc98fe6c342e4f9f409c2d5a",
}
binMap[5] = []string{
"7a48f75f8ca60487ae42d6f92b785581b40b91f2da551ae73d5eae46640e02e8",
}
binMap[6] = []string{
"7cbd42350bde8e18ae5b955b5450f8e2cef3419f92fbf5598160c60fd78619f0",
}
// create the pivot's kademlia
addr := common.FromHex(pivotAddr)
k := network.NewKademlia(addr, network.NewKadParams())
// construct the peers and the kademlia
for _, binaddrs := range binMap {
for _, a := range binaddrs {
addr := common.FromHex(a)
k.On(network.NewPeer(&network.BzzPeer{BzzAddr: &network.BzzAddr{OAddr: addr}}, k))
}
}
// TODO: check kad table is same
// currently k.String() prints date so it will never be the same :)
// --> implement JSON representation of kad table
log.Debug(k.String())
// simulate that we would do subscriptions: just store the bin numbers
fakeSubscriptions := make(map[string][]int)
//after the test, we need to reset the subscriptionFunc to the default
defer func() { subscriptionFunc = doRequestSubscription }()
// define the function which should run for each connection
// instead of doing real subscriptions, we just store the bin numbers
subscriptionFunc = func(r *Registry, p *network.Peer, bin uint8, subs map[enode.ID]map[Stream]struct{}) bool {
// get the peer ID
peerstr := fmt.Sprintf("%x", p.Over())
// create the array of bins per peer
if _, ok := fakeSubscriptions[peerstr]; !ok {
fakeSubscriptions[peerstr] = make([]int, 0)
}
// store the (fake) bin subscription
log.Debug(fmt.Sprintf("Adding fake subscription for peer %s with bin %d", peerstr, bin))
fakeSubscriptions[peerstr] = append(fakeSubscriptions[peerstr], int(bin))
return true
}
// create just a simple Registry object in order to be able to call...
r := &Registry{}
r.requestPeerSubscriptions(k, nil)
// calculate the kademlia depth
kdepth := k.NeighbourhoodDepth()
// now, check that all peers have the expected (fake) subscriptions
// iterate the bin map
for bin, peers := range binMap {
// for every peer...
for _, peer := range peers {
// ...get its (fake) subscriptions
fakeSubsForPeer := fakeSubscriptions[peer]
// if the peer's bin is shallower than the kademlia depth...
if bin < kdepth {
// (iterate all (fake) subscriptions)
for _, subbin := range fakeSubsForPeer {
// ...only the peer's bin should be "subscribed"
// (and thus have only one subscription)
if subbin != bin || len(fakeSubsForPeer) != 1 {
t.Fatalf("Did not get expected subscription for bin < depth; bin of peer %s: %d, subscription: %d", peer, bin, subbin)
}
}
} else { //if the peer's bin is equal or higher than the kademlia depth...
// (iterate all (fake) subscriptions)
for i, subbin := range fakeSubsForPeer {
// ...each bin from the peer's bin number up to k.MaxProxDisplay should be "subscribed"
// as we start from depth we can use the iteration index to check
if subbin != i+kdepth {
t.Fatalf("Did not get expected subscription for bin > depth; bin of peer %s: %d, subscription: %d", peer, bin, subbin)
}
// the last "subscription" should be k.MaxProxDisplay
if i == len(fakeSubsForPeer)-1 && subbin != k.MaxProxDisplay {
t.Fatalf("Expected last subscription to be: %d, but is: %d", k.MaxProxDisplay, subbin)
}
}
}
}
}
// print some output
for p, subs := range fakeSubscriptions {
log.Debug(fmt.Sprintf("Peer %s has the following fake subscriptions: ", p))
for _, bin := range subs {
log.Debug(fmt.Sprintf("%d,", bin))
}
}
}