Commit fd171eff authored by Jeffrey Wilcke's avatar Jeffrey Wilcke

Merge pull request #592 from fjl/disco-ping-pong

Discovery bonding protocol
parents 101ea1a1 76218959
......@@ -32,8 +32,8 @@ var (
defaultBootNodes = []*discover.Node{
// ETH/DEV cmd/bootnode
discover.MustParseNode("enode://09fbeec0d047e9a37e63f60f8618aa9df0e49271f3fadb2c070dc09e2099b95827b63a8b837c6fd01d0802d457dd83e3bd48bd3e6509f8209ed90dabbc30e3d3@52.16.188.185:30303"),
// ETH/DEV cpp-ethereum (poc-8.ethdev.com)
discover.MustParseNode("enode://4a44599974518ea5b0f14c31c4463692ac0329cb84851f3435e6d1b18ee4eae4aa495f846a0fa1219bd58035671881d44423876e57db2abd57254d0197da0ebe@5.1.83.226:30303"),
// ETH/DEV cpp-ethereum (poc-9.ethdev.com)
discover.MustParseNode("enode://487611428e6c99a11a9795a6abe7b529e81315ca6aad66e2a2fc76e3adf263faba0d35466c2f8f68d561dbefa8878d4df5f1f2ddb1fbeab7f42ffb8cd328bd4a@5.1.83.226:30303"),
}
)
......
......@@ -13,6 +13,8 @@ import (
"net/url"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/ethereum/go-ethereum/crypto"
......@@ -30,7 +32,8 @@ type Node struct {
DiscPort int // UDP listening port for discovery protocol
TCPPort int // TCP listening port for RLPx
active time.Time
// this must be set/read using atomic load and store.
activeStamp int64
}
func newNode(id NodeID, addr *net.UDPAddr) *Node {
......@@ -39,7 +42,6 @@ func newNode(id NodeID, addr *net.UDPAddr) *Node {
IP: addr.IP,
DiscPort: addr.Port,
TCPPort: addr.Port,
active: time.Now(),
}
}
......@@ -48,6 +50,20 @@ func (n *Node) isValid() bool {
return !n.IP.IsMulticast() && !n.IP.IsUnspecified() && n.TCPPort != 0 && n.DiscPort != 0
}
func (n *Node) bumpActive() {
stamp := time.Now().Unix()
atomic.StoreInt64(&n.activeStamp, stamp)
}
func (n *Node) active() time.Time {
stamp := atomic.LoadInt64(&n.activeStamp)
return time.Unix(stamp, 0)
}
func (n *Node) addr() *net.UDPAddr {
return &net.UDPAddr{IP: n.IP, Port: n.DiscPort}
}
// The string representation of a Node is a URL.
// Please see ParseNode for a description of the format.
func (n *Node) String() string {
......@@ -304,3 +320,26 @@ func randomID(a NodeID, n int) (b NodeID) {
}
return b
}
// nodeDB stores all nodes we know about.
type nodeDB struct {
mu sync.RWMutex
byID map[NodeID]*Node
}
func (db *nodeDB) get(id NodeID) *Node {
db.mu.RLock()
defer db.mu.RUnlock()
return db.byID[id]
}
func (db *nodeDB) add(id NodeID, addr *net.UDPAddr, tcpPort uint16) *Node {
db.mu.Lock()
defer db.mu.Unlock()
if db.byID == nil {
db.byID = make(map[NodeID]*Node)
}
n := &Node{ID: id, IP: addr.IP, DiscPort: addr.Port, TCPPort: int(tcpPort)}
db.byID[n.ID] = n
return n
}
......@@ -14,9 +14,10 @@ import (
)
const (
alpha = 3 // Kademlia concurrency factor
bucketSize = 16 // Kademlia bucket size
nBuckets = nodeIDBits + 1 // Number of buckets
alpha = 3 // Kademlia concurrency factor
bucketSize = 16 // Kademlia bucket size
nBuckets = nodeIDBits + 1 // Number of buckets
maxBondingPingPongs = 10
)
type Table struct {
......@@ -24,27 +25,50 @@ type Table struct {
buckets [nBuckets]*bucket // index of known nodes by distance
nursery []*Node // bootstrap nodes
bondmu sync.Mutex
bonding map[NodeID]*bondproc
bondslots chan struct{} // limits total number of active bonding processes
net transport
self *Node // metadata of the local node
db *nodeDB
}
type bondproc struct {
err error
n *Node
done chan struct{}
}
// transport is implemented by the UDP transport.
// it is an interface so we can test without opening lots of UDP
// sockets and without generating a private key.
type transport interface {
ping(*Node) error
findnode(e *Node, target NodeID) ([]*Node, error)
ping(NodeID, *net.UDPAddr) error
waitping(NodeID) error
findnode(toid NodeID, addr *net.UDPAddr, target NodeID) ([]*Node, error)
close()
}
// bucket contains nodes, ordered by their last activity.
// the entry that was most recently active is the last element
// in entries.
type bucket struct {
lastLookup time.Time
entries []*Node
}
func newTable(t transport, ourID NodeID, ourAddr *net.UDPAddr) *Table {
tab := &Table{net: t, self: newNode(ourID, ourAddr)}
tab := &Table{
net: t,
db: new(nodeDB),
self: newNode(ourID, ourAddr),
bonding: make(map[NodeID]*bondproc),
bondslots: make(chan struct{}, maxBondingPingPongs),
}
for i := 0; i < cap(tab.bondslots); i++ {
tab.bondslots <- struct{}{}
}
for i := range tab.buckets {
tab.buckets[i] = new(bucket)
}
......@@ -107,8 +131,8 @@ func (tab *Table) Lookup(target NodeID) []*Node {
asked[n.ID] = true
pendingQueries++
go func() {
result, _ := tab.net.findnode(n, target)
reply <- result
r, _ := tab.net.findnode(n.ID, n.addr(), target)
reply <- tab.bondall(r)
}()
}
}
......@@ -116,13 +140,11 @@ func (tab *Table) Lookup(target NodeID) []*Node {
// we have asked all closest nodes, stop the search
break
}
// wait for the next reply
for _, n := range <-reply {
cn := n
if !seen[n.ID] {
if n != nil && !seen[n.ID] {
seen[n.ID] = true
result.push(cn, bucketSize)
result.push(n, bucketSize)
}
}
pendingQueries--
......@@ -145,8 +167,9 @@ func (tab *Table) refresh() {
result := tab.Lookup(randomID(tab.self.ID, ld))
if len(result) == 0 {
// bootstrap the table with a self lookup
all := tab.bondall(tab.nursery)
tab.mutex.Lock()
tab.add(tab.nursery)
tab.add(all)
tab.mutex.Unlock()
tab.Lookup(tab.self.ID)
// TODO: the Kademlia paper says that we're supposed to perform
......@@ -176,45 +199,105 @@ func (tab *Table) len() (n int) {
return n
}
// bumpOrAdd updates the activity timestamp for the given node and
// attempts to insert the node into a bucket. The returned Node might
// not be part of the table. The caller must hold tab.mutex.
func (tab *Table) bumpOrAdd(node NodeID, from *net.UDPAddr) (n *Node) {
b := tab.buckets[logdist(tab.self.ID, node)]
if n = b.bump(node); n == nil {
n = newNode(node, from)
if len(b.entries) == bucketSize {
tab.pingReplace(n, b)
} else {
b.entries = append(b.entries, n)
// bondall bonds with all given nodes concurrently and returns
// those nodes for which bonding has probably succeeded.
func (tab *Table) bondall(nodes []*Node) (result []*Node) {
rc := make(chan *Node, len(nodes))
for i := range nodes {
go func(n *Node) {
nn, _ := tab.bond(false, n.ID, n.addr(), uint16(n.TCPPort))
rc <- nn
}(nodes[i])
}
for _ = range nodes {
if n := <-rc; n != nil {
result = append(result, n)
}
}
return n
return result
}
func (tab *Table) pingReplace(n *Node, b *bucket) {
old := b.entries[bucketSize-1]
go func() {
if err := tab.net.ping(old); err == nil {
// it responded, we don't need to replace it.
return
// bond ensures the local node has a bond with the given remote node.
// It also attempts to insert the node into the table if bonding succeeds.
// The caller must not hold tab.mutex.
//
// A bond is must be established before sending findnode requests.
// Both sides must have completed a ping/pong exchange for a bond to
// exist. The total number of active bonding processes is limited in
// order to restrain network use.
//
// bond is meant to operate idempotently in that bonding with a remote
// node which still remembers a previously established bond will work.
// The remote node will simply not send a ping back, causing waitping
// to time out.
//
// If pinged is true, the remote node has just pinged us and one half
// of the process can be skipped.
func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error) {
var n *Node
if n = tab.db.get(id); n == nil {
tab.bondmu.Lock()
w := tab.bonding[id]
if w != nil {
// Wait for an existing bonding process to complete.
tab.bondmu.Unlock()
<-w.done
} else {
// Register a new bonding process.
w = &bondproc{done: make(chan struct{})}
tab.bonding[id] = w
tab.bondmu.Unlock()
// Do the ping/pong. The result goes into w.
tab.pingpong(w, pinged, id, addr, tcpPort)
// Unregister the process after it's done.
tab.bondmu.Lock()
delete(tab.bonding, id)
tab.bondmu.Unlock()
}
// it didn't respond, replace the node if it is still the oldest node.
tab.mutex.Lock()
if len(b.entries) > 0 && b.entries[len(b.entries)-1] == old {
// slide down other entries and put the new one in front.
// TODO: insert in correct position to keep the order
copy(b.entries[1:], b.entries)
b.entries[0] = n
n = w.n
if w.err != nil {
return nil, w.err
}
tab.mutex.Unlock()
}()
}
tab.mutex.Lock()
defer tab.mutex.Unlock()
if b := tab.buckets[logdist(tab.self.ID, n.ID)]; !b.bump(n) {
tab.pingreplace(n, b)
}
return n, nil
}
func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) {
<-tab.bondslots
defer func() { tab.bondslots <- struct{}{} }()
if w.err = tab.net.ping(id, addr); w.err != nil {
close(w.done)
return
}
if !pinged {
// Give the remote node a chance to ping us before we start
// sending findnode requests. If they still remember us,
// waitping will simply time out.
tab.net.waitping(id)
}
w.n = tab.db.add(id, addr, tcpPort)
close(w.done)
}
// bump updates the activity timestamp for the given node.
// The caller must hold tab.mutex.
func (tab *Table) bump(node NodeID) {
tab.buckets[logdist(tab.self.ID, node)].bump(node)
func (tab *Table) pingreplace(new *Node, b *bucket) {
if len(b.entries) == bucketSize {
oldest := b.entries[bucketSize-1]
if err := tab.net.ping(oldest.ID, oldest.addr()); err == nil {
// The node responded, we don't need to replace it.
return
}
} else {
// Add a slot at the end so the last entry doesn't
// fall off when adding the new node.
b.entries = append(b.entries, nil)
}
copy(b.entries[1:], b.entries)
b.entries[0] = new
}
// add puts the entries into the table if their corresponding
......@@ -240,17 +323,17 @@ outer:
}
}
func (b *bucket) bump(id NodeID) *Node {
for i, n := range b.entries {
if n.ID == id {
n.active = time.Now()
func (b *bucket) bump(n *Node) bool {
for i := range b.entries {
if b.entries[i].ID == n.ID {
n.bumpActive()
// move it to the front
copy(b.entries[1:], b.entries[:i+1])
copy(b.entries[1:], b.entries[:i])
b.entries[0] = n
return n
return true
}
}
return nil
return false
}
// nodesByDistance is a list of nodes, ordered by
......
......@@ -2,78 +2,109 @@ package discover
import (
"crypto/ecdsa"
"errors"
"fmt"
"math/rand"
"net"
"reflect"
"testing"
"testing/quick"
"time"
"github.com/ethereum/go-ethereum/crypto"
)
func TestTable_bumpOrAddBucketAssign(t *testing.T) {
tab := newTable(nil, NodeID{}, &net.UDPAddr{})
for i := 1; i < len(tab.buckets); i++ {
tab.bumpOrAdd(randomID(tab.self.ID, i), &net.UDPAddr{})
}
for i, b := range tab.buckets {
if i > 0 && len(b.entries) != 1 {
t.Errorf("bucket %d has %d entries, want 1", i, len(b.entries))
func TestTable_pingReplace(t *testing.T) {
doit := func(newNodeIsResponding, lastInBucketIsResponding bool) {
transport := newPingRecorder()
tab := newTable(transport, NodeID{}, &net.UDPAddr{})
last := fillBucket(tab, 200)
pingSender := randomID(tab.self.ID, 200)
// this gotPing should replace the last node
// if the last node is not responding.
transport.responding[last.ID] = lastInBucketIsResponding
transport.responding[pingSender] = newNodeIsResponding
tab.bond(true, pingSender, &net.UDPAddr{}, 0)
// first ping goes to sender (bonding pingback)
if !transport.pinged[pingSender] {
t.Error("table did not ping back sender")
}
if newNodeIsResponding {
// second ping goes to oldest node in bucket
// to see whether it is still alive.
if !transport.pinged[last.ID] {
t.Error("table did not ping last node in bucket")
}
}
}
}
func TestTable_bumpOrAddPingReplace(t *testing.T) {
pingC := make(pingC)
tab := newTable(pingC, NodeID{}, &net.UDPAddr{})
last := fillBucket(tab, 200)
// this bumpOrAdd should not replace the last node
// because the node replies to ping.
new := tab.bumpOrAdd(randomID(tab.self.ID, 200), &net.UDPAddr{})
tab.mutex.Lock()
defer tab.mutex.Unlock()
if l := len(tab.buckets[200].entries); l != bucketSize {
t.Errorf("wrong bucket size after gotPing: got %d, want %d", bucketSize, l)
}
pinged := <-pingC
if pinged != last.ID {
t.Fatalf("pinged wrong node: %v\nwant %v", pinged, last.ID)
if lastInBucketIsResponding || !newNodeIsResponding {
if !contains(tab.buckets[200].entries, last.ID) {
t.Error("last entry was removed")
}
if contains(tab.buckets[200].entries, pingSender) {
t.Error("new entry was added")
}
} else {
if contains(tab.buckets[200].entries, last.ID) {
t.Error("last entry was not removed")
}
if !contains(tab.buckets[200].entries, pingSender) {
t.Error("new entry was not added")
}
}
}
tab.mutex.Lock()
defer tab.mutex.Unlock()
if l := len(tab.buckets[200].entries); l != bucketSize {
t.Errorf("wrong bucket size after bumpOrAdd: got %d, want %d", bucketSize, l)
}
if !contains(tab.buckets[200].entries, last.ID) {
t.Error("last entry was removed")
}
if contains(tab.buckets[200].entries, new.ID) {
t.Error("new entry was added")
}
doit(true, true)
doit(false, true)
doit(false, true)
doit(false, false)
}
func TestTable_bumpOrAddPingTimeout(t *testing.T) {
tab := newTable(pingC(nil), NodeID{}, &net.UDPAddr{})
last := fillBucket(tab, 200)
// this bumpOrAdd should replace the last node
// because the node does not reply to ping.
new := tab.bumpOrAdd(randomID(tab.self.ID, 200), &net.UDPAddr{})
// wait for async bucket update. damn. this needs to go away.
time.Sleep(2 * time.Millisecond)
tab.mutex.Lock()
defer tab.mutex.Unlock()
if l := len(tab.buckets[200].entries); l != bucketSize {
t.Errorf("wrong bucket size after bumpOrAdd: got %d, want %d", bucketSize, l)
func TestBucket_bumpNoDuplicates(t *testing.T) {
t.Parallel()
cfg := &quick.Config{
MaxCount: 1000,
Rand: quickrand,
Values: func(args []reflect.Value, rand *rand.Rand) {
// generate a random list of nodes. this will be the content of the bucket.
n := rand.Intn(bucketSize-1) + 1
nodes := make([]*Node, n)
for i := range nodes {
nodes[i] = &Node{ID: randomID(NodeID{}, 200)}
}
args[0] = reflect.ValueOf(nodes)
// generate random bump positions.
bumps := make([]int, rand.Intn(100))
for i := range bumps {
bumps[i] = rand.Intn(len(nodes))
}
args[1] = reflect.ValueOf(bumps)
},
}
if contains(tab.buckets[200].entries, last.ID) {
t.Error("last entry was not removed")
prop := func(nodes []*Node, bumps []int) (ok bool) {
b := &bucket{entries: make([]*Node, len(nodes))}
copy(b.entries, nodes)
for i, pos := range bumps {
b.bump(b.entries[pos])
if hasDuplicates(b.entries) {
t.Logf("bucket has duplicates after %d/%d bumps:", i+1, len(bumps))
for _, n := range b.entries {
t.Logf(" %p", n)
}
return false
}
}
return true
}
if !contains(tab.buckets[200].entries, new.ID) {
t.Error("new entry was not added")
if err := quick.Check(prop, cfg); err != nil {
t.Error(err)
}
}
......@@ -85,44 +116,27 @@ func fillBucket(tab *Table, ld int) (last *Node) {
return b.entries[bucketSize-1]
}
type pingC chan NodeID
type pingRecorder struct{ responding, pinged map[NodeID]bool }
func (t pingC) findnode(n *Node, target NodeID) ([]*Node, error) {
func newPingRecorder() *pingRecorder {
return &pingRecorder{make(map[NodeID]bool), make(map[NodeID]bool)}
}
func (t *pingRecorder) findnode(toid NodeID, toaddr *net.UDPAddr, target NodeID) ([]*Node, error) {
panic("findnode called on pingRecorder")
}
func (t pingC) close() {
func (t *pingRecorder) close() {
panic("close called on pingRecorder")
}
func (t pingC) ping(n *Node) error {
if t == nil {
return errTimeout
}
t <- n.ID
return nil
func (t *pingRecorder) waitping(from NodeID) error {
return nil // remote always pings
}
func TestTable_bump(t *testing.T) {
tab := newTable(nil, NodeID{}, &net.UDPAddr{})
// add an old entry and two recent ones
oldactive := time.Now().Add(-2 * time.Minute)
old := &Node{ID: randomID(tab.self.ID, 200), active: oldactive}
others := []*Node{
&Node{ID: randomID(tab.self.ID, 200), active: time.Now()},
&Node{ID: randomID(tab.self.ID, 200), active: time.Now()},
}
tab.add(append(others, old))
if tab.buckets[200].entries[0] == old {
t.Fatal("old entry is at front of bucket")
}
// bumping the old entry should move it to the front
tab.bump(old.ID)
if old.active == oldactive {
t.Error("activity timestamp not updated")
}
if tab.buckets[200].entries[0] != old {
t.Errorf("bumped entry did not move to the front of bucket")
func (t *pingRecorder) ping(toid NodeID, toaddr *net.UDPAddr) error {
t.pinged[toid] = true
if t.responding[toid] {
return nil
} else {
return errTimeout
}
}
......@@ -210,7 +224,7 @@ func TestTable_Lookup(t *testing.T) {
t.Fatalf("lookup on empty table returned %d results: %#v", len(results), results)
}
// seed table with initial node (otherwise lookup will terminate immediately)
tab.bumpOrAdd(randomID(target, 200), &net.UDPAddr{Port: 200})
tab.add([]*Node{newNode(randomID(target, 200), &net.UDPAddr{Port: 200})})
results := tab.Lookup(target)
t.Logf("results:")
......@@ -238,16 +252,16 @@ type findnodeOracle struct {
target NodeID
}
func (t findnodeOracle) findnode(n *Node, target NodeID) ([]*Node, error) {
t.t.Logf("findnode query at dist %d", n.DiscPort)
func (t findnodeOracle) findnode(toid NodeID, toaddr *net.UDPAddr, target NodeID) ([]*Node, error) {
t.t.Logf("findnode query at dist %d", toaddr.Port)
// current log distance is encoded in port number
var result []*Node
switch n.DiscPort {
switch toaddr.Port {
case 0:
panic("query to node at distance 0")
default:
// TODO: add more randomness to distances
next := n.DiscPort - 1
next := toaddr.Port - 1
for i := 0; i < bucketSize; i++ {
result = append(result, &Node{ID: randomID(t.target, next), DiscPort: next})
}
......@@ -255,11 +269,9 @@ func (t findnodeOracle) findnode(n *Node, target NodeID) ([]*Node, error) {
return result, nil
}
func (t findnodeOracle) close() {}
func (t findnodeOracle) ping(n *Node) error {
return errors.New("ping is not supported by this transport")
}
func (t findnodeOracle) close() {}
func (t findnodeOracle) waitping(from NodeID) error { return nil }
func (t findnodeOracle) ping(toid NodeID, toaddr *net.UDPAddr) error { return nil }
func hasDuplicates(slice []*Node) bool {
seen := make(map[NodeID]bool)
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment