Unverified Commit dcae0d34 authored by Felix Lange's avatar Felix Lange Committed by GitHub

p2p/simulations: fix a deadlock and clean up adapters (#17891)

This fixes a rare deadlock with the inproc adapter:

- A node is stopped, which acquires Network.lock.
- The protocol code being simulated (swarm/network in my case)
  waits for its goroutines to shut down.
- One of those goroutines calls into the simulation to add a peer,
  which waits for Network.lock.

The fix for the deadlock is really simple, just release the lock
before stopping the simulation node.

Other changes in this PR clean up the exec adapter so it reports
node startup errors better and remove the docker adapter because
it just adds overhead.

In the exec adapter, node information is now posted to a one-shot
server. This avoids log parsing and allows reporting startup
errors to the simulation host.

A small change in package node was needed because simulation
nodes use port zero. Node.{HTTP,WS}Endpoint now return the live
endpoints after startup by checking the TCP listener.
parent f951e23f
......@@ -549,11 +549,23 @@ func (n *Node) IPCEndpoint() string {
// HTTPEndpoint retrieves the current HTTP endpoint used by the protocol stack.
func (n *Node) HTTPEndpoint() string {
n.lock.Lock()
defer n.lock.Unlock()
if n.httpListener != nil {
return n.httpListener.Addr().String()
}
return n.httpEndpoint
}
// WSEndpoint retrieves the current WS endpoint used by the protocol stack.
func (n *Node) WSEndpoint() string {
n.lock.Lock()
defer n.lock.Unlock()
if n.wsListener != nil {
return n.wsListener.Addr().String()
}
return n.wsEndpoint
}
......
......@@ -63,18 +63,6 @@ using the devp2p node stack rather than executing `main()`.
The nodes listen for devp2p connections and WebSocket RPC clients on random
localhost ports.
### DockerAdapter
The `DockerAdapter` is similar to the `ExecAdapter` but executes `docker run`
to run the node in a Docker container using a Docker image containing the
simulation binary at `/bin/p2p-node`.
The Docker image is built using `docker build` when the adapter is initialised,
meaning no prior setup is necessary other than having a working Docker client.
Each node listens on the external IP of the container and the default p2p and
RPC ports (`30303` and `8546` respectively).
## Network
A simulation network is created with an ID and default service (which is used
......
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package adapters
import (
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"github.com/docker/docker/pkg/reexec"
"github.com/ethereum/go-ethereum/node"
"github.com/ethereum/go-ethereum/p2p/enode"
)
var (
ErrLinuxOnly = errors.New("DockerAdapter can only be used on Linux as it uses the current binary (which must be a Linux binary)")
)
// DockerAdapter is a NodeAdapter which runs simulation nodes inside Docker
// containers.
//
// A Docker image is built which contains the current binary at /bin/p2p-node
// which when executed runs the underlying service (see the description
// of the execP2PNode function for more details)
type DockerAdapter struct {
ExecAdapter
}
// NewDockerAdapter builds the p2p-node Docker image containing the current
// binary and returns a DockerAdapter
func NewDockerAdapter() (*DockerAdapter, error) {
// Since Docker containers run on Linux and this adapter runs the
// current binary in the container, it must be compiled for Linux.
//
// It is reasonable to require this because the caller can just
// compile the current binary in a Docker container.
if runtime.GOOS != "linux" {
return nil, ErrLinuxOnly
}
if err := buildDockerImage(); err != nil {
return nil, err
}
return &DockerAdapter{
ExecAdapter{
nodes: make(map[enode.ID]*ExecNode),
},
}, nil
}
// Name returns the name of the adapter for logging purposes
func (d *DockerAdapter) Name() string {
return "docker-adapter"
}
// NewNode returns a new DockerNode using the given config
func (d *DockerAdapter) NewNode(config *NodeConfig) (Node, error) {
if len(config.Services) == 0 {
return nil, errors.New("node must have at least one service")
}
for _, service := range config.Services {
if _, exists := serviceFuncs[service]; !exists {
return nil, fmt.Errorf("unknown node service %q", service)
}
}
// generate the config
conf := &execNodeConfig{
Stack: node.DefaultConfig,
Node: config,
}
conf.Stack.DataDir = "/data"
conf.Stack.WSHost = "0.0.0.0"
conf.Stack.WSOrigins = []string{"*"}
conf.Stack.WSExposeAll = true
conf.Stack.P2P.EnableMsgEvents = false
conf.Stack.P2P.NoDiscovery = true
conf.Stack.P2P.NAT = nil
conf.Stack.NoUSB = true
// listen on all interfaces on a given port, which we set when we
// initialise NodeConfig (usually a random port)
conf.Stack.P2P.ListenAddr = fmt.Sprintf(":%d", config.Port)
node := &DockerNode{
ExecNode: ExecNode{
ID: config.ID,
Config: conf,
adapter: &d.ExecAdapter,
},
}
node.newCmd = node.dockerCommand
d.ExecAdapter.nodes[node.ID] = &node.ExecNode
return node, nil
}
// DockerNode wraps an ExecNode but exec's the current binary in a docker
// container rather than locally
type DockerNode struct {
ExecNode
}
// dockerCommand returns a command which exec's the binary in a Docker
// container.
//
// It uses a shell so that we can pass the _P2P_NODE_CONFIG environment
// variable to the container using the --env flag.
func (n *DockerNode) dockerCommand() *exec.Cmd {
return exec.Command(
"sh", "-c",
fmt.Sprintf(
`exec docker run --interactive --env _P2P_NODE_CONFIG="${_P2P_NODE_CONFIG}" %s p2p-node %s %s`,
dockerImage, strings.Join(n.Config.Node.Services, ","), n.ID.String(),
),
)
}
// dockerImage is the name of the Docker image which gets built to run the
// simulation node
const dockerImage = "p2p-node"
// buildDockerImage builds the Docker image which is used to run the simulation
// node in a Docker container.
//
// It adds the current binary as "p2p-node" so that it runs execP2PNode
// when executed.
func buildDockerImage() error {
// create a directory to use as the build context
dir, err := ioutil.TempDir("", "p2p-docker")
if err != nil {
return err
}
defer os.RemoveAll(dir)
// copy the current binary into the build context
bin, err := os.Open(reexec.Self())
if err != nil {
return err
}
defer bin.Close()
dst, err := os.OpenFile(filepath.Join(dir, "self.bin"), os.O_WRONLY|os.O_CREATE, 0755)
if err != nil {
return err
}
defer dst.Close()
if _, err := io.Copy(dst, bin); err != nil {
return err
}
// create the Dockerfile
dockerfile := []byte(`
FROM ubuntu:16.04
RUN mkdir /data
ADD self.bin /bin/p2p-node
`)
if err := ioutil.WriteFile(filepath.Join(dir, "Dockerfile"), dockerfile, 0644); err != nil {
return err
}
// run 'docker build'
cmd := exec.Command("docker", "build", "-t", dockerImage, dir)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
return fmt.Errorf("error building docker image: %s", err)
}
return nil
}
This diff is collapsed.
package adapters
import (
"bufio"
"errors"
"io"
"regexp"
"strings"
"time"
)
// wsAddrPattern is a regex used to read the WebSocket address from the node's
// log
var wsAddrPattern = regexp.MustCompile(`ws://[\d.:]+`)
func matchWSAddr(str string) (string, bool) {
if !strings.Contains(str, "WebSocket endpoint opened") {
return "", false
}
return wsAddrPattern.FindString(str), true
}
// findWSAddr scans through reader r, looking for the log entry with
// WebSocket address information.
func findWSAddr(r io.Reader, timeout time.Duration) (string, error) {
ch := make(chan string)
go func() {
s := bufio.NewScanner(r)
for s.Scan() {
addr, ok := matchWSAddr(s.Text())
if ok {
ch <- addr
}
}
close(ch)
}()
var wsAddr string
select {
case wsAddr = <-ch:
if wsAddr == "" {
return "", errors.New("empty result")
}
case <-time.After(timeout):
return "", errors.New("timed out")
}
return wsAddr, nil
}
package adapters
import (
"bytes"
"testing"
"time"
)
func TestFindWSAddr(t *testing.T) {
line := `t=2018-05-02T19:00:45+0200 lvl=info msg="WebSocket endpoint opened" node.id=26c65a606d1125a44695bc08573190d047152b6b9a776ccbbe593e90f91444d9c1ebdadac6a775ad9fdd0923468a1d698ed3a842c1fb89c1bc0f9d4801f8c39c url=ws://127.0.0.1:59975`
buf := bytes.NewBufferString(line)
got, err := findWSAddr(buf, 10*time.Second)
if err != nil {
t.Fatalf("Failed to find addr: %v", err)
}
expected := `ws://127.0.0.1:59975`
if got != expected {
t.Fatalf("Expected to get '%s', but got '%s'", expected, got)
}
}
......@@ -70,14 +70,6 @@ func main() {
log.Info("using exec adapter", "tmpdir", tmpdir)
adapter = adapters.NewExecAdapter(tmpdir)
case "docker":
log.Info("using docker adapter")
var err error
adapter, err = adapters.NewDockerAdapter()
if err != nil {
log.Crit("error creating docker adapter", "err", err)
}
default:
log.Crit(fmt.Sprintf("unknown node adapter %q", *adapterType))
}
......
......@@ -116,7 +116,7 @@ func (net *Network) NewNodeWithConfig(conf *adapters.NodeConfig) (*Node, error)
Node: adapterNode,
Config: conf,
}
log.Trace(fmt.Sprintf("node %v created", conf.ID))
log.Trace("Node created", "id", conf.ID)
net.nodeMap[conf.ID] = len(net.Nodes)
net.Nodes = append(net.Nodes, node)
......@@ -167,6 +167,7 @@ func (net *Network) Start(id enode.ID) error {
func (net *Network) startWithSnapshots(id enode.ID, snapshots map[string][]byte) error {
net.lock.Lock()
defer net.lock.Unlock()
node := net.getNode(id)
if node == nil {
return fmt.Errorf("node %v does not exist", id)
......@@ -174,13 +175,13 @@ func (net *Network) startWithSnapshots(id enode.ID, snapshots map[string][]byte)
if node.Up {
return fmt.Errorf("node %v already up", id)
}
log.Trace(fmt.Sprintf("starting node %v: %v using %v", id, node.Up, net.nodeAdapter.Name()))
log.Trace("Starting node", "id", id, "adapter", net.nodeAdapter.Name())
if err := node.Start(snapshots); err != nil {
log.Warn(fmt.Sprintf("start up failed: %v", err))
log.Warn("Node startup failed", "id", id, "err", err)
return err
}
node.Up = true
log.Info(fmt.Sprintf("started node %v: %v", id, node.Up))
log.Info("Started node", "id", id)
net.events.Send(NewEvent(node))
......@@ -209,7 +210,6 @@ func (net *Network) watchPeerEvents(id enode.ID, events chan *p2p.PeerEvent, sub
defer net.lock.Unlock()
node := net.getNode(id)
if node == nil {
log.Error("Can not find node for id", "id", id)
return
}
node.Up = false
......@@ -240,7 +240,7 @@ func (net *Network) watchPeerEvents(id enode.ID, events chan *p2p.PeerEvent, sub
case err := <-sub.Err():
if err != nil {
log.Error(fmt.Sprintf("error getting peer events for node %v", id), "err", err)
log.Error("Error in peer event subscription", "id", id, "err", err)
}
return
}
......@@ -250,7 +250,6 @@ func (net *Network) watchPeerEvents(id enode.ID, events chan *p2p.PeerEvent, sub
// Stop stops the node with the given ID
func (net *Network) Stop(id enode.ID) error {
net.lock.Lock()
defer net.lock.Unlock()
node := net.getNode(id)
if node == nil {
return fmt.Errorf("node %v does not exist", id)
......@@ -258,12 +257,17 @@ func (net *Network) Stop(id enode.ID) error {
if !node.Up {
return fmt.Errorf("node %v already down", id)
}
if err := node.Stop(); err != nil {
return err
}
node.Up = false
log.Info(fmt.Sprintf("stop node %v: %v", id, node.Up))
net.lock.Unlock()
err := node.Stop()
if err != nil {
net.lock.Lock()
node.Up = true
net.lock.Unlock()
return err
}
log.Info("Stopped node", "id", id, "err", err)
net.events.Send(ControlEvent(node))
return nil
}
......@@ -271,7 +275,7 @@ func (net *Network) Stop(id enode.ID) error {
// Connect connects two nodes together by calling the "admin_addPeer" RPC
// method on the "one" node so that it connects to the "other" node
func (net *Network) Connect(oneID, otherID enode.ID) error {
log.Debug(fmt.Sprintf("connecting %s to %s", oneID, otherID))
log.Debug("Connecting nodes with addPeer", "id", oneID, "other", otherID)
conn, err := net.InitConn(oneID, otherID)
if err != nil {
return err
......@@ -481,10 +485,10 @@ func (net *Network) InitConn(oneID, otherID enode.ID) (*Conn, error) {
err = conn.nodesUp()
if err != nil {
log.Trace(fmt.Sprintf("nodes not up: %v", err))
log.Trace("Nodes not up", "err", err)
return nil, fmt.Errorf("nodes not up: %v", err)
}
log.Debug("InitConn - connection initiated")
log.Debug("Connection initiated", "id", oneID, "other", otherID)
conn.initiated = time.Now()
return conn, nil
}
......@@ -492,9 +496,9 @@ func (net *Network) InitConn(oneID, otherID enode.ID) (*Conn, error) {
// Shutdown stops all nodes in the network and closes the quit channel
func (net *Network) Shutdown() {
for _, node := range net.Nodes {
log.Debug(fmt.Sprintf("stopping node %s", node.ID().TerminalString()))
log.Debug("Stopping node", "id", node.ID())
if err := node.Stop(); err != nil {
log.Warn(fmt.Sprintf("error stopping node %s", node.ID().TerminalString()), "err", err)
log.Warn("Can't stop node", "id", node.ID(), "err", err)
}
}
close(net.quitc)
......@@ -708,18 +712,18 @@ func (net *Network) Subscribe(events chan *Event) {
}
func (net *Network) executeControlEvent(event *Event) {
log.Trace("execute control event", "type", event.Type, "event", event)
log.Trace("Executing control event", "type", event.Type, "event", event)
switch event.Type {
case EventTypeNode:
if err := net.executeNodeEvent(event); err != nil {
log.Error("error executing node event", "event", event, "err", err)
log.Error("Error executing node event", "event", event, "err", err)
}
case EventTypeConn:
if err := net.executeConnEvent(event); err != nil {
log.Error("error executing conn event", "event", event, "err", err)
log.Error("Error executing conn event", "event", event, "err", err)
}
case EventTypeMsg:
log.Warn("ignoring control msg event")
log.Warn("Ignoring control msg event")
}
}
......
......@@ -125,22 +125,6 @@ func BenchmarkDiscovery_64_4(b *testing.B) { benchmarkDiscovery(b, 64, 4) }
func BenchmarkDiscovery_128_4(b *testing.B) { benchmarkDiscovery(b, 128, 4) }
func BenchmarkDiscovery_256_4(b *testing.B) { benchmarkDiscovery(b, 256, 4) }
func TestDiscoverySimulationDockerAdapter(t *testing.T) {
testDiscoverySimulationDockerAdapter(t, *nodeCount, *initCount)
}
func testDiscoverySimulationDockerAdapter(t *testing.T, nodes, conns int) {
adapter, err := adapters.NewDockerAdapter()
if err != nil {
if err == adapters.ErrLinuxOnly {
t.Skip(err)
} else {
t.Fatal(err)
}
}
testDiscoverySimulation(t, nodes, conns, adapter)
}
func TestDiscoverySimulationExecAdapter(t *testing.T) {
testDiscoverySimulationExecAdapter(t, *nodeCount, *initCount)
}
......@@ -545,8 +529,7 @@ func triggerChecks(trigger chan enode.ID, net *simulations.Network, id enode.ID)
}
func newService(ctx *adapters.ServiceContext) (node.Service, error) {
node := enode.NewV4(&ctx.Config.PrivateKey.PublicKey, adapters.ExternalIP(), int(ctx.Config.Port), int(ctx.Config.Port))
addr := network.NewAddr(node)
addr := network.NewAddr(ctx.Config.Node())
kp := network.NewKadParams()
kp.MinProxBinSize = testMinProxBinSize
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment