From 04770a4b817cebcb123ee397c66012e915be4163 Mon Sep 17 00:00:00 2001
From: mpl <mathieu.lonjaret@gmail.com>
Date: Wed, 9 Sep 2020 11:59:07 +0200
Subject: [PATCH] interp: fix data races (#839)

This change fixes two distinct data races:

1) some global vars of type *itype of the interp package are actually
mutated during the lifecycle of an Interpreter. Even worse: if more than
one Interpreter instance are created and used at a given time, they are
actually racing each other for these global vars.
Therefore, this change replaces these global vars with generator
functions that create the needed type on the fly.

2) the symbols given as argument of Interpreter.Use were directly copied
as reference (since they're maps) when mapped inside an Interpreter
instance. Since the usual case is to give the symbols from the stdlib
package, it means when the interpreter mutates its own symbols in
fixStdio, it would actually mutate the corresponding global vars of the
stdlib package. Again, this is at least racy as soon as several
instances of an Intepreter are concurrently running.
This change fixes the race by making sure Interpreter.Use actually
copies the symbol values instead of copying the references.
---
 interp/interp.go                     |  11 +-
 interp/interp_eval_test.go           | 195 +++++++++++++++++++++++++++
 interp/testdata/concurrent/hello1.go |  10 ++
 interp/testdata/concurrent/hello2.go |  10 ++
 interp/type.go                       |  31 +++--
 5 files changed, 235 insertions(+), 22 deletions(-)
 create mode 100644 interp/testdata/concurrent/hello1.go
 create mode 100644 interp/testdata/concurrent/hello2.go

diff --git a/interp/interp.go b/interp/interp.go
index 1a079d31e..021e1da8f 100644
--- a/interp/interp.go
+++ b/interp/interp.go
@@ -319,9 +319,9 @@ func initUniverse() *scope {
 		"uintptr":     {kind: typeSym, typ: &itype{cat: uintptrT, name: "uintptr"}},
 
 		// predefined Go constants
-		"false": {kind: constSym, typ: untypedBool, rval: reflect.ValueOf(false)},
-		"true":  {kind: constSym, typ: untypedBool, rval: reflect.ValueOf(true)},
-		"iota":  {kind: constSym, typ: untypedInt},
+		"false": {kind: constSym, typ: untypedBool(), rval: reflect.ValueOf(false)},
+		"true":  {kind: constSym, typ: untypedBool(), rval: reflect.ValueOf(true)},
+		"iota":  {kind: constSym, typ: untypedInt()},
 
 		// predefined Go zero value
 		"nil": {typ: &itype{cat: nilT, untyped: true}},
@@ -561,8 +561,7 @@ func (interp *Interpreter) Use(values Exports) {
 		}
 
 		if interp.binPkg[k] == nil {
-			interp.binPkg[k] = v
-			continue
+			interp.binPkg[k] = make(map[string]reflect.Value)
 		}
 
 		for s, sym := range v {
@@ -571,7 +570,7 @@ func (interp *Interpreter) Use(values Exports) {
 	}
 
 	// Checks if input values correspond to stdlib packages by looking for one
-	// well knwonw stdlib package path.
+	// well known stdlib package path.
 	if _, ok := values["fmt"]; ok {
 		fixStdio(interp)
 	}
diff --git a/interp/interp_eval_test.go b/interp/interp_eval_test.go
index b0164be81..4d7804e02 100644
--- a/interp/interp_eval_test.go
+++ b/interp/interp_eval_test.go
@@ -1,6 +1,7 @@
 package interp_test
 
 import (
+	"bufio"
 	"bytes"
 	"context"
 	"fmt"
@@ -917,6 +918,199 @@ func TestImportPathIsKey(t *testing.T) {
 	}
 }
 
+// The code in hello1.go and hello2.go spawns a "long-running" goroutine, which
+// means each call to EvalPath actually terminates before the evaled code is done
+// running. So this test demonstrates:
+// 1) That two sequential calls to EvalPath don't see their "compilation phases"
+// collide (no data race on the fields of the interpreter), which is somewhat
+// obvious since the calls (and hence the "compilation phases") are sequential too.
+// 2) That two concurrent goroutine runs spawned by the same interpreter do not
+// collide either.
+func TestConcurrentEvals(t *testing.T) {
+	if testing.Short() {
+		return
+	}
+	pin, pout := io.Pipe()
+	defer func() {
+		_ = pin.Close()
+		_ = pout.Close()
+	}()
+	interpr := interp.New(interp.Options{Stdout: pout})
+	interpr.Use(stdlib.Symbols)
+
+	if _, err := interpr.EvalPath("testdata/concurrent/hello1.go"); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := interpr.EvalPath("testdata/concurrent/hello2.go"); err != nil {
+		t.Fatal(err)
+	}
+
+	c := make(chan error)
+	go func() {
+		hello1, hello2 := false, false
+		sc := bufio.NewScanner(pin)
+		for sc.Scan() {
+			l := sc.Text()
+			switch l {
+			case "hello world1":
+				hello1 = true
+			case "hello world2":
+				hello2 = true
+			default:
+				c <- fmt.Errorf("unexpected output: %v", l)
+				return
+			}
+			if hello1 && hello2 {
+				break
+			}
+		}
+		c <- nil
+	}()
+
+	timeout := time.NewTimer(5 * time.Second)
+	select {
+	case <-timeout.C:
+		t.Fatal("timeout")
+	case err := <-c:
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+// TestConcurrentEvals2 shows that even though EvalWithContext calls Eval in a
+// goroutine, it indeed waits for Eval to terminate, and that therefore the code
+// called by EvalWithContext is sequential. And that there is no data race for the
+// interp package global vars or the interpreter fields in this case.
+func TestConcurrentEvals2(t *testing.T) {
+	pin, pout := io.Pipe()
+	defer func() {
+		_ = pin.Close()
+		_ = pout.Close()
+	}()
+	interpr := interp.New(interp.Options{Stdout: pout})
+	interpr.Use(stdlib.Symbols)
+
+	done := make(chan error)
+	go func() {
+		hello1 := false
+		sc := bufio.NewScanner(pin)
+		for sc.Scan() {
+			l := sc.Text()
+			if hello1 {
+				if l == "hello world2" {
+					break
+				} else {
+					done <- fmt.Errorf("unexpected output: %v", l)
+					return
+				}
+			}
+			if l == "hello world1" {
+				hello1 = true
+			} else {
+				done <- fmt.Errorf("unexpected output: %v", l)
+				return
+			}
+		}
+		done <- nil
+	}()
+
+	ctx := context.Background()
+	if _, err := interpr.EvalWithContext(ctx, `import "time"`); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := interpr.EvalWithContext(ctx, `time.Sleep(time.Second); println("hello world1")`); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := interpr.EvalWithContext(ctx, `time.Sleep(time.Second); println("hello world2")`); err != nil {
+		t.Fatal(err)
+	}
+
+	timeout := time.NewTimer(5 * time.Second)
+	select {
+	case <-timeout.C:
+		t.Fatal("timeout")
+	case err := <-done:
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+// TestConcurrentEvals3 makes sure that we don't regress into data races at the package level, i.e from:
+// - global vars, which should obviously not be mutated.
+// - when calling Interpreter.Use, the symbols given as argument should be
+// copied when being inserted into interp.binPkg, and not directly used as-is.
+func TestConcurrentEvals3(t *testing.T) {
+	allDone := make(chan bool)
+	runREPL := func() {
+		done := make(chan error)
+		pinin, poutin := io.Pipe()
+		pinout, poutout := io.Pipe()
+		i := interp.New(interp.Options{Stdin: pinin, Stdout: poutout})
+		i.Use(stdlib.Symbols)
+
+		go func() {
+			_, _ = i.REPL()
+		}()
+
+		input := []string{
+			`hello one`,
+			`hello two`,
+			`hello three`,
+		}
+
+		go func() {
+			sc := bufio.NewScanner(pinout)
+			k := 0
+			for sc.Scan() {
+				l := sc.Text()
+				if l != input[k] {
+					done <- fmt.Errorf("unexpected output, want %q, got %q", input[k], l)
+					return
+				}
+				k++
+				if k > 2 {
+					break
+				}
+			}
+			done <- nil
+		}()
+
+		for _, v := range input {
+			in := strings.NewReader(fmt.Sprintf("println(\"%s\")\n", v))
+			if _, err := io.Copy(poutin, in); err != nil {
+				t.Fatal(err)
+			}
+			time.Sleep(time.Second)
+		}
+
+		if err := <-done; err != nil {
+			t.Fatal(err)
+		}
+		_ = pinin.Close()
+		_ = poutin.Close()
+		_ = pinout.Close()
+		_ = poutout.Close()
+		allDone <- true
+	}
+
+	for i := 0; i < 2; i++ {
+		go func() {
+			runREPL()
+		}()
+	}
+
+	timeout := time.NewTimer(10 * time.Second)
+	for i := 0; i < 2; i++ {
+		select {
+		case <-allDone:
+		case <-timeout.C:
+			t.Fatal("timeout")
+		}
+	}
+}
+
 func TestEvalScanner(t *testing.T) {
 	type testCase struct {
 		desc      string
@@ -1005,6 +1199,7 @@ func TestEvalScanner(t *testing.T) {
 	}
 
 	runREPL := func(t *testing.T, test testCase) {
+		// TODO(mpl): use a pipe for the output as well, just as in TestConcurrentEvals5
 		var stdout bytes.Buffer
 		safeStdout := &safeBuffer{buf: &stdout}
 		var stderr bytes.Buffer
diff --git a/interp/testdata/concurrent/hello1.go b/interp/testdata/concurrent/hello1.go
new file mode 100644
index 000000000..2740781cc
--- /dev/null
+++ b/interp/testdata/concurrent/hello1.go
@@ -0,0 +1,10 @@
+package main
+
+import "time"
+
+func main() {
+	go func() {
+		time.Sleep(3 * time.Second)
+		println("hello world1")
+	}()
+}
diff --git a/interp/testdata/concurrent/hello2.go b/interp/testdata/concurrent/hello2.go
new file mode 100644
index 000000000..416fc0254
--- /dev/null
+++ b/interp/testdata/concurrent/hello2.go
@@ -0,0 +1,10 @@
+package main
+
+import "time"
+
+func main() {
+	go func() {
+		time.Sleep(3 * time.Second)
+		println("hello world2")
+	}()
+}
diff --git a/interp/type.go b/interp/type.go
index dff5636de..c890a6ed4 100644
--- a/interp/type.go
+++ b/interp/type.go
@@ -125,14 +125,12 @@ type itype struct {
 	scope       *scope        // type declaration scope (in case of re-parse incomplete type)
 }
 
-var (
-	untypedBool    = &itype{cat: boolT, name: "bool", untyped: true}
-	untypedString  = &itype{cat: stringT, name: "string", untyped: true}
-	untypedRune    = &itype{cat: int32T, name: "int32", untyped: true}
-	untypedInt     = &itype{cat: intT, name: "int", untyped: true}
-	untypedFloat   = &itype{cat: float64T, name: "float64", untyped: true}
-	untypedComplex = &itype{cat: complex128T, name: "complex128", untyped: true}
-)
+func untypedBool() *itype    { return &itype{cat: boolT, name: "bool", untyped: true} }
+func untypedString() *itype  { return &itype{cat: stringT, name: "string", untyped: true} }
+func untypedRune() *itype    { return &itype{cat: int32T, name: "int32", untyped: true} }
+func untypedInt() *itype     { return &itype{cat: intT, name: "int", untyped: true} }
+func untypedFloat() *itype   { return &itype{cat: float64T, name: "float64", untyped: true} }
+func untypedComplex() *itype { return &itype{cat: complex128T, name: "complex128", untyped: true} }
 
 // nodeType returns a type definition for the corresponding AST subtree.
 func nodeType(interp *Interpreter, sc *scope, n *node) (*itype, error) {
@@ -221,24 +219,24 @@ func nodeType(interp *Interpreter, sc *scope, n *node) (*itype, error) {
 		switch v := n.rval.Interface().(type) {
 		case bool:
 			n.rval = reflect.ValueOf(constant.MakeBool(v))
-			t = untypedBool
+			t = untypedBool()
 		case rune:
 			// It is impossible to work out rune const literals in AST
 			// with the correct type so we must make the const type here.
 			n.rval = reflect.ValueOf(constant.MakeInt64(int64(v)))
-			t = untypedRune
+			t = untypedRune()
 		case constant.Value:
 			switch v.Kind() {
 			case constant.Bool:
-				t = untypedBool
+				t = untypedBool()
 			case constant.String:
-				t = untypedString
+				t = untypedString()
 			case constant.Int:
-				t = untypedInt
+				t = untypedInt()
 			case constant.Float:
-				t = untypedFloat
+				t = untypedFloat()
 			case constant.Complex:
-				t = untypedComplex
+				t = untypedComplex()
 			default:
 				err = n.cfgErrorf("missing support for type %v", n.rval)
 			}
@@ -299,7 +297,7 @@ func nodeType(interp *Interpreter, sc *scope, n *node) (*itype, error) {
 					case isFloat64(t0) && isFloat64(t1):
 						t = sc.getType("complex128")
 					case nt0.untyped && isNumber(t0) && nt1.untyped && isNumber(t1):
-						t = untypedComplex
+						t = untypedComplex()
 					case nt0.untyped && isFloat32(t1) || nt1.untyped && isFloat32(t0):
 						t = sc.getType("complex64")
 					case nt0.untyped && isFloat64(t1) || nt1.untyped && isFloat64(t0):
@@ -1302,6 +1300,7 @@ func exportName(s string) string {
 }
 
 var (
+	// TODO(mpl): generators.
 	interf   = reflect.TypeOf((*interface{})(nil)).Elem()
 	constVal = reflect.TypeOf((*constant.Value)(nil)).Elem()
 )