Merge pull request #260 from symflower/testing-docker-runtime

Testing docker runtime
symflower · Jul 16, 2024 · 25e8d81 · 25e8d81
2 parents 2c9aa0c + 638d20d
commit 25e8d81
Show file tree

Hide file tree

Showing 7 changed files with 432 additions and 52 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -22,7 +22,7 @@ jobs:
           java-version: '11'
 
       - name: Set up Maven
-        uses: stCarolas/setup-maven@v4.5
+        uses: stCarolas/setup-maven@v5
         with:
           maven-version: '3.9.1'
 
@@ -36,6 +36,11 @@ jobs:
         run: make install
         shell: bash # Explicitly use Bash because otherwise failing Windows jobs are not erroring.
 
+      - name: Install required tools
+        run: |
+          make install-tools
+        shell: bash # Explicitly use Bash because otherwise failing Windows jobs are not erroring.
+
       - name: Install testing tools
         run: |
           make install-tools-testing

diff --git a/Dockerfile b/Dockerfile
@@ -1,40 +1,54 @@
-FROM ubuntu:noble
-RUN apt-get update && apt-get install -y ca-certificates wget unzip git make && update-ca-certificates
+# Builder image.
+FROM golang:latest as builder
 
-WORKDIR /home/ubuntu/eval-dev-quality
+WORKDIR /app
 COPY ./ ./
-RUN chown -R ubuntu:ubuntu ./
 
+# Build the binary.
+RUN go mod tidy
+RUN CGO_ENABLED=0 go build -o eval-dev-quality ./cmd/eval-dev-quality
+
+# Actual running image.
+FROM ubuntu:noble
+RUN apt-get update && apt-get install -y ca-certificates wget unzip git make && update-ca-certificates
+
+# Switch to the ubuntu user as we want it to run as non-root.
 USER ubuntu
-RUN mkdir -p ~/.eval-dev-quality
+WORKDIR /app
+COPY --chown=ubuntu:ubuntu ./testdata ./testdata
+COPY --chown=ubuntu:ubuntu ./Makefile ./Makefile
+RUN mkdir -p .eval-dev-quality
+RUN mkdir -p /app/evaluation
 
 # Install Maven
 RUN wget https://archive.apache.org/dist/maven/maven-3/3.9.1/binaries/apache-maven-3.9.1-bin.tar.gz && \
-	tar -xf apache-maven-3.9.1-bin.tar.gz -C ~/.eval-dev-quality/ && \
+	tar -xf apache-maven-3.9.1-bin.tar.gz -C /app/.eval-dev-quality/ && \
 	rm apache-maven-3.9.1-bin.tar.gz
-ENV PATH="${PATH}:/home/ubuntu/.eval-dev-quality/apache-maven-3.9.1/bin"
+ENV PATH="${PATH}:/app/.eval-dev-quality/apache-maven-3.9.1/bin"
 
 # Install Gradle
 RUN wget https://services.gradle.org/distributions/gradle-8.0.2-bin.zip && \
-	unzip gradle-8.0.2-bin.zip -d ~/.eval-dev-quality/ && \
+	unzip gradle-8.0.2-bin.zip -d /app/.eval-dev-quality/ && \
 	rm gradle-8.0.2-bin.zip
-ENV PATH="${PATH}:/home/ubuntu/.eval-dev-quality/gradle-8.0.2/bin"
+ENV PATH="${PATH}:/app/.eval-dev-quality/gradle-8.0.2/bin"
 
 # Install Java
 RUN wget https://corretto.aws/downloads/latest/amazon-corretto-11-x64-linux-jdk.tar.gz && \
-	tar -xf amazon-corretto-11-x64-linux-jdk.tar.gz -C ~/.eval-dev-quality/ && \
+	tar -xf amazon-corretto-11-x64-linux-jdk.tar.gz -C /app/.eval-dev-quality/ && \
 	rm amazon-corretto-11-x64-linux-jdk.tar.gz
-ENV JAVA_HOME="/home/ubuntu/.eval-dev-quality/amazon-corretto-11.0.23.9.1-linux-x64"
+ENV JAVA_HOME="/app/.eval-dev-quality/amazon-corretto-11.0.23.9.1-linux-x64"
 ENV PATH="${PATH}:${JAVA_HOME}/bin"
 
 # Install Go
 RUN wget https://go.dev/dl/go1.21.5.linux-amd64.tar.gz && \
-	tar -xf go1.21.5.linux-amd64.tar.gz -C ~/.eval-dev-quality/ && \
+	tar -xf go1.21.5.linux-amd64.tar.gz -C /app/.eval-dev-quality/ && \
 	rm go1.21.5.linux-amd64.tar.gz
-ENV PATH="${PATH}:/home/ubuntu/.eval-dev-quality/go/bin"
+ENV PATH="${PATH}:/app/.eval-dev-quality/go/bin"
+ENV GOROOT="/app/.eval-dev-quality/go"
 ENV PATH="${PATH}:/home/ubuntu/go/bin"
 
-# Setup the evaluation
-
-RUN make install-all
-ENV PATH="${PATH}:/home/ubuntu/.eval-dev-quality/bin"
+# Install the binary
+COPY --from=builder --chown=ubuntu:ubuntu /app/eval-dev-quality /app/.eval-dev-quality/bin/
+ENV PATH="${PATH}:/app/.eval-dev-quality/bin"
+RUN make install-tools-testing
+RUN make install-tools /app/.eval-dev-quality/bin
diff --git a/Makefile b/Makefile
@@ -1,7 +1,7 @@
 export ROOT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
 
 export PACKAGE_BASE := github.com/symflower/eval-dev-quality
-export UNIT_TEST_TIMEOUT := 480
+export UNIT_TEST_TIMEOUT := 720
 
 ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
 $(eval $(ARGS):;@:) # turn arguments into do-nothing targets
@@ -47,13 +47,16 @@ install: # [<Go package] - # Build and install everything, or only the specified
 	go install -v -ldflags="$(GO_LDFLAGS)" $(PACKAGE)
 .PHONY: install
 
-install-all: install install-tools-testing # Install everything for and of this repository.
+install-all: install install-tools install-tools-testing # Install everything for and of this repository.
 .PHONY: install-all
 
+install-tools: # Install tools that are required for running the evaluation.
+	eval-dev-quality install-tools $(if $(ARGS), --install-tools-path $(word 1,$(ARGS)))
+.PHONY: install-tools
+
 install-tools-testing: # Install tools that are used for testing.
 	go install -v github.com/vektra/mockery/v2@v2.40.3
 	go install -v gotest.tools/gotestsum@v1.11.0
-	eval-dev-quality install-tools
 .PHONY: install-tools-testing
 
 generate: # Run code generation.

diff --git a/README.md b/README.md
@@ -135,9 +135,16 @@ Total coverage 100.000000%
 
 The execution by default also creates a report file `REPORT.md` that contains additional evaluation results and links to individual result files.
 
-# Docker
+# Containerized use
 
-## Setup
+## Notes
+
+The following parameters behave differently when using a containerized runtime:
+- `--testdata`: The check whether the path exists is skipped on the host system but still enforced inside the container, because paths on the host and inside the container might differ.
+
+## Docker
+
+### Setup
 
 Ensure that docker is installed on the system.
 
@@ -163,7 +170,7 @@ docker run -v ./:/home/ubuntu/evaluation --user $(id -u):$(id -g) eval-dev-quali
 docker run -v ./:/home/ubuntu/evaluation --user $(id -u):$(id -g) ghcr.io/symflower/eval-dev-quality:latest eval-dev-quality evaluate --model symflower/symbolic-execution --result-path /home/ubuntu/evaluation/%datetime%
 ```
 
-# Kubernetes
+## Kubernetes
 
 Please check the [Kubernetes](./docs/kubernetes/README.md) documentation.
 

diff --git a/cmd/eval-dev-quality/cmd/command.go b/cmd/eval-dev-quality/cmd/command.go
@@ -32,6 +32,10 @@ func Execute(logger *log.Logger, arguments []string) {
 			c.SetLogger(logger)
 		}
 
+		if c, ok := command.(SetArguments); ok {
+			c.SetArguments(arguments)
+		}
+
 		return command.Execute(args)
 	}
 
@@ -53,3 +57,9 @@ type SetLogger interface {
 	// SetLogger sets the logger of the command.
 	SetLogger(logger *log.Logger)
 }
+
+// SetArguments defines a command that allows setting its arguments.
+type SetArguments interface {
+	// SetArguments sets the command's arguments.
+	SetArguments(args []string)
+}
diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go
@@ -3,6 +3,7 @@ package cmd
 import (
 	"bytes"
 	"context"
+	"errors"
 	"fmt"
 	"os"
 	"os/exec"
@@ -82,6 +83,8 @@ type Evaluate struct {
 	// Namespace the namespace under which the kubernetes resources should be created.
 	Namespace string `long:"namespace" description:"The Namespace which should be used for kubernetes resources." default:"eval-dev-quality"`
 
+	// args holds the passed arguments, filtered to the flags the command supports.
+	args []string
 	// logger holds the logger of the command.
 	logger *log.Logger
 	// timestamp holds the timestamp of the command execution.
@@ -95,6 +98,14 @@ func (command *Evaluate) SetLogger(logger *log.Logger) {
 	command.logger = logger
 }
 
+var _ SetArguments = (*Evaluate)(nil)
+
+// SetArguments stores the given arguments, keeping only those that match the flags available on the command.
+func (command *Evaluate) SetArguments(args []string) {
+	availableFlags := util.Flags(command)
+	command.args = util.FilterArgsKeep(args, availableFlags)
+}
+
 // Initialize initializes the command according to the arguments.
 func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.Context) {
 	evaluationContext = &evaluate.Context{}
@@ -163,15 +174,17 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
 
 	// Ensure the "testdata" path exists and make it absolute.
 	{
-		if err := osutil.DirExists(command.TestdataPath); err != nil {
-			command.logger.Panicf("ERROR: testdata path %q cannot be accessed: %s", command.TestdataPath, err)
-		}
-		testdataPath, err := filepath.Abs(command.TestdataPath)
-		if err != nil {
-			command.logger.Panicf("ERROR: could not resolve testdata path %q to an absolute path: %s", command.TestdataPath, err)
+		if command.Runtime == "local" { // Ignore testdata path during containerized execution.
+			if err := osutil.DirExists(command.TestdataPath); err != nil {
+				command.logger.Panicf("ERROR: testdata path %q cannot be accessed: %s", command.TestdataPath, err)
+			}
+			testdataPath, err := filepath.Abs(command.TestdataPath)
+			if err != nil {
+				command.logger.Panicf("ERROR: could not resolve testdata path %q to an absolute path: %s", command.TestdataPath, err)
+			}
+			command.TestdataPath = testdataPath
+			evaluationContext.TestdataPath = testdataPath
 		}
-		command.TestdataPath = testdataPath
-		evaluationContext.TestdataPath = testdataPath
 	}
 
 	// Setup evaluation result directory.
@@ -443,21 +456,52 @@ func (command *Evaluate) evaluateLocal(evaluationContext *evaluate.Context) (err
 
 // evaluateDocker executes the evaluation for each model inside a docker container.
 func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
-	availableFlags := util.Flags(command)
 	ignoredFlags := []string{
 		"model",
 		"parallel",
 		"result-path",
+		"runtime-image",
 		"runtime",
 	}
 
-	// Filter all the args to only contain flags which can be used.
-	args := util.FilterArgsKeep(os.Args[2:], availableFlags)
 	// Filter the args to remove all flags unsuited for running the container.
-	args = util.FilterArgsRemove(args, ignoredFlags)
+	args := util.FilterArgsRemove(command.args, ignoredFlags)
 
 	parallel := util.NewParallel(command.Parallel)
 
+	volumeName := "evaluation-volume"
+
+	// Create data volume.
+	{
+		// Create the volume where all the data of the evaluations is stored.
+		output, err := util.CommandWithResult(context.Background(), command.logger, &util.Command{
+			Command: []string{
+				"docker",
+				"volume",
+				"create",
+				volumeName,
+			},
+		})
+		if err != nil {
+			return pkgerrors.WithMessage(pkgerrors.WithStack(err), output)
+		}
+
+		// Cleanup volume.
+		defer func() {
+			output, deferErr := util.CommandWithResult(context.Background(), command.logger, &util.Command{
+				Command: []string{
+					"docker",
+					"volume",
+					"rm",
+					volumeName,
+				},
+			})
+			if deferErr != nil {
+				err = errors.Join(err, pkgerrors.WithMessage(pkgerrors.WithStack(deferErr), output))
+			}
+		}()
+	}
+
 	// Iterate over each model and start the container.
 	for _, model := range ctx.Models {
 		// We are skipping ollama models until we fully support pulling. https://github.com/symflower/eval-dev-quality/issues/100.
@@ -467,23 +511,12 @@ func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
 			continue
 		}
 
-		// Create for each model a dedicated subfolder inside the results path.
-		resultPath, err := filepath.Abs(command.ResultPath)
-		if err != nil {
-			return err
-		}
-		// Set permission 777 so the non-root docker image is able to store its results inside the result path.
-		if err := os.Chmod(resultPath, 0777); err != nil {
-			return err
-		}
-
 		// Commands regarding the docker runtime.
 		dockerCommand := []string{
 			"docker",
 			"run",
 			"-e", "PROVIDER_TOKEN",
-			"-v", // bind volume
-			resultPath + ":/home/ubuntu/evaluation",
+			"-v", volumeName + ":/app/evaluation",
 			"--rm", // automatically remove container after it finished
 			command.RuntimeImage,
 		}
@@ -492,10 +525,8 @@ func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
 		evaluationCommand := []string{
 			"eval-dev-quality",
 			"evaluate",
-			"--model",
-			model.ID(),
-			"--result-path",
-			"/home/ubuntu/evaluation/" + model.ID(),
+			"--model", model.ID(),
+			"--result-path", "/app/evaluation/" + model.ID(),
 		}
 
 		cmd := append(dockerCommand, evaluationCommand...)
@@ -514,6 +545,52 @@ func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
 	}
 	parallel.Wait()
 
+	// Copy data from volume back to host.
+	{
+		// Run a container mounting the volume.
+		output, err := util.CommandWithResult(context.Background(), command.logger, &util.Command{
+			Command: []string{
+				"docker",
+				"run",
+				"-d",
+				"--name", "volume-fetch",
+				"-v", volumeName + ":/data",
+				"busybox",
+				"true",
+			},
+		})
+		if err != nil {
+			return pkgerrors.WithMessage(pkgerrors.WithStack(err), output)
+		}
+
+		// Cleanup volume mount container.
+		defer func() {
+			output, deferErr := util.CommandWithResult(context.Background(), command.logger, &util.Command{
+				Command: []string{
+					"docker",
+					"rm",
+					"volume-fetch",
+				},
+			})
+			if deferErr != nil {
+				err = errors.Join(err, pkgerrors.WithMessage(pkgerrors.WithStack(deferErr), output))
+			}
+		}()
+
+		// Copy data from volume to filesystem.
+		output, err = util.CommandWithResult(context.Background(), command.logger, &util.Command{
+			Command: []string{
+				"docker",
+				"cp",
+				"volume-fetch:/data/.",
+				command.ResultPath,
+			},
+		})
+		if err != nil {
+			return pkgerrors.WithMessage(pkgerrors.WithStack(err), output)
+		}
+	}
+
 	return nil
 }
 
@@ -529,6 +606,7 @@ func (command *Evaluate) evaluateKubernetes(ctx *evaluate.Context) (err error) {
 		"model",
 		"parallel",
 		"result-path",
+		"runtime-image",
 		"runtime",
 	}