Skip to content

Commit

Permalink
Merge pull request #260 from symflower/testing-docker-runtime
Browse files Browse the repository at this point in the history
Testing docker runtime
  • Loading branch information
ruiAzevedo19 authored Jul 16, 2024
2 parents 2c9aa0c + 638d20d commit 25e8d81
Show file tree
Hide file tree
Showing 7 changed files with 432 additions and 52 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
java-version: '11'

- name: Set up Maven
uses: stCarolas/setup-maven@v4.5
uses: stCarolas/setup-maven@v5
with:
maven-version: '3.9.1'

Expand All @@ -36,6 +36,11 @@ jobs:
run: make install
shell: bash # Explicitly use Bash because otherwise failing Windows jobs are not erroring.

- name: Install required tools
run: |
make install-tools
shell: bash # Explicitly use Bash because otherwise failing Windows jobs are not erroring.

- name: Install testing tools
run: |
make install-tools-testing
Expand Down
48 changes: 31 additions & 17 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,40 +1,54 @@
FROM ubuntu:noble
RUN apt-get update && apt-get install -y ca-certificates wget unzip git make && update-ca-certificates
# Builder image.
FROM golang:latest as builder

WORKDIR /home/ubuntu/eval-dev-quality
WORKDIR /app
COPY ./ ./
RUN chown -R ubuntu:ubuntu ./

# Build the binary.
RUN go mod tidy
RUN CGO_ENABLED=0 go build -o eval-dev-quality ./cmd/eval-dev-quality

# Actual running image.
FROM ubuntu:noble
RUN apt-get update && apt-get install -y ca-certificates wget unzip git make && update-ca-certificates

# Switch to the ubuntu user as we want it to run as non-root.
USER ubuntu
RUN mkdir -p ~/.eval-dev-quality
WORKDIR /app
COPY --chown=ubuntu:ubuntu ./testdata ./testdata
COPY --chown=ubuntu:ubuntu ./Makefile ./Makefile
RUN mkdir -p .eval-dev-quality
RUN mkdir -p /app/evaluation

# Install Maven
RUN wget https://archive.apache.org/dist/maven/maven-3/3.9.1/binaries/apache-maven-3.9.1-bin.tar.gz && \
tar -xf apache-maven-3.9.1-bin.tar.gz -C ~/.eval-dev-quality/ && \
tar -xf apache-maven-3.9.1-bin.tar.gz -C /app/.eval-dev-quality/ && \
rm apache-maven-3.9.1-bin.tar.gz
ENV PATH="${PATH}:/home/ubuntu/.eval-dev-quality/apache-maven-3.9.1/bin"
ENV PATH="${PATH}:/app/.eval-dev-quality/apache-maven-3.9.1/bin"

# Install Gradle
RUN wget https://services.gradle.org/distributions/gradle-8.0.2-bin.zip && \
unzip gradle-8.0.2-bin.zip -d ~/.eval-dev-quality/ && \
unzip gradle-8.0.2-bin.zip -d /app/.eval-dev-quality/ && \
rm gradle-8.0.2-bin.zip
ENV PATH="${PATH}:/home/ubuntu/.eval-dev-quality/gradle-8.0.2/bin"
ENV PATH="${PATH}:/app/.eval-dev-quality/gradle-8.0.2/bin"

# Install Java
RUN wget https://corretto.aws/downloads/latest/amazon-corretto-11-x64-linux-jdk.tar.gz && \
tar -xf amazon-corretto-11-x64-linux-jdk.tar.gz -C ~/.eval-dev-quality/ && \
tar -xf amazon-corretto-11-x64-linux-jdk.tar.gz -C /app/.eval-dev-quality/ && \
rm amazon-corretto-11-x64-linux-jdk.tar.gz
ENV JAVA_HOME="/home/ubuntu/.eval-dev-quality/amazon-corretto-11.0.23.9.1-linux-x64"
ENV JAVA_HOME="/app/.eval-dev-quality/amazon-corretto-11.0.23.9.1-linux-x64"
ENV PATH="${PATH}:${JAVA_HOME}/bin"

# Install Go
RUN wget https://go.dev/dl/go1.21.5.linux-amd64.tar.gz && \
tar -xf go1.21.5.linux-amd64.tar.gz -C ~/.eval-dev-quality/ && \
tar -xf go1.21.5.linux-amd64.tar.gz -C /app/.eval-dev-quality/ && \
rm go1.21.5.linux-amd64.tar.gz
ENV PATH="${PATH}:/home/ubuntu/.eval-dev-quality/go/bin"
ENV PATH="${PATH}:/app/.eval-dev-quality/go/bin"
ENV GOROOT="/app/.eval-dev-quality/go"
ENV PATH="${PATH}:/home/ubuntu/go/bin"

# Setup the evaluation

RUN make install-all
ENV PATH="${PATH}:/home/ubuntu/.eval-dev-quality/bin"
# Install the binary
COPY --from=builder --chown=ubuntu:ubuntu /app/eval-dev-quality /app/.eval-dev-quality/bin/
ENV PATH="${PATH}:/app/.eval-dev-quality/bin"
RUN make install-tools-testing
RUN make install-tools /app/.eval-dev-quality/bin
9 changes: 6 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
export ROOT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))

export PACKAGE_BASE := github.com/symflower/eval-dev-quality
export UNIT_TEST_TIMEOUT := 480
export UNIT_TEST_TIMEOUT := 720

ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
$(eval $(ARGS):;@:) # turn arguments into do-nothing targets
Expand Down Expand Up @@ -47,13 +47,16 @@ install: # [<Go package] - # Build and install everything, or only the specified
go install -v -ldflags="$(GO_LDFLAGS)" $(PACKAGE)
.PHONY: install

install-all: install install-tools-testing # Install everything for and of this repository.
install-all: install install-tools install-tools-testing # Install everything for and of this repository.
.PHONY: install-all

install-tools: # Install tools that are required for running the evaluation.
eval-dev-quality install-tools $(if $(ARGS), --install-tools-path $(word 1,$(ARGS)))
.PHONY: install-tools

install-tools-testing: # Install tools that are used for testing.
go install -v github.com/vektra/mockery/v2@v2.40.3
go install -v gotest.tools/gotestsum@v1.11.0
eval-dev-quality install-tools
.PHONY: install-tools-testing

generate: # Run code generation.
Expand Down
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,16 @@ Total coverage 100.000000%

The execution by default also creates a report file `REPORT.md` that contains additional evaluation results and links to individual result files.

# Docker
# Containerized use

## Setup
## Notes

The following parameters behave differently when using a containerized runtime.
- `--testdata`: The check whether the path exists is skipped on the host system but still enforced inside the container, because the paths on the host and inside the container might differ.

## Docker

### Setup

Ensure that docker is installed on the system.

Expand All @@ -163,7 +170,7 @@ docker run -v ./:/home/ubuntu/evaluation --user $(id -u):$(id -g) eval-dev-quali
docker run -v ./:/home/ubuntu/evaluation --user $(id -u):$(id -g) ghcr.io/symflower/eval-dev-quality:latest eval-dev-quality evaluate --model symflower/symbolic-execution --result-path /home/ubuntu/evaluation/%datetime%
```

# Kubernetes
## Kubernetes

Please check the [Kubernetes](./docs/kubernetes/README.md) documentation.

Expand Down
10 changes: 10 additions & 0 deletions cmd/eval-dev-quality/cmd/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ func Execute(logger *log.Logger, arguments []string) {
c.SetLogger(logger)
}

if c, ok := command.(SetArguments); ok {
c.SetArguments(arguments)
}

return command.Execute(args)
}

Expand All @@ -53,3 +57,9 @@ type SetLogger interface {
// SetLogger sets the logger of the command.
SetLogger(logger *log.Logger)
}

// SetArguments defines a command that allows to set its arguments.
// "Execute" checks every command for this interface and, if it is implemented, hands over the argument list it was called with before running the command.
type SetArguments interface {
// SetArguments sets the command's arguments.
SetArguments(args []string)
}
134 changes: 106 additions & 28 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cmd
import (
"bytes"
"context"
"errors"
"fmt"
"os"
"os/exec"
Expand Down Expand Up @@ -82,6 +83,8 @@ type Evaluate struct {
// Namespace the namespace under which the kubernetes resources should be created.
Namespace string `long:"namespace" description:"The Namespace which should be used for kubernetes resources." default:"eval-dev-quality"`

// args holds a list of all the passed arguments.
args []string
// logger holds the logger of the command.
logger *log.Logger
// timestamp holds the timestamp of the command execution.
Expand All @@ -95,6 +98,14 @@ func (command *Evaluate) SetLogger(logger *log.Logger) {
command.logger = logger
}

var _ SetArguments = (*Evaluate)(nil)

// SetArguments stores the given arguments on the command, filtered down to the flags the command itself provides.
func (command *Evaluate) SetArguments(args []string) {
	command.args = util.FilterArgsKeep(args, util.Flags(command))
}

// Initialize initializes the command according to the arguments.
func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.Context) {
evaluationContext = &evaluate.Context{}
Expand Down Expand Up @@ -163,15 +174,17 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.

// Ensure the "testdata" path exists and make it absolute.
{
if err := osutil.DirExists(command.TestdataPath); err != nil {
command.logger.Panicf("ERROR: testdata path %q cannot be accessed: %s", command.TestdataPath, err)
}
testdataPath, err := filepath.Abs(command.TestdataPath)
if err != nil {
command.logger.Panicf("ERROR: could not resolve testdata path %q to an absolute path: %s", command.TestdataPath, err)
if command.Runtime == "local" { // Ignore testdata path during containerized execution.
if err := osutil.DirExists(command.TestdataPath); err != nil {
command.logger.Panicf("ERROR: testdata path %q cannot be accessed: %s", command.TestdataPath, err)
}
testdataPath, err := filepath.Abs(command.TestdataPath)
if err != nil {
command.logger.Panicf("ERROR: could not resolve testdata path %q to an absolute path: %s", command.TestdataPath, err)
}
command.TestdataPath = testdataPath
evaluationContext.TestdataPath = testdataPath
}
command.TestdataPath = testdataPath
evaluationContext.TestdataPath = testdataPath
}

// Setup evaluation result directory.
Expand Down Expand Up @@ -443,21 +456,52 @@ func (command *Evaluate) evaluateLocal(evaluationContext *evaluate.Context) (err

// evaluateDocker executes the evaluation for each model inside a docker container.
func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
availableFlags := util.Flags(command)
ignoredFlags := []string{
"model",
"parallel",
"result-path",
"runtime-image",
"runtime",
}

// Filter all the args to only contain flags which can be used.
args := util.FilterArgsKeep(os.Args[2:], availableFlags)
// Filter the args to remove all flags unsuited for running the container.
args = util.FilterArgsRemove(args, ignoredFlags)
args := util.FilterArgsRemove(command.args, ignoredFlags)

parallel := util.NewParallel(command.Parallel)

volumeName := "evaluation-volume"

// Create data volume.
{
// Create the volume where all the data of the evaluations is stored.
output, err := util.CommandWithResult(context.Background(), command.logger, &util.Command{
Command: []string{
"docker",
"volume",
"create",
volumeName,
},
})
if err != nil {
return pkgerrors.WithMessage(pkgerrors.WithStack(err), output)
}

// Cleanup volume.
defer func() {
output, deferErr := util.CommandWithResult(context.Background(), command.logger, &util.Command{
Command: []string{
"docker",
"volume",
"rm",
volumeName,
},
})
if deferErr != nil {
err = errors.Join(err, pkgerrors.WithMessage(pkgerrors.WithStack(deferErr), output))
}
}()
}

// Iterate over each model and start the container.
for _, model := range ctx.Models {
// We are skipping ollama models until we fully support pulling. https://github.com/symflower/eval-dev-quality/issues/100.
Expand All @@ -467,23 +511,12 @@ func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
continue
}

// Create for each model a dedicated subfolder inside the results path.
resultPath, err := filepath.Abs(command.ResultPath)
if err != nil {
return err
}
// Set permission 777 so the non-root docker image is able to store its results inside the result path.
if err := os.Chmod(resultPath, 0777); err != nil {
return err
}

// Commands regarding the docker runtime.
dockerCommand := []string{
"docker",
"run",
"-e", "PROVIDER_TOKEN",
"-v", // bind volume
resultPath + ":/home/ubuntu/evaluation",
"-v", volumeName + ":/app/evaluation",
"--rm", // automatically remove container after it finished
command.RuntimeImage,
}
Expand All @@ -492,10 +525,8 @@ func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
evaluationCommand := []string{
"eval-dev-quality",
"evaluate",
"--model",
model.ID(),
"--result-path",
"/home/ubuntu/evaluation/" + model.ID(),
"--model", model.ID(),
"--result-path", "/app/evaluation/" + model.ID(),
}

cmd := append(dockerCommand, evaluationCommand...)
Expand All @@ -514,6 +545,52 @@ func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
}
parallel.Wait()

// Copy data from volume back to host.
{
// Run a container mounting the volume.
output, err := util.CommandWithResult(context.Background(), command.logger, &util.Command{
Command: []string{
"docker",
"run",
"-d",
"--name", "volume-fetch",
"-v", volumeName + ":/data",
"busybox",
"true",
},
})
if err != nil {
return pkgerrors.WithMessage(pkgerrors.WithStack(err), output)
}

// Cleanup volume mount container.
defer func() {
output, deferErr := util.CommandWithResult(context.Background(), command.logger, &util.Command{
Command: []string{
"docker",
"rm",
"volume-fetch",
},
})
if deferErr != nil {
err = errors.Join(err, pkgerrors.WithMessage(pkgerrors.WithStack(deferErr), output))
}
}()

// Copy data from volume to filesystem.
output, err = util.CommandWithResult(context.Background(), command.logger, &util.Command{
Command: []string{
"docker",
"cp",
"volume-fetch:/data/.",
command.ResultPath,
},
})
if err != nil {
return pkgerrors.WithMessage(pkgerrors.WithStack(err), output)
}
}

return nil
}

Expand All @@ -529,6 +606,7 @@ func (command *Evaluate) evaluateKubernetes(ctx *evaluate.Context) (err error) {
"model",
"parallel",
"result-path",
"runtime-image",
"runtime",
}

Expand Down
Loading

0 comments on commit 25e8d81

Please sign in to comment.