diff --git a/README.md b/README.md index 5208aac..131712e 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ You can override it with the `-i` flag. 1. Construct the `docker run` CLI: * Mount to `/model` from the model file directly in either the `docker model` or the HuggingFace cache directory for the specific model. * Mount to `/dataset`, if any provided, from the dataset file directly in either the `docker model` or the HuggingFace cache directory for the specific dataset. + * Mount any other required caching directories, e.g. for `.bundle` files. * If the ET devices are available, mount them in as devices. * Select the image based on the default for the runtime or the provided override. * Select the command based on the default for the runtime or the provided override. diff --git a/cmd/nekko/run.go b/cmd/nekko/run.go index 9e8ec87..a8bfe60 100644 --- a/cmd/nekko/run.go +++ b/cmd/nekko/run.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "os/exec" + "path/filepath" "strings" "github.com/nekkoai/cli/pkg/runtime" @@ -17,8 +18,10 @@ import ( ) const ( - mgmtDevice = "/dev/et0_mgmt" - opsDevice = "/dev/et0_ops" + mgmtDevice = "/dev/et0_mgmt" + opsDevice = "/dev/et0_ops" + defaultCacheSubdir = ".cache/nekko/cache" + defaultBundlesSubdir = ".cache/nekko/bundles" ) var ( @@ -26,6 +29,12 @@ var ( ) func runCommand(conf *config, logger *log.Logger, v *viper.Viper) (*cobra.Command, error) { + homeDir, err := os.UserHomeDir() + if err != nil { + return nil, err + } + defaultCacheDir := filepath.Join(homeDir, defaultCacheSubdir) + defaultBundlesDir := filepath.Join(homeDir, defaultBundlesSubdir) cmd := &cobra.Command{ Use: "run", Short: "Run a model", @@ -45,7 +54,7 @@ func runCommand(conf *config, logger *log.Logger, v *viper.Viper) (*cobra.Comman image = runtimeImpl.Image() } - var modelMount, datasetMount string + var modelMount, datasetMount, cacheMount, bundlesMount string model := v.GetString("model") if model == "" { return fmt.Errorf("model must be specified") @@ -62,7 +71,8 @@ func runCommand(conf *config, logger *log.Logger, v *viper.Viper) (*cobra.Comman if err != nil { return err } - modelMount = fmt.Sprintf("-v %s:/model", modelPath) + modelMountTarget := runtimeImpl.ModelMount() + modelMount = fmt.Sprintf("-v %s:%s", modelPath, modelMountTarget) modelFiles, err := modelRepo.Files() if err != nil { return err @@ -83,7 +93,18 @@ func runCommand(conf *config, logger *log.Logger, v *viper.Viper) (*cobra.Comman if err != nil { return err } - datasetMount = fmt.Sprintf("-v %s:/dataset", datasetPath) + datasetMountTarget := runtimeImpl.DatasetMount() + datasetMount = fmt.Sprintf("-v %s:%s", datasetPath, datasetMountTarget) + } + cacheDir := v.GetString("cache") + cacheMountTarget := runtimeImpl.CacheMount() + if cacheDir != "" && cacheMountTarget != "" { + cacheMount = fmt.Sprintf("-v %s:%s", cacheDir, cacheMountTarget) + } + bundlesDir := v.GetString("bundles") + bundlesMountTarget := runtimeImpl.BundlesMount() + if bundlesDir != "" && bundlesMountTarget != "" { + bundlesMount = fmt.Sprintf("-v %s:%s", bundlesDir, bundlesMountTarget) } // determine the commands @@ -112,23 +133,35 @@ func runCommand(conf *config, logger *log.Logger, v *viper.Viper) (*cobra.Comman if datasetMount != "" { dockerArgs = append(dockerArgs, datasetMount) } + if cacheMount != "" { + dockerArgs = append(dockerArgs, cacheMount) + } + if bundlesMount != "" { + dockerArgs = append(dockerArgs, bundlesMount) + } dockerArgs = append(dockerArgs, image) if command != "" { dockerArgs = append(dockerArgs, command) } // Execute the docker create command - cmdExec := "docker" + dryRun := v.GetBool("dry-run") + var ( + cmdExec = "docker" + cid []byte + ) logger.Infof("Executing: %s %s", cmdExec, strings.Join(dockerArgs, " ")) - execCmd := exec.Command(cmdExec, dockerArgs...) - stdout := bytes.NewBuffer(nil) - execCmd.Stdout = stdout - execCmd.Stderr = cmd.OutOrStderr() - execCmd.Stdin = cmd.InOrStdin() - if err := execCmd.Run(); err != nil { - return fmt.Errorf("failed to run docker: %w", err) + if !dryRun { + execCmd := exec.Command(cmdExec, dockerArgs...) + stdout := bytes.NewBuffer(nil) + execCmd.Stdout = stdout + execCmd.Stderr = cmd.OutOrStderr() + execCmd.Stdin = cmd.InOrStdin() + if err := execCmd.Run(); err != nil { + return fmt.Errorf("failed to run docker: %w", err) + } + cid = stdout.Bytes() } - cid := stdout.Bytes() // copy anything in it that needs to be content, err := runtimeImpl.Content() @@ -136,6 +169,10 @@ func runCommand(conf *config, logger *log.Logger, v *viper.Viper) (*cobra.Comman return fmt.Errorf("failed to get content: %w", err) } for target, content := range content { + if dryRun { + logger.Infof("Copying contents to container: %s", target) + continue + } cpExec := exec.Command("docker", "cp", "-", fmt.Sprintf("%s:%s", cid, target)) cpExec.Stdin = strings.NewReader(string(content)) if err := cpExec.Run(); err != nil { @@ -144,6 +181,10 @@ func runCommand(conf *config, logger *log.Logger, v *viper.Viper) (*cobra.Comman } // and then start the container + if dryRun { + logger.Infof("Executing: docker start -ai ") + return nil + } runExec := exec.Command("docker", "start", "-ai", string(cid)) runExec.Stdout = cmd.OutOrStdout() runExec.Stderr = cmd.OutOrStderr() @@ -156,8 +197,11 @@ func runCommand(conf *config, logger *log.Logger, v *viper.Viper) (*cobra.Comman flags.StringP("model", "m", "", "model reference to run") flags.StringP("dataset", "d", "", "dataset to use") flags.StringP("runtime", "r", runtimes[0], fmt.Sprintf("runtime to use, one of: %s", strings.Join(runtimes, " "))) + flags.String("cache", defaultCacheDir, "Cache directory; usage depends on runtime") + flags.String("bundles", defaultBundlesDir, "Bundles directory; usage depends on runtime") flags.StringP("image", "i", "ghcr.io/nekkoai/onnx-eis", "Image to run the model, determined by runtime option, or can be set explicitly") flags.StringP("command", "c", "", "Command to run inside the container (default determined by runtime)") + flags.Bool("dry-run", false, "If true, only print the docker command that would be executed") if err := cmd.MarkFlagRequired("model"); err != nil { return nil, err diff --git a/pkg/runtime/onnxeis/onnxeis.go b/pkg/runtime/onnxeis/onnxeis.go index 9cb7eeb..ec5585f 100644 --- a/pkg/runtime/onnxeis/onnxeis.go +++ b/pkg/runtime/onnxeis/onnxeis.go @@ -30,3 +30,18 @@ func (o *ONNXEIS) Command(modelPath, modelRoot string) string { func (o *ONNXEIS) Content() (map[string][]byte, error) { return map[string][]byte{}, nil } + +func (o *ONNXEIS) ModelMount() string { + return "/model" +} +func (o *ONNXEIS) DatasetMount() string { + return "/dataset" +} + +func (o *ONNXEIS) BundlesMount() string { + return "/bundles" +} + +func (o *ONNXEIS) CacheMount() string { + return "/cache" +} diff --git a/pkg/runtime/onnxruntime/command.txt b/pkg/runtime/onnxruntime/command.txt index eb2de77..661b5bb 100644 --- a/pkg/runtime/onnxruntime/command.txt +++ b/pkg/runtime/onnxruntime/command.txt @@ -1,4 +1,5 @@ python /llm-kvc.py --model=/model/%MODEL_SNAPSHOT%/model.onnx --tokenizer=/model/%MODEL_SNAPSHOT% + --bundle-dir=/bundles -g 100 --etglow-implementation=llm_kvc_inference 2>&1 | tee llm-kvc.out \ No newline at end of file diff --git a/pkg/runtime/onnxruntime/onnxruntime.go b/pkg/runtime/onnxruntime/onnxruntime.go index 1e86978..3e54337 100644 --- a/pkg/runtime/onnxruntime/onnxruntime.go +++ b/pkg/runtime/onnxruntime/onnxruntime.go @@ -56,3 +56,18 @@ func (o *onnxRuntime) Content() (map[string][]byte, error) { } return m, nil } + +func (o *onnxRuntime) ModelMount() string { + return "/model" +} +func (o *onnxRuntime) DatasetMount() string { + return "/dataset" +} + +func (o *onnxRuntime) BundlesMount() string { + return "/bundles" +} + +func (o *onnxRuntime) CacheMount() string { + return "/cache" +} diff --git a/pkg/runtime/spec/runtime.go b/pkg/runtime/spec/runtime.go index 0e0458c..c17fd55 100644 --- a/pkg/runtime/spec/runtime.go +++ b/pkg/runtime/spec/runtime.go @@ -5,5 +5,15 @@ type Runtime interface { // Command return the command that should be executed. modelRoot parameter is relative to the container where it is run // modelPath parameter is relative to modelRoot Command(modelRoot, modelPath string) string + // Content returns content that should be injected into the container before starting. + // This is mainly used for llm-kvc.py for onnxruntime, and likely will go away in the future. Content() (map[string][]byte, error) + // ModelMount returns the mount path for the model inside the container + ModelMount() string + // DatasetMount returns the mount path for the dataset inside the container + DatasetMount() string + // BundlesMount returns the mount path for the bundles inside the container + BundlesMount() string + // CacheMount returns the mount path for the cache inside the container + CacheMount() string }