From 0af60a5069e6d6b88a296066dc908fdf06390985 Mon Sep 17 00:00:00 2001 From: Sertac Ozercan Date: Sun, 22 Sep 2024 02:58:10 +0000 Subject: [PATCH] feat: auto load into memory on startup Signed-off-by: Sertac Ozercan --- core/backend/embeddings.go | 2 +- core/backend/image.go | 2 +- core/backend/llm.go | 2 +- core/backend/options.go | 2 +- core/backend/rerank.go | 2 +- core/backend/soundgeneration.go | 2 +- core/backend/tts.go | 2 +- core/cli/run.go | 2 + core/config/application_config.go | 7 + core/startup/startup.go | 449 ++++++++++++++++-------------- 10 files changed, 259 insertions(+), 213 deletions(-) diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go index 31b10a1966e0..9f0f8be9d359 100644 --- a/core/backend/embeddings.go +++ b/core/backend/embeddings.go @@ -12,7 +12,7 @@ import ( func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) { modelFile := backendConfig.Model - grpcOpts := gRPCModelOpts(backendConfig) + grpcOpts := GRPCModelOpts(backendConfig) var inferenceModel interface{} var err error diff --git a/core/backend/image.go b/core/backend/image.go index 8c3f56b3bafc..5c2a950c6ab5 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -12,7 +12,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat if *threads == 0 && appConfig.Threads != 0 { threads = &appConfig.Threads } - gRPCOpts := gRPCModelOpts(backendConfig) + gRPCOpts := GRPCModelOpts(backendConfig) opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(backendConfig.Backend), model.WithAssetDir(appConfig.AssetsDestination), diff --git a/core/backend/llm.go b/core/backend/llm.go index f74071ba8528..cac9beba7ecd 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -37,7 +37,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im if *threads == 0 && 
o.Threads != 0 { threads = &o.Threads } - grpcOpts := gRPCModelOpts(c) + grpcOpts := GRPCModelOpts(c) var inferenceModel grpc.Backend var err error diff --git a/core/backend/options.go b/core/backend/options.go index d986b8e67eb2..d431aab6da97 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -44,7 +44,7 @@ func getSeed(c config.BackendConfig) int32 { return seed } -func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { +func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { b := 512 if c.Batch != 0 { b = c.Batch diff --git a/core/backend/rerank.go b/core/backend/rerank.go index 1b718be2c213..a7573adec411 100644 --- a/core/backend/rerank.go +++ b/core/backend/rerank.go @@ -15,7 +15,7 @@ func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *mod return nil, fmt.Errorf("backend is required") } - grpcOpts := gRPCModelOpts(backendConfig) + grpcOpts := GRPCModelOpts(backendConfig) opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ model.WithBackendString(bb), diff --git a/core/backend/soundgeneration.go b/core/backend/soundgeneration.go index abd5221bce6a..b6a1c8272a23 100644 --- a/core/backend/soundgeneration.go +++ b/core/backend/soundgeneration.go @@ -29,7 +29,7 @@ func SoundGeneration( return "", nil, fmt.Errorf("backend is a required parameter") } - grpcOpts := gRPCModelOpts(backendConfig) + grpcOpts := GRPCModelOpts(backendConfig) opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ model.WithBackendString(backend), model.WithModel(modelFile), diff --git a/core/backend/tts.go b/core/backend/tts.go index 258882ae4351..2401748c1c3b 100644 --- a/core/backend/tts.go +++ b/core/backend/tts.go @@ -28,7 +28,7 @@ func ModelTTS( bb = model.PiperBackend } - grpcOpts := gRPCModelOpts(backendConfig) + grpcOpts := GRPCModelOpts(backendConfig) opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ model.WithBackendString(bb), diff --git a/core/cli/run.go b/core/cli/run.go 
index afb7204cdbd2..a67839a0ae43 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -69,6 +69,7 @@ type RunCMD struct { WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"` DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"` + LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"` } func (r *RunCMD) Run(ctx *cliContext.Context) error { @@ -104,6 +105,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithDisableApiKeyRequirementForHttpGet(r.DisableApiKeyRequirementForHttpGet), config.WithHttpGetExemptedEndpoints(r.HttpGetExemptedEndpoints), config.WithP2PNetworkID(r.Peer2PeerNetworkID), + config.WithLoadToMemory(r.LoadToMemory), } token := "" diff --git a/core/config/application_config.go b/core/config/application_config.go index afbf325f2719..2af0c7ae16df 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -41,6 +41,7 @@ type ApplicationConfig struct { DisableApiKeyRequirementForHttpGet bool HttpGetExemptedEndpoints []*regexp.Regexp DisableGalleryEndpoint bool + LoadToMemory []string ModelLibraryURL string @@ -331,6 +332,12 @@ func WithOpaqueErrors(opaque bool) AppOption { } } +func WithLoadToMemory(models []string) AppOption { + return func(o *ApplicationConfig) { + o.LoadToMemory = models + } +} + func WithSubtleKeyComparison(subtle bool) AppOption { return func(o *ApplicationConfig) { o.UseSubtleKeyComparison = subtle diff --git a/core/startup/startup.go b/core/startup/startup.go index 3565d196aa5e..b7b9ce8fad44 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -1,206 +1,243 @@ -package 
startup - -import ( - "fmt" - "os" - - "github.com/mudler/LocalAI/core" - "github.com/mudler/LocalAI/core/config" - "github.com/mudler/LocalAI/core/services" - "github.com/mudler/LocalAI/internal" - "github.com/mudler/LocalAI/pkg/assets" - "github.com/mudler/LocalAI/pkg/library" - "github.com/mudler/LocalAI/pkg/model" - pkgStartup "github.com/mudler/LocalAI/pkg/startup" - "github.com/mudler/LocalAI/pkg/xsysinfo" - "github.com/rs/zerolog/log" -) - -func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) { - options := config.NewApplicationConfig(opts...) - - log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath) - log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) - caps, err := xsysinfo.CPUCapabilities() - if err == nil { - log.Debug().Msgf("CPU capabilities: %v", caps) - } - gpus, err := xsysinfo.GPUs() - if err == nil { - log.Debug().Msgf("GPU count: %d", len(gpus)) - for _, gpu := range gpus { - log.Debug().Msgf("GPU: %s", gpu.String()) - } - } - - // Make sure directories exists - if options.ModelPath == "" { - return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty") - } - err = os.MkdirAll(options.ModelPath, 0750) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err) - } - if options.ImageDir != "" { - err := os.MkdirAll(options.ImageDir, 0750) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err) - } - } - if options.AudioDir != "" { - err := os.MkdirAll(options.AudioDir, 0750) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err) - } - } - if options.UploadDir != "" { - err := os.MkdirAll(options.UploadDir, 0750) - if err != nil { - return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err) - } - } - - if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, 
options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil { - log.Error().Err(err).Msg("error installing models") - } - - cl := config.NewBackendConfigLoader(options.ModelPath) - ml := model.NewModelLoader(options.ModelPath) - - configLoaderOpts := options.ToConfigLoaderOptions() - - if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { - log.Error().Err(err).Msg("error loading config files") - } - - if options.ConfigFile != "" { - if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil { - log.Error().Err(err).Msg("error loading config file") - } - } - - if err := cl.Preload(options.ModelPath); err != nil { - log.Error().Err(err).Msg("error downloading models") - } - - if options.PreloadJSONModels != "" { - if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil { - return nil, nil, nil, err - } - } - - if options.PreloadModelsFromPath != "" { - if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil { - return nil, nil, nil, err - } - } - - if options.Debug { - for _, v := range cl.GetAllBackendConfigs() { - log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v) - } - } - - if options.AssetsDestination != "" { - // Extract files from the embedded FS - err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination) - log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination) - if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) - } - } - - if options.LibPath != "" { - // If there is a lib directory, set LD_LIBRARY_PATH to include it - err := library.LoadExternal(options.LibPath) - if err != nil { - 
log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries") - } - } - - // turn off any process that was started by GRPC if the context is canceled - go func() { - <-options.Context.Done() - log.Debug().Msgf("Context canceled, shutting down") - err := ml.StopAllGRPC() - if err != nil { - log.Error().Err(err).Msg("error while stopping all grpc backends") - } - }() - - if options.WatchDog { - wd := model.NewWatchDog( - ml, - options.WatchDogBusyTimeout, - options.WatchDogIdleTimeout, - options.WatchDogBusy, - options.WatchDogIdle) - ml.SetWatchDog(wd) - go wd.Run() - go func() { - <-options.Context.Done() - log.Debug().Msgf("Context canceled, shutting down") - wd.Shutdown() - }() - } - - // Watch the configuration directory - startWatcher(options) - - log.Info().Msg("core/startup process completed!") - return cl, ml, options, nil -} - -func startWatcher(options *config.ApplicationConfig) { - if options.DynamicConfigsDir == "" { - // No need to start the watcher if the directory is not set - return - } - - if _, err := os.Stat(options.DynamicConfigsDir); err != nil { - if os.IsNotExist(err) { - // We try to create the directory if it does not exist and was specified - if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil { - log.Error().Err(err).Msg("failed creating DynamicConfigsDir") - } - } else { - // something else happened, we log the error and don't start the watcher - log.Error().Err(err).Msg("failed to read DynamicConfigsDir, watcher will not be started") - return - } - } - - configHandler := newConfigFileHandler(options) - if err := configHandler.Watch(); err != nil { - log.Error().Err(err).Msg("failed creating watcher") - } -} - -// In Lieu of a proper DI framework, this function wires up the Application manually. -// This is in core/startup rather than core/state.go to keep package references clean! 
-func createApplication(appConfig *config.ApplicationConfig) *core.Application { - app := &core.Application{ - ApplicationConfig: appConfig, - BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath), - ModelLoader: model.NewModelLoader(appConfig.ModelPath), - } - - var err error - - // app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - // app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - - app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) - app.GalleryService = services.NewGalleryService(app.ApplicationConfig) - // app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService) - - app.LocalAIMetricsService, err = services.NewLocalAIMetricsService() - if err != nil { - log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.") - } - - return app -} +package startup + +import ( + "fmt" + "os" + + "github.com/mudler/LocalAI/core" + "github.com/mudler/LocalAI/core/backend" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/services" + "github.com/mudler/LocalAI/internal" + "github.com/mudler/LocalAI/pkg/assets" + "github.com/mudler/LocalAI/pkg/library" + "github.com/mudler/LocalAI/pkg/model" + pkgStartup 
"github.com/mudler/LocalAI/pkg/startup" + "github.com/mudler/LocalAI/pkg/xsysinfo" + "github.com/rs/zerolog/log" +) + +func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) { + options := config.NewApplicationConfig(opts...) + + log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath) + log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) + caps, err := xsysinfo.CPUCapabilities() + if err == nil { + log.Debug().Msgf("CPU capabilities: %v", caps) + } + gpus, err := xsysinfo.GPUs() + if err == nil { + log.Debug().Msgf("GPU count: %d", len(gpus)) + for _, gpu := range gpus { + log.Debug().Msgf("GPU: %s", gpu.String()) + } + } + + // Make sure directories exists + if options.ModelPath == "" { + return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty") + } + err = os.MkdirAll(options.ModelPath, 0750) + if err != nil { + return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err) + } + if options.ImageDir != "" { + err := os.MkdirAll(options.ImageDir, 0750) + if err != nil { + return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err) + } + } + if options.AudioDir != "" { + err := os.MkdirAll(options.AudioDir, 0750) + if err != nil { + return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err) + } + } + if options.UploadDir != "" { + err := os.MkdirAll(options.UploadDir, 0750) + if err != nil { + return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err) + } + } + + if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil { + log.Error().Err(err).Msg("error installing models") + } + + cl := config.NewBackendConfigLoader(options.ModelPath) + ml := model.NewModelLoader(options.ModelPath) + + configLoaderOpts := options.ToConfigLoaderOptions() + + if err := 
cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { + log.Error().Err(err).Msg("error loading config files") + } + + if options.ConfigFile != "" { + if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil { + log.Error().Err(err).Msg("error loading config file") + } + } + + if err := cl.Preload(options.ModelPath); err != nil { + log.Error().Err(err).Msg("error downloading models") + } + + if options.PreloadJSONModels != "" { + if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil { + return nil, nil, nil, err + } + } + + if options.PreloadModelsFromPath != "" { + if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil { + return nil, nil, nil, err + } + } + + if options.Debug { + for _, v := range cl.GetAllBackendConfigs() { + log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v) + } + } + + if options.AssetsDestination != "" { + // Extract files from the embedded FS + err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination) + log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination) + if err != nil { + log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) + } + } + + if options.LibPath != "" { + // If there is a lib directory, set LD_LIBRARY_PATH to include it + err := library.LoadExternal(options.LibPath) + if err != nil { + log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries") + } + } + + // turn off any process that was started by GRPC if the context is canceled + go func() { + <-options.Context.Done() + log.Debug().Msgf("Context canceled, shutting down") + err := ml.StopAllGRPC() + if err != nil { + log.Error().Err(err).Msg("error 
while stopping all grpc backends") + } + }() + + if options.WatchDog { + wd := model.NewWatchDog( + ml, + options.WatchDogBusyTimeout, + options.WatchDogIdleTimeout, + options.WatchDogBusy, + options.WatchDogIdle) + ml.SetWatchDog(wd) + go wd.Run() + go func() { + <-options.Context.Done() + log.Debug().Msgf("Context canceled, shutting down") + wd.Shutdown() + }() + } + + if options.LoadToMemory != nil { + for _, m := range options.LoadToMemory { + cfg, err := cl.LoadBackendConfigFileByName(m, options.ModelPath, + config.LoadOptionDebug(options.Debug), + config.LoadOptionThreads(options.Threads), + config.LoadOptionContextSize(options.ContextSize), + config.LoadOptionF16(options.F16), + config.ModelPath(options.ModelPath), + ) + if err != nil { + return nil, nil, nil, err + } + + log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model) + + grpcOpts := backend.GRPCModelOpts(*cfg) + o := []model.Option{ + model.WithModel(cfg.Model), + model.WithAssetDir(options.AssetsDestination), + model.WithThreads(uint32(options.Threads)), + model.WithLoadGRPCLoadModelOpts(grpcOpts), + } + + var backendErr error + if cfg.Backend != "" { + o = append(o, model.WithBackendString(cfg.Backend)) + _, backendErr = ml.BackendLoader(o...) + } else { + _, backendErr = ml.GreedyLoader(o...) 
+ } + if backendErr != nil { + return nil, nil, nil, backendErr + } + } + } + + // Watch the configuration directory + startWatcher(options) + + log.Info().Msg("core/startup process completed!") + return cl, ml, options, nil +} + +func startWatcher(options *config.ApplicationConfig) { + if options.DynamicConfigsDir == "" { + // No need to start the watcher if the directory is not set + return + } + + if _, err := os.Stat(options.DynamicConfigsDir); err != nil { + if os.IsNotExist(err) { + // We try to create the directory if it does not exist and was specified + if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil { + log.Error().Err(err).Msg("failed creating DynamicConfigsDir") + } + } else { + // something else happened, we log the error and don't start the watcher + log.Error().Err(err).Msg("failed to read DynamicConfigsDir, watcher will not be started") + return + } + } + + configHandler := newConfigFileHandler(options) + if err := configHandler.Watch(); err != nil { + log.Error().Err(err).Msg("failed creating watcher") + } +} + +// In Lieu of a proper DI framework, this function wires up the Application manually. +// This is in core/startup rather than core/state.go to keep package references clean! 
+func createApplication(appConfig *config.ApplicationConfig) *core.Application { + app := &core.Application{ + ApplicationConfig: appConfig, + BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath), + ModelLoader: model.NewModelLoader(appConfig.ModelPath), + } + + var err error + + // app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + // app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + + app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig) + app.GalleryService = services.NewGalleryService(app.ApplicationConfig) + // app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService) + + app.LocalAIMetricsService, err = services.NewLocalAIMetricsService() + if err != nil { + log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.") + } + + return app +}