From 72e52c4f6a9fb29bfa2d85006245fc3e05ae8082 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 30 Jan 2025 00:03:01 +0100 Subject: [PATCH] chore: drop embedded models (#4715) Since the remote gallery was introduced, this is now completely superseded by it. In order to keep the code clean and remove redundant parts, let's simplify the usage. Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- core/application/startup.go | 2 +- core/cli/models.go | 2 +- core/cli/run.go | 2 - core/config/application_config.go | 8 -- core/services/gallery.go | 2 +- .../content/docs/advanced/run-other-models.md | 126 ------------------ .../docs/getting-started/container-images.md | 2 +- embedded/embedded.go | 72 ---------- embedded/model_library.yaml | 9 -- embedded/models/all-minilm-l6-v2.yaml | 13 -- embedded/models/animagine-xl.yaml | 17 --- embedded/models/bakllava.yaml | 40 ------ embedded/models/bark.yaml | 8 -- embedded/models/cerbero.yaml | 24 ---- embedded/models/codellama-7b-gguf.yaml | 20 --- embedded/models/codellama-7b.yaml | 14 -- embedded/models/coqui.yaml | 9 -- embedded/models/dolphin-2.5-mixtral-8x7b.yaml | 31 ----- embedded/models/hermes-2-pro-mistral.yaml | 59 -------- embedded/models/llama3-instruct.yaml | 48 ------- embedded/models/llava-1.5.yaml | 33 ----- embedded/models/llava-1.6-mistral.yaml | 33 ----- embedded/models/llava-1.6-vicuna.yaml | 37 ----- embedded/models/llava.yaml | 40 ------ embedded/models/mamba-bagel.yaml | 21 --- embedded/models/mamba-chat.yaml | 28 ---- embedded/models/mistral-openorca.yaml | 32 ----- embedded/models/mixtral-instruct.yaml | 25 ---- embedded/models/phi-2-chat.yaml | 25 ---- embedded/models/phi-2-orange.yaml | 30 ----- embedded/models/rhasspy-voice-en-us-amy.yaml | 13 -- embedded/models/tinyllama-chat.yaml | 29 ---- embedded/models/transformers-tinyllama.yaml | 31 ----- embedded/models/vall-e-x.yaml | 8 -- embedded/models/whisper-base.yaml | 18 --- pkg/startup/model_preload.go | 28 +--- pkg/startup/model_preload_test.go | 53 +------- .../webui_static.yaml => webui_static.yaml | 0 39 files changed, 8 insertions(+), 986 deletions(-) delete mode 100644 docs/content/docs/advanced/run-other-models.md delete mode 100644 embedded/embedded.go delete mode 100644 embedded/model_library.yaml delete mode 100644 embedded/models/all-minilm-l6-v2.yaml delete mode 100644 embedded/models/animagine-xl.yaml delete mode 100644 embedded/models/bakllava.yaml delete mode 100644 embedded/models/bark.yaml delete mode 100644 embedded/models/cerbero.yaml delete mode 100644 embedded/models/codellama-7b-gguf.yaml delete mode 100644 embedded/models/codellama-7b.yaml delete mode 100644 embedded/models/coqui.yaml delete mode 100644 embedded/models/dolphin-2.5-mixtral-8x7b.yaml delete mode 100644 embedded/models/hermes-2-pro-mistral.yaml delete mode 100644 embedded/models/llama3-instruct.yaml delete mode 100644 embedded/models/llava-1.5.yaml delete mode 100644 embedded/models/llava-1.6-mistral.yaml delete mode 100644 embedded/models/llava-1.6-vicuna.yaml delete mode 100644 embedded/models/llava.yaml delete mode 100644 embedded/models/mamba-bagel.yaml delete mode 100644 embedded/models/mamba-chat.yaml delete mode 100644 embedded/models/mistral-openorca.yaml delete mode 100644 embedded/models/mixtral-instruct.yaml delete mode 100644 embedded/models/phi-2-chat.yaml delete mode 100644 embedded/models/phi-2-orange.yaml delete mode 100644 embedded/models/rhasspy-voice-en-us-amy.yaml delete mode 100644 embedded/models/tinyllama-chat.yaml delete mode 100644 
embedded/models/transformers-tinyllama.yaml delete mode 100644 embedded/models/vall-e-x.yaml delete mode 100644 embedded/models/whisper-base.yaml rename embedded/webui_static.yaml => webui_static.yaml (100%) diff --git a/Makefile b/Makefile index 20ef7199defe..5b903d7dd6e2 100644 --- a/Makefile +++ b/Makefile @@ -861,7 +861,7 @@ swagger: .PHONY: gen-assets gen-assets: - $(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets + $(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets ## Documentation docs/layouts/_default: diff --git a/core/application/startup.go b/core/application/startup.go index cd52d37ae962..fffcd8bbb7ac 100644 --- a/core/application/startup.go +++ b/core/application/startup.go @@ -62,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) { } } - if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil { + if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil { log.Error().Err(err).Msg("error installing models") } diff --git a/core/cli/models.go b/core/cli/models.go index 56d13fc7a12f..28b2944f9361 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error { log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model") } - err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName) + err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName) if err != nil { return err } diff --git a/core/cli/run.go b/core/cli/run.go index 279ff94be8b2..3162ef1452e4 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -32,7 +32,6 @@ type RunCMD struct { Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"` AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"` - RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"` PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"` Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"` PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. 
Path to a YAML config file" group:"models"` @@ -90,7 +89,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval), config.WithF16(r.F16), config.WithStringGalleries(r.Galleries), - config.WithModelLibraryURL(r.RemoteLibrary), config.WithCors(r.CORS), config.WithCorsAllowOrigins(r.CORSAllowOrigins), config.WithCsrf(r.CSRF), diff --git a/core/config/application_config.go b/core/config/application_config.go index 1ffcb297f98c..2cc9b01bb2ee 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -44,8 +44,6 @@ type ApplicationConfig struct { DisableGalleryEndpoint bool LoadToMemory []string - ModelLibraryURL string - Galleries []Gallery BackendAssets embed.FS @@ -126,12 +124,6 @@ func WithP2PToken(s string) AppOption { } } -func WithModelLibraryURL(url string) AppOption { - return func(o *ApplicationConfig) { - o.ModelLibraryURL = url - } -} - func WithLibPath(path string) AppOption { return func(o *ApplicationConfig) { o.LibPath = path diff --git a/core/services/gallery.go b/core/services/gallery.go index 45bebd4f5087..f499d3818192 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -129,7 +129,7 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader if op.GalleryModelName != "" { err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryModelName, g.appConfig.ModelPath, op.Req, progressCallback, g.appConfig.EnforcePredownloadScans) } else if op.ConfigURL != "" { - err = startup.InstallModels(op.Galleries, op.ConfigURL, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback, op.ConfigURL) + err = startup.InstallModels(op.Galleries, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback, op.ConfigURL) if err != nil { updateError(err) continue diff --git a/docs/content/docs/advanced/run-other-models.md b/docs/content/docs/advanced/run-other-models.md deleted file mode 100644 index f9bdc22d4977..000000000000 --- a/docs/content/docs/advanced/run-other-models.md +++ /dev/null @@ -1,126 +0,0 @@ -+++ -disableToc = false -title = "Run other Models" -weight = 23 -icon = "rocket_launch" - -+++ - -## Running other models - -> _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/models" %}})_. - -To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/models" %}}) or configure LocalAI to pull the models from external sources, like Huggingface and configure the model. - -To do that, you can point LocalAI to an URL to a YAML configuration file - however - LocalAI does also have some popular model configuration embedded in the binary as well. Below you can find a list of the models configuration that LocalAI has pre-built, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) on how to configure models from URLs. - -There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) , [Embeddings]({{%relref "docs/features/embeddings" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) depending on the backend being used and the model architecture. - -{{% alert icon="πŸ’‘" %}} - -To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). 
For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI-examples/tree/main/configurations) and the configurations for the models below is available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models). -{{% /alert %}} - -{{< tabs tabTotal="3" >}} -{{% tab tabName="CPU-only" %}} - -> πŸ’‘Don't need GPU acceleration? use the CPU images which are lighter and do not have Nvidia dependencies - -| Model | Category | Docker command | -| --- | --- | --- | -| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` | -| πŸŒ‹ [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bakllava``` | -| πŸŒ‹ [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.5``` | -| πŸŒ‹ [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-mistral``` | -| πŸŒ‹ [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-vicuna``` | -| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` | -| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` | -| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` | -| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core whisper-base``` | -| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core rhasspy-voice-en-us-amy``` | -| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg coqui``` | -| 🐢 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg bark``` | -| πŸ”Š [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg vall-e-x``` | -| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mixtral-instruct``` | -| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original 
model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core tinyllama-chat``` | -| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core dolphin-2.5-mixtral-8x7b``` | -| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | -| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only | -| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | -| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | -| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` | -| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core hermes-2-pro-mistral``` | -{{% /tab %}} - -{{% tab tabName="GPU (CUDA 11)" %}} - - -> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}). - -| Model | Category | Docker command | -| --- | --- | --- | -| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` | -| πŸŒ‹ [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bakllava``` | -| πŸŒ‹ [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.5``` | -| πŸŒ‹ [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-mistral``` | -| πŸŒ‹ [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-vicuna``` | -| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` | -| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` | -| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all 
localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` | -| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core whisper-base``` | -| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core rhasspy-voice-en-us-amy``` | -| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 coqui``` | -| 🐢 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 bark``` | -| πŸ”Š [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 vall-e-x``` | -| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mixtral-instruct``` | -| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core tinyllama-chat``` | -| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core dolphin-2.5-mixtral-8x7b``` | -| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 mamba-chat``` | -| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda11 animagine-xl``` | -| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` | -| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` | -| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` | -| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` | -{{% /tab %}} - - -{{% tab tabName="GPU (CUDA 12)" %}} - -> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}). 
- -| Model | Category | Docker command | -| --- | --- | --- | -| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` | -| πŸŒ‹ [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bakllava``` | -| πŸŒ‹ [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.5``` | -| πŸŒ‹ [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-mistral``` | -| πŸŒ‹ [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-vicuna``` | -| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` | -| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` | -| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` | -| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core whisper-base``` | -| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core rhasspy-voice-en-us-amy``` | -| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 coqui``` | -| 🐢 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 bark``` | -| πŸ”Š [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 vall-e-x``` | -| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mixtral-instruct``` | -| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core tinyllama-chat``` | -| 
[dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core dolphin-2.5-mixtral-8x7b``` | -| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 mamba-chat``` | -| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` | -| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` | -| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` | -| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` | -| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` | -{{% /tab %}} - -{{< /tabs >}} - -{{% alert icon="πŸ’‘" %}} -**Tip** You can actually specify multiple models to start an instance with the models loaded, for example to have both llava and phi-2 configured: - -```bash -docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava phi-2 -``` - -{{% /alert %}} diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md index a6a955adbcf0..d1930805bcd7 100644 --- a/docs/content/docs/getting-started/container-images.md +++ b/docs/content/docs/getting-started/container-images.md @@ -143,7 +143,7 @@ The AIO Images are inheriting the same environment variables as the base images | Variable | Default | Description | | ---------------------| ------- | ----------- | | `PROFILE` | Auto-detected | The size of the model to use. 
Available: `cpu`, `gpu-8g` | -| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/advanced/run-other-models" %}})) | +| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/models" %}})) | ## Standard container images diff --git a/embedded/embedded.go b/embedded/embedded.go deleted file mode 100644 index 3a4ea2628d43..000000000000 --- a/embedded/embedded.go +++ /dev/null @@ -1,72 +0,0 @@ -package embedded - -import ( - "embed" - "fmt" - "slices" - "strings" - - "github.com/mudler/LocalAI/pkg/downloader" - "github.com/rs/zerolog/log" - - "github.com/mudler/LocalAI/pkg/assets" - "gopkg.in/yaml.v3" -) - -var modelShorteners map[string]string - -//go:embed model_library.yaml -var modelLibrary []byte - -//go:embed models/* -var embeddedModels embed.FS - -func ModelShortURL(s string) string { - if _, ok := modelShorteners[s]; ok { - s = modelShorteners[s] - } - - return s -} - -func init() { - err := yaml.Unmarshal(modelLibrary, &modelShorteners) - if err != nil { - log.Error().Err(err).Msg("error while unmarshalling embedded modelLibrary") - } -} - -func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) { - remoteLibrary := map[string]string{} - uri := downloader.URI(url) - err := uri.DownloadWithCallback(basePath, func(_ string, i []byte) error { - return yaml.Unmarshal(i, &remoteLibrary) - }) - if err != nil { - return nil, fmt.Errorf("error downloading remote library: %s", err.Error()) - } - - return remoteLibrary, err -} - -// ExistsInModelsLibrary checks if a model exists in the embedded models library -func ExistsInModelsLibrary(s string) bool { - f := fmt.Sprintf("%s.yaml", s) - - a := []string{} - - for _, j := range assets.ListFiles(embeddedModels) { - a = append(a, strings.TrimPrefix(j, "models/")) - } - - return slices.Contains(a, f) -} - -// ResolveContent returns the content in the embedded model library -func ResolveContent(s string) ([]byte, error) { - if ExistsInModelsLibrary(s) { - return embeddedModels.ReadFile(fmt.Sprintf("models/%s.yaml", s)) - } - - return nil, fmt.Errorf("cannot find model %s", s) -} diff --git a/embedded/model_library.yaml b/embedded/model_library.yaml deleted file mode 100644 index 281941a5a6e6..000000000000 --- a/embedded/model_library.yaml +++ /dev/null @@ -1,9 +0,0 @@ -### -### -### This file contains the list of models that are available in the library -### The URLs are automatically expanded when local-ai is being called with the key as argument -### -### For models with an entire YAML file to be embededd, put the file inside the `models` -### directory, it will be automatically available with the file name as key (without the .yaml extension) - -phi-2: "github://mudler/LocalAI-examples/configurations/phi-2.yaml@main" diff --git a/embedded/models/all-minilm-l6-v2.yaml b/embedded/models/all-minilm-l6-v2.yaml deleted file mode 100644 index 512d63a41dfe..000000000000 --- a/embedded/models/all-minilm-l6-v2.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: all-minilm-l6-v2 -backend: sentencetransformers -embeddings: true -parameters: - model: all-MiniLM-L6-v2 - -usage: | - You can test this model with curl like this: - - curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ - "input": "Your text string goes here", - "model": "all-minilm-l6-v2" - }' \ No newline at end of file diff --git a/embedded/models/animagine-xl.yaml 
b/embedded/models/animagine-xl.yaml deleted file mode 100644 index d492c080d37e..000000000000 --- a/embedded/models/animagine-xl.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: animagine-xl -parameters: - model: Linaqruf/animagine-xl -backend: diffusers -f16: true -diffusers: - scheduler_type: euler_a - -usage: | - curl http://localhost:8080/v1/images/generations \ - -H "Content-Type: application/json" \ - -d '{ - "prompt": "|", - "model": "animagine-xl", - "step": 51, - "size": "1024x1024" - }' \ No newline at end of file diff --git a/embedded/models/bakllava.yaml b/embedded/models/bakllava.yaml deleted file mode 100644 index 52fd9466a217..000000000000 --- a/embedded/models/bakllava.yaml +++ /dev/null @@ -1,40 +0,0 @@ -backend: llama-cpp -context_size: 4096 -f16: true - -gpu_layers: 90 -mmap: true -name: bakllava - -roles: - user: "USER:" - assistant: "ASSISTANT:" - system: "SYSTEM:" - -mmproj: bakllava-mmproj.gguf -parameters: - model: bakllava.gguf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - seed: -1 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 - -template: - chat: | - A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. - {{.Input}} - ASSISTANT: - -download_files: -- filename: bakllava.gguf - uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf -- filename: bakllava-mmproj.gguf - uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "bakllava", - "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/bark.yaml b/embedded/models/bark.yaml deleted file mode 100644 index da1b1db45370..000000000000 --- a/embedded/models/bark.yaml +++ /dev/null @@ -1,8 +0,0 @@ -usage: | - bark works without any configuration, to test it, you can run the following curl command: - - curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "backend": "bark", - "input":"Hello, this is a test!" 
- }' | aplay -# TODO: This is a placeholder until we manage to pre-load HF/Transformers models \ No newline at end of file diff --git a/embedded/models/cerbero.yaml b/embedded/models/cerbero.yaml deleted file mode 100644 index 8ace4e35c7a5..000000000000 --- a/embedded/models/cerbero.yaml +++ /dev/null @@ -1,24 +0,0 @@ -backend: llama -context_size: 8192 -f16: false -gpu_layers: 90 -name: cerbero -mmap: false -parameters: - model: huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q8_0.gguf - top_k: 80 - temperature: 0.2 - top_p: 0.7 -template: - completion: "{{.Input}}" - chat: "Questa Γ¨ una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|] " -roles: - user: "[|Umano|] " - system: "[|Umano|] " - assistant: "[|Assistente|] " - -stopwords: -- "[|Umano|]" - -trimsuffix: -- "\n" \ No newline at end of file diff --git a/embedded/models/codellama-7b-gguf.yaml b/embedded/models/codellama-7b-gguf.yaml deleted file mode 100644 index 413c838b7b00..000000000000 --- a/embedded/models/codellama-7b-gguf.yaml +++ /dev/null @@ -1,20 +0,0 @@ -name: codellama-7b-gguf -backend: transformers -parameters: - model: huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_K_M.gguf - temperature: 0.5 - top_k: 40 - seed: -1 - top_p: 0.95 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 - -context_size: 4096 -f16: true -gpu_layers: 90 -usage: | - curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ - "model": "codellama-7b-gguf", - "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):" - }' \ No newline at end of file diff --git a/embedded/models/codellama-7b.yaml b/embedded/models/codellama-7b.yaml deleted file mode 100644 index d9b5c62c60bc..000000000000 --- a/embedded/models/codellama-7b.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: codellama-7b -backend: transformers -type: AutoModelForCausalLM -parameters: - model: codellama/CodeLlama-7b-hf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - -usage: | - curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ - "model": "codellama-7b", - "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):" - }' diff --git a/embedded/models/coqui.yaml b/embedded/models/coqui.yaml deleted file mode 100644 index 5d67f241eb66..000000000000 --- a/embedded/models/coqui.yaml +++ /dev/null @@ -1,9 +0,0 @@ -usage: | - coqui works without any configuration, to test it, you can run the following curl command: - - curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "backend": "coqui", - "model": "tts_models/en/ljspeech/glow-tts", - "input":"Hello, this is a test!" 
- }' -# TODO: This is a placeholder until we manage to pre-load HF/Transformers models \ No newline at end of file diff --git a/embedded/models/dolphin-2.5-mixtral-8x7b.yaml b/embedded/models/dolphin-2.5-mixtral-8x7b.yaml deleted file mode 100644 index 12ee1efc2600..000000000000 --- a/embedded/models/dolphin-2.5-mixtral-8x7b.yaml +++ /dev/null @@ -1,31 +0,0 @@ -name: dolphin-mixtral-8x7b -mmap: true -parameters: - model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/dolphin-2.5-mixtral-8x7b.Q2_K.gguf - temperature: 0.5 - top_k: 40 - top_p: 0.95 - seed: -1 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 -template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} - {{if .Content}}{{.Content}}{{end}}<|im_end|> - chat: | - {{.Input}} - <|im_start|>assistant - completion: | - {{.Input}} -context_size: 4096 -f16: true -stopwords: -- <|im_end|> -gpu_layers: 90 - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "dolphin-mixtral-8x7b", - "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] - }' \ No newline at end of file diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml deleted file mode 100644 index 74d98eebc4f7..000000000000 --- a/embedded/models/hermes-2-pro-mistral.yaml +++ /dev/null @@ -1,59 +0,0 @@ -name: hermes-2-pro-mistral -mmap: true -parameters: - model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf - -template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} - {{- if .FunctionCall }} - - {{- else if eq .RoleName "tool" }} - - {{- end }} - {{- if .Content}} - {{.Content }} - {{- end }} - {{- if .FunctionCall}} - {{toJson .FunctionCall}} - {{- end }} - {{- if .FunctionCall }} - - {{- else if eq .RoleName "tool" }} - - {{- end }}<|im_end|> - # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling - function: | - <|im_start|>system - You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: - - {{range .Functions}} - {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} - {{end}} - - Use the following pydantic model json schema for each tool call you will make: - {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} - For each function call return a json object with function name and arguments within XML tags as follows: - - {'arguments': , 'name': } - <|im_end|> - {{.Input -}} - <|im_start|>assistant - - chat: | - {{.Input -}} - <|im_start|>assistant - completion: | - {{.Input}} -context_size: 4096 -f16: true -stopwords: -- <|im_end|> -- -- "\n" -- "\n\n\n" -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "hermes-2-pro-mistral", - "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] - }' diff --git a/embedded/models/llama3-instruct.yaml b/embedded/models/llama3-instruct.yaml deleted file mode 100644 index d483d2b2a16e..000000000000 --- a/embedded/models/llama3-instruct.yaml +++ /dev/null @@ -1,48 +0,0 @@ -name: llama3-8b-instruct -mmap: true -parameters: - model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf - -template: - chat_message: | - <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|> - - {{ if .FunctionCall -}} - Function call: - {{ else if eq .RoleName "tool" -}} - Function response: - {{ end -}} - {{ if .Content -}} - {{.Content -}} - {{ else if .FunctionCall -}} - {{ toJson .FunctionCall -}} - {{ end -}} - <|eot_id|> - function: | - <|start_header_id|>system<|end_header_id|> - - You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: - - {{range .Functions}} - {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} - {{end}} - - Use the following pydantic model json schema for each tool call you will make: - {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|> - Function call: - chat: | - <|begin_of_text|>{{.Input }} - <|start_header_id|>assistant<|end_header_id|> - completion: | - {{.Input}} -context_size: 8192 -f16: true -stopwords: -- <|im_end|> -- -- "<|eot_id|>" -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "llama3-8b-instruct", - "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] - }' diff --git a/embedded/models/llava-1.5.yaml b/embedded/models/llava-1.5.yaml deleted file mode 100644 index 3db48524ab36..000000000000 --- a/embedded/models/llava-1.5.yaml +++ /dev/null @@ -1,33 +0,0 @@ -backend: llama-cpp -context_size: 4096 -f16: true - -gpu_layers: 90 -mmap: true -name: llava-1.5 - -roles: - user: "USER:" - assistant: "ASSISTANT:" - system: "SYSTEM:" - -mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf -parameters: - model: llava-v1.5-7b-Q4_K.gguf - -template: - chat: | - A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. - {{.Input}} - ASSISTANT: - -download_files: -- filename: llava-v1.5-7b-Q4_K.gguf - uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf -- filename: llava-v1.5-7b-mmproj-Q8_0.gguf - uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "llava-1.5", - "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/llava-1.6-mistral.yaml b/embedded/models/llava-1.6-mistral.yaml deleted file mode 100644 index 602ceb62d4cf..000000000000 --- a/embedded/models/llava-1.6-mistral.yaml +++ /dev/null @@ -1,33 +0,0 @@ -backend: llama-cpp -context_size: 4096 -f16: true - -gpu_layers: 90 -mmap: true -name: llava-1.6-mistral - -roles: - user: "USER:" - assistant: "ASSISTANT:" - system: "SYSTEM:" - -mmproj: llava-v1.6-7b-mmproj-f16.gguf -parameters: - model: llava-v1.6-mistral-7b.gguf - -template: - chat: | - A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. 
- {{.Input}} - ASSISTANT: - -download_files: -- filename: llava-v1.6-mistral-7b.gguf - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf -- filename: llava-v1.6-7b-mmproj-f16.gguf - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "llava-1.6-mistral", - "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/llava-1.6-vicuna.yaml b/embedded/models/llava-1.6-vicuna.yaml deleted file mode 100644 index cea33e7f8fb5..000000000000 --- a/embedded/models/llava-1.6-vicuna.yaml +++ /dev/null @@ -1,37 +0,0 @@ -backend: llama-cpp -context_size: 4096 -f16: true - -gpu_layers: 90 -mmap: true -name: llava-1.6-vicuna - -roles: - user: "USER:" - assistant: "ASSISTANT:" - system: "SYSTEM:" - -mmproj: mmproj-vicuna7b-f16.gguf -parameters: - model: vicuna-7b-q5_k.gguf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - seed: -1 - -template: - chat: | - A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. - {{.Input}} - ASSISTANT: - -download_files: -- filename: vicuna-7b-q5_k.gguf - uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf -- filename: mmproj-vicuna7b-f16.gguf - uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "llava-1.6-vicuna", - "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/llava.yaml b/embedded/models/llava.yaml deleted file mode 100644 index 3d240681f175..000000000000 --- a/embedded/models/llava.yaml +++ /dev/null @@ -1,40 +0,0 @@ -backend: llama-cpp -context_size: 4096 -f16: true - -gpu_layers: 90 -mmap: true -name: llava - -roles: - user: "USER:" - assistant: "ASSISTANT:" - system: "SYSTEM:" - -mmproj: bakllava-mmproj.gguf -parameters: - model: bakllava.gguf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - seed: -1 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 - -template: - chat: | - A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. 
- {{.Input}} - ASSISTANT: - -download_files: -- filename: bakllava.gguf - uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf -- filename: bakllava-mmproj.gguf - uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "llava", - "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/mamba-bagel.yaml b/embedded/models/mamba-bagel.yaml deleted file mode 100644 index aed7142fcbd4..000000000000 --- a/embedded/models/mamba-bagel.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: bagel -backend: mamba -parameters: - model: "jondurbin/bagel-dpo-2.8b-v0.2" -systemPrompt: "You are a helpful, unbiased, uncensored assistant." -template: - chat_message: | - {{if eq .RoleName "assistant"}}{{.Content}}{{else}} - [INST] - {{if .SystemPrompt}}{{.SystemPrompt}}{{else if eq .RoleName "system"}}<>{{.Content}}<> - - {{else if .Content}}{{.Content}}{{end}} - [/INST] - {{end}} - completion: | - {{.Input}} -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "bagel", - "messages": [{"role": "user", "content": "how are you doing"}], - }' diff --git a/embedded/models/mamba-chat.yaml b/embedded/models/mamba-chat.yaml deleted file mode 100644 index b0d7fc62412c..000000000000 --- a/embedded/models/mamba-chat.yaml +++ /dev/null @@ -1,28 +0,0 @@ -name: mamba-chat -backend: mamba -parameters: - model: "havenhq/mamba-chat" - -trimsuffix: -- <|endoftext|> - -# https://huggingface.co/HuggingFaceH4/zephyr-7b-beta/blob/main/tokenizer_config.json -# "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}", -template: - chat_message: | - {{if eq .RoleName "assistant"}}<|assistant|>{{else if eq .RoleName "system"}}<|system|>{{else if eq .RoleName "user"}}<|user|>{{end}} - {{if .Content}}{{.Content}}{{end}} - - - chat: | - {{.Input}} - <|assistant|> - - completion: | - {{.Input}} -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "mamba-chat", - "messages": [{"role": "user", "content": "how are you doing"}], - "temperature": 0.7 - }' \ No newline at end of file diff --git a/embedded/models/mistral-openorca.yaml b/embedded/models/mistral-openorca.yaml deleted file mode 100644 index 0794a69b0a20..000000000000 --- a/embedded/models/mistral-openorca.yaml +++ /dev/null @@ -1,32 +0,0 @@ -name: mistral-openorca -mmap: true -parameters: - model: huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q6_K.gguf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - seed: -1 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 - -template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} - {{if .Content}}{{.Content}}{{end}} - 
<|im_end|> - chat: | - {{.Input}} - <|im_start|>assistant - completion: | - {{.Input}} -context_size: 4096 -f16: true -stopwords: -- <|im_end|> -- -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "mistral-openorca", - "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] - }' diff --git a/embedded/models/mixtral-instruct.yaml b/embedded/models/mixtral-instruct.yaml deleted file mode 100644 index 246b23240f48..000000000000 --- a/embedded/models/mixtral-instruct.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: mixtral-instruct -mmap: true -parameters: - model: huggingface://TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/mixtral-8x7b-instruct-v0.1.Q2_K.gguf - temperature: 0.2 - top_k: 40 - seed: -1 - top_p: 0.95 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 - -template: - chat: &chat | - [INST] {{.Input}} [/INST] - completion: *chat -context_size: 4096 -f16: true -gpu_layers: 90 - -usage: | - curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ - "model": "mixtral-instruct", - "prompt": "How are you doing?" - }' \ No newline at end of file diff --git a/embedded/models/phi-2-chat.yaml b/embedded/models/phi-2-chat.yaml deleted file mode 100644 index 4a3ca7aa6498..000000000000 --- a/embedded/models/phi-2-chat.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: phi-2-chat -mmap: true -parameters: - model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf - -template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} - {{if .Content}}{{.Content}}{{end}} - <|im_end|> - chat: | - {{.Input}} - <|im_start|>assistant - completion: | - {{.Input}} -context_size: 4096 -f16: true -stopwords: -- <|im_end|> -- -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "phi-2-chat", - "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] - }' diff --git a/embedded/models/phi-2-orange.yaml b/embedded/models/phi-2-orange.yaml deleted file mode 100644 index 838909c9b269..000000000000 --- a/embedded/models/phi-2-orange.yaml +++ /dev/null @@ -1,30 +0,0 @@ -name: phi-2-orange -mmap: true -parameters: - model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf - -template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} - {{if .Content}}{{.Content}}{{end}} - <|im_end|> - chat: | - {{.Input}} - <|im_start|>assistant - completion: | - {{.Input}} -context_size: 4096 -f16: true -stopwords: -- <|im_end|> -- - -description: | - This model is a chatbot that can be used for general conversation. 
- [Model card](https://huggingface.co/TheBloke/phi-2-orange-GGUF) - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "phi-2-orange", - "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] - }' diff --git a/embedded/models/rhasspy-voice-en-us-amy.yaml b/embedded/models/rhasspy-voice-en-us-amy.yaml deleted file mode 100644 index 911293cac956..000000000000 --- a/embedded/models/rhasspy-voice-en-us-amy.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: voice-en-us-amy-low -download_files: - - filename: voice-en-us-amy-low.tar.gz - uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz - - -usage: | - To test if this model works as expected, you can use the following curl command: - - curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "model":"en-us-amy-low.onnx", - "input": "Hi, this is a test." - }' \ No newline at end of file diff --git a/embedded/models/tinyllama-chat.yaml b/embedded/models/tinyllama-chat.yaml deleted file mode 100644 index 48c44f9fc853..000000000000 --- a/embedded/models/tinyllama-chat.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: tinyllama-chat -mmap: true -parameters: - model: huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q8_0.gguf - temperature: 0.2 - top_k: 40 - seed: -1 - top_p: 0.95 -template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} - {{if .Content}}{{.Content}}{{end}}<|im_end|> - chat: | - {{.Input}} - <|im_start|>assistant - - completion: | - {{.Input}} -context_size: 4096 -f16: true -stopwords: -- <|im_end|> -gpu_layers: 90 - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "tinyllama-chat", - "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] - }' \ No newline at end of file diff --git a/embedded/models/transformers-tinyllama.yaml b/embedded/models/transformers-tinyllama.yaml deleted file mode 100644 index ee6e78894609..000000000000 --- a/embedded/models/transformers-tinyllama.yaml +++ /dev/null @@ -1,31 +0,0 @@ -name: tinyllama-chat -backend: transformers -type: AutoModelForCausalLM - -parameters: - model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 - temperature: 0.2 - top_k: 40 - top_p: 0.95 - max_tokens: 4096 - -template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} - {{if .Content}}{{.Content}}{{end}}<|im_end|> - chat: | - {{.Input}} - <|im_start|>assistant - - completion: | - {{.Input}} - -stopwords: -- <|im_end|> - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "tinyllama-chat", - "messages": [{"role": "user", "content": "Say this is a test!"}], - "temperature": 0.7 - }' diff --git a/embedded/models/vall-e-x.yaml b/embedded/models/vall-e-x.yaml deleted file mode 100644 index b97015f60174..000000000000 --- a/embedded/models/vall-e-x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -usage: | - Vall-e-x works without any configuration, to test it, you can run the following curl command: - - curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "backend": "vall-e-x", - "input":"Hello, this is a test!" 
- }' | aplay -# TODO: This is a placeholder until we manage to pre-load HF/Transformers models \ No newline at end of file diff --git a/embedded/models/whisper-base.yaml b/embedded/models/whisper-base.yaml deleted file mode 100644 index f7ebd2176f3f..000000000000 --- a/embedded/models/whisper-base.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: whisper -backend: whisper -parameters: - model: ggml-whisper-base.bin - -usage: | - ## example audio file - wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg - - ## Send the example audio file to the transcriptions endpoint - curl http://localhost:8080/v1/audio/transcriptions \ - -H "Content-Type: multipart/form-data" \ - -F file="@$PWD/gb1.ogg" -F model="whisper" - -download_files: -- filename: "ggml-whisper-base.bin" - sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" - uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index a445b10eb38e..0f598df593c4 100644 --- a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -9,7 +9,6 @@ import ( "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" - "github.com/mudler/LocalAI/embedded" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/utils" "github.com/rs/zerolog/log" @@ -18,42 +17,17 @@ import ( // InstallModels will preload models from the given list of URLs and galleries // It will download the model if it is not already present in the model path // It will also try to resolve if the model is an embedded model YAML configuration -func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error { +func InstallModels(galleries []config.Gallery, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error { // create an error that groups all errors var err error - lib, _ := embedded.GetRemoteLibraryShorteners(modelLibraryURL, modelPath) - for _, url := range models { // As a best effort, try to resolve the model from the remote library // if it's not resolved we try with the other method below - if modelLibraryURL != "" { - if lib[url] != "" { - log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url]) - url = lib[url] - } - } - url = embedded.ModelShortURL(url) uri := downloader.URI(url) switch { - case embedded.ExistsInModelsLibrary(url): - modelYAML, e := embedded.ResolveContent(url) - // If we resolve something, just save it to disk and continue - if e != nil { - log.Error().Err(e).Msg("error resolving model content") - err = errors.Join(err, e) - continue - } - - log.Debug().Msgf("[startup] resolved embedded model: %s", url) - md5Name := utils.MD5(url) - modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - if e := os.WriteFile(modelDefinitionFilePath, modelYAML, 0600); err != nil { - log.Error().Err(e).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition") - err = errors.Join(err, e) - } case uri.LooksLikeOCI(): log.Debug().Msgf("[startup] resolved OCI model to download: %s", url) diff --git a/pkg/startup/model_preload_test.go b/pkg/startup/model_preload_test.go index 78cf73115def..51e6d7026dbe 100644 --- a/pkg/startup/model_preload_test.go +++ 
b/pkg/startup/model_preload_test.go @@ -7,7 +7,6 @@ import ( "github.com/mudler/LocalAI/core/config" . "github.com/mudler/LocalAI/pkg/startup" - "github.com/mudler/LocalAI/pkg/utils" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -16,29 +15,13 @@ import ( var _ = Describe("Preload test", func() { Context("Preloading from strings", func() { - It("loads from remote url", func() { - tmpdir, err := os.MkdirTemp("", "") - Expect(err).ToNot(HaveOccurred()) - libraryURL := "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml" - fileName := fmt.Sprintf("%s.yaml", "phi-2") - - InstallModels([]config.Gallery{}, libraryURL, tmpdir, true, nil, "phi-2") - - resultFile := filepath.Join(tmpdir, fileName) - - content, err := os.ReadFile(resultFile) - Expect(err).ToNot(HaveOccurred()) - - Expect(string(content)).To(ContainSubstring("name: phi-2")) - }) - It("loads from embedded full-urls", func() { tmpdir, err := os.MkdirTemp("", "") Expect(err).ToNot(HaveOccurred()) url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml" fileName := fmt.Sprintf("%s.yaml", "phi-2") - InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url) + InstallModels([]config.Gallery{}, tmpdir, true, nil, url) resultFile := filepath.Join(tmpdir, fileName) @@ -47,45 +30,13 @@ var _ = Describe("Preload test", func() { Expect(string(content)).To(ContainSubstring("name: phi-2")) }) - It("loads from embedded short-urls", func() { - tmpdir, err := os.MkdirTemp("", "") - Expect(err).ToNot(HaveOccurred()) - url := "phi-2" - - InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url) - - entry, err := os.ReadDir(tmpdir) - Expect(err).ToNot(HaveOccurred()) - Expect(entry).To(HaveLen(1)) - resultFile := entry[0].Name() - - content, err := os.ReadFile(filepath.Join(tmpdir, resultFile)) - Expect(err).ToNot(HaveOccurred()) - - Expect(string(content)).To(ContainSubstring("name: phi-2")) - }) - It("loads from embedded models", func() { - tmpdir, err := os.MkdirTemp("", "") - Expect(err).ToNot(HaveOccurred()) - url := "mistral-openorca" - fileName := fmt.Sprintf("%s.yaml", utils.MD5(url)) - - InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url) - - resultFile := filepath.Join(tmpdir, fileName) - - content, err := os.ReadFile(resultFile) - Expect(err).ToNot(HaveOccurred()) - - Expect(string(content)).To(ContainSubstring("name: mistral-openorca")) - }) It("downloads from urls", func() { tmpdir, err := os.MkdirTemp("", "") Expect(err).ToNot(HaveOccurred()) url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K") - err = InstallModels([]config.Gallery{}, "", tmpdir, false, nil, url) + err = InstallModels([]config.Gallery{}, tmpdir, false, nil, url) Expect(err).ToNot(HaveOccurred()) resultFile := filepath.Join(tmpdir, fileName) diff --git a/embedded/webui_static.yaml b/webui_static.yaml similarity index 100% rename from embedded/webui_static.yaml rename to webui_static.yaml
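For reference, the following is a minimal sketch of a caller using the simplified `InstallModels` signature introduced by this patch, which drops the `modelLibraryURL` parameter. The model path is a hypothetical placeholder, the gallery slice is left empty as in the updated test, and the example YAML URL is the one used in `pkg/startup/model_preload_test.go`; this illustrates the new call shape under those assumptions, not code taken from the repository.

```go
package main

import (
	"log"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/pkg/startup"
)

func main() {
	// Hypothetical destination directory for downloaded model configurations.
	modelPath := "/tmp/models"

	// Updated call shape after this patch: galleries, model path, the
	// pre-download scan flag, an optional progress callback of type
	// func(string, string, string, float64), and the model URIs to install.
	err := startup.InstallModels(
		[]config.Gallery{}, // no galleries configured in this sketch
		modelPath,
		true, // enforce pre-download scans
		nil,  // no progress callback
		"https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml",
	)
	if err != nil {
		log.Printf("error installing models: %v", err)
	}
}
```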