diff --git a/gallery/index.yaml b/gallery/index.yaml
index b58e8ca7dbc3..41f3d3f3a020 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,4 +1,29 @@
 ---
+- name: "got-r1-14b"
+  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
+  urls:
+    - https://huggingface.co/mradermacher/GoT-R1-14B-GGUF
+  description: |
+    The model `mradermacher/GoT-R1-14B-GGUF` is a quantized version of a 14-billion parameter (14B) language model. It is optimized for efficiency through techniques like Q4_K_S, Q2_K, and Q8_0 quantization. The model is designed for text generation tasks and is based on the GoT series, which is a series of large-scale language models. The quantization reduces computational demands while maintaining performance, making it suitable for deployment in resource-constrained environments. The exact base model (e.g., the original unquantized version) is not explicitly stated in the repository's README, but it is derived from the GoT series.
+  overrides:
+    parameters:
+      model: llama-cpp/models/GoT-R1-14B.Q4_K_M.gguf
+    name: GoT-R1-14B-GGUF
+    backend: llama-cpp
+    template:
+      use_tokenizer_template: true
+    known_usecases:
+      - chat
+    function:
+      grammar:
+        disable: true
+    description: Imported from https://huggingface.co/mradermacher/GoT-R1-14B-GGUF
+    options:
+      - use_jinja:true
+  files:
+    - filename: llama-cpp/models/GoT-R1-14B.Q4_K_M.gguf
+      sha256: 76e9c75606e5059b5f91a9425894438429de195d52722b0bc2b03bac786a4f89
+      uri: https://huggingface.co/mradermacher/GoT-R1-14B-GGUF/resolve/main/GoT-R1-14B.Q4_K_M.gguf
 - name: "rwkv7-g1c-13.3b"
   url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
   urls: