kherud · vaiju1981 · Mar 14, 2025 · Mar 15, 2025 · Mar 18, 2025 · Mar 18, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -4,10 +4,17 @@ on:
   - pull_request
   - workflow_dispatch
 env:
-  MODEL_URL: https://huggingface.co/TheBloke/CodeLlama-7B-GGUF/resolve/main/codellama-7b.Q2_K.gguf
-  MODEL_NAME: codellama-7b.Q2_K.gguf
+
+  REASONING_MODEL_URL: https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf
+  REASONING_MODEL_NAME: stories260K.gguf
+  INFILL_MODEL_URL: https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-infill.gguf
+  INFILL_MODEL_NAME: stories260K-infill.gguf
+  MOE_MODEL_URL: https://huggingface.co/ggml-org/stories15M_MOE/resolve/main/stories15M_MOE-F16.gguf
+  MOE_MODEL_NAME: stories15M_MOE-F16.gguf
   RERANKING_MODEL_URL: https://huggingface.co/gpustack/jina-reranker-v1-tiny-en-GGUF/resolve/main/jina-reranker-v1-tiny-en-Q4_0.gguf
   RERANKING_MODEL_NAME: jina-reranker-v1-tiny-en-Q4_0.gguf
+  EMBEDDING_MODEL_URL: https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16.gguf
+  EMBEDDING_MODEL_NAME: ggml-model-f16.gguf
 jobs:
 
   build-and-test-linux:
@@ -23,10 +30,21 @@ jobs:
         run: |
           mvn compile
           .github/build.sh -DLLAMA_VERBOSE=ON
-      - name: Download text generation model
-        run: curl -L ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
       - name: Download reranking model 
         run: curl -L ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
+
+      - name: Download reasoning calling model
+        run: curl -L ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
+
+      - name: Download infill  calling model
+        run: curl -L ${INFILL_MODEL_URL} --create-dirs -o models/${INFILL_MODEL_NAME}
+
+      - name: Download MOE  model
+        run: curl -L ${MOE_MODEL_URL} --create-dirs -o models/${MOE_MODEL_NAME}
+
+      - name: Download EMBEDDING  model
+        run: curl -L ${EMBEDDING_MODEL_URL} --create-dirs -o models/${EMBEDDING_MODEL_NAME}
+
       - name: List files in models directory
         run: ls -l models/
       - name: Run tests
@@ -59,10 +77,22 @@ jobs:
         run: |
           mvn compile
           .github/build.sh ${{ matrix.target.cmake }}
-      - name: Download text generaton model model
-        run: curl -L ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
+
       - name: Download reranking model 
         run: curl -L ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
+
+      - name: Download reasoning calling model
+        run: curl -L ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
+
+      - name: Download infill  calling model
+        run: curl -L ${INFILL_MODEL_URL} --create-dirs -o models/${INFILL_MODEL_NAME}
+
+      - name: Download MOE  model
+        run: curl -L ${MOE_MODEL_URL} --create-dirs -o models/${MOE_MODEL_NAME}
+
+      - name: Download EMBEDDING  model
+        run: curl -L ${EMBEDDING_MODEL_URL} --create-dirs -o models/${EMBEDDING_MODEL_NAME}
+
       - name: List files in models directory
         run: ls -l models/
       - name: Run tests
@@ -87,10 +117,22 @@ jobs:
         run: |
           mvn compile
           .github\build.bat -DLLAMA_VERBOSE=ON
-      - name: Download model
-        run: curl -L $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME
+
       - name: Download reranking model 
         run: curl -L $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME
+
+      - name: Download reasoning calling model
+        run: curl -L $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
+
+      - name: Download infill  calling model
+        run: curl -L $env:INFILL_MODEL_URL --create-dirs -o models/$env:INFILL_MODEL_NAME
+
+      - name: Download MOE  model
+        run: curl -L $env:MOE_MODEL_URL --create-dirs -o models/$env:MOE_MODEL_NAME
+
+      - name: Download EMBEDDING  model
+        run: curl -L $env:EMBEDDING_MODEL_URL --create-dirs -o models/$env:EMBEDDING_MODEL_NAME
+
       - name: List files in models directory
         run: ls -l models/
       - name: Run tests

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
@@ -9,10 +9,16 @@ on:
   release:
     types: [ created ]
 env:
-  MODEL_URL: "https://huggingface.co/TheBloke/CodeLlama-7B-GGUF/resolve/main/codellama-7b.Q2_K.gguf"
-  MODEL_NAME: "codellama-7b.Q2_K.gguf"
+  REASONING_MODEL_URL: "https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf"
+  REASONING_MODEL_NAME: "stories260K.gguf"
+  INFILL_MODEL_URL: "https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-infill.gguf"
+  INFILL_MODEL_NAME: "stories260K-infill.gguf"
+  MOE_MODEL_URL: "https://huggingface.co/ggml-org/stories15M_MOE/resolve/main/stories15M_MOE-F16.gguf"
+  MOE_MODEL_NAME: "stories15M_MOE-F16.gguf"
   RERANKING_MODEL_URL: "https://huggingface.co/gpustack/jina-reranker-v1-tiny-en-GGUF/resolve/main/jina-reranker-v1-tiny-en-Q4_0.gguf"
   RERANKING_MODEL_NAME: "jina-reranker-v1-tiny-en-Q4_0.gguf"
+  EMBEDDING_MODEL_URL: "https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16.gguf"
+  EMBEDDING_MODEL_NAME: "ggml-model-f16.gguf"
 jobs:
 
 # todo: doesn't work with the newest llama.cpp version
@@ -146,10 +152,21 @@ jobs:
         with:
           name: Linux-x86_64-libraries
           path: ${{ github.workspace }}/src/main/resources/de/kherud/llama/
-      - name: Download text generation model
-        run: curl -L ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
-      - name: Download reranking model 
+
+      - name: Download reranking model
         run: curl -L ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
+
+      - name: Download reasoning calling model
+        run: curl -L ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
+
+      - name: Download infill  calling model
+        run: curl -L ${INFILL_MODEL_URL} --create-dirs -o models/${INFILL_MODEL_NAME}
+
+      - name: Download MOE  model
+        run: curl -L ${MOE_MODEL_URL} --create-dirs -o models/${MOE_MODEL_NAME}
+
+      - name: Download EMBEDDING  model
+        run: curl -L ${EMBEDDING_MODEL_URL} --create-dirs -o models/${EMBEDDING_MODEL_NAME}
       - uses: actions/setup-java@v4
         with:
           distribution: 'zulu'

diff --git a/.gitignore b/.gitignore
@@ -42,4 +42,6 @@ src/test/resources/**/*.gbnf
 
 **/*.etag
 **/*.lastModified
-src/main/cpp/llama.cpp/
+src/main/cpp/llama.cpp/
+/.classpath
+/.project
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -25,7 +25,7 @@ set(LLAMA_BUILD_COMMON ON)
 FetchContent_Declare(
 	llama.cpp
 	GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git
-	GIT_TAG        b4916
+	GIT_TAG        b4940
 )
 FetchContent_MakeAvailable(llama.cpp)
 

diff --git a/pom.xml b/pom.xml
@@ -5,7 +5,7 @@
 
 	<groupId>de.kherud</groupId>
 	<artifactId>llama</artifactId>
-	<version>4.1.0</version>
+	<version>4.1.1</version>
 	<packaging>jar</packaging>
 
 	<name>${project.groupId}:${project.artifactId}</name>
@@ -65,6 +65,16 @@
 			<version>24.1.0</version>
 			<scope>compile</scope>
 		</dependency>
+
+		<!--
+		https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
+		<dependency>
+			<groupId>com.fasterxml.jackson.core</groupId>
+			<artifactId>jackson-databind</artifactId>
+			<version>2.18.3</version>
+		</dependency>
+
+
 	</dependencies>
 
 	<build>