feat: use llama cpp server #350

Status: Open · wants to merge 21 commits into base: main
.github/scripts/e2e-test-server-linux-and-mac.sh — file mode changed 100644 → 100755 (no content changes)
18 changes: 2 additions & 16 deletions .github/scripts/e2e-test-server-windows.bat
@@ -20,7 +20,6 @@ echo BINARY_NAME=%BINARY_NAME%

del %TEMP%\response1.log 2>nul
del %TEMP%\response2.log 2>nul
del %TEMP%\response3.log 2>nul
del %TEMP%\response4.log 2>nul
del %TEMP%\response5.log 2>nul
del %TEMP%\response6.log 2>nul
@@ -65,18 +64,18 @@ call set "MODEL_LLM_PATH_STRING=%%MODEL_LLM_PATH:\=\\%%"
call set "MODEL_EMBEDDING_PATH_STRING=%%MODEL_EMBEDDING_PATH:\=\\%%"
set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":false,\"model\":\"testllm\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
set "curl_data3={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
set "curl_data4={\"llama_model_path\":\"%MODEL_EMBEDDING_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}"
set "curl_data5={}"
set "curl_data6={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}"
@REM set "curl_data7={\"model\": \"test-embedding\"}"

rem Print the values of curl_data for debugging
echo curl_data1=%curl_data1%
echo curl_data2=%curl_data2%
echo curl_data3=%curl_data3%
echo curl_data4=%curl_data4%
echo curl_data5=%curl_data5%
echo curl_data6=%curl_data6%
@REM echo curl_data7=%curl_data7%

rem Run the curl commands and capture the status code
curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/loadmodel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1
@@ -85,8 +84,6 @@ curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --
--header "Content-Type: application/json" ^
--data "%curl_data2%" > %TEMP%\response2.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/unloadmodel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/loadmodel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/models" --header "Content-Type: application/json" --data "%curl_data5%" > %TEMP%\response5.log 2>&1
@@ -100,7 +97,6 @@ set "error_occurred=0"
rem Read the status codes from the log files
for /f %%a in (%TEMP%\response1.log) do set "response1=%%a"
for /f %%a in (%TEMP%\response2.log) do set "response2=%%a"
for /f %%a in (%TEMP%\response3.log) do set "response3=%%a"
for /f %%a in (%TEMP%\response4.log) do set "response4=%%a"
for /f %%a in (%TEMP%\response5.log) do set "response5=%%a"
for /f %%a in (%TEMP%\response6.log) do set "response6=%%a"
@@ -117,12 +113,6 @@ if "%response2%" neq "200" (
set "error_occurred=1"
)

if "%response3%" neq "200" (
echo The third curl command failed with status code: %response3%
type %TEMP%\response3.log
set "error_occurred=1"
)

if "%response4%" neq "200" (
echo The fourth curl command failed with status code: %response4%
type %TEMP%\response4.log
@@ -158,10 +148,6 @@ echo ----------------------
echo Log run test:
type %TEMP%\response2.log

echo ----------------------
echo Log unload model:
type %TEMP%\response3.log

echo ----------------------
echo Log load embedding model:
type %TEMP%\response4.log
68 changes: 34 additions & 34 deletions .github/workflows/template-quality-gate-pr.yml

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions Makefile
@@ -54,15 +54,18 @@ endif
pre-package:
ifeq ($(OS),Windows_NT)
@powershell -Command "mkdir -p cortex.llamacpp; cp build\engine.dll cortex.llamacpp\;"
@powershell -Command "cp build\bin\llama-server.exe cortex.llamacpp\;"
@powershell -Command "cp .\.github\patches\windows\msvcp140.dll cortex.llamacpp\;"
@powershell -Command "cp .\.github\patches\windows\vcruntime140_1.dll cortex.llamacpp\;"
@powershell -Command "cp .\.github\patches\windows\vcruntime140.dll cortex.llamacpp\;"
@powershell -Command "cp .\.github\patches\windows\vcomp140.dll cortex.llamacpp\;"
else ifeq ($(shell uname -s),Linux)
@mkdir -p cortex.llamacpp; \
cp build/bin/llama-server cortex.llamacpp/; \
cp build/libengine.so cortex.llamacpp/;
else
@mkdir -p cortex.llamacpp; \
cp build/bin/llama-server cortex.llamacpp/; \
cp build/libengine.dylib cortex.llamacpp/;
endif

@@ -97,16 +100,18 @@ ifeq ($(RUN_TESTS),false)
@exit 0
endif
ifeq ($(OS),Windows_NT)
@powershell -Command "mkdir -p examples\server\build\engines\cortex.llamacpp; cd examples\server\build; cp ..\..\..\build\engine.dll engines\cortex.llamacpp; ..\..\..\.github\scripts\e2e-test-server-windows.bat server.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);"
@powershell -Command "mkdir -p examples\server\build\engines\cortex.llamacpp; cd examples\server\build; cp ..\..\..\build\engine.dll engines\cortex.llamacpp; cp ..\..\..\build\bin\llama-server.exe engines\cortex.llamacpp; ..\..\..\.github\scripts\e2e-test-server-windows.bat server.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);"
else ifeq ($(shell uname -s),Linux)
@mkdir -p examples/server/build/engines/cortex.llamacpp; \
cd examples/server/build/; \
cp ../../../build/libengine.so engines/cortex.llamacpp/; \
cp ../../../build/bin/llama-server engines/cortex.llamacpp/; \
chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);
else
@mkdir -p examples/server/build/engines/cortex.llamacpp; \
cd examples/server/build/; \
cp ../../../build/libengine.dylib engines/cortex.llamacpp/; \
cp ../../../build/bin/llama-server engines/cortex.llamacpp/; \
chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);
endif

@@ -118,20 +123,22 @@ endif
ifeq ($(OS),Windows_NT)
@powershell -Command "python -m pip install --upgrade pip"
@powershell -Command "python -m pip install requests;"
@powershell -Command "mkdir -p examples\server\build\engines\cortex.llamacpp; cd examples\server\build; cp ..\..\..\build\engine.dll engines\cortex.llamacpp; python ..\..\..\.github\scripts\e2e-test-server.py server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);"
@powershell -Command "mkdir -p examples\server\build\engines\cortex.llamacpp; cd examples\server\build; cp ..\..\..\build\engine.dll engines\cortex.llamacpp; cp ..\..\..\build\bin\llama-server.exe engines\cortex.llamacpp; python ..\..\..\.github\scripts\e2e-test-server.py server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);"
else ifeq ($(shell uname -s),Linux)
python -m pip install --upgrade pip;
python -m pip install requests;
@mkdir -p examples/server/build/engines/cortex.llamacpp; \
cd examples/server/build/; \
cp ../../../build/libengine.so engines/cortex.llamacpp/; \
cp ../../../build/bin/llama-server engines/cortex.llamacpp/; \
python ../../../.github/scripts/e2e-test-server.py server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);
else
python -m pip install --upgrade pip;
python -m pip install requests;
@mkdir -p examples/server/build/engines/cortex.llamacpp; \
cd examples/server/build/; \
cp ../../../build/libengine.dylib engines/cortex.llamacpp/; \
cp ../../../build/bin/llama-server engines/cortex.llamacpp/; \
python ../../../.github/scripts/e2e-test-server.py server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);
endif

1 change: 1 addition & 0 deletions base/cortex-common/enginei.h
@@ -6,6 +6,7 @@
#include <vector>

#include "json/value.h"
#include "trantor/utils/AsyncFileLogger.h"
#include "trantor/utils/Logger.h"

// Interface for inference engine.
1 change: 1 addition & 0 deletions examples/server/CMakeLists.txt
@@ -15,6 +15,7 @@ add_executable(${PROJECT_NAME}
server.cc
dylib.h
httplib.h
${CMAKE_CURRENT_SOURCE_DIR}/../../src/file_logger.cc
)

set(THIRD_PARTY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../build_deps/_install)
97 changes: 97 additions & 0 deletions examples/server/README.md
@@ -0,0 +1,97 @@
This application performs inference with vision, text, and embedding models, serving each model type with a strategy tailored to its requirements.

- Vision Models:
For vision models, a dedicated, customized server runs within the same process as the main application. Keeping inference in-process avoids the overhead of an extra process and makes efficient use of resources for vision tasks.

- Text and Embedding Models:
For text and embedding models, the application spawns a separate child process per model. Process isolation prevents resource contention, since each model runs independently in its own process.

Choosing the serving strategy per model type lets the application allocate resources efficiently and provide reliable inference across a diverse set of models.
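
To make the child-process strategy concrete, here is a rough sketch of the kind of invocation involved when a text or embedding model is loaded. It assumes the bundled `llama-server` binary that this PR packages under `engines/cortex.llamacpp`; the port, model path, and flag values are placeholders, and the actual engine manages the child's lifecycle itself.

```bash title="Child-process sketch (illustrative)"
# Illustrative only: spawn a llama-server child for a loaded text model.
# The model path, port, and flag values below are placeholders.
./engines/cortex.llamacpp/llama-server \
  --host 127.0.0.1 --port 39281 \
  -m /model/llama-2-7b-model.gguf -c 512 -ngl 100 &
SERVER_PID=$!

# Requests for this model are forwarded to the child process;
# unloading the model stops the child.
kill "$SERVER_PID"
```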

**API Endpoints**

The application provides the following API endpoints for interacting with the models:

**Load Model**

Endpoint: /loadmodel

Method: POST

Description: Loads a specified model into the application. For text and embedding models, this endpoint will spawn a new child process to serve the model.

```bash title="Load model"
curl --location 'http://localhost:3928/loadmodel' \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/model/llama-2-7b-model.gguf",
"model_alias": "llama-2-7b-model",
"ctx_len": 512,
"ngl": 100,
"model_type": "llm"
}'
```

**Chat Completion**

Endpoint: /v1/chat/completions

Method: POST

Description: Performs chat completion using a loaded text model.

```bash title="Inference"
curl --location 'http://localhost:3928/v1/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
"messages": [
{
"role": "user",
"content": "Who won the world series in 2020?"
}
],
"model": "llama-2-7b-model"
}'
```

**Embedding**

Endpoint: /v1/embeddings

Method: POST

Description: Requests an embedding using a loaded embedding model.

```bash title="Embeddings"
curl --location '127.0.0.1:3928/v1/embeddings' \
--header 'Content-Type: application/json' \
--data '{
"input": ["hello", "The food was delicious and the waiter..."],
"model":"llama-2-7b-model",
"encoding_format": "base64"
}'
```

**Unload Model**

Endpoint: /unloadmodel

Method: POST

Description: Unloads a specified model from the application. For text and embedding models, this endpoint also stops the associated child process.

```bash title="Unload Model"
curl --location '127.0.0.1:3928/unloadmodel' \
--header 'Content-Type: application/json' \
--data '{
"model": "test"
}'
```

**Multiple Models**

The application can serve multiple models concurrently. The serving strategy depends on the model type, as in the example after this list:
- Vision models: multiple instances can run within the same process.
- Text and embedding models: each model runs in its own child process.
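
For example, a chat model and an embedding model can be loaded side by side; each `/loadmodel` call below starts its own child process. The model paths and the embedding model's file name are placeholders; the `embedding` and `model_type` fields mirror the values used by the repository's e2e test scripts.

```bash title="Load two models concurrently (illustrative)"
# Load a chat model; it is served by its own child process.
curl --location 'http://localhost:3928/loadmodel' \
--header 'Content-Type: application/json' \
--data '{
    "llama_model_path": "/model/llama-2-7b-model.gguf",
    "model_alias": "llama-2-7b-model",
    "model_type": "llm"
}'

# Load an embedding model alongside it; it gets a separate child process.
curl --location 'http://localhost:3928/loadmodel' \
--header 'Content-Type: application/json' \
--data '{
    "llama_model_path": "/model/my-embedding-model.gguf",
    "model_alias": "test-embedding",
    "embedding": true,
    "model_type": "embedding"
}'
```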

**Notes**

- For vision models, a customized server is started within the same process to serve the model; no new process is spawned.
- For text and embedding models, a new child process is spawned to serve each model.
2 changes: 2 additions & 0 deletions examples/server/dylib.h
@@ -58,6 +58,7 @@
* The `dylib` class represents a single dynamic library instance,
* allowing the access of symbols like functions or global variables
*/
namespace cortex_cpp {
class dylib {
public:
struct filename_components {
@@ -311,6 +312,7 @@ class dylib {
}
};

}
#undef DYLIB_WIN_MAC_OTHER
#undef DYLIB_WIN_OTHER
#undef DYLIB_CPP17
51 changes: 32 additions & 19 deletions examples/server/server.cc
@@ -8,21 +8,32 @@
#include <condition_variable>
#include <mutex>
#include <queue>
#include "trantor/utils/Logger.h"
#include "../../src/file_logger.h"
#include "../../src/llama_utils.h"

class Server {
public:
Server() {
dylib_ = std::make_unique<dylib>("./engines/cortex.llamacpp", "engine");
auto func = dylib_->get_function<EngineI*()>("get_engine");
engine_ = func();
}
Server() {}

~Server() {
if (engine_) {
delete engine_;
}
}

void Initialize(trantor::AsyncFileLogger* logger) {
dylib_ = std::make_unique<cortex_cpp::dylib>("./engines/cortex.llamacpp",
"engine");
auto func = dylib_->get_function<EngineI*()>("get_engine");
engine_ = func();
EngineI::EngineLoadOption opts;
opts.engine_path = llama_utils::GetExecutableFolderContainerPath() /
"engines" / "cortex.llamacpp";
opts.log_path = "./logs/cortex.log";
opts.max_log_lines = 10000;
engine_->Load(opts);
}

void ForceStopInferencing(const std::string& model_id) {
if (engine_) {
engine_->StopInferencing(model_id);
@@ -32,7 +43,7 @@ }
}

public:
std::unique_ptr<dylib> dylib_;
std::unique_ptr<cortex_cpp::dylib> dylib_;
EngineI* engine_;

struct SyncQueue {
@@ -86,16 +97,16 @@ inline void signal_handler(int signal) {
using SyncQueue = Server::SyncQueue;

int main(int argc, char** argv) {
// std::filesystem::create_directories("./logs");
// trantor::AsyncFileLogger asyncFileLogger;
// asyncFileLogger.setFileName("logs/cortex");
// asyncFileLogger.startLogging();
// trantor::Logger::setOutputFunction(
// [&](const char* msg, const uint64_t len) {
// asyncFileLogger.output(msg, len);
// },
// [&]() { asyncFileLogger.flush(); });
// asyncFileLogger.setFileSizeLimit(100000000);
std::filesystem::create_directories("./logs");
trantor::FileLogger async_file_logger;
async_file_logger.setFileName("logs/cortex.log");
async_file_logger.startLogging();
trantor::Logger::setOutputFunction(
[&](const char* msg, const uint64_t len) {
async_file_logger.output_(msg, len);
},
[&]() { async_file_logger.flush(); });
async_file_logger.setFileSizeLimit(100000000);

std::string hostname = "127.0.0.1";
int port = 3928;
@@ -109,8 +120,9 @@ }
}

Server server;

server.Initialize(&async_file_logger);
//set logger here
// server.engine_->SetFileLogger();

SyncJsonReader r;
auto svr = std::make_unique<httplib::Server>();
@@ -277,7 +289,8 @@ });
});

shutdown_handler = [&](int) {
running = false;
// only shutdown by /destroy or sent SIGINT twice
// running = false;
};
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
struct sigaction sigint_action;