Commit
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	README.md
#	tests/test-chat-template.cpp
LostRuins committed May 24, 2024
2 parents 2dedea9 + 0df0aa8 commit 6530501
Showing 3 changed files with 12 additions and 12 deletions.
4 changes: 2 additions & 2 deletions examples/sycl/win-build-sycl.bat
@@ -13,10 +13,10 @@ if %errorlevel% neq 0 goto ERROR

 :: for FP16
 :: faster for long-prompt inference
-:: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
+:: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON

 :: for FP32
-cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
+cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
 if %errorlevel% neq 0 goto ERROR
 :: build example/main only
 :: make main
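The only substantive change in this file is the addition of -DBUILD_SHARED_LIBS=ON to both the active FP32 command and the commented-out FP16 variant. BUILD_SHARED_LIBS is a standard CMake switch that makes add_library() targets default to shared libraries, so this Windows SYCL build now produces the llama library as a DLL rather than a static archive.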
2 changes: 1 addition & 1 deletion ggml-quants.c
@@ -12145,7 +12145,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
                 printf("\n");
                 GGML_ASSERT(false);
             }
-            q2[2*ib+0] |= (grid_index << 8*k);
+            q2[2*ib+0] |= ((uint32_t) grid_index << 8*k);
             q2[2*ib+1] |= (block_signs[k] << 7*k);
         }
         GGML_ASSERT(scale >= 0);
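The cast addresses an integer-promotion hazard: grid_index is a signed int at this point (the cast would otherwise be pointless), and for the highest byte lane the shift amount 8*k reaches 24, so any grid index of 128 or more would overflow a signed 32-bit int, which is undefined behavior in C. Casting to uint32_t first makes the shift well defined. A minimal standalone sketch of the hazard, with illustrative values rather than code from the repository:

#include <cstdint>
#include <cstdio>

int main() {
    uint32_t packed = 0;
    int grid_index = 0xFF;  // any value >= 128 triggers the problem
    int k = 3;              // highest byte lane: a 24-bit shift

    // Undefined behavior in C: 0xFF << 24 overflows a signed 32-bit
    // int before the result is widened for the OR:
    //     packed |= (grid_index << 8*k);

    // Well defined: the shift happens in unsigned arithmetic.
    packed |= ((uint32_t) grid_index << 8*k);

    std::printf("0x%08X\n", packed);  // prints 0xFF000000
    return 0;
}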
18 changes: 9 additions & 9 deletions llama.cpp
@@ -18170,6 +18170,15 @@ static int32_t llama_chat_apply_template_internal(
             }
         }
         // llama2 templates seem to not care about "add_generation_prompt"
+    } else if (tmpl == "phi3" || (tmpl.find("<|assistant|>") != std::string::npos && tmpl.find("<|end|>") != std::string::npos)) {
+        // Phi 3
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
+        }
+        if (add_ass) {
+            ss << "<|assistant|>\n";
+        }
     } else if (tmpl == "zephyr" || tmpl.find("<|user|>") != std::string::npos) {
         // zephyr template
         for (auto message : chat) {
@@ -18302,15 +18311,6 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
         }
-    } else if (tmpl == "phi3" || (tmpl.find("<|assistant|>") != std::string::npos && tmpl.find("<|end|>") != std::string::npos )) {
-        // Phi 3
-        for (auto message : chat) {
-            std::string role(message->role);
-            ss << "<|" << role << "|>\n" << trim(message->content) << "<|end|>\n";
-        }
-        if (add_ass) {
-            ss << "<|assistant|>\n";
-        }
     } else {
        // template not supported
        return -1;
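Net effect of the two hunks: the Phi 3 branch moves from near the end of the if/else chain to just before the zephyr branch, so a template containing <|assistant|> and <|end|> is recognized as Phi 3 before zephyr's broader <|user|> substring test can claim it, and the relocated branch no longer calls trim() on message content. A minimal standalone sketch of the resulting output format (hypothetical helper names, not the library's API):

#include <cstdio>
#include <string>
#include <vector>

struct Msg { std::string role, content; };

// Mirrors the Phi 3 branch above: each message is rendered as
// <|role|>\ncontent<|end|>\n, with a trailing <|assistant|>\n when a
// generation prompt is requested (add_ass).
static std::string format_phi3(const std::vector<Msg> & chat, bool add_ass) {
    std::string ss;
    for (const auto & m : chat) {
        ss += "<|" + m.role + "|>\n" + m.content + "<|end|>\n";
    }
    if (add_ass) {
        ss += "<|assistant|>\n";
    }
    return ss;
}

int main() {
    const std::vector<Msg> chat = {
        { "system", "You are a helpful assistant." },
        { "user",   "Hello!" },
    };
    std::printf("%s", format_phi3(chat, /*add_ass=*/true).c_str());
    // Output:
    //   <|system|>
    //   You are a helpful assistant.<|end|>
    //   <|user|>
    //   Hello!<|end|>
    //   <|assistant|>
    return 0;
}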
