Skip to content

Commit

Permalink
Code Llama
Browse files — browse the repository at this point in the history
  • Loading branch information
KillianLucas committed Aug 28, 2023
1 parent e318329 commit ecba9ef
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions interpreter/llama_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def get_llama_2_instance():
{'URL': 'https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q8_0.bin', 'Param': '13B', 'Bits': 8, 'Size': '13.83 GB', 'RAM': '16.33 GB', 'Description': 'Original quant method, 8-bit. Almost indistinguishable from float16. High resource use and slow. Not recommended for most users.'}
]
code_llama_13b = [
{'URL': 'https://huggingface.co/TheBloke/CodeLlama-13B-oasst-sft-v10-GGUF/blob/main/codellama-13b-oasst-sft-v10.Q4_K_M.gguf', 'Param': '13B', 'Bits': 8, 'Size': '13.83 GB', 'RAM': '16.33 GB', 'Description': 'Original quant method, 8-bit. Almost indistinguishable from float16. High resource use and slow. Not recommended for most users.'}
{'URL': 'https://huggingface.co/TheBloke/CodeLlama-13B-oasst-sft-v10-GGUF/resolve/main/codellama-13b-oasst-sft-v10.Q4_K_M.gguf', 'Param': '13B', 'Bits': 8, 'Size': '13.83 GB', 'RAM': '16.33 GB', 'Description': 'Original quant method, 8-bit. Almost indistinguishable from float16. High resource use and slow. Not recommended for most users.'}
]

#all_models = llama_2_7b + llama_2_13b + code_llama_13b
Expand Down Expand Up @@ -103,7 +103,7 @@ def format_choice(model):

# Initialize and return Llama-2
# n_gpu_layers=1 should use GPU, but frankly I can't tell if it does (Mac OSX)
llama_2 = Llama(model_path=model_path)
llama_2 = Llama(model_path=model_path, n_gpu_layers=-1)

print("Installed llama:", llama_2)

Expand Down

0 comments on commit ecba9ef

Please sign in to comment.