From 3bd365f86a7d22ff472130b54e5b59655ec88683 Mon Sep 17 00:00:00 2001
From: Jack Cloudman
Date: Sun, 24 Nov 2024 06:10:31 -0600
Subject: [PATCH 1/2] Added exclude-pattern param to download-model.py script

---
 download-model.py | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/download-model.py b/download-model.py
index 306784a355..ba963f103c 100644
--- a/download-model.py
+++ b/download-model.py
@@ -2,7 +2,7 @@
 Downloads models from Hugging Face to models/username_modelname.
 
 Example:
-python download-model.py facebook/opt-1.3b
+python download-model.py facebook/opt-1.3b --exclude-pattern 'consolidated-.*\.safetensors'
 
 '''
 
@@ -72,7 +72,7 @@ def sanitize_model_and_branch_names(self, model, branch):
 
         return model, branch
 
-    def get_download_links_from_huggingface(self, model, branch, text_only=False, specific_file=None):
+    def get_download_links_from_huggingface(self, model, branch, text_only=False, specific_file=None, exclude_pattern=None):
         session = self.session
         page = f"/api/models/{model}/tree/{branch}"
         cursor = b""
@@ -100,6 +100,10 @@ def get_download_links_from_huggingface(self, model, branch, text_only=False, sp
                 if specific_file not in [None, ''] and fname != specific_file:
                     continue
 
+                # Excluir archivos que coinciden con el patrón de exclusión
+                if exclude_pattern is not None and re.match(exclude_pattern, fname):
+                    continue
+
                 if not is_lora and fname.endswith(('adapter_config.json', 'adapter_model.bin')):
                     is_lora = True
 
@@ -138,18 +142,14 @@ def get_download_links_from_huggingface(self, model, branch, text_only=False, sp
             cursor = base64.b64encode(cursor)
             cursor = cursor.replace(b'=', b'%3D')
 
-        # If both pytorch and safetensors are available, download safetensors only
-        # Also if GGUF and safetensors are available, download only safetensors
-        # (why do people do this?)
+        # Si hay archivos PyTorch o GGUF y también safetensors, solo descargamos safetensors
         if (has_pytorch or has_pt or has_gguf) and has_safetensors:
             has_gguf = False
             for i in range(len(classifications) - 1, -1, -1):
                 if classifications[i] in ['pytorch', 'pt', 'gguf']:
                     links.pop(i)
 
-        # For GGUF, try to download only the Q4_K_M if no specific file is specified.
-        # If not present, exclude all GGUFs, as that's likely a repository with both
-        # GGUF and fp16 files.
+        # Lógica para GGUF
         if has_gguf and specific_file is None:
             has_q4km = False
             for i in range(len(classifications) - 1, -1, -1):
@@ -174,7 +174,7 @@ def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, model_dir
         else:
             base_folder = 'models' if not is_lora else 'loras'
 
-        # If the model is of type GGUF, save directly in the base_folder
+        # Si el modelo es de tipo GGUF, guardamos directamente en la carpeta base
         if is_llamacpp:
             return Path(base_folder)
 
@@ -312,6 +312,7 @@ def check_model_files(self, model, branch, links, sha256, output_folder):
     parser.add_argument('--threads', type=int, default=4, help='Number of files to download simultaneously.')
     parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).')
     parser.add_argument('--specific-file', type=str, default=None, help='Name of the specific file to download (if not provided, downloads all).')
+    parser.add_argument('--exclude-pattern', type=str, default=None, help='Regex pattern to exclude files from download.')
    parser.add_argument('--output', type=str, default=None, help='Save the model files to this folder.')
     parser.add_argument('--model-dir', type=str, default=None, help='Save the model files to a subfolder of this folder instead of the default one (text-generation-webui/models).')
     parser.add_argument('--clean', action='store_true', help='Does not resume the previous download.')
@@ -322,6 +323,7 @@ def check_model_files(self, model, branch, links, sha256, output_folder):
     branch = args.branch
     model = args.MODEL
     specific_file = args.specific_file
+    exclude_pattern = args.exclude_pattern
 
     if model is None:
         print("Error: Please specify the model you'd like to download (e.g. 'python download-model.py facebook/opt-1.3b').")
@@ -336,7 +338,9 @@ def check_model_files(self, model, branch, links, sha256, output_folder):
         sys.exit()
 
     # Get the download links from Hugging Face
-    links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=args.text_only, specific_file=specific_file)
+    links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(
+        model, branch, text_only=args.text_only, specific_file=specific_file, exclude_pattern=exclude_pattern
+    )
 
     # Get the output folder
     if args.output:
@@ -349,4 +353,8 @@ def check_model_files(self, model, branch, links, sha256, output_folder):
         downloader.check_model_files(model, branch, links, sha256, output_folder)
     else:
         # Download files
-        downloader.download_model_files(model, branch, links, sha256, output_folder, specific_file=specific_file, threads=args.threads, is_llamacpp=is_llamacpp)
+        downloader.download_model_files(
+            model, branch, links, sha256, output_folder,
+            specific_file=specific_file, threads=args.threads, is_llamacpp=is_llamacpp
+        )
+

From aa02d6e0d3784abf1f775e985c71352779940465 Mon Sep 17 00:00:00 2001
From: Jack Cloudman
Date: Sun, 24 Nov 2024 06:20:16 -0600
Subject: [PATCH 2/2] Fixed comments

---
 download-model.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/download-model.py b/download-model.py
index ba963f103c..8fe94371f2 100644
--- a/download-model.py
+++ b/download-model.py
@@ -2,7 +2,7 @@
 Downloads models from Hugging Face to models/username_modelname.
 
 Example:
-python download-model.py facebook/opt-1.3b --exclude-pattern 'consolidated-.*\.safetensors'
+python download-model.py facebook/opt-1.3b
 
 '''
 
@@ -100,7 +100,7 @@ def get_download_links_from_huggingface(self, model, branch, text_only=False, sp
                 if specific_file not in [None, ''] and fname != specific_file:
                     continue
 
-                # Excluir archivos que coinciden con el patrón de exclusión
+                # Exclude files matching the exclude pattern
                 if exclude_pattern is not None and re.match(exclude_pattern, fname):
                     continue
 
@@ -110,7 +110,7 @@ def get_download_links_from_huggingface(self, model, branch, text_only=False, sp
                 is_pytorch = re.match(r"(pytorch|adapter|gptq)_model.*\.bin", fname)
                 is_safetensors = re.match(r".*\.safetensors", fname)
                 is_pt = re.match(r".*\.pt", fname)
-                is_gguf = re.match(r'.*\.gguf', fname)
+                is_gguf = re.match(r".*\.gguf", fname)
                 is_tiktoken = re.match(r".*\.tiktoken", fname)
                 is_tokenizer = re.match(r"(tokenizer|ice|spiece).*\.model", fname) or is_tiktoken
                 is_text = re.match(r".*\.(txt|json|py|md)", fname) or is_tokenizer
@@ -142,14 +142,15 @@ def get_download_links_from_huggingface(self, model, branch, text_only=False, sp
             cursor = base64.b64encode(cursor)
             cursor = cursor.replace(b'=', b'%3D')
 
-        # Si hay archivos PyTorch o GGUF y también safetensors, solo descargamos safetensors
+        # If both pytorch and safetensors are available, download safetensors only
+        # Also if GGUF and safetensors are available, download only safetensors
         if (has_pytorch or has_pt or has_gguf) and has_safetensors:
             has_gguf = False
             for i in range(len(classifications) - 1, -1, -1):
                 if classifications[i] in ['pytorch', 'pt', 'gguf']:
                     links.pop(i)
 
-        # Lógica para GGUF
+        # For GGUF, try to download only the Q4_K_M if no specific file is specified.
         if has_gguf and specific_file is None:
             has_q4km = False
             for i in range(len(classifications) - 1, -1, -1):
@@ -174,7 +175,7 @@ def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, model_dir
         else:
             base_folder = 'models' if not is_lora else 'loras'
 
-        # Si el modelo es de tipo GGUF, guardamos directamente en la carpeta base
+        # If the model is of type GGUF, save directly in the base_folder
         if is_llamacpp:
             return Path(base_folder)
 
@@ -357,4 +358,3 @@ def check_model_files(self, model, branch, links, sha256, output_folder):
             model, branch, links, sha256, output_folder,
             specific_file=specific_file, threads=args.threads, is_llamacpp=is_llamacpp
         )
-
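
A note on matching behavior: the new filter uses re.match, which anchors at the
start of the string, so an --exclude-pattern regex must match from the first
character of each filename. A minimal sketch of that anchoring, using
hypothetical filenames chosen only for illustration:

    import re

    # Pattern from the docstring example above: skip "consolidated-*" shards.
    exclude_pattern = r'consolidated-.*\.safetensors'

    # Hypothetical filenames for illustration.
    for fname in ['consolidated-00001-of-00003.safetensors',
                  'model-00001-of-00002.safetensors']:
        # re.match succeeds only when the pattern matches at position 0, so the
        # second file is kept even though it also ends in .safetensors.
        if re.match(exclude_pattern, fname):
            print('excluded:', fname)
        else:
            print('kept:', fname)

When calling the script, quote the pattern so the shell does not glob-expand
the '*' or strip the backslash before Python sees it:

    python download-model.py facebook/opt-1.3b --exclude-pattern 'consolidated-.*\.safetensors'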