Fix security issue or ignore false positives

axolotl-ai-cloud · May 30, 2023 · a1f9850
1 parent 83d2920
commit a1f9850
Show file tree

Hide file tree

Showing 3 changed files with 10 additions and 10 deletions.
diff --git a/scripts/finetune.py b/scripts/finetune.py
@@ -136,7 +136,7 @@ def train(
 
     # load the config from the yaml file
     with open(config, encoding="utf-8") as file:
-        cfg: DictDefault = DictDefault(yaml.load(file, Loader=yaml.Loader))
+        cfg: DictDefault = DictDefault(yaml.safe_load(file))
     # if there are any options passed in the cli, if it is something that seems valid from the yaml,
     # then overwrite the value
     cfg_keys = cfg.keys()
@@ -185,7 +185,7 @@ def train(
         logging.info("check_dataset_labels...")
         check_dataset_labels(
             train_dataset.select(
-                [random.randrange(0, len(train_dataset) - 1) for i in range(5)]
+                [random.randrange(0, len(train_dataset) - 1) for _ in range(5)]  # nosec
             ),
             tokenizer,
         )

diff --git a/src/axolotl/prompt_tokenizers.py b/src/axolotl/prompt_tokenizers.py
@@ -11,10 +11,10 @@
 from axolotl.prompters import IGNORE_TOKEN_ID
 
 IGNORE_INDEX = -100
-LLAMA_DEFAULT_PAD_TOKEN = "[PAD]"
-LLAMA_DEFAULT_EOS_TOKEN = "</s>"
-LLAMA_DEFAULT_BOS_TOKEN = "<s>"
-LLAMA_DEFAULT_UNK_TOKEN = "<unk>"
+LLAMA_DEFAULT_PAD_TOKEN = "[PAD]"  # nosec
+LLAMA_DEFAULT_EOS_TOKEN = "</s>"  # nosec
+LLAMA_DEFAULT_BOS_TOKEN = "<s>"  # nosec
+LLAMA_DEFAULT_UNK_TOKEN = "<unk>"  # nosec
 
 
 class InvalidDataException(Exception):

diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py
@@ -40,7 +40,7 @@ def load_tokenized_prepared_datasets(
 ) -> DatasetDict:
     tokenizer_name = tokenizer.__class__.__name__
     ds_hash = str(
-        md5(
+        md5(  # nosec
             (
                 str(cfg.sequence_len)
                 + "@"
@@ -66,7 +66,7 @@ def load_tokenized_prepared_datasets(
                 use_auth_token=use_auth_token,
             )
             dataset = dataset["train"]
-    except Exception:  # pylint: disable=broad-except
+    except Exception:  # pylint: disable=broad-except # nosec
         pass
 
     if dataset:
@@ -272,7 +272,7 @@ def load_prepare_datasets(
         # see if we can go ahead and load the stacked dataset
         seed = f"@{str(cfg.seed)}" if cfg.seed else ""
         ds_hash = str(
-            md5(
+            md5(  # nosec
                 (
                     str(cfg.sequence_len)
                     + "@"
@@ -304,7 +304,7 @@ def load_prepare_datasets(
                     use_auth_token=use_auth_token,
                 )
                 dataset = dataset["train"]
-        except Exception:  # pylint: disable=broad-except
+        except Exception:  # pylint: disable=broad-except # nosec
             pass
 
         if dataset: