Skip to content

Commit

Permalink
Fix security issue or ignore false positives
Browse files — browse the repository at this point in the history
  • Loading branch information
NanoCode012 committed May 30, 2023
1 parent 83d2920 commit a1f9850
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 10 deletions.
4 changes: 2 additions & 2 deletions scripts/finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def train(

# load the config from the yaml file
with open(config, encoding="utf-8") as file:
cfg: DictDefault = DictDefault(yaml.load(file, Loader=yaml.Loader))
cfg: DictDefault = DictDefault(yaml.safe_load(file))
# if there are any options passed in the cli, if it is something that seems valid from the yaml,
# then overwrite the value
cfg_keys = cfg.keys()
Expand Down Expand Up @@ -185,7 +185,7 @@ def train(
logging.info("check_dataset_labels...")
check_dataset_labels(
train_dataset.select(
[random.randrange(0, len(train_dataset) - 1) for i in range(5)]
[random.randrange(0, len(train_dataset) - 1) for _ in range(5)] # nosec
),
tokenizer,
)
Expand Down
8 changes: 4 additions & 4 deletions src/axolotl/prompt_tokenizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
from axolotl.prompters import IGNORE_TOKEN_ID

IGNORE_INDEX = -100
LLAMA_DEFAULT_PAD_TOKEN = "[PAD]"
LLAMA_DEFAULT_EOS_TOKEN = "</s>"
LLAMA_DEFAULT_BOS_TOKEN = "<s>"
LLAMA_DEFAULT_UNK_TOKEN = "<unk>"
LLAMA_DEFAULT_PAD_TOKEN = "[PAD]" # nosec
LLAMA_DEFAULT_EOS_TOKEN = "</s>" # nosec
LLAMA_DEFAULT_BOS_TOKEN = "<s>" # nosec
LLAMA_DEFAULT_UNK_TOKEN = "<unk>" # nosec


class InvalidDataException(Exception):
Expand Down
8 changes: 4 additions & 4 deletions src/axolotl/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def load_tokenized_prepared_datasets(
) -> DatasetDict:
tokenizer_name = tokenizer.__class__.__name__
ds_hash = str(
md5(
md5( # nosec
(
str(cfg.sequence_len)
+ "@"
Expand All @@ -66,7 +66,7 @@ def load_tokenized_prepared_datasets(
use_auth_token=use_auth_token,
)
dataset = dataset["train"]
except Exception: # pylint: disable=broad-except
except Exception: # pylint: disable=broad-except # nosec
pass

if dataset:
Expand Down Expand Up @@ -272,7 +272,7 @@ def load_prepare_datasets(
# see if we can go ahead and load the stacked dataset
seed = f"@{str(cfg.seed)}" if cfg.seed else ""
ds_hash = str(
md5(
md5( # nosec
(
str(cfg.sequence_len)
+ "@"
Expand Down Expand Up @@ -304,7 +304,7 @@ def load_prepare_datasets(
use_auth_token=use_auth_token,
)
dataset = dataset["train"]
except Exception: # pylint: disable=broad-except
except Exception: # pylint: disable=broad-except # nosec
pass

if dataset:
Expand Down

0 comments on commit a1f9850

Please sign in to comment.