Skip to content

Commit

Permalink
Fixed pre-commit problems, fixed small bug in logging_config to handle LOG_LEVEL env var
Browse files Browse the repository at this point in the history
  • Loading branch information
theobjectivedad committed Jul 15, 2023
1 parent 3a46b8b commit 9ae7958
Show file tree
Hide file tree
Showing 8 changed files with 14 additions and 15 deletions.
2 changes: 1 addition & 1 deletion scripts/finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
from optimum.bettertransformer import BetterTransformer
from transformers import GenerationConfig, TextStreamer

from axolotl.logging_config import configure_logging
from axolotl.utils.data import load_prepare_datasets, load_pretraining_dataset
from axolotl.utils.dict import DictDefault
from axolotl.utils.models import load_model, load_tokenizer
from axolotl.utils.tokenization import check_dataset_labels
from axolotl.utils.trainer import setup_trainer
from axolotl.utils.validation import validate_config
from axolotl.utils.wandb import setup_wandb_env_vars
from axolotl.logging_config import configure_logging

project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
src_dir = os.path.join(project_root, "src")
Expand Down
1 change: 1 addition & 0 deletions src/axolotl/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

LOG = logging.getLogger("axolotl")


class TokenizedPromptDataset(IterableDataset):
"""
Iterable dataset that returns tokenized prompts from a stream of text files.
Expand Down
5 changes: 4 additions & 1 deletion src/axolotl/logging_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""Logging configuration settings"""

import os
import sys
from logging.config import dictConfig
from typing import Any, Dict
Expand All @@ -18,7 +21,7 @@
"stream": sys.stdout,
},
},
"root": {"handlers": ["console"], "level": "INFO"},
"root": {"handlers": ["console"], "level": os.getenv("LOG_LEVEL", "INFO")},
}


Expand Down
3 changes: 2 additions & 1 deletion src/axolotl/monkeypatch/llama_landmark_attn.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
logging,
replace_return_docstrings,
)

LOG = logging.getLogger("axolotl")

_CONFIG_FOR_DOC = "LlamaConfig"
Expand Down Expand Up @@ -861,7 +862,7 @@ def forward(

if self.gradient_checkpointing and self.training:
if use_cache:
logger.warning_once(
LOG.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache = False
Expand Down
2 changes: 2 additions & 0 deletions src/axolotl/prompt_strategies/pygmalion.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
tokenize_prompt_default,
)

LOG = logging.getLogger("axolotl")

IGNORE_TOKEN_ID = -100


Expand Down
1 change: 1 addition & 0 deletions src/axolotl/prompters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from enum import Enum, auto
from typing import Generator, List, Optional, Tuple, Union

LOG = logging.getLogger("axolotl")
IGNORE_TOKEN_ID = -100


Expand Down
12 changes: 3 additions & 9 deletions src/axolotl/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,9 +258,7 @@ def load_tokenized_prepared_datasets(
suffix = ""
if ":load_" in d.type:
suffix = f" Did you mean {d.type.replace(':load_', '.load_')}?"
LOG.error(
f"unhandled prompt tokenization strategy: {d.type}. {suffix}"
)
LOG.error(f"unhandled prompt tokenization strategy: {d.type}. {suffix}")
raise ValueError(
f"unhandled prompt tokenization strategy: {d.type} {suffix}"
)
Expand All @@ -271,9 +269,7 @@ def load_tokenized_prepared_datasets(
samples = samples + list(d)
dataset = Dataset.from_list(samples).shuffle(seed=seed)
if cfg.local_rank == 0:
LOG.info(
f"Saving merged prepared dataset to disk... {prepared_ds_path}"
)
LOG.info(f"Saving merged prepared dataset to disk... {prepared_ds_path}")
dataset.save_to_disk(prepared_ds_path)
if cfg.push_dataset_to_hub:
LOG.info(
Expand Down Expand Up @@ -366,9 +362,7 @@ def load_prepare_datasets(
[dataset],
seq_length=max_packed_sequence_len,
)
LOG.info(
f"packing master dataset to len: {cfg.max_packed_sequence_len}"
)
LOG.info(f"packing master dataset to len: {cfg.max_packed_sequence_len}")
dataset = Dataset.from_list(list(constant_len_dataset))

# filter out bad data
Expand Down
3 changes: 0 additions & 3 deletions tests/test_prompt_tokenizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@
ShareGPTPromptTokenizingStrategy,
)
from axolotl.prompters import AlpacaPrompter, PromptStyle, ShareGPTPrompter
from axolotl.logging_config import configure_logging

configure_logging()

LOG = logging.getLogger("axolotl")

Expand Down

0 comments on commit 9ae7958

Please sign in to comment.