diff --git a/llmfoundry/callbacks/hf_checkpointer.py b/llmfoundry/callbacks/hf_checkpointer.py
index 2c4603ea87..14ccae7191 100644
--- a/llmfoundry/callbacks/hf_checkpointer.py
+++ b/llmfoundry/callbacks/hf_checkpointer.py
@@ -124,6 +124,7 @@ def _register_model_with_run_id_multiprocess(
         logging.basicConfig(
             format=
             f'%(asctime)s: rank{dist.get_global_rank()}[%(process)d][%(threadName)s]: %(levelname)s: %(name)s: %(message)s',
+            force=True,
         )
         logging.getLogger('composer').setLevel(composer_logging_level)
 
diff --git a/llmfoundry/command_utils/data_prep/convert_text_to_mds.py b/llmfoundry/command_utils/data_prep/convert_text_to_mds.py
index 11eac121d0..4aea183737 100644
--- a/llmfoundry/command_utils/data_prep/convert_text_to_mds.py
+++ b/llmfoundry/command_utils/data_prep/convert_text_to_mds.py
@@ -515,6 +515,7 @@ def _configure_logging(logging_level: str):
     logging.basicConfig(
         format=
         f'%(asctime)s: [%(process)d][%(threadName)s]: %(levelname)s: %(name)s: %(message)s',
+        force=True,
     )
     logging_level = logging_level.upper()
     logging.getLogger('llmfoundry').setLevel(logging_level)
diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index f25f2b5cef..472fdd52d4 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -272,6 +272,7 @@ def evaluate(cfg: DictConfig) -> tuple[list[Trainer], pd.DataFrame]:
             # 2022-06-29 11:22:26,152: rank0[822018][MainThread]: INFO: Message here
             format=
             f'%(asctime)s: rank{dist.get_global_rank()}[%(process)d][%(threadName)s]: %(levelname)s: %(name)s: %(message)s',
+            force=True,
         )
         logging.getLogger('llmfoundry').setLevel(
             eval_config.python_log_level.upper(),
diff --git a/llmfoundry/command_utils/train.py b/llmfoundry/command_utils/train.py
index cb287b029c..8fb833ddde 100644
--- a/llmfoundry/command_utils/train.py
+++ b/llmfoundry/command_utils/train.py
@@ -227,6 +227,7 @@ def train(cfg: DictConfig) -> Trainer:
             # 2022-06-29 11:22:26,152: rank0[822018][MainThread]: INFO: Message here
             format=
             f'%(asctime)s: rank{dist.get_global_rank()}[%(process)d][%(threadName)s]: %(levelname)s: %(name)s: %(message)s',
+            force=True,
         )
         logging.getLogger('llmfoundry').setLevel(
             train_cfg.python_log_level.upper(),
@@ -311,10 +312,11 @@ def train(cfg: DictConfig) -> Trainer:
     eval_gauntlet_config = train_cfg.eval_gauntlet or train_cfg.eval_gauntlet_str
 
     # Optional parameters will be set to default values if not specified.
-    env_run_name: Optional[str] = os.environ.get('RUN_NAME', None)
-    run_name: str = (
-        train_cfg.run_name if train_cfg.run_name else env_run_name
-    ) or 'llm'
+    run_name: Optional[
+        str] = train_cfg.run_name if train_cfg.run_name else os.environ.get(
+            'RUN_NAME',
+            None,
+        )
     is_state_dict_sharded: bool = (
         fsdp_config.get('state_dict_type', 'full') == 'sharded'
     ) if fsdp_config else False
@@ -322,9 +324,8 @@ def train(cfg: DictConfig) -> Trainer:
     save_filename: str = train_cfg.save_filename if train_cfg.save_filename else 'ep{epoch}-ba{batch}-rank{rank}.pt'
 
     # Enable autoresume from model checkpoints if possible
-    is_user_set_run_name: bool = train_cfg.run_name is not None or env_run_name is not None
     autoresume_default: bool = False
-    if is_user_set_run_name and \
+    if run_name is not None and \
         train_cfg.save_folder is not None \
         and not train_cfg.save_overwrite \
         and not train_cfg.save_weights_only:
diff --git a/scripts/inference/endpoint_generate.py b/scripts/inference/endpoint_generate.py
index 9991f5093f..c42ccd9b40 100644
--- a/scripts/inference/endpoint_generate.py
+++ b/scripts/inference/endpoint_generate.py
@@ -25,7 +25,11 @@
 
 from llmfoundry.utils import prompt_files as utils
 
-logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
+logging.basicConfig(
+    format=
+    f'%(asctime)s: [%(process)d][%(threadName)s]: %(levelname)s: %(name)s: %(message)s',
+    force=True,
+)
 log = logging.getLogger(__name__)
 
 ENDPOINT_API_KEY_ENV: str = 'ENDPOINT_API_KEY'
diff --git a/scripts/misc/download_model.py b/scripts/misc/download_model.py
index 91b0c5a037..cf264b12b2 100644
--- a/scripts/misc/download_model.py
+++ b/scripts/misc/download_model.py
@@ -31,8 +31,9 @@
 HF_TOKEN_ENV_VAR = 'HF_TOKEN'
 
 logging.basicConfig(
-    format=f'%(asctime)s: %(levelname)s: %(name)s: %(message)s',
-    level=logging.INFO,
+    format=
+    f'%(asctime)s: [%(process)d][%(threadName)s]: %(levelname)s: %(name)s: %(message)s',
+    force=True,
 )
 log = logging.getLogger(__name__)
 
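
A note on the recurring force=True addition, for reviewers unfamiliar with the flag: logging.basicConfig silently does nothing when the root logger already has handlers, so a handler installed earlier (for example by an imported library) would keep its old format. Passing force=True (available since Python 3.8) removes and closes any existing root handlers before installing the new one. A minimal sketch of the difference, independent of this repository:

import logging

# The first call installs a handler with the old format on the root logger.
logging.basicConfig(format='OLD %(message)s')
logging.warning('hello')  # prints: OLD hello

# Without force=True, a second basicConfig call is silently ignored,
# because the root logger already has a handler.
logging.basicConfig(format='NEW %(message)s')
logging.warning('hello')  # still prints: OLD hello

# With force=True, existing root handlers are removed and closed first,
# so the new format actually takes effect.
logging.basicConfig(format='NEW %(message)s', force=True)
logging.warning('hello')  # prints: NEW hello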