From 40491eb6d41b418ad852dea12753cd6fadb786a1 Mon Sep 17 00:00:00 2001
From: zhouchangda
Date: Fri, 2 Aug 2024 07:43:30 +0000
Subject: [PATCH] support benchmark for paddlepaddle3.0

---
 ppocr/utils/loggers/wandb_logger.py |  5 ++---
 ppocr/utils/logging.py              | 11 +++++++++--
 tools/infer/utility.py              |  4 +---
 tools/program.py                    | 18 ++++++++++--------
 4 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/ppocr/utils/loggers/wandb_logger.py b/ppocr/utils/loggers/wandb_logger.py
index 3b528b3fa9..44cbfc163f 100644
--- a/ppocr/utils/loggers/wandb_logger.py
+++ b/ppocr/utils/loggers/wandb_logger.py
@@ -2,8 +2,6 @@
 from .base_logger import BaseLogger
 from ppocr.utils.logging import get_logger
 
-logger = get_logger()
-
 
 class WandbLogger(BaseLogger):
     def __init__(
@@ -40,6 +38,7 @@ def __init__(
             resume="allow",
         )
         self._wandb_init.update(**kwargs)
+        self.logger = get_logger()
 
         _ = self.run
 
@@ -50,7 +49,7 @@ def __init__(
     def run(self):
         if self._run is None:
             if self.wandb.run is not None:
-                logger.info(
+                self.logger.info(
                     "There is a wandb run already in progress "
                     "and newly created instances of `WandbLogger` will reuse"
                     " this run. If this is not desired, call `wandb.finish()`"
diff --git a/ppocr/utils/logging.py b/ppocr/utils/logging.py
index 945bb3ee75..c460936d85 100644
--- a/ppocr/utils/logging.py
+++ b/ppocr/utils/logging.py
@@ -26,7 +26,7 @@
 
 
 @functools.lru_cache()
-def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG):
+def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG, log_ranks="0"):
     """Initialize and get a logger by name.
     If the logger has not been initialized, this method will initialize the
     logger by adding one or two handlers, otherwise the initialized logger will
@@ -39,6 +39,7 @@ def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG):
         log_level (int): The logger level. Note that only the process of
             rank 0 is affected, and other processes will set the level to
             "Error" thus be silent most of the time.
+        log_ranks (str): The ids of gpu to log which are separated by "," when more than 1, "0" by default.
     Returns:
         logging.Logger: The expected logger.
     """
@@ -62,7 +63,13 @@ def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG):
         file_handler = logging.FileHandler(log_file, "a")
         file_handler.setFormatter(formatter)
         logger.addHandler(file_handler)
-    if dist.get_rank() == 0:
+
+    if isinstance(log_ranks, str):
+        log_ranks = [int(i) for i in log_ranks.split(",")]
+    elif isinstance(log_ranks, int):
+        log_ranks = [log_ranks]
+
+    if dist.get_rank() in log_ranks:
         logger.setLevel(log_level)
     else:
         logger.setLevel(logging.ERROR)
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index f019e97e86..41f4350bff 100644
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -26,9 +26,6 @@
 from ppocr.utils.logging import get_logger
 
 
-logger = get_logger()
-
-
 def str2bool(v):
     return v.lower() in ("true", "yes", "t", "y", "1")
 
@@ -340,6 +337,7 @@ def get_infer_gpuid():
     Returns:
         int: The GPU ID to be used for inference.
     """
+    logger = get_logger()
     if not paddle.device.is_compiled_with_rocm:
         gpu_id_str = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
     else:
diff --git a/tools/program.py b/tools/program.py
index 1cc5bbac1c..882f03f58f 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -204,6 +204,7 @@ def train(
     eval_batch_step = config["Global"]["eval_batch_step"]
     eval_batch_epoch = config["Global"].get("eval_batch_epoch", None)
     profiler_options = config["profiler_options"]
+    print_mem_info = config["Global"].get("print_mem_info", True)
 
     global_step = 0
     if "global_step" in pre_best_model_dict:
@@ -406,9 +407,8 @@ def train(
                     metrics=train_stats.get(), prefix="TRAIN", step=global_step
                 )
 
-            if dist.get_rank() == 0 and (
-                (global_step > 0 and global_step % print_batch_step == 0)
-                or (idx >= len(train_dataloader) - 1)
+            if (global_step > 0 and global_step % print_batch_step == 0) or (
+                idx >= len(train_dataloader) - 1
             ):
                 logs = train_stats.log()
 
@@ -418,13 +418,13 @@ def train(
                 eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
                 max_mem_reserved_str = ""
                 max_mem_allocated_str = ""
-                if paddle.device.is_compiled_with_cuda():
-                    max_mem_reserved_str = f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
-                    max_mem_allocated_str = f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
+                if paddle.device.is_compiled_with_cuda() and print_mem_info:
+                    max_mem_reserved_str = f", max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
+                    max_mem_allocated_str = f" max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
                 strs = (
                     "epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: "
                     "{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, "
-                    "ips: {:.5f} samples/s, eta: {}, {} {}".format(
+                    "ips: {:.5f} samples/s, eta: {}{}{}".format(
                         epoch,
                         epoch_num,
                         global_step,
@@ -740,7 +740,9 @@ def preprocess(is_train=False):
         log_file = "{}/train.log".format(save_model_dir)
     else:
         log_file = None
-    logger = get_logger(log_file=log_file)
+
+    log_ranks = config["Global"].get("log_ranks", "0")
+    logger = get_logger(log_file=log_file, log_ranks=log_ranks)
 
     # check if set use_gpu=True in paddlepaddle cpu version
     use_gpu = config["Global"].get("use_gpu", False)