
Commit

support benchmark for paddlepaddle3.0
changdazhou committed Aug 2, 2024
1 parent 9c19e6d commit 40491eb
Showing 4 changed files with 22 additions and 16 deletions.
5 changes: 2 additions & 3 deletions ppocr/utils/loggers/wandb_logger.py
@@ -2,8 +2,6 @@
 from .base_logger import BaseLogger
 from ppocr.utils.logging import get_logger

-logger = get_logger()
-

 class WandbLogger(BaseLogger):
     def __init__(
@@ -40,6 +38,7 @@ def __init__(
             resume="allow",
         )
         self._wandb_init.update(**kwargs)
+        self.logger = get_logger()

         _ = self.run

@@ -50,7 +49,7 @@ def __init__(
     def run(self):
         if self._run is None:
             if self.wandb.run is not None:
-                logger.info(
+                self.logger.info(
                     "There is a wandb run already in progress "
                     "and newly created instances of `WandbLogger` will reuse"
                     " this run. If this is not desired, call `wandb.finish()`"
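Taken together with the tools/infer/utility.py change further down, this hunk set replaces a logger created at module import time with one obtained where it is used; in WandbLogger it becomes an instance attribute set in __init__. A minimal sketch of the two patterns, with hypothetical names rather than PaddleOCR code:

import logging

def get_demo_logger(name="demo"):
    # Stand-in for ppocr.utils.logging.get_logger; the real configuration differs.
    return logging.getLogger(name)

# Old pattern: the logger is created as a side effect of importing the module,
# before any runtime configuration (log file, level, ranks) has been applied.
module_logger = get_demo_logger()

class Reporter:
    """New pattern: each instance fetches the logger when it is constructed."""

    def __init__(self):
        self.logger = get_demo_logger()

    def report(self):
        self.logger.info("logging through the instance attribute")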
11 changes: 9 additions & 2 deletions ppocr/utils/logging.py
@@ -26,7 +26,7 @@


 @functools.lru_cache()
-def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG):
+def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG, log_ranks="0"):
     """Initialize and get a logger by name.
     If the logger has not been initialized, this method will initialize the
     logger by adding one or two handlers, otherwise the initialized logger will
@@ -39,6 +39,7 @@ def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG):
         log_level (int): The logger level. Note that only the process of
             rank 0 is affected, and other processes will set the level to
             "Error" thus be silent most of the time.
+        log_ranks (str): The ids of gpu to log which are separated by "," when more than 1, "0" by default.
     Returns:
         logging.Logger: The expected logger.
     """
@@ -62,7 +63,13 @@ def get_logger(name="ppocr", log_file=None, log_level=logging.DEBUG):
         file_handler = logging.FileHandler(log_file, "a")
         file_handler.setFormatter(formatter)
         logger.addHandler(file_handler)
-    if dist.get_rank() == 0:
+
+    if isinstance(log_ranks, str):
+        log_ranks = [int(i) for i in log_ranks.split(",")]
+    elif isinstance(log_ranks, int):
+        log_ranks = [log_ranks]
+
+    if dist.get_rank() in log_ranks:
         logger.setLevel(log_level)
     else:
         logger.setLevel(logging.ERROR)
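A rough usage sketch of the extended get_logger signature (the file path and rank values here are illustrative only): the string form of log_ranks is split on commas, an int is wrapped into a single-element list, and only processes whose dist.get_rank() appears in that list keep the requested log level, while all others are raised to ERROR.

from ppocr.utils.logging import get_logger

# Ranks 0 and 1 keep the default DEBUG level; all other ranks only emit ERRORs.
logger = get_logger(log_file="./output/train.log", log_ranks="0,1")
logger.info("visible only on the ranks listed in log_ranks")

# An integer is also accepted and treated as a single rank.
rank3_logger = get_logger(name="ppocr_rank3", log_ranks=3)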
4 changes: 1 addition & 3 deletions tools/infer/utility.py
@@ -26,9 +26,6 @@
 from ppocr.utils.logging import get_logger


-logger = get_logger()
-
-
 def str2bool(v):
     return v.lower() in ("true", "yes", "t", "y", "1")

@@ -340,6 +337,7 @@ def get_infer_gpuid():
     Returns:
         int: The GPU ID to be used for inference.
     """
+    logger = get_logger()
     if not paddle.device.is_compiled_with_rocm:
         gpu_id_str = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
     else:
18 changes: 10 additions & 8 deletions tools/program.py
@@ -204,6 +204,7 @@ def train(
     eval_batch_step = config["Global"]["eval_batch_step"]
     eval_batch_epoch = config["Global"].get("eval_batch_epoch", None)
     profiler_options = config["profiler_options"]
+    print_mem_info = config["Global"].get("print_mem_info", True)

     global_step = 0
     if "global_step" in pre_best_model_dict:
@@ -406,9 +407,8 @@ def train(
                     metrics=train_stats.get(), prefix="TRAIN", step=global_step
                 )

-            if dist.get_rank() == 0 and (
-                (global_step > 0 and global_step % print_batch_step == 0)
-                or (idx >= len(train_dataloader) - 1)
+            if (global_step > 0 and global_step % print_batch_step == 0) or (
+                idx >= len(train_dataloader) - 1
             ):
                 logs = train_stats.log()

@@ -418,13 +418,13 @@ def train(
                 eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
                 max_mem_reserved_str = ""
                 max_mem_allocated_str = ""
-                if paddle.device.is_compiled_with_cuda():
-                    max_mem_reserved_str = f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
-                    max_mem_allocated_str = f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
+                if paddle.device.is_compiled_with_cuda() and print_mem_info:
+                    max_mem_reserved_str = f", max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
+                    max_mem_allocated_str = f" max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
                 strs = (
                     "epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: "
                     "{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, "
-                    "ips: {:.5f} samples/s, eta: {}, {} {}".format(
+                    "ips: {:.5f} samples/s, eta: {}{}{}".format(
                         epoch,
                         epoch_num,
                         global_step,
@@ -740,7 +740,9 @@ def preprocess(is_train=False):
         log_file = "{}/train.log".format(save_model_dir)
     else:
         log_file = None
-    logger = get_logger(log_file=log_file)
+
+    log_ranks = config["Global"].get("log_ranks", "0")
+    logger = get_logger(log_file=log_file, log_ranks=log_ranks)

     # check if set use_gpu=True in paddlepaddle cpu version
     use_gpu = config["Global"].get("use_gpu", False)
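For reference, a minimal sketch of how the two new Global options are read by tools/program.py; the config dict below is a hand-built stand-in for a parsed YAML configuration, not a complete PaddleOCR config.

# Hypothetical, trimmed-down stand-in for the parsed config used by tools/program.py.
config = {
    "Global": {
        "save_model_dir": "./output/rec",
        "print_mem_info": False,  # drop max_mem_reserved / max_mem_allocated from train logs
        "log_ranks": "0,1",       # ranks that keep full training logs
    }
}

# Mirrors the reads added in the diff above, including their defaults.
print_mem_info = config["Global"].get("print_mem_info", True)  # True -> memory stats are printed
log_ranks = config["Global"].get("log_ranks", "0")             # forwarded to get_logger(log_ranks=...)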
