Avoid modifying log level globally (#1944)
* Avoid modifying log level globally

* Address get_job_logs

* Fix integration tests
droctothorpe authored Nov 13, 2023
1 parent 2a91d83 commit 230bfb4
Showing 2 changed files with 17 additions and 12 deletions.
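The commit stops the SDK from configuring logging globally at import time: the `logging.basicConfig(...)` and root-level `setLevel(logging.INFO)` calls are removed, each module now uses its own `logging.getLogger(__name__)` logger, and lifecycle messages move from `logging.info` to `logger.debug`, leaving verbosity to the consuming application. A minimal sketch of how an application could opt back in to those messages after this change (the handler setup via `basicConfig` and the chosen format are illustrative choices, not part of the commit):

import logging

# Configure handlers and format in the application, not in the SDK.
logging.basicConfig(format="%(message)s")

# The SDK loggers are created with logging.getLogger(__name__), so they all
# sit under the "kubeflow.training" namespace; raising that subtree to DEBUG
# surfaces messages such as "<job_kind> <namespace>/<name> has been created".
logging.getLogger("kubeflow.training").setLevel(logging.DEBUG)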
22 changes: 14 additions & 8 deletions sdk/python/kubeflow/training/api/training_client.py
@@ -24,8 +24,7 @@
from kubeflow.training.constants import constants
from kubeflow.training.utils import utils

- logging.basicConfig(format="%(message)s")
- logging.getLogger().setLevel(logging.INFO)
+ logger = logging.getLogger(__name__)

status_logger = utils.StatusLogger(
header="{:<30.30} {:<20.20} {}".format("NAME", "STATE", "TIME"),
@@ -222,7 +221,7 @@ def create_job(
f"Failed to create {job_kind}: {namespace}/{job.metadata.name}"
)

logging.info(f"{job_kind} {namespace}/{job.metadata.name} has been created")
logger.debug(f"{job_kind} {namespace}/{job.metadata.name} has been created")

def get_job(
self,
@@ -771,7 +770,7 @@ def get_job_logs(
replica_index: Optional[int] = None,
follow: bool = False,
timeout: int = constants.DEFAULT_TIMEOUT,
- ):
+ ) -> Dict[str, str]:
"""Print the training logs for the Job. By default it returns logs from
the `master` pod.
@@ -801,6 +800,10 @@ def get_job_logs(
timeout: Optional, Kubernetes API server timeout in seconds
to execute the request.
+ Returns:
+     Dict[str, str]: A dictionary in which the keys are pod names and the
+     values are the corresponding logs.
Raises:
ValueError: Job replica type is invalid.
TimeoutError: Timeout to get Job pods.
@@ -819,6 +822,7 @@
timeout=timeout,
)

+ logs_dict = {}
if pods and follow:
log_streams = []
for pod in pods:
@@ -849,7 +853,7 @@
if logline is None:
finished[index] = True
break
logging.info("[Pod %s]: %s", pods[index], logline)
print(f"[Pod {pods[index]}]: {logline}")
except queue.Empty:
break
elif pods:
@@ -860,10 +864,12 @@
namespace,
container=constants.JOB_PARAMETERS[job_kind]["container"],
)
logging.info("The logs of pod %s:\n %s", pod, pod_logs)
logs_dict[pod] = pod_logs
except Exception:
raise RuntimeError(f"Failed to read logs for pod {namespace}/{pod}")

+ return logs_dict

def update_job(
self,
job: constants.JOB_MODELS_TYPE,
@@ -908,7 +914,7 @@ def update_job(
except Exception:
raise RuntimeError(f"Failed to update {job_kind}: {namespace}/{name}")

logging.info(f"{job_kind} {namespace}/{name} has been updated")
logger.debug(f"{job_kind} {namespace}/{name} has been updated")

def delete_job(
self,
@@ -950,4 +956,4 @@ def delete_job(
except Exception:
raise RuntimeError(f"Failed to delete {job_kind}: {namespace}/{name}")

logging.info(f"{job_kind} {namespace}/{name} has been deleted")
logger.debug(f"{job_kind} {namespace}/{name} has been deleted")
7 changes: 3 additions & 4 deletions sdk/python/kubeflow/training/utils/utils.py
@@ -25,8 +25,7 @@
from kubeflow.training import models


- logging.basicConfig(format="%(message)s")
- logging.getLogger().setLevel(logging.INFO)
+ logger = logging.getLogger(__name__)


class StatusLogger:
@@ -39,9 +38,9 @@ def __init__(self, header, column_format)

def __call__(self, *values):
if self.first_call:
- logging.info(self.header)
+ logger.debug(self.header)
self.first_call = False
- logging.info(self.column_format.format(*values))
+ logger.debug(self.column_format.format(*values))


class FakeResponse:
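`StatusLogger`, used by the client for its NAME/STATE/TIME status table, now also writes through the module logger at DEBUG level, so the table stays silent unless the application raises the log level. An illustrative sketch of exercising it directly (the `column_format` value and the sample row are assumptions; the header mirrors the one built in training_client.py above):

import logging

from kubeflow.training.utils import utils

# Application-side configuration to make the DEBUG-level table visible.
logging.basicConfig(format="%(message)s")
logging.getLogger("kubeflow.training").setLevel(logging.DEBUG)

status_logger = utils.StatusLogger(
    header="{:<30.30} {:<20.20} {}".format("NAME", "STATE", "TIME"),
    column_format="{:<30.30} {:<20.20} {}",  # assumed to match the header layout
)

# The first call emits the header, and every call emits one formatted row,
# both through logger.debug.
status_logger("pytorch-dist-mnist", "Running", "2023-11-13T00:00:00Z")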
