Skip to content

Commit

Permalink
Merge pull request #52 from noisebridge/logging-fixes
Browse files Browse the repository at this point in the history
Logging fixes and common file paths
  • Loading branch information
skyfenton authored Nov 25, 2024
2 parents 7219a4f + 80a892e commit b4a97c3
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 20 deletions.
8 changes: 4 additions & 4 deletions mediabridge/data_processing/wiki_to_netflix.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def read_netflix_txt(txt_file, test):
"""
num_rows = None
if test:
num_rows = 100
num_rows = 50

with open(txt_file, "r", encoding="ISO-8859-1") as netflix_data:
for i, line in enumerate(netflix_data):
Expand Down Expand Up @@ -266,11 +266,11 @@ def process_data(test=False):

create_netflix_csv(netflix_csv, processed_data)

print(f"missing: {missing_count} ({missing_count / num_rows * 100:.2f}%)")
print(
f"found: {num_rows - missing_count} ({(num_rows - missing_count) / num_rows * 100:.2f}%)"
f"missing: {missing_count} ({missing_count / num_rows * 100:.2f}%)\n"
f"found: {num_rows - missing_count} ({(num_rows - missing_count) / num_rows * 100:.2f}%)\n"
f"total: {num_rows}\n",
)
print(f"total: {num_rows}")


if __name__ == "__main__":
Expand Down
15 changes: 15 additions & 0 deletions mediabridge/definitions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from pathlib import Path

# Guard: the constants below are computed from this file's own location, so
# refuse to load unless this module belongs to the top-level "mediabridge"
# package.
if __package__ != "mediabridge":
    raise Exception(
        "File path definitions are incorrect, definitions.py is not in the root 'mediabridge' module."
    )

# Directory that contains this file.
MODULE_DIR = Path(__file__).absolute().parent
# Parent directory of MODULE_DIR.
PROJECT_DIR = MODULE_DIR.parent
# "out" directory located directly under PROJECT_DIR.
OUTPUT_DIR = PROJECT_DIR / "out"

if __name__ == "__main__":
    # Show the computed paths, one per line.
    print(MODULE_DIR, PROJECT_DIR, OUTPUT_DIR, sep="\n")
61 changes: 45 additions & 16 deletions mediabridge/main.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,60 @@
import logging
from contextlib import nullcontext
from datetime import datetime

import typer as typer
from tqdm.contrib.logging import logging_redirect_tqdm

from mediabridge.data_processing import wiki_to_netflix
from mediabridge.definitions import OUTPUT_DIR


def main(
verbose: bool = typer.Option(
False, "--verbose", "-v", help="Enable verbose logging."
),
log: bool = typer.Option(
False, "--log", "-l", help="Enable all logging message levels and log to file."
),
full: bool = typer.Option(
False, "--full", "-f", help="Run processing on full dataset."
),
):
if not OUTPUT_DIR.exists():
print(
f"[WARNING] Output directory does not exist, creating new directory at {OUTPUT_DIR}"
)
OUTPUT_DIR.mkdir()


def main(verbose: bool = typer.Option(False, "--verbose", "-v"), test: bool = False):
if test:
# log all messages to file
if log:
# log all messages to new file
logging.basicConfig(
level=logging.DEBUG,
filename="out/mediabridge.log",
filemode="a",
filename=OUTPUT_DIR.joinpath(
f"mb_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
),
filemode="x",
format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
datefmt="%H:%M:%S",
)
wiki_to_netflix.process_data(True)
return

if verbose:
logging.basicConfig(level=logging.INFO)

# Redirect logging to tqdm.write function to avoid colliding with
# progress bar formatting
with logging_redirect_tqdm():
wiki_to_netflix.process_data(True)
else:
if verbose:
level = logging.INFO
else:
level = logging.WARNING
logging.basicConfig(level=level, format="[%(levelname)s] %(message)s")

# We redirect logs to stdout through tqdm to avoid breaking progress bar.
# But when logging to file, we use nullcontext or tqdm will redirect logs
# back to stdout.
with logging_redirect_tqdm() if not log else nullcontext():
try:
wiki_to_netflix.process_data(not full)
except Exception as e:
# include fatal exceptions with traceback in logs
if log:
logging.exception("Uncaught exception")
raise e


if __name__ == "__main__":
Expand Down

0 comments on commit b4a97c3

Please sign in to comment.