FFengIll · FFengIll · Mar 11, 2023 · Mar 11, 2023 · Mar 11, 2023 · Mar 11, 2023
diff --git a/.gitpod.yml b/.gitpod.yml
@@ -8,6 +8,6 @@ tasks:
 vscode:
   extensions:
     - ms-python.python
-
     - cweijan.vscode-office
-
+    - ms-python.black-formatter
+    - eamodio.gitlens
diff --git a/cli.py b/cli.py
@@ -11,9 +11,12 @@
         logger.remove()
         logger.add(sys.stderr, level="DEBUG")
 
-    if args.input and args.output:
-        edit_pdf(args.input, args.output, args.ignore)
-    elif args.indir and args.outdir:
-        batch_edit_pdf(args.indir, args.outdir, args.ignore)
-    else:
-        get_parser().print_help()
+    try:  # dispatch on the supplied arguments: single file, batch directory, or usage help
+        if args.input and args.output:
+            edit_pdf(args.input, args.output, args.ignore)
+        elif args.indir and args.outdir:
+            batch_edit_pdf(args.indir, args.outdir, args.ignore)
+        else:
+            get_parser().print_help()
+    except Exception as err:  # not a bare except: let KeyboardInterrupt/SystemExit propagate
+        sys.exit(str(err))  # prints the reason to stderr and exits with status 1
diff --git a/pdf_white_cut/analyzer.py b/pdf_white_cut/analyzer.py
@@ -61,7 +61,7 @@ def extract_item_box(item):
     elif isinstance(item, LTCurve):
         logger.debug("use itself: {}", item)
     elif isinstance(item, LTTextBox):
-        logger.warning("NotImplemented and use itself: {}", item)
+        logger.warning("use itself since NotImplemented: {}", item)
     elif isinstance(item, LTTextLine):
         # there is 2 types of `LTTextLine`: horizontal and vertical
         text = item.get_text().encode("unicode_escape")
@@ -91,7 +91,7 @@ def extract_item_box(item):
             bbox[3] + item.height / 2,
         )
     elif isinstance(item, LTImage):
-        logger.warning("NotImplemented and use itself: {}", item)
+        logger.warning("use itself since NotImplemented: {}", item)
     elif isinstance(item, LTFigure):
         logger.debug("analyse LTFigure:{}", item)
         # for `LTFigure`, the bbox is modified in `PDFMiner`
@@ -160,7 +160,6 @@ def extract_pdf_boxs(filename, ignore=0):
             boxs.append(box)
 
         max_box = get_max_box(boxs)
-        logger.warning("visible bbox: {}", max_box)
         page_boxs.append(max_box)
 
         logger.warning("max visible bbox for the page: {}", max_box)

diff --git a/pdf_white_cut/cutter.py b/pdf_white_cut/cutter.py
@@ -37,14 +37,21 @@ def edit_page_box(page, visible_box):
     logger.info("cut media box to: {}", box)
 
 
-def edit_pdf(source: str, target: str, ignore=0):
+def edit_pdf(source: Path, target: Path, ignore=0):
     """
     edit to cut the white slide of the input pdf file, and output a new pdf file.
     """
+    # guard type
+    source = Path(source)
+    target = Path(target)
+
     if source == target:
         logger.error("{} {}", source, target)
         raise Exception("input and output can not be the same!")
 
+    if not source.exists():
+        raise Exception("input file not exists! ({})".format(source))
+
     try:
         # MENTION: never move and change the sequence, since file IO.
         # analyses the visible box of each page, aka the box scale. res=[(x1,y1,x2,y2)]
@@ -66,39 +73,34 @@ def edit_pdf(source: str, target: str, ignore=0):
                 edit_page_box(page, box)
                 outpdf.add_page(page)
 
-            Path(target).dirname().makedirs_p()
+            logger.info("output to {}", Path(target))
+            target.abspath().dirname().makedirs_p()
             with open(target, "wb") as outfd:
                 outpdf.write(outfd)
-                logger.info("output file: {}", target)
 
     except UnicodeEncodeError as ue:
         logger.exception("UnicodeEncodeError while processing file:{}", source)
         logger.exception(ue)
+        raise ue
     except Exception as e:
-        logger.exception("Some other Error while processing file:{}", source)
+        logger.exception("Some unknown Error while processing file:{}", source)
         logger.exception(e)
+        raise e
 
 
-def scan_files(folder, glob=""):
-    """
-    scan files under the dir with spec prefix and postfix
-    """
-    files = []
-    for item in Path(folder).listdir(glob):
-        item: "Path"
-        files.append(item.basename())
-    return files
-
+def batch_edit_pdf(indir: Path, outdir: Path, ignore=0):
+    # guard type
+    indir = Path(indir)
+    outdir = Path(outdir)
 
-def batch_edit_pdf(indir, outdir, ignore=0):
     if indir == outdir:
         raise Exception("input and output can not be the same!")
 
-    files = scan_files(indir, glob="*.pdf")
-    logger.info(files)
+    files = [pdf.basename() for pdf in indir.listdir("*.pdf")]
+    logger.info("pdf files in spec folder: {}", files)
 
-    if not os.path.exists(indir):
-        os.mkdir(indir)
+    # guard dir
+    outdir.makedirs_p()
 
     logger.info("input dir: {}", indir)
     logger.info("output dir: {}", outdir)

diff --git a/pdf_white_cut/parser.py b/pdf_white_cut/parser.py
@@ -19,15 +19,15 @@ def get_parser():
         "-id",
         help="input directory",
         action="store",
-        default="",
+        default=".",
         type=str,
         dest="indir",
     )
     parser.add_argument(
         "-od",
         help="output directory",
         action="store",
-        default="",
+        default=".",
         type=str,
         dest="outdir",
     )

diff --git a/tests/test_cli.sh b/tests/test_cli.sh
@@ -0,0 +1,4 @@
+python cli.py -i cases/input/input.pdf -o output/output.pdf > /dev/null 2>&1
+rc=$?; echo "$rc == 0"; [ "$rc" -eq 0 ] || exit 1  # an existing input must succeed
+python cli.py -i cases/input/non_exist.pdf -o output/output.pdf > /dev/null 2>&1
+rc=$?; echo "$rc == 1"; [ "$rc" -eq 1 ] || exit 1  # a missing input must fail with status 1