Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated the Codebase to use pathlib instead of using os.path and path.py #208

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "hatchling.build"
name = "gutenberg2zim"
authors = [{ name = "Kiwix", email = "dev@kiwix.org" }]
keywords = ["kiwix", "zim", "offline", "gutenberg"]
requires-python = ">=3.11"
requires-python = ">=3.11,<3.12"
description = "Make ZIM file from Gutenberg books"
readme = "pypi-readme.rst"
license = { text = "GPL-3.0-or-later" }
Expand Down
30 changes: 15 additions & 15 deletions src/gutenberg2zim/download.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
import pathlib
from pathlib import Path

Check warning on line 2 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L2

Added line #L2 was not covered by tests
import shutil
import tempfile
import zipfile
Expand All @@ -9,7 +9,6 @@
import apsw
import backoff
from kiwixstorage import KiwixStorage
from path import Path

from gutenberg2zim.constants import TMP_FOLDER, logger
from gutenberg2zim.database import Book, BookFormat
Expand All @@ -36,15 +35,15 @@
# return False


def handle_zipped_epub(zippath, book, dst_dir: pathlib.Path):
def handle_zipped_epub(zippath, book, dst_dir: Path):

Check warning on line 38 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L38

Added line #L38 was not covered by tests
def clfn(fn):
return os.path.join(*os.path.split(fn)[1:])
return Path(*Path(fn).parts[1:])

Check warning on line 40 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L40

Added line #L40 was not covered by tests

def is_safe(fname):
fname = ensure_unicode(clfn(fname))
if Path(fname).basename() == fname:
if Path(fname).name == fname:
return True
return fname == os.path.join("images", Path(fname).splitpath()[-1])
return fname == Path("images") / Path(fname).name

Check warning on line 46 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L46

Added line #L46 was not covered by tests

zipped_files = []
# create temp directory to extract to
Expand All @@ -53,7 +52,7 @@
with zipfile.ZipFile(zippath, "r") as zf:
# check that there is no insecure data (absolute names)
if sum([1 for n in zf.namelist() if not is_safe(ensure_unicode(n))]):
Path(tmpd).rmtree_p()
shutil.rmtree(tmpd)

Check warning on line 55 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L55

Added line #L55 was not covered by tests
return False
# zipped_files = [clfn(fn) for fn in zf.namelist()]
zipped_files = zf.namelist()
Expand All @@ -73,12 +72,12 @@
# move all extracted files to proper locations
for zipped_file in zipped_files:
# skip folders
if not Path(zipped_file).ext:
if not Path(zipped_file).suffix:
continue
benoit74 marked this conversation as resolved.
Show resolved Hide resolved

src = os.path.join(tmpd, zipped_file)
if os.path.exists(src):
fname = Path(zipped_file).basename()
src = Path(tmpd) / zipped_file

Check warning on line 78 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L78

Added line #L78 was not covered by tests
if Path(src).exists():
fname = Path(zipped_file).name

Check warning on line 80 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L80

Added line #L80 was not covered by tests
benoit74 marked this conversation as resolved.
Show resolved Hide resolved

if fname.endswith(".html") or fname.endswith(".htm"):
if mhtml:
Expand All @@ -91,7 +90,7 @@
else:
dst = dst_dir.joinpath(f"{book.id}_{fname}")
try:
Path(src).move(str(dst))
Path(src).rename(dst)

Check warning on line 93 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L93

Added line #L93 was not covered by tests
except Exception as e:
benoit74 marked this conversation as resolved.
Show resolved Hide resolved
import traceback

Expand All @@ -102,7 +101,8 @@
# delete temp directory and zipfile
if Path(zippath).exists():
os.unlink(zippath)
Path(tmpd).rmtree_p()
shutil.rmtree(tmpd)

Check warning on line 104 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L104

Added line #L104 was not covered by tests



def download_book(
Expand All @@ -124,7 +124,7 @@
if "html" not in formats:
formats.append("html")

book_dir = pathlib.Path(download_cache).joinpath(str(book.id))
book_dir = Path(download_cache).joinpath(str(book.id))

Check warning on line 127 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L127

Added line #L127 was not covered by tests
benoit74 marked this conversation as resolved.
Show resolved Hide resolved
optimized_dir = book_dir.joinpath("optimized")
unoptimized_dir = book_dir.joinpath("unoptimized")
unsuccessful_formats = []
Expand Down Expand Up @@ -372,7 +372,7 @@
)

# ensure dir exist
Path(download_cache).mkdir_p()
Path(download_cache).mkdir(parents=True, exist_ok=True)

Check warning on line 375 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L375

Added line #L375 was not covered by tests

def backoff_busy_error_hdlr(details):
logger.warning(
Expand Down
7 changes: 3 additions & 4 deletions src/gutenberg2zim/entrypoint.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import os
import sys

from pathlib import Path

Check warning on line 3 in src/gutenberg2zim/entrypoint.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/entrypoint.py#L3

Added line #L3 was not covered by tests
from docopt import docopt
from path import Path

from gutenberg2zim.checkdeps import check_dependencies
from gutenberg2zim.constants import TMP_FOLDER_PATH, VERSION, logger
Expand Down Expand Up @@ -90,7 +89,7 @@
arguments.get("--rdf-url")
or "http://www.gutenberg.org/cache/epub/feeds/rdf-files.tar.bz2"
)
dl_cache = arguments.get("--dl-folder") or os.path.join("dl-cache")
dl_cache = arguments.get("--dl-folder") or "dl-cache"

Check warning on line 92 in src/gutenberg2zim/entrypoint.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/entrypoint.py#L92

Added line #L92 was not covered by tests
books_csv = arguments.get("--books") or ""
zim_title = arguments.get("--zim-title")
zim_desc = arguments.get("--zim-desc")
Expand Down Expand Up @@ -209,7 +208,7 @@
if do_zim:
logger.info("BUILDING ZIM dynamically")
build_zimfile(
output_folder=Path(one_lang_one_zim_folder or ".").abspath(),
output_folder=Path(one_lang_one_zim_folder or ".").resolve(),
download_cache=dl_cache,
concurrency=concurrency,
languages=zim_lang,
Expand Down
41 changes: 22 additions & 19 deletions src/gutenberg2zim/export.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import os
import pathlib
from pathlib import Path

Check warning on line 3 in src/gutenberg2zim/export.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/export.py#L3

Added line #L3 was not covered by tests
import shutil
import tempfile
import traceback
Expand All @@ -11,7 +11,6 @@
import bs4
from bs4 import BeautifulSoup
from jinja2 import Environment, PackageLoader
from path import Path
from schedule import every
from six import text_type
from zimscraperlib.image.transformation import resize_image
Expand Down Expand Up @@ -95,7 +94,7 @@


def tmpl_path():
return os.path.join(Path(gutenberg2zim.__file__).parent, "templates")
return Path(gutenberg2zim.__file__).parent / "templates"

Check warning on line 97 in src/gutenberg2zim/export.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/export.py#L97

Added line #L97 was not covered by tests


def get_list_of_all_languages():
Expand All @@ -105,8 +104,8 @@
def export_illustration():
logger.info("Adding illustration")

src_illus_fpath = pathlib.Path(tmpl_path(), "favicon.png")
tmp_illus_fpath = pathlib.Path(TMP_FOLDER_PATH, "illustration.png")
src_illus_fpath = Path(tmpl_path(), "favicon.png")
tmp_illus_fpath = Path(TMP_FOLDER_PATH, "illustration.png")

Check warning on line 108 in src/gutenberg2zim/export.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/export.py#L107-L108

Added lines #L107 - L108 were not covered by tests

shutil.copy(src_illus_fpath, tmp_illus_fpath)

Expand Down Expand Up @@ -152,18 +151,18 @@
"datatables",
"fonts",
):
src = os.path.join(src_folder, fname)
src = Path(src_folder) / fname

Check warning on line 154 in src/gutenberg2zim/export.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/export.py#L154

Added line #L154 was not covered by tests

# recursively add our assets, at a path identical to position in repo
assets_root = pathlib.Path(src)
assets_root = Path(src)

Check warning on line 157 in src/gutenberg2zim/export.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/export.py#L157

Added line #L157 was not covered by tests
if assets_root.is_file():
Global.add_item_for(path=fname, fpath=assets_root)
else:
for fpath in assets_root.glob("**/*"):
if not fpath.is_file() or fpath.name == "l10n.js":
continue
path = str(fpath.relative_to(src))
Global.add_item_for(path=os.path.join(fname, path), fpath=fpath)
Global.add_item_for(path=str(Path(fname) / path), fpath=fpath)

Check warning on line 165 in src/gutenberg2zim/export.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/export.py#L165

Added line #L165 was not covered by tests

# export homepage
tpl_path = "Home.html"
Expand Down Expand Up @@ -273,7 +272,7 @@
def dlb(b):
export_book(
b,
book_dir=pathlib.Path(download_cache).joinpath(str(b.id)),
book_dir=Path(download_cache).joinpath(str(b.id)),
formats=formats,
books=books,
project_id=project_id,
Expand Down Expand Up @@ -711,23 +710,23 @@

remove_cover = False
for fname in zipped_files:
fnp = os.path.join(tmpd, fname)
if Path(fname).ext in (".png", ".jpeg", ".jpg", ".gif"):
fnp = Path(tmpd) / fname

Check warning on line 713 in src/gutenberg2zim/export.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/export.py#L713

Added line #L713 was not covered by tests
if Path(fname).suffix in (".png", ".jpeg", ".jpg", ".gif"):
# special case to remove ugly cover
if fname.endswith("cover.jpg") and is_bad_cover(fnp):
zipped_files.remove(fname)
remove_cover = True
else:
optimize_image(pathlib.Path(fnp), pathlib.Path(fnp), force=True)
optimize_image(Path(fnp), Path(fnp), force=True)

Check warning on line 720 in src/gutenberg2zim/export.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/export.py#L720

Added line #L720 was not covered by tests

if Path(fname).ext in (".htm", ".html"):
if Path(fname).suffix in (".htm", ".html"):
html_content, _ = read_file(fnp)
html = update_html_for_static(
book=book, html_content=html_content, formats=formats, epub=True
)
save_bs_output(html, fnp, UTF8)

if Path(fname).ext == ".ncx":
if Path(fname).suffix == ".ncx":
pattern = "*** START: FULL LICENSE ***"
ncx, _ = read_file(fnp)
soup = BeautifulSoup(ncx, "lxml-xml")
Expand All @@ -744,11 +743,15 @@
# delete {id}/cover.jpg if exist and update {id}/content.opf
if remove_cover:
# remove cover
Path(os.path.join(tmpd, text_type(book.id), "cover.jpg")).unlink_p()
file_path = Path(tmpd) / text_type(book.id) / "cover.jpg"
try:
file_path.unlink()
except FileNotFoundError:
pass

Check warning on line 750 in src/gutenberg2zim/export.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/export.py#L746-L750

Added lines #L746 - L750 were not covered by tests

benoit74 marked this conversation as resolved.
Show resolved Hide resolved
soup = None
opff = os.path.join(tmpd, text_type(book.id), "content.opf")
if os.path.exists(opff):
opff = Path(tmpd) / text_type(book.id) / "content.opf"

Check warning on line 753 in src/gutenberg2zim/export.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/export.py#L753

Added line #L753 was not covered by tests
if Path(opff).exists():
opff_content, _ = read_file(opff)
benoit74 marked this conversation as resolved.
Show resolved Hide resolved
soup = BeautifulSoup(opff_content, "lxml-xml")

Expand All @@ -761,7 +764,7 @@
# bundle epub as zip
zip_epub(epub_fpath=dst, root_folder=tmpd, fpaths=zipped_files)

Path(tmpd).rmtree_p()
shutil.rmtree(tmpd)

Check warning on line 767 in src/gutenberg2zim/export.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/export.py#L767

Added line #L767 was not covered by tests

def handle_companion_file(
fname,
Expand Down Expand Up @@ -821,7 +824,7 @@
as_ext=".zip",
)
else:
Path(tmp_epub.name).move(str(dst))
tmp_epub.rename(Path(dst) / tmp_epub.name)

Check warning on line 827 in src/gutenberg2zim/export.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/export.py#L827

Added line #L827 was not covered by tests
Global.add_item_for(path=dstfname, fpath=dst)
benoit74 marked this conversation as resolved.
Show resolved Hide resolved
if s3_storage:
upload_to_cache(
Expand Down
14 changes: 7 additions & 7 deletions src/gutenberg2zim/rdf.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import os
import pathlib
import re
import tarfile

from pathlib import Path

Check warning on line 4 in src/gutenberg2zim/rdf.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/rdf.py#L4

Added line #L4 was not covered by tests
import peewee
from bs4 import BeautifulSoup

Expand All @@ -18,7 +17,7 @@

def get_rdf_fpath():
fname = "rdf-files.tar.bz2"
fpath = pathlib.Path(fname).resolve()
fpath = Path(fname).resolve()

Check warning on line 20 in src/gutenberg2zim/rdf.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/rdf.py#L20

Added line #L20 was not covered by tests
return fpath


Expand All @@ -38,7 +37,7 @@
rdf_tarfile = tarfile.open(name=rdf_path, mode="r|bz2")

for rdf_member in rdf_tarfile:
rdf_member_path = pathlib.Path(rdf_member.name)
rdf_member_path = Path(rdf_member.name)

Check warning on line 40 in src/gutenberg2zim/rdf.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/rdf.py#L40

Added line #L40 was not covered by tests

# skip books outside of requested list
if (
Expand Down Expand Up @@ -297,9 +296,10 @@
nums = [f"{i:0=5d}" for i in range(21000, 40000)]
for num in nums:
print(num) # noqa: T201
curd = os.path.dirname(os.path.realpath(__file__))
rdf = os.path.join(curd, "..", "rdf-files", num, "pg" + num + ".rdf")
if os.path.isfile(rdf):
curd = Path(__file__).resolve().parent
rdf = curd.parent / "rdf-files" / num / f"pg{num}.rdf"

if rdf.is_file():
data = ""
with open(rdf) as f:
data = f.read()
Expand Down
Loading
Loading