Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated the Codebase to use pathlib instead of using os.path and path.py #208

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions hatch_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,13 @@ def initialize(self, version, build_data):
logger.info("JS dependencies are already installed, skipping it")
return
subprocess.run(
str(Path(self.root).joinpath("get_js_deps.sh")),
str(Path(self.root) / "get_js_deps.sh"),
check=True,
)
return super().initialize(version, build_data)

def deps_already_installed(self) -> bool:
for dep in JS_DEPS:
if (
not Path(self.root)
.joinpath("gutebergtozim/templates")
.joinpath(dep)
.exists()
):
if not Path(self.root, "gutebergtozim/templates", dep).exists():
return False
return True
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "hatchling.build"
name = "gutenberg2zim"
authors = [{ name = "Kiwix", email = "dev@kiwix.org" }]
keywords = ["kiwix", "zim", "offline", "gutenberg"]
requires-python = ">=3.11"
requires-python = ">=3.11,<3.12"
description = "Make ZIM file from Gutenberg books"
readme = "pypi-readme.rst"
license = { text = "GPL-3.0-or-later" }
Expand Down
63 changes: 31 additions & 32 deletions src/gutenberg2zim/download.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import os
import pathlib
import shutil
import tempfile
import zipfile
from multiprocessing.dummy import Pool
from pathlib import Path

Check warning on line 6 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L6

Added line #L6 was not covered by tests
from pprint import pprint as pp

import apsw
import backoff
from kiwixstorage import KiwixStorage
from path import Path

from gutenberg2zim.constants import TMP_FOLDER, logger
from gutenberg2zim.database import Book, BookFormat
Expand All @@ -36,15 +35,15 @@
# return False


def handle_zipped_epub(zippath, book, dst_dir: pathlib.Path):
def handle_zipped_epub(zippath, book, dst_dir: Path):

Check warning on line 38 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L38

Added line #L38 was not covered by tests
def clfn(fn):
return os.path.join(*os.path.split(fn)[1:])
return Path(*Path(fn).parts[1:])

Check warning on line 40 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L40

Added line #L40 was not covered by tests

def is_safe(fname):
fname = ensure_unicode(clfn(fname))
if Path(fname).basename() == fname:
if Path(fname).name == fname:
return True
return fname == os.path.join("images", Path(fname).splitpath()[-1])
return fname == Path("images") / Path(fname).name

Check warning on line 46 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L46

Added line #L46 was not covered by tests

zipped_files = []
# create temp directory to extract to
Expand All @@ -53,7 +52,7 @@
with zipfile.ZipFile(zippath, "r") as zf:
# check that there is no insecure data (absolute names)
if sum([1 for n in zf.namelist() if not is_safe(ensure_unicode(n))]):
Path(tmpd).rmtree_p()
shutil.rmtree(tmpd)

Check warning on line 55 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L55

Added line #L55 was not covered by tests
return False
# zipped_files = [clfn(fn) for fn in zf.namelist()]
zipped_files = zf.namelist()
Expand All @@ -73,25 +72,25 @@
# move all extracted files to proper locations
for zipped_file in zipped_files:
# skip folders
if not Path(zipped_file).ext:
if not Path(zipped_file).is_file:
continue
benoit74 marked this conversation as resolved.
Show resolved Hide resolved

src = os.path.join(tmpd, zipped_file)
if os.path.exists(src):
fname = Path(zipped_file).basename()
src = Path(tmpd) / zipped_file

Check warning on line 78 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L78

Added line #L78 was not covered by tests
if src.exists():
fname = Path(zipped_file).name

Check warning on line 80 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L80

Added line #L80 was not covered by tests
benoit74 marked this conversation as resolved.
Show resolved Hide resolved

if fname.endswith(".html") or fname.endswith(".htm"):
if mhtml:
if fname.startswith(f"{book.id}-h."):
dst = dst_dir.joinpath(f"{book.id}.html")
dst = dst_dir / f"{book.id}.html"

Check warning on line 85 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L85

Added line #L85 was not covered by tests
else:
dst = dst_dir.joinpath(f"{book.id}_{fname}")
dst = dst_dir / f"{book.id}_{fname}"

Check warning on line 87 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L87

Added line #L87 was not covered by tests
else:
dst = dst_dir.joinpath(f"{book.id}.html")
dst = dst_dir / f"{book.id}.html"

Check warning on line 89 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L89

Added line #L89 was not covered by tests
else:
dst = dst_dir.joinpath(f"{book.id}_{fname}")
dst = dst_dir / f"{book.id}_{fname}"

Check warning on line 91 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L91

Added line #L91 was not covered by tests
try:
Path(src).move(str(dst))
src.rename(dst)

Check warning on line 93 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L93

Added line #L93 was not covered by tests
except Exception as e:
benoit74 marked this conversation as resolved.
Show resolved Hide resolved
import traceback

Expand All @@ -102,12 +101,12 @@
# delete temp directory and zipfile
if Path(zippath).exists():
os.unlink(zippath)
Path(tmpd).rmtree_p()
shutil.rmtree(tmpd)

Check warning on line 104 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L104

Added line #L104 was not covered by tests


def download_book(
book: Book,
download_cache: str,
download_cache: Path,
formats: list[str],
*,
force: bool,
Expand All @@ -124,13 +123,14 @@
if "html" not in formats:
formats.append("html")

book_dir = pathlib.Path(download_cache).joinpath(str(book.id))
optimized_dir = book_dir.joinpath("optimized")
unoptimized_dir = book_dir.joinpath("unoptimized")
book_dir = download_cache / str(book.id)
optimized_dir = book_dir / "optimized"
unoptimized_dir = book_dir / "unoptimized"

Check warning on line 128 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L126-L128

Added lines #L126 - L128 were not covered by tests
unsuccessful_formats = []

for book_format in formats:
unoptimized_fpath = unoptimized_dir.joinpath(fname_for(book, book_format))
optimized_fpath = optimized_dir.joinpath(archive_name_for(book, book_format))
unoptimized_fpath = unoptimized_dir / fname_for(book, book_format)
optimized_fpath = optimized_dir / archive_name_for(book, book_format)

Check warning on line 133 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L132-L133

Added lines #L132 - L133 were not covered by tests

# check if already downloaded
if (unoptimized_fpath.exists() or optimized_fpath.exists()) and not force:
Expand Down Expand Up @@ -233,7 +233,7 @@

# HTML files are *sometimes* available as ZIP files
if url.endswith(".zip"):
zpath = unoptimized_dir.joinpath(f"{fname_for(book, book_format)}.zip")
zpath = unoptimized_dir / f"{fname_for(book, book_format)}.zip"

Check warning on line 236 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L236

Added line #L236 was not covered by tests

etag = get_etag_from_url(url)
if s3_storage:
Expand Down Expand Up @@ -329,10 +329,9 @@
etag = get_etag_from_url(url)
downloaded_from_cache = False
cover = f"{book.id}_cover_image.jpg"
if (
book_dir.joinpath("optimized").joinpath(cover).exists()
or book_dir.joinpath("unoptimized").joinpath(cover).exists()
):
if (book_dir / "optimized" / cover).exists() or (
book_dir / "unoptimized" / cover
).exists():
logger.debug(f"Cover already exists for book #{book.id}")
return
if s3_storage:
Expand All @@ -343,21 +342,21 @@
book=book,
etag=etag,
book_format="cover",
dest_dir=book_dir.joinpath("optimized"),
dest_dir=book_dir / "optimized",
s3_storage=s3_storage,
optimizer_version=optimizer_version,
)
if not downloaded_from_cache:
logger.debug(f"Downloading {url}")
if download_file(url, book_dir.joinpath("unoptimized").joinpath(cover)):
if download_file(url, book_dir / "unoptimized" / cover):
book.cover_etag = etag
book.save()
else:
logger.debug(f"No Book Cover found for Book #{book.id}")


def download_all_books(
download_cache: str,
download_cache: Path,
concurrency: int,
languages: list[str],
formats: list[str],
Expand All @@ -372,7 +371,7 @@
)

# ensure dir exists
Path(download_cache).mkdir_p()
download_cache.mkdir(parents=True, exist_ok=True)

Check warning on line 374 in src/gutenberg2zim/download.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/download.py#L374

Added line #L374 was not covered by tests

def backoff_busy_error_hdlr(details):
logger.warning(
Expand Down
7 changes: 3 additions & 4 deletions src/gutenberg2zim/entrypoint.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import os
import sys
from pathlib import Path

Check warning on line 2 in src/gutenberg2zim/entrypoint.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/entrypoint.py#L2

Added line #L2 was not covered by tests

from docopt import docopt
from path import Path

from gutenberg2zim.checkdeps import check_dependencies
from gutenberg2zim.constants import TMP_FOLDER_PATH, VERSION, logger
Expand Down Expand Up @@ -90,7 +89,7 @@
arguments.get("--rdf-url")
or "http://www.gutenberg.org/cache/epub/feeds/rdf-files.tar.bz2"
)
dl_cache = arguments.get("--dl-folder") or os.path.join("dl-cache")
dl_cache = Path(arguments.get("--dl-folder") or "dl-cache")

Check warning on line 92 in src/gutenberg2zim/entrypoint.py

View check run for this annotation

Codecov / codecov/patch

src/gutenberg2zim/entrypoint.py#L92

Added line #L92 was not covered by tests
books_csv = arguments.get("--books") or ""
zim_title = arguments.get("--zim-title")
zim_desc = arguments.get("--zim-desc")
Expand Down Expand Up @@ -209,7 +208,7 @@
if do_zim:
logger.info("BUILDING ZIM dynamically")
build_zimfile(
output_folder=Path(one_lang_one_zim_folder or ".").abspath(),
output_folder=Path(one_lang_one_zim_folder or ".").resolve(),
download_cache=dl_cache,
concurrency=concurrency,
languages=zim_lang,
Expand Down
Loading