Skip to content

Commit

Permalink
Replace os.path with pathlib
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Aug 2, 2024
1 parent 9a4c9df commit ddbf74e
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 32 deletions.
3 changes: 1 addition & 2 deletions src/warc2zim/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import json
import logging
import mimetypes
import os
import pathlib
import re
import sys
Expand Down Expand Up @@ -147,7 +146,7 @@ def __init__(self, args):
self.full_filename = self.output / self.zim_file

# ensure output directory exists
if not os.path.isdir(self.output):
if not self.output.is_dir():
logger.error(
f"Output directory {self.output} does not exist. Exiting with error "
"code 1"
Expand Down
54 changes: 24 additions & 30 deletions tests/test_warc_to_zim.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import io
import json
import os
import pathlib
import re
import time
Expand All @@ -20,12 +19,10 @@
from warc2zim.url_rewriting import HttpUrl, ZimPath, normalize
from warc2zim.utils import get_record_url

TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")
TEST_DATA_DIR = pathlib.Path(__file__).parent / "data"
# special data dir for WARC files which are not supposed to be run in the
# `test_all_warcs_root_dir` test
TEST_DATA_SPECIAL_DIR = os.path.join(
os.path.dirname(os.path.realpath(__file__)), "data-special"
)
TEST_DATA_SPECIAL_DIR = pathlib.Path(__file__).parent / "data-special"

SCRAPER_SUFFIX = " + zimit x.y.z-devw"

Expand Down Expand Up @@ -125,8 +122,8 @@ def assert_item_does_not_exist(self, zimfile, path):
assert payload is None

def verify_warc_and_zim(self, warcfile, zimfile, verify_scraper_suffix):
assert os.path.isfile(warcfile)
assert os.path.isfile(zimfile)
assert pathlib.Path(warcfile).is_file()
assert pathlib.Path(zimfile).is_file()

# [TOFIX]
head_insert = b""
Expand Down Expand Up @@ -337,7 +334,7 @@ def test_warc_to_zim_specify_params_and_metadata(self, tmp_path):
main(
[
"-v",
os.path.join(TEST_DATA_DIR, "example-response.warc"),
str(TEST_DATA_DIR / "example-response.warc"),
"--name",
"example-response",
"--output",
Expand All @@ -355,7 +352,7 @@ def test_warc_to_zim_specify_params_and_metadata(self, tmp_path):

zim_output = tmp_path / zim_output

assert os.path.isfile(zim_output)
assert pathlib.Path(zim_output).is_file()

all_articles = {
article.path: article.title for article in self.list_articles(zim_output)
Expand Down Expand Up @@ -408,8 +405,8 @@ def test_warc_to_zim_main(self, cmdline, tmp_path):
filename = cmdline[0]

# set input filename (first arg) to absolute path from test dir
warcfile = os.path.join(TEST_DATA_DIR, filename)
cmdline[0] = warcfile
warcfile = TEST_DATA_DIR / filename
cmdline[0] = str(warcfile)

cmdline.extend(["--output", str(tmp_path), "--name", filename])

Expand All @@ -434,7 +431,7 @@ def test_same_domain_only(self, tmp_path):
zim_output = "same-domain.zim"
main(
[
os.path.join(TEST_DATA_DIR, "example-revisit.warc.gz"),
str(TEST_DATA_DIR / "example-revisit.warc.gz"),
"--favicon",
"http://example.com/favicon.ico",
"--include-domains",
Expand Down Expand Up @@ -462,7 +459,7 @@ def test_skip_self_redirect(self, tmp_path):
zim_output = "self-redir.zim"
main(
[
os.path.join(TEST_DATA_DIR, "self-redirect.warc"),
str(TEST_DATA_DIR / "self-redirect.warc"),
"--output",
str(tmp_path),
"--zim-file",
Expand All @@ -478,7 +475,7 @@ def test_include_domains_favicon_and_language(self, tmp_path):
zim_output = "spt.zim"
main(
[
os.path.join(TEST_DATA_DIR, "single-page-test.warc"),
str(TEST_DATA_DIR / "single-page-test.warc"),
"-i",
"reseau-canope.fr",
"--output",
Expand Down Expand Up @@ -526,7 +523,7 @@ def test_website_with_redirect(self, tmp_path):
zim_output = "kiwix.zim"
main(
[
os.path.join(TEST_DATA_DIR, "kiwix-with-redirects.warc.gz"),
str(TEST_DATA_DIR / "kiwix-with-redirects.warc.gz"),
"-u",
"http://www.kiwix.org",
"--output",
Expand Down Expand Up @@ -562,7 +559,7 @@ def test_all_warcs_root_dir(self, tmp_path):
zim_output = "test-all.zim"
main(
[
os.path.join(TEST_DATA_DIR),
str(TEST_DATA_DIR),
"--output",
str(tmp_path),
"--zim-file",
Expand Down Expand Up @@ -595,7 +592,7 @@ def test_fuzzy_urls(self, tmp_path, fuzzycheck):
zim_output = fuzzycheck["filename"] + ".zim"
main(
[
os.path.join(TEST_DATA_DIR, fuzzycheck["filename"]),
str(TEST_DATA_DIR / fuzzycheck["filename"]),
"--output",
str(tmp_path),
"--zim-file",
Expand All @@ -617,7 +614,7 @@ def test_error_bad_main_page(self, tmp_path):
main(
[
"-v",
os.path.join(TEST_DATA_DIR, "example-response.warc"),
str(TEST_DATA_DIR / "example-response.warc"),
"-u",
"https://no-such-url.example.com",
"--output",
Expand All @@ -637,7 +634,7 @@ def test_error_main_page_unprocessable(self, tmp_path):
main(
[
"-v",
os.path.join(TEST_DATA_DIR, "main-entry-403.warc.gz"),
str(TEST_DATA_DIR / "main-entry-403.warc.gz"),
"-u",
"https://wikizilla.org/wiki/Doug",
"--output",
Expand Down Expand Up @@ -681,7 +678,7 @@ def test_custom_css(self, tmp_path):

main(
[
os.path.join(TEST_DATA_DIR, "example-response.warc"),
str(TEST_DATA_DIR / "example-response.warc"),
"--output",
str(tmp_path),
"--zim-file",
Expand Down Expand Up @@ -709,7 +706,7 @@ def test_custom_css_remote(self, tmp_path):

main(
[
os.path.join(TEST_DATA_DIR, "example-response.warc"),
str(TEST_DATA_DIR / "example-response.warc"),
"--output",
str(tmp_path),
"--zim-file",
Expand All @@ -734,7 +731,7 @@ def test_http_return_codes(self, tmp_path):

main(
[
os.path.join(TEST_DATA_DIR, "http-return-codes.warc.gz"),
str(TEST_DATA_DIR / "http-return-codes.warc.gz"),
"--output",
str(tmp_path),
"--zim-file",
Expand Down Expand Up @@ -798,7 +795,7 @@ def test_redirection_loops(self, tmp_path):

main(
[
os.path.join(TEST_DATA_DIR, "redir-loops.warc.gz"),
str(TEST_DATA_DIR / "redir-loops.warc.gz"),
"--output",
str(tmp_path),
"--zim-file",
Expand Down Expand Up @@ -831,7 +828,7 @@ def test_content_resource_types(self, tmp_path):

main(
[
os.path.join(TEST_DATA_DIR, "content-resource-types.warc.gz"),
str(TEST_DATA_DIR / "content-resource-types.warc.gz"),
"--output",
str(tmp_path),
"--zim-file",
Expand Down Expand Up @@ -859,12 +856,9 @@ def test_content_encoding_aliases(self, tmp_path):

main(
[
os.path.join(
TEST_DATA_DIR,
"..",
"data-special",
"qsl.net-encoding-alias.warc.gz",
),
# cannot be processed like other TEST_DATA_DIR warcs since it needs
# special encoding aliases to be used in --encoding-aliases
str(TEST_DATA_SPECIAL_DIR / "qsl.net-encoding-alias.warc.gz"),
"--output",
str(tmp_path),
"--zim-file",
Expand Down

0 comments on commit ddbf74e

Please sign in to comment.