Skip to content

Commit

Permalink
Extended the handle_user_provided_file API to include user-agent support
Browse files Browse the repository at this point in the history
  • Loading branch information
aryanA101a committed Feb 22, 2024
1 parent 6241658 commit 8d54363
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 7 deletions.
1 change: 1 addition & 0 deletions src/zimscraperlib/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# Package name, taken from the name of the directory containing this module.
NAME = pathlib.Path(__file__).parent.name
# Scraper identifier: the package name followed by its version.
SCRAPER = f"{NAME} {__version__}"
# Contact address; it is embedded in DEFAULT_USER_AGENT below.
CONTACT = "dev@openzim.org"
# Default User-Agent string, shaped as "<name>/<version> (<contact>)".
DEFAULT_USER_AGENT = f"{NAME}/{__version__} ({CONTACT})"

UTF8 = "UTF-8"

Expand Down
10 changes: 3 additions & 7 deletions src/zimscraperlib/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,13 @@
from typing import Optional, Tuple, Union

from zimscraperlib import logger
from zimscraperlib.constants import (
CONTACT,
)
from zimscraperlib.constants import DEFAULT_USER_AGENT
from zimscraperlib.constants import (
MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
)
from zimscraperlib.constants import (
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH,
)
from zimscraperlib.constants import (
SCRAPER as PROJECT_NAME,
)
from zimscraperlib.download import stream_file


Expand All @@ -27,6 +22,7 @@ def handle_user_provided_file(
dest: Optional[pathlib.Path] = None,
in_dir: Optional[pathlib.Path] = None,
nocopy: bool = False, # noqa: FBT001, FBT002
user_agent: Optional[str] = DEFAULT_USER_AGENT,
) -> Union[pathlib.Path, None]:
"""path to downloaded or copied a user provided file (URL or path)
Expand All @@ -48,7 +44,7 @@ def handle_user_provided_file(

if str(source).startswith("http"):
logger.debug(f"download {source} -> {dest}")
headers = {"User-Agent": f"{PROJECT_NAME.replace(' ','/')} ({CONTACT})"}
headers = {"User-Agent": user_agent} if user_agent else None
stream_file(url=str(source), fpath=dest, headers=headers)
else:
source = pathlib.Path(source).expanduser().resolve()
Expand Down
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,16 @@ def webp_image():
return file_src("ninja.webp")


@pytest.fixture(scope="module")
def valid_user_agent():
    """Return a sample User-Agent string shaped as ``name/version (contact)``."""
    agent = "name/version (contact)"
    return agent


@pytest.fixture(scope="module")
def invalid_user_agent():
    """Return a sample malformed User-Agent string (unbalanced parenthesis)."""
    agent = "name version) (contact)"
    return agent


@pytest.fixture(scope="session")
def small_zim_file(tmpdir_factory):
from zimscraperlib.download import stream_file
Expand Down
36 changes: 36 additions & 0 deletions tests/inputs/test_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@

import pytest

import zimscraperlib
from zimscraperlib.constants import CONTACT
from zimscraperlib.constants import (
MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
)
from zimscraperlib.constants import (
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH,
)
from zimscraperlib.constants import (
NAME as PROJECT_NAME,
)
from zimscraperlib.inputs import compute_descriptions, handle_user_provided_file


Expand Down Expand Up @@ -80,6 +85,37 @@ def test_remote_indir(tmp_path, valid_http_url):
assert fpath.parent == tmp_path


def test_remote_default_user_agent(valid_http_url, monkeypatch):
    """A remote source fetched without ``user_agent`` sends the default one.

    ``stream_file`` is replaced by a recorder so that no network access
    happens. The assertions run after the call under test rather than inside
    the mock: that way the test fails if ``stream_file`` is never invoked,
    instead of passing vacuously with zero assertions executed.
    """
    calls = []

    def mock_stream_file(**kwargs):
        # Only record the keyword arguments; checks are done by the test body.
        calls.append(kwargs)

    monkeypatch.setattr(
        zimscraperlib.inputs,  # pyright: ignore[reportAttributeAccessIssue]
        "stream_file",
        mock_stream_file,
        raising=True,
    )
    handle_user_provided_file(source=valid_http_url)

    # The download helper must have been reached exactly once.
    assert len(calls) == 1
    headers = calls[0].get("headers")
    assert headers is not None
    user_agent = headers.get("User-Agent")
    assert isinstance(user_agent, str)
    assert user_agent.startswith(PROJECT_NAME)
    assert user_agent.endswith(f"({CONTACT})")


def test_remote_provided_none_user_agent(valid_http_url, monkeypatch):
    """Passing ``user_agent=None`` results in no headers being forwarded.

    ``stream_file`` is replaced by a recorder so that no network access
    happens. The assertions run after the call under test rather than inside
    the mock: that way the test fails if ``stream_file`` is never invoked,
    instead of passing vacuously with zero assertions executed.
    """
    calls = []

    def mock_stream_file(**kwargs):
        # Only record the keyword arguments; checks are done by the test body.
        calls.append(kwargs)

    monkeypatch.setattr(
        zimscraperlib.inputs,  # pyright: ignore[reportAttributeAccessIssue]
        "stream_file",
        mock_stream_file,
        raising=True,
    )
    handle_user_provided_file(source=valid_http_url, user_agent=None)

    # The download helper must have been reached exactly once.
    assert len(calls) == 1
    # Either an absent ``headers`` argument or an explicit None is accepted.
    assert calls[0].get("headers") is None


TEXT_NOT_USED = "text not used"

LONG_TEXT = (
Expand Down

0 comments on commit 8d54363

Please sign in to comment.