Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 59 additions & 19 deletions src/crawlee/cli.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# ruff: noqa: FA100 ASYNC210
# ruff: noqa: FA100 ASYNC210 ASYNC100
import asyncio
from functools import wraps
from pathlib import Path
from typing import Annotated, Any, Callable, Coroutine, List, Union

import httpx
import inquirer # type: ignore
import typer
from cookiecutter.main import cookiecutter # type: ignore
from rich.progress import Progress, SpinnerColumn, TextColumn

TEMPLATE_LIST_URL = 'https://api.github.com/repos/apify/crawlee-python/contents/templates'

Expand All @@ -21,12 +23,26 @@ def wrapper(*args: Any, **kwargs: Any) -> None:
return wrapper


cli = typer.Typer()
cli = typer.Typer(no_args_is_help=True)


@cli.callback()
def callback() -> None:
"""An empty callback to force typer into making a CLI with a single command."""
@cli.callback(invoke_without_command=True)
def callback(
version: Annotated[ # noqa: FBT002
bool,
typer.Option(
'-V',
'--version',
is_flag=True,
help='Print Crawlee version',
),
] = False,
) -> None:
"""Implements the 'no command' behavior."""
if version:
from crawlee import __version__

typer.echo(__version__)


@cli.command()
Expand All @@ -46,20 +62,36 @@ async def create(
) -> None:
"""Bootstrap a new Crawlee project."""
if template is None:
templates_response = httpx.get(TEMPLATE_LIST_URL)
templates_response = httpx.get(TEMPLATE_LIST_URL, timeout=httpx.Timeout(10))
template_choices: List[str] = [item['name'] for item in templates_response.json() if item['type'] == 'dir']
else:
template_choices = []

while project_name is None:
answers = (
inquirer.prompt(
[
inquirer.Text(
'project_name',
message='Name of the new project folder',
validate=lambda _, it: len(it) > 0,
ignore=project_name is not None,
),
]
)
or {}
)

project_path = Path.cwd() / answers['project_name']

if project_path.exists():
typer.echo(f'Folder {project_path} exists', err=True)
else:
project_name = answers['project_name']

answers = (
inquirer.prompt(
[
inquirer.Text(
'project_name',
message='Name of the new project folder',
validate=lambda _, it: len(it) > 0,
ignore=project_name is not None,
),
inquirer.List(
'template',
message='Please select the template for your new Crawlee project',
Expand All @@ -71,12 +103,20 @@ async def create(
or {}
)

project_name = project_name or answers['project_name']
template = template or answers['template']

cookiecutter(
'gh:apify/crawlee-python',
directory=f'templates/{template}',
no_input=True,
extra_context={'project_name': project_name},
)
with Progress(
SpinnerColumn(),
TextColumn('[progress.description]{task.description}'),
transient=True,
) as progress:
progress.add_task(description='Bootstrapping...', total=None)
cookiecutter(
'gh:apify/crawlee-python',
directory=f'templates/{template}',
no_input=True,
extra_context={'project_name': project_name},
)

typer.echo(f'Your project was created in {Path.cwd() / project_name}')
typer.echo(f'To run your project, run `cd {project_name}`, `poetry install` and `python -m {project_name}`')
6 changes: 5 additions & 1 deletion src/crawlee/http_clients/httpx_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,11 @@ def __init__(

def _get_client(self, proxy_url: str | None) -> httpx.AsyncClient:
if proxy_url not in self._client_by_proxy_url:
self._client_by_proxy_url[proxy_url] = httpx.AsyncClient(transport=HttpTransport(), proxy=proxy_url)
self._client_by_proxy_url[proxy_url] = httpx.AsyncClient(
transport=HttpTransport(),
proxy=proxy_url,
timeout=httpx.Timeout(10),
)

return self._client_by_proxy_url[proxy_url]

Expand Down
12 changes: 5 additions & 7 deletions website/generate_module_shortcuts.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,15 @@ def get_module_shortcuts(module, parent_classes=None):

if parent_classes is None:
parent_classes = []
parent_module_name = ".".join(module.__name__.split(".")[:-1])
parent_module_name = '.'.join(module.__name__.split('.')[:-1])
module_classes = []
for classname, cls in inspect.getmembers(module, inspect.isclass):
module_classes.append(cls)
if cls in parent_classes:
shortcuts[f"{module.__name__}.{classname}"] = (
f"{parent_module_name}.{classname}"
)
shortcuts[f'{module.__name__}.{classname}'] = f'{parent_module_name}.{classname}'

for _, submodule in inspect.getmembers(module, inspect.ismodule):
if submodule.__name__.startswith("apify"):
if submodule.__name__.startswith('apify'):
shortcuts.update(get_module_shortcuts(submodule, module_classes))

return shortcuts
Expand All @@ -40,7 +38,7 @@ def resolve_shortcuts(shortcuts):


shortcuts = {}
for module_name in ["crawlee"]:
for module_name in ['crawlee']:
try:
module = importlib.import_module(module_name)
module_shortcuts = get_module_shortcuts(module)
Expand All @@ -50,5 +48,5 @@ def resolve_shortcuts(shortcuts):

resolve_shortcuts(shortcuts)

with open("module_shortcuts.json", "w", encoding="utf-8") as shortcuts_file:
with open('module_shortcuts.json', 'w', encoding='utf-8') as shortcuts_file:
json.dump(shortcuts, shortcuts_file, indent=4, sort_keys=True)