diff --git a/src/crawlee/cli.py b/src/crawlee/cli.py index cedb61637a..28a6fed409 100644 --- a/src/crawlee/cli.py +++ b/src/crawlee/cli.py @@ -1,12 +1,14 @@ -# ruff: noqa: FA100 ASYNC210 +# ruff: noqa: FA100 ASYNC210 ASYNC100 import asyncio from functools import wraps +from pathlib import Path from typing import Annotated, Any, Callable, Coroutine, List, Union import httpx import inquirer # type: ignore import typer from cookiecutter.main import cookiecutter # type: ignore +from rich.progress import Progress, SpinnerColumn, TextColumn TEMPLATE_LIST_URL = 'https://api.github.com/repos/apify/crawlee-python/contents/templates' @@ -21,12 +23,26 @@ def wrapper(*args: Any, **kwargs: Any) -> None: return wrapper -cli = typer.Typer() +cli = typer.Typer(no_args_is_help=True) -@cli.callback() -def callback() -> None: - """An empty callback to force typer into making a CLI with a single command.""" +@cli.callback(invoke_without_command=True) +def callback( + version: Annotated[ # noqa: FBT002 + bool, + typer.Option( + '-V', + '--version', + is_flag=True, + help='Print Crawlee version', + ), + ] = False, +) -> None: + """Implements the 'no command' behavior.""" + if version: + from crawlee import __version__ + + typer.echo(__version__) @cli.command() @@ -46,20 +62,36 @@ async def create( ) -> None: """Bootstrap a new Crawlee project.""" if template is None: - templates_response = httpx.get(TEMPLATE_LIST_URL) + templates_response = httpx.get(TEMPLATE_LIST_URL, timeout=httpx.Timeout(10)) template_choices: List[str] = [item['name'] for item in templates_response.json() if item['type'] == 'dir'] else: template_choices = [] + while project_name is None: + answers = ( + inquirer.prompt( + [ + inquirer.Text( + 'project_name', + message='Name of the new project folder', + validate=lambda _, it: len(it) > 0, + ignore=project_name is not None, + ), + ] + ) + or {} + ) + + project_path = Path.cwd() / answers['project_name'] + + if project_path.exists(): + typer.echo(f'Folder {project_path} exists', err=True) + else: + project_name = answers['project_name'] + answers = ( inquirer.prompt( [ - inquirer.Text( - 'project_name', - message='Name of the new project folder', - validate=lambda _, it: len(it) > 0, - ignore=project_name is not None, - ), inquirer.List( 'template', message='Please select the template for your new Crawlee project', @@ -71,12 +103,20 @@ async def create( or {} ) - project_name = project_name or answers['project_name'] template = template or answers['template'] - cookiecutter( - 'gh:apify/crawlee-python', - directory=f'templates/{template}', - no_input=True, - extra_context={'project_name': project_name}, - ) + with Progress( + SpinnerColumn(), + TextColumn('[progress.description]{task.description}'), + transient=True, + ) as progress: + progress.add_task(description='Bootstrapping...', total=None) + cookiecutter( + 'gh:apify/crawlee-python', + directory=f'templates/{template}', + no_input=True, + extra_context={'project_name': project_name}, + ) + + typer.echo(f'Your project was created in {Path.cwd() / project_name}') + typer.echo(f'To run your project, run `cd {project_name}`, `poetry install` and `python -m {project_name}`') diff --git a/src/crawlee/http_clients/httpx_client.py b/src/crawlee/http_clients/httpx_client.py index 477c7ab425..76f61530bf 100644 --- a/src/crawlee/http_clients/httpx_client.py +++ b/src/crawlee/http_clients/httpx_client.py @@ -72,7 +72,11 @@ def __init__( def _get_client(self, proxy_url: str | None) -> httpx.AsyncClient: if proxy_url not in self._client_by_proxy_url: - self._client_by_proxy_url[proxy_url] = httpx.AsyncClient(transport=HttpTransport(), proxy=proxy_url) + self._client_by_proxy_url[proxy_url] = httpx.AsyncClient( + transport=HttpTransport(), + proxy=proxy_url, + timeout=httpx.Timeout(10), + ) return self._client_by_proxy_url[proxy_url] diff --git a/website/generate_module_shortcuts.py b/website/generate_module_shortcuts.py index 0d87dff811..dbd9214f9c 100755 --- a/website/generate_module_shortcuts.py +++ b/website/generate_module_shortcuts.py @@ -11,17 +11,15 @@ def get_module_shortcuts(module, parent_classes=None): if parent_classes is None: parent_classes = [] - parent_module_name = ".".join(module.__name__.split(".")[:-1]) + parent_module_name = '.'.join(module.__name__.split('.')[:-1]) module_classes = [] for classname, cls in inspect.getmembers(module, inspect.isclass): module_classes.append(cls) if cls in parent_classes: - shortcuts[f"{module.__name__}.{classname}"] = ( - f"{parent_module_name}.{classname}" - ) + shortcuts[f'{module.__name__}.{classname}'] = f'{parent_module_name}.{classname}' for _, submodule in inspect.getmembers(module, inspect.ismodule): - if submodule.__name__.startswith("apify"): + if submodule.__name__.startswith('apify'): shortcuts.update(get_module_shortcuts(submodule, module_classes)) return shortcuts @@ -40,7 +38,7 @@ def resolve_shortcuts(shortcuts): shortcuts = {} -for module_name in ["crawlee"]: +for module_name in ['crawlee']: try: module = importlib.import_module(module_name) module_shortcuts = get_module_shortcuts(module) @@ -50,5 +48,5 @@ def resolve_shortcuts(shortcuts): resolve_shortcuts(shortcuts) -with open("module_shortcuts.json", "w", encoding="utf-8") as shortcuts_file: +with open('module_shortcuts.json', 'w', encoding='utf-8') as shortcuts_file: json.dump(shortcuts, shortcuts_file, indent=4, sort_keys=True)