Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,7 @@ It is a reimplementation of [Apify's fingerprint-suite](https://github.com/apify

```
pip install browserforge[all]
python -m browserforge update
```

The `[all]` extra will include optional libraries like orjson.

Use `python -m browserforge update` to fetch the necessary model files. If the command is not run, the files will be downloaded on the first import.

<hr width=50>

## Usage

## Generating Headers
Expand Down Expand Up @@ -594,10 +586,7 @@ Parameters:

## Uninstall

To fully remove all files, run the following commands:

```
python -m browserforge remove
pip uninstall browserforge
```

Expand Down
39 changes: 0 additions & 39 deletions browserforge/__main__.py

This file was deleted.

134 changes: 17 additions & 117 deletions browserforge/download.py
Original file line number Diff line number Diff line change
@@ -1,147 +1,47 @@
import shutil
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Iterator

import click

"""
Downloads the required model definitions
Downloads the required model definitions - deprecated
"""


# Directory containing this module; the data directories below are resolved from it.
ROOT_DIR: Path = Path(__file__).parent

# Constants for headers and fingerprints data.

# Local directory that holds the data files of each data type.
DATA_DIRS: Dict[str, Path] = {
    "headers": ROOT_DIR / "headers/data",
    "fingerprints": ROOT_DIR / "fingerprints/data",
}

# Base URL under which the files of each data type are published.
REMOTE_PATHS: Dict[str, str] = {
    "headers": "https://github.com/apify/fingerprint-suite/raw/master/packages/header-generator/src/data_files",
    "fingerprints": "https://github.com/apify/fingerprint-suite/raw/master/packages/fingerprint-generator/src/data_files",
}

# Per data type: local file name -> file name appended to the REMOTE_PATHS base URL.
DATA_FILES: Dict[str, Dict[str, str]] = {
    "headers": {
        "browser-helper-file.json": "browser-helper-file.json",
        "header-network.zip": "header-network-definition.zip",
        "headers-order.json": "headers-order.json",
        "input-network.zip": "input-network-definition.zip",
    },
    "fingerprints": {
        "fingerprint-network.zip": "fingerprint-network-definition.zip",
    },
}


class DownloadException(Exception):
    """Raised when a file download does not succeed."""


class DataDownloader:
    """
    Download the data files of the enabled data types.

    Each keyword argument names a data type (a key of ``DATA_FILES``, i.e.
    ``headers`` or ``fingerprints``); a truthy value enables it.
    """

    def __init__(self, **kwargs: bool) -> None:
        # _enabled_flags() returns a one-shot iterator. Materialize it so that
        # download() can be called more than once on the same instance.
        self.options = tuple(_enabled_flags(kwargs))

    def download_file(self, url: str, path: str) -> None:
        """
        Download a file from the specified URL and save it to the given path.

        Raises:
            DownloadException: If the response status is not 200.
        """
        with urllib.request.urlopen(url) as resp:  # nosec
            if resp.status != 200:
                raise DownloadException(f"Download failed with status code: {resp.status}")
            with open(path, "wb") as f:
                shutil.copyfileobj(resp, f)

    def download(self) -> None:
        """
        Download every data file of the enabled data types concurrently.

        The outcome of each file is printed as it completes. A failed download
        is reported but does not abort the remaining ones.
        """
        futures = {}
        with ThreadPoolExecutor(10) as executor:
            for data_type in self.options:
                for local_name, remote_name in DATA_FILES[data_type].items():
                    url = f"{REMOTE_PATHS[data_type]}/{remote_name}"
                    path = str(DATA_DIRS[data_type] / local_name)
                    future = executor.submit(self.download_file, url, path)
                    futures[future] = local_name
            for f in as_completed(futures):
                try:
                    # Inspect the future that just completed, not the last
                    # one submitted, so each file reports its own outcome.
                    f.result()
                except Exception as e:
                    # Best-effort reporting: keep going with the other files.
                    click.secho(f"Error downloading {futures[f]}: {e}", fg="red")
                else:
                    click.secho(f"{futures[f]:<30}OK!", fg="green")


def _enabled_flags(flags: Dict[str, bool]) -> Iterator[str]:
"""
Returns a list of enabled flags based on a given dictionary
"""
for flag, enabled in flags.items():
if enabled:
yield flag


def _get_all_paths(**flags: bool) -> Iterator[Path]:
    """
    Yield the local path of every data file belonging to an enabled data type.

    The paths are built from ``DATA_DIRS`` and the local file names in
    ``DATA_FILES``; this function does not check whether the files exist.

    Args:
        **flags: Data type name (a key of ``DATA_FILES``) mapped to whether
            that data type is enabled.
    """
    for data_type in _enabled_flags(flags):
        data_path = DATA_DIRS[data_type]
        # Only the local names (the dict keys) are needed to build the paths.
        for local_name in DATA_FILES[data_type]:
            yield data_path / local_name


"""
Public download functions
"""


def Download(headers=False, fingerprints=False) -> None:
"""
Download the required data files
Deprecated. Downloading model definition files is no longer needed.

Files are included as explicit python package dependency.
"""
# Announce that files are being downloaded
click.secho('Downloading model definition files...', fg='bright_yellow')
try:
DataDownloader(headers=headers, fingerprints=fingerprints).download()
except KeyboardInterrupt:
print("Download interrupted.")
Remove()
exit()
click.secho('Deprecated. Downloading model definition files is no longer needed.', fg='bright_yellow')


def DownloadIfNotExists(**flags: bool) -> None:
"""
Download the required data files if they don't exist
Deprecated. Downloading model definition files is no longer needed.

Files are included as explicit python package dependency.
"""
if not IsDownloaded(**flags):
Download(**flags)
pass


def IsDownloaded(**flags: bool) -> bool:
"""
Check if the required data files are already downloaded and not older than a month.
Returns True if all the requested data files are present and not older than a month, False otherwise.
"""
for path in _get_all_paths(**flags):
if not path.exists():
return False
Deprecated. Downloading model definition files is no longer needed.

# Check if the file is older than a month
file_creation_time = datetime.fromtimestamp(path.stat().st_ctime)
one_month_ago = datetime.now() - timedelta(weeks=5)
return file_creation_time >= one_month_ago
Files are included as explicit python package dependency.
"""
return True


def Remove() -> None:
"""
Deletes all downloaded data files
Deprecated. Downloading model definition files is no longer needed.

Files are included as explicit python package dependency.
"""
for path in _get_all_paths(headers=True, fingerprints=True):
path.unlink(missing_ok=True)
pass
Empty file.
4 changes: 3 additions & 1 deletion browserforge/fingerprints/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from pathlib import Path
from typing import Dict, List, Optional

from apify_fingerprint_datapoints import get_fingerprint_network

from browserforge.bayesian_network import BayesianNetwork, get_possible_values
from browserforge.headers import HeaderGenerator
from browserforge.headers.utils import get_user_agent
Expand Down Expand Up @@ -128,7 +130,7 @@ def is_set(self) -> bool:
class FingerprintGenerator:
"""Generates realistic browser fingerprints"""

fingerprint_generator_network = BayesianNetwork(DATA_DIR / "fingerprint-network.zip")
fingerprint_generator_network = BayesianNetwork(get_fingerprint_network())

def __init__(
self,
Expand Down
Empty file.
14 changes: 6 additions & 8 deletions browserforge/headers/generator.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union

from browserforge.bayesian_network import BayesianNetwork, get_possible_values
from apify_fingerprint_datapoints import (get_header_network, get_headers_order, get_browser_helper_file,
get_input_network)

from .utils import get_browser, get_user_agent, pascalize_headers, tuplify

Expand Down Expand Up @@ -35,7 +36,6 @@
'sec-fetch-site': '?1',
'sec-fetch-user': 'document',
}
DATA_DIR: Path = Path(__file__).parent / 'data'
ListOrString: TypeAlias = Union[Tuple[str, ...], List[str], str]


Expand Down Expand Up @@ -83,8 +83,8 @@ class HeaderGenerator:
relaxation_order: Tuple[str, ...] = ('locales', 'devices', 'operatingSystems', 'browsers')

# Initialize networks
input_generator_network = BayesianNetwork(DATA_DIR / "input-network.zip")
header_generator_network = BayesianNetwork(DATA_DIR / "header-network.zip")
input_generator_network = BayesianNetwork(get_input_network())
header_generator_network = BayesianNetwork(get_header_network())

def __init__(
self,
Expand Down Expand Up @@ -433,8 +433,7 @@ def _load_headers_order(self) -> Dict[str, List[str]]:
Returns:
Dict[str, List[str]]: Dictionary of headers order for each browser.
"""
headers_order_path = DATA_DIR / "headers-order.json"
return json.loads(headers_order_path.read_bytes())
return json.loads(get_headers_order().read_bytes())

def _load_unique_browsers(self) -> List[HttpBrowserObject]:
"""
Expand All @@ -443,8 +442,7 @@ def _load_unique_browsers(self) -> List[HttpBrowserObject]:
Returns:
List[HttpBrowserObject]: List of HttpBrowserObject instances.
"""
browser_helper_path = DATA_DIR / 'browser-helper-file.json'
unique_browser_strings = json.loads(browser_helper_path.read_bytes())
unique_browser_strings = json.loads(get_browser_helper_file().read_bytes())
return [
self._prepare_http_browser_object(browser_str)
for browser_str in unique_browser_strings
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "browserforge"
version = "1.2.3"
version = "1.2.4"
description = "Intelligent browser header & fingerprint generator"
authors = ["daijro <daijro.dev@gmail.com>"]
license = "Apache-2.0"
Expand Down Expand Up @@ -32,6 +32,7 @@ python = "^3.8"
click = "*"
orjson = { version = "*", optional = true }
typing_extensions = {version = "*", python = "<3.10"}
apify_fingerprint_datapoints = "*"

[tool.poetry.extras]
all = ["orjson"]