diff --git a/README.md b/README.md
index 709c65d..7d853c8 100644
--- a/README.md
+++ b/README.md
@@ -50,15 +50,7 @@ It is a reimplementation of [Apify's fingerprint-suite](https://github.com/apify
```
pip install browserforge[all]
-python -m browserforge update
```
-
-The `[all]` extra will include optional libraries like orjson.
-
-Use `python -m browserforge update` to fetch necessary model files. If the command is not run, files will be downloaded on the first import.
-
-
-
## Usage
## Generating Headers
@@ -594,10 +586,7 @@ Parameters:
## Uninstall
-To fully remove all files, run the following commands:
-
```
-python -m browserforge remove
pip uninstall browserforge
```
diff --git a/browserforge/__main__.py b/browserforge/__main__.py
deleted file mode 100644
index 8429688..0000000
--- a/browserforge/__main__.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import click
-
-from browserforge.download import Download, Remove
-
-
-class DownloadException(Exception):
- """Raises when the download fails."""
-
-
-@click.group()
-def cli() -> None:
- pass
-
-
-@cli.command(name='update')
-@click.option('--headers', is_flag=True, help='Only update header definitions')
-@click.option('--fingerprints', is_flag=True, help='Only update fingerprint definitions')
-def update(headers=False, fingerprints=False):
- """
- Fetches header and fingerprint definitions
- """
- # if no options passed, mark both as True
- if not headers ^ fingerprints:
- headers = fingerprints = True
-
- Download(headers=headers, fingerprints=fingerprints)
-
-
-@cli.command(name='remove')
-def remove():
- """
- Remove all downloaded files
- """
- Remove()
- click.secho('Removed all files!', fg='bright_yellow')
-
-
-if __name__ == '__main__':
- cli()
diff --git a/browserforge/download.py b/browserforge/download.py
index 22f2ab1..71087d6 100644
--- a/browserforge/download.py
+++ b/browserforge/download.py
@@ -1,102 +1,11 @@
-import shutil
-import urllib.request
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from datetime import datetime, timedelta
-from pathlib import Path
-from typing import Dict, Iterator
import click
"""
-Downloads the required model definitions
+Downloads the required model definitions - deprecated
"""
-ROOT_DIR: Path = Path(__file__).parent
-
-"""Constants for headers and fingerprints data"""
-DATA_DIRS: Dict[str, Path] = {
- "headers": ROOT_DIR / "headers/data",
- "fingerprints": ROOT_DIR / "fingerprints/data",
-}
-DATA_FILES: Dict[str, Dict[str, str]] = {
- "headers": {
- "browser-helper-file.json": "browser-helper-file.json",
- "header-network.zip": "header-network-definition.zip",
- "headers-order.json": "headers-order.json",
- "input-network.zip": "input-network-definition.zip",
- },
- "fingerprints": {
- "fingerprint-network.zip": "fingerprint-network-definition.zip",
- },
-}
-REMOTE_PATHS: Dict[str, str] = {
- "headers": "https://github.com/apify/fingerprint-suite/raw/master/packages/header-generator/src/data_files",
- "fingerprints": "https://github.com/apify/fingerprint-suite/raw/master/packages/fingerprint-generator/src/data_files",
-}
-
-
-class DownloadException(Exception):
- """Raises when the download fails."""
-
-
-class DataDownloader:
- """
- Download and extract data files for both headers and fingerprints.
- """
-
- def __init__(self, **kwargs: bool) -> None:
- self.options = _enabled_flags(kwargs)
-
- def download_file(self, url: str, path: str) -> None:
- """
- Download a file from the specified URL and save it to the given path.
- """
- with urllib.request.urlopen(url) as resp: # nosec
- if resp.status != 200:
- raise DownloadException(f"Download failed with status code: {resp.status}")
- with open(path, "wb") as f:
- shutil.copyfileobj(resp, f)
-
- def download(self) -> None:
- """
- Download and extract data files for both headers and fingerprints.
- """
- futures = {}
- with ThreadPoolExecutor(10) as executor:
- for data_type in self.options:
- for local_name, remote_name in DATA_FILES[data_type].items():
- url = f"{REMOTE_PATHS[data_type]}/{remote_name}"
- path = str(DATA_DIRS[data_type] / local_name)
- future = executor.submit(self.download_file, url, path)
- futures[future] = local_name
- for f in as_completed(futures):
- try:
- future.result()
- click.secho(f"{futures[f]:<30}OK!", fg="green")
- except Exception as e:
- click.secho(f"Error downloading {local_name}: {e}", fg="red")
-
-
-def _enabled_flags(flags: Dict[str, bool]) -> Iterator[str]:
- """
- Returns a list of enabled flags based on a given dictionary
- """
- for flag, enabled in flags.items():
- if enabled:
- yield flag
-
-
-def _get_all_paths(**flags: bool) -> Iterator[Path]:
- """
- Yields all the paths to the downloaded data files
- """
- for data_type in _enabled_flags(flags):
- data_path = DATA_DIRS[data_type]
- for local_name, _ in DATA_FILES[data_type].items():
- yield data_path / local_name
-
-
"""
Public download functions
"""
@@ -104,44 +13,35 @@ def _get_all_paths(**flags: bool) -> Iterator[Path]:
def Download(headers=False, fingerprints=False) -> None:
"""
- Download the required data files
+ Deprecated. Downloading model definition files is no longer needed.
+
+ Files are now included as an explicit Python package dependency.
"""
- # Announce that files are being downloaded
- click.secho('Downloading model definition files...', fg='bright_yellow')
- try:
- DataDownloader(headers=headers, fingerprints=fingerprints).download()
- except KeyboardInterrupt:
- print("Download interrupted.")
- Remove()
- exit()
+ click.secho('Deprecated. Downloading model definition files is no longer needed.', fg='bright_yellow')
def DownloadIfNotExists(**flags: bool) -> None:
"""
- Download the required data files if they don't exist
+ Deprecated. Downloading model definition files is no longer needed.
+
+ Files are now included as an explicit Python package dependency.
"""
- if not IsDownloaded(**flags):
- Download(**flags)
+ pass
def IsDownloaded(**flags: bool) -> bool:
"""
- Check if the required data files are already downloaded and not older than a month.
- Returns True if all the requested data files are present and not older than a month, False otherwise.
- """
- for path in _get_all_paths(**flags):
- if not path.exists():
- return False
+ Deprecated. Downloading model definition files is no longer needed.
- # Check if the file is older than a month
- file_creation_time = datetime.fromtimestamp(path.stat().st_ctime)
- one_month_ago = datetime.now() - timedelta(weeks=5)
- return file_creation_time >= one_month_ago
+ Files are now included as an explicit Python package dependency.
+ """
+ return True
def Remove() -> None:
"""
- Deletes all downloaded data files
+ Deprecated. Downloading model definition files is no longer needed.
+
+ Files are now included as an explicit Python package dependency.
"""
- for path in _get_all_paths(headers=True, fingerprints=True):
- path.unlink(missing_ok=True)
+ pass
diff --git a/browserforge/fingerprints/data/__init__.py b/browserforge/fingerprints/data/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/browserforge/fingerprints/generator.py b/browserforge/fingerprints/generator.py
index 052a17c..150c663 100644
--- a/browserforge/fingerprints/generator.py
+++ b/browserforge/fingerprints/generator.py
@@ -2,6 +2,8 @@
from pathlib import Path
from typing import Dict, List, Optional
+from apify_fingerprint_datapoints import get_fingerprint_network
+
from browserforge.bayesian_network import BayesianNetwork, get_possible_values
from browserforge.headers import HeaderGenerator
from browserforge.headers.utils import get_user_agent
@@ -128,7 +130,7 @@ def is_set(self) -> bool:
class FingerprintGenerator:
"""Generates realistic browser fingerprints"""
- fingerprint_generator_network = BayesianNetwork(DATA_DIR / "fingerprint-network.zip")
+ fingerprint_generator_network = BayesianNetwork(get_fingerprint_network())
def __init__(
self,
diff --git a/browserforge/headers/data/__init__.py b/browserforge/headers/data/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/browserforge/headers/generator.py b/browserforge/headers/generator.py
index 876ee16..05883d1 100644
--- a/browserforge/headers/generator.py
+++ b/browserforge/headers/generator.py
@@ -1,8 +1,9 @@
from dataclasses import dataclass
-from pathlib import Path
from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union
from browserforge.bayesian_network import BayesianNetwork, get_possible_values
+from apify_fingerprint_datapoints import (get_header_network, get_headers_order, get_browser_helper_file,
+ get_input_network)
from .utils import get_browser, get_user_agent, pascalize_headers, tuplify
@@ -35,7 +36,6 @@
'sec-fetch-site': '?1',
'sec-fetch-user': 'document',
}
-DATA_DIR: Path = Path(__file__).parent / 'data'
ListOrString: TypeAlias = Union[Tuple[str, ...], List[str], str]
@@ -83,8 +83,8 @@ class HeaderGenerator:
relaxation_order: Tuple[str, ...] = ('locales', 'devices', 'operatingSystems', 'browsers')
# Initialize networks
- input_generator_network = BayesianNetwork(DATA_DIR / "input-network.zip")
- header_generator_network = BayesianNetwork(DATA_DIR / "header-network.zip")
+ input_generator_network = BayesianNetwork(get_input_network())
+ header_generator_network = BayesianNetwork(get_header_network())
def __init__(
self,
@@ -433,8 +433,7 @@ def _load_headers_order(self) -> Dict[str, List[str]]:
Returns:
Dict[str, List[str]]: Dictionary of headers order for each browser.
"""
- headers_order_path = DATA_DIR / "headers-order.json"
- return json.loads(headers_order_path.read_bytes())
+ return json.loads(get_headers_order().read_bytes())
def _load_unique_browsers(self) -> List[HttpBrowserObject]:
"""
@@ -443,8 +442,7 @@ def _load_unique_browsers(self) -> List[HttpBrowserObject]:
Returns:
List[HttpBrowserObject]: List of HttpBrowserObject instances.
"""
- browser_helper_path = DATA_DIR / 'browser-helper-file.json'
- unique_browser_strings = json.loads(browser_helper_path.read_bytes())
+ unique_browser_strings = json.loads(get_browser_helper_file().read_bytes())
return [
self._prepare_http_browser_object(browser_str)
for browser_str in unique_browser_strings
diff --git a/pyproject.toml b/pyproject.toml
index acac848..2101d42 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "browserforge"
-version = "1.2.3"
+version = "1.2.4"
description = "Intelligent browser header & fingerprint generator"
authors = ["daijro "]
license = "Apache-2.0"
@@ -32,6 +32,7 @@ python = "^3.8"
click = "*"
orjson = { version = "*", optional = true }
typing_extensions = {version = "*", python = "<3.10"}
+apify_fingerprint_datapoints = "*"
[tool.poetry.extras]
all = ["orjson"]