Skip to content

Commit

Permalink
Release (#1057)
Browse files Browse the repository at this point in the history
* chore(deps): update dependencies

* chore(firefox): upgrade to FF 118.0.2

* docs(version): add changelog and bump version

* chore(formatting): fix package.json formatting
  • Loading branch information
vringar authored Oct 21, 2023
1 parent c27643a commit 49aa218
Show file tree
Hide file tree
Showing 17 changed files with 734 additions and 492 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## v0.25.0 - 2023-10-13

- Bump to Firefox 118.0.2
- Introduce StorageWatchdog #1056 (Thanks @gridl0ck for contributing this)
- Upgrade Docker image to Ubuntu 22.04 #1055

## v0.24.0 - 2023-09-05

Bump to Firefox 117
Expand Down
931 changes: 577 additions & 354 deletions Extension/package-lock.json

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions Extension/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,34 @@
"start": "Start is required for the manual_test.py to run"
},
"devDependencies": {
"@babel/cli": "^7.22.15",
"@babel/core": "^7.22.15",
"@babel/cli": "^7.23.0",
"@babel/core": "^7.23.0",
"@babel/eslint-parser": "^7.22.15",
"@babel/preset-env": "^7.22.15",
"@babel/preset-env": "^7.23.0",
"@types/download": "^8.0.2",
"@types/firefox-webext-browser": "^111.0.1",
"@typescript-eslint/eslint-plugin": "^6.6.0",
"@typescript-eslint/parser": "^6.6.0",
"@types/firefox-webext-browser": "^111.0.2",
"@typescript-eslint/eslint-plugin": "^6.7.5",
"@typescript-eslint/parser": "^6.7.5",
"ajv": "^8.12.0",
"body-parser": "^1.20.2",
"download": "^8.0.0",
"eslint": "^8.48.0",
"eslint": "^8.51.0",
"eslint-config-prettier": "^8.0.0",
"eslint-plugin-html": "^7.1.0",
"eslint-plugin-import": "^2.28.1",
"eslint-plugin-jsdoc": "^46.5.1",
"eslint-plugin-jsdoc": "^46.8.2",
"eslint-plugin-json": "^3.1.0",
"eslint-plugin-mozilla": "^3.1.0",
"eslint-plugin-mozilla": "^3.2.0",
"eslint-plugin-no-unsanitized": "^4.0.2",
"eslint-plugin-prettier": "^5.0.0",
"eslint-plugin-unicorn": "^48.0.1",
"express": "^4.18.2",
"prettier": "^3.0.3",
"safe-compare": "^1.1.4",
"ts-loader": "^9.4.4",
"ts-loader": "^9.5.0",
"typedoc": "^0.25.1",
"typescript": "^5.2.2",
"web-ext": "^7.6.2",
"web-ext": "^7.8.0",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4"
},
Expand Down
2 changes: 1 addition & 1 deletion Extension/src/loggingdb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ export const open = async function (
);
console.log("StorageController started?", rv);
}

storageController.send(JSON.stringify(`Browser-${crawlID}`));
// Listen for incoming urls as visit ids
listeningSocket = new socket.ListeningSocket(listeningSocketCallback);
console.log("Starting socket listening for incoming connections.");
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.24.0
0.25.0
32 changes: 16 additions & 16 deletions environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,42 +9,42 @@ dependencies:
- dill=0.3.7
- dill=0.3.7
- easyprocess=1.1
- gcsfs=2023.9.1
- gcsfs=2023.9.2
- geckodriver=0.33.0
- ipython=8.15.0
- ipython=8.16.1
- isort=5.12.0
- leveldb=1.23
- multiprocess=0.70.15
- mypy=1.5.1
- nodejs=20.7.0
- pandas=2.1.0
- mypy=1.6.0
- nodejs=20.8.0
- pandas=2.1.1
- pillow=10.0.1
- pip=23.2.1
- plyvel=1.5.0
- pre-commit=3.4.0
- psutil=5.9.5
- pyarrow=13.0.0
- pytest-asyncio=0.21.1
- pytest-cov=4.1.0
- pytest=7.4.2
- python=3.11.5
- pyvirtualdisplay=3.0
- python=3.12.0
- pyvirtualdisplay=2.2
- recommonmark=0.7.1
- redis-py=5.0.0
- s3fs=2023.9.1
- selenium=4.12.0
- sentry-sdk=1.31.0
- redis-py=5.0.1
- s3fs=2023.9.2
- selenium=4.13.0
- sentry-sdk=1.32.0
- sphinx-markdown-tables=0.0.17
- sphinx=7.2.6
- tabulate=0.9.0
- tblib=2.0.0
- wget=1.20.3
- pip:
- dataclasses-json==0.6.0
- dataclasses-json==0.6.1
- domain-utils==0.7.1
- jsonschema==4.19.0
- plyvel==1.5.0
- jsonschema==4.19.1
- tranco==0.6
- types-pyyaml==6.0.12.11
- types-redis==4.6.0.6
- types-pyyaml==6.0.12.12
- types-redis==4.6.0.7
- types-tabulate==0.9.0.3
name: openwpm
22 changes: 13 additions & 9 deletions openwpm/browser_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,15 +549,17 @@ def kill_browser_manager(self):
if self.browser_manager is not None and self.browser_manager.pid is not None:
self.logger.debug(
"BROWSER %i: Attempting to kill BrowserManager with pid %i. "
"Browser PID: %s"
% (self.browser_id, self.browser_manager.pid, self.geckodriver_pid)
"Browser PID: %s",
self.browser_id,
self.browser_manager.pid,
self.geckodriver_pid,
)
try:
os.kill(self.browser_manager.pid, signal.SIGKILL)
except OSError:
self.logger.debug(
"BROWSER %i: Browser manager process does "
"not exist" % self.browser_id
"BROWSER %i: Browser manager process does not exist",
self.browser_id,
)
pass

Expand All @@ -566,13 +568,14 @@ def kill_browser_manager(self):
os.kill(self.display_pid, signal.SIGKILL)
except OSError:
self.logger.debug(
"BROWSER %i: Display process does not exit" % self.browser_id
"BROWSER %i: Display process does not exit", self.browser_id
)
pass
except TypeError:
self.logger.error(
"BROWSER %i: PID may not be the correct "
"type %s" % (self.browser_id, str(self.display_pid))
"BROWSER %i: PID may not be the correct " "type %s",
self.browser_id,
str(self.display_pid),
)
if self.display_port is not None: # xvfb display lock
# lockfile = "/tmp/.X%s-lock" % self.display_port
Expand All @@ -584,8 +587,9 @@ def kill_browser_manager(self):
os.remove(lockfile)
except OSError:
self.logger.debug(
"BROWSER %i: Screen lockfile (%s) already "
"removed" % (self.browser_id, lockfile)
"BROWSER %i: Screen lockfile (%s) already removed",
self.browser_id,
lockfile,
)
pass

Expand Down
2 changes: 1 addition & 1 deletion openwpm/deploy_browsers/deploy_firefox.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def deploy_firefox(
display_port = None
display = None
if display_mode == "headless":
fo.headless = True
fo.add_argument("--headless")
fo.add_argument("--width={}".format(DEFAULT_SCREEN_RES[0]))
fo.add_argument("--height={}".format(DEFAULT_SCREEN_RES[1]))
if display_mode == "xvfb":
Expand Down
31 changes: 23 additions & 8 deletions openwpm/storage/storage_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ def __init__(
self._shutdown_flag = False
self._relaxed = False
self.logger = logging.getLogger("openwpm")
self.store_record_tasks: DefaultDict[VisitId, List[Task[None]]] = defaultdict(
self.store_record_tasks: DefaultDict[VisitId, list[Task[None]]] = defaultdict(
list
)
"""Contains all store_record tasks for a given visit_id"""
self.finalize_tasks: List[Tuple[VisitId, Optional[Task[None]], bool]] = []
self.finalize_tasks: list[tuple[VisitId, Optional[Task[None]], bool]] = []
"""Contains all information required for update_completion_queue to work
Tuple structure is: VisitId, optional completion token, success
"""
Expand All @@ -97,18 +97,21 @@ async def _handler(
self.logger.error(
"An exception occurred while processing records", exc_info=e
)
writer.close()
await writer.wait_closed()

async def handler(
self, reader: asyncio.StreamReader, _: asyncio.StreamWriter
) -> None:
"""Created for every new connection to the Server"""
self.logger.debug("Initializing new handler")
client_name = await get_message_from_reader(reader)
self.logger.info(f"Initializing new handler for {client_name}")
while True:
try:
record: Tuple[str, Any] = await get_message_from_reader(reader)
except IncompleteReadError:
self.logger.info(
"Terminating handler, because the underlying socket closed"
f"Terminating handler for {client_name}, because the underlying socket closed"
)
break
if len(record) != 2:
Expand Down Expand Up @@ -248,6 +251,7 @@ async def update_status_queue(self) -> NoReturn:
)

async def shutdown(self, completion_queue_task: Task[None]) -> None:
self.logger.info("Entering self.shutdown")
completion_tokens = {}
visit_ids = list(self.store_record_tasks.keys())
for visit_id in visit_ids:
Expand All @@ -261,6 +265,7 @@ async def shutdown(self, completion_queue_task: Task[None]) -> None:
self.completion_queue.put((visit_id, False))

await self.structured_storage.shutdown()
self.logger.info("structured_storage is shut down")

if self.unstructured_storage is not None:
await self.unstructured_storage.flush_cache()
Expand Down Expand Up @@ -342,13 +347,21 @@ async def _run(self) -> None:
update_completion_queue = asyncio.create_task(
self.update_completion_queue(), name="CompletionQueueFeeder"
)
# Blocks until we should shutdown
# Blocks until we should shut down
await self.should_shutdown()

self.logger.info(f"Closing Server")
server.close()
self.logger.info("Closed Server")
self.logger.info("Cancelling status_queue_update")
status_queue_update.cancel()
self.logger.info("Cancelled status_queue_update")
self.logger.info("Cancelling timeout_check")
timeout_check.cancel()
self.logger.info("Cancelled timeout_check")
self.logger.info("Starting wait_closed")
await server.wait_closed()
self.logger.info("Completed wait_closed")

await self.shutdown(update_completion_queue)

def run(self) -> None:
Expand All @@ -359,10 +372,11 @@ def run(self) -> None:
class DataSocket:
"""Wrapper around ClientSocket to make sending records to the StorageController more convenient"""

def __init__(self, listener_address: Tuple[str, int]) -> None:
def __init__(self, listener_address: Tuple[str, int], client_name: str) -> None:
self.socket = ClientSocket(serialization="dill")
self.socket.connect(*listener_address)
self.logger = logging.getLogger("openwpm")
self.socket.send(client_name)

def store_record(
self, table_name: TableName, visit_id: VisitId, data: Dict[str, Any]
Expand Down Expand Up @@ -443,7 +457,7 @@ def save_configuration(
browser_version: str,
) -> None:
assert self.listener_address is not None
sock = DataSocket(self.listener_address)
sock = DataSocket(self.listener_address, "StorageControllerHandle")
task_id = random.getrandbits(32)
sock.store_record(
TableName("task"),
Expand All @@ -467,6 +481,7 @@ def save_configuration(
},
)
sock.finalize_visit_id(INVALID_VISIT_ID, success=True)
sock.close()

def launch(self) -> None:
"""Starts the storage controller"""
Expand Down
4 changes: 3 additions & 1 deletion openwpm/task_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,9 @@ def _launch_storage_controller(
)
assert self.manager_params.storage_controller_address is not None
# open connection to storage controller for saving crawl details
self.sock = DataSocket(self.manager_params.storage_controller_address)
self.sock = DataSocket(
self.manager_params.storage_controller_address, "TaskManager"
)

def _shutdown_manager(
self, during_init: bool = False, relaxed: bool = True
Expand Down
Loading

0 comments on commit 49aa218

Please sign in to comment.