11from __future__ import annotations
22
33import json
4- import os
5- from typing import TYPE_CHECKING
4+ from pathlib import Path
5+ from unittest .mock import AsyncMock , patch
6+
7+ import pytest
68
79from crawlee ._consts import METADATA_FILENAME
810from crawlee .storage_clients ._memory ._creation_management import persist_metadata_if_enabled
911
10- if TYPE_CHECKING :
11- from pathlib import Path
12-
1312
1413async def test_persist_metadata_skips_when_disabled (tmp_path : Path ) -> None :
1514 await persist_metadata_if_enabled (data = {'key' : 'value' }, entity_directory = str (tmp_path ), write_metadata = False )
@@ -18,17 +17,43 @@ async def test_persist_metadata_skips_when_disabled(tmp_path: Path) -> None:
1817
1918async def test_persist_metadata_creates_files_and_directories_when_enabled (tmp_path : Path ) -> None :
2019 data = {'key' : 'value' }
21- entity_directory = os . path . join (tmp_path , 'new_dir' )
22- await persist_metadata_if_enabled (data = data , entity_directory = entity_directory , write_metadata = True )
23- assert os . path . exists (entity_directory ) # Check if directory was created
24- assert os . path . isfile ( os . path . join ( entity_directory , METADATA_FILENAME )) # Check if file was created
20+ entity_directory = Path (tmp_path , 'new_dir' )
21+ await persist_metadata_if_enabled (data = data , entity_directory = str ( entity_directory ) , write_metadata = True )
22+ assert entity_directory . exists () is True # Check if directory was created
23+ assert ( entity_directory / METADATA_FILENAME ). is_file ( ) # Check if file was created
2524
2625
2726async def test_persist_metadata_correctly_writes_data (tmp_path : Path ) -> None :
2827 data = {'key' : 'value' }
29- entity_directory = os . path . join (tmp_path , 'data_dir' )
30- await persist_metadata_if_enabled (data = data , entity_directory = entity_directory , write_metadata = True )
31- metadata_path = os . path . join ( entity_directory , METADATA_FILENAME )
28+ entity_directory = Path (tmp_path , 'data_dir' )
29+ await persist_metadata_if_enabled (data = data , entity_directory = str ( entity_directory ) , write_metadata = True )
30+ metadata_path = entity_directory / METADATA_FILENAME
3231 with open (metadata_path ) as f : # noqa: ASYNC230
3332 content = f .read ()
3433 assert json .loads (content ) == data # Check if correct data was written
34+
35+
36+ async def test_persist_metadata_rewrites_data_with_error (tmp_path : Path ) -> None :
37+ init_data = {'key' : 'very_long_value' }
38+ update_data = {'key' : 'short_value' }
39+ error_data = {'key' : 'error' }
40+
41+ entity_directory = Path (tmp_path , 'data_dir' )
42+ metadata_path = entity_directory / METADATA_FILENAME
43+
44+ # write metadata with init_data
45+ await persist_metadata_if_enabled (data = init_data , entity_directory = str (entity_directory ), write_metadata = True )
46+
47+ # rewrite metadata with update_data
48+ await persist_metadata_if_enabled (data = update_data , entity_directory = str (entity_directory ), write_metadata = True )
49+ with open (metadata_path ) as f : # noqa: ASYNC230
50+ content = f .read ()
51+ assert json .loads (content ) == update_data # Check if correct data was rewritten
52+
53+ # raise interrupt between opening a file and writing
54+ module_for_patch = 'crawlee.storage_clients._memory._creation_management.json_dumps'
55+ with patch (module_for_patch , AsyncMock (side_effect = KeyboardInterrupt ())), pytest .raises (KeyboardInterrupt ):
56+ await persist_metadata_if_enabled (data = error_data , entity_directory = str (entity_directory ), write_metadata = True )
57+ with open (metadata_path ) as f : # noqa: ASYNC230
58+ content = f .read ()
59+ assert content == '' # The file is empty after an error
0 commit comments