-
Notifications
You must be signed in to change notification settings - Fork 132
Fix distributed tests #1451
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix distributed tests #1451
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |||||
| from datachain.catalog.loader import get_metastore, get_warehouse | ||||||
| from datachain.cli.utils import CommaSeparatedArgs | ||||||
| from datachain.config import Config, ConfigLevel | ||||||
| from datachain.data_storage import AbstractMetastore, JobQueryType, JobStatus | ||||||
| from datachain.data_storage.sqlite import ( | ||||||
| SQLiteDatabaseEngine, | ||||||
| SQLiteMetastore, | ||||||
|
|
@@ -129,6 +130,15 @@ def clean_environment( | |||||
| monkeypatch_session.delenv(DataChainDir.ENV_VAR_DATACHAIN_ROOT, raising=False) | ||||||
|
|
||||||
|
|
||||||
| def _create_job(metastore: AbstractMetastore) -> str: | ||||||
| return metastore.create_job( | ||||||
| "my-job", | ||||||
| 'import datachain as dc; dc.read_values(num=[1, 2, 3]).save("nums")', | ||||||
| query_type=JobQueryType.PYTHON, | ||||||
| status=JobStatus.RUNNING, | ||||||
| ) | ||||||
|
|
||||||
|
|
||||||
| @pytest.fixture | ||||||
| def sqlite_db(): | ||||||
| if os.environ.get("DATACHAIN_METASTORE") or os.environ.get("DATACHAIN_WAREHOUSE"): | ||||||
|
|
@@ -162,9 +172,13 @@ def cleanup_sqlite_db( | |||||
|
|
||||||
|
|
||||||
| @pytest.fixture | ||||||
| def metastore(): | ||||||
| def metastore(monkeypatch): | ||||||
| if os.environ.get("DATACHAIN_METASTORE"): | ||||||
| _metastore = get_metastore() | ||||||
|
|
||||||
| job_id = _create_job(_metastore) | ||||||
| monkeypatch.setenv("DATACHAIN_JOB_ID", job_id) | ||||||
|
|
||||||
| yield _metastore | ||||||
|
|
||||||
| _metastore.cleanup_for_tests() | ||||||
|
|
@@ -233,14 +247,18 @@ def test_session(catalog): | |||||
|
|
||||||
|
|
||||||
| @pytest.fixture | ||||||
| def metastore_tmpfile(tmp_path): | ||||||
| def metastore_tmpfile(monkeypatch, tmp_path): | ||||||
| if os.environ.get("DATACHAIN_METASTORE"): | ||||||
| _metastore = get_metastore() | ||||||
|
|
||||||
| job_id = _create_job(_metastore) | ||||||
| monkeypatch.setenv("DATACHAIN_JOB_ID", job_id) | ||||||
|
|
||||||
| yield _metastore | ||||||
|
|
||||||
| _metastore.cleanup_for_tests() | ||||||
| else: | ||||||
| _metastore = SQLiteMetastore(db_file=tmp_path / "test.db") | ||||||
| _metastore = SQLiteMetastore(db_file=str(tmp_path / "test.db")) | ||||||
|
||||||
| _metastore = SQLiteMetastore(db_file=str(tmp_path / "test.db")) | |
| _metastore = SQLiteMetastore(db_file=tmp_path / "test.db") |
Copilot
AI
Nov 6, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Converting Path to string is unnecessary here. The SQLiteWarehouse constructor likely accepts Path objects directly. If the change from tmp_path / \"test.db\" to str(tmp_path / \"test.db\") is intentional to fix an issue, it would be helpful to understand why, but this appears inconsistent with other code that uses paths directly.
| _warehouse = SQLiteWarehouse(db_file=str(tmp_path / "test.db")) | |
| _warehouse = SQLiteWarehouse(db_file=tmp_path / "test.db") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,10 @@ def mock_is_script_run(monkeypatch): | |
| monkeypatch.setattr("datachain.query.session.is_script_run", lambda: True) | ||
|
|
||
|
|
||
| @pytest.mark.skipif( | ||
| "os.environ.get('DATACHAIN_DISTRIBUTED')", | ||
| reason="Checkpoints test skipped in distributed mode", | ||
| ) | ||
|
Comment on lines
+14
to
+17
|
||
| def test_checkpoints_parallel(test_session_tmpfile, monkeypatch): | ||
| def mapper_fail(num) -> int: | ||
| raise Exception("Error") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,10 @@ def nums_dataset(test_session): | |
| return dc.read_values(num=[1, 2, 3], session=test_session).save("nums") | ||
|
|
||
|
|
||
| @pytest.mark.skipif( | ||
| "os.environ.get('DATACHAIN_DISTRIBUTED')", | ||
| reason="Checkpoints test skipped in distributed mode", | ||
| ) | ||
|
Comment on lines
+24
to
+27
|
||
| @pytest.mark.parametrize("reset_checkpoints", [True, False]) | ||
| @pytest.mark.parametrize("with_delta", [True, False]) | ||
| @pytest.mark.parametrize("use_datachain_job_id_env", [True, False]) | ||
|
|
@@ -84,6 +88,10 @@ def test_checkpoints( | |
| assert len(list(catalog.metastore.list_checkpoints(second_job_id))) == 3 | ||
|
|
||
|
|
||
| @pytest.mark.skipif( | ||
| "os.environ.get('DATACHAIN_DISTRIBUTED')", | ||
| reason="Checkpoints test skipped in distributed mode", | ||
| ) | ||
|
Comment on lines
+91
to
+94
|
||
| @pytest.mark.parametrize("reset_checkpoints", [True, False]) | ||
| def test_checkpoints_modified_chains( | ||
| test_session, monkeypatch, nums_dataset, reset_checkpoints | ||
|
|
@@ -115,6 +123,10 @@ def test_checkpoints_modified_chains( | |
| assert len(list(catalog.metastore.list_checkpoints(second_job_id))) == 3 | ||
|
|
||
|
|
||
| @pytest.mark.skipif( | ||
| "os.environ.get('DATACHAIN_DISTRIBUTED')", | ||
| reason="Checkpoints test skipped in distributed mode", | ||
| ) | ||
|
Comment on lines
+126
to
+129
|
||
| @pytest.mark.parametrize("reset_checkpoints", [True, False]) | ||
| def test_checkpoints_multiple_runs( | ||
| test_session, monkeypatch, nums_dataset, reset_checkpoints | ||
|
|
@@ -180,6 +192,10 @@ def test_checkpoints_multiple_runs( | |
| assert len(list(catalog.metastore.list_checkpoints(fourth_job_id))) == 3 | ||
|
|
||
|
|
||
| @pytest.mark.skipif( | ||
| "os.environ.get('DATACHAIN_DISTRIBUTED')", | ||
| reason="Checkpoints test skipped in distributed mode", | ||
| ) | ||
|
Comment on lines
+195
to
+198
|
||
| def test_checkpoints_check_valid_chain_is_returned( | ||
| test_session, | ||
| monkeypatch, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing space in the command substitution syntax. The correct syntax should be
\"$(which python\"${PYTHON_VERSION}\")\"or more clearly\"$(which python${PYTHON_VERSION})\". The current syntaxpython\"${PYTHON_VERSION}\"will not properly interpolate the variable.