From 52b8884e754b2f526d35d0fa3d7a9c638870fd24 Mon Sep 17 00:00:00 2001 From: Andy Kuny Date: Mon, 18 Dec 2023 15:13:04 -0500 Subject: [PATCH 01/11] Update infrastructure layer --- nad_ch/domain/entities.py | 18 ++++++- nad_ch/domain/repositories.py | 12 ++++- nad_ch/infrastructure/database.py | 75 +++++++++++++++++++++++++-- tests/infrastructure/test_database.py | 44 +++++++++++++++- 4 files changed, 138 insertions(+), 11 deletions(-) diff --git a/nad_ch/domain/entities.py b/nad_ch/domain/entities.py index 1f3d65e..02456d1 100644 --- a/nad_ch/domain/entities.py +++ b/nad_ch/domain/entities.py @@ -3,9 +3,23 @@ def __init__(self, name: str, id: int = None): self.id = id self.name = name + def __repr__(self): + return f'DataProvider {self.id}, {self.name}' + class DataSubmission: - def __init__(self, file_path: str, provider: DataProvider, id: int = None): + def __init__( + self, + file_name: str, + url: str, + provider: DataProvider, + id: int = None, + ): self.id = id - self.file_path = file_path + self.file_name = file_name + self.url = url self.provider = provider + + def __repr__(self): + return f'DataSubmission \ + {self.id}, {self.file_name}, {self.url}, {self.provider}' diff --git a/nad_ch/domain/repositories.py b/nad_ch/domain/repositories.py index cf9c1fa..d463fcd 100644 --- a/nad_ch/domain/repositories.py +++ b/nad_ch/domain/repositories.py @@ -1,10 +1,10 @@ from typing import Protocol from collections.abc import Iterable -from nad_ch.domain.entities import DataProvider +from nad_ch.domain.entities import DataProvider, DataSubmission class DataProviderRepository(Protocol): - def add(self, provider: DataProvider) -> None: + def add(self, provider: DataProvider) -> DataProvider: ... def get_by_name(self, name: str) -> DataProvider: @@ -12,3 +12,11 @@ def get_by_name(self, name: str) -> DataProvider: def get_all(self) -> Iterable[DataProvider]: ... + + +class DataSubmissionRepository(Protocol): + def add(self, submission: DataSubmission) -> DataSubmission: + ... + + def get_by_provider(self, provider: DataProvider) -> Iterable[DataSubmission]: + ... diff --git a/nad_ch/infrastructure/database.py b/nad_ch/infrastructure/database.py index 2f34234..92d02af 100644 --- a/nad_ch/infrastructure/database.py +++ b/nad_ch/infrastructure/database.py @@ -1,10 +1,10 @@ from typing import List, Optional -from sqlalchemy import Column, Integer, String, create_engine -from sqlalchemy.orm import sessionmaker, declarative_base +from sqlalchemy import Column, Integer, String, create_engine, ForeignKey +from sqlalchemy.orm import sessionmaker, declarative_base, relationship import contextlib from nad_ch.config import DATABASE_URL -from nad_ch.domain.entities import DataProvider -from nad_ch.domain.repositories import DataProviderRepository +from nad_ch.domain.entities import DataProvider, DataSubmission +from nad_ch.domain.repositories import DataProviderRepository, DataSubmissionRepository engine = create_engine(DATABASE_URL) @@ -33,6 +33,11 @@ class DataProviderModel(ModelBase): id = Column(Integer, primary_key=True) name = Column(String) + data_submissions = relationship( + 'DataSubmissionModel', + back_populates='data_provider' + ) + @staticmethod def from_entity(provider): return DataProviderModel(id=provider.id, name=provider.name) @@ -41,14 +46,44 @@ def to_entity(self): return DataProvider(id=self.id, name=self.name) +class DataSubmissionModel(ModelBase): + __tablename__ = 'data_submissions' + + id = Column(Integer, primary_key=True) + file_name = Column(String) + url = Column(String) + data_provider_id = Column(Integer, ForeignKey('data_providers.id')) + + data_provider = relationship('DataProviderModel', back_populates='data_submissions') + + @staticmethod + def from_entity(submission): + return DataSubmissionModel( + id=submission.id, + file_name=submission.file_name, + url=submission.url, + data_provider_id=submission.provider.id + ) + + def to_entity(self, provider: DataProvider): + return DataSubmission( + id=self.id, + file_name=self.file_name, + url=self.url, + provider=provider + ) + + class SqlAlchemyDataProviderRepository(DataProviderRepository): def __init__(self, session_factory): self.session_factory = session_factory - def add(self, provider: DataProvider): + def add(self, provider: DataProvider) -> DataProvider: with self.session_factory() as session: provider_model = DataProviderModel.from_entity(provider) session.add(provider_model) + session.commit() + session.refresh(provider_model) return provider_model.to_entity() def get_by_name(self, name: str) -> Optional[DataProvider]: @@ -65,3 +100,33 @@ def get_all(self) -> List[DataProvider]: provider_models = session.query(DataProviderModel).all() providers_entities = [provider.to_entity() for provider in provider_models] return providers_entities + + +class SqlAlchemyDataSubmissionRepository(DataSubmissionRepository): + def __init__(self, session_factory): + self.session_factory = session_factory + + def add(self, submission: DataSubmission) -> DataSubmission: + with self.session_factory() as session: + submission_model = DataSubmissionModel.from_entity(submission) + session.add(submission_model) + session.commit() + session.refresh(submission_model) + provider_model = ( + session.query(DataProviderModel) + .filter(DataProviderModel.id == submission_model.data_provider_id) + .first() + ) + return submission_model.to_entity(provider_model.to_entity()) + + def get_by_provider(self, provider: DataProvider) -> List[DataSubmission]: + with self.session_factory() as session: + submission_models = ( + session.query(DataSubmissionModel) + .filter(DataSubmissionModel.data_provider_id == provider.id) + .all() + ) + submission_entities = ( + [submission.to_entity(provider) for submission in submission_models] + ) + return submission_entities diff --git a/tests/infrastructure/test_database.py b/tests/infrastructure/test_database.py index 953167f..8d9ea1a 100644 --- a/tests/infrastructure/test_database.py +++ b/tests/infrastructure/test_database.py @@ -3,8 +3,12 @@ from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker from nad_ch.config import DATABASE_URL -from nad_ch.domain.entities import DataProvider -from nad_ch.infrastructure.database import ModelBase, SqlAlchemyDataProviderRepository +from nad_ch.domain.entities import DataProvider, DataSubmission +from nad_ch.infrastructure.database import ( + ModelBase, + SqlAlchemyDataProviderRepository, + SqlAlchemyDataSubmissionRepository +) @pytest.fixture(scope='function') @@ -33,6 +37,11 @@ def providers(test_session): return SqlAlchemyDataProviderRepository(test_session) +@pytest.fixture(scope='function') +def submissions(test_session): + return SqlAlchemyDataSubmissionRepository(test_session) + + def test_add_data_provider_to_repository_and_get_by_name(providers): provider_name = 'State X' new_provider = DataProvider(provider_name) @@ -43,3 +52,34 @@ def test_add_data_provider_to_repository_and_get_by_name(providers): assert retreived_provider.id == 1 assert retreived_provider.name == provider_name assert isinstance(retreived_provider, DataProvider) is True + + +def test_add_data_provider_and_then_data_submission(providers, submissions): + provider_name = 'State X' + new_provider = DataProvider(provider_name) + saved_provider = providers.add(new_provider) + new_submission = DataSubmission( + 'some-file-name', saved_provider, None, 'some-url') + + result = submissions.add(new_submission) + + assert result.id == 1 + assert result.provider.id == saved_provider.id + assert result.file_name == 'some-file-name' + assert result.url == 'some-url' + + +def test_retrieve_a_list_of_submissions_by_provider(providers, submissions): + provider_name = 'State X' + new_provider = DataProvider(provider_name) + saved_provider = providers.add(new_provider) + new_submission = DataSubmission( + 'some-file-name', saved_provider, None, 'some-url') + submissions.add(new_submission) + another_new_submission = DataSubmission( + 'some-other-file-name', saved_provider, None, 'some-other-url') + submissions.add(another_new_submission) + + submissions = submissions.get_by_provider(saved_provider) + + assert len(submissions) == 2 From 06358d9e0e127205229902eb7a244d467dcca808 Mon Sep 17 00:00:00 2001 From: Andy Kuny Date: Mon, 18 Dec 2023 15:33:35 -0500 Subject: [PATCH 02/11] Fix error --- nad_ch/use_cases.py | 2 +- tests/infrastructure/test_database.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nad_ch/use_cases.py b/nad_ch/use_cases.py index cbf764b..8035544 100644 --- a/nad_ch/use_cases.py +++ b/nad_ch/use_cases.py @@ -30,6 +30,6 @@ def list_data_providers(ctx: ApplicationContext) -> List[DataProvider]: def ingest_data_submission( - ctx: ApplicationContext, file_path: str, provider_name: str + ctx: ApplicationContext, file_name: str, provider_name: str ) -> None: pass diff --git a/tests/infrastructure/test_database.py b/tests/infrastructure/test_database.py index 8d9ea1a..b66a0f8 100644 --- a/tests/infrastructure/test_database.py +++ b/tests/infrastructure/test_database.py @@ -59,7 +59,7 @@ def test_add_data_provider_and_then_data_submission(providers, submissions): new_provider = DataProvider(provider_name) saved_provider = providers.add(new_provider) new_submission = DataSubmission( - 'some-file-name', saved_provider, None, 'some-url') + 'some-file-name', 'some-url', saved_provider) result = submissions.add(new_submission) @@ -74,10 +74,10 @@ def test_retrieve_a_list_of_submissions_by_provider(providers, submissions): new_provider = DataProvider(provider_name) saved_provider = providers.add(new_provider) new_submission = DataSubmission( - 'some-file-name', saved_provider, None, 'some-url') + 'some-file-name', 'some-url', saved_provider) submissions.add(new_submission) another_new_submission = DataSubmission( - 'some-other-file-name', saved_provider, None, 'some-other-url') + 'some-other-file-name', 'some-other-url', saved_provider) submissions.add(another_new_submission) submissions = submissions.get_by_provider(saved_provider) From 28618016dda7d8b88e8f92e4c8d6e3c65f46640d Mon Sep 17 00:00:00 2001 From: Andy Kuny Date: Tue, 19 Dec 2023 11:30:31 -0500 Subject: [PATCH 03/11] Add provider use cases --- .gitignore | 5 ++++- nad_ch/application_context.py | 27 +++++++++++++++++++++++-- nad_ch/config.py | 1 + nad_ch/controllers/cli.py | 11 ++++++++++- nad_ch/infrastructure/storage.py | 16 +++++++++++++++ nad_ch/use_cases.py | 34 ++++++++++++++++++++++++++++++-- tests/mocks.py | 24 +++++++++++++++++++--- 7 files changed, 109 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index f15b188..ef36a6e 100644 --- a/.gitignore +++ b/.gitignore @@ -160,4 +160,7 @@ cython_debug/ #.idea/ # Development databases -*.sqlite3 \ No newline at end of file +*.sqlite3 + +# Development storage +storage/ \ No newline at end of file diff --git a/nad_ch/application_context.py b/nad_ch/application_context.py index ede8691..a41ff07 100644 --- a/nad_ch/application_context.py +++ b/nad_ch/application_context.py @@ -1,40 +1,63 @@ import os import logging +from nad_ch.config import STORAGE_PATH from nad_ch.infrastructure.database import ( session_scope, - SqlAlchemyDataProviderRepository + SqlAlchemyDataProviderRepository, + SqlAlchemyDataSubmissionRepository ) from nad_ch.infrastructure.logger import Logger -from tests.mocks import MockDataProviderRepository +from nad_ch.infrastructure.storage import LocalStorage +from tests.mocks import MockDataProviderRepository, MockDataSubmissionRepository class ApplicationContext: def __init__(self): self._providers = SqlAlchemyDataProviderRepository(session_scope) + self._submissions = SqlAlchemyDataSubmissionRepository(session_scope) self._logger = Logger(__name__) + self._storage = LocalStorage(STORAGE_PATH) @property def providers(self): return self._providers + @property + def submissions(self): + return self._submissions + @property def logger(self): return self._logger + @property + def storage(self): + return self._storage + class TestApplicationContext(ApplicationContext): def __init__(self): self._providers = MockDataProviderRepository() + self._submissions = MockDataSubmissionRepository() self._logger = Logger(__name__, logging.DEBUG) + self._storage = LocalStorage(STORAGE_PATH) @property def providers(self): return self._providers + @property + def submissions(self): + return self._submissions + @property def logger(self): return self._logger + @property + def storage(self): + return self._storage + def create_app_context(): if os.environ.get('APP_ENV') == 'test': diff --git a/nad_ch/config.py b/nad_ch/config.py index c61f8f5..47a2b43 100644 --- a/nad_ch/config.py +++ b/nad_ch/config.py @@ -7,3 +7,4 @@ APP_ENV = os.getenv('APP_ENV') DATABASE_URL = os.getenv('DATABASE_URL') +STORAGE_PATH = os.getenv('STORAGE_PATH') diff --git a/nad_ch/controllers/cli.py b/nad_ch/controllers/cli.py index 00398de..4450c11 100644 --- a/nad_ch/controllers/cli.py +++ b/nad_ch/controllers/cli.py @@ -3,6 +3,7 @@ add_data_provider, list_data_providers, ingest_data_submission, + list_data_submissions_by_provider ) @@ -29,8 +30,16 @@ def list_providers(ctx): @cli.command() @click.pass_context -@click.argument('filepath') +@click.argument('file_path') @click.argument('provider') def ingest(ctx, file_path, provider): context = ctx.obj ingest_data_submission(context, file_path, provider) + + +@cli.command() +@click.pass_context +@click.argument('provider') +def list_submissions_by_provider(ctx, provider): + context = ctx.obj + list_data_submissions_by_provider(context, provider) diff --git a/nad_ch/infrastructure/storage.py b/nad_ch/infrastructure/storage.py index e69de29..60068df 100644 --- a/nad_ch/infrastructure/storage.py +++ b/nad_ch/infrastructure/storage.py @@ -0,0 +1,16 @@ +import os +import shutil + + +class LocalStorage: + def __init__(self, base_path: str): + self.base_path = base_path + + def _full_path(self, path: str) -> str: + return os.path.join(self.base_path, path) + + def upload(self, source: str, destination: str) -> None: + shutil.copy(source, self._full_path(destination)) + + def get_file_url(self, file_name: str) -> str: + return file_name diff --git a/nad_ch/use_cases.py b/nad_ch/use_cases.py index 8035544..d9a7109 100644 --- a/nad_ch/use_cases.py +++ b/nad_ch/use_cases.py @@ -1,6 +1,6 @@ from typing import List from nad_ch.application_context import ApplicationContext -from nad_ch.domain.entities import DataProvider +from nad_ch.domain.entities import DataProvider, DataSubmission def add_data_provider( @@ -32,4 +32,34 @@ def list_data_providers(ctx: ApplicationContext) -> List[DataProvider]: def ingest_data_submission( ctx: ApplicationContext, file_name: str, provider_name: str ) -> None: - pass + if not file_name: + ctx.logger.error('File name required') + return + + provider = ctx.providers.get_by_name(provider_name) + if not provider: + ctx.logger.error('Provider with that name does not exist') + return + + ctx.storage.upload(file_name, f'{provider.name}_{file_name}') + url = ctx.storage.get_file_url(file_name) + + submission = DataSubmission(file_name, url, provider) + ctx.submissions.add(submission) + ctx.logger.info('Submission added') + + +def list_data_submissions_by_provider( + ctx: ApplicationContext, provider_name: str +) -> List[DataSubmission]: + provider = ctx.providers.get_by_name(provider_name) + if not provider: + ctx.logger.error('Provider with that name does not exist') + return + + submissions = ctx.submissions.get_by_provider(provider) + ctx.logger.info(f'Data submissions for {provider.name}') + for s in submissions: + ctx.logger.info(f'{s.provider.name}: {s.file_name}') + + return submissions diff --git a/tests/mocks.py b/tests/mocks.py index 1b80e2b..ed3b942 100644 --- a/tests/mocks.py +++ b/tests/mocks.py @@ -1,6 +1,6 @@ from typing import Optional -from nad_ch.domain.entities import DataProvider -from nad_ch.domain.repositories import DataProviderRepository +from nad_ch.domain.entities import DataProvider, DataSubmission +from nad_ch.domain.repositories import DataProviderRepository, DataSubmissionRepository class MockDataProviderRepository(DataProviderRepository): @@ -8,13 +8,31 @@ def __init__(self) -> None: self._providers = set() self._next_id = 1 - def add(self, provider: DataProvider) -> None: + def add(self, provider: DataProvider) -> DataProvider: provider.id = self._next_id self._providers.add(provider) self._next_id += 1 + return provider def get_by_name(self, name: str) -> Optional[DataProvider]: return next((p for p in self._providers if p.name == name), None) def get_all(self): return sorted(list(self._providers), key=lambda provider: provider.id) + + +class MockDataSubmissionRepository(DataSubmissionRepository): + def __init__(self) -> None: + self._submissions = set() + self._next_id = 1 + + def add(self, submission: DataSubmission) -> DataSubmission: + submission.id = self._next_id + self._submissions.add(submission) + self._next_id += 1 + return submission + + def get_by_provider(self, provider: DataProvider) -> Optional[DataSubmission]: + return next( + (s for s in self._submissions if s.provider.name == provider.name), None + ) From 2e39231f1dbe1287e6c7f267a8baa4162ef0be95 Mon Sep 17 00:00:00 2001 From: Andy Kuny Date: Tue, 19 Dec 2023 11:53:30 -0500 Subject: [PATCH 04/11] Add basic use cases and tests --- nad_ch/application_context.py | 12 ++++++++---- nad_ch/infrastructure/database.py | 14 ++++++++++++++ tests/{mocks.py => fakes.py} | 22 ++++++++++++++++++---- tests/test_use_cases.py | 28 +++++++++++++++++++++++++++- 4 files changed, 67 insertions(+), 9 deletions(-) rename tests/{mocks.py => fakes.py} (64%) diff --git a/nad_ch/application_context.py b/nad_ch/application_context.py index a41ff07..7da70f5 100644 --- a/nad_ch/application_context.py +++ b/nad_ch/application_context.py @@ -8,7 +8,11 @@ ) from nad_ch.infrastructure.logger import Logger from nad_ch.infrastructure.storage import LocalStorage -from tests.mocks import MockDataProviderRepository, MockDataSubmissionRepository +from tests.fakes import ( + FakeDataProviderRepository, + FakeDataSubmissionRepository, + FakeStorage +) class ApplicationContext: @@ -37,10 +41,10 @@ def storage(self): class TestApplicationContext(ApplicationContext): def __init__(self): - self._providers = MockDataProviderRepository() - self._submissions = MockDataSubmissionRepository() + self._providers = FakeDataProviderRepository() + self._submissions = FakeDataSubmissionRepository() self._logger = Logger(__name__, logging.DEBUG) - self._storage = LocalStorage(STORAGE_PATH) + self._storage = FakeStorage() @property def providers(self): diff --git a/nad_ch/infrastructure/database.py b/nad_ch/infrastructure/database.py index 92d02af..01b41d3 100644 --- a/nad_ch/infrastructure/database.py +++ b/nad_ch/infrastructure/database.py @@ -119,6 +119,20 @@ def add(self, submission: DataSubmission) -> DataSubmission: ) return submission_model.to_entity(provider_model.to_entity()) + def get_by_name(self, file_name: str) -> Optional[DataSubmission]: + with self.session_factory() as session: + submission_model = ( + session.query(DataSubmissionModel) + .filter(DataSubmissionModel.name == file_name) + .first() + ) + provider_model = ( + session.query(DataProviderModel) + .filter(DataProviderModel.id == submission_model.data_provider_id) + .first() + ) + return submission_model.to_entity(provider_model.to_entity()) + def get_by_provider(self, provider: DataProvider) -> List[DataSubmission]: with self.session_factory() as session: submission_models = ( diff --git a/tests/mocks.py b/tests/fakes.py similarity index 64% rename from tests/mocks.py rename to tests/fakes.py index ed3b942..c1531ea 100644 --- a/tests/mocks.py +++ b/tests/fakes.py @@ -3,7 +3,7 @@ from nad_ch.domain.repositories import DataProviderRepository, DataSubmissionRepository -class MockDataProviderRepository(DataProviderRepository): +class FakeDataProviderRepository(DataProviderRepository): def __init__(self) -> None: self._providers = set() self._next_id = 1 @@ -21,7 +21,7 @@ def get_all(self): return sorted(list(self._providers), key=lambda provider: provider.id) -class MockDataSubmissionRepository(DataSubmissionRepository): +class FakeDataSubmissionRepository(DataSubmissionRepository): def __init__(self) -> None: self._submissions = set() self._next_id = 1 @@ -32,7 +32,21 @@ def add(self, submission: DataSubmission) -> DataSubmission: self._next_id += 1 return submission - def get_by_provider(self, provider: DataProvider) -> Optional[DataSubmission]: + def get_by_name(self, file_name: str) -> Optional[DataSubmission]: return next( - (s for s in self._submissions if s.provider.name == provider.name), None + (s for s in self._submissions if s.file_name == file_name), None ) + + def get_by_provider(self, provider: DataProvider) -> Optional[DataSubmission]: + return [s for s in self._submissions if s.provider.name == provider.name] + + +class FakeStorage(): + def __init__(self): + self._files = set() + + def upload(self, source: str, destination: str) -> None: + self._files.add(destination) + + def get_file_url(self, file_name: str) -> str: + return file_name diff --git a/tests/test_use_cases.py b/tests/test_use_cases.py index 86589da..ceba712 100644 --- a/tests/test_use_cases.py +++ b/tests/test_use_cases.py @@ -1,9 +1,10 @@ import pytest from nad_ch.application_context import create_app_context -from nad_ch.domain.entities import DataProvider +from nad_ch.domain.entities import DataProvider, DataSubmission from nad_ch.use_cases import ( add_data_provider, list_data_providers, + ingest_data_submission ) @@ -57,3 +58,28 @@ def test_list_multiple_data_providers(app_context): assert len(providers) == 2 assert providers[0].name == first_name assert providers[1].name == second_name + + +def test_ingest_data_submission(app_context): + provider_name = 'State X' + add_data_provider(app_context, provider_name) + + file_name = 'my_cool_file.txt' + ingest_data_submission(app_context, file_name, provider_name) + + submission = app_context.submissions.get_by_name(file_name) + assert submission.file_name == file_name + assert isinstance(submission, DataSubmission) is True + + +def test_list_data_submissions_by_provider(app_context): + provider_name = 'State X' + add_data_provider(app_context, provider_name) + + file_name = 'my_cool_file.txt' + ingest_data_submission(app_context, file_name, provider_name) + + provider = app_context.providers.get_by_name(provider_name) + submissions = app_context.submissions.get_by_provider(provider) + print(submissions) + assert len(submissions) == 1 From 14624a337730c8a8d64de5139512fcf639602527 Mon Sep 17 00:00:00 2001 From: Andy Kuny Date: Tue, 19 Dec 2023 12:11:37 -0500 Subject: [PATCH 05/11] Fix error in submission repo --- nad_ch/infrastructure/database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nad_ch/infrastructure/database.py b/nad_ch/infrastructure/database.py index 01b41d3..89fd0be 100644 --- a/nad_ch/infrastructure/database.py +++ b/nad_ch/infrastructure/database.py @@ -123,7 +123,7 @@ def get_by_name(self, file_name: str) -> Optional[DataSubmission]: with self.session_factory() as session: submission_model = ( session.query(DataSubmissionModel) - .filter(DataSubmissionModel.name == file_name) + .filter(DataSubmissionModel.file_name == file_name) .first() ) provider_model = ( From 764e20f945da55fb8a88238f70e139b6fd7e6608 Mon Sep 17 00:00:00 2001 From: Andy Kuny Date: Tue, 19 Dec 2023 13:08:38 -0500 Subject: [PATCH 06/11] Refactor to remove extraneous query --- nad_ch/infrastructure/database.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/nad_ch/infrastructure/database.py b/nad_ch/infrastructure/database.py index 89fd0be..e246987 100644 --- a/nad_ch/infrastructure/database.py +++ b/nad_ch/infrastructure/database.py @@ -121,17 +121,21 @@ def add(self, submission: DataSubmission) -> DataSubmission: def get_by_name(self, file_name: str) -> Optional[DataSubmission]: with self.session_factory() as session: - submission_model = ( - session.query(DataSubmissionModel) + result = ( + session.query(DataSubmissionModel, DataProviderModel) + .join( + DataProviderModel, DataProviderModel.id == + DataSubmissionModel.data_provider_id + ) .filter(DataSubmissionModel.file_name == file_name) .first() ) - provider_model = ( - session.query(DataProviderModel) - .filter(DataProviderModel.id == submission_model.data_provider_id) - .first() - ) - return submission_model.to_entity(provider_model.to_entity()) + + if result: + submission_model, provider_model = result + return submission_model.to_entity(provider_model.to_entity()) + else: + return None def get_by_provider(self, provider: DataProvider) -> List[DataSubmission]: with self.session_factory() as session: From 871de7ac7e1c767034e2bfe7302c23bd8a86e56c Mon Sep 17 00:00:00 2001 From: Andy Kuny Date: Tue, 19 Dec 2023 14:32:41 -0500 Subject: [PATCH 07/11] Factor out base classes for entities and sqlalchemy models, include created_at and updated_at --- nad_ch/domain/entities.py | 27 +++++++++++++---- nad_ch/infrastructure/database.py | 43 +++++++++++++++++++++------ tests/infrastructure/test_database.py | 10 ++++--- tests/test_use_cases.py | 1 - 4 files changed, 61 insertions(+), 20 deletions(-) diff --git a/nad_ch/domain/entities.py b/nad_ch/domain/entities.py index 02456d1..4b9c903 100644 --- a/nad_ch/domain/entities.py +++ b/nad_ch/domain/entities.py @@ -1,13 +1,27 @@ -class DataProvider: - def __init__(self, name: str, id: int = None): +class Entity: + def __init__(self, id: int = None): self.id = id + self.created_at = None + self.updated_at = None + + def set_created_at(self, created_at): + self.created_at = created_at + + def set_updated_at(self, updated_at): + self.updated_at = updated_at + + +class DataProvider(Entity): + def __init__(self, name: str, id: int = None): + super().__init__(id) self.name = name def __repr__(self): - return f'DataProvider {self.id}, {self.name}' + return f'DataProvider {self.id}, {self.name} \ + (created: {self.created_at}; updated: {self.updated_at})' -class DataSubmission: +class DataSubmission(Entity): def __init__( self, file_name: str, @@ -15,11 +29,12 @@ def __init__( provider: DataProvider, id: int = None, ): - self.id = id + super().__init__(id) self.file_name = file_name self.url = url self.provider = provider def __repr__(self): return f'DataSubmission \ - {self.id}, {self.file_name}, {self.url}, {self.provider}' + {self.id}, {self.file_name}, {self.url}, {self.provider} \ + (created: {self.created_at}; updated: {self.updated_at})' diff --git a/nad_ch/infrastructure/database.py b/nad_ch/infrastructure/database.py index e246987..40721e7 100644 --- a/nad_ch/infrastructure/database.py +++ b/nad_ch/infrastructure/database.py @@ -1,6 +1,7 @@ from typing import List, Optional -from sqlalchemy import Column, Integer, String, create_engine, ForeignKey +from sqlalchemy import Column, Integer, String, create_engine, ForeignKey, DateTime from sqlalchemy.orm import sessionmaker, declarative_base, relationship +from sqlalchemy.sql import func import contextlib from nad_ch.config import DATABASE_URL from nad_ch.domain.entities import DataProvider, DataSubmission @@ -27,10 +28,17 @@ def session_scope(): ModelBase = declarative_base() -class DataProviderModel(ModelBase): - __tablename__ = 'data_providers' +class CommonBase(ModelBase): + __abstract__ = True id = Column(Integer, primary_key=True) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column(DateTime(timezone=True), onupdate=func.now()) + + +class DataProviderModel(CommonBase): + __tablename__ = 'data_providers' + name = Column(String) data_submissions = relationship( @@ -40,16 +48,24 @@ class DataProviderModel(ModelBase): @staticmethod def from_entity(provider): - return DataProviderModel(id=provider.id, name=provider.name) + model = DataProviderModel(id=provider.id, name=provider.name) + return model def to_entity(self): - return DataProvider(id=self.id, name=self.name) + entity = DataProvider(id=self.id, name=self.name) + + if self.created_at is not None: + entity.set_created_at(self.created_at) + if self.updated_at is not None: + entity.set_updated_at(self.updated_at) -class DataSubmissionModel(ModelBase): + return entity + + +class DataSubmissionModel(CommonBase): __tablename__ = 'data_submissions' - id = Column(Integer, primary_key=True) file_name = Column(String) url = Column(String) data_provider_id = Column(Integer, ForeignKey('data_providers.id')) @@ -58,21 +74,30 @@ class DataSubmissionModel(ModelBase): @staticmethod def from_entity(submission): - return DataSubmissionModel( + model = DataSubmissionModel( id=submission.id, file_name=submission.file_name, url=submission.url, data_provider_id=submission.provider.id ) + return model def to_entity(self, provider: DataProvider): - return DataSubmission( + entity = DataSubmission( id=self.id, file_name=self.file_name, url=self.url, provider=provider ) + if self.created_at is not None: + entity.set_created_at(self.created_at) + + if self.updated_at is not None: + entity.set_updated_at(self.updated_at) + + return entity + class SqlAlchemyDataProviderRepository(DataProviderRepository): def __init__(self, session_factory): diff --git a/tests/infrastructure/test_database.py b/tests/infrastructure/test_database.py index b66a0f8..18ad2a4 100644 --- a/tests/infrastructure/test_database.py +++ b/tests/infrastructure/test_database.py @@ -48,10 +48,11 @@ def test_add_data_provider_to_repository_and_get_by_name(providers): providers.add(new_provider) - retreived_provider = providers.get_by_name(provider_name) - assert retreived_provider.id == 1 - assert retreived_provider.name == provider_name - assert isinstance(retreived_provider, DataProvider) is True + retrieved_provider = providers.get_by_name(provider_name) + assert retrieved_provider.id == 1 + assert retrieved_provider.created_at is not None + assert retrieved_provider.name == provider_name + assert isinstance(retrieved_provider, DataProvider) is True def test_add_data_provider_and_then_data_submission(providers, submissions): @@ -64,6 +65,7 @@ def test_add_data_provider_and_then_data_submission(providers, submissions): result = submissions.add(new_submission) assert result.id == 1 + assert result.created_at is not None assert result.provider.id == saved_provider.id assert result.file_name == 'some-file-name' assert result.url == 'some-url' diff --git a/tests/test_use_cases.py b/tests/test_use_cases.py index ceba712..19b08d1 100644 --- a/tests/test_use_cases.py +++ b/tests/test_use_cases.py @@ -81,5 +81,4 @@ def test_list_data_submissions_by_provider(app_context): provider = app_context.providers.get_by_name(provider_name) submissions = app_context.submissions.get_by_provider(provider) - print(submissions) assert len(submissions) == 1 From bf178f04483325a775572e1ea4f419bcd7f91d11 Mon Sep 17 00:00:00 2001 From: Andy Kuny Date: Tue, 19 Dec 2023 14:55:31 -0500 Subject: [PATCH 08/11] Set default value of updated_at --- nad_ch/infrastructure/database.py | 2 +- tests/infrastructure/test_database.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/nad_ch/infrastructure/database.py b/nad_ch/infrastructure/database.py index 40721e7..4995d09 100644 --- a/nad_ch/infrastructure/database.py +++ b/nad_ch/infrastructure/database.py @@ -33,7 +33,7 @@ class CommonBase(ModelBase): id = Column(Integer, primary_key=True) created_at = Column(DateTime(timezone=True), server_default=func.now()) - updated_at = Column(DateTime(timezone=True), onupdate=func.now()) + updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) class DataProviderModel(CommonBase): diff --git a/tests/infrastructure/test_database.py b/tests/infrastructure/test_database.py index 18ad2a4..b01304a 100644 --- a/tests/infrastructure/test_database.py +++ b/tests/infrastructure/test_database.py @@ -51,6 +51,7 @@ def test_add_data_provider_to_repository_and_get_by_name(providers): retrieved_provider = providers.get_by_name(provider_name) assert retrieved_provider.id == 1 assert retrieved_provider.created_at is not None + assert retrieved_provider.updated_at is not None assert retrieved_provider.name == provider_name assert isinstance(retrieved_provider, DataProvider) is True @@ -66,6 +67,7 @@ def test_add_data_provider_and_then_data_submission(providers, submissions): assert result.id == 1 assert result.created_at is not None + assert result.updated_at is not None assert result.provider.id == saved_provider.id assert result.file_name == 'some-file-name' assert result.url == 'some-url' From 0004507f780950fde7334fc98bd34d424916fe0f Mon Sep 17 00:00:00 2001 From: Andy Kuny Date: Tue, 19 Dec 2023 14:56:36 -0500 Subject: [PATCH 09/11] Fix linting error --- nad_ch/infrastructure/database.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nad_ch/infrastructure/database.py b/nad_ch/infrastructure/database.py index 4995d09..39148d4 100644 --- a/nad_ch/infrastructure/database.py +++ b/nad_ch/infrastructure/database.py @@ -33,7 +33,9 @@ class CommonBase(ModelBase): id = Column(Integer, primary_key=True) created_at = Column(DateTime(timezone=True), server_default=func.now()) - updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) + updated_at = Column( + DateTime(timezone=True), server_default=func.now(), onupdate=func.now() + ) class DataProviderModel(CommonBase): From 4d89a8c8c224cd8762d1bc843409dd33cb9783d2 Mon Sep 17 00:00:00 2001 From: Andy Kuny Date: Thu, 28 Dec 2023 10:11:39 -0500 Subject: [PATCH 10/11] Renamve parameter in use case --- nad_ch/use_cases.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nad_ch/use_cases.py b/nad_ch/use_cases.py index d9a7109..d5864ce 100644 --- a/nad_ch/use_cases.py +++ b/nad_ch/use_cases.py @@ -30,10 +30,10 @@ def list_data_providers(ctx: ApplicationContext) -> List[DataProvider]: def ingest_data_submission( - ctx: ApplicationContext, file_name: str, provider_name: str + ctx: ApplicationContext, file_path: str, provider_name: str ) -> None: - if not file_name: - ctx.logger.error('File name required') + if not file_path: + ctx.logger.error('File path required') return provider = ctx.providers.get_by_name(provider_name) @@ -41,10 +41,10 @@ def ingest_data_submission( ctx.logger.error('Provider with that name does not exist') return - ctx.storage.upload(file_name, f'{provider.name}_{file_name}') - url = ctx.storage.get_file_url(file_name) + ctx.storage.upload(file_path, f'{provider.name}_{file_path}') + url = ctx.storage.get_file_url(file_path) - submission = DataSubmission(file_name, url, provider) + submission = DataSubmission(file_path, url, provider) ctx.submissions.add(submission) ctx.logger.info('Submission added') From 7d5c1b625cbcb1daca028cd57b579b4d51703390 Mon Sep 17 00:00:00 2001 From: Andy Kuny Date: Thu, 28 Dec 2023 10:16:08 -0500 Subject: [PATCH 11/11] Add error handling to ingest use case --- nad_ch/infrastructure/storage.py | 5 +++++ nad_ch/use_cases.py | 16 ++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/nad_ch/infrastructure/storage.py b/nad_ch/infrastructure/storage.py index 60068df..4b83e68 100644 --- a/nad_ch/infrastructure/storage.py +++ b/nad_ch/infrastructure/storage.py @@ -12,5 +12,10 @@ def _full_path(self, path: str) -> str: def upload(self, source: str, destination: str) -> None: shutil.copy(source, self._full_path(destination)) + def delete(self, file_path: str) -> None: + full_file_path = self._full_path(file_path) + if os.path.exists(full_file_path): + os.remove(full_file_path) + def get_file_url(self, file_name: str) -> str: return file_name diff --git a/nad_ch/use_cases.py b/nad_ch/use_cases.py index d5864ce..acc0ed5 100644 --- a/nad_ch/use_cases.py +++ b/nad_ch/use_cases.py @@ -41,12 +41,16 @@ def ingest_data_submission( ctx.logger.error('Provider with that name does not exist') return - ctx.storage.upload(file_path, f'{provider.name}_{file_path}') - url = ctx.storage.get_file_url(file_path) - - submission = DataSubmission(file_path, url, provider) - ctx.submissions.add(submission) - ctx.logger.info('Submission added') + try: + ctx.storage.upload(file_path, f'{provider.name}_{file_path}') + url = ctx.storage.get_file_url(file_path) + + submission = DataSubmission(file_path, url, provider) + ctx.submissions.add(submission) + ctx.logger.info('Submission added') + except Exception as e: + ctx.storage.delete(file_path) + ctx.logger.error(f'Failed to process submission: {e}') def list_data_submissions_by_provider(