diff --git a/Dockerfile b/Dockerfile
index f503a9a..0b2d984 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,6 +7,6 @@
 RUN apk add git build-base musl-dev linux-headers
 WORKDIR /app
 ADD . .
-RUN pip install --editable ".[dev,test,docs,ui]"
+RUN pip install --editable ".[ui]"
 
 ENTRYPOINT ["aross-stations-db"]
diff --git a/README.md b/README.md
index 442c3a4..1ecaf90 100644
--- a/README.md
+++ b/README.md
@@ -169,6 +169,9 @@
 Now, you can use Adminer's SQL Query menu to select some data:
 
 Example SQL query
+This query returns 13 results at the time of this writing, but it may return more in
+the future.
+
 ```sql
 select event.*
 from event
@@ -183,6 +186,8 @@ where
   )
   AND event.time_start > '2023-01-01'::date
   AND event.time_end < '2023-06-01'::date
+  AND event.snow_on_ground
+  AND event.rain_hours >= 1
 ;
 ```
 
@@ -236,7 +241,9 @@ docker compose down
 
 ##### Database
 
-Remove the `_db/` directory to start over with a fresh database.
+There is no need to remove the `_data/` directory to start over with a fresh database;
+the `init` CLI command will do that for you! However, if you want to completely remove
+the database to reclaim disk space, delete the `_data/` directory.
 
 ##### Containers and images
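For comparison, the two new filters can also be expressed through the ORM models this change introduces. The following is a minimal sketch, not part of the change set: it assumes the `Event` model from `src/aross_stations_db/db/tables.py` and the `Settings` object as used in `cli.py`, and it reproduces only the date and new-column filters (the elided station subquery is omitted):

```python
# Minimal sketch: the README query's filters via SQLAlchemy. Event and
# Settings are real names from this change; everything else is illustrative.
import datetime as dt

from sqlalchemy import select
from sqlalchemy.orm import Session

from aross_stations_db.config import Settings
from aross_stations_db.db.tables import Event

config = Settings()  # type: ignore[call-arg]
with Session(config.db_engine) as session:
    events = session.scalars(
        select(Event).where(
            Event.time_start > dt.datetime(2023, 1, 1),
            Event.time_end < dt.datetime(2023, 6, 1),
            Event.snow_on_ground.is_(True),
            Event.rain_hours >= 1,
        )
    ).all()
```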
diff --git a/noxfile.py b/noxfile.py
index b3e53fe..3689473 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -15,14 +15,14 @@
 
 @nox.session
 def typecheck(session: nox.Session) -> None:
-    session.install(".[test]")
+    session.install("--editable", ".[test]")
     session.run("mypy")
 
 
 @nox.session
 def test(session: nox.Session) -> None:
     """Run the unit and regular tests."""
-    session.install(".[test]")
+    session.install("--editable", ".[test]")
     session.run("pytest", *session.posargs)
 
 
@@ -45,7 +45,7 @@ def build_docs(session: nox.Session) -> None:
 
     extra_installs = ["sphinx-autobuild"] if args.serve else []
 
-    session.install("-e.[docs]", *extra_installs)
+    session.install("--editable", ".[docs]", *extra_installs)
     session.chdir("docs")
 
     if args.builder == "linkcheck":
diff --git a/src/aross_stations_db/api/v1/climatology.py b/src/aross_stations_db/api/v1/climatology.py
index 3a10f50..99452d4 100644
--- a/src/aross_stations_db/api/v1/climatology.py
+++ b/src/aross_stations_db/api/v1/climatology.py
@@ -10,7 +10,7 @@
     ClimatologyJsonElement,
     climatology_query_results_to_json,
 )
-from aross_stations_db.query import climatology_query
+from aross_stations_db.db.query import climatology_query
 
 
 router = APIRouter()
diff --git a/src/aross_stations_db/api/v1/stations.py b/src/aross_stations_db/api/v1/stations.py
index 47ac6c1..e7e8bdb 100644
--- a/src/aross_stations_db/api/v1/stations.py
+++ b/src/aross_stations_db/api/v1/stations.py
@@ -10,7 +10,7 @@
     StationsGeoJson,
     stations_query_results_to_geojson,
 )
-from aross_stations_db.query import stations_query
+from aross_stations_db.db.query import stations_query
 
 
 router = APIRouter()
diff --git a/src/aross_stations_db/api/v1/timeseries.py b/src/aross_stations_db/api/v1/timeseries.py
index 30cdc48..65a94e9 100644
--- a/src/aross_stations_db/api/v1/timeseries.py
+++ b/src/aross_stations_db/api/v1/timeseries.py
@@ -10,7 +10,7 @@
     TimeseriesJsonElement,
     timeseries_query_results_to_json,
 )
-from aross_stations_db.query import timeseries_query
+from aross_stations_db.db.query import timeseries_query
 
 
 router = APIRouter()
diff --git a/src/aross_stations_db/cli.py b/src/aross_stations_db/cli.py
index 00aa499..dc0cace 100644
--- a/src/aross_stations_db/cli.py
+++ b/src/aross_stations_db/cli.py
@@ -3,10 +3,10 @@
 from sqlalchemy.orm import Session
 
 from aross_stations_db.config import CliLoadSettings, Settings
-from aross_stations_db.db import (
-    create_tables,
+from aross_stations_db.db.setup import (
     load_events,
     load_stations,
+    recreate_tables,
 )
 from aross_stations_db.source_data import (
     get_events,
@@ -21,15 +21,15 @@ def cli() -> None:
 
 
 @cli.command
 def init() -> None:
-    """Create the database tables."""
+    """Create the database tables, dropping any that pre-exist."""
     # TODO: False-positive. Remove type-ignore.
     # See: https://github.com/pydantic/pydantic/issues/6713
     config = Settings()  # type:ignore[call-arg]
 
     with Session(config.db_engine) as db_session:
-        create_tables(db_session)
+        recreate_tables(db_session)
 
-    logger.success("Tables created")
+    logger.success("Database initialized")
 
 
 @cli.command
diff --git a/src/aross_stations_db/db/__init__.py b/src/aross_stations_db/db/__init__.py
index 092a777..e69de29 100644
--- a/src/aross_stations_db/db/__init__.py
+++ b/src/aross_stations_db/db/__init__.py
@@ -1,50 +0,0 @@
-import datetime as dt
-from collections.abc import Iterator
-
-from sqlalchemy.orm import Session
-
-from aross_stations_db.db.tables import Base, Event, Station
-
-
-def create_tables(session: Session) -> None:
-    """Create all tables.
-
-    IMPORTANT: Because this data is purely derived and can be loaded in a reasonable
-    amount of time, there is no need to ever drop tables or migrate data. We
-    just start with a fresh database every time we need to change the
-    structure.
-    """
-    Base.metadata.create_all(session.get_bind())
-
-
-def load_stations(stations: list[dict[str, str]], *, session: Session) -> None:
-    session.add_all(
-        [
-            Station(
-                id=station["stid"],
-                name=station["station_name"],
-                country_code=station["country"],
-                location=_station_location_wkt(station),
-            )
-            for station in stations
-        ]
-    )
-    session.commit()
-
-
-def load_events(events: Iterator[dict[str, str]], *, session: Session) -> None:
-    session.add_all(
-        [
-            Event(
-                station_id=event["station_id"],
-                time_start=dt.datetime.fromisoformat(event["start"]),
-                time_end=dt.datetime.fromisoformat(event["end"]),
-            )
-            for event in events
-        ]
-    )
-    session.commit()
-
-
-def _station_location_wkt(station: dict[str, str]) -> str:
-    return f"SRID=4326;POINT({station['longitude']} {station['latitude']})"
diff --git a/src/aross_stations_db/query.py b/src/aross_stations_db/db/query.py
similarity index 100%
rename from src/aross_stations_db/query.py
rename to src/aross_stations_db/db/query.py
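The new `db/setup.py` below drops tables by reflecting them from the live database rather than from `Base.metadata`, so tables are cleaned up even if the models have been renamed since the tables were created. As a standalone illustration of that mechanism (the connection URL here is a placeholder, not taken from this repo), `MetaData.reflect` with an `only=` callable works like this:

```python
# Standalone sketch of the reflection filter used in db/setup.py below.
# The connection URL is a placeholder.
from sqlalchemy import MetaData, create_engine

engine = create_engine("postgresql+psycopg://user:password@localhost/aross")

# Reflect every table in the default "public" schema except PostGIS's
# spatial_ref_sys system table, which must survive a re-init.
md = MetaData(schema="public")
md.reflect(bind=engine, only=lambda tablename, _: tablename != "spatial_ref_sys")

print(sorted(md.tables))  # all public tables except spatial_ref_sys
```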
diff --git a/src/aross_stations_db/db/setup.py b/src/aross_stations_db/db/setup.py
new file mode 100644
index 0000000..53c94cf
--- /dev/null
+++ b/src/aross_stations_db/db/setup.py
@@ -0,0 +1,90 @@
+import datetime as dt
+from collections.abc import Iterator
+
+from sqlalchemy import MetaData
+from sqlalchemy.orm import Session
+
+from aross_stations_db.db.tables import Base, Event, Station
+
+
+def _tables_to_drop(session: Session) -> MetaData:
+    """Select our application's tables for dropping.
+
+    This isn't as simple as it sounds, because we want to drop tables, even if we've
+    changed the name of the table, without dealing with migrations. We just want to
+    start over, and we don't want the user to have to know to delete the database files.
+
+    WARNING: This function is fragile! If extensions other than PostGIS are installed,
+    or PostGIS changes its use of system tables, or this is deployed using a different
+    database than the official PostGIS docker image, it may not work!
+    """
+    # NOTE: "public" is the default schema. Because we're using the out-of-the-box
+    # postgres container image config, this works.
+    reflected_md = MetaData(schema="public")
+    reflected_md.reflect(
+        bind=session.get_bind(),
+        only=lambda tablename, _: tablename != "spatial_ref_sys",
+    )
+    return reflected_md
+
+
+def recreate_tables(session: Session) -> None:
+    """Create all application tables, dropping any pre-existing tables."""
+    tables_to_drop = _tables_to_drop(session)
+    tables_to_drop.drop_all(bind=session.get_bind())
+
+    Base.metadata.create_all(session.get_bind())
+    session.commit()
+
+
+def load_stations(stations: list[dict[str, str]], *, session: Session) -> None:
+    session.add_all(
+        [
+            Station(
+                id=station["stid"],
+                name=station["station_name"],
+                country_code=station["country"],
+                # HACK: Passing a string for location is "wrong" here, but it's working.
+                # Something is being handled implicitly to convert the string to binary
+                # (WKB).
+                location=_station_location_wkt(station),  # type: ignore[arg-type]
+            )
+            for station in stations
+        ]
+    )
+    session.commit()
+
+
+def load_events(events: Iterator[dict[str, str]], *, session: Session) -> None:
+    session.add_all(
+        [
+            Event(
+                station_id=event["station_id"],
+                time_start=dt.datetime.fromisoformat(event["start"]),
+                time_end=dt.datetime.fromisoformat(event["end"]),
+                snow_on_ground=_snow_on_ground_status(event["sog"]),
+                rain_hours=int(event["RA"]),
+                freezing_rain_hours=int(event["FZRA"]),
+                solid_precipitation_hours=int(event["SOLID"]),
+                unknown_precipitation_hours=int(event["UP"]),
+            )
+            for event in events
+        ]
+    )
+    session.commit()
+
+
+def _station_location_wkt(station: dict[str, str]) -> str:
+    return f"SRID=4326;POINT({station['longitude']} {station['latitude']})"
+
+
+def _snow_on_ground_status(sog_str: str) -> bool | None:
+    if sog_str == "":
+        return None
+    if sog_str.lower() == "true":
+        return True
+    if sog_str.lower() == "false":
+        return False
+
+    msg = f"Unexpected snow-on-ground value: {sog_str}"
+    raise RuntimeError(msg)
diff --git a/src/aross_stations_db/db/tables.py b/src/aross_stations_db/db/tables.py
index ab88cc7..54fd3c3 100644
--- a/src/aross_stations_db/db/tables.py
+++ b/src/aross_stations_db/db/tables.py
@@ -6,12 +6,13 @@
 from sqlalchemy.orm import (
     DeclarativeBase,
     Mapped,
+    MappedAsDataclass,
     mapped_column,
     relationship,
 )
 
 
-class Base(DeclarativeBase):
+class Base(MappedAsDataclass, DeclarativeBase):
     pass
 
 
@@ -40,5 +41,18 @@ class Event(Base):
     time_start: Mapped[dt.datetime] = mapped_column(primary_key=True)
     time_end: Mapped[dt.datetime] = mapped_column(primary_key=True)
 
-    # TODO: More fields: duration,RA,UP,FZRA,SOLID,t2m_mean,t2m_min,t2m_max,sog
-    # Don't think we need to keep duration.
+    # Was there snow on the ground during this event? Only available after 2004 for
+    # some stations, never available for other stations.
+    snow_on_ground: Mapped[bool | None] = mapped_column(index=True)
+
+    # During how many hours of this event was rain (or another precipitation type)
+    # detected? These precipitation types are detected by a horizontal beam that the
+    # precipitation falls through.
+    rain_hours: Mapped[int] = mapped_column(index=True)
+    freezing_rain_hours: Mapped[int] = mapped_column(index=True)
+    # Solid precipitation = snow, ice, graupel, hail, etc.
+    solid_precipitation_hours: Mapped[int] = mapped_column(index=True)
+    unknown_precipitation_hours: Mapped[int] = mapped_column(index=True)
+
+    # TODO: More fields: duration,t2m_mean,t2m_min,t2m_max
+    # I don't think we need to keep duration.
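Taken together, a hypothetical loading script using the new module might look like the following. The dict keys mirror exactly what `load_stations`/`load_events` read above; the station and event values themselves are invented, and `Settings` is used as in `cli.py`:

```python
# Hypothetical end-to-end use of db.setup; sample record values are invented,
# but the dict keys match what load_stations/load_events expect.
from sqlalchemy.orm import Session

from aross_stations_db.config import Settings
from aross_stations_db.db.setup import load_events, load_stations, recreate_tables

config = Settings()  # type: ignore[call-arg]

station = {
    "stid": "XYZ1",  # invented example values
    "station_name": "Example Station",
    "country": "US",
    "longitude": "-147.1",
    "latitude": "64.7",
}
event = {
    "station_id": "XYZ1",
    "start": "2023-01-15T06:00:00",
    "end": "2023-01-15T09:00:00",
    "sog": "true",  # parsed by _snow_on_ground_status; "" becomes None
    "RA": "2",
    "FZRA": "0",
    "SOLID": "1",
    "UP": "0",
}

with Session(config.db_engine) as session:
    recreate_tables(session)
    load_stations([station], session=session)
    load_events(iter([event]), session=session)
```

`load_events` is annotated to take an `Iterator`, hence the `iter()` wrapper; at runtime any iterable of these dicts would work.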