diff --git a/schedview/collect/opsim.py b/schedview/collect/opsim.py
index 132399ba..c43f7811 100644
--- a/schedview/collect/opsim.py
+++ b/schedview/collect/opsim.py
@@ -23,6 +23,45 @@ def _all_visits_columns():
     return current_cols.union(backwards_cols)
 
 
+def _normalize_opsim_columns(opsim_rp: ResourcePath, dbcols: list[str]):
+    # At least one opsim column has been renamed since the start of
+    # simulations. The mapping between new and old names can be found in
+    # rubin_scheduler.scheduler.utils.SchemaConverter.backwards.
+
+    # We want our queries to work the same whether run on a simulation from
+    # before or after the name change. So, if a requested column is missing,
+    # see if it is the renamed value of one that used to exist, and if so,
+    # use the old column name instead.
+
+    # In addition to returning the list of columns with the replaced column
+    # names, return the mapping so that a table's column headings can be
+    # updated from the old names used to the new (requested) names.
+
+    with opsim_rp.as_local() as local_obs_path:
+        with sqlite3.connect(local_obs_path.ospath) as sim_connection:
+            query = "SELECT name FROM PRAGMA_TABLE_INFO('observations');"
+            present_columns = set(pd.read_sql(query, sim_connection).name.values)
+
+    new_columns = []
+    used_column_map = {}
+    backwards_column_map = {v: k for k, v in SchemaConverter().backwards.items()}
+    for column in dbcols:
+        if column in present_columns:
+            new_columns.append(column)
+        elif column in backwards_column_map:
+            old_column = backwards_column_map[column]
+            if old_column in present_columns:
+                warn(f"Column {column} not found in {opsim_rp}, using deprecated {old_column} instead")
+                used_column_map[old_column] = column
+                new_columns.append(old_column)
+            else:
+                warn(f"Neither column {column} nor deprecated {old_column} found in {opsim_rp}, skipping.")
+        else:
+            warn(f"Column {column} not found in {opsim_rp}, skipping.")
+
+    return new_columns, used_column_map
+
+
 def read_opsim(
     opsim_uri,
     start_time=None,
@@ -87,13 +126,21 @@ def read_opsim(
         with sqlite3.connect(local_obs_path.ospath) as sim_connection:
             if dbcols is None:
                 col_query = "SELECT name FROM PRAGMA_TABLE_INFO('observations')"
-                dbcols = [
+                raw_dbcols = [
                     c for c in pd.read_sql(col_query, sim_connection).name if c in _all_visits_columns()
                 ]
+                # Update any outdated column names
+                backwards = SchemaConverter().backwards
+                dbcols = [(backwards[c] if c in backwards else c) for c in raw_dbcols]
+
+            norm_columns, used_column_map = _normalize_opsim_columns(obs_path, dbcols)
+
             try:
                 try:
-                    visits = pd.DataFrame(maf.get_sim_data(sim_connection, constraint, dbcols, **kwargs))
+                    visits = pd.DataFrame(
+                        maf.get_sim_data(sim_connection, constraint, norm_columns, **kwargs)
+                    )
                 except UserWarning:
                     warn("No visits match constraints.")
                     visits = (
@@ -110,13 +157,17 @@ def read_opsim(
                             f"Argument {list(kwargs)[0]} not supported without rubin_sim installed"
                         )
 
-                    query = f'SELECT {", ".join(dbcols)} FROM observations'
+                    query = f'SELECT {", ".join(norm_columns)} FROM observations'
                     if constraint:
                         query += f" WHERE {constraint}"
                     visits = pd.read_sql(query, sim_connection)
                 else:
                     raise e
 
+            # If we replaced modern columns with legacy ones in the query,
+            # update the column names.
+            visits.rename(columns=used_column_map, inplace=True)
+
             if "start_date" not in visits:
                 if "observationStartDatetime64" in visits:
                     visits["start_date"] = pd.to_datetime(
@@ -127,7 +178,6 @@ def read_opsim(
                         visits.observationStartMJD + 2400000.5, origin="julian", unit="D", utc=True
                     )
 
-            visits.rename(columns=SchemaConverter().backwards, inplace=True)
             visits.set_index("observationId", inplace=True)
 
     return visits
@@ -150,9 +200,9 @@ def read_ddf_visits(
         The start time for visits to be loaded
     end_time : `str`, `astropy.time.Time`
         The end time for visits ot be loaded
-    dbcols : `None` or `list` [`str`]
-        Columns required from the database. Defaults to None, which queries
-        all columns known to rubin_scheduler.
+    dbcols : `None` or `list` [`str`]
+        Columns required from the database. Defaults to None,
+        which uses all columns in the database.
     stackers : `list` [`rubin_sim.maf.stackers`], optional
         Stackers to be used to generate additional columns.
 
@@ -161,7 +211,6 @@ def read_ddf_visits(
     visits : `pandas.DataFrame`
         The visits and their parameters.
     """
-
     ddf_field_names = tuple(ddf_locations().keys())
     constraint = f"target IN {tuple(field_name for field_name in ddf_field_names)}"
     visits = read_opsim(
diff --git a/schedview/compute/visits.py b/schedview/compute/visits.py
index b3bb6a1d..717e5a65 100644
--- a/schedview/compute/visits.py
+++ b/schedview/compute/visits.py
@@ -196,7 +196,7 @@ def accum_teff_by_night(visits):
             The effective exposure time (`float`).
         ``"filter"``
             The filter (`str`).
-        ``"target"``
+        ``"target_name"``
             The target name (`str`).
 
     Returns
@@ -220,11 +220,11 @@ def accum_teff_by_night(visits):
             f"{teff_col} column not found for visits; use the rubin_sim.maf.stackers.TeffStacker."
         )
 
-    nightly_teff = visits.groupby(["target", day_obs_col, "filter"])[teff_col].sum().reset_index()
+    nightly_teff = visits.groupby(["target_name", day_obs_col, "filter"])[teff_col].sum().reset_index()
     nightly_teff = (
-        nightly_teff.pivot(index=["target", day_obs_col], columns="filter", values=teff_col)
+        nightly_teff.pivot(index=["target_name", day_obs_col], columns="filter", values=teff_col)
         .fillna(0.0)
         .reset_index()
-        .set_index(["target", day_obs_col])
+        .set_index(["target_name", day_obs_col])
     )
     return nightly_teff
diff --git a/schedview/data/opsim_prenight_2024-07-30_1.db b/schedview/data/opsim_prenight_2024-07-30_1.db
new file mode 100644
index 00000000..010f52dc
Binary files /dev/null and b/schedview/data/opsim_prenight_2024-07-30_1.db differ
diff --git a/schedview/data/opsim_prenight_2024-08-13_1.db b/schedview/data/opsim_prenight_2024-08-13_1.db
new file mode 100644
index 00000000..e2862fd5
Binary files /dev/null and b/schedview/data/opsim_prenight_2024-08-13_1.db differ
diff --git a/tests/test_collect_opsim.py b/tests/test_collect_opsim.py
new file mode 100644
index 00000000..5fa53a12
--- /dev/null
+++ b/tests/test_collect_opsim.py
@@ -0,0 +1,16 @@
+import unittest
+
+import schedview.collect.opsim
+
+
+class TestCollectOpsim(unittest.TestCase):
+    def test_read_opsim(self):
+        test_rp = "resource://schedview/data/opsim_prenight_2024-08-13_1.db"
+        visits = schedview.collect.opsim.read_opsim(test_rp)
+        assert "target_name" in visits.columns
+
+        # 'target_name' used to be called 'target'.
+        # Verify that read_opsim finds and renames it correctly
+        old_test_rp = "resource://schedview/data/opsim_prenight_2024-07-30_1.db"
+        old_visits = schedview.collect.opsim.read_opsim(old_test_rp)
+        assert "target_name" in old_visits.columns
diff --git a/tests/test_compute_visits.py b/tests/test_compute_visits.py
index 1ce2ca98..7d6e4747 100644
--- a/tests/test_compute_visits.py
+++ b/tests/test_compute_visits.py
@@ -51,7 +51,7 @@ def test_accum_teff_by_night(self):
         visits = schedview.collect.read_ddf_visits(self.visit_db_fname, stackers=stackers)
         night_teff = schedview.compute.visits.accum_teff_by_night(visits)
 
-        self.assertEqual(night_teff.index.names[0], "target")
+        self.assertEqual(night_teff.index.names[0], "target_name")
         self.assertEqual(night_teff.index.names[1], "day_obs_iso8601")
         for col_name in night_teff.columns:
             self.assertTrue(col_name in "ugrizy")
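
The fallback-and-rename scheme that _normalize_opsim_columns introduces can be sketched without a database connection. The snippet below is only an illustration: the toy backwards dict stands in for rubin_scheduler's SchemaConverter().backwards (old name to new name, as the patch uses it), and present_columns stands in for the columns found in an older opsim database; only the 'target' to 'target_name' rename is confirmed by this change.

# Illustrative sketch of the column fallback, with an assumed toy mapping.
backwards = {"target": "target_name"}  # assumed: old name -> new name
backwards_column_map = {v: k for k, v in backwards.items()}  # new name -> old name

present_columns = {"observationId", "target"}  # assumed columns of an old database
requested = ["observationId", "target_name"]

new_columns, used_column_map = [], {}
for column in requested:
    if column in present_columns:
        new_columns.append(column)
    elif backwards_column_map.get(column) in present_columns:
        old_column = backwards_column_map[column]
        new_columns.append(old_column)        # query using the legacy name ...
        used_column_map[old_column] = column  # ... then rename it to the requested name

print(new_columns)      # ['observationId', 'target']
print(used_column_map)  # {'target': 'target_name'}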