From 62787813e51085f5b305b108c499999425bd8ef3 Mon Sep 17 00:00:00 2001 From: Christina Gosnell Date: Thu, 15 Feb 2024 11:24:29 -0700 Subject: [PATCH] Make an EIA860m Changelog table (#3331) * first draft of all eia860m extraction * first draft of transform process: runs through existing 860 transform does not do changelog yet * simplify replaces in transform and add changelog dropdupes * first pass of adding full transform for eia860 and schema * Fix bad monthly expand_timeseries * [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci * clean up settings and add alembic migration * fix the settings grabbing in eia860 settings with new eia860m setup * Convert 860m table into db table * make a new 860m settings class, don't pass in report_date for 860, & use the right table name * remove FK relationships to the changelog table and make expand_timeseries have a dec unit test * change eia860m io manager to our cool new db + parquet manager * add docs and fix b4by missp3lls and change tbl name * add migration and update fast 860m month post new 860m integration * alembic migrations * [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci * Fix the working partitions in settings and helpers * Fix settings partitions and be better about selecting 860m only columns * Update nightly build script to distribute parquet (#3399) * Update nightly build script to distribute parquet * Fix logging cut-and-paste error * Name parquet distribution success variable like all the others * [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Zane Selvans --- ..._to_.py => 9dfb4295511e_wipe_and_reset.py} | 76 +++++++++--- src/pudl/etl/__init__.py | 2 + src/pudl/extract/eia860.py | 5 +- src/pudl/extract/eia860m.py | 55 ++++++++- 
src/pudl/helpers.py | 24 ++-- src/pudl/metadata/fields.py | 4 + src/pudl/metadata/resources/eia.py | 13 +- src/pudl/metadata/resources/eia860.py | 3 + src/pudl/metadata/resources/eia860m.py | 56 +++++++++ src/pudl/metadata/sources.py | 5 +- .../column_maps/generator_existing.csv | 8 +- .../column_maps/generator_proposed.csv | 4 +- src/pudl/package_data/settings/etl_fast.yml | 2 + src/pudl/package_data/settings/etl_full.yml | 2 + src/pudl/settings.py | 76 ++++++------ src/pudl/transform/__init__.py | 1 + src/pudl/transform/eia860.py | 109 +++++++---------- src/pudl/transform/eia860m.py | 113 ++++++++++++++++++ ...ries_fillin_through_month_expected_out.csv | 71 +++++------ test/unit/helpers_test.py | 4 +- test/unit/settings_test.py | 21 +++- 21 files changed, 464 insertions(+), 190 deletions(-) rename migrations/versions/{c716ec617469_wipe_alembic_migrations_due_to_.py => 9dfb4295511e_wipe_and_reset.py} (98%) create mode 100644 src/pudl/metadata/resources/eia860m.py create mode 100644 src/pudl/transform/eia860m.py diff --git a/migrations/versions/c716ec617469_wipe_alembic_migrations_due_to_.py b/migrations/versions/9dfb4295511e_wipe_and_reset.py similarity index 98% rename from migrations/versions/c716ec617469_wipe_alembic_migrations_due_to_.py rename to migrations/versions/9dfb4295511e_wipe_and_reset.py index 9c32078e8f..cc0aeb436c 100644 --- a/migrations/versions/c716ec617469_wipe_alembic_migrations_due_to_.py +++ b/migrations/versions/9dfb4295511e_wipe_and_reset.py @@ -1,16 +1,16 @@ -"""Wipe alembic migrations due to conflicting heads. +"""wipe and reset -Revision ID: c716ec617469 +Revision ID: 9dfb4295511e Revises: -Create Date: 2024-01-30 18:54:34.893455 +Create Date: 2024-02-14 14:31:28.957138 """ -import sqlalchemy as sa from alembic import op +import sqlalchemy as sa from sqlalchemy.dialects import sqlite # revision identifiers, used by Alembic. 
-revision = 'c716ec617469' +revision = '9dfb4295511e' down_revision = None branch_labels = None depends_on = None @@ -278,22 +278,22 @@ def upgrade() -> None: sa.PrimaryKeyConstraint('code', name=op.f('pk_core_pudl__codes_data_maturities')) ) op.create_table('core_pudl__codes_datasources', - sa.Column('datasource', sa.Enum('censusdp1tract', 'eia176', 'eia191', 'eia757a', 'eia860', 'eia860m', 'eia861', 'eia923', 'eia_bulk_elec', 'eiawater', 'epacems', 'epacamd_eia', 'ferc1', 'ferc2', 'ferc6', 'ferc60', 'ferc714', 'ferceqr', 'mshamines', 'phmsagas', 'pudl'), nullable=False, comment='Code identifying a dataset available within PUDL.'), + sa.Column('datasource', sa.Enum('censusdp1tract', 'eia176', 'eia191', 'eia757a', 'eia860', 'eia860m', 'eia861', 'eia923', 'eia_bulk_elec', 'eiawater', 'epacems', 'epacamd_eia', 'ferc1', 'ferc2', 'ferc6', 'ferc60', 'ferc714', 'ferceqr', 'mshamines', 'phmsagas', 'pudl', 'sec10k'), nullable=False, comment='Code identifying a dataset available within PUDL.'), sa.Column('partitions', sa.Text(), nullable=True, comment='The data parititions used to generate this instance of the database.'), sa.Column('doi', sa.Text(), nullable=True, comment='Unique digitial object identifier of Zenodo archive.'), sa.Column('pudl_version', sa.Text(), nullable=True, comment='The version of PUDL used to generate this database.'), sa.PrimaryKeyConstraint('datasource', name=op.f('pk_core_pudl__codes_datasources')) ) op.create_table('core_pudl__codes_subdivisions', - sa.Column('country_code', sa.Enum('USA', 'CAN'), nullable=False, comment='Three letter ISO-3166 country code (e.g. USA or CAN).'), + sa.Column('country_code', sa.Enum('CAN', 'USA'), nullable=False, comment='Three letter ISO-3166 country code (e.g. USA or CAN).'), sa.Column('country_name', sa.Text(), nullable=True, comment='Full country name (e.g. 
United States of America).'), - sa.Column('subdivision_code', sa.Enum('WV', 'GA', 'TX', 'UT', 'CT', 'DE', 'QC', 'SD', 'KS', 'SK', 'AR', 'IN', 'IL', 'PE', 'GU', 'NB', 'NV', 'SC', 'LA', 'AK', 'AL', 'NJ', 'MN', 'PA', 'DC', 'MA', 'MS', 'MD', 'NC', 'NH', 'ON', 'NM', 'MT', 'FL', 'KY', 'HI', 'NY', 'PR', 'MI', 'VI', 'TN', 'VT', 'VA', 'MP', 'ND', 'NT', 'BC', 'AB', 'WI', 'MB', 'IA', 'NE', 'WA', 'ID', 'OH', 'ME', 'NS', 'AZ', 'CA', 'NL', 'NU', 'YT', 'RI', 'OR', 'AS', 'OK', 'WY', 'CO', 'MO'), nullable=False, comment='Two-letter ISO-3166 political subdivision code (e.g. US state or Canadian provice abbreviations like CA or AB).'), + sa.Column('subdivision_code', sa.Enum('OH', 'MT', 'NH', 'WV', 'GA', 'IN', 'KS', 'MA', 'MB', 'MD', 'NC', 'CT', 'BC', 'ND', 'NE', 'NV', 'UT', 'YT', 'AS', 'PR', 'IL', 'AZ', 'MI', 'CO', 'AR', 'NT', 'VI', 'QC', 'FL', 'WI', 'CA', 'NB', 'MN', 'KY', 'NJ', 'PE', 'NS', 'AB', 'NL', 'HI', 'SC', 'NM', 'OR', 'AK', 'GU', 'MO', 'TX', 'WA', 'IA', 'LA', 'PA', 'VT', 'RI', 'DE', 'WY', 'OK', 'DC', 'SD', 'MP', 'SK', 'TN', 'AL', 'MS', 'ON', 'VA', 'ME', 'NY', 'NU', 'ID'), nullable=False, comment='Two-letter ISO-3166 political subdivision code (e.g. US state or Canadian provice abbreviations like CA or AB).'), sa.Column('subdivision_name', sa.Text(), nullable=True, comment='Full name of political subdivision (e.g. US state or Canadian province names like California or Alberta.'), sa.Column('subdivision_type', sa.Text(), nullable=True, comment='ISO-3166 political subdivision type. E.g. 
state, province, outlying_area.'), sa.Column('timezone_approx', sa.Enum('Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa', 'Africa/Algiers', 'Africa/Asmara', 'Africa/Asmera', 'Africa/Bamako', 'Africa/Bangui', 'Africa/Banjul', 'Africa/Bissau', 'Africa/Blantyre', 'Africa/Brazzaville', 'Africa/Bujumbura', 'Africa/Cairo', 'Africa/Casablanca', 'Africa/Ceuta', 'Africa/Conakry', 'Africa/Dakar', 'Africa/Dar_es_Salaam', 'Africa/Djibouti', 'Africa/Douala', 'Africa/El_Aaiun', 'Africa/Freetown', 'Africa/Gaborone', 'Africa/Harare', 'Africa/Johannesburg', 'Africa/Juba', 'Africa/Kampala', 'Africa/Khartoum', 'Africa/Kigali', 'Africa/Kinshasa', 'Africa/Lagos', 'Africa/Libreville', 'Africa/Lome', 'Africa/Luanda', 'Africa/Lubumbashi', 'Africa/Lusaka', 'Africa/Malabo', 'Africa/Maputo', 'Africa/Maseru', 'Africa/Mbabane', 'Africa/Mogadishu', 'Africa/Monrovia', 'Africa/Nairobi', 'Africa/Ndjamena', 'Africa/Niamey', 'Africa/Nouakchott', 'Africa/Ouagadougou', 'Africa/Porto-Novo', 'Africa/Sao_Tome', 'Africa/Timbuktu', 'Africa/Tripoli', 'Africa/Tunis', 'Africa/Windhoek', 'America/Adak', 'America/Anchorage', 'America/Anguilla', 'America/Antigua', 'America/Araguaina', 'America/Argentina/Buenos_Aires', 'America/Argentina/Catamarca', 'America/Argentina/ComodRivadavia', 'America/Argentina/Cordoba', 'America/Argentina/Jujuy', 'America/Argentina/La_Rioja', 'America/Argentina/Mendoza', 'America/Argentina/Rio_Gallegos', 'America/Argentina/Salta', 'America/Argentina/San_Juan', 'America/Argentina/San_Luis', 'America/Argentina/Tucuman', 'America/Argentina/Ushuaia', 'America/Aruba', 'America/Asuncion', 'America/Atikokan', 'America/Atka', 'America/Bahia', 'America/Bahia_Banderas', 'America/Barbados', 'America/Belem', 'America/Belize', 'America/Blanc-Sablon', 'America/Boa_Vista', 'America/Bogota', 'America/Boise', 'America/Buenos_Aires', 'America/Cambridge_Bay', 'America/Campo_Grande', 'America/Cancun', 'America/Caracas', 'America/Catamarca', 'America/Cayenne', 'America/Cayman', 'America/Chicago', 
'America/Chihuahua', 'America/Ciudad_Juarez', 'America/Coral_Harbour', 'America/Cordoba', 'America/Costa_Rica', 'America/Creston', 'America/Cuiaba', 'America/Curacao', 'America/Danmarkshavn', 'America/Dawson', 'America/Dawson_Creek', 'America/Denver', 'America/Detroit', 'America/Dominica', 'America/Edmonton', 'America/Eirunepe', 'America/El_Salvador', 'America/Ensenada', 'America/Fort_Nelson', 'America/Fort_Wayne', 'America/Fortaleza', 'America/Glace_Bay', 'America/Godthab', 'America/Goose_Bay', 'America/Grand_Turk', 'America/Grenada', 'America/Guadeloupe', 'America/Guatemala', 'America/Guayaquil', 'America/Guyana', 'America/Halifax', 'America/Havana', 'America/Hermosillo', 'America/Indiana/Indianapolis', 'America/Indiana/Knox', 'America/Indiana/Marengo', 'America/Indiana/Petersburg', 'America/Indiana/Tell_City', 'America/Indiana/Vevay', 'America/Indiana/Vincennes', 'America/Indiana/Winamac', 'America/Indianapolis', 'America/Inuvik', 'America/Iqaluit', 'America/Jamaica', 'America/Jujuy', 'America/Juneau', 'America/Kentucky/Louisville', 'America/Kentucky/Monticello', 'America/Knox_IN', 'America/Kralendijk', 'America/La_Paz', 'America/Lima', 'America/Los_Angeles', 'America/Louisville', 'America/Lower_Princes', 'America/Maceio', 'America/Managua', 'America/Manaus', 'America/Marigot', 'America/Martinique', 'America/Matamoros', 'America/Mazatlan', 'America/Mendoza', 'America/Menominee', 'America/Merida', 'America/Metlakatla', 'America/Mexico_City', 'America/Miquelon', 'America/Moncton', 'America/Monterrey', 'America/Montevideo', 'America/Montreal', 'America/Montserrat', 'America/Nassau', 'America/New_York', 'America/Nipigon', 'America/Nome', 'America/Noronha', 'America/North_Dakota/Beulah', 'America/North_Dakota/Center', 'America/North_Dakota/New_Salem', 'America/Nuuk', 'America/Ojinaga', 'America/Panama', 'America/Pangnirtung', 'America/Paramaribo', 'America/Phoenix', 'America/Port-au-Prince', 'America/Port_of_Spain', 'America/Porto_Acre', 'America/Porto_Velho', 
'America/Puerto_Rico', 'America/Punta_Arenas', 'America/Rainy_River', 'America/Rankin_Inlet', 'America/Recife', 'America/Regina', 'America/Resolute', 'America/Rio_Branco', 'America/Rosario', 'America/Santa_Isabel', 'America/Santarem', 'America/Santiago', 'America/Santo_Domingo', 'America/Sao_Paulo', 'America/Scoresbysund', 'America/Shiprock', 'America/Sitka', 'America/St_Barthelemy', 'America/St_Johns', 'America/St_Kitts', 'America/St_Lucia', 'America/St_Thomas', 'America/St_Vincent', 'America/Swift_Current', 'America/Tegucigalpa', 'America/Thule', 'America/Thunder_Bay', 'America/Tijuana', 'America/Toronto', 'America/Tortola', 'America/Vancouver', 'America/Virgin', 'America/Whitehorse', 'America/Winnipeg', 'America/Yakutat', 'America/Yellowknife', 'Antarctica/Casey', 'Antarctica/Davis', 'Antarctica/DumontDUrville', 'Antarctica/Macquarie', 'Antarctica/Mawson', 'Antarctica/McMurdo', 'Antarctica/Palmer', 'Antarctica/Rothera', 'Antarctica/South_Pole', 'Antarctica/Syowa', 'Antarctica/Troll', 'Antarctica/Vostok', 'Arctic/Longyearbyen', 'Asia/Aden', 'Asia/Almaty', 'Asia/Amman', 'Asia/Anadyr', 'Asia/Aqtau', 'Asia/Aqtobe', 'Asia/Ashgabat', 'Asia/Ashkhabad', 'Asia/Atyrau', 'Asia/Baghdad', 'Asia/Bahrain', 'Asia/Baku', 'Asia/Bangkok', 'Asia/Barnaul', 'Asia/Beirut', 'Asia/Bishkek', 'Asia/Brunei', 'Asia/Calcutta', 'Asia/Chita', 'Asia/Choibalsan', 'Asia/Chongqing', 'Asia/Chungking', 'Asia/Colombo', 'Asia/Dacca', 'Asia/Damascus', 'Asia/Dhaka', 'Asia/Dili', 'Asia/Dubai', 'Asia/Dushanbe', 'Asia/Famagusta', 'Asia/Gaza', 'Asia/Harbin', 'Asia/Hebron', 'Asia/Ho_Chi_Minh', 'Asia/Hong_Kong', 'Asia/Hovd', 'Asia/Irkutsk', 'Asia/Istanbul', 'Asia/Jakarta', 'Asia/Jayapura', 'Asia/Jerusalem', 'Asia/Kabul', 'Asia/Kamchatka', 'Asia/Karachi', 'Asia/Kashgar', 'Asia/Kathmandu', 'Asia/Katmandu', 'Asia/Khandyga', 'Asia/Kolkata', 'Asia/Krasnoyarsk', 'Asia/Kuala_Lumpur', 'Asia/Kuching', 'Asia/Kuwait', 'Asia/Macao', 'Asia/Macau', 'Asia/Magadan', 'Asia/Makassar', 'Asia/Manila', 'Asia/Muscat', 
'Asia/Nicosia', 'Asia/Novokuznetsk', 'Asia/Novosibirsk', 'Asia/Omsk', 'Asia/Oral', 'Asia/Phnom_Penh', 'Asia/Pontianak', 'Asia/Pyongyang', 'Asia/Qatar', 'Asia/Qostanay', 'Asia/Qyzylorda', 'Asia/Rangoon', 'Asia/Riyadh', 'Asia/Saigon', 'Asia/Sakhalin', 'Asia/Samarkand', 'Asia/Seoul', 'Asia/Shanghai', 'Asia/Singapore', 'Asia/Srednekolymsk', 'Asia/Taipei', 'Asia/Tashkent', 'Asia/Tbilisi', 'Asia/Tehran', 'Asia/Tel_Aviv', 'Asia/Thimbu', 'Asia/Thimphu', 'Asia/Tokyo', 'Asia/Tomsk', 'Asia/Ujung_Pandang', 'Asia/Ulaanbaatar', 'Asia/Ulan_Bator', 'Asia/Urumqi', 'Asia/Ust-Nera', 'Asia/Vientiane', 'Asia/Vladivostok', 'Asia/Yakutsk', 'Asia/Yangon', 'Asia/Yekaterinburg', 'Asia/Yerevan', 'Atlantic/Azores', 'Atlantic/Bermuda', 'Atlantic/Canary', 'Atlantic/Cape_Verde', 'Atlantic/Faeroe', 'Atlantic/Faroe', 'Atlantic/Jan_Mayen', 'Atlantic/Madeira', 'Atlantic/Reykjavik', 'Atlantic/South_Georgia', 'Atlantic/St_Helena', 'Atlantic/Stanley', 'Australia/ACT', 'Australia/Adelaide', 'Australia/Brisbane', 'Australia/Broken_Hill', 'Australia/Canberra', 'Australia/Currie', 'Australia/Darwin', 'Australia/Eucla', 'Australia/Hobart', 'Australia/LHI', 'Australia/Lindeman', 'Australia/Lord_Howe', 'Australia/Melbourne', 'Australia/NSW', 'Australia/North', 'Australia/Perth', 'Australia/Queensland', 'Australia/South', 'Australia/Sydney', 'Australia/Tasmania', 'Australia/Victoria', 'Australia/West', 'Australia/Yancowinna', 'Brazil/Acre', 'Brazil/DeNoronha', 'Brazil/East', 'Brazil/West', 'CET', 'CST6CDT', 'Canada/Atlantic', 'Canada/Central', 'Canada/Eastern', 'Canada/Mountain', 'Canada/Newfoundland', 'Canada/Pacific', 'Canada/Saskatchewan', 'Canada/Yukon', 'Chile/Continental', 'Chile/EasterIsland', 'Cuba', 'EET', 'EST', 'EST5EDT', 'Egypt', 'Eire', 'Etc/GMT', 'Etc/GMT+0', 'Etc/GMT+1', 'Etc/GMT+10', 'Etc/GMT+11', 'Etc/GMT+12', 'Etc/GMT+2', 'Etc/GMT+3', 'Etc/GMT+4', 'Etc/GMT+5', 'Etc/GMT+6', 'Etc/GMT+7', 'Etc/GMT+8', 'Etc/GMT+9', 'Etc/GMT-0', 'Etc/GMT-1', 'Etc/GMT-10', 'Etc/GMT-11', 'Etc/GMT-12', 'Etc/GMT-13', 
'Etc/GMT-14', 'Etc/GMT-2', 'Etc/GMT-3', 'Etc/GMT-4', 'Etc/GMT-5', 'Etc/GMT-6', 'Etc/GMT-7', 'Etc/GMT-8', 'Etc/GMT-9', 'Etc/GMT0', 'Etc/Greenwich', 'Etc/UCT', 'Etc/UTC', 'Etc/Universal', 'Etc/Zulu', 'Europe/Amsterdam', 'Europe/Andorra', 'Europe/Astrakhan', 'Europe/Athens', 'Europe/Belfast', 'Europe/Belgrade', 'Europe/Berlin', 'Europe/Bratislava', 'Europe/Brussels', 'Europe/Bucharest', 'Europe/Budapest', 'Europe/Busingen', 'Europe/Chisinau', 'Europe/Copenhagen', 'Europe/Dublin', 'Europe/Gibraltar', 'Europe/Guernsey', 'Europe/Helsinki', 'Europe/Isle_of_Man', 'Europe/Istanbul', 'Europe/Jersey', 'Europe/Kaliningrad', 'Europe/Kiev', 'Europe/Kirov', 'Europe/Kyiv', 'Europe/Lisbon', 'Europe/Ljubljana', 'Europe/London', 'Europe/Luxembourg', 'Europe/Madrid', 'Europe/Malta', 'Europe/Mariehamn', 'Europe/Minsk', 'Europe/Monaco', 'Europe/Moscow', 'Europe/Nicosia', 'Europe/Oslo', 'Europe/Paris', 'Europe/Podgorica', 'Europe/Prague', 'Europe/Riga', 'Europe/Rome', 'Europe/Samara', 'Europe/San_Marino', 'Europe/Sarajevo', 'Europe/Saratov', 'Europe/Simferopol', 'Europe/Skopje', 'Europe/Sofia', 'Europe/Stockholm', 'Europe/Tallinn', 'Europe/Tirane', 'Europe/Tiraspol', 'Europe/Ulyanovsk', 'Europe/Uzhgorod', 'Europe/Vaduz', 'Europe/Vatican', 'Europe/Vienna', 'Europe/Vilnius', 'Europe/Volgograd', 'Europe/Warsaw', 'Europe/Zagreb', 'Europe/Zaporozhye', 'Europe/Zurich', 'GB', 'GB-Eire', 'GMT', 'GMT+0', 'GMT-0', 'GMT0', 'Greenwich', 'HST', 'Hongkong', 'Iceland', 'Indian/Antananarivo', 'Indian/Chagos', 'Indian/Christmas', 'Indian/Cocos', 'Indian/Comoro', 'Indian/Kerguelen', 'Indian/Mahe', 'Indian/Maldives', 'Indian/Mauritius', 'Indian/Mayotte', 'Indian/Reunion', 'Iran', 'Israel', 'Jamaica', 'Japan', 'Kwajalein', 'Libya', 'MET', 'MST', 'MST7MDT', 'Mexico/BajaNorte', 'Mexico/BajaSur', 'Mexico/General', 'NZ', 'NZ-CHAT', 'Navajo', 'PRC', 'PST8PDT', 'Pacific/Apia', 'Pacific/Auckland', 'Pacific/Bougainville', 'Pacific/Chatham', 'Pacific/Chuuk', 'Pacific/Easter', 'Pacific/Efate', 'Pacific/Enderbury', 
'Pacific/Fakaofo', 'Pacific/Fiji', 'Pacific/Funafuti', 'Pacific/Galapagos', 'Pacific/Gambier', 'Pacific/Guadalcanal', 'Pacific/Guam', 'Pacific/Honolulu', 'Pacific/Johnston', 'Pacific/Kanton', 'Pacific/Kiritimati', 'Pacific/Kosrae', 'Pacific/Kwajalein', 'Pacific/Majuro', 'Pacific/Marquesas', 'Pacific/Midway', 'Pacific/Nauru', 'Pacific/Niue', 'Pacific/Norfolk', 'Pacific/Noumea', 'Pacific/Pago_Pago', 'Pacific/Palau', 'Pacific/Pitcairn', 'Pacific/Pohnpei', 'Pacific/Ponape', 'Pacific/Port_Moresby', 'Pacific/Rarotonga', 'Pacific/Saipan', 'Pacific/Samoa', 'Pacific/Tahiti', 'Pacific/Tarawa', 'Pacific/Tongatapu', 'Pacific/Truk', 'Pacific/Wake', 'Pacific/Wallis', 'Pacific/Yap', 'Poland', 'Portugal', 'ROC', 'ROK', 'Singapore', 'Turkey', 'UCT', 'US/Alaska', 'US/Aleutian', 'US/Arizona', 'US/Central', 'US/East-Indiana', 'US/Eastern', 'US/Hawaii', 'US/Indiana-Starke', 'US/Michigan', 'US/Mountain', 'US/Pacific', 'US/Samoa', 'UTC', 'Universal', 'W-SU', 'WET', 'Zulu'), nullable=True, comment='IANA timezone name of the timezone which encompasses the largest portion of the population in the associated geographic area.'), sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), sa.Column('division_name_us_census', sa.Text(), nullable=True, comment='Longer human readable name describing the US Census division.'), - sa.Column('division_code_us_census', sa.Enum('ENC', 'WNC', 'ESC', 'PCN', 'WSC', 'MAT', 'NEW', 'MTN', 'PCC', 'SAT'), nullable=True, comment='Three-letter US Census division code as it appears in the bulk electricity data published by the EIA. Note that EIA splits the Pacific division into distinct contiguous (CA, OR, WA) and non-contiguous (AK, HI) states. 
For reference see this US Census region and division map: https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf'), + sa.Column('division_code_us_census', sa.Enum('ESC', 'PCN', 'ENC', 'NEW', 'SAT', 'WSC', 'WNC', 'MAT', 'PCC', 'MTN'), nullable=True, comment='Three-letter US Census division code as it appears in the bulk electricity data published by the EIA. Note that EIA splits the Pacific division into distinct contiguous (CA, OR, WA) and non-contiguous (AK, HI) states. For reference see this US Census region and division map: https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf'), sa.Column('region_name_us_census', sa.Text(), nullable=True, comment='Human-readable name of a US Census region.'), sa.Column('is_epacems_state', sa.Boolean(), nullable=True, comment="Indicates whether the associated state reports data within the EPA's Continuous Emissions Monitoring System."), sa.PrimaryKeyConstraint('country_code', 'subdivision_code', name=op.f('pk_core_pudl__codes_subdivisions')) @@ -400,6 +400,43 @@ def upgrade() -> None: sa.ForeignKeyConstraint(['utility_id_eia'], ['core_eia__entity_utilities.utility_id_eia'], name=op.f('fk_core_eia860__scd_utilities_utility_id_eia_core_eia__entity_utilities')), sa.PrimaryKeyConstraint('utility_id_eia', 'report_date', name=op.f('pk_core_eia860__scd_utilities')) ) + op.create_table('core_eia860m__changelog_generators', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('valid_till_date', sa.Date(), nullable=True, comment='The record in the changelog is valid until this date. 
The record is valid from the report_date up until but not including the valid_till_date.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('county', sa.Text(), nullable=True, comment='County name.'), + sa.Column('current_planned_generator_operating_date', sa.Date(), nullable=True, comment='The most recently updated effective date on which the generator is scheduled to start operation'), + sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), + sa.Column('energy_source_code_1', sa.Text(), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'), + sa.Column('energy_storage_capacity_mwh', sa.Float(), nullable=True, comment='Energy storage capacity in MWh (e.g. 
for batteries).'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('generator_retirement_date', sa.Date(), nullable=True, comment='Date of the scheduled or effected retirement of the generator.'), + sa.Column('latitude', sa.Float(), nullable=True, comment="Latitude of the plant's location, in degrees."), + sa.Column('longitude', sa.Float(), nullable=True, comment="Longitude of the plant's location, in degrees."), + sa.Column('net_capacity_mwdc', sa.Float(), nullable=True, comment='Generation capacity in megawatts of direct current that is subject to a net metering agreement. Typically used for behind-the-meter solar PV.'), + sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. For generators this is based on which tab the generator was listed in in EIA 860.'), + sa.Column('operational_status_code', sa.Text(), nullable=True, comment='The operating status of the asset.'), + sa.Column('planned_derate_date', sa.Date(), nullable=True, comment='Planned effective month that the generator is scheduled to enter operation after the derate modification.'), + sa.Column('planned_generator_retirement_date', sa.Date(), nullable=True, comment='Planned effective date of the scheduled retirement of the generator.'), + sa.Column('planned_net_summer_capacity_derate_mw', sa.Float(), nullable=True, comment='Decrease in summer capacity expected to be realized from the derate modification to the equipment.'), + sa.Column('planned_net_summer_capacity_uprate_mw', sa.Float(), nullable=True, comment='Increase in summer capacity expected to be realized from the modification to the equipment.'), + sa.Column('planned_uprate_date', sa.Date(), nullable=True, comment='Planned effective date that the generator is scheduled to enter operation after the uprate modification.'), + 
sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), + sa.Column('summer_capacity_mw', sa.Float(), nullable=True, comment='The net summer capacity.'), + sa.Column('technology_description', sa.Text(), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'), + sa.Column('winter_capacity_mw', sa.Float(), nullable=True, comment='The net winter capacity.'), + sa.ForeignKeyConstraint(['data_maturity'], ['core_pudl__codes_data_maturities.code'], name=op.f('fk_core_eia860m__changelog_generators_data_maturity_core_pudl__codes_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_code_1'], ['core_eia__codes_energy_sources.code'], name=op.f('fk_core_eia860m__changelog_generators_energy_source_code_1_core_eia__codes_energy_sources')), + sa.ForeignKeyConstraint(['operational_status_code'], ['core_eia__codes_operational_status.code'], name=op.f('fk_core_eia860m__changelog_generators_operational_status_code_core_eia__codes_operational_status')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['core_eia__codes_prime_movers.code'], name=op.f('fk_core_eia860m__changelog_generators_prime_mover_code_core_eia__codes_prime_movers')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_core_eia860m__changelog_generators')) + ) op.create_table('core_eia861__yearly_advanced_metering_infrastructure', sa.Column('advanced_metering_infrastructure', sa.Integer(), nullable=True, comment='Number of meters that measure and record usage data at a minimum, in hourly intervals and provide usage data at least daily to energy companies and may also provide data to consumers. Data are used for billing and other purposes. 
Advanced meters include basic hourly interval meters and extend to real-time meters with built-in two-way communication capable of recording and transmitting instantaneous data.'), sa.Column('automated_meter_reading', sa.Integer(), nullable=True, comment='Number of meters that collect data for billing purposes only and transmit this data one way, usually from the customer to the distribution utility.'), @@ -2657,7 +2694,7 @@ def upgrade() -> None: sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), sa.Column('plant_name_ferc1', sa.Text(), nullable=True, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), sa.Column('project_num', sa.Integer(), nullable=True, comment='FERC Licensed Project Number.'), - sa.Column('plant_type', sa.Enum('run_of_river', 'na_category', 'run_of_river_with_storage', 'storage', 'hydro'), nullable=True, comment='Type of plant.'), + sa.Column('plant_type', sa.Enum('storage', 'hydro', 'run_of_river', 'run_of_river_with_storage', 'na_category'), nullable=True, comment='Type of plant.'), sa.Column('construction_type', sa.Enum('conventional', 'outdoor', 'semioutdoor'), nullable=True, comment="Type of plant construction ('outdoor', 'semioutdoor', or 'conventional'). Categorized by PUDL based on our best guess of intended value in FERC1 freeform strings."), sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), sa.Column('installation_year', sa.Integer(), nullable=True, comment="Year the plant's most recently built unit was installed."), @@ -2774,7 +2811,7 @@ def upgrade() -> None: sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), sa.Column('plant_name_ferc1', sa.Text(), nullable=True, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), - sa.Column('plant_type', sa.Enum('nuclear', 'na_category', 'internal_combustion', 'solar_thermal', 'photovoltaic', 'geothermal', 'steam', 'combined_cycle', 'combustion_turbine', 'wind'), nullable=True, comment='Type of plant.'), + sa.Column('plant_type', sa.Enum('wind', 'combustion_turbine', 'internal_combustion', 'solar_thermal', 'nuclear', 'steam', 'geothermal', 'combined_cycle', 'na_category', 'photovoltaic'), nullable=True, comment='Type of plant.'), sa.Column('construction_type', sa.Enum('conventional', 'outdoor', 'semioutdoor'), nullable=True, comment="Type of plant construction ('outdoor', 'semioutdoor', or 'conventional'). Categorized by PUDL based on our best guess of intended value in FERC1 freeform strings."), sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), sa.Column('installation_year', sa.Integer(), nullable=True, comment="Year the plant's most recently built unit was installed."), @@ -3310,9 +3347,9 @@ def upgrade() -> None: sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. 
Make sure you treat it as a string!'), sa.Column('owner_utility_name_eia', sa.Text(), nullable=True, comment='The name of the EIA owner utility.'), - sa.Column('owner_state', sa.Enum('WV', 'GA', 'TX', 'UT', 'CT', 'DE', 'QC', 'SD', 'KS', 'SK', 'AR', 'IN', 'IL', 'PE', 'GU', 'NB', 'NV', 'SC', 'LA', 'AK', 'AL', 'NJ', 'MN', 'PA', 'DC', 'MA', 'MS', 'MD', 'NC', 'NH', 'ON', 'NM', 'MT', 'FL', 'KY', 'HI', 'NY', 'PR', 'MI', 'VI', 'TN', 'VT', 'VA', 'MP', 'ND', 'NT', 'BC', 'AB', 'WI', 'MB', 'IA', 'NE', 'WA', 'ID', 'OH', 'ME', 'NS', 'AZ', 'CA', 'NL', 'NU', 'YT', 'RI', 'OR', 'AS', 'OK', 'WY', 'CO', 'MO'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), + sa.Column('owner_state', sa.Enum('OH', 'MT', 'NH', 'WV', 'GA', 'IN', 'KS', 'MA', 'MB', 'MD', 'NC', 'CT', 'BC', 'ND', 'NE', 'NV', 'UT', 'YT', 'AS', 'PR', 'IL', 'AZ', 'MI', 'CO', 'AR', 'NT', 'VI', 'QC', 'FL', 'WI', 'CA', 'NB', 'MN', 'KY', 'NJ', 'PE', 'NS', 'AB', 'NL', 'HI', 'SC', 'NM', 'OR', 'AK', 'GU', 'MO', 'TX', 'WA', 'IA', 'LA', 'PA', 'VT', 'RI', 'DE', 'WY', 'OK', 'DC', 'SD', 'MP', 'SK', 'TN', 'AL', 'MS', 'ON', 'VA', 'ME', 'NY', 'NU', 'ID'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), sa.Column('owner_city', sa.Text(), nullable=True, comment='City of owner.'), - sa.Column('owner_country', sa.Enum('USA', 'CAN'), nullable=True, comment='Three letter ISO-3166 country code.'), + sa.Column('owner_country', sa.Enum('CAN', 'USA'), nullable=True, comment='Three letter ISO-3166 country code.'), sa.Column('owner_street_address', sa.Text(), nullable=True, comment='Steet address of owner.'), sa.Column('owner_zip_code', sa.Text(), nullable=True, comment='Zip code of owner.'), sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), @@ -3331,9 +3368,9 @@ def upgrade() -> None: sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. 
May not be stable over time.'), sa.Column('owner_utility_name_eia', sa.Text(), nullable=True, comment='The name of the EIA owner utility.'), sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('owner_state', sa.Enum('WV', 'GA', 'TX', 'UT', 'CT', 'DE', 'QC', 'SD', 'KS', 'SK', 'AR', 'IN', 'IL', 'PE', 'GU', 'NB', 'NV', 'SC', 'LA', 'AK', 'AL', 'NJ', 'MN', 'PA', 'DC', 'MA', 'MS', 'MD', 'NC', 'NH', 'ON', 'NM', 'MT', 'FL', 'KY', 'HI', 'NY', 'PR', 'MI', 'VI', 'TN', 'VT', 'VA', 'MP', 'ND', 'NT', 'BC', 'AB', 'WI', 'MB', 'IA', 'NE', 'WA', 'ID', 'OH', 'ME', 'NS', 'AZ', 'CA', 'NL', 'NU', 'YT', 'RI', 'OR', 'AS', 'OK', 'WY', 'CO', 'MO'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), + sa.Column('owner_state', sa.Enum('OH', 'MT', 'NH', 'WV', 'GA', 'IN', 'KS', 'MA', 'MB', 'MD', 'NC', 'CT', 'BC', 'ND', 'NE', 'NV', 'UT', 'YT', 'AS', 'PR', 'IL', 'AZ', 'MI', 'CO', 'AR', 'NT', 'VI', 'QC', 'FL', 'WI', 'CA', 'NB', 'MN', 'KY', 'NJ', 'PE', 'NS', 'AB', 'NL', 'HI', 'SC', 'NM', 'OR', 'AK', 'GU', 'MO', 'TX', 'WA', 'IA', 'LA', 'PA', 'VT', 'RI', 'DE', 'WY', 'OK', 'DC', 'SD', 'MP', 'SK', 'TN', 'AL', 'MS', 'ON', 'VA', 'ME', 'NY', 'NU', 'ID'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), sa.Column('owner_city', sa.Text(), nullable=True, comment='City of owner.'), - sa.Column('owner_country', sa.Enum('USA', 'CAN'), nullable=True, comment='Three letter ISO-3166 country code.'), + sa.Column('owner_country', sa.Enum('CAN', 'USA'), nullable=True, comment='Three letter ISO-3166 country code.'), sa.Column('owner_street_address', sa.Text(), nullable=True, comment='Steet address of owner.'), sa.Column('owner_zip_code', sa.Text(), nullable=True, comment='Zip code of owner.'), sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), @@ -3792,7 +3829,7 @@ 
def upgrade() -> None: sa.Column('record_id_eia', sa.Text(), nullable=False, comment='Identifier for EIA plant parts analysis records.'), sa.Column('plant_id_eia', sa.Integer(), nullable=True, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), - sa.Column('plant_part', sa.Enum('plant_operating_year', 'plant_unit', 'plant_technology', 'plant_match_ferc1', 'plant_prime_fuel', 'plant', 'plant_gen', 'plant_ferc_acct', 'plant_prime_mover'), nullable=True, comment='The part of the plant a record corresponds to.'), + sa.Column('plant_part', sa.Enum('plant_prime_fuel', 'plant_operating_year', 'plant_ferc_acct', 'plant_unit', 'plant_gen', 'plant_prime_mover', 'plant_match_ferc1', 'plant_technology', 'plant'), nullable=True, comment='The part of the plant a record corresponds to.'), sa.Column('generator_id', sa.Text(), nullable=True, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. 
CT, CG)'), @@ -3801,7 +3838,7 @@ def upgrade() -> None: sa.Column('ferc_acct_name', sa.Enum('Hydraulic', 'Nuclear', 'Steam', 'Other'), nullable=True, comment='Name of FERC account, derived from technology description and prime mover code.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('true_gran', sa.Boolean(), nullable=True, comment='Indicates whether a plant part list record is associated with the highest priority plant part for all identical records.'), - sa.Column('appro_part_label', sa.Enum('plant_operating_year', 'plant_unit', 'plant_technology', 'plant_match_ferc1', 'plant_prime_fuel', 'plant', 'plant_gen', 'plant_ferc_acct', 'plant_prime_mover'), nullable=True, comment='Plant part of the associated true granularity record.'), + sa.Column('appro_part_label', sa.Enum('plant_prime_fuel', 'plant_operating_year', 'plant_ferc_acct', 'plant_unit', 'plant_gen', 'plant_prime_mover', 'plant_match_ferc1', 'plant_technology', 'plant'), nullable=True, comment='Plant part of the associated true granularity record.'), sa.Column('appro_record_id_eia', sa.Text(), nullable=True, comment='EIA record ID of the associated true granularity record.'), sa.Column('ferc1_generator_agg_id', sa.Integer(), nullable=True, comment='ID dynamically assigned by PUDL to EIA records with multiple matches to a single FERC ID in the FERC-EIA manual matching process.'), sa.Column('capacity_eoy_mw', sa.Float(), nullable=True, comment='Total end of year installed (nameplate) capacity for a plant part, in megawatts.'), @@ -3846,7 +3883,7 @@ def upgrade() -> None: sa.Column('record_id_eia', sa.Text(), nullable=True, comment='Identifier for EIA plant parts analysis records.'), sa.Column('match_type', sa.Text(), nullable=True, comment='Indicates the source and validation of the match between EIA and FERC. 
Match types include matches was generated from the model, verified by the training data, overridden by the training data, etc.'), sa.Column('plant_name_ppe', sa.Text(), nullable=True, comment='Derived plant name that includes EIA plant name and other strings associated with ID and PK columns of the plant part.'), - sa.Column('plant_part', sa.Enum('plant_operating_year', 'plant_unit', 'plant_technology', 'plant_match_ferc1', 'plant_prime_fuel', 'plant', 'plant_gen', 'plant_ferc_acct', 'plant_prime_mover'), nullable=True, comment='The part of the plant a record corresponds to.'), + sa.Column('plant_part', sa.Enum('plant_prime_fuel', 'plant_operating_year', 'plant_ferc_acct', 'plant_unit', 'plant_gen', 'plant_prime_mover', 'plant_match_ferc1', 'plant_technology', 'plant'), nullable=True, comment='The part of the plant a record corresponds to.'), sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), sa.Column('ownership_record_type', sa.Enum('owned', 'total'), nullable=True, comment='Whether each generator record is for one owner or represents a total of all ownerships.'), @@ -3862,7 +3899,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. 
May not be stable over time.'), sa.Column('true_gran', sa.Boolean(), nullable=True, comment='Indicates whether a plant part list record is associated with the highest priority plant part for all identical records.'), - sa.Column('appro_part_label', sa.Enum('plant_operating_year', 'plant_unit', 'plant_technology', 'plant_match_ferc1', 'plant_prime_fuel', 'plant', 'plant_gen', 'plant_ferc_acct', 'plant_prime_mover'), nullable=True, comment='Plant part of the associated true granularity record.'), + sa.Column('appro_part_label', sa.Enum('plant_prime_fuel', 'plant_operating_year', 'plant_ferc_acct', 'plant_unit', 'plant_gen', 'plant_prime_mover', 'plant_match_ferc1', 'plant_technology', 'plant'), nullable=True, comment='Plant part of the associated true granularity record.'), sa.Column('appro_record_id_eia', sa.Text(), nullable=True, comment='EIA record ID of the associated true granularity record.'), sa.Column('record_count', sa.Integer(), nullable=True, comment='Number of distinct generator IDs that partcipated in the aggregation for a plant part list record.'), sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), @@ -4114,6 +4151,7 @@ def downgrade() -> None: op.drop_table('core_eia861__yearly_demand_response_water_heater') op.drop_table('core_eia861__yearly_demand_response') op.drop_table('core_eia861__yearly_advanced_metering_infrastructure') + op.drop_table('core_eia860m__changelog_generators') op.drop_table('core_eia860__scd_utilities') op.drop_table('core_eia860__scd_emissions_control_equipment') op.drop_table('_out_eia__monthly_heat_rate_by_unit') diff --git a/src/pudl/etl/__init__.py b/src/pudl/etl/__init__.py index 09455ad58a..a56a524c6d 100644 --- a/src/pudl/etl/__init__.py +++ b/src/pudl/etl/__init__.py @@ -36,8 +36,10 @@ *load_assets_from_modules([epacems_assets], group_name="core_epacems"), *load_assets_from_modules([pudl.extract.eia176], group_name="raw_eia176"), 
*load_assets_from_modules([pudl.extract.phmsagas], group_name="raw_phmsagas"), + *load_assets_from_modules([pudl.extract.eia860m], group_name="raw_eia860m"), *load_assets_from_modules([pudl.extract.eia860], group_name="raw_eia860"), *load_assets_from_modules([pudl.transform.eia860], group_name="_core_eia860"), + *load_assets_from_modules([pudl.transform.eia860m], group_name="core_eia860m"), *load_assets_from_modules([pudl.extract.eia861], group_name="raw_eia861"), *load_assets_from_modules( [pudl.transform.eia861], group_name="core_eia861" diff --git a/src/pudl/extract/eia860.py b/src/pudl/extract/eia860.py index 4357982707..a74dba9d46 100644 --- a/src/pudl/extract/eia860.py +++ b/src/pudl/extract/eia860.py @@ -11,7 +11,6 @@ import pudl.logging_helpers from pudl.extract import excel from pudl.helpers import remove_leading_zeros_from_numeric_strings -from pudl.metadata.classes import DataSource logger = pudl.logging_helpers.get_logger(__name__) @@ -106,10 +105,8 @@ def extract_eia860(context, raw_eia860__all_dfs): ds = context.resources.datastore if eia_settings.eia860.eia860m: - eia860m_data_source = DataSource.from_id("eia860m") - eia860m_date = eia860m_data_source.working_partitions["year_month"] eia860m_raw_dfs = pudl.extract.eia860m.Extractor(ds).extract( - year_month=eia860m_date + year_month=[eia_settings.eia860.eia860m_year_month] ) raw_eia860__all_dfs = pudl.extract.eia860m.append_eia860m( eia860_raw_dfs=raw_eia860__all_dfs, eia860m_raw_dfs=eia860m_raw_dfs diff --git a/src/pudl/extract/eia860m.py b/src/pudl/extract/eia860m.py index 69134235c1..f6636437f4 100644 --- a/src/pudl/extract/eia860m.py +++ b/src/pudl/extract/eia860m.py @@ -15,6 +15,7 @@ from datetime import datetime import pandas as pd +from dagster import AssetOut, Output, asset, multi_asset import pudl.logging_helpers from pudl.extract import excel @@ -43,8 +44,12 @@ def process_raw(self, df, page, **partition): df["report_year"] = datetime.strptime( list(partition.values())[0], "%Y-%m" ).year + 
df["report_date"] = pd.to_datetime( + list(partition.values())[0], format="%Y-%m", exact=False + ) df = self.add_data_maturity(df, page, **partition) self.cols_added.append("report_year") + self.cols_added.append("report_date") # Eventually we should probably make this a transform for col in ["generator_id", "boiler_id"]: if col in df.columns: @@ -80,8 +85,56 @@ def append_eia860m(eia860_raw_dfs, eia860m_raw_dfs): # page names in 860m and 860 are the same. for page in pages_eia860m: eia860_raw_dfs[page] = pd.concat( - [eia860_raw_dfs[page], eia860m_raw_dfs[page]], + [eia860_raw_dfs[page], eia860m_raw_dfs[page].drop(columns=["report_date"])], ignore_index=True, sort=True, ) return eia860_raw_dfs + + +@asset( + required_resource_keys={"datastore", "dataset_settings"}, +) +def raw_eia860m__all_dfs(context): + """Extract raw EIA-860m data from excel sheets into dict of dataframes.""" + eia_settings = context.resources.dataset_settings.eia + ds = context.resources.datastore + + eia860m_extractor = Extractor(ds=ds) + raw_eia860m__all_dfs = eia860m_extractor.extract( + year_month=eia_settings.eia860m.year_months + ) + return raw_eia860m__all_dfs + + +raw_table_names = ( + "raw_eia860m__generator_existing", + "raw_eia860m__generator_proposed", + "raw_eia860m__generator_retired", +) + + +@multi_asset( + outs={table_name: AssetOut() for table_name in sorted(raw_table_names)}, + required_resource_keys={"datastore", "dataset_settings"}, +) +def extract_eia860m(raw_eia860m__all_dfs): + """Extract raw EIA data from excel sheets into dataframes. + + Args: + raw_eia860m__all_dfs: dict of raw EIA-860m dataframes, keyed by page name. + + Returns: + A tuple of extracted EIA dataframes. 
+ """ + # create descriptive table_names + raw_eia860m__all_dfs = { + "raw_eia860m__" + table_name: df + for table_name, df in raw_eia860m__all_dfs.items() + } + raw_eia860m__all_dfs = dict(sorted(raw_eia860m__all_dfs.items())) + + return ( + Output(output_name=table_name, value=df) + for table_name, df in raw_eia860m__all_dfs.items() + ) diff --git a/src/pudl/helpers.py b/src/pudl/helpers.py index bed81132c7..bd7800af08 100644 --- a/src/pudl/helpers.py +++ b/src/pudl/helpers.py @@ -620,25 +620,21 @@ def expand_timeseries( } ) elif fill_through_freq == "month": + end_dates.loc[:, date_col] = end_dates[ + date_col + ] + pd.tseries.offsets.DateOffset(months=1) end_dates.loc[:, date_col] = pd.to_datetime( { "year": end_dates[date_col].dt.year, - "month": end_dates[date_col].dt.month + 1, + "month": end_dates[date_col].dt.month, "day": 1, } ) elif fill_through_freq == "day": - end_dates.loc[:, date_col] = pd.to_datetime( - { - "year": end_dates[date_col].dt.year, - "month": end_dates[date_col].dt.month, - "day": end_dates[date_col].dt.day + 1, - } - ) - else: - raise ValueError( - f"{fill_through_freq} is not a valid frequency to fill through." 
- ) + end_dates.loc[:, date_col] = end_dates[ + date_col + ] + pd.tseries.offsets.DateOffset(days=1) + end_dates["drop_row"] = True df = ( pd.concat([df, end_dates.reset_index()]) @@ -1408,9 +1404,9 @@ def get_working_dates_by_datasource(datasource: str) -> pd.DatetimeIndex: dates = dates.append( pd.to_datetime(working_partitions["years"], format="%Y") ) - if "year_month" in working_partitions: + if "year_months" in working_partitions: dates = dates.append( - pd.DatetimeIndex([pd.to_datetime(working_partitions["year_month"])]) + pd.DatetimeIndex(pd.to_datetime(working_partitions["year_months"])) ) return dates diff --git a/src/pudl/metadata/fields.py b/src/pudl/metadata/fields.py index 267009c1b3..2d9e3d8197 100644 --- a/src/pudl/metadata/fields.py +++ b/src/pudl/metadata/fields.py @@ -3525,6 +3525,10 @@ "type": "string", "description": "Freeform description of type of utility reported in one of the other three other utility_type sections in the core_ferc1__yearly_utility_plant_summary_sched200 table. This field is reported only in the DBF reporting years (1994-2020).", }, + "valid_till_date": { + "type": "date", + "description": "The record in the changelog is valid until this date. 
The record is valid from the report_date up until but not including the valid_till_date.", + }, "variable_peak_pricing": { "type": "boolean", "description": ( diff --git a/src/pudl/metadata/resources/eia.py b/src/pudl/metadata/resources/eia.py index 6b0721b41c..54955be18f 100644 --- a/src/pudl/metadata/resources/eia.py +++ b/src/pudl/metadata/resources/eia.py @@ -520,7 +520,12 @@ "fields": [["plant_id_eia", "generator_id"]], # exclude core_epa__assn_eia_epacamd_subplant_ids bc there are generator ids in this # glue table that come only from epacamd - "exclude": ["core_epa__assn_eia_epacamd_subplant_ids"], + # also exclude the 860 changelog table bc that table doesn't get harvested + # and therefore there are a few straggler generators that don't end up in this table + "exclude": [ + "core_epa__assn_eia_epacamd_subplant_ids", + "core_eia860m__changelog_generators", + ], }, }, "sources": ["eia860", "eia923"], @@ -573,9 +578,12 @@ # violations. # See: https://github.com/catalyst-cooperative/pudl/issues/1196 # Exclude the core_epa__assn_eia_epacamd_subplant_ids table + # also exclude the 860 changelog table bc that table doesn't get harvested + # and therefore there are a few straggler generators that don't end up in this table "exclude": [ "core_pudl__assn_eia_pudl_plants", "core_epa__assn_eia_epacamd_subplant_ids", + "core_eia860m__changelog_generators", ], }, }, @@ -672,6 +680,9 @@ 
# not yet harvesting owner_utility_id_eia from core_eia860__scd_ownership. # See https://github.com/catalyst-cooperative/pudl/issues/1393 "out_eia923__yearly_generation_fuel_by_generator_energy_source_owner", + # also exclude the 860 changelog table bc that table doesn't get harvested + # and therefore there are a few straggler generators that don't end up in this table + "core_eia860m__changelog_generators", ], }, }, diff --git a/src/pudl/metadata/resources/eia860.py b/src/pudl/metadata/resources/eia860.py index 4dc2119132..8f309bd164 100644 --- a/src/pudl/metadata/resources/eia860.py +++ b/src/pudl/metadata/resources/eia860.py @@ -240,6 +240,7 @@ "_out_eia__monthly_heat_rate_by_generator", "_out_eia__monthly_derived_generator_attributes", "out_eia__monthly_generators", + "core_eia860m__changelog_generators", ], }, }, @@ -391,6 +392,7 @@ "_out_eia__monthly_capacity_factor_by_generator", "_out_eia__monthly_derived_generator_attributes", "out_eia__monthly_generators", + "core_eia860m__changelog_generators", ], }, }, @@ -461,6 +463,7 @@ "_out_eia__monthly_fuel_cost_by_generator", "out_eia923__monthly_generation_fuel_by_generator_energy_source", "out_eia923__monthly_generation_fuel_by_generator", + "core_eia860m__changelog_generators", # Utility IDs in this table are owners, not operators, and we are # not yet harvesting owner_utility_id_eia from core_eia860__scd_ownership. # See https://github.com/catalyst-cooperative/pudl/issues/1393 diff --git a/src/pudl/metadata/resources/eia860m.py b/src/pudl/metadata/resources/eia860m.py new file mode 100644 index 0000000000..1106498d7a --- /dev/null +++ b/src/pudl/metadata/resources/eia860m.py @@ -0,0 +1,56 @@ +"""Definitions of data tables primarily coming from EIA-860m.""" +from typing import Any + +RESOURCE_METADATA: dict[str, dict[str, Any]] = { + "core_eia860m__changelog_generators": { + "description": ( + "This table is a changelog of the monthly reported EIA-860m data. 
EIA-860m " + "includes generator tables with the most up-to-date catalogue of EIA " + "generators and their operational status and other generator characteristics. " + "EIA-860m is reported monthly, although for the vast majority of the generators " + "nothing changes month-to-month. This table is a changelog of that monthly " + "reported generator data. There is a record corresponding to the first instance " + "of a generator and associated characteristics with a report_date column and a " + "valid_till_date column. Whenever any of the reported EIA-860m data was changed " + "for a record, there will be a new changelog record with a new report_date." + ), + "schema": { + "fields": [ + "report_date", + "valid_till_date", + "plant_id_eia", + "plant_name_eia", + "utility_id_eia", + "utility_name_eia", + "generator_id", + "capacity_mw", + "county", + "current_planned_generator_operating_date", + "data_maturity", + "energy_source_code_1", + "energy_storage_capacity_mwh", + "fuel_type_code_pudl", + "generator_retirement_date", + "latitude", + "longitude", + "net_capacity_mwdc", + "operational_status", + "operational_status_code", + "planned_derate_date", + "planned_generator_retirement_date", + "planned_net_summer_capacity_derate_mw", + "planned_net_summer_capacity_uprate_mw", + "planned_uprate_date", + "prime_mover_code", + "state", + "summer_capacity_mw", + "technology_description", + "winter_capacity_mw", + ], + "primary_key": ["plant_id_eia", "generator_id", "report_date"], + }, + "field_namespace": "eia", + "sources": ["eia860"], + "etl_group": "eia860", + }, +} diff --git a/src/pudl/metadata/sources.py b/src/pudl/metadata/sources.py index fcbdcd36f1..82d4d2a1a3 100644 --- a/src/pudl/metadata/sources.py +++ b/src/pudl/metadata/sources.py @@ -175,7 +175,10 @@ "field_namespace": "eia", "contributors": [], "working_partitions": { - "year_month": "2023-12", + "year_months": [ + str(q).lower() + for q in pd.period_range(start="2015-07", end="2023-12", freq="M") + ], }, 
"keywords": sorted( set( diff --git a/src/pudl/package_data/eia860m/column_maps/generator_existing.csv b/src/pudl/package_data/eia860m/column_maps/generator_existing.csv index fb406b8895..c5e3cbfeeb 100644 --- a/src/pudl/package_data/eia860m/column_maps/generator_existing.csv +++ b/src/pudl/package_data/eia860m/column_maps/generator_existing.csv @@ -12,8 +12,8 @@ operational_status_code,status,status,status,status,status,status,status,status, capacity_mw,,,,,,,,,,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capa
city_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capacity_mw summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacit
y_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw winter_capacity_mw,,,,,,,,,,,,,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winte
r_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw -operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_m
onth,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month -operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,opera
ting_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year +generator_operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month,operating_month 
+generator_operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year,operating_year 
energy_source_code_1,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code 
sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector planned_net_summer_capacity_uprate_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_ca
pacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capa
city_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw,planned_uprate_of_summer_capacity_mw @@ -30,5 +30,5 @@ longitude,,,,,,longitude,longitude,longitude,longitude,longitude,longitude,longi map_bing,,,,,,,,,,,,,,,,,,,,,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map,bing_map 
map_google,,,,,,,,,,,,,,,,,,,,,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map,google_map balancing_authority_code_eia,,,,,,,,,,,,,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,b
alancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code,balancing_authority_code -energy_storage_capacity_mwh,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,nameplate_energy_capacity_mwh,,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh 
-net_capacity_mwdc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,dc_net_capacity_mw,,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw +energy_storage_capacity_mwh,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh,nameplate_energy_capacity_mwh +net_capacity_mwdc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw,dc_net_capacity_mw diff --git a/src/pudl/package_data/eia860m/column_maps/generator_proposed.csv b/src/pudl/package_data/eia860m/column_maps/generator_proposed.csv index ef4d2dec53..0a273816bc 100644 --- a/src/pudl/package_data/eia860m/column_maps/generator_proposed.csv +++ 
b/src/pudl/package_data/eia860m/column_maps/generator_proposed.csv @@ -13,8 +13,8 @@ capacity_mw,,,,,,,,,,nameplate_capacity_mw,nameplate_capacity_mw,nameplate_capac summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_m
w,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw,net_summer_capacity_mw winter_capacity_mw,,,,,,,,,,,,,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_c
apacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw,net_winter_capacity_mw unit_id_eia,,,,,,,,,,,,,,,,,,,,,,,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code,unit_code 
-current_planned_operating_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month
,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month -current_planned_operating_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operati
on_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year +current_planned_generator_operating_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_
month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month,planned_operation_month 
+current_planned_generator_operating_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,pla
nned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year,planned_operation_year energy_source_code_1,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_
code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code,energy_source_code sector_name,sector_name,sector_name,sector_name,sector_name,sector_name,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector,sector latitude,,,,,,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude diff --git 
a/src/pudl/package_data/settings/etl_fast.yml b/src/pudl/package_data/settings/etl_fast.yml index 71c207811d..7b7914f4d1 100644 --- a/src/pudl/package_data/settings/etl_fast.yml +++ b/src/pudl/package_data/settings/etl_fast.yml @@ -43,6 +43,8 @@ datasets: eia860: years: [2020, 2022] eia860m: true + eia860m: + year_months: ["2023-11", "2023-12"] eia861: # eia861 runs fast. Discontinued tables break single-year ETL. # This is a temporary hack to make the tests pass! diff --git a/src/pudl/package_data/settings/etl_full.yml b/src/pudl/package_data/settings/etl_full.yml index 1624f735b7..a9e9aebdb2 100644 --- a/src/pudl/package_data/settings/etl_full.yml +++ b/src/pudl/package_data/settings/etl_full.yml @@ -240,6 +240,8 @@ datasets: 2022, ] eia860m: true + eia860m: + year_months: ["all"] eia861: years: [ diff --git a/src/pudl/settings.py b/src/pudl/settings.py index bb4bdfa4bd..f763d0da9f 100644 --- a/src/pudl/settings.py +++ b/src/pudl/settings.py @@ -97,10 +97,12 @@ def partitions(cls) -> list[None | dict[str, str]]: # noqa: N805 ``pd.json_normalize``. """ partitions = [] - if hasattr(cls, "year_quarters"): - partitions = [{"year_quarters": part} for part in cls.year_quarters] - elif hasattr(cls, "years"): - partitions = [{"year": part} for part in cls.years] + for part_name in ["year_quarters", "years", "year_months"]: + if hasattr(cls, part_name): + partitions = [ + {part_name.removesuffix("s"): part} + for part in getattr(cls, part_name) + ] return partitions @@ -207,20 +209,21 @@ class Eia860Settings(GenericDatasetSettings): Args: data_source: DataSource metadata object years: list of years to validate. - - eia860m_date ClassVar[str]: The 860m year to date. + eia860m: whether or not to incorporate an EIA-860m month. + eia860m_year_month ClassVar[str]: The 860m year-month to incorporate. 
""" data_source: ClassVar[DataSource] = DataSource.from_id("eia860") - eia860m_data_source: ClassVar[DataSource] = DataSource.from_id("eia860m") - eia860m_date: ClassVar[str] = eia860m_data_source.working_partitions["year_month"] - years: list[int] = data_source.working_partitions["years"] + eia860m: bool = True + eia860m_year_month: ClassVar[str] = max( + DataSource.from_id("eia860m").working_partitions["year_months"] + ) @field_validator("eia860m") @classmethod - def check_eia860m_date(cls, eia860m: bool) -> bool: + def check_eia860m_year_month(cls, eia860m: bool) -> bool: """Check 860m date-year is exactly one year after most recent working 860 year. Args: @@ -232,18 +235,38 @@ def check_eia860m_date(cls, eia860m: bool) -> bool: Raises: ValueError: the 860m date is within 860 working years. """ - eia860m_year = pd.to_datetime(cls.eia860m_date).year + eia860m_year = pd.to_datetime(cls.eia860m_year_month).year expected_year = max(cls.data_source.working_partitions["years"]) + 1 if eia860m and (eia860m_year != expected_year): raise AssertionError( f"Attempting to integrate an eia860m year " - f"({eia860m_year}) from {cls.eia860m_date} not immediately following" + f"({eia860m_year}) from {cls.eia860m_year_month} not immediately following" f"the eia860 years: {cls.data_source.working_partitions['years']}. " f"Consider switching eia860m parameter to False." ) return eia860m +class Eia860mSettings(GenericDatasetSettings): + """An immutable pydantic model to validate EIA 860m settings. + + Args: + data_source: DataSource metadata object + year_months ClassVar[str]: The 860m year to date. 
+ """ + + data_source: ClassVar[DataSource] = DataSource.from_id("eia860m") + year_months: list[str] = data_source.working_partitions["year_months"] + + @field_validator("year_months") + @classmethod + def allow_all_keyword_year_months(cls, year_months): + """Allow users to specify ['all'] to get all year-months.""" + if year_months == ["all"]: + year_months = cls.data_source.working_partitions["year_months"] + return year_months + + class GlueSettings(FrozenBaseModel): """An immutable pydantic model to validate Glue settings. @@ -266,6 +289,7 @@ class EiaSettings(FrozenBaseModel): """ eia860: Eia860Settings | None = None + eia860m: Eia860mSettings | None = None eia861: Eia861Settings | None = None eia923: Eia923Settings | None = None @@ -275,6 +299,7 @@ def default_load_all(cls, data: dict[str, Any]) -> dict[str, Any]: """If no datasets are specified default to all.""" if not any(data.values()): data["eia860"] = Eia860Settings() + data["eia860m"] = Eia860mSettings() data["eia861"] = Eia861Settings() data["eia923"] = Eia923Settings() @@ -397,6 +422,7 @@ def make_datasources_table(self: Self, ds: Datastore) -> pd.DataFrame: datasets_in_datastore_format.update( { "eia860": datasets_settings["eia"].eia860, + "eia860m": datasets_settings["eia"].eia860m, "eia861": datasets_settings["eia"].eia861, "eia923": datasets_settings["eia"].eia923, } @@ -416,32 +442,6 @@ def make_datasources_table(self: Self, ds: Datastore) -> pd.DataFrame: ], } ) - # add in EIA860m if eia in general is in the settings and the 860m bool is True - special_nested_datasets = pd.DataFrame() - if ( - datasets_settings.get("eia", False) - and datasets_settings["eia"].eia860.eia860m - ): - special_nested_datasets = pd.DataFrame( - data={ - "datasource": ["eia860m"], - "partitions": [ - json.dumps( - datasets_in_datastore_format[ - "eia860" - ].eia860m_data_source.working_partitions - ) - ], - "doi": [ - str( - _zenodo_doi_to_url( - ds.get_datapackage_descriptor("eia860m").doi - ) - ) - ], - } - ) - df =
pd.concat([df, special_nested_datasets]).reset_index(drop=True) df["pudl_version"] = pudl.__version__ return df diff --git a/src/pudl/transform/__init__.py b/src/pudl/transform/__init__.py index 63aa3bd5e6..13bff4afec 100644 --- a/src/pudl/transform/__init__.py +++ b/src/pudl/transform/__init__.py @@ -63,6 +63,7 @@ classes, eia, eia860, + eia860m, eia861, eia923, eia_bulk_elec, diff --git a/src/pudl/transform/eia860.py b/src/pudl/transform/eia860.py index 7798f1cd04..40a32c8c5e 100644 --- a/src/pudl/transform/eia860.py +++ b/src/pudl/transform/eia860.py @@ -234,56 +234,32 @@ def _core_eia860__generators( # operational_status). We could do this by fillna w/ the retirement_date, but # this way seems more straightforward. gr_df["operational_status_code"] = gr_df["operational_status_code"].fillna("RE") - - gens_df = ( - pd.concat([ge_df, gp_df, gr_df, g_df], sort=True) - .dropna(subset=["generator_id", "plant_id_eia"]) - .pipe(pudl.helpers.fix_eia_na) - ) - + # Prep dicts for column based pd.replace: # A subset of the columns have zero values, where NA is appropriate: - columns_to_fix = [ - "planned_generator_retirement_month", - "planned_generator_retirement_year", - "planned_uprate_month", - "planned_uprate_year", - "other_modifications_month", - "other_modifications_year", - "planned_derate_month", - "planned_derate_year", - "planned_repower_month", - "planned_repower_year", - "planned_net_summer_capacity_derate_mw", - "planned_net_summer_capacity_uprate_mw", - "planned_net_winter_capacity_derate_mw", - "planned_net_winter_capacity_uprate_mw", - "planned_new_capacity_mw", - "nameplate_power_factor", - "minimum_load_mw", - "winter_capacity_mw", - "summer_capacity_mw", - ] - - for column in columns_to_fix: - gens_df[column] = gens_df[column].replace(to_replace=[" ", 0], value=np.nan) - - # A subset of the columns have "X" values, where other columns_to_fix - # have "N" values. 
Replacing these values with "N" will make for uniform - # values that can be converted to Boolean True and False pairs. - gens_df.duct_burners = gens_df.duct_burners.replace(to_replace="X", value="N") - gens_df.bypass_heat_recovery = gens_df.bypass_heat_recovery.replace( - to_replace="X", value="N" - ) - gens_df.syncronized_transmission_grid = gens_df.bypass_heat_recovery.replace( - to_replace="X", value="N" - ) - - # A subset of the columns have "U" values, presumably for "Unknown," which - # must be set to None in order to convert the columns to datatype Boolean. - - gens_df.multiple_fuels = gens_df.multiple_fuels.replace(to_replace="U", value=None) - gens_df.switch_oil_gas = gens_df.switch_oil_gas.replace(to_replace="U", value=None) - + nulls_replace_cols = { + col: {" ": np.nan, 0: np.nan} + for col in [ + "planned_generator_retirement_month", + "planned_generator_retirement_year", + "planned_uprate_month", + "planned_uprate_year", + "other_modifications_month", + "other_modifications_year", + "planned_derate_month", + "planned_derate_year", + "planned_repower_month", + "planned_repower_year", + "planned_net_summer_capacity_derate_mw", + "planned_net_summer_capacity_uprate_mw", + "planned_net_winter_capacity_derate_mw", + "planned_net_winter_capacity_uprate_mw", + "planned_new_capacity_mw", + "nameplate_power_factor", + "minimum_load_mw", + "winter_capacity_mw", + "summer_capacity_mw", + ] + } boolean_columns_to_fix = [ "duct_burners", "multiple_fuels", @@ -315,27 +291,32 @@ def _core_eia860__generators( "ferc_exempt_wholesale_generator", "ferc_qualifying_facility", ] - - for column in boolean_columns_to_fix: - gens_df[column] = ( - gens_df[column] - .fillna("NaN") - .replace(to_replace=["Y", "N", "NaN"], value=[True, False, pd.NA]) - ) - + # Most boolean columns have either "Y" for True or "N" for False. + # A subset of the columns have "X" values which represents a False value. 
+ # A subset of the columns have "U" values, presumably for "Unknown," which + # must be set to None in order to convert the columns to datatype Boolean. + fillna_cols = {col: pd.NA for col in boolean_columns_to_fix} + boolean_replace_cols = { + col: {"Y": True, "N": False, "X": False, "U": pd.NA} + for col in boolean_columns_to_fix + } gens_df = ( - gens_df.pipe(pudl.helpers.month_year_to_date) + pd.concat([ge_df, gp_df, gr_df, g_df], sort=True) + .dropna(subset=["generator_id", "plant_id_eia"]) + .pipe(pudl.helpers.fix_eia_na) + .fillna(fillna_cols) + .replace(to_replace=nulls_replace_cols | boolean_replace_cols) + .pipe(pudl.helpers.month_year_to_date) .pipe( pudl.helpers.simplify_strings, columns=["rto_iso_lmp_node_id", "rto_iso_location_wholesale_reporting_id"], ) .pipe(pudl.helpers.convert_to_date) - ) - - gens_df = ( - pudl.metadata.classes.Package.from_resource_ids() - .get_resource("core_eia860__scd_generators") - .encode(gens_df) + .pipe( + pudl.metadata.classes.Package.from_resource_ids() + .get_resource("core_eia860__scd_generators") + .encode + ) ) gens_df["fuel_type_code_pudl"] = gens_df.energy_source_code_1.str.upper().map( diff --git a/src/pudl/transform/eia860m.py b/src/pudl/transform/eia860m.py new file mode 100644 index 0000000000..cafd1dcc13 --- /dev/null +++ b/src/pudl/transform/eia860m.py @@ -0,0 +1,113 @@ +"""Module to perform data cleaning functions on EIA860m data tables.""" + +import pandas as pd +from dagster import asset + +import pudl + +logger = pudl.logging_helpers.get_logger(__name__) + + +@asset(io_manager_key="pudl_io_manager") +def core_eia860m__changelog_generators( + raw_eia860m__generator_proposed, + raw_eia860m__generator_existing, + raw_eia860m__generator_retired, +): + """Changelog of EIA860m Generators based on operating status. + + The monthly reported EIA860m tables include existing, proposed and retired + generators.
This table combines all monthly reported data and preserves the first + reported record when any new information about the generator was reported. + + We are not putting this table through PUDL's standard normalization process for EIA + tables (see :func:`pudl.transform.eia.harvest_entity_tables`). EIA-860m includes + provisional data reported monthly so it changes frequently compared to the more + stable annually reported EIA data. If we fed all of the EIA-860m data into the + harvesting process, we would get failures because the records from EIA-860m are too + inconsistent for our thresholds for harvesting canonical values for entities. A + ramification of this table not being harvested is that if there are any entities + (generators, plants, utilities) that were only ever reported in an older 860m file, + there will be no record of it in the PUDL entity or scd tables. Therefore, this + asset cannot have foreign key relationships with the rest of the core EIA tables. + """ + # compile all of the columns so these 860m bbs have everything for the transform + eia860_columns = pudl.helpers.dedupe_n_flatten_list_of_lists( + [ + pudl.extract.excel.Metadata("eia860").get_all_columns(gen_table) + for gen_table in [ + "generator_proposed", + "generator_existing", + "generator_retired", + "generator", + ] + ] + ) + eia860m_all = pudl.transform.eia860._core_eia860__generators( + raw_eia860__generator_proposed=raw_eia860m__generator_proposed, + raw_eia860__generator_existing=raw_eia860m__generator_existing, + raw_eia860__generator_retired=raw_eia860m__generator_retired.assign( + operational_status_code=pd.NA + ), + # pass an empty generator df here. 860 old years had one big gens tab + # but 860m doesn't. we do this just to enable us to run the 860 transform + # function.
We add all of the columns to it so we don't have any errors + # from missing columns + raw_eia860__generator=pd.DataFrame( + columns=list(eia860_columns) + ).convert_dtypes(), + )[ + # drop all the non 860m cols + [ + field.name + for field in pudl.metadata.classes.Resource.from_id( + "core_eia860m__changelog_generators" + ).schema.fields + if field.name != "valid_till_date" + ] + ] + # there is one plant/gen that has duplicate values + gens_idx = ["plant_id_eia", "generator_id", "report_date"] + dupe_mask = (eia860m_all.plant_id_eia == 56032) & (eia860m_all.generator_id == "1") + deduped = eia860m_all[dupe_mask].drop_duplicates(subset=gens_idx, keep="first") + without_known_dupes = eia860m_all[~dupe_mask] + eia860m_deduped = pd.concat([without_known_dupes, deduped]) + + # Check whether we have truly deduplicated the dataframe. + remaining_dupes = eia860m_deduped[ + eia860m_deduped.duplicated(subset=gens_idx, keep=False) + ] + if not remaining_dupes.empty: + raise ValueError( + f"Duplicate ownership slices found in 860m table: {remaining_dupes}" + ) + + gen_idx_no_date = [c for c in gens_idx if c != "report_date"] + eia860m_all = pudl.helpers.expand_timeseries( + df=eia860m_deduped, + key_cols=gen_idx_no_date, + date_col="report_date", + freq="MS", + fill_through_freq="month", + ) + + # assign a max report_date column for use in the valid_till_date column + eia860m_all["report_date_max"] = eia860m_all.groupby(gen_idx_no_date)[ + "report_date" + ].transform("max") + # drop duplicates after sorting by date so we get the first appearance + eia860m_changelog = eia860m_all.sort_values( + by=["report_date"], ascending=True + ).drop_duplicates( + subset=[c for c in eia860m_all if c != "report_date"], + keep="first", + ) + + eia860m_changelog["valid_till_date"] = ( + eia860m_changelog.sort_values(gens_idx, ascending=False) + .groupby(gen_idx_no_date)["report_date"] + .transform("shift") + .fillna(eia860m_changelog.report_date_max) + .where(eia860m_changelog["report_date"]
!= eia860m_changelog["report_date_max"]) + ) + return eia860m_changelog diff --git a/test/data/date_merge_unit_test/timeseries_fillin_through_month_expected_out.csv b/test/data/date_merge_unit_test/timeseries_fillin_through_month_expected_out.csv index 3267eb3128..d267aa79dd 100644 --- a/test/data/date_merge_unit_test/timeseries_fillin_through_month_expected_out.csv +++ b/test/data/date_merge_unit_test/timeseries_fillin_through_month_expected_out.csv @@ -1,4 +1,4 @@ -plant_id_eia,generator_id,report_date,data +plant_id_eia,generator_id,report_date,data 1,1,2019-12-30,1 1,1,2019-12-31,1 1,1,2020-01-01,1 @@ -39,37 +39,38 @@ 1,2,2020-01-29,1 1,2,2020-01-30,1 1,2,2020-01-31,1 -2,1,2020-02-27,3 -2,1,2020-02-28,3 -2,1,2020-02-29,3 -2,1,2020-03-01,4 -2,1,2020-03-02,4 -2,1,2020-03-03,4 -2,1,2020-03-04,4 -2,1,2020-03-05,4 -2,1,2020-03-06,4 -2,1,2020-03-07,4 -2,1,2020-03-08,4 -2,1,2020-03-09,4 -2,1,2020-03-10,4 -2,1,2020-03-11,4 -2,1,2020-03-12,4 -2,1,2020-03-13,4 -2,1,2020-03-14,4 -2,1,2020-03-15,4 -2,1,2020-03-16,4 -2,1,2020-03-17,4 -2,1,2020-03-18,4 -2,1,2020-03-19,4 -2,1,2020-03-20,4 -2,1,2020-03-21,4 -2,1,2020-03-22,4 -2,1,2020-03-23,4 -2,1,2020-03-24,4 -2,1,2020-03-25,4 -2,1,2020-03-26,4 -2,1,2020-03-27,4 -2,1,2020-03-28,4 -2,1,2020-03-29,4 -2,1,2020-03-30,4 -2,1,2020-03-31,4 +2,1,2020-11-27,3 +2,1,2020-11-28,3 +2,1,2020-11-29,3 +2,1,2020-11-30,3 +2,1,2020-12-01,4 +2,1,2020-12-02,4 +2,1,2020-12-03,4 +2,1,2020-12-04,4 +2,1,2020-12-05,4 +2,1,2020-12-06,4 +2,1,2020-12-07,4 +2,1,2020-12-08,4 +2,1,2020-12-09,4 +2,1,2020-12-10,4 +2,1,2020-12-11,4 +2,1,2020-12-12,4 +2,1,2020-12-13,4 +2,1,2020-12-14,4 +2,1,2020-12-15,4 +2,1,2020-12-16,4 +2,1,2020-12-17,4 +2,1,2020-12-18,4 +2,1,2020-12-19,4 +2,1,2020-12-20,4 +2,1,2020-12-21,4 +2,1,2020-12-22,4 +2,1,2020-12-23,4 +2,1,2020-12-24,4 +2,1,2020-12-25,4 +2,1,2020-12-26,4 +2,1,2020-12-27,4 +2,1,2020-12-28,4 +2,1,2020-12-29,4 +2,1,2020-12-30,4 +2,1,2020-12-31,4 diff --git a/test/unit/helpers_test.py b/test/unit/helpers_test.py index 
6b99e15683..a4301d53d2 100644 --- a/test/unit/helpers_test.py +++ b/test/unit/helpers_test.py @@ -411,8 +411,8 @@ def test_timeseries_fillin_through_month(test_dir): "2019-12-30", "2020-01-02", "2020-01-25", - "2020-02-27", - "2020-03-01", + "2020-11-27", + "2020-12-01", ], "plant_id_eia": [1, 1, 1, 2, 2], "generator_id": [1, 1, 2, 1, 1], diff --git a/test/unit/settings_test.py b/test/unit/settings_test.py index e858b9871d..5909b73e56 100644 --- a/test/unit/settings_test.py +++ b/test/unit/settings_test.py @@ -11,6 +11,7 @@ from pudl.resources import dataset_settings from pudl.settings import ( DatasetsSettings, + Eia860mSettings, Eia860Settings, Eia923Settings, EiaSettings, @@ -133,12 +134,22 @@ class TestEIA860Settings: def test_860m(self: Self): """Test validation error is raised when eia860m date is within 860 years.""" settings_cls = Eia860Settings - original_eia80m_date = settings_cls.eia860m_date - settings_cls.eia860m_date = "2019-11" + original_eia80m_year_month = settings_cls.eia860m_year_month + settings_cls.eia860m_year_month = "2019-11" with pytest.raises(ValidationError): settings_cls(eia860m=True) - settings_cls.eia860m_date = original_eia80m_date + settings_cls.eia860m_year_month = original_eia80m_year_month + + +class TestEia860mSettings: + """Test EIA860m settings.""" + + def test_all_year_quarters(self: Self): + """Test the `all` option for the eia860m settings.""" + settings_all = Eia860mSettings(year_months=["all"]).year_months + partitions_all = DataSource.from_id("eia860m").working_partitions["year_months"] + assert settings_all == partitions_all class TestEiaSettings: @@ -277,9 +288,9 @@ def test_partitions_with_json_normalize(pudl_etl_settings): ) cems_parts = json_normalize(datasets["epacems"].partitions) - if list(cems_parts.columns) != ["year_quarters"]: + if list(cems_parts.columns) != ["year_quarter"]: raise AssertionError( - "CEMS paritions should have year_quarters columns only, found:" + "CEMS paritions should have year_quarter 
columns only, found:" f"{cems_parts}" )