Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New waterwebservices #124

Draft
wants to merge 23 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions ddlpy/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ def cli(verbose, args=None):
help='Station codes, e.g. HOEKVHLD',
multiple=True
)
@click.option(
'--procestype',
help='Procestype, e.g. meting, astronomisch, verwachting',
multiple=True
)
@click.option(
'--grootheid-code',
help='Grootheid code, e.g. WATHTE',
Expand Down Expand Up @@ -69,6 +74,7 @@ def cli(verbose, args=None):
)
def locations(output,
station,
procestype,
grootheid_code,
groepering_code,
hoedanigheid_code,
Expand All @@ -82,7 +88,8 @@ def locations(output,
locations_df = ddlpy.locations()

stations = station
quantities = {'Grootheid.Code': list(grootheid_code),
quantities = {'ProcesType':list(procestype),
'Grootheid.Code': list(grootheid_code),
'Groepering.Code': list(groepering_code),
'Hoedanigheid.Code': list(hoedanigheid_code),
'Eenheid.Code': list(eenheid_code),
Expand Down Expand Up @@ -128,24 +135,23 @@ def measurements(locations, start_date, end_date):
except:
raise ValueError('locations.json file not found. First run "ddlpy locations"')

for obs in range(locations_df.shape[0]): #goes through rows in table
selected = locations_df.loc[obs]

for irow, selected in locations_df.iterrows(): #goes through rows in table
measurements = ddlpy.measurements(
selected, start_date=start_date, end_date=end_date)

if (len(measurements) > 0):
if len(measurements) > 0:
print('Measurements of %s were obtained' % selected['Code'])
station = selected['Code']
pt = selected['ProcesType']
cc = selected['Compartiment.Code']
ec = selected['Eenheid.Code']
gc = selected['Grootheid.Code']
grc = selected['Groepering.Code']
hc = selected['Hoedanigheid.Code']
pc = selected['Parameter.Code']

measurements.to_csv('%s_%s_%s_%s_%s_%s_%s.csv' %
(station, cc, ec, gc, grc, hc, pc))
measurements.to_csv('%s_%s_%s_%s_%s_%s_%s_%s.csv' %
(station, pt ,cc, ec, gc, grc, hc, pc))
else:
print('No Data of station %s were retrieved from Water Info' %
selected['Code'])
Expand Down
41 changes: 23 additions & 18 deletions ddlpy/ddlpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,26 @@ class UnsuccessfulRequestError(ValueError):
def _send_post_request(url, request, timeout=None):
logger.debug("Requesting at {} with request: {}".format(url, json.dumps(request)))
resp = requests.post(url, json=request, timeout=timeout)
if not resp.ok:
raise IOError("Request failed: {}".format(resp.text))

if resp.status_code==204:
# this error is raised here, but catched in ddlpy.ddlpy.measurements() so the process can continue.
raise NoDataError(resp.reason)

result = resp.json()
if not result['Succesvol']:
if not resp.ok:
# bijv Foutmelding: "Het max aantal waarnemingen (160000) is overschreven. Beperk uw request."
logger.debug('Response result is unsuccessful: {}'.format(result))
error_message = result.get('Foutmelding', 'No error returned')
if error_message == "Geen gegevens gevonden!":
# Foutmelding: "Geen gegevens gevonden!"
# this is a valid response for periods where there is no data
# this error is raised here, but catched in ddlpy.ddlpy.measurements() so the process can continue.
raise NoDataError(error_message)
else:
# Foutmelding: "Het max aantal waarnemingen (157681) is overschreven, beperk uw request."
# or any other possible error message are raised here
raise UnsuccessfulRequestError(error_message)
raise IOError("Request failed: {}".format(error_message))

if not result['Succesvol']:
# TODO: this is probably never reached anymore. Ask whether Succesvol can be removed from the response
# if not it can be false if resp.ok=True, add a testcase
logger.debug('Response result is unsuccessful: {}'.format(result))
error_message = result.get('Foutmelding', 'No error returned')
# or any other possible error message are raised here
raise UnsuccessfulRequestError(error_message)

# continue if request was successful
return result

Expand All @@ -66,6 +69,8 @@ def catalog(catalog_filter=None):
request = endpoint["request"]
else:
assert isinstance(catalog_filter, list)
# TODO: incorrect filter keys results in empty catalog instead of proper error
# https://github.com/Rijkswaterstaat/wm-ws-dl/issues/44
request = {"CatalogusFilter": {x:True for x in catalog_filter}}

result = _send_post_request(endpoint["url"], request, timeout=None)
Expand Down Expand Up @@ -142,10 +147,7 @@ def _get_request_dicts(location):

# generate location dict from relevant values
locatie_dict = {
"X": location["X"],
"Y": location["Y"],
# assert code is used as index
# TODO: use a numpy compatible json encoder in requests
"Code": location.get("Code", location.name),
}

Expand Down Expand Up @@ -261,7 +263,10 @@ def measurements_amount(location:pd.Series, start_date:(str,pd.Timestamp), end_d
df = df.set_index("Groeperingsperiode")
df = df[["AantalMetingen"]]
df_list.append(df)


if len(df_list) == 0:
raise NoDataError("no measurements available returned")

# concatenate and sum duplicated index
df_amount = pd.concat(df_list).sort_index()
df_amount = df_amount.groupby(df_amount.index).sum()
Expand Down Expand Up @@ -311,8 +316,8 @@ def _combine_waarnemingenlijst(result, location):
for name in [
"Coordinatenstelsel",
"Naam",
"X",
"Y",
"Lon",
"Lat",
]:
df[name] = location[name]

Expand Down
41 changes: 14 additions & 27 deletions ddlpy/endpoints.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"collect_catalogue": {
"name": "OphalenCatalogus",
"url": "https://waterwebservices.rijkswaterstaat.nl/METADATASERVICES_DBO/OphalenCatalogus",
"url": "https://waterwebservices.beta.rijkswaterstaat.nl/test/METADATASERVICES/OphalenCatalogus",
"type": "POST",
"request": {
"CatalogusFilter": {
Expand All @@ -10,20 +10,19 @@
"Hoedanigheden": true,
"Groeperingen": true,
"Parameters": true,
"ProcesTypes": true,
"Compartimenten": true
}
}
},
"collect_observations": {
"name": "OphalenWaarnemingen",
"url": "https://waterwebservices.rijkswaterstaat.nl/ONLINEWAARNEMINGENSERVICES_DBO/OphalenWaarnemingen",
"url": "https://waterwebservices.beta.rijkswaterstaat.nl/test/ONLINEWAARNEMINGENSERVICES/OphalenWaarnemingen",
"type": "POST",
"_comment": "Make sure you specify the request location to the exact number of decimals and the time in the exact format (samen number of zeros).",
"request": {
"Locatie": {
"X": 761899.770959577,
"Y": 5915790.48491405,
"Code": "DELFZL"
"Code": "delfzijl"
},
"AquoPlusWaarnemingMetadata": {
"AquoMetadata": {
Expand All @@ -46,7 +45,7 @@
},
"collect_latest_observations": {
"name": "OphalenLaatsteWaarnemingen",
"url": "https://waterwebservices.rijkswaterstaat.nl/ONLINEWAARNEMINGENSERVICES_DBO/OphalenLaatsteWaarnemingen",
"url": "https://waterwebservices.beta.rijkswaterstaat.nl/test/ONLINEWAARNEMINGENSERVICES/OphalenLaatsteWaarnemingen",
"type": "POST",
"_comment": "Make sure you specify the request location to the exact number of decimals.",
"request": {
Expand All @@ -67,16 +66,14 @@
],
"LocatieLijst": [
{
"X": 518882.333320247,
"Y": 5760829.11729589,
"Code": "EURPFM"
"Code": "europlatform"
}
]
}
},
"check_observations_available": {
"name": "CheckWaarnemingenAanwezig",
"url": "https://waterwebservices.rijkswaterstaat.nl/ONLINEWAARNEMINGENSERVICES_DBO/CheckWaarnemingenAanwezig",
"url": "https://waterwebservices.beta.rijkswaterstaat.nl/test/ONLINEWAARNEMINGENSERVICES/CheckWaarnemingenAanwezig",
"type": "POST",
"_comment": "Make sure you specify the request location to the exact number of decimals and the time in the exact format (samen number of zeros).",
"request": {
Expand All @@ -92,9 +89,7 @@
],
"LocatieLijst": [
{
"X": 518882.333320247,
"Y": 5760829.11729589,
"Code": "EURPFM"
"Code": "europlatform"
}
],
"Periode": {
Expand All @@ -105,7 +100,7 @@
},
"collect_number_of_observations": {
"name": "OphalenAantalWaarnemingen",
"url": "https://waterwebservices.rijkswaterstaat.nl/ONLINEWAARNEMINGENSERVICES_DBO/OphalenAantalWaarnemingen",
"url": "https://waterwebservices.beta.rijkswaterstaat.nl/test/ONLINEWAARNEMINGENSERVICES/OphalenAantalWaarnemingen",
"type": "POST",
"request": {
"AquoMetadataLijst": [
Expand All @@ -117,9 +112,7 @@
"Groeperingsperiode": "Week",
"LocatieLijst": [
{
"X": 518882.333320247,
"Y": 5760829.11729589,
"Code": "EURPFM"
"Code": "europlatform"
}
],
"Periode": {
Expand All @@ -130,7 +123,7 @@
},
"request_bulk_observations": {
"name": "AanvragenBulkWaarnemingen",
"url": "https://waterwebservices.rijkswaterstaat.nl/BULKWAARNEMINGSERVICES_DBO/AanvragenBulkWaarnemingen",
"url": "https://waterwebservices.beta.rijkswaterstaat.nl/test/BULKWAARNEMINGSERVICES/AanvragenBulkWaarnemingen",
"type": "POST",
"_comment": "Make sure you specify the request location to the exact number of decimals and the time in the exact format (samen number of zeros).",
"request": {
Expand Down Expand Up @@ -172,19 +165,13 @@
],
"LocatieLijst": [
{
"X": 742469.913149676,
"Y": 5940708.14824459,
"Code": "HUIBGOT"
"Code": "huibertgat.oost"
},
{
"X": 595875.376191307,
"Y": 5790952.82210343,
"Code": "NOORDWK2"
"Code": "noordwijk.2kmuitdekust.flachsee"
},
{
"X": 571670.054611366,
"Y": 5822651.05560318,
"Code": "IJMDMNTSPS"
"Code": "ijmuiden.munitiestort.1"
}
],
"Periode": {
Expand Down
2 changes: 1 addition & 1 deletion ddlpy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def dataframe_to_xarray(df: pd.DataFrame, drop_if_constant=[]):
cols_onlynvt_code = [x for x in cols_onlynvt_code if x.endswith(".Code")]

# create list of location columns, will be dropped (added as ds.attrs)
cols_location = ['Code', 'Naam', 'Coordinatenstelsel', 'X', 'Y']
cols_location = ['Code', 'Naam', 'Coordinatenstelsel', 'Lon', 'Lat']

# add drop_if_constant colums to list if values are indeed constant, will be dropped (added as ds.attrs)
cols_constant = []
Expand Down
11 changes: 7 additions & 4 deletions docs/examples/minimal_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,17 @@

#select a set of parameters
# Filter the locations dataframe with the desired parameters and stations.
bool_stations = locations.index.isin(['IJMDBTHVN', 'DANTZGZD','HOEKVHLD'])
# measured (WATHTE) versus computed/astro
bool_stations = locations.index.isin(['ijmuiden.buitenhaven', 'dantziggat.zuid', 'hoekvanholland', 'ameland.nes'])
# meting/astronomisch/verwachting
bool_procestype = locations['ProcesType'].isin(['meting'])
# waterlevel/waterhoogte (WATHTE)
bool_grootheid = locations['Grootheid.Code'].isin(['WATHTE'])
# timeseries (NVT) versus extremes
bool_groepering = locations['Groepering.Code'].isin(['NVT'])
bool_groepering = locations['Groepering.Code'].isin([''])
# vertical reference (NAP/MSL)
bool_hoedanigheid = locations['Hoedanigheid.Code'].isin(['NAP'])
selected = locations.loc[bool_stations & bool_grootheid &
selected = locations.loc[bool_procestype &
bool_stations & bool_grootheid &
bool_groepering & bool_hoedanigheid]

start_date = dt.datetime(2023, 1, 1)
Expand Down
15 changes: 6 additions & 9 deletions docs/examples/retrieve_parallel_to_netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@


def get_data(location, start_date, end_date, dir_output, overwrite=True):

station_id = location.name
station_messageid = location["Locatie_MessageID"]
filename = os.path.join(dir_output, f"{station_id}-{station_messageid}.nc")
Expand Down Expand Up @@ -44,19 +43,18 @@ def get_data(location, start_date, end_date, dir_output, overwrite=True):
ds.to_netcdf(filename, format="NETCDF4_CLASSIC")


if ( __name__ == "__main__" ):

if __name__ == "__main__":
dir_output = './ddl_retrieved_data'
os.makedirs(dir_output, exist_ok=True)

# get locations
locations = ddlpy.locations()
bool_stations = locations.index.isin(['IJMDBTHVN', 'DANTZGZD', 'HOEKVHLD', 'VLISSGN', 'HOEK', 'VLIS', "OLST"])
bool_grootheid = locations['Grootheid.Code'].isin(['WATHTE']) # measured (WATHTE) versus computed/astro
bool_groepering = locations['Groepering.Code'].isin(['NVT']) # timeseries (NVT) versus extremes
bool_stations = locations.index.isin(['ijmuiden.buitenhaven', 'dantziggat.zuid', 'hoekvanholland', 'ameland.nes', 'vlissingen', 'olst'])
bool_procestype = locations['ProcesType'].isin(['meting']) # meting/astronomisch/verwachting
bool_grootheid = locations['Grootheid.Code'].isin(['WATHTE']) # waterlevel (WATHTE)
bool_groepering = locations['Groepering.Code'].isin(['']) # timeseries (NVT) versus extremes
bool_hoedanigheid = locations['Hoedanigheid.Code'].isin(['NAP']) # vertical reference (NAP/MSL)
selected = locations.loc[bool_stations & bool_grootheid & bool_groepering & bool_hoedanigheid]

selected = locations.loc[bool_stations & bool_procestype & bool_grootheid & bool_groepering & bool_hoedanigheid]

start_date = dt.datetime(2022, 1, 1)
end_date = dt.datetime(2022, 3, 1)
Expand All @@ -70,7 +68,6 @@ def get_data(location, start_date, end_date, dir_output, overwrite=True):
for station_code, location in selected.iterrows():
executor.submit(get_data, location, start_date, end_date, dir_output)


file_list = glob.glob(os.path.join(dir_output, "*.nc"))
fig, ax = plt.subplots()
for file_nc in file_list:
Expand Down
Loading
Loading