
Commit d5b19a8

Fix read json
thodson-usgs committed Jan 9, 2024
1 parent 3f164e8 commit d5b19a8
Showing 1 changed file with 74 additions and 52 deletions.
dataretrieval/nwis.py: 126 changes (74 additions, 52 deletions)
@@ -1306,60 +1306,82 @@ def _read_json(json):
     """
     merged_df = pd.DataFrame(columns=['site_no', 'datetime'])

-    for timeseries in json['value']['timeSeries']:
-        site_no = timeseries['sourceInfo']['siteCode'][0]['value']
-        param_cd = timeseries['variable']['variableCode'][0]['value']
-        # check whether min, max, mean record XXX
-        option = timeseries['variable']['options']['option'][0].get('value')
-
-        # loop through each parameter in timeseries.
-        for parameter in timeseries['values']:
-            col_name = param_cd
-            method = parameter['method'][0]['methodDescription']
-
-            # if len(timeseries['values']) > 1 and method:
-            if method:
-                # get method, format it, and append to column name
-                method = method.strip('[]()').lower()
-                col_name = f'{col_name}_{method}'
-
-            if option:
-                col_name = f'{col_name}_{option}'
-
-            record_json = parameter['value']
-
-            if not record_json:
-                # no data in record
-                continue
-            # should be able to avoid this by dumping
-            record_json = str(record_json).replace("'", '"')
-
-            # read json, converting all values to float64 and all qualifiers
-            # Lists can't be hashed, thus we cannot df.merge on a list column
-            record_df = pd.read_json(
-                StringIO(record_json),
-                orient='records',
-                dtype={'value': 'float64', 'qualifiers': 'unicode'},
-                convert_dates=False,
-            )
+    site_list = [
+        ts['sourceInfo']['siteCode'][0]['value'] for ts in json['value']['timeSeries']
+    ]

-            record_df['qualifiers'] = (
-                record_df['qualifiers'].str.strip('[]').str.replace("'", '')
-            )
-            record_df['site_no'] = site_no
-
-            record_df.rename(
-                columns={
-                    'value': col_name,
-                    'dateTime': 'datetime',
-                    'qualifiers': col_name + '_cd',
-                },
-                inplace=True,
-            )
+    # create a list of indexes for each change in site no
+    # for example, [0, 21, 22] would be the first and last indices
+    index_list = [0]
+    index_list.extend(
+        [i + 1 for i, (a, b) in enumerate(zip(site_list[:-1], site_list[1:])) if a != b]
+    )
+    index_list.append(len(site_list))
+
+    for i in range(len(index_list) - 1):
+        start = index_list[i]  # [0]
+        end = index_list[i + 1]  # [21]
+
+        # grab a block containing timeseries 0:21,
+        # which are all from the same site
+        site_block = json['value']['timeSeries'][start:end]
+        site_no = site_block[0]['sourceInfo']['siteCode'][0]['value']
+        site_df = pd.DataFrame(columns=['datetime'])
+
+        for timeseries in site_block:
+            param_cd = timeseries['variable']['variableCode'][0]['value']
+            # check whether min, max, mean record XXX
+            option = timeseries['variable']['options']['option'][0].get('value')
+
+            # loop through each parameter in timeseries, then concat to the merged_df
+            for parameter in timeseries['values']:
+                col_name = param_cd
+                method = parameter['method'][0]['methodDescription']
+
+                # if len(timeseries['values']) > 1 and method:
+                if method:
+                    # get method, format it, and append to column name
+                    method = method.strip('[]()').lower()
+                    col_name = f'{col_name}_{method}'
+
+                if option:
+                    col_name = f'{col_name}_{option}'
+
+                record_json = parameter['value']
+
+                if not record_json:
+                    # no data in record
+                    continue
+                # should be able to avoid this by dumping
+                record_json = str(record_json).replace("'", '"')
+
+                # read json, converting all values to float64 and all qualifiers
+                # Lists can't be hashed, thus we cannot df.merge on a list column
+                record_df = pd.read_json(
+                    StringIO(record_json),
+                    orient='records',
+                    dtype={'value': 'float64', 'qualifiers': 'unicode'},
+                    convert_dates=False,
+                )

-            merged_df = merged_df.merge(
-                record_df, how='outer', on=['site_no', 'datetime']
-            )
+                record_df['qualifiers'] = (
+                    record_df['qualifiers'].str.strip('[]').str.replace("'", '')
+                )
+
+                record_df.rename(
+                    columns={
+                        'value': col_name,
+                        'dateTime': 'datetime',
+                        'qualifiers': col_name + '_cd',
+                    },
+                    inplace=True,
+                )
+
+                site_df = site_df.merge(record_df, how='outer', on='datetime')
+
+        # end of site loop
+        site_df['site_no'] = site_no
+        merged_df = pd.concat([merged_df, site_df])

     # convert to datetime, normalizing the timezone to UTC when doing so
     if 'datetime' in merged_df.columns:
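The heart of the change is the grouping step, which assumes the timeSeries list returned by NWIS is ordered so that all records for a given site are consecutive. A minimal sketch of the boundary computation, using a made-up site_list (the site numbers are hypothetical):

    site_list = ['01646500', '01646500', '09380000']  # hypothetical sites

    # boundaries fall wherever the site number changes between neighbors
    index_list = [0]
    index_list.extend(
        [i + 1 for i, (a, b) in enumerate(zip(site_list[:-1], site_list[1:])) if a != b]
    )
    index_list.append(len(site_list))

    print(index_list)  # [0, 2, 3]: slice 0:2 is the first site, 2:3 the second

Each adjacent pair of boundaries then yields one site_block slice, so every block handed to the inner loops contains timeseries from exactly one site.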

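The "# should be able to avoid this by dumping" comment refers to serializing the record list with the standard-library json module instead of swapping quote characters by hand. A sketch of that alternative, assuming record_json has the usual NWIS 'values' shape (the sample record below is made up); note that inside _read_json the module would need an import alias, since `json` is the function's argument name:

    import json
    from io import StringIO

    import pandas as pd

    record_json = [  # made-up record in the NWIS 'values' shape
        {'value': '5.0', 'qualifiers': ['P'], 'dateTime': '2024-01-09T00:00:00.000'}
    ]
    record_str = json.dumps(record_json)  # valid JSON; no .replace("'", '"') needed

    record_df = pd.read_json(
        StringIO(record_str),
        orient='records',
        dtype={'value': 'float64', 'qualifiers': 'unicode'},
        convert_dates=False,
    )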

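Merging each parameter onto site_df by 'datetime' alone and then concatenating finished site blocks replaces the old outer merge on ['site_no', 'datetime'] across every record. A toy illustration of the new flow, with made-up values for two parameter columns ('00060' for discharge, '00065' for gage height) and a hypothetical site number:

    import pandas as pd

    flow = pd.DataFrame({'datetime': ['2024-01-09', '2024-01-10'], '00060': [5.0, 6.1]})
    stage = pd.DataFrame({'datetime': ['2024-01-09', '2024-01-10'], '00065': [1.2, 1.3]})

    # within one site, align parameters on datetime alone
    site_df = pd.DataFrame(columns=['datetime'])
    site_df = site_df.merge(flow, how='outer', on='datetime')
    site_df = site_df.merge(stage, how='outer', on='datetime')
    site_df['site_no'] = '01646500'  # hypothetical site

    # finished site blocks are appended, not merged, into the result
    merged_df = pd.concat([pd.DataFrame(columns=['site_no', 'datetime']), site_df])
    # one row per datetime, parameter columns side by side, site_no stamped per block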