diff --git a/test/files/lcra/waterquality/12147_params.html b/test/files/lcra/waterquality/12147_params.html new file mode 100644 index 00000000..5e7e2229 --- /dev/null +++ b/test/files/lcra/waterquality/12147_params.html @@ -0,0 +1,199 @@ + + + + + + + waterquality.lcra.org + + + + + + + + + + +
+ +
+ +
+
+ +
+ +
+ + + +
+ +
+ + + + + + + + + + + +
+ + + SAN BERNARD RIVER MID CHANNEL 60 M DOWNSTREAM OF FM 442 BRIDGE SW OF NEEDVILLE
+ Monitored By:  

+ Texas Commission on Environmental Quality __ +
+  List results (tabular is default)
+  Include profile data (surface is default) +
+
+
+ +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  + Select All: + Parameter CodeDescription
Field and Flow
 72053DAYS SINCE PRECIPITATION EVENT (DAYS)
 82903DEPTH OF BOTTOM OF WATER BODY AT SAMPLE SITE MET
 89979EVIDENCE OF PRIMARY CONTACT RECREATION
 00061FLOW STREAM, INSTANTANEOUS (CUBIC FEET PER SEC)
 89835FLOW MTH 1=GAGE 2=ELEC 3=MECH 4=WEIR/FLU 5=DOPPL
 01351FLOW:1=No Flow,2=Low,3=Normal,4=Flood,5=High,6=D
 00300OXYGEN, DISSOLVED (MG/L)
 00400PH (STANDARD UNITS)
 89978PRIMARY CONTACT RECREATION OBSERVED
 00094SPECIFIC CONDUCTANCE,FIELD (UMHOS/CM @ 25C)
 00010TEMPERATURE, WATER (DEGREES CENTIGRADE)
 00078TRANSPARENCY, SECCHI DISC (METERS)
Bacteria
 31699E. COLI, COLILERT, IDEXX METHOD, MPN/100ML
 31704E.COLI, COLILERT, IDEXX, HOLDING TIME
Conventional Chemistry
 00410ALKALINITY, TOTAL (MG/L AS CACO3)
 00680CARBON, TOTAL ORGANIC, NPOC (TOC), MG/L
 00940CHLORIDE (MG/L AS CL)
 70953CHLOROPHYLL-A, PHYTOPLANKTON UG/L, CHROMO-FLOURO
 00951FLUORIDE, TOTAL (MG/L AS F)
 00630NITRITE PLUS NITRATE, TOTAL 1 DET. (MG/L AS N)
 00610NITROGEN, AMMONIA, TOTAL (MG/L AS N)
 00625NITROGEN, KJELDAHL, TOTAL (MG/L AS N)
 00593NO2 PLUS NO3-N, TOTAL, WHATMAN GF/F FILT (MG/L)
 00671ORTHPHOSPHATE PHOSPHORUS,DISS,MG/L,FLDFILT<15MIN
 00665PHOSPHORUS, TOTAL, WET METHOD (MG/L AS P)
 00530RESIDUE, TOTAL NONFILTRABLE (MG/L)
 00535RESIDUE, VOLATILE NONFILTRABLE (MG/L)
 70300RESIDUE,TOTAL FILTRABLE (DRIED AT 180C) (MG/L)
 00945SULFATE (MG/L AS SO4)
+
+ +
+ +
+
+ +
+
+ + + + + + +
+
+ + + + \ No newline at end of file diff --git a/test/files/lcra/waterquality/12147_results.html b/test/files/lcra/waterquality/12147_results.html new file mode 100644 index 00000000..b6cd5b39 --- /dev/null +++ b/test/files/lcra/waterquality/12147_results.html @@ -0,0 +1,135 @@ + + + + + + + waterquality.lcra.org + + + + + + + + + +
+ +
+ +
+
+ +
+ +
+ + +
+ +
+ + + + + + + + + + + +
+ + + SAN BERNARD RIVER MID CHANNEL 60 M DOWNSTREAM OF FM 442 BRIDGE SW OF NEEDVILLE
+ Monitored By:  

+ Texas Commission on Environmental Quality __
+
+ +
+ +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SiteDateDepth (m)LegendDAYS SINCE PRECIPDPTH BOT AT SITE METERSPRIMARY CONTACT EVIDENCESTREAM FLOW INST-CFSFLOW MEASUREM METHODSTREAM FLOW SEVERITYDO MG/LPH SUPRIMARY CONTACT # INDCNDUCTVY FIELD MICROMHOWATER TEMP CENTTRANSP SECCHI METERSE COLI IDEXX MPN100MLE.COLI COLILERT HoursT ALK CACO3 MG/LT ORG C C MG/LCHLORIDE CL MG/LCHLRPHYL A-PHYTO CHFLUG/LFLUORIDE F,TOTAL MG/LN02&N03 N-TOTAL MG/LNH3-N TOTAL MG/LTOT KJEL N MG/LNO2&NO3 N TOTWTM MG/LO-P DISS FIELDFIL MG/LPHOS-T P-WET MG/LRESIDUE TOT NFLT MG/LRESIDUE VOL NFLT MG/LRESIDUE DISS-180 MG/LSULFATE SO4 MG/L
121471/28/20100.3TCEQ >7  125138.67.8 40014.7 12081447575.87<0.250.770.16 0.770.180.34681036038
121474/8/20100.2TCEQ             10415081143.38<0.250.380.050.80.380.1080.2134850227
121474/8/20100.3TCEQ >70.5 19126.97.8 72621.20.12                 
121477/8/20100.3TCEQ <1  1740156.47.7 20026.5 2600581912 lt;0.250.29<0.051.050.290.230.4918325182<5
1214710/19/20100.3TCEQ >140.5 64137.47.9 620220.33664169 725.150.310.14<0.05 0.140.120.1714<437632
121471/5/20110.3TCEQ <10.2 27128.27.7 42015.10.1177041011254<3<0.250.96<0.051 0.220.3632 32325
121474/6/20110.3TCEQ >70.6 14127.68 183019.10.35252924381<3 0.410.160.78 0.140.1922<4118062
121477/26/20110.3TCEQ >7  26 26.48 58729.60.214031788 3.420.490.25<0.050.7  0.23355 21
1214710/19/20110.1TCEQ 100.3 38127.68.1 63019.50.15                 
1214710/19/20110.3TCEQ             180417513 1.91 0.22 0.96  0.25305  
121471/12/20120.3TCEQ 3  2140156.77.2 13013.70.05130043511101.5<0.250.86<0.051.14  0.469211 8
121474/25/20120.15TCEQ 50.3 14126.37.7 598220.09                 
+

+ 12 record(s) +
+
+ +
+
+ +
+
+ + + + + + + +
+
+ + + + + diff --git a/test/files/lcra/waterquality/stations.html b/test/files/lcra/waterquality/stations.html new file mode 100644 index 00000000..2bd97e50 --- /dev/null +++ b/test/files/lcra/waterquality/stations.html @@ -0,0 +1,87 @@ + + + + waterquality.lcra.org + + + + + + + + +
+ +
+ +
+
+ +
+ +
+ + +
+ +
+ + + + +
+ Select stream segment for multiple sites or choose an individual site.

+
+ +
+ + + + + + + + + + + + + + + + + + + + + + +
Site IDDescription
Segment 1301 San Bernard River Tidal
20460SAN BERNARD RIVER TIDAL AT SH 35 SOUTHWEST OF WEST COLUMBIA
12146SAN BERNARD RIVER TIDAL EAST BANK IMMEDIATELY UPSTREAM OF FM 2611
Segment 1302 San Bernard River Above Tidal
20723MOUND CREEK AT BRAZORIA CR 450/JACKSON SETTLEMENT ROAD 1.22 KILOMETERS UPSTREAM OF FM 1301 IN WEST OF WEST COLUMBIA
20722PEACH CREEK AT WHARTON CR 117/CHUDALLA ROAD/ARCHER ROAD 89 METERS SOUTH OF THE INTERSECTION OF WHARTON CR 117/CHUDALLA ROAD/ARCHER ROAD AND WHARTON CR 121/ WHARTON CR 119/DONALDSON ROAD IN EAST OF WHARTON
12517TRES PALACIOS CREEK AT FM 456
Segment 2002 Mission River Above Tidal
20062SARCO CREEK AT FM 2441 1.30 KM UPSTREAM OF THE ELKINS BRANCH CONFLUENCE
+
+ +
+ +
+
+ +
+
+ + + + + +
def test_get_stations():
    """get_stations() parses the site list fixture into a code -> description dict."""
    service_info_url = 'http://waterquality.lcra.org/sitelist.aspx'
    service_info_file = 'lcra/waterquality/stations.html'

    url_files = {
        (service_info_url, ('GET',)): service_info_file,
    }

    with test_util.mocked_urls(url_files):
        stations = get_stations()

    assert len(stations) == 6
    assert "SH 35 SOUTHWEST" in stations['20460']


def test_get_station_data():
    """get_station_data() returns one dict per result row of the events fixture."""
    service_info_url = 'http://waterquality.lcra.org/parameter.aspx?qrySite=12147'
    service_info_file = 'lcra/waterquality/12147_params.html'

    service_data_url = 'http://waterquality.lcra.org/events.aspx'
    service_data_file = 'lcra/waterquality/12147_results.html'

    url_files = {
        (service_info_url, ('GET',)): service_info_file,
        (service_data_url, ('POST',)): service_data_file,
    }

    # ULMO_TESTING makes get_station_data skip its on-disk cache. Restore the
    # previous state in a ``finally`` so a failing assertion cannot leak the
    # flag into tests that run afterwards.
    previous = os.environ.get("ULMO_TESTING")
    os.environ["ULMO_TESTING"] = "1"
    try:
        with test_util.mocked_urls(url_files):
            results = get_station_data(12147)

        assert len(results) == 12
        for data in results:
            assert data['Site'] == u'12147'
    finally:
        if previous is None:
            del os.environ["ULMO_TESTING"]
        else:
            os.environ["ULMO_TESTING"] = previous
def get_stations():
    """Fetches a list of station codes and descriptions.

    Returns
    -------
    stations_dict : dict
        a python dict with station codes mapped to station descriptions
    """
    stations_url = 'http://waterquality.lcra.org/sitelist.aspx'

    response = requests.get(stations_url)

    soup = BeautifulSoup(response.content, 'html.parser')
    gridview = soup.find(id="GridView1")

    stations = {}
    for row in gridview.findAll('tr'):
        cells = row.findAll('td')
        # only rows made of exactly two <td> cells (site id, description)
        # describe a station; every other row of the table is skipped
        if len(cells) == 2:
            stations[cells[0].string] = cells[1].string

    return stations


def get_station_data(station_code, date=None, as_dataframe=False):
    """Fetches the water quality results table for a station.

    Parameters
    ----------
    station_code : str or int
        The station code to fetch data for. A list of stations can be
        retrieved with ``get_stations()``.
    date : ``None`` or date (see :ref:`dates-and-times`)
        Not implemented yet: a value is accepted, logged and ignored.
    as_dataframe : bool
        Not implemented yet: a value is accepted, logged and ignored.

    Returns
    -------
    results : list
        A list with one dict per row of the results table. Each dict maps
        the column header text to the cell text, with ``None`` for blank
        cells.

    Raises
    ------
    TypeError
        If ``station_code`` is neither a string nor an integer.
    """
    if isinstance(station_code, int):
        station_code = str(station_code)
    elif not isinstance(station_code, str):
        # a bare ``raise`` has nothing to re-raise here, so raise explicitly
        raise TypeError(
            "Unsure of the station_code parameter type. Try string or int")

    if date:
        log.info("Date parameter not implemented yet")
    if as_dataframe:
        log.info("as_dataframe parameter not implemented yet")

    params_url = ("http://waterquality.lcra.org/parameter.aspx?qrySite=%s"
                  % station_code)
    events_url = 'http://waterquality.lcra.org/events.aspx'

    dir_path = op.join(LCRA_WATERQUALITY_DIR, station_code)
    resp_path = op.join(dir_path, "resp.html")
    pickle_path = op.join(dir_path, "data.pickle")

    util.mkdir_if_doesnt_exist(dir_path)

    # The parameter page supplies the form fields and cookies that are sent
    # along with the POST to the events page.
    initial_request = requests.get(params_url)
    result = _make_next_request(events_url,
                                initial_request,
                                {'site': station_code})

    # When ULMO_TESTING is set, the on-disk cache is neither read nor written.
    testing = bool(os.environ.get('ULMO_TESTING', None))

    if op.exists(resp_path) and \
            util.misc._request_file_size_matches(result, resp_path) \
            and not testing:
        # means nothing has changed: return the cached pickle
        log.info("%s was not processed because it is the same size",
                 station_code)
        try:
            with open(pickle_path, 'rb') as f:
                # NOTE: pickle.load is only appropriate because this file is
                # a local cache written by this function further down; never
                # point it at data from an untrusted source.
                return pickle.load(f)
        except IOError:
            # the cache is incomplete; fall through and rebuild it
            log.info("Couldn't find the pickle that should be there for %s",
                     station_code)

    if not testing:
        with open(resp_path, 'wb') as wf:
            wf.write(result.content)

    results = _parse_results_table(result.content)

    if not testing:
        with open(pickle_path, 'wb') as mf:
            pickle.dump(results, mf)

    return results


def _parse_results_table(content):
    """Parses the ``GridView1`` table of an events page into a list of dicts.

    Each ``<tr>`` that contains ``<td>`` cells becomes one dict mapping the
    text of the table's ``<th>`` cells to the row's cell values.
    """
    soup = BeautifulSoup(content, 'html.parser')
    gridview = soup.find(id="GridView1")

    headers = [head.text for head in gridview.findAll('th')]

    results = []
    for row in gridview.findAll('tr'):
        # blank cells hold \xa0, which _parse_val turns into None
        vals = [_parse_val(cell.text) for cell in row.findAll('td')]
        if not vals:
            # rows without any <td> cell carry no values
            continue
        results.append(dict(zip(headers, vals)))

    return results


def _extract_headers_for_next_request(request):
    """Collects the ``<input>`` tags of a response as a ``name -> value`` dict.

    Tags without a ``name`` attribute are skipped; tags without a ``value``
    attribute are kept with a value of ``None``.
    """
    payload = dict()
    for tag in BeautifulSoup(request.content, 'html.parser').findAll('input'):
        tag_dict = dict(tag.attrs)
        name = tag_dict.get('name')
        if name is None:
            # nothing to key the value on, so the tag cannot go in the payload
            continue
        # some tags don't have a value and are used w/ JS to toggle a set of
        # checkboxes
        payload[name] = tag_dict.get('value')
    return payload


def _make_next_request(url, previous_request, data):
    """POSTs to ``url`` with the previous page's form fields and cookies.

    The ``<input>`` fields of ``previous_request`` form the base payload and
    the entries of ``data`` override them. Returns the response of the POST.
    """
    data_headers = _extract_headers_for_next_request(previous_request)
    data_headers.update(data)
    return requests.post(url, cookies=previous_request.cookies,
                         data=data_headers)


def _parse_val(val):
    """Returns ``None`` for a blank table cell and ``val`` unchanged otherwise."""
    # a blank cell holds &nbsp;, which is parsed to the following unicode
    if val == u'\xa0':
        return None
    else:
        return val