Skip to content

Commit

Permalink
add lcra water quality module
Browse files Browse the repository at this point in the history
  • Loading branch information
nathanhilbert committed Jul 22, 2015
1 parent d71cb71 commit 4c27192
Show file tree
Hide file tree
Showing 7 changed files with 665 additions and 0 deletions.
199 changes: 199 additions & 0 deletions test/files/lcra/waterquality/12147_params.html

Large diffs are not rendered by default.

135 changes: 135 additions & 0 deletions test/files/lcra/waterquality/12147_results.html

Large diffs are not rendered by default.

87 changes: 87 additions & 0 deletions test/files/lcra/waterquality/stations.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- saved from url=(0042)http://waterquality.lcra.org/sitelist.aspx -->
<html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>waterquality.lcra.org</title>
<link href="./waterquality.lcra.org_files/www09_consolidated2.css" rel="stylesheet" type="text/css">
<link rel="shortcut icon" href="http://waterquality.lcra.org/Images/faviconLCRA.ico" type="image/x-icon">
<script src="./waterquality.lcra.org_files/www09_top_menu.js" type="text/javascript"></script>
<style type="text/css"></style></head>


<body style="height: 100%; background-color:#033968; width:100%">

<div align="left">
<script type="text/javascript" language="javascript">showTopNavBar();</script><div id="MenuBar" class="NoPrint"><div class="NavButtons" id="logo"><a target="_new" href="http://www.lcra.org/"><img src="./waterquality.lcra.org_files/lcralogo.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div class="NavButtons" id="NavAbout" onmouseover="navOn(&quot;NavAbout&quot;)" onmouseout="navOff(&quot;NavAbout&quot;)" style="background-image: url(http://waterquality.lcra.org/images/button_gradient.png);"><a href="http://www.lcra.org/about" target="_new"><img src="./waterquality.lcra.org_files/about.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div class="NavButtons" id="NavEnergy" onmouseover="navOn(&quot;NavEnergy&quot;)" onmouseout="navOff(&quot;NavEnergy&quot;)" style="background-image: url(http://waterquality.lcra.org/images/button_gradient.png);"><a href="http://www.lcra.org/energy" target="_new"><img src="./waterquality.lcra.org_files/energy.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div class="NavButtons" id="NavWater" onmouseover="navOn(&quot;NavWater&quot;)" onmouseout="navOff(&quot;NavWater&quot;)" style="background-image: url(http://waterquality.lcra.org/images/button_gradient.png);"><a href="http://www.lcra.org/water" target="_new"><img src="./waterquality.lcra.org_files/water.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div class="NavButtons" id="NavParks" onmouseover="navOn(&quot;NavParks&quot;)" onmouseout="navOff(&quot;NavParks&quot;)"><a href="http://www.lcra.org/parks" target="_new"><img src="./waterquality.lcra.org_files/parks.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" 
border="0"></div><div class="NavButtons" id="NavJobs" onmouseover="navOn(&quot;NavJobs&quot;)" onmouseout="navOff(&quot;NavJobs&quot;)" style="background-image: url(http://waterquality.lcra.org/images/button_gradient.png);"><a href="http://www.lcra.org/about/employment" target="_new"><img src="./waterquality.lcra.org_files/jobs.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div class="NavButtons" id="NavContactUs" onmouseover="navOn(&quot;NavContactUs&quot;)" onmouseout="navOff(&quot;NavContactUs&quot;)"><a href="http://www.lcra.org/about/overview" target="_new"><img src="./waterquality.lcra.org_files/contact_us.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div id="SearchBoxWrapper"><form name="gs" action="http://www.lcra.org/search" method="GET" target="_new"><table height="39px" border="0" align="right"><tbody><tr><td valign="middle"><img src="./waterquality.lcra.org_files/search.png" border="0"></td><td><input type="hidden" name="site" value="dotorg"><input type="hidden" name="client" value="dotorg_new"><input type="hidden" name="proxystylesheet" value="dotorg_new"><input type="hidden" name="output" value="xml_no_dtd"><input type="text" name="q" size="15" class="SearchInputBox"></td><td><input src="./waterquality.lcra.org_files/search_submit.png" name="search" type="image" alt="Click to submit your search">&nbsp;</td></tr></tbody></table></form></div></div>
</div>

<form name="form1" method="post" action="./waterquality.lcra.org_files/waterquality.lcra.org.html" id="form1" style="margin: 0px; padding: 0px;">
<div>
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUJNjg0NzA5NjQ2ZBgBBQlHcmlkVmlldzEPPCsADAEIAgFkdrUlLeK5FmsdI/Lbf1BTyvSFsC4S2WBBt6DwnxoC3UQ=">
</div>

<div>

<input type="hidden" name="__VIEWSTATEGENERATOR" id="__VIEWSTATEGENERATOR" value="F8CC5443">
</div>

<div style="padding:6px;">
<table style="width: 100%; background-color:Black;" cellpadding="4">
<tbody><tr>
<td width="50%">
<span class="HomepageColumnHeadings">Select stream segment for multiple sites or choose an individual site.</span><br><br>
</td>
</tr>
</tbody></table>

<div>
<table cellspacing="0" cellpadding="3" border="0" id="GridView1" style="font-size:Smaller;width:100%;border-collapse:collapse;">
<tbody><tr class="header" style="color:White;font-size:Small;font-weight:bold;">
<th class="header" scope="col">Site ID</th><th class="header" align="left" scope="col">Description</th>
</tr><tr style="color:White;background-color:Gainsboro;">
<td colspan="2" style="background-color:#033968;font-size:8pt;font-weight:bold;width:100%;"><a class="WhiteLinks" href="http://waterquality.lcra.org/parameter_segments.aspx?qrySegment=1301" target="_blank">Segment 1301 San Bernard River Tidal</a></td>
</tr><tr style="color:White;background-color:Gainsboro;">
<td align="center"><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20460" style="display:inline-block;color:Black;width:150px;">20460</a></td><td><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20460" style="color:Black;">SAN BERNARD RIVER TIDAL AT SH 35 SOUTHWEST OF WEST COLUMBIA</a></td>
</tr><tr style="color:White;background-color:#EBEBEB;">
<td align="center"><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=12146" style="display:inline-block;color:Black;width:150px;">12146</a></td><td><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=12146" style="color:Black;">SAN BERNARD RIVER TIDAL EAST BANK IMMEDIATELY UPSTREAM OF FM 2611</a></td>
</tr><tr style="color:White;background-color:Gainsboro;">
<td colspan="2" style="background-color:#033968;font-size:8pt;font-weight:bold;width:100%;"><a class="WhiteLinks" href="http://waterquality.lcra.org/parameter_segments.aspx?qrySegment=1302" target="_blank">Segment 1302 San Bernard River Above Tidal</a></td>
</tr><tr style="color:White;background-color:Gainsboro;">
<td align="center"><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20723" style="display:inline-block;color:Black;width:150px;">20723</a></td><td><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20723" style="color:Black;">MOUND CREEK AT BRAZORIA CR 450/JACKSON SETTLEMENT ROAD 1.22 KILOMETERS UPSTREAM OF FM 1301 IN WEST OF WEST COLUMBIA</a></td>
</tr><tr style="color:White;background-color:#EBEBEB;">
<td align="center"><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20722" style="display:inline-block;color:Black;width:150px;">20722</a></td><td><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20722" style="color:Black;">PEACH CREEK AT WHARTON CR 117/CHUDALLA ROAD/ARCHER ROAD 89 METERS SOUTH OF THE INTERSECTION OF WHARTON CR 117/CHUDALLA ROAD/ARCHER ROAD AND WHARTON CR 121/ WHARTON CR 119/DONALDSON ROAD IN EAST OF WHARTON</a></td>
</tr><tr style="color:White;background-color:#EBEBEB;">
<td align="center"><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=12517" style="display:inline-block;color:Black;width:150px;">12517</a></td><td><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=12517" style="color:Black;">TRES PALACIOS CREEK AT FM 456</a></td>
</tr><tr style="color:White;background-color:Gainsboro;">
<td colspan="2" style="background-color:#033968;font-size:8pt;font-weight:bold;width:100%;"><a class="WhiteLinks" href="http://waterquality.lcra.org/parameter_segments.aspx?qrySegment=2002" target="_blank">Segment 2002 Mission River Above Tidal</a></td>
</tr><tr style="color:White;background-color:Gainsboro;">
<td align="center"><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20062" style="display:inline-block;color:Black;width:150px;">20062</a></td><td><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20062" style="color:Black;">SARCO CREEK AT FM 2441 1.30 KM UPSTREAM OF THE ELKINS BRANCH CONFLUENCE</a></td>
</tr>
</tbody></table>
</div>

</div>

<div id="DoesNothingButCenter" align="center">
<div id="OuterWrapper">
<div id="Footer">
<a class="WhiteLinks" href="http://www.lcra.org/about/doing_business/index.html" target="_new">Purchasing</a> | <a class="WhiteLinks" href="http://www.lcra.org/about/overview/openrecords.html" target="_new">Open Records</a> | <a class="WhiteLinks" href="http://www.lcra.org/asklcra/" target="_new">Ask LCRA</a> | <a class="WhiteLinks" href="http://www.lcra.org/sitemap.html" target="_new">Sitemap</a>
<br>
© 1996-<script type="text/javascript"> var d = new Date(); document.write(d.getFullYear());</script>2015 Lower Colorado River Authority. All rights reserved.
</div> <!-- end "Footer" div -->
</div> <!-- end "OuterWrapper" div -->
</div> <!-- end "DoesNothingButCenter" div -->





</form>
<script src="./waterquality.lcra.org_files/urchin.js" type="text/javascript">
</script>
<script type="text/javascript">
_uacct = "UA-1180003-2";
urchinTracker();
</script>


</body></html>
45 changes: 45 additions & 0 deletions test/lcra_waterquality_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@


from ulmo.lcra.waterquality import get_stations, get_station_data
import test_util
import os

def test_get_stations():
    """Check that get_stations() parses the saved LCRA site list page."""
    stations_url = 'http://waterquality.lcra.org/sitelist.aspx'
    stations_fixture = 'lcra/waterquality/stations.html'

    # serve the saved page instead of hitting the live site
    mocked = {(stations_url, ('GET',)): stations_fixture}

    with test_util.mocked_urls(mocked):
        found = get_stations()

    # the fixture lists six stations; segment heading rows are not stations
    assert len(found) == 6
    assert "SH 35 SOUTHWEST" in found['20460']

def test_get_station_data():
    """Check that get_station_data() parses the saved events page for 12147."""
    service_info_url = 'http://waterquality.lcra.org/parameter.aspx?qrySite=12147'
    service_info_file = 'lcra/waterquality/12147_params.html'

    service_data_url = 'http://waterquality.lcra.org/events.aspx'
    service_data_file = 'lcra/waterquality/12147_results.html'

    url_files = {
        (service_info_url, ('GET',)): service_info_file,
        (service_data_url, ('POST',)): service_data_file,
    }

    # ULMO_TESTING makes get_station_data skip its on-disk cache. Restore
    # the previous state in a finally block so that a failure in here cannot
    # leak the flag into (or strip it from) the rest of the test run.
    previous = os.environ.get("ULMO_TESTING")
    os.environ["ULMO_TESTING"] = "1"
    try:
        with test_util.mocked_urls(url_files):
            results = get_station_data(12147)
    finally:
        if previous is None:
            del os.environ["ULMO_TESTING"]
        else:
            os.environ["ULMO_TESTING"] = previous

    assert len(results) == 12
    for data in results:
        assert data['Site'] == u'12147'



1 change: 1 addition & 0 deletions ulmo/lcra/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from . import waterquality
1 change: 1 addition & 0 deletions ulmo/lcra/waterquality/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .core import get_stations, get_station_data
197 changes: 197 additions & 0 deletions ulmo/lcra/waterquality/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
"""
ulmo.lcra.waterquality.core
~~~~~~~~~~~~~~~~~~~~~~~~~~~

This module provides access to data provided by the `Lower Colorado
River Authority`_ `Water Quality`_ web site.

.. _Lower Colorado River Authority: http://www.lcra.org
.. _Water Quality: http://waterquality.lcra.org/
"""
from bs4 import BeautifulSoup
import logging

from ulmo import util



import pickle

import os

# import datetime
import os.path as op

LCRA_WATERQUALITY_DIR = op.join(util.get_ulmo_dir(), 'lcra/waterquality')


log = logging.getLogger(__name__)

from bs4 import BeautifulSoup
import requests




# import numpy as np
# import pandas


# try:
# import cStringIO as StringIO
# except ImportError:
# import StringIO


def get_stations():
    """Fetches a list of station codes and descriptions.

    Returns
    -------
    stations_dict : dict
        a python dict with station codes mapped to station descriptions

    Raises
    ------
    ValueError
        if the page does not contain the station table (``id="GridView1"``)
    """
    stations_url = 'http://waterquality.lcra.org/sitelist.aspx'

    response = requests.get(stations_url)
    soup = BeautifulSoup(response.content, 'html.parser')

    gridview = soup.find(id="GridView1")
    if gridview is None:
        # fail with a clear message instead of an AttributeError on None
        raise ValueError(
            "Could not find the station table (GridView1) in the response "
            "from %s" % stations_url)

    stations = {}
    for row in gridview.find_all('tr'):
        cells = row.find_all('td')
        # the header row uses <th> cells and segment heading rows hold a
        # single <td colspan="2">; only two-cell rows describe a station
        if len(cells) == 2:
            code_cell, description_cell = cells
            stations[code_cell.string] = description_cell.string

    return stations


def get_station_data(station_code, date=None, as_dataframe=False):
    """Fetches the water quality events listed for a station.

    The station's parameter page is requested first; its form fields and
    cookies are then posted to the events page, and the resulting
    ``GridView1`` table is parsed.

    Unless the ``ULMO_TESTING`` environment variable is set, the raw response
    and the parsed results are cached under
    ``LCRA_WATERQUALITY_DIR/<station_code>`` (``resp.html`` and
    ``data.pickle``). When ``util.misc._request_file_size_matches`` reports
    that the new response matches the cached one, the pickled results are
    returned instead of re-parsing.

    Parameters
    ----------
    station_code : str or int
        The station code to fetch data for. A list of stations can be
        retrieved with ``get_stations()``
    date : ``None`` or date (see :ref:`dates-and-times`)
        Not implemented yet; a truthy value is only logged.
    as_dataframe : bool
        Not implemented yet; a truthy value is only logged.

    Returns
    -------
    results : list
        A list with one dict per table row, mapping the table's column
        headers to the cell text. Cells holding only a non-breaking space
        are mapped to ``None``.

    Raises
    ------
    TypeError
        if ``station_code`` is neither a string nor an int
    ValueError
        if the events page does not contain the results table
        (``id="GridView1"``)
    """
    if isinstance(station_code, int):
        station_code = str(station_code)
    elif not isinstance(station_code, str):
        message = ("Unsure of the station_code parameter type. "
                   "Try string or int")
        log.error(message)
        # a bare ``raise`` here had no active exception to re-raise
        raise TypeError(message)

    if date:
        log.info("Date parameter not implemented yet")
    if as_dataframe:
        log.info("as_dataframe parameter not implemented yet")

    waterquality_url = (
        "http://waterquality.lcra.org/parameter.aspx?qrySite=%s"
        % station_code)
    waterquality_url2 = 'http://waterquality.lcra.org/events.aspx'

    dir_path = op.join(LCRA_WATERQUALITY_DIR, station_code)
    resp_path = op.join(dir_path, "resp.html")
    pickle_path = op.join(dir_path, "data.pickle")
    util.mkdir_if_doesnt_exist(dir_path)

    # the on-disk cache is neither read nor written while testing
    testing = bool(os.environ.get('ULMO_TESTING', None))

    initial_request = requests.get(waterquality_url)
    result = _make_next_request(waterquality_url2,
                                initial_request,
                                {'site': station_code})

    if (not testing and op.exists(resp_path)
            and util.misc._request_file_size_matches(result, resp_path)):
        # means nothing has changed; return the cached pickle
        log.info("%s was not processed because it is the same size",
                 station_code)
        try:
            # NOTE: pickle.load is acceptable only because this file is
            # written by this function into ulmo's own data directory;
            # never point it at data from an untrusted source.
            with open(pickle_path, 'rb') as f:
                return pickle.load(f)
        except (IOError, EOFError, pickle.UnpicklingError):
            # missing or unreadable cache: fall through and re-parse
            log.info("Couldn't load the pickle that should be there for %s",
                     station_code)

    if not testing:
        with open(resp_path, 'wb') as wf:
            wf.write(result.content)

    soup = BeautifulSoup(result.content, 'html.parser')
    gridview = soup.find(id="GridView1")
    if gridview is None:
        # fail with a clear message instead of an AttributeError on None
        raise ValueError(
            "Could not find the results table (GridView1) in the response "
            "for station %s" % station_code)

    headers = [head.text for head in gridview.findAll('th')]

    results = []
    for row in gridview.findAll('tr'):
        # blank cells come back as u'\xa0', which _parse_val maps to None
        vals = [_parse_val(cell.text) for cell in row.findAll('td')]
        if not vals:
            # the header row has <th> cells only
            continue
        results.append(dict(zip(headers, vals)))

    if not testing:
        with open(pickle_path, 'wb') as mf:
            pickle.dump(results, mf)

    return results


def _extract_headers_for_next_request(request):
    """Collect the ``<input>`` fields of a fetched page as a form payload.

    Parameters
    ----------
    request : object with a ``content`` attribute
        The previously fetched page whose HTML is parsed.

    Returns
    -------
    payload : dict
        Maps each input's ``name`` to its ``value``, or to ``None`` when the
        input has no ``value`` attribute.
    """
    soup = BeautifulSoup(request.content, 'html.parser')

    payload = dict()
    for tag in soup.find_all('input'):
        name = tag.get('name')
        if name is None:
            # an input without a name has no key to be posted under;
            # indexing it used to raise KeyError
            continue
        # some tags don't have a value and are used w/ JS to toggle a set
        # of checkboxes
        payload[name] = tag.get('value')
    return payload


def _make_next_request(url, previous_request, data):
    """POST to ``url`` re-using the form state of ``previous_request``.

    The input fields scraped from ``previous_request`` form the base payload,
    ``data`` overrides or extends them, and the cookies of
    ``previous_request`` are sent along. Returns the ``requests.post``
    result.
    """
    form_fields = _extract_headers_for_next_request(previous_request)
    form_fields.update(data)
    carried_cookies = previous_request.cookies
    return requests.post(url, cookies=carried_cookies, data=form_fields)


def _parse_val(val):
#the &nsbp translates to the following unicode
if val == u'\xa0':
return None
else:
return val


0 comments on commit 4c27192

Please sign in to comment.