-
Notifications
You must be signed in to change notification settings - Fork 64
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d71cb71
commit 4c27192
Showing
7 changed files
with
665 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> | ||
<!-- saved from url=(0042)http://waterquality.lcra.org/sitelist.aspx --> | ||
<html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> | ||
<title>waterquality.lcra.org</title> | ||
<link href="./waterquality.lcra.org_files/www09_consolidated2.css" rel="stylesheet" type="text/css"> | ||
<link rel="shortcut icon" href="http://waterquality.lcra.org/Images/faviconLCRA.ico" type="image/x-icon"> | ||
<script src="./waterquality.lcra.org_files/www09_top_menu.js" type="text/javascript"></script> | ||
<style type="text/css"></style></head> | ||
|
||
|
||
<body style="height: 100%; background-color:#033968; width:100%"> | ||
|
||
<div align="left"> | ||
<script type="text/javascript" language="javascript">showTopNavBar();</script><div id="MenuBar" class="NoPrint"><div class="NavButtons" id="logo"><a target="_new" href="http://www.lcra.org/"><img src="./waterquality.lcra.org_files/lcralogo.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div class="NavButtons" id="NavAbout" onmouseover="navOn("NavAbout")" onmouseout="navOff("NavAbout")" style="background-image: url(http://waterquality.lcra.org/images/button_gradient.png);"><a href="http://www.lcra.org/about" target="_new"><img src="./waterquality.lcra.org_files/about.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div class="NavButtons" id="NavEnergy" onmouseover="navOn("NavEnergy")" onmouseout="navOff("NavEnergy")" style="background-image: url(http://waterquality.lcra.org/images/button_gradient.png);"><a href="http://www.lcra.org/energy" target="_new"><img src="./waterquality.lcra.org_files/energy.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div class="NavButtons" id="NavWater" onmouseover="navOn("NavWater")" onmouseout="navOff("NavWater")" style="background-image: url(http://waterquality.lcra.org/images/button_gradient.png);"><a href="http://www.lcra.org/water" target="_new"><img src="./waterquality.lcra.org_files/water.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div class="NavButtons" id="NavParks" onmouseover="navOn("NavParks")" onmouseout="navOff("NavParks")"><a href="http://www.lcra.org/parks" target="_new"><img src="./waterquality.lcra.org_files/parks.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div class="NavButtons" id="NavJobs" onmouseover="navOn("NavJobs")" 
onmouseout="navOff("NavJobs")" style="background-image: url(http://waterquality.lcra.org/images/button_gradient.png);"><a href="http://www.lcra.org/about/employment" target="_new"><img src="./waterquality.lcra.org_files/jobs.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div class="NavButtons" id="NavContactUs" onmouseover="navOn("NavContactUs")" onmouseout="navOff("NavContactUs")"><a href="http://www.lcra.org/about/overview" target="_new"><img src="./waterquality.lcra.org_files/contact_us.png" border="0"></a></div><div id="NavDivider"><img src="./waterquality.lcra.org_files/button_divider.png" border="0"></div><div id="SearchBoxWrapper"><form name="gs" action="http://www.lcra.org/search" method="GET" target="_new"><table height="39px" border="0" align="right"><tbody><tr><td valign="middle"><img src="./waterquality.lcra.org_files/search.png" border="0"></td><td><input type="hidden" name="site" value="dotorg"><input type="hidden" name="client" value="dotorg_new"><input type="hidden" name="proxystylesheet" value="dotorg_new"><input type="hidden" name="output" value="xml_no_dtd"><input type="text" name="q" size="15" class="SearchInputBox"></td><td><input src="./waterquality.lcra.org_files/search_submit.png" name="search" type="image" alt="Click to submit your search"> </td></tr></tbody></table></form></div></div> | ||
</div> | ||
|
||
<form name="form1" method="post" action="./waterquality.lcra.org_files/waterquality.lcra.org.html" id="form1" style="margin: 0px; padding: 0px;"> | ||
<div> | ||
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUJNjg0NzA5NjQ2ZBgBBQlHcmlkVmlldzEPPCsADAEIAgFkdrUlLeK5FmsdI/Lbf1BTyvSFsC4S2WBBt6DwnxoC3UQ="> | ||
</div> | ||
|
||
<div> | ||
|
||
<input type="hidden" name="__VIEWSTATEGENERATOR" id="__VIEWSTATEGENERATOR" value="F8CC5443"> | ||
</div> | ||
|
||
<div style="padding:6px;"> | ||
<table style="width: 100%; background-color:Black;" cellpadding="4"> | ||
<tbody><tr> | ||
<td width="50%"> | ||
<span class="HomepageColumnHeadings">Select stream segment for multiple sites or choose an individual site.</span><br><br> | ||
</td> | ||
</tr> | ||
</tbody></table> | ||
|
||
<div> | ||
<table cellspacing="0" cellpadding="3" border="0" id="GridView1" style="font-size:Smaller;width:100%;border-collapse:collapse;"> | ||
<tbody><tr class="header" style="color:White;font-size:Small;font-weight:bold;"> | ||
<th class="header" scope="col">Site ID</th><th class="header" align="left" scope="col">Description</th> | ||
</tr><tr style="color:White;background-color:Gainsboro;"> | ||
<td colspan="2" style="background-color:#033968;font-size:8pt;font-weight:bold;width:100%;"><a class="WhiteLinks" href="http://waterquality.lcra.org/parameter_segments.aspx?qrySegment=1301" target="_blank">Segment 1301 San Bernard River Tidal</a></td> | ||
</tr><tr style="color:White;background-color:Gainsboro;"> | ||
<td align="center"><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20460" style="display:inline-block;color:Black;width:150px;">20460</a></td><td><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20460" style="color:Black;">SAN BERNARD RIVER TIDAL AT SH 35 SOUTHWEST OF WEST COLUMBIA</a></td> | ||
</tr><tr style="color:White;background-color:#EBEBEB;"> | ||
<td align="center"><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=12146" style="display:inline-block;color:Black;width:150px;">12146</a></td><td><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=12146" style="color:Black;">SAN BERNARD RIVER TIDAL EAST BANK IMMEDIATELY UPSTREAM OF FM 2611</a></td> | ||
</tr><tr style="color:White;background-color:Gainsboro;"> | ||
<td colspan="2" style="background-color:#033968;font-size:8pt;font-weight:bold;width:100%;"><a class="WhiteLinks" href="http://waterquality.lcra.org/parameter_segments.aspx?qrySegment=1302" target="_blank">Segment 1302 San Bernard River Above Tidal</a></td> | ||
</tr><tr style="color:White;background-color:Gainsboro;"> | ||
<td align="center"><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20723" style="display:inline-block;color:Black;width:150px;">20723</a></td><td><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20723" style="color:Black;">MOUND CREEK AT BRAZORIA CR 450/JACKSON SETTLEMENT ROAD 1.22 KILOMETERS UPSTREAM OF FM 1301 IN WEST OF WEST COLUMBIA</a></td> | ||
</tr><tr style="color:White;background-color:#EBEBEB;"> | ||
<td align="center"><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20722" style="display:inline-block;color:Black;width:150px;">20722</a></td><td><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20722" style="color:Black;">PEACH CREEK AT WHARTON CR 117/CHUDALLA ROAD/ARCHER ROAD 89 METERS SOUTH OF THE INTERSECTION OF WHARTON CR 117/CHUDALLA ROAD/ARCHER ROAD AND WHARTON CR 121/ WHARTON CR 119/DONALDSON ROAD IN EAST OF WHARTON</a></td> | ||
</tr><tr style="color:White;background-color:#EBEBEB;"> | ||
<td align="center"><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=12517" style="display:inline-block;color:Black;width:150px;">12517</a></td><td><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=12517" style="color:Black;">TRES PALACIOS CREEK AT FM 456</a></td> | ||
</tr><tr style="color:White;background-color:Gainsboro;"> | ||
<td colspan="2" style="background-color:#033968;font-size:8pt;font-weight:bold;width:100%;"><a class="WhiteLinks" href="http://waterquality.lcra.org/parameter_segments.aspx?qrySegment=2002" target="_blank">Segment 2002 Mission River Above Tidal</a></td> | ||
</tr><tr style="color:White;background-color:Gainsboro;"> | ||
<td align="center"><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20062" style="display:inline-block;color:Black;width:150px;">20062</a></td><td><a href="http://waterquality.lcra.org/parameter.aspx?qrySite=20062" style="color:Black;">SARCO CREEK AT FM 2441 1.30 KM UPSTREAM OF THE ELKINS BRANCH CONFLUENCE</a></td> | ||
</tr> | ||
</tbody></table> | ||
</div> | ||
|
||
</div> | ||
|
||
<div id="DoesNothingButCenter" align="center"> | ||
<div id="OuterWrapper"> | ||
<div id="Footer"> | ||
<a class="WhiteLinks" href="http://www.lcra.org/about/doing_business/index.html" target="_new">Purchasing</a> | <a class="WhiteLinks" href="http://www.lcra.org/about/overview/openrecords.html" target="_new">Open Records</a> | <a class="WhiteLinks" href="http://www.lcra.org/asklcra/" target="_new">Ask LCRA</a> | <a class="WhiteLinks" href="http://www.lcra.org/sitemap.html" target="_new">Sitemap</a> | ||
<br> | ||
© 1996-<script type="text/javascript"> var d = new Date(); document.write(d.getFullYear());</script>2015 Lower Colorado River Authority. All rights reserved. | ||
</div> <!-- end "Footer" div --> | ||
</div> <!-- end "OuterWrapper" div --> | ||
</div> <!-- end "DoesNothingButCenter" div --> | ||
|
||
|
||
|
||
|
||
|
||
</form> | ||
<script src="./waterquality.lcra.org_files/urchin.js" type="text/javascript"> | ||
</script> | ||
<script type="text/javascript"> | ||
_uacct = "UA-1180003-2"; | ||
urchinTracker(); | ||
</script> | ||
|
||
|
||
</body></html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
|
||
|
||
from ulmo.lcra.waterquality import get_stations, get_station_data | ||
import test_util | ||
import os | ||
|
||
def test_get_stations():
    """Check that the saved site list page yields the expected stations."""
    site_list_url = 'http://waterquality.lcra.org/sitelist.aspx'
    site_list_fixture = 'lcra/waterquality/stations.html'

    # Keys are (url, methods) pairs, values are fixture paths; presumably
    # mocked_urls answers matching requests from the fixture -- see test_util.
    with test_util.mocked_urls({(site_list_url, ('GET',)): site_list_fixture}):
        stations = get_stations()

    assert len(stations) == 6
    assert "SH 35 SOUTHWEST" in stations['20460']
|
||
def test_get_station_data():
    """Check that the saved results page for site 12147 yields 12 rows.

    ``ULMO_TESTING`` is set for the duration of the test and the previous
    state of the environment is restored afterwards, even when an assertion
    fails, so the flag cannot leak into other tests.
    """
    service_info_url = 'http://waterquality.lcra.org/parameter.aspx?qrySite=12147'
    service_info_file = 'lcra/waterquality/12147_params.html'

    service_data_url = 'http://waterquality.lcra.org/events.aspx'
    service_data_file = 'lcra/waterquality/12147_results.html'

    url_files = {
        (service_info_url, ('GET',)): service_info_file,
        (service_data_url, ('POST',)): service_data_file,
    }

    # Remember any pre-existing value so it can be put back unchanged.
    previous = os.environ.get("ULMO_TESTING")
    os.environ["ULMO_TESTING"] = "1"
    try:
        with test_util.mocked_urls(url_files):
            results = get_station_data(12147)

        assert len(results) == 12
        for data in results:
            assert data['Site'] == u'12147'
    finally:
        if previous is None:
            os.environ.pop("ULMO_TESTING", None)
        else:
            os.environ["ULMO_TESTING"] = previous
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from . import waterquality |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .core import get_stations, get_station_data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
""" | ||
ulmo.lcra.waterquality.core
~~~~~~~~~~~~~~~~~~~~~~~~~~~

This module provides access to data provided by the `Lower Colorado
River Authority`_ `Water Quality`_ web site.

.. _Lower Colorado River Authority: http://www.lcra.org
.. _Water Quality: http://waterquality.lcra.org/
""" | ||
from bs4 import BeautifulSoup | ||
import logging | ||
|
||
from ulmo import util | ||
|
||
|
||
|
||
import pickle | ||
|
||
import os | ||
|
||
# import datetime | ||
import os.path as op | ||
|
||
LCRA_WATERQUALITY_DIR = op.join(util.get_ulmo_dir(), 'lcra/waterquality') | ||
|
||
|
||
log = logging.getLogger(__name__) | ||
|
||
from bs4 import BeautifulSoup | ||
import requests | ||
|
||
|
||
|
||
|
||
# import numpy as np | ||
# import pandas | ||
|
||
|
||
# try: | ||
# import cStringIO as StringIO | ||
# except ImportError: | ||
# import StringIO | ||
|
||
|
||
def get_stations():
    """Fetches a list of station codes and descriptions.

    The site list page is downloaded and the rows of its ``GridView1``
    table are scanned. Only rows with exactly two cells are used; rows with
    any other number of cells are skipped.

    Returns
    -------
    stations_dict : dict
        a python dict with station codes mapped to station descriptions
    """
    stations_url = 'http://waterquality.lcra.org/sitelist.aspx'

    response = requests.get(stations_url)
    soup = BeautifulSoup(response.content, 'html.parser')
    gridview = soup.find(id="GridView1")

    stations = {}
    for row in gridview.find_all('tr'):
        # Look the cells up once per row instead of once per use.
        cells = row.find_all('td')
        if len(cells) == 2:
            stations[cells[0].string] = cells[1].string

    return stations
|
||
|
||
def get_station_data(station_code, date=None, as_dataframe=False):
    """Fetches the water quality results table for a station.

    The station's parameter page is requested first; its form fields and
    cookies are then posted to the events page, and the ``GridView1`` table
    of that response is parsed. Unless the ``ULMO_TESTING`` environment
    variable is set, the raw response and the parsed results are cached
    under ``LCRA_WATERQUALITY_DIR/<station_code>`` and the cached results
    are returned when the new response matches the cached one in size.

    Parameters
    ----------
    station_code : str or int
        The station code to fetch data for. A list of stations can be
        retrieved with ``get_stations()``. Integers are converted to strings.
    date : ``None`` or date (see :ref:`dates-and-times`)
        Not implemented yet: a truthy value is logged and otherwise ignored.
    as_dataframe : bool
        Not implemented yet: a truthy value is logged and otherwise ignored.

    Returns
    -------
    results : list of dict
        One dict per data row of the results table, mapping each column
        header to the cell text (``None`` for blank cells).

    Raises
    ------
    TypeError
        If ``station_code`` is neither a ``str`` nor an ``int``.
    """
    if isinstance(station_code, int):
        station_code = str(station_code)
    elif not isinstance(station_code, str):
        # A bare ``raise`` here had no active exception to re-raise; raise an
        # explicit error that carries the explanation instead.
        raise TypeError(
            "Unsure of the station_code parameter type. Try string or int")

    if date:
        log.info("Date parameter not implemented yet")
    if as_dataframe:
        log.info("as_dataframe parameter not implemented yet")

    waterquality_url = (
        "http://waterquality.lcra.org/parameter.aspx?qrySite=%s" % station_code)
    waterquality_url2 = 'http://waterquality.lcra.org/events.aspx'

    dir_path = op.join(LCRA_WATERQUALITY_DIR, station_code)
    resp_path = op.join(dir_path, "resp.html")
    pickle_path = op.join(dir_path, "data.pickle")
    util.mkdir_if_doesnt_exist(dir_path)

    # When ULMO_TESTING is set, the on-disk cache is neither read nor written.
    testing = bool(os.environ.get('ULMO_TESTING'))

    initial_request = requests.get(waterquality_url)
    result = _make_next_request(waterquality_url2,
                                initial_request,
                                {'site': station_code})

    if (not testing
            and op.exists(resp_path)
            and util.misc._request_file_size_matches(result, resp_path)):
        # Same size as the cached response: treat as unchanged and reuse the
        # previously parsed results.
        log.info("%s was not processed because it is the same size",
                 station_code)
        try:
            # NOTE: pickle is only acceptable here because the file is a
            # local cache written by this function; never point this at
            # data from an untrusted source.
            with open(pickle_path, 'rb') as f:
                return pickle.load(f)
        except (IOError, EOFError, pickle.UnpicklingError):
            # Missing or unreadable cache: fall through and re-parse.
            log.info("Couldn't load the pickle that should be there for %s",
                     station_code)

    if not testing:
        with open(resp_path, 'wb') as wf:
            wf.write(result.content)

    soup = BeautifulSoup(result.content, 'html.parser')
    gridview = soup.find(id="GridView1")
    headers = [head.text for head in gridview.find_all('th')]

    results = []
    for row in gridview.find_all('tr'):
        vals = [_parse_val(cell.text) for cell in row.find_all('td')]
        # Rows without <td> cells (e.g. the header row) carry no data.
        if vals:
            results.append(dict(zip(headers, vals)))

    if not testing:
        with open(pickle_path, 'wb') as mf:
            pickle.dump(results, mf)

    return results
|
||
|
||
def _extract_headers_for_next_request(request):
    """Collect the ``<input>`` fields of a fetched page as a form payload.

    Parameters
    ----------
    request : object with a ``content`` attribute
        The previously fetched page; its ``content`` is parsed as HTML.

    Returns
    -------
    payload : dict
        Maps each input's ``name`` to its ``value`` attribute, or to
        ``None`` when the input has no ``value``. Inputs without a ``name``
        attribute are skipped.
    """
    payload = {}
    for tag in BeautifulSoup(request.content, 'html.parser').find_all('input'):
        name = tag.get('name')
        if name is None:
            # A nameless input cannot be keyed in the payload; indexing
            # attrs['name'] directly would raise KeyError for it.
            continue
        # some tags don't have a value and are used w/ JS to toggle a set of
        # checkboxes
        payload[name] = tag.get('value')
    return payload
|
||
|
||
def _make_next_request(url, previous_request, data):
    """POST to ``url`` using the form fields and cookies of a prior page.

    The input fields scraped from ``previous_request`` form the base
    payload; entries in ``data`` are applied on top of them. The result of
    ``requests.post`` is returned.
    """
    form_fields = _extract_headers_for_next_request(previous_request)
    # Caller-supplied entries take precedence over the scraped ones.
    form_fields.update(data)
    prior_cookies = previous_request.cookies
    return requests.post(url, cookies=prior_cookies, data=form_fields)
|
||
|
||
def _parse_val(val): | ||
#the &nsbp translates to the following unicode | ||
if val == u'\xa0': | ||
return None | ||
else: | ||
return val | ||
|
||
|