diff --git a/README.md b/README.md
index 99c0e84..75e542d 100644
--- a/README.md
+++ b/README.md
@@ -56,3 +56,7 @@ PostgreSQL database with PostGIS. You must have the `shp2pgsql` command.
 
 See the `conda_env.sh` script to create a conda environment with the dependencies.
 **Note**: flask-restplus and daiquiri should be install via `pip`.
+
+## Tests
+
+Install the extras dependencies, e.g. `pip install -e ."[dev]"`, and run `pytest`.
diff --git a/jitenshea/controller.py b/jitenshea/controller.py
index 2cab314..dc5508b 100644
--- a/jitenshea/controller.py
+++ b/jitenshea/controller.py
@@ -48,6 +48,7 @@ def processing_daily_data(rset, window):
                        'name': group[0]['name']})
     return {"data": values}
 
+
 def processing_timeseries(rset):
     """Processing the result of a timeseries SQL query
 
@@ -61,9 +62,9 @@ def processing_timeseries(rset):
         group = list(group)
         values.append({'id': k,
                        'name': group[0]['name'],
-                       "ts": [x['ts'] for x in group],
-                       'available_bike': [x['available_bike'] for x in group],
-                       'available_stand': [x['available_stand'] for x in group]})
+                       "ts": [x['timestamp'] for x in group],
+                       'available_bikes': [x['available_bikes'] for x in group],
+                       'available_stands': [x['available_stands'] for x in group]})
     return {"data": values}
 
 
@@ -91,6 +92,7 @@ def time_window(day, window, backward):
         order_reference_date = start
     return TimeWindow(start, stop, order_reference_date)
 
+
 def station_geojson(stations):
     """Process station data into GeoJSON
     """
@@ -136,7 +138,7 @@ def clustered_station_geojson(stations):
             "properties": {
                 "id": data['id'],
                 "cluster_id": data['cluster_id'],
-                "name": data['nom'],
+                "name": data['name'],
                 "start": data['start'],
                 "stop": data['stop']
             }})
@@ -156,6 +158,7 @@ def cities():
                       'country': 'france',
                       'stations': 174}]}
 
+
 def stations(city, limit, geojson):
     """List of bicycle stations
 
@@ -165,12 +168,7 @@ def stations(city, limit, geojson):
 
     Return a list of dict, one dict by bicycle station
     """
-    if city == 'bordeaux':
-        query = bordeaux_stations(limit)
-    elif city == 'lyon':
-        query = lyon_stations(limit)
-    else:
-        raise ValueError("City {} not supported".format(city))
+    query = _query_stations(city, limit)
     eng = db()
     rset = eng.execute(query)
     keys = rset.keys()
@@ -180,84 +178,51 @@ def stations(city, limit, geojson):
     return {"data": result}
 
 
-def bordeaux_stations(limit=20):
-    """Query for the list of bicycle stations in Bordeaux
-
-    limit: int
-       default 20
-
-    Return a SQL query to execute
-    """
-    return """SELECT numstat::int AS id
-    ,nom AS name
-    ,adresse AS address
-    ,lower(commune) AS city
-    ,nbsuppor::int AS nb_bikes
-    ,st_x(st_transform(geom, 4326)) AS x
-    ,st_y(st_transform(geom, 4326)) AS y
-    FROM {schema}.vcub_station
-    LIMIT {limit}
-    """.format(schema=config['bordeaux']['schema'],
-               limit=limit)
-
-def lyon_stations(limit=20):
-    """Query for the list of bicycle stations in Lyon
+def specific_stations(city, ids):
+    """List of specific bicycle stations.
 
-    limit: int
-       default 20
-
-    Return a SQL query to execute
-    """
-    return """SELECT idstation::int AS id
-    ,nom AS name
-    ,adresse1 AS address
-    ,lower(commune) AS city
-    ,nbbornette::int AS nb_bikes
-    ,st_x(geom) AS x
-    ,st_y(geom) AS y
-    FROM {schema}.pvostationvelov
-    LIMIT {limit}
-    """.format(schema=config['lyon']['schema'],
-               limit=limit)
-
-def bordeaux(station_ids):
-    """Get some specific bicycle-sharing stations for Bordeaux
-    station_id: list of int
-       Ids of the bicycle-sharing station
+    Parameters
+    ----------
+    city : string
+    ids : list
 
-    Return bicycle stations in a list of dict
+    Returns
+    -------
+    list of dict
+        One dict by bicycle station
     """
-    query = bordeaux_stations(1).replace("LIMIT 1", 'WHERE numstat IN %(id_list)s')
+    query = _query_stations(city, 1).replace("LIMIT 1", 'WHERE id IN %(id_list)s')
     eng = db()
-    rset = eng.execute(query, id_list=tuple(str(x) for x in station_ids)).fetchall()
+    rset = eng.execute(query, id_list=tuple(str(x) for x in ids)).fetchall()
     if not rset:
         return []
-    return {"data" : [dict(zip(x.keys(), x)) for x in rset]}
+    return {"data": [dict(zip(x.keys(), x)) for x in rset]}
 
 
-def lyon(station_ids):
-    """Get some specific bicycle-sharing stations for Lyon
-    station_id: list of ints
-       Ids of the bicycle-sharing stations
-    Return bicycle stations in a list of dict
-    """
-    query = lyon_stations(1).replace("LIMIT 1", 'WHERE idstation IN %(id_list)s')
-    eng = db()
-    rset = eng.execute(query, id_list=tuple(str(x) for x in station_ids)).fetchall()
-    if not rset:
-        return []
-    return {"data" : [dict(zip(x.keys(), x)) for x in rset]}
+def _query_stations(city, limit=20):
+    """Query to get the list of bicycle stations
 
+    Parameters
+    ----------
+    city : str
+    limit : int
 
-def station_city_table(city):
-    """Name table and ID column name
+    Returns
+    -------
+    str
     """
-    if city not in ('bordeaux', 'lyon'):
-        raise ValueError("City '{}' not supported.".format(city))
-    if city == 'bordeaux':
-        return 'vcub_station', 'numstat'
-    if city == 'lyon':
-        return 'pvostationvelov', 'idstation'
+    return """SELECT id
+    ,name
+    ,address
+    ,city
+    ,nb_stations as nb_bikes
+    ,st_x(geom) as x
+    ,st_y(geom) as y
+    FROM {schema}.{table}
+    LIMIT {limit}
+    """.format(schema=city,
+               table=config['database']['stations'],
+               limit=limit)
 
 
 def daily_query(city):
@@ -265,15 +230,17 @@
     """
     if city not in ('bordeaux', 'lyon'):
         raise ValueError("City '{}' not supported.".format(city))
-    table, idcol = station_city_table(city)
     return """SELECT id
     ,number AS value
     ,date
-    ,Y.nom AS name
-    FROM {schema}.daily_transaction AS X
-    LEFT JOIN {schema}.{table} AS Y ON X.id=Y.{idcol}::int
+    ,name
+    FROM {schema}.{table} AS X
+    LEFT JOIN {schema}.{station} AS Y using(id)
     WHERE id IN %(id_list)s AND date >= %(start)s AND date <= %(stop)s
-    ORDER BY id,date""".format(schema=config[city]['schema'], table=table, idcol=idcol)
+    ORDER BY id,date""".format(schema=city,
+                               table=config['database']['daily_transaction'],
+                               station=config['database']['stations'])
+
 
 def daily_query_stations(city, limit, order_by='station'):
     """SQL query to get daily transactions for all stations
@@ -284,11 +251,10 @@
         order_by = 'id'
     if order_by == 'value':
         order_by = 'number DESC'
-    table, idcol = station_city_table(city)
     return """WITH station AS (
     SELECT id
     ,row_number() over (partition by null order by {order_by}) AS rank
-    FROM {schema}.daily_transaction
+    FROM {schema}.{table}
     WHERE date = %(order_reference_date)s
     ORDER BY {order_by}
     LIMIT {limit}
@@ -296,14 +262,14 @@
     SELECT S.id
     ,D.number AS value
     ,D.date
-    ,Y.nom AS name
+    ,Y.name
     FROM station AS S
-    LEFT JOIN {schema}.daily_transaction AS D ON (S.id=D.id)
-    LEFT JOIN {schema}.{table} AS Y ON S.id=Y.{idcol}::int
+    LEFT JOIN {schema}.{table} AS D ON (S.id=D.id)
+    LEFT JOIN {schema}.{station} AS Y ON S.id=Y.id
     WHERE D.date >= %(start)s AND D.date <= %(stop)s
     ORDER BY S.rank,D.date;""".format(schema=config[city]['schema'],
-                                      table=table,
-                                      idcol=idcol,
+                                      table=config['database']['daily_transaction'],
+                                      station=config['database']['stations'],
                                       order_by=order_by,
                                       limit=limit)
 
@@ -353,18 +319,25 @@ def daily_transaction_list(city, day, limit, order_by, window=0, backward=True):
                        order_reference_date=window.order_reference_date).fetchall()
     return processing_daily_data(rset, window)
 
+
 def timeseries(city, station_ids, start, stop):
     """Get timeseries data between two dates for a specific city and a list of
     station ids
     """
-    query = """SELECT *
-    FROM {schema}.timeserie_norm
-    WHERE id IN %(id_list)s AND ts >= %(start)s AND ts < %(stop)s
-    """.format(schema=config[city]['schema'])
+    query = """SELECT T.*
+    ,S.name as name
+    FROM {schema}.{table} AS T
+    LEFT JOIN {schema}.{station} AS S using(id)
+    WHERE id IN %(id_list)s AND timestamp >= %(start)s AND timestamp < %(stop)s
+    ORDER BY id,timestamp
+    """.format(schema=config[city]['schema'],
+               table=config['database']['timeseries'],
+               station=config['database']['stations'])
     eng = db()
     rset = eng.execute(query, id_list=tuple(x for x in station_ids),
                        start=start, stop=stop)
     return processing_timeseries(rset)
 
+
 def hourly_process(df):
     """DataFrame with timeseries into a hourly transaction profile
 
@@ -374,7 +347,7 @@
     Return a DataFrame with the transactions sum & mean for each hour
     """
     df = df.copy().set_index('ts')
-    transaction = (df['available_bike']
+    transaction = (df['available_bikes']
                    .diff()
                    .abs()
                    .dropna()
@@ -382,7 +355,8 @@
                    .sum()
                    .reset_index())
     transaction['hour'] = transaction['ts'].apply(lambda x: x.hour)
-    return transaction.groupby('hour')['available_bike'].agg(['sum', 'mean'])
+    return transaction.groupby('hour')['available_bikes'].agg(['sum', 'mean'])
 
+
 def hourly_profile(city, station_ids, day, window):
     """Return the number of transaction per hour
 
@@ -425,6 +399,7 @@ def daily_profile_process(df):
     df['weekday'] = df['date'].apply(lambda x: x.weekday())
     return df.groupby('weekday')['value'].agg(['sum', 'mean'])
 
+
 def daily_profile(city, station_ids, day, window):
     """Return the number of transaction per day of week
 
@@ -466,9 +441,9 @@
     list of integers
        IDs of the shared-bike stations in the `city`
     """
-    table, idcol = station_city_table(city)
-    query = ("SELECT {id} FROM {schema}.{table}"
-             ";").format(id=idcol, schema=config[city]["schema"], table=table)
+    query = ("SELECT id FROM {schema}.{table}"
+             ";").format(schema=config[city]["schema"],
+                         table=config['database']['stations'])
     eng = db()
     rset = eng.execute(query).fetchall()
     if not rset:
@@ -491,28 +466,26 @@
     """
     if city not in ('bordeaux', 'lyon'):
         raise ValueError("City '{}' not supported.".format(city))
-    table, idname = station_city_table(city)
     return ("WITH ranked_clusters AS ("
             "SELECT cs.station_id AS id, "
             "cs.cluster_id, "
             "cs.start AS start, "
             "cs.stop AS stop, "
-            "citystation.nom AS nom, "
+            "citystation.name AS name, "
             "citystation.geom AS geom, "
             "rank() OVER (ORDER BY stop DESC) AS rank "
"FROM {schema}.{cluster} AS cs " - "JOIN {schema}.{table} AS citystation " - "ON citystation.{idcol} = cs.station_id::varchar " + "JOIN {schema}.{station} AS citystation " + "ON citystation.id = cs.station_id " "WHERE cs.station_id IN %(id_list)s) " - "SELECT id, cluster_id, start, stop, nom, " - "st_x(st_transform(geom, 4326)) as x, " - "st_y(st_transform(geom, 4326)) as y " + "SELECT id, cluster_id, start, stop, name, " + "st_x(geom) as x, " + "st_y(geom) as y " "FROM ranked_clusters " "WHERE rank=1" ";").format(schema=config[city]['schema'], - cluster=config[city]['clustering'], - table=table, - idcol=idname) + cluster=config['database']['clustering'], + station=config['database']['stations']) def station_clusters(city, station_ids=None, geojson=False): @@ -569,13 +542,13 @@ def cluster_profile_query(city): "SELECT *, rank() OVER (ORDER BY stop DESC) AS rank " "FROM {schema}.{centroid}) " "SELECT cluster_id, " - "h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, " + "h00, h01, h02, h03, h04, h05, h06, h07, h08, h09, h10, h11, " "h12, h13, h14, h15, h16, h17, h18, h19, h20, h21, h22, h23, " "start, stop " "FROM ranked_centroids " "WHERE rank=1" ";").format(schema=config[city]['schema'], - centroid=config[city]['centroids']) + centroid=config["database"]['centroids']) def cluster_profiles(city): @@ -604,6 +577,6 @@ def cluster_profiles(city): "start": cluster['start'], 'stop': cluster['stop'], 'hour': list(range(24)), - 'values': [cluster[h] for h in ["h{}".format(i) for i in range(24)]]} + 'values': [cluster[h] for h in ["h{:02d}".format(i) for i in range(24)]]} ) return {"data": result} diff --git a/jitenshea/static/station.js b/jitenshea/static/station.js index b1761d1..daa4cc8 100644 --- a/jitenshea/static/station.js +++ b/jitenshea/static/station.js @@ -98,10 +98,10 @@ $(document).ready(function() { var station_name = content.data[0].name; var date = content.data[0].ts; var stands = date.map(function(t, i) { - return [Date.parse(t), content.data[0].available_stand[i]]; + return [Date.parse(t), content.data[0].available_stands[i]]; }); var bikes = date.map(function(t, i) { - return [Date.parse(t), content.data[0].available_bike[i]]; + return [Date.parse(t), content.data[0].available_bikes[i]]; }); Highcharts.stockChart('stationTimeseries', { // use to select the time window diff --git a/jitenshea/webapi.py b/jitenshea/webapi.py index 91e7674..e05a835 100644 --- a/jitenshea/webapi.py +++ b/jitenshea/webapi.py @@ -170,10 +170,7 @@ class CityStation(Resource): @api.doc(description="Bicycle station(s)") def get(self, city, ids): check_city(city) - if city == 'bordeaux': - rset = controller.bordeaux(ids) - if city == 'lyon': - rset = controller.lyon(ids) + rset = controller.specific_stations(city, ids) if not rset: api.abort(404, "No such id: {}".format(ids)) return jsonify(rset) diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..a5a4aed --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = --ignore=datarepo --ignore=sql --ignore=jitenshea/static/bower --ignore=conf \ No newline at end of file diff --git a/setup.py b/setup.py index de2aa1a..96a1107 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ packages=setuptools.find_packages(), include_package_data=True, install_requires=INSTALL_REQUIRES, + extras_require={'dev': ['pytest', 'pytest-sugar', 'ipython', 'ipdb']}, author="Damien Garaud", author_email='damien.garaud@gmail.com', diff --git a/tests/test_webapi.py b/tests/test_webapi.py new file mode 100644 index 0000000..65a62c7 --- /dev/null +++ 
@@ -0,0 +1,113 @@
+import json
+from datetime import date, timedelta
+
+import pytest
+
+from jitenshea.webapp import app
+from jitenshea.webapi import api, ISO_DATE
+
+
+app.config['TESTING'] = True
+api.init_app(app)
+
+
+def yesterday():
+    return date.today() - timedelta(1)
+
+
+@pytest.fixture
+def client():
+    client = app.test_client()
+    return client
+
+
+def test_app_index(client):
+    resp = client.get('/')
+    assert resp.status_code == 200
+
+
+def test_api_city_list(client):
+    resp = client.get('/api/city')
+    assert resp.status_code == 200
+    content = json.loads(resp.data)
+    expected = [{"city": "lyon",
+                 "country": "france",
+                 "stations": 348},
+                {"city": "bordeaux",
+                 "country": "france",
+                 "stations": 174}]
+    assert expected == content['data']
+
+
+def test_api_city_stations(client):
+    resp = client.get('/api/bordeaux/station', query_string={'limit': 10})
+    assert resp.status_code == 200
+    data = json.loads(resp.data)
+    assert 10 == len(data['data'])
+    resp = client.get('/api/lyon/station', query_string={'limit': 5})
+    assert resp.status_code == 200
+    data = json.loads(resp.data)
+    assert 5 == len(data['data'])
+
+
+def test_api_specific_stations(client):
+    resp = client.get('/api/bordeaux/station/93,35')
+    assert resp.status_code == 200
+    data = json.loads(resp.data)
+    assert len(data['data']) == 2
+    assert ['35', '93'] == [x['id'] for x in data['data']]
+
+
+def test_api_daily_transaction(client):
+    date = yesterday().strftime(ISO_DATE)
+    resp = client.get('/api/bordeaux/daily/station',
+                      query_string={"limit": 10, "date": date, "by": "value"})
+    assert resp.status_code == 200
+    data = json.loads(resp.data)['data']
+    # order by value must return the first station transaction value higher than the
+    # second one.
+    assert data[0]['value'][0] > data[1]['value'][0]
+
+
+def test_api_timeseries(client):
+    start = yesterday().strftime(ISO_DATE)
+    stop = date.today().strftime(ISO_DATE)
+    resp = client.get('/api/bordeaux/timeseries/station/93,33',
+                      query_string={"start": start, "stop": stop})
+    assert resp.status_code == 200
+
+
+def test_api_hourly_profile(client):
+    date = yesterday().strftime(ISO_DATE)
+    resp = client.get('/api/bordeaux/profile/hourly/station/93,33',
+                      query_string={'date': date,
+                                    'window': 2})
+    assert resp.status_code == 200
+    resp = client.get('/api/lyon/profile/hourly/station/1002',
+                      query_string={"date": date})
+    assert resp.status_code == 200
+
+
+def test_api_daily_profile(client):
+    date = yesterday().strftime(ISO_DATE)
+    resp = client.get('/api/bordeaux/profile/daily/station/93,33',
+                      query_string={"date": date})
+    assert resp.status_code == 200
+
+
+def test_api_clustering_stations(client):
+    resp = client.get('/api/bordeaux/clustering/stations')
+    assert resp.status_code == 200
+    data = json.loads(resp.data)['data']
+    # there are just 4 clusters
+    assert {0, 1, 2, 3} == set(x['cluster_id'] for x in data)
+    resp = client.get('/api/bordeaux/clustering/stations',
+                      query_string={"geojson": True})
+    assert resp.status_code == 200
+
+
+def test_api_clustering_centroids(client):
+    resp = client.get('/api/bordeaux/clustering/centroids')
+    assert resp.status_code == 200
+    data = json.loads(resp.data)['data']
+    assert {0, 1, 2, 3} == set(x['cluster_id'] for x in data)
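Usage note (not part of the commit): the refactor replaces the per-city helpers `controller.bordeaux()` / `controller.lyon()` with a single city-agnostic `specific_stations()` backed by the private `_query_stations()` query builder. A minimal sketch of calling the new controller entry points, assuming a populated PostGIS database and the `config['database']` table names used above:

```python
# Sketch only: requires a configured jitenshea database; values are illustrative.
from jitenshea import controller

# All stations for a city, limited to 5 rows, returned as {"data": [...]}.
stations = controller.stations('bordeaux', limit=5, geojson=False)

# A specific subset of stations by id, replacing the old bordeaux()/lyon() helpers.
subset = controller.specific_stations('bordeaux', ids=['93', '35'])

print(len(stations['data']), [s['name'] for s in subset['data']])
```

The web API layer (see the `webapi.py` hunk) now forwards both cities to `specific_stations()`, which is what the new `tests/test_webapi.py` suite exercises end to end.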