This repository has been archived by the owner on Jan 22, 2021. It is now read-only.

Update the Web API after the data/table/tasks refactoring #26

Merged
10 commits merged on May 28, 2018
4 changes: 4 additions & 0 deletions README.md
@@ -56,3 +56,7 @@ PostgreSQL database with PostGIS. You must have the `shp2pgsql` command.
See the `conda_env.sh` script to create a conda environment with the dependencies.

**Note**: flask-restplus and daiquiri should be installed via `pip`.

## Tests

Install the development extras, e.g. `pip install -e ".[dev]"`, and run `pytest`.
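
To give a concrete idea of what such a test can look like, here is a minimal pytest sketch; the `jitenshea.webapi.app` import path and the `/api/city` route are assumptions for illustration, not taken from this PR:

```python
# Minimal pytest sketch for the web API. Assumptions (not in this PR):
# the Flask app is importable as `jitenshea.webapi.app` and a city listing
# endpoint is exposed at '/api/city', returning JSON with a "data" key.
import pytest

from jitenshea.webapi import app  # assumed import path


@pytest.fixture
def client():
    app.config['TESTING'] = True
    with app.test_client() as client:
        yield client


def test_city_list(client):
    resp = client.get('/api/city')  # hypothetical route
    assert resp.status_code == 200
    assert 'data' in resp.get_json()
```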
177 changes: 70 additions & 107 deletions jitenshea/controller.py
@@ -48,6 +48,7 @@ def processing_daily_data(rset, window):
'name': group[0]['name']})
return {"data": values}


def processing_timeseries(rset):
"""Processing the result of a timeseries SQL query

@@ -61,9 +62,9 @@ def processing_timeseries(rset):
group = list(group)
values.append({'id': k,
'name': group[0]['name'],
"ts": [x['ts'] for x in group],
'available_bike': [x['available_bike'] for x in group],
'available_stand': [x['available_stand'] for x in group]})
"ts": [x['timestamp'] for x in group],
'available_bikes': [x['available_bikes'] for x in group],
'available_stands': [x['available_stands'] for x in group]})
return {"data": values}


@@ -91,6 +92,7 @@ def time_window(day, window, backward):
order_reference_date = start
return TimeWindow(start, stop, order_reference_date)


def station_geojson(stations):
"""Process station data into GeoJSON
"""
@@ -136,7 +138,7 @@ def clustered_station_geojson(stations):
"properties": {
"id": data['id'],
"cluster_id": data['cluster_id'],
"name": data['nom'],
"name": data['name'],
"start": data['start'],
"stop": data['stop']
}})
@@ -156,6 +158,7 @@ def cities():
'country': 'france',
'stations': 174}]}


def stations(city, limit, geojson):
"""List of bicycle stations

@@ -165,12 +168,7 @@ def stations(city, limit, geojson):

Return a list of dicts, one dict per bicycle station
"""
if city == 'bordeaux':
query = bordeaux_stations(limit)
elif city == 'lyon':
query = lyon_stations(limit)
else:
raise ValueError("City {} not supported".format(city))
query = _query_stations(city, limit)
eng = db()
rset = eng.execute(query)
keys = rset.keys()
@@ -180,100 +178,65 @@ def stations(city, limit, geojson):
return {"data": result}


def bordeaux_stations(limit=20):
"""Query for the list of bicycle stations in Bordeaux

limit: int
default 20

Return a SQL query to execute
"""
return """SELECT numstat::int AS id
,nom AS name
,adresse AS address
,lower(commune) AS city
,nbsuppor::int AS nb_bikes
,st_x(st_transform(geom, 4326)) AS x
,st_y(st_transform(geom, 4326)) AS y
FROM {schema}.vcub_station
LIMIT {limit}
""".format(schema=config['bordeaux']['schema'],
limit=limit)

def lyon_stations(limit=20):
"""Query for the list of bicycle stations in Lyon
def specific_stations(city, ids):
"""List of specific bicycle stations.

limit: int
default 20

Return a SQL query to execute
"""
return """SELECT idstation::int AS id
,nom AS name
,adresse1 AS address
,lower(commune) AS city
,nbbornette::int AS nb_bikes
,st_x(geom) AS x
,st_y(geom) AS y
FROM {schema}.pvostationvelov
LIMIT {limit}
""".format(schema=config['lyon']['schema'],
limit=limit)

def bordeaux(station_ids):
"""Get some specific bicycle-sharing stations for Bordeaux
station_id: list of int
Ids of the bicycle-sharing station
Parameters
----------
city : string
ids : list

Return bicycle stations in a list of dict
Returns
-------
list of dict
One dict per bicycle station
"""
query = bordeaux_stations(1).replace("LIMIT 1", 'WHERE numstat IN %(id_list)s')
query = _query_stations(city, 1).replace("LIMIT 1", 'WHERE id IN %(id_list)s')
eng = db()
rset = eng.execute(query, id_list=tuple(str(x) for x in station_ids)).fetchall()
rset = eng.execute(query, id_list=tuple(str(x) for x in ids)).fetchall()
if not rset:
return []
return {"data" : [dict(zip(x.keys(), x)) for x in rset]}
return {"data": [dict(zip(x.keys(), x)) for x in rset]}

def lyon(station_ids):
"""Get some specific bicycle-sharing stations for Lyon
station_id: list of ints
Ids of the bicycle-sharing stations

Return bicycle stations in a list of dict
"""
query = lyon_stations(1).replace("LIMIT 1", 'WHERE idstation IN %(id_list)s')
eng = db()
rset = eng.execute(query, id_list=tuple(str(x) for x in station_ids)).fetchall()
if not rset:
return []
return {"data" : [dict(zip(x.keys(), x)) for x in rset]}
def _query_stations(city, limit=20):
"""Query to get the list of bicycle stations

Parameters
----------
city : str
limit : int

def station_city_table(city):
"""Name table and ID column name
Returns
-------
str
"""
if city not in ('bordeaux', 'lyon'):
raise ValueError("City '{}' not supported.".format(city))
if city == 'bordeaux':
return 'vcub_station', 'numstat'
if city == 'lyon':
return 'pvostationvelov', 'idstation'
return """SELECT id
,name
,address
,city
,nb_stations as nb_bikes
,st_x(geom) as x
,st_y(geom) as y
FROM {schema}.stations
Collaborator: We could recover the stations table name from config file, as in the previous PR.
LIMIT {limit}
""".format(schema=city, limit=limit)

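A possible shape for the reviewer's suggestion above (reading the stations table name from the configuration rather than hard-coding it) is sketched below; the `stations_table` config key is an assumption, not part of this PR:

```python
# Sketch only: assumes the config gains a 'stations_table' entry in its
# 'database' section (e.g. stations_table = stations), and reuses the
# module-level `config` object already used throughout controller.py.
def _query_stations(city, limit=20):
    table = config['database'].get('stations_table', 'stations')
    return """SELECT id
           ,name
           ,address
           ,city
           ,nb_stations AS nb_bikes
           ,st_x(geom) AS x
           ,st_y(geom) AS y
           FROM {schema}.{table}
           LIMIT {limit}
           """.format(schema=city, table=table, limit=limit)
```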

def daily_query(city):
"""SQL query to get daily transactions according to the city
"""
if city not in ('bordeaux', 'lyon'):
raise ValueError("City '{}' not supported.".format(city))
table, idcol = station_city_table(city)
return """SELECT id
,number AS value
,date
,Y.nom AS name
,name
FROM {schema}.daily_transaction AS X
LEFT JOIN {schema}.{table} AS Y ON X.id=Y.{idcol}::int
LEFT JOIN {schema}.stations AS Y using(id)
Collaborator: Same remark as previously.
WHERE id IN %(id_list)s AND date >= %(start)s AND date <= %(stop)s
ORDER BY id,date""".format(schema=config[city]['schema'], table=table, idcol=idcol)
ORDER BY id,date""".format(schema=city)
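
For context, the string returned by `daily_query` is executed with bound parameters, mirroring the calls made elsewhere in this module; a hedged usage sketch (the station ids and dates are made up):

```python
# Usage sketch for daily_query; `db` is the engine factory already used
# throughout controller.py, and the parameter names match the query above.
import datetime

from jitenshea.controller import daily_query, db

query = daily_query('bordeaux')
rset = db().execute(query,
                    id_list=('101', '102'),  # made-up station ids
                    start=datetime.date(2018, 5, 1),
                    stop=datetime.date(2018, 5, 7)).fetchall()
```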


def daily_query_stations(city, limit, order_by='station'):
"""SQL query to get daily transactions for all stations
@@ -284,7 +247,6 @@ def daily_query_stations(city, limit, order_by='station'):
order_by = 'id'
if order_by == 'value':
order_by = 'number DESC'
table, idcol = station_city_table(city)
return """WITH station AS (
SELECT id
,row_number() over (partition by null order by {order_by}) AS rank
@@ -296,14 +258,12 @@ def daily_query_stations(city, limit, order_by='station'):
SELECT S.id
,D.number AS value
,D.date
,Y.nom AS name
,Y.name
FROM station AS S
LEFT JOIN {schema}.daily_transaction AS D ON (S.id=D.id)
LEFT JOIN {schema}.{table} AS Y ON S.id=Y.{idcol}::int
LEFT JOIN {schema}.stations AS Y ON S.id=Y.id
Collaborator: Idem.
WHERE D.date >= %(start)s AND D.date <= %(stop)s
ORDER BY S.rank,D.date;""".format(schema=config[city]['schema'],
table=table,
idcol=idcol,
order_by=order_by,
limit=limit)

@@ -353,18 +313,23 @@ def daily_transaction_list(city, day, limit, order_by, window=0, backward=True):
order_reference_date=window.order_reference_date).fetchall()
return processing_daily_data(rset, window)


def timeseries(city, station_ids, start, stop):
"""Get timeseries data between two dates for a specific city and a list of station ids
"""
query = """SELECT *
FROM {schema}.timeserie_norm
WHERE id IN %(id_list)s AND ts >= %(start)s AND ts < %(stop)s
query = """SELECT T.*
,S.name as name
FROM {schema}.timeseries AS T
LEFT JOIN {schema}.stations AS S using(id)
Collaborator: Two new occurrences of table names there (timeseries and stations).
WHERE id IN %(id_list)s AND timestamp >= %(start)s AND timestamp < %(stop)s
ORDER BY id,timestamp
""".format(schema=config[city]['schema'])
eng = db()
rset = eng.execute(query, id_list=tuple(x for x in station_ids),
start=start, stop=stop)
return processing_timeseries(rset)


def hourly_process(df):
"""DataFrame with timeseries into an hourly transaction profile

@@ -374,15 +339,16 @@ def hourly_process(df):
Return a DataFrame with the transactions sum & mean for each hour
"""
df = df.copy().set_index('ts')
transaction = (df['available_bike']
transaction = (df['available_bikes']
.diff()
.abs()
.dropna()
.resample('H')
.sum()
.reset_index())
transaction['hour'] = transaction['ts'].apply(lambda x: x.hour)
return transaction.groupby('hour')['available_bike'].agg(['sum', 'mean'])
return transaction.groupby('hour')['available_bikes'].agg(['sum', 'mean'])
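
A toy usage sketch of `hourly_process`; the fake DataFrame only needs the `ts` and `available_bikes` columns that the function body above relies on:

```python
# Toy input for hourly_process: a 'ts' datetime column and an
# 'available_bikes' count, as expected by the function above.
import pandas as pd

from jitenshea.controller import hourly_process

df = pd.DataFrame({
    'ts': pd.date_range('2018-05-01', periods=6, freq='30min'),
    'available_bikes': [10, 8, 8, 5, 9, 9],
})
profile = hourly_process(df)
# 'profile' is indexed by hour of day, with the sum and mean of the
# absolute changes in available bikes per hour.
print(profile)
```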


def hourly_profile(city, station_ids, day, window):
"""Return the number of transactions per hour
@@ -425,6 +391,7 @@ def daily_profile_process(df):
df['weekday'] = df['date'].apply(lambda x: x.weekday())
return df.groupby('weekday')['value'].agg(['sum', 'mean'])


def daily_profile(city, station_ids, day, window):
"""Return the number of transactions per day of week

Expand Down Expand Up @@ -466,9 +433,8 @@ def get_station_ids(city):
list of integers
IDs of the shared-bike stations in the `city`
"""
table, idcol = station_city_table(city)
query = ("SELECT {id} FROM {schema}.{table}"
";").format(id=idcol, schema=config[city]["schema"], table=table)
query = ("SELECT id FROM {schema}.stations"
Collaborator: The stations table name could be automated as well.
";").format(schema=config[city]["schema"])
eng = db()
rset = eng.execute(query).fetchall()
if not rset:
@@ -491,28 +457,25 @@ def station_cluster_query(city):
"""
if city not in ('bordeaux', 'lyon'):
raise ValueError("City '{}' not supported.".format(city))
table, idname = station_city_table(city)
return ("WITH ranked_clusters AS ("
"SELECT cs.station_id AS id, "
"cs.cluster_id, "
"cs.start AS start, "
"cs.stop AS stop, "
"citystation.nom AS nom, "
"citystation.name AS name, "
"citystation.geom AS geom, "
"rank() OVER (ORDER BY stop DESC) AS rank "
"FROM {schema}.{cluster} AS cs "
"JOIN {schema}.{table} AS citystation "
"ON citystation.{idcol} = cs.station_id::varchar "
"JOIN {schema}.stations AS citystation "
Collaborator: Here comes a new occurrence.
"ON citystation.id = cs.station_id "
"WHERE cs.station_id IN %(id_list)s) "
"SELECT id, cluster_id, start, stop, nom, "
"st_x(st_transform(geom, 4326)) as x, "
"st_y(st_transform(geom, 4326)) as y "
"SELECT id, cluster_id, start, stop, name, "
"st_x(geom) as x, "
"st_y(geom) as y "
"FROM ranked_clusters "
"WHERE rank=1"
";").format(schema=config[city]['schema'],
cluster=config[city]['clustering'],
table=table,
idcol=idname)
cluster=config['database']['clustering'])


def station_clusters(city, station_ids=None, geojson=False):
@@ -569,13 +532,13 @@ def cluster_profile_query(city):
"SELECT *, rank() OVER (ORDER BY stop DESC) AS rank "
"FROM {schema}.{centroid}) "
"SELECT cluster_id, "
"h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, "
"h00, h01, h02, h03, h04, h05, h06, h07, h08, h09, h10, h11, "
"h12, h13, h14, h15, h16, h17, h18, h19, h20, h21, h22, h23, "
"start, stop "
"FROM ranked_centroids "
"WHERE rank=1"
";").format(schema=config[city]['schema'],
centroid=config[city]['centroids'])
centroid=config["database"]['centroids'])


def cluster_profiles(city):
@@ -604,6 +567,6 @@ def cluster_profiles(city):
"start": cluster['start'],
'stop': cluster['stop'],
'hour': list(range(24)),
'values': [cluster[h] for h in ["h{}".format(i) for i in range(24)]]}
'values': [cluster[h] for h in ["h{:02d}".format(i) for i in range(24)]]}
)
return {"data": result}
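
The switch to `"h{:02d}".format(i)` matters because the first ten hour columns are now zero-padded (`h00`…`h09`) in `cluster_profile_query`, so the old `"h{}".format(i)` keys would no longer match; a quick check of the generated keys:

```python
# The zero-padded keys match the h00..h23 columns selected in
# cluster_profile_query above.
hours = ["h{:02d}".format(i) for i in range(24)]
print(hours[:3], hours[-1])  # ['h00', 'h01', 'h02'] h23
```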
4 changes: 2 additions & 2 deletions jitenshea/static/station.js
@@ -98,10 +98,10 @@ $(document).ready(function() {
var station_name = content.data[0].name;
var date = content.data[0].ts;
var stands = date.map(function(t, i) {
return [Date.parse(t), content.data[0].available_stand[i]];
return [Date.parse(t), content.data[0].available_stands[i]];
});
var bikes = date.map(function(t, i) {
return [Date.parse(t), content.data[0].available_bike[i]];
return [Date.parse(t), content.data[0].available_bikes[i]];
});
Highcharts.stockChart('stationTimeseries', {
// use to select the time window
5 changes: 1 addition & 4 deletions jitenshea/webapi.py
@@ -170,10 +170,7 @@ class CityStation(Resource):
@api.doc(description="Bicycle station(s)")
def get(self, city, ids):
check_city(city)
if city == 'bordeaux':
rset = controller.bordeaux(ids)
if city == 'lyon':
rset = controller.lyon(ids)
rset = controller.specific_stations(city, ids)
if not rset:
api.abort(404, "No such id: {}".format(ids))
return jsonify(rset)
2 changes: 2 additions & 0 deletions pytest.ini
@@ -0,0 +1,2 @@
[pytest]
addopts = --ignore=datarepo --ignore=sql --ignore=jitenshea/static/bower --ignore=conf
1 change: 1 addition & 0 deletions setup.py
@@ -19,6 +19,7 @@
packages=setuptools.find_packages(),
include_package_data=True,
install_requires=INSTALL_REQUIRES,
extras_require={'dev': ['pytest', 'pytest-sugar', 'ipython', 'ipdb']},

author="Damien Garaud",
author_email='damien.garaud@gmail.com',