Skip to content

Commit

Permalink
made major changes. Demonstration of Bay Area, Richmond Metro, and DC…
Browse files Browse the repository at this point in the history
… in demo.py
  • Loading branch information
tanimislam committed Apr 7, 2020
1 parent eb00610 commit 78917dd
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 124 deletions.
154 changes: 30 additions & 124 deletions demo.py
Original file line number Diff line number Diff line change
@@ -1,125 +1,31 @@
import os, sys, numpy, glob, pylab, tabulate, datetime, pandas
from itertools import chain
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg
#!/usr/bin/env python3

import os, sys, numpy, engine

#
## localities making up the Richmond Metropolitan Area, as passed to the engine
_richmond_metro_counties = [
    "Richmond",
    "Petersburg",
    "Hopewell",
    "Colonial Heights",
    "Amelia",
    "Caroline",
    "Charles City",
    "Chesterfield",
    "Dinwiddie",
    "Goochland",
    "Hanover",
    "Henrico",
    "King William",
    "New Kent",
    "Powhatan",
    "Prince George",
    "Sussex",
]

#
## one summary per region, in this order: the engine's default region (an
## empty set of overrides), the Richmond metro, and Washington DC.
## doShow = False is passed every time, so no plot window is requested.
for _region_kwargs in (
        { },
        { 'prefix' : 'richmond', 'state' : 'Virginia',
          'regionName' : 'Richmond Metropolitan Area',
          'list_of_counties' : _richmond_metro_counties },
        { 'prefix' : 'dc', 'state' : 'District of Columbia',
          'regionName' : 'Washington DC',
          'list_of_counties' : [ "District of Columbia" ] } ):
    engine.get_summary_demo_data( doShow = False, **_region_kwargs )

#
## the nine counties of the San Francisco Bay Area, as listed at
## https://mtc.ca.gov/about-mtc/what-mtc/nine-bay-area-counties
_bay_area_counties = {
    "Alameda", "Contra Costa", "Marin",
    "Napa", "San Francisco", "San Mateo",
    "Santa Clara", "Solano", "Sonoma",
}


def get_stat_line( line ):
    """
    Parse one comma-separated data row into a record dictionary.

    The row is split on commas and every token is stripped of surrounding
    whitespace. The first three tokens are read as the date (``YYYY-MM-DD``),
    the county, and the state; the last two tokens are read as the cumulative
    case and death counts. Any tokens in between are ignored.

    :param str line: one data row of the CSV file.
    :returns: a dictionary with keys ``date`` (a :py:class:`datetime.date`),
       ``county``, ``state``, ``cumulative cases`` and ``cumulative death``
       (both :py:class:`int`).
    :raises IndexError: if the row has fewer than three tokens.
    :raises ValueError: if a count is not an integer, or the date does not
       match ``%Y-%m-%d``.
    """
    tokens = [ tok.strip( ) for tok in line.split( ',' ) ]
    date_text, county, state = tokens[0], tokens[1], tokens[2]
    num_cases = int( tokens[-2] )
    num_deaths = int( tokens[-1] )
    when = datetime.datetime.strptime( date_text, '%Y-%m-%d' ).date( )
    return dict( zip(
        ( 'date', 'county', 'state', 'cumulative cases', 'cumulative death' ),
        ( when, county, state, num_cases, num_deaths ) ) )

#
## read the county-level CSV once, at import time, and parse every data row.
## Blank lines are dropped and the first non-blank line (presumably the
## column header) is skipped. The with-block guarantees that the file handle
## is closed once the lines have been read.
with open( os.path.join( "covid-19-data", "us-counties.csv" ), "r" ) as _csv_handle:
    _nonblank_lines = [ line.strip( ) for line in _csv_handle if line.strip( ) ]
all_counties_nytimes_covid19_data = [
    get_stat_line( line ) for line in _nonblank_lines[1:] ]

def get_data_county( county_name ):
    """
    Return the California records of a single county, ordered by date.

    :param str county_name: the county name; it must equal the record's
       ``county`` field exactly.
    :returns: the matching entries of ``all_counties_nytimes_covid19_data``,
       as a list sorted by their ``date`` field.
    """
    matches = [
        entry for entry in all_counties_nytimes_covid19_data
        if entry['county'] == county_name and entry['state'] == 'California' ]
    matches.sort( key = lambda entry: entry['date'] )
    return matches

#
## gather every record of the Bay Area counties into one list, ordered by date
all_data_bayarea = sorted( chain.from_iterable(
    map( get_data_county, _bay_area_counties ) ), key = lambda entry: entry['date'] )

#
## group the records by date: key is the date, value is the list of
## per-county records for that date
all_data_bayarea_bydate = { }
for entry in all_data_bayarea:
    mydate = entry[ 'date' ]
    all_data_bayarea_bydate.setdefault( mydate, [] ).append( entry )

#
## sum the cumulative cases and deaths over the counties, for each date
cases_deaths_bayarea_bydate = {
    mydate : {
        'cumulative cases' : sum( entry['cumulative cases' ] for entry in entries ),
        'cumulative death' : sum( entry['cumulative death' ] for entry in entries ) }
    for mydate, entries in all_data_bayarea_bydate.items( ) }

#
## now create the dataframe to analyse. The dates are sorted once and reused
## for every column, so all columns are guaranteed to line up.
_sorted_dates = sorted( cases_deaths_bayarea_bydate )
df_cases_deaths_bayeara = pandas.DataFrame({
    'date' : _sorted_dates,
    'cases' : [ cases_deaths_bayarea_bydate[ mydate ][ 'cumulative cases' ]
                for mydate in _sorted_dates ],
    'death' : [ cases_deaths_bayarea_bydate[ mydate ][ 'cumulative death' ]
                for mydate in _sorted_dates ] })
first_date = min( df_cases_deaths_bayeara.date )
last_date = max( df_cases_deaths_bayeara.date )
#
## days elapsed since the earliest date; the earliest date is computed once
## (above) rather than once per row
df_cases_deaths_bayeara[ 'days_from_beginning' ] = [
    ( mydate - first_date ).days for mydate in _sorted_dates ]

#
## pickle this pandas data; the file name carries today's date as DDMMYYYY
_today = datetime.datetime.now( ).date( )
cur_date_str = _today.strftime( '%d%m%Y' )
_pickle_name = 'covid19_bayeara_%s.pkl.gz' % cur_date_str
df_cases_deaths_bayeara.to_pickle( _pickle_name )

#
## now make a plot, logarithmic in y, of cumulative cases and deaths
fig, ax = pylab.subplots( )
fig.set_size_inches([ 12.0, 9.6 ])
for _column in ( 'cases', 'death' ):
    df_cases_deaths_bayeara.plot(
        'days_from_beginning', _column, linewidth = 4.5,
        ax = ax, logy = True, grid = True )
ax.set_ylim( 1.0, 1.05 * df_cases_deaths_bayeara.cases.max( ) )
ax.set_xlim( 0, df_cases_deaths_bayeara.days_from_beginning.max( ) )
ax.set_xlabel( 'Days from First COVID-19 CASE (%s)' % first_date.strftime( '%d-%m-%Y' ),
               fontsize = 24, fontweight = 'bold' )
ax.set_ylabel( 'Number of Cases/Deaths', fontsize = 24, fontweight = 'bold' )
ax.set_title( '\n'.join([
    'Bay Area Trend in COVID-19',
    'from %s through %s' % (
        first_date.strftime( '%d-%m-%Y' ),
        last_date.strftime( '%d-%m-%Y' ) ) ]), fontsize = 24, fontweight = 'bold' )
#
## tick labels size 20, bold. Tick.label1 is used because the Tick.label
## alias is no longer available in newer matplotlib releases.
for tick in ax.xaxis.get_major_ticks( ) + ax.yaxis.get_major_ticks( ):
    tick.label1.set_fontsize( 20 )
    tick.label1.set_fontweight( 'bold' )
#
## legend size 24, bold
leg = ax.legend( )
for txt in leg.texts:
    txt.set_fontsize( 24 )
    txt.set_fontweight( 'bold' )

#
## save figures, once as PDF and once as PNG, with a tight bounding box
for _figure_template in ( 'covid19_bayeara_%s.pdf', 'covid19_bayeara_%s.png' ):
    fig.savefig( _figure_template % cur_date_str, bbox_inches = 'tight' )

#
## now SHOW!
pylab.show( )
131 changes: 131 additions & 0 deletions engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import os, sys, numpy, glob, pylab, tabulate, datetime, pandas, titlecase
from itertools import chain
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg

#
## the nine counties of the San Francisco Bay Area, as listed at
## https://mtc.ca.gov/about-mtc/what-mtc/nine-bay-area-counties
_bay_area_counties = {
    "Alameda", "Contra Costa", "Marin",
    "Napa", "San Francisco", "San Mateo",
    "Santa Clara", "Solano", "Sonoma",
}

def _get_stat_line( line ):
    """
    Parse one comma-separated data row into a record dictionary.

    The row is split on commas and every token is stripped of surrounding
    whitespace. The first three tokens are read as the date (``YYYY-MM-DD``),
    the county, and the state; the last two tokens are read as the cumulative
    case and death counts. Any tokens in between are ignored.

    :param str line: one data row of the CSV file.
    :returns: a dictionary with keys ``date`` (a :py:class:`datetime.date`),
       ``county``, ``state``, ``cumulative cases`` and ``cumulative death``
       (both :py:class:`int`).
    :raises IndexError: if the row has fewer than three tokens.
    :raises ValueError: if a count is not an integer, or the date does not
       match ``%Y-%m-%d``.
    """
    tokens = [ tok.strip( ) for tok in line.split( ',' ) ]
    date_text, county, state = tokens[0], tokens[1], tokens[2]
    num_cases = int( tokens[-2] )
    num_deaths = int( tokens[-1] )
    when = datetime.datetime.strptime( date_text, '%Y-%m-%d' ).date( )
    return dict( zip(
        ( 'date', 'county', 'state', 'cumulative cases', 'cumulative death' ),
        ( when, county, state, num_cases, num_deaths ) ) )

#
## read the county-level CSV once, at import time, and parse every data row.
## Blank lines are dropped and the first non-blank line (presumably the
## column header) is skipped. The with-block guarantees that the file handle
## is closed once the lines have been read.
with open( os.path.join( "covid-19-data", "us-counties.csv" ), "r" ) as _csv_handle:
    _nonblank_lines = [ line.strip( ) for line in _csv_handle if line.strip( ) ]
all_counties_nytimes_covid19_data = [
    _get_stat_line( line ) for line in _nonblank_lines[1:] ]

def get_data_county( county_name, state = 'California' ):
    """
    Return the records of one county in a given state, ordered by date.

    The county is matched by *substring*: a record is selected when
    ``county_name`` occurs anywhere inside its ``county`` field, so for
    example ``'Richmond'`` also selects a county recorded as
    ``'Richmond city'``. The state must match exactly.

    :param str county_name: text that the record's county name must contain.
    :param str state: the state name. Default is ``'California'``.
    :returns: the matching entries of ``all_counties_nytimes_covid19_data``,
       as a list sorted by their ``date`` field.
    """
    matches = [
        entry for entry in all_counties_nytimes_covid19_data
        if county_name in entry['county'] and entry['state'] == state ]
    matches.sort( key = lambda entry: entry['date'] )
    return matches

def get_summary_demo_data( prefix = 'bayarea', regionName = 'Bay Area', state = 'California',
                           list_of_counties = _bay_area_counties, doShow = True ):
    """
    Aggregate, save, and plot the cumulative COVID-19 cases and deaths of a region.

    The per-county records returned by :py:meth:`get_data_county` are summed,
    date by date, over all the requested counties. The resulting table
    (columns ``date``, ``cases``, ``death``, ``days_from_beginning``) is
    pickled to ``covid19_<prefix>_<DDMMYYYY>.pkl.gz``, and a plot with a
    logarithmic y axis is saved to ``covid19_<prefix>_<DDMMYYYY>.pdf`` and
    ``covid19_<prefix>_<DDMMYYYY>.png``, where ``DDMMYYYY`` is today's date.
    The figure is not closed by this function.

    :param str prefix: the tag put into the names of the output files.
    :param str regionName: the region name shown, title-cased, in the plot title.
    :param str state: the state passed on to :py:meth:`get_data_county`.
    :param list_of_counties: an iterable of county names; duplicates are ignored.
    :param bool doShow: if ``True``, call ``pylab.show( )`` after saving.
    :raises ValueError: if no record matches the requested counties and state.
    """
    #
    ## sum the cumulative cases and deaths over the counties, for each date
    cases_deaths_region_bydate = { }
    for county in set( list_of_counties ):
        for entry in get_data_county( county, state = state ):
            totals = cases_deaths_region_bydate.setdefault(
                entry[ 'date' ], { 'cumulative cases' : 0, 'cumulative death' : 0 } )
            totals[ 'cumulative cases' ] += entry[ 'cumulative cases' ]
            totals[ 'cumulative death' ] += entry[ 'cumulative death' ]
    #
    ## fail early, and with a readable message, instead of on min( ) of an empty sequence
    if not cases_deaths_region_bydate:
        raise ValueError( "no COVID-19 data found for region '%s' in state '%s'" % (
            regionName, state ) )
    #
    ## now create the dataframe to analyse. The dates are sorted once, and the
    ## first date is computed once, rather than once per column or per row.
    sorted_dates = sorted( cases_deaths_region_bydate )
    first_date = sorted_dates[ 0 ]
    last_date = sorted_dates[ -1 ]
    df_cases_deaths_region = pandas.DataFrame({
        'date' : sorted_dates,
        'cases' : [ cases_deaths_region_bydate[ mydate ][ 'cumulative cases' ]
                    for mydate in sorted_dates ],
        'death' : [ cases_deaths_region_bydate[ mydate ][ 'cumulative death' ]
                    for mydate in sorted_dates ] } )
    df_cases_deaths_region[ 'days_from_beginning' ] = [
        ( mydate - first_date ).days for mydate in sorted_dates ]
    #
    ## pickle this pandas data
    cur_date_str = datetime.datetime.now( ).date( ).strftime('%d%m%Y' )
    df_cases_deaths_region.to_pickle(
        'covid19_%s_%s.pkl.gz' % ( prefix, cur_date_str ) )
    #
    ## now make a plot, logarithmic
    fig, ax = pylab.subplots( )
    fig.set_size_inches([ 12.0, 9.6 ])
    for column in ( 'cases', 'death' ):
        df_cases_deaths_region.plot(
            'days_from_beginning', column, linewidth = 4.5,
            ax = ax, logy = True, grid = True )
    ax.set_ylim( 1.0, 1.05 * df_cases_deaths_region.cases.max( ) )
    ax.set_xlim( 0, df_cases_deaths_region.days_from_beginning.max( ) )
    ax.set_xlabel( 'Days from First COVID-19 CASE (%s)' %
                   first_date.strftime( '%d-%m-%Y' ),
                   fontsize = 24, fontweight = 'bold' )
    ax.set_ylabel( 'Cumulative Number of Cases/Deaths', fontsize = 24, fontweight = 'bold' )
    ax.set_title( '\n'.join([
        '%s Trend in COVID-19' % titlecase.titlecase( regionName ),
        'from %s through %s' % (
            first_date.strftime( '%d-%m-%Y' ),
            last_date.strftime( '%d-%m-%Y' ) ) ]),
                  fontsize = 24, fontweight = 'bold' )
    #
    ## tick labels size 20, bold. Tick.label1 is used because the Tick.label
    ## alias is no longer available in newer matplotlib releases.
    for tick in ax.xaxis.get_major_ticks( ) + ax.yaxis.get_major_ticks( ):
        tick.label1.set_fontsize( 20 )
        tick.label1.set_fontweight( 'bold' )
    #
    ## legend size 24, bold
    leg = ax.legend( )
    for txt in leg.texts:
        txt.set_fontsize( 24 )
        txt.set_fontweight( 'bold' )
    #
    ## save figures
    fig.savefig( 'covid19_%s_%s.pdf' % ( prefix, cur_date_str ), bbox_inches = 'tight' )
    fig.savefig( 'covid19_%s_%s.png' % ( prefix, cur_date_str ), bbox_inches = 'tight' )
    #
    ## now SHOW!
    if doShow: pylab.show( )

0 comments on commit 78917dd

Please sign in to comment.