Updated the agency_report.py script so that it can be run for all agencies, and added author counts
jessykate · Mar 7, 2010 · eb200eb · eb200eb
1 parent cb51a5f
commit eb200eb
Show file tree

Hide file tree

Showing 2 changed files with 60 additions and 19 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,4 +3,5 @@
 todo
 cache/*
 secrets.py
-simulate.py
+simulate.py
+reports/
diff --git a/agency_report.py b/agency_report.py
@@ -1,18 +1,22 @@
 #!/usr/bin/python
 
 '''
-A standalone script that builds a report for a given agency about
-participation over time on their ideascale site.
+A standalone script that builds agency reports showing participation
+over time on their ideascale site.
+
+Add to /etc/crontab to run 05 minutes after midnight each day:
+05 00 * * *   username /path/to/agency_report.py
+
 '''
 
-import pymongo
+import pymongo, os, datetime
 
-def report(agency):
+def generate_stats(agency):
     db = pymongo.Connection().opengovtracker
     collections = db.collection_names()    
     # the first few collections were used for testing, so skip them. 
-    collections = collections[12:]
-    stats = {'timestamps': [], 'ideas': [], 'comments': [], 'votes': [] }
+    collections = collections[13:]
+    stats = {'timestamps': [], 'ideas': [], 'comments': [], 'votes': [], 'authors': [] }
     for collection in collections:
         current = db[collection]
         stats['timestamps'].append(collection)
@@ -23,34 +27,70 @@ def report(agency):
         ideas = 0
         votes = 0
         comments = 0
+        authors = []
         for idea in cursor:
             ideas += 1
             # the layout of each document changed early on, from
             # storing the idea objects at the top level of the record
             # to storing it in a subdict. checking for the 'idea' key
             # accounts for this.
             if 'idea' in idea.keys():
-                votes += abs(idea['idea']['voteCount'])
-                comments += idea['idea']['commentCount']
-            else:
-                votes += abs(idea['voteCount'])
-                comments += idea['commentCount']
-                
+                idea = idea['idea']
+            votes += abs(idea['voteCount'])
+            comments += idea['commentCount']
+            if idea['author'] not in authors:
+                authors.append(idea['author'])
+
         stats['ideas'].append(ideas)
         stats['votes'].append(votes)
         stats['comments'].append(comments)
+        stats['authors'].append(len(authors))
+
     return stats
 
-if __name__ == '__main__':
+def interactive():
     agency = raw_input("Agency? ")
     report_type = raw_input("Report type (csv or tsv)? ")
     if report_type == 'csv':
         sep = ','
     else: 
         sep = '\t'
-    stats = report(agency)
-    rows = zip(stats['timestamps'], stats['ideas'], stats['votes'], stats['comments'])
-    report = open(agency+'.csv', 'w')
-    report.write("%s%s%s%s%s%s%s\n" % ('Time', sep, 'Ideas', sep,'Votes', sep, 'Comments'))
+    agency_report(agency, sep)
+
+def agency_report(agency, sep):
+    stats = generate_stats(agency)
+    rows = zip(stats['timestamps'], stats['ideas'], stats['votes'], stats['comments'], stats['authors'])
+    # store agency reports in a subdirectory for the date on which
+    # they were generated
+    today = datetime.datetime.date(datetime.datetime.now()).isoformat()
+    report_dir = 'reports/'+today+'/'
+    if not os.path.exists(report_dir):
+        os.makedirs(report_dir)
+
+    # create a readme file that stores the exact time the report was
+    # generated.
+    readme = open(os.path.join(report_dir,agency+'.readme'), 'w')
+    timestamp = datetime.datetime.now().isoformat('-')
+    # remove the fractional seconds (microseconds)
+    timestamp = timestamp[:timestamp.rfind('.')]
+    readme.write("%s report generated at %s" % (agency, timestamp))
+    readme.close()
+
+    # now write the actual csv file
+    filename = agency+'.csv'
+    report = open(os.path.join(report_dir,filename), 'w')
+    report.write("%s%s%s%s%s%s%s%s%s\n" % ('Time', sep, 'Ideas', sep,'Votes', sep, 'Comments', sep, 'Authors'))
     for row in rows:
-        report.write("%s%s%d%s%d%s%d\n" % (row[0], sep, row[1], sep, row[2], sep, row[3]))
+        report.write("%s%s%d%s%d%s%d%s%d\n" % (row[0], sep, row[1], sep, row[2], sep, row[3], sep, row[4]))
+    report.close()
+
+if __name__ == '__main__':
+
+    agencies =["usaid", "comm", "dod", "ed", "energy", "nasa",
+               'dot', "int", "va", "treas", "gsa", "opm", "labor",
+               "doj", "ssa", "state", "nsf", "hud", "epa", "sba",
+               "dhs", "nrc", "ostp"]
+    for agency in agencies:
+        agency_report(agency, ',')        
+
+