Skip to content

Commit

Permalink
tag analysis and summary: contributes to #31, closes #32
Browse files Browse the repository at this point in the history
  • Loading branch information
grammarware committed Oct 4, 2015
1 parent ffd06ec commit be211c1
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 12 deletions.
68 changes: 68 additions & 0 deletions analyse-topics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
#
# a module for analysing tagging per entity

import sys, os.path, glob
from fancy.ANSI import C
from lib.AST import Sleigh, last
from lib.JSON import parseJSON, json2lines

# Base directory of the JSON data; the corpus itself lives in its 'corpus' subfolder.
ienputdir = '../json'
# Optional lookup table (presumably entity name -> file name; verify against lib.AST).
n2f_name = '_name2file.json'
# Fall back to an empty mapping when the lookup table is not present on disk.
if os.path.exists(n2f_name):
    name2file = parseJSON(n2f_name)
else:
    name2file = {}
sleigh = Sleigh('{}/corpus'.format(ienputdir), name2file)
# Quiet by default; the script entry point flips this when '-v' is given.
verbose = False

def checkon(fn, o):
    """Refresh the tag summary of ``o`` and rewrite its JSON file if it changed.

    ``fn`` is the path of the entity.  When it is a directory, the JSON file
    is looked up as ``fn + '.json'`` first and as ``fn/<last(fn)>.json``
    second.  ``o`` must provide ``getJSON()`` and ``getQTags()``.

    The JSON of ``o`` is captured before and after calling ``o.getQTags()``;
    if the two (sorted) line lists differ, the new JSON is written to the
    resolved file.  With the module-level ``verbose`` flag set, the lines
    that differ are printed as well.

    Returns:
        0 if nothing changed, 1 if the path or its JSON file could not be
        found, 2 if the file was rewritten.
    """
    if not os.path.exists(fn):
        print('Path does not exist:', fn)
        return 1
    if os.path.isdir(fn):
        if os.path.exists(fn+'.json'):
            fn = fn + '.json'
        elif os.path.exists(fn+'/'+last(fn)+'.json'):
            fn = fn+'/'+last(fn)+'.json'
        else:
            print('Where to find', fn, '?')
            return 1
    plines = sorted(json2lines(o.getJSON().split('\n')))
    # the return value is not needed here: the call is made for its effect
    # on what getJSON() reports afterwards
    o.getQTags()
    flines = sorted(json2lines(o.getJSON().split('\n')))
    if flines == plines:
        return 0
    if verbose:
        # set lookups keep the diff linear in the number of lines, while
        # iterating the lists preserves their order and duplicates
        before, after = set(plines), set(flines)
        print('∆:', '\n'.join([line for line in flines if line not in before]))
        print('vs', '\n'.join([line for line in plines if line not in after]))
    # the context manager closes the handle even if getJSON() or write() raises
    with open(fn, 'w') as f:
        f.write(o.getJSON())
    return 2

def checkreport(fn, o):
    """Run ``checkon(fn, o)`` and print a coloured one-line status for it.

    The status line is always printed in verbose mode; otherwise only
    non-zero results are reported.  The status code obtained from
    ``checkon`` is returned unchanged.
    """
    # labels are indexed by the status code: 0, 1, 2
    labels = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
    outcome = checkon(fn, o)
    # stay silent about unchanged files unless asked to be verbose
    if outcome == 0 and not verbose:
        return outcome
    print('[ {} ] {}'.format(labels[outcome], fn))
    return outcome

if __name__ == "__main__":
    # verbose reporting is requested by passing -v as the last argument
    verbose = sys.argv[-1] == '-v'
    print('{}: {} venues, {} papers\n{}'.format(
        C.purple('BibSLEIGH'),
        C.red(len(sleigh.venues)),
        C.red(sleigh.numOfPapers()),
        C.purple('='*42)))
    # tally of check results, keyed by the status code from checkreport
    cx = dict.fromkeys((0, 1, 2), 0)
    for v in sleigh.venues:
        # the conferences of a venue are checked before the venue itself
        for c in v.getConfs():
            cx[checkreport(c.filename, c)] += 1
        cx[checkreport(v.filename, v)] += 1
    print('{} files checked, {} ok, {} fixed, {} failed'.format(
        C.bold(sum(cx.values())),
        C.blue(cx[0]),
        C.yellow(cx[2]),
        C.red(cx[1])))
3 changes: 1 addition & 2 deletions export-people.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def countAllPapers(ad, ed):
f.write(peoplistHTML.format(\
title='All contributors',
listname='{} people known'.format(len(ps)),
ul=azlist
ul=azlist\
))
f.close()
print('People index:', C.blue('created'))
Expand All @@ -284,4 +284,3 @@ def countAllPapers(ad, ed):
C.red(len(sleigh.venues)),
C.red(sleigh.numOfPapers()),
C.red(sleigh.numOfTags())))
# print(sleigh.getTags())
3 changes: 2 additions & 1 deletion export-tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ def kv2link(k, v):
for k in ts.keys():
f = open('{}/tag/{}.html'.format(outputdir, k), 'w')
# papers are displayed in reverse chronological order
lst = [x.getRestrictedItem(k) for x in sorted(ts[k], key=lambda z:-z.json['year'] if 'year' in z.json.keys() else 0)]
lst = [x.getRestrictedItem(k) for x in \
sorted(ts[k], key=lambda z: -z.json['year'] if 'year' in z.json.keys() else 0)]
# no comprehension possible for this case
for x in ts[k]:
if x not in tagged:
Expand Down
4 changes: 2 additions & 2 deletions fancy/Templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ def leftLinks(stat, edit):
<p> <strong>{cxVen}</strong> venues,
<strong>{cxVol}</strong> volumes,
<strong>{cxPap}</strong> papers,
<strong>347</strong> tags,
<strong>74865</strong> markings.</p></div>
<strong>392</strong> tags,
<strong>95232</strong> markings.</p></div>
{items}
'''+footer

Expand Down
68 changes: 61 additions & 7 deletions lib/AST.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def getBib(self):
s += '\t{0:<13} = "{{<span id="{0}">{1}</span>}}",\n'.format(k, self.json[k])
elif k in ('crossref', 'key', 'type', 'venue', 'twitter', \
'eventtitle', 'eventurl', 'nondblpkey', 'dblpkey', 'dblpurl', \
'programchair', 'generalchair', 'roles'):
'programchair', 'generalchair', 'roles', 'tagged'):
# TODO: ban 'ee' as well
pass
elif k == 'doi':
Expand Down Expand Up @@ -370,7 +370,21 @@ def getPage(self):
else:
clist[a] = 1
adds = '<div class="rbox">' + '<br/>\n'.join(['{} × {}'.format(clist[a], a) for a in sorted(clist.keys())]) + '</div>'
ev = adds + ev
else:
adds = ''
if 'tagged' in self.json.keys():
# gracious continuation
if adds:
adds = adds[:-6]
toptags = '<hr/>\n'
else:
toptags = '<div class="rbox">'
for t in self.json['tagged'][:10]:
toptags += '<span class="tag">{1} ×<a href="tag/{0}.html">#{0}</a></span><br/>\n'.format(*t)
toptags += '</div>'
else:
toptags = ''
ev = adds + toptags + ev
ABBR = self.get('name')
title = self.get('title')
img = self.json['venue'].lower() if 'venue' in self.json.keys() else ABBR.lower()
Expand Down Expand Up @@ -429,6 +443,24 @@ def getTags(self):
self.tags[k] = []
self.tags[k].extend(ts[k])
return self.tags
def getQTags(self):
    """Return ``[tag, count]`` pairs for tags used by more than one paper.

    A list already stored under ``'tagged'`` in ``self.json`` is returned
    as is.  Otherwise the tags of all papers in ``self.years`` (via their
    ``confs`` and ``papers``) are counted, tags seen only once are dropped,
    and the rest is ordered by descending count.  A non-empty result is
    stored under ``self.json['tagged']`` before it is returned.
    """
    # a stored summary takes precedence over recomputation
    if 'tagged' in self.json.keys():
        return self.json['tagged']
    counts = {}
    for year in self.years:
        for conf in year.confs:
            for paper in conf.papers:
                for tag in paper.getQTags():
                    counts[tag] = counts.get(tag, 0) + 1
    frequent = [tag for tag in counts if counts[tag] > 1]
    # stable sort: tags with equal counts keep their first-seen order
    frequent.sort(key=lambda tag: -counts[tag])
    tagged = [[tag, counts[tag]] for tag in frequent]
    # an empty summary is returned but never stored
    if tagged:
        self.json['tagged'] = tagged
    return tagged


class Year(Unser):
Expand Down Expand Up @@ -594,8 +626,16 @@ def getPage(self):
# ev += '<h3>Committee: ' + ', '.join(['<a href="person/{}.html">{}</a> ({})'.format(\
# c.replace(' ', '_'),
# c, t) for c, t in positions]) + '</h3>'
if 'tagged' in self.json.keys():
toptags = '<div class="rbox">'
for t in self.json['tagged'][:10]:
toptags += '<span class="tag">{1} ×<a href="tag/{0}.html">#{0}</a></span><br/>'.format(*t)
toptags += '</div>'
else:
toptags = ''
if self.papers:
ev += '<h3>Contents ({} items)</h3><dl class="toc">'.format(len(self.papers))+\
toptags + \
'\n'.join([p.getItem() for p in sorted(self.papers, key=sortbypages)])+'</dl>'
return bibHTML.format(\
filename=self.getJsonName(),
Expand Down Expand Up @@ -638,6 +678,22 @@ def getTags(self):
self.tags[k] = []
self.tags[k].append(ts[k])
return self.tags
def getQTags(self):
    """Return ``[tag, count]`` pairs for tags used by more than one paper.

    A list already stored under ``'tagged'`` in ``self.json`` is returned
    as is.  Otherwise the tags of all entries in ``self.papers`` are
    counted, tags seen only once are dropped, and the rest is ordered by
    descending count.  A non-empty result is stored under
    ``self.json['tagged']`` before it is returned.
    """
    # a stored summary takes precedence over recomputation
    if 'tagged' in self.json.keys():
        return self.json['tagged']
    counts = {}
    for paper in self.papers:
        for tag in paper.getQTags():
            counts[tag] = counts.get(tag, 0) + 1
    frequent = [tag for tag in counts if counts[tag] > 1]
    # stable sort: tags with equal counts keep their first-seen order
    frequent.sort(key=lambda tag: -counts[tag])
    tagged = [[tag, counts[tag]] for tag in frequent]
    # an empty summary is returned but never stored
    if tagged:
        self.json['tagged'] = tagged
    return tagged

class Paper(Unser):
def __init__(self, f, hdir, parent):
Expand Down Expand Up @@ -718,8 +774,6 @@ def seekByKey(self, key):
else:
return None
def getTags(self):
    """Map each tag in ``self.tags`` to this object.

    Returns an empty dict when ``self.tags`` is empty or otherwise falsy.
    """
    # single conditional return; the earlier if/else form returned on both
    # branches and left a duplicate, unreachable return behind it
    return {k: self for k in self.tags} if self.tags else {}
def getQTags(self):
    """Return ``self.tags``, or a fresh empty list when it is falsy."""
    return self.tags or []

0 comments on commit be211c1

Please sign in to comment.