-
Notifications
You must be signed in to change notification settings - Fork 1
/
citeproc_to_html.py
executable file
·195 lines (156 loc) · 6.31 KB
/
citeproc_to_html.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/usr/bin/env python
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from citeproc.py2compat import *
# We'll use json.loads for parsing the JSON data.
import json
import os
import re
import io
from dateutil import parser as dateparser
import datetime
# Import the citeproc-py classes we'll use below.
from citeproc import CitationStylesStyle, CitationStylesBibliography
from citeproc import Citation, CitationItem
from citeproc import formatter
from citeproc.source.json import CiteProcJSON
from config import GROUPS, DB_PATH, DB_FILENAME_FMT
try:
from urllib.parse import unquote
except ImportError:
from urllib import unquote
try:
from html import unescape
except ImportError:
import HTMLParser
parser = HTMLParser.HTMLParser()
unescape = parser.unescape
# Pre-compiled patterns for recognising common textual date layouts.
# Month and/or day first, slash separated, ending in a 4-digit year,
# e.g. "3/2015" or "3/14/2015".
MONTHDAY_FIRST_SLASH = re.compile(r'^(\d{1,2}/)+\d{4}$')
# 4-digit year first, followed by one or more slash separated 1-2 digit
# fields, e.g. "2015/3" or "2015/3/14".
YEAR_FIRST_SLASH = re.compile(r'^\d{4}(/\d{1,2})+$')
# 4-digit year optionally followed by up to two dash separated 1-2 digit
# fields, e.g. "2015", "2015-3" or "2015-03-14".  The quantifier must be
# written {0,2}: a three-value form such as {0,1,2} is not a repetition at
# all, the regex engine treats it as literal text.
ISO_STYLE_DATE = re.compile(r'^\d{4}(-\d{1,2}){0,2}$')
# Any run of four digits; deliberately unanchored so it can be used to
# search inside a longer string.
JUST_YEAR = re.compile(r'\d{4}')
# Timestamp text substituted for entries that carry no "issued" date.
NO_DATE = "1970-01-01 00:00:00"
# The same instant parsed once at import time; passed as the ``default``
# argument when parsing partial dates.
DEFAULT_DATE = dateparser.parse(NO_DATE)
def get_year(item):
    """Return the publication year of a CSL-JSON item as an int.

    The year is taken from the first element of the first ``date-parts``
    entry of ``item["issued"]`` when that is available, otherwise it is
    parsed out of ``item["issued"]["raw"]``.

    :param item: dict-like CSL-JSON record.
    :returns: the year, or ``0`` when the item has no usable issue date
        (missing or null ``issued``, or an empty ``date-parts`` list).
    :raises ValueError: if the ``raw`` date text cannot be parsed, or the
        year component of ``date-parts`` is not convertible to ``int``.
    """
    # ``issued`` may be absent or an explicit JSON null.
    issued = item.get("issued") or {}

    if 'date-parts' in issued:
        date_parts = issued['date-parts']
        # Guard against ``[]`` and ``[[]]`` instead of raising IndexError;
        # an empty date-parts falls through to the other sources.
        if date_parts and date_parts[0]:
            return int(date_parts[0][0])

    if 'raw' in issued:
        # No ``default`` is supplied here, so any field missing from the
        # text is filled in by dateutil from the current date.
        return dateparser.parse(issued['raw']).year

    return 0
def get_date_string(item):
    """Return the issue date of a CSL-JSON item as an ISO ``YYYY-MM-DD`` string.

    The date text is built from the first ``date-parts`` entry of
    ``item["issued"]`` (fields joined with ``-``) when that is available,
    otherwise it is taken from ``item["issued"]["raw"]``.  When neither is
    usable ``NO_DATE`` is substituted.  The text is parsed with
    ``DEFAULT_DATE`` as the default, so fields missing from a partial date
    are taken from that value.

    :param item: dict-like CSL-JSON record.
    :returns: ``date.isoformat()`` text of the parsed date.
    :raises ValueError: if the date text cannot be parsed.
    """
    # ``issued`` may be absent or an explicit JSON null.
    issued = item.get("issued") or {}

    ts = None
    if 'date-parts' in issued:
        date_parts = issued['date-parts']
        # An empty ``[]`` / ``[[]]`` would otherwise raise IndexError or hand
        # an empty string to the parser; treat it as "no date-parts".
        if date_parts and date_parts[0]:
            ts = "-".join(map(str, date_parts[0]))
    if ts is None and 'raw' in issued:
        ts = issued['raw']
    if ts is None:
        ts = NO_DATE

    d = dateparser.parse(ts, default=DEFAULT_DATE)
    return d.date().isoformat()
def sort_year(items):
    """Group entry keys by publication year.

    :param items: iterable of ``(key, item)`` pairs.
    :returns: dict mapping each year reported by ``get_year`` to the list
        of keys with that year, in the order they were encountered.
    """
    by_year = {}
    for entry_key, entry in items:
        by_year.setdefault(get_year(entry), []).append(entry_key)
    return by_year
def generate_md(db, min_year=float('-inf'), max_year=float('inf'), group_by_year=True):
    """Render a CSL-JSON database as per-year Markdown bibliography sections.

    Every entry of ``db`` is tagged in place with an ``id`` equal to its
    key, grouped by year with ``sort_year``, ordered newest-first within a
    year using ``get_date_string``, and formatted with the
    ``SasView_linktitle`` citation style found in the ``static`` directory
    next to this script.  Entries whose ``type`` is ``patent`` are collected
    separately instead of being listed under their year.

    :param db: mapping of entry key -> CSL-JSON item dict (each item is
        mutated: an ``id`` field is added).
    :param min_year: lowest year to include (inclusive).
    :param max_year: highest year to include (inclusive).
    :param group_by_year: accepted but not used by this function.
    :returns: dict with three lists:
        ``"citations"`` - one Markdown section per year, ascending by year;
        ``"links"`` - one ``[year](#year)`` anchor link per year, same order;
        ``"patents"`` - formatted text of every patent entry.
    """
    directory = os.path.dirname(os.path.abspath(__file__))
    file_location = os.path.join(directory, "static", "SasView_linktitle")
    bib_style = CitationStylesStyle(file_location, validate=False)

    # The bibliography source looks entries up by their 'id' field, so copy
    # each key into its item before handing the items over.
    items = db.items()
    for k, v in items:
        v['id'] = k
    bib_source = CiteProcJSON([v for k, v in items])

    year_lookup = sort_year(items)
    output_years = [year for year in sorted(year_lookup)
                    if min_year <= year <= max_year]

    year_cite_items = []
    year_link_items = []
    patent_items = []
    for year in output_years:
        year_link_items.append('[{0}](#{0})'.format(year))

        keys = year_lookup[year]
        keys.sort(key=lambda l: get_date_string(db[l]), reverse=True)

        # A fresh bibliography per year: entries come back from
        # bibliography() and are paired with `keys` by position below.
        bibliography = CitationStylesBibliography(bib_style, bib_source, formatter.plain)
        for k in keys:
            bibliography.register(Citation([CitationItem(k)]))

        bib_output = []
        for k, b in zip(keys, bibliography.bibliography()):
            text = unescape("".join(b))
            if db[k].get('type', None) == 'patent':
                patent_items.append(text)
            else:
                bib_output.append(text)

        year_output = [
            '## {0}\n'.format(year),
            '<small>[top](#acknowledgements-and-contacts)</small>\n',
            '---\n',
        ]
        for i, bib_i in enumerate(bib_output):
            # Pass the entry text as a format *argument*: splicing it into
            # the format string would make any '{' or '}' in a title or
            # author name raise or be silently rewritten.
            # NOTE(review): numbering starts at 0, as before - confirm that
            # is what the Markdown renderer in use expects.
            year_output.append("{0}. {1}".format(i, bib_i))
        year_cite_items.append("\n".join(year_output))

    return {"citations": year_cite_items, "links": year_link_items, "patents": patent_items}
def callback(t):
    """Placeholder hook: accept one argument, do nothing, return ``None``."""
    return None
# Skeleton of the generated publications page: a front-matter header
# (layout, title) followed by the acknowledgement text and the per-year
# bibliography.  Filled in with str.format(); the placeholders are
# {title}, {preamble}, {year_links}, {content} and {postscript}.
TEMPLATE = """---
layout: page
title: {title}
---
## Acknowledgements and Contacts
If you found this software useful to your work please don't forget to acknowledge its use in your publications as suggested below and reference this website: _http://www.sasview.org/_. Please also consider letting us know by sending us the reference to your work. This will help us to ensure the long term support and development of the software.
> _This work benefited from the use of the SasView application, originally developed under NSF award DMR-0520547. SasView contains code developed with funding from the European Union's Horizon 2020 research and innovation programme under the SINE2020 project, grant agreement No 654000._
{preamble}
---
{year_links}
---
{content}
{postscript}
"""
# Fragment appended to the page preamble when patent entries exist.
# Filled in with str.format(); the single placeholder is {patent_items}.
PATENTS_SECTION = """
{patent_items}
"""
def make_page(group):
    """Build the Markdown publications page for one group and write it out.

    Reads the group's CSL-JSON database (located via ``DB_PATH`` and
    ``DB_FILENAME_FMT``), renders it with ``generate_md``, wraps the result
    in ``TEMPLATE`` using the group's settings from ``GROUPS`` and writes
    ``static/<group>_publications.md`` relative to the current working
    directory.

    :param group: key into ``GROUPS``; also substituted into the database
        and output file names.
    """
    db_file = os.path.join(DB_PATH, DB_FILENAME_FMT.format(group=group))
    with io.open(db_file, 'r', encoding='utf8') as f:
        db = json.load(f)

    pieces = generate_md(db)
    # generate_md returns years in ascending order; the page lists the
    # most recent year first.
    citations = pieces["citations"][::-1]
    year_links = pieces["links"][::-1]
    patents = pieces["patents"]

    settings = GROUPS[group]
    preamble = settings.get('header', '')
    if patents:
        preamble += PATENTS_SECTION.format(patent_items="\n ".join(patents))

    output = TEMPLATE.format(
        title=settings.get("title", "{group}".format(group=group)),
        content="\n".join(citations),
        year_links=", ".join(year_links),
        preamble=preamble,
        postscript=settings.get('footer', '')
    )

    output_filename = os.path.sep.join(
        ["static", "{group}_publications.md".format(group=group)])
    with io.open(output_filename, 'w', encoding='utf8') as f:
        f.write(output)
# Command-line entry point: build the page for each group named on the
# command line, or for every configured group when the first argument is
# "all" (case-insensitive).  With no arguments, print a usage line.
if __name__ == '__main__':
    import sys

    groups = sys.argv[1:]
    if not groups:
        print("usage: citeproc_to_html.py <group> <other_group>... or citeproc_to_html.py all")
    else:
        targets = GROUPS if groups[0].lower() == "all" else groups
        for group in targets:
            make_page(group)