-
Notifications
You must be signed in to change notification settings - Fork 4
/
eagle-uel.py
145 lines (113 loc) · 3.82 KB
/
eagle-uel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# -*- coding: utf-8 -*-
"""
Importing Ubi Erat Lupa monuments
XML: http://www.ubi-erat-lupa.org/eagle/monuments.xml.php
Guide: http://www.ubi-erat-lupa.org/eagle/
Accepted options:
-dry
Dry run: don't edit the wiki, but process and print all the data.
It's useful together with -always to check for crashes in the script before launching the bot.
-always
Don't ask for confirmation before submitting a new item to the wiki.
-start:<uel_id>
Start from item whose UEL id is <uel_id>. Useful for resuming an interrupted import.
"""
import pywikibot, re
import xml.etree.ElementTree as ET
FILE_PATH = 'EAGLE-data/UbiEratLupa.xml'
IPR = 'CC0'
def main():
    """Import Ubi Erat Lupa monuments from FILE_PATH into the wiki repository.

    Reads the command-line options (-dry, -always, -start:<uel_id>), then
    walks every <monument> element of the XML file. For each monument the
    extracted data (id, title, German translation, translation author,
    publication title, IPR) is printed and, unless this is a dry run or the
    operator declines, a new item carrying that data is created.

    Monuments without an id, a title or a translation are reported and
    skipped instead of aborting the whole run.
    """
    always = dryrun = False
    startsWith = None

    # Handles command-line arguments for pywikibot.
    for arg in pywikibot.handleArgs():
        if arg == '-dry':  # Performs a dry run (does not edit site)
            dryrun = True
        elif arg == '-always':  # Does not ask for confirmation
            always = True
        elif arg.startswith('-start:'):  # Example: -start:IRT013
            # Strip only the option prefix, not later occurrences in the id.
            startsWith = arg[len('-start:'):]

    site = None
    if not dryrun:
        # pywikibot/families/eagle_family.py
        site = pywikibot.Site('en', 'eagle').data_repository()

    # (Heuristic) Splits author info from translation text.
    # Compiled once here rather than once per monument.
    authorReg = re.compile(u' (?:Translated by|Übersetzung): ?(.*)$', re.IGNORECASE | re.DOTALL)

    root = ET.parse(FILE_PATH).getroot()

    for m in root.findall('monument'):
        # findtext() yields None for a missing element instead of raising.
        uelId = m.findtext('id')

        if startsWith:
            if uelId != startsWith:
                continue  # Skips monuments until start
            startsWith = None  # Start reached: process everything from here on

        # ID
        if not uelId:
            pywikibot.output('WARNING: monument without ID found. Skipping.')
            continue
        pywikibot.output("\n>>>>> " + uelId + " <<<<<\n")

        # Title
        title = m.findtext('title')
        if not title:
            pywikibot.output('WARNING: no title found for ID: ' + uelId + '. Skipping.')
            continue
        pywikibot.output('Title: ' + title)

        # Translation DE:
        transElem = m.find('./inscription/translation')
        translation = elementText(transElem) if transElem is not None else ''
        # An empty <translation> element is treated like a missing one, so no
        # item with an empty translation claim gets submitted.
        if not translation:
            pywikibot.output('WARNING: no translation found for ID: ' + uelId + '. Skipping.')
            continue

        author = None
        authMatch = authorReg.search(translation)
        if authMatch:
            author = authMatch.group(1)
            pywikibot.output('Author: ' + author)
            # Removes author from translation
            translation = authorReg.sub('', translation)
        pywikibot.output('DE translation: ' + translation)

        # Publication title
        pubTitle = 'Ubi Erat Lupa'
        pywikibot.output('Publication title: ' + pubTitle)

        # IPR
        ipr = IPR
        pywikibot.output('IPR: ' + ipr)

        pywikibot.output('')  # newline

        if always:
            choice = 'y'
        else:
            choice = pywikibot.inputChoice(u"Proceed?", ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
            if choice in ['A', 'a']:
                # "All": accept this item and stop asking for the rest of the run.
                always = True
                choice = 'y'

        if not dryrun and choice in ['Y', 'y']:
            # New item: the UEL id is used as label, the title as description.
            page = pywikibot.ItemPage(site)
            page.editEntity({'labels': {'en': uelId, 'de': uelId}, 'descriptions': {'de': title}})
            # NOTE(review): presumably reloads the freshly created item before
            # claims are attached -- confirm against the pywikibot version in use.
            page.get()

            addClaimToItem(site, page, 'P34', uelId)
            addClaimToItem(site, page, 'P25', ipr)

            transClaim = pywikibot.Claim(site, 'P12')
            transClaim.setTarget(translation)
            page.addClaim(transClaim)

            # The publication title (and the author, when one was detected)
            # are attached as sources of the translation claim.
            sources = []

            pubClaim = pywikibot.Claim(site, 'P26')
            pubClaim.setTarget(pubTitle)
            sources.append(pubClaim)

            if author is not None:
                authorClaim = pywikibot.Claim(site, 'P21')
                authorClaim.setTarget(author)
                sources.append(authorClaim)

            transClaim.addSources(sources)

    if startsWith:
        # The requested start id never showed up, so every monument was skipped.
        pywikibot.output('WARNING: start ID ' + startsWith + ' not found. Nothing was imported.')
def addClaimToItem(site, page, id, value):
    """Build a claim for property `id` targeting `value` and attach it to `page`."""
    newClaim = pywikibot.Claim(site, id)
    newClaim.setTarget(value)
    page.addClaim(newClaim)
def elementText(elem):
    """Return the text content of `elem` on one line, without sub-element tags.

    The text of the element and of all its descendants is concatenated in
    document order and stripped of leading/trailing whitespace. Newlines and
    tabs are then replaced by spaces, and every run of two or more whitespace
    characters is collapsed into a single space.
    """
    text = ''.join(elem.itertext()).strip()
    # Raw strings keep the regex escapes away from Python's own string
    # escaping ('\s' in a plain literal is an invalid escape sequence).
    text = re.sub(r'[\n\t]', ' ', text)
    text = re.sub(r'\s{2,}', ' ', text)
    return text
if __name__ == '__main__':
    # Script entry point: pywikibot.stopme() is called whether main()
    # returns normally or raises.
    try:
        main()
    finally:
        pywikibot.stopme()