This repository has been archived by the owner on Dec 16, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathretriever.py
executable file
·80 lines (66 loc) · 2.34 KB
/
retriever.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/python2.7
from __future__ import print_function
from hepdata_retriever.retriever import Retriever
import os
import shutil
import errno
import sys
from progressbar import ProgressBar, Percentage, Bar, Widget
# Root of the permanent record store. Each record ends up in a sub-directory
# named after the last two characters of its id (see the retrieval loop).
data_dir = '/hepdata/data/'
# Scratch directory handed to the Retriever instances; finished records are
# moved out of it and the leftover '<id>.zip' is deleted from it.
temp_dir = '/hepdata/tmp/'
def makedirs(dirs):
    """Create directory *dirs* (and missing parents) unless it already exists.

    Works like ``os.makedirs`` but is a no-op when the directory is already
    present, so it can be called repeatedly for the same path.

    Args:
        dirs: Path of the directory to create.

    Raises:
        OSError: If creation fails for any reason other than the directory
            already existing -- including the case where *dirs* exists but
            is not a directory.
    """
    try:
        os.makedirs(dirs)
    except OSError as err:
        # EEXIST is also reported when a regular file occupies the path;
        # only an existing *directory* satisfies the caller's expectation.
        if err.errno != errno.EEXIST or not os.path.isdir(dirs):
            # Bare raise keeps the original traceback (matters on Python 2).
            raise
class Label(Widget):
    """Progress-bar widget that renders a replaceable piece of text.

    The text is left-justified and padded to ``min_length`` characters so
    that the widgets following it do not shift when the text changes.
    """

    def __init__(self, min_length=0, starting_text=''):
        # min_length must be set first: change_text() reads it for padding.
        self.min_length = min_length
        self.change_text(starting_text)

    def change_text(self, text):
        """Replace the displayed text, padding it to ``min_length``."""
        padded = text.ljust(self.min_length)
        self.text = padded

    def update(self, pbar):
        """Return the current text surrounded by single spaces.

        *pbar* is accepted for the widget interface but is not used.
        """
        return ''.join((' ', self.text, ' '))
class AlwaysUpdatingProgressBar(ProgressBar):
    """ProgressBar variant whose ``_need_update`` hook always answers yes.

    NOTE(review): presumably ProgressBar consults ``_need_update`` to decide
    whether to redraw; forcing True would make every ``update()`` call
    refresh the display (and thus the Label text) -- confirm against the
    installed progressbar version.
    """

    def _need_update(self):
        # Unconditional: never skip an update.
        return True
# ---------------------------------------------------------------------------
# Script body: fetch every record known to the current system into data_dir.
#
# Each record is downloaded into temp_dir by a Retriever, then moved to
# data_dir/<last two characters of the id>/<id>. Records already present at
# their destination are skipped, so the script can be re-run after an
# interruption.
# ---------------------------------------------------------------------------

# Alternative endpoint used for the records listed in fallback_ids.
fallback_url = "http://hepdata.cedar.ac.uk/h2test/view/{0}/yaml"
fallback_ids = ('ins825040', 'ins1289225')

makedirs(temp_dir)

retriever = Retriever(temp_dir)
inspire_ids = retriever.get_all_ids_in_current_system()

submission_label = Label(min_length=10)
pbar = AlwaysUpdatingProgressBar(
    maxval=len(inspire_ids),
    widgets=[
        Percentage(),
        submission_label,
        Bar(marker='#', left='[', right=']'),
    ],
).start()

# fallback for very large YAML exports
retriever2 = Retriever(temp_dir, base_url=fallback_url)

for index, inspire_id in enumerate(inspire_ids):
    # Show which record is being processed next to the percentage.
    submission_label.change_text(inspire_id)
    pbar.update(index)

    if inspire_id == '':
        print('Warning: empty inspire_id', file=sys.stderr)
        continue

    # Records are sharded into sub-directories by the last two characters
    # of their id.
    dest_path = os.path.join(data_dir, inspire_id[-2:])
    makedirs(dest_path)

    record_path = os.path.join(dest_path, inspire_id)
    if os.path.exists(record_path):
        # Already retrieved on an earlier run.
        continue

    try:
        if inspire_id in fallback_ids:
            print('Warning: downloading from ' + fallback_url.format(inspire_id),
                  file=sys.stderr)
            retriever2.get_record(inspire_id)
        else:
            retriever.get_record(inspire_id)

        # The retrieved record is expected at temp_dir/<id>, with a
        # temp_dir/<id>.zip left beside it that is no longer needed.
        shutil.move(os.path.join(temp_dir, inspire_id), record_path)
        os.remove(os.path.join(temp_dir, inspire_id + '.zip'))
    except KeyboardInterrupt:
        print('\nInterrupted.', file=sys.stderr)
        raise SystemExit(2)
    except Exception as err:
        # Best effort: one bad record must not abort the whole run, but say
        # why it was skipped. %r avoids encoding errors on Python 2.
        print('Ignoring %s (%r).' % (inspire_id, err), file=sys.stderr)

pbar.finish()