-
Notifications
You must be signed in to change notification settings - Fork 5
/
config.yaml
82 lines (72 loc) · 2.39 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Settings for the biblio_glutton_harvester.

# Local working directory. It holds the lmdb that keeps track of the
# harvesting progress and, when no other cloud storage is defined, the
# harvested data as well.
data_path: "./data"

# Install path of Pub2TEI, needed to convert downloaded NLM files into TEI.
pub2tei_path: null

# Install path of LaTeXML, needed to convert downloaded LaTeX sources into
# TEI (for TEI compatibility, install the fork
# https://github.com/kermitt2/LaTeXML).
latexml_path: null

# When true, the stored objects are gzip-compressed.
compression: true

# Maximum number of parallel tasks (download, storage, compression,
# validation, ...).
batch_size: 100

# When true, cloudscraper is used to get through Cloudflare challenge(s)
# during download. This slows down the average download time very
# significantly, but gives a higher download success rate.
cloudflare_support: false
# How to access resources: mirrors of dumps that are not accessible at
# file level, and how to reach those mirrors when they are hosted on an
# S3-compatible storage.
# NOTE(review): the nesting below is reconstructed from the key names; in
# the flat layout `s3` and the credential keys were repeated at top level
# (duplicate keys) and every parent key was null. Confirm against the code
# that loads this file.
resources:
  pmc:
    prioritize_pmc: true
    pmc_base: "ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/"
  arxiv:
    s3:
      arxiv_bucket_name: null
      aws_access_key_id: null
      aws_secret_access_key: null
      region: null
      aws_end_point: null
  plos:
    s3:
      plos_bucket_name: null
      aws_access_key_id: null
      aws_secret_access_key: null
      region: null
      aws_end_point: null
  ieee:
    # Skip IEEE download, as they might complain based on the downloaded
    # volume; other download options will be used as fallback.
    skip: true
# Metadata services to use to retrieve metadata.
# NOTE(review): the three keys are assumed to belong under `metadata`
# (they were flattened to top level, leaving `metadata` null) — confirm
# against the code that loads this file.
metadata:
  biblio_glutton_base: null
  crossref_base: "https://api.crossref.org"
  crossref_email: null
# Storage on S3-compatible object storage.
# NOTE(review): the keys are assumed to belong under `aws`; in the flat
# layout they sat at top level, where `aws_access_key_id`,
# `aws_secret_access_key`, `region` and `aws_end_point` duplicated the
# per-resource S3 keys. Confirm against the code that loads this file.
aws:
  aws_access_key_id: null
  aws_secret_access_key: null
  bucket_name: null
  region: null
  aws_end_point: null
# Storage on OpenStack Swift object storage.
# NOTE(review): nesting reconstructed from the key names — `swift_container`
# and `swift_parameters` are assumed to sit under `swift`, and the
# auth/os_* options under `swift_parameters`. Confirm against the code
# that loads this file.
swift:
  swift_container: null
  swift_parameters:
    auth_version: "3"
    auth_url: null
    user: null
    os_username: null
    os_password: null
    key: null
    os_user_domain_name: "Default"
    os_project_domain_name: "Default"
    os_project_name: null
    os_project_id: null
    os_region_name: null
    os_auth_url: null