-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.py
91 lines (80 loc) · 2.57 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import argparse
import logging
from datetime import datetime
from os import getenv, mkdir
from os.path import join, expanduser
from shutil import rmtree
from cbi import start
from hdx.api.configuration import Configuration
from hdx.facades.keyword_arguments import facade
from hdx.utilities.downloader import Download
from hdx.utilities.easy_logging import setup_logging
from hdx.utilities.retriever import Retrieve
# Identifier of this scraper: shown in the startup log line written by main()
# and passed to facade() as ``user_agent_lookup``.
lookup = "hdx-scraper-cbi-viz"
# Version number shown in the startup log line (formatted with one decimal).
VERSION = 1.0

# Initialise logging through the HDX helper, then log via the root logger.
setup_logging()
logger = logging.getLogger()
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the CBi Explorer script.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse falls back to the process command line, so
            existing ``parse_args()`` callers behave exactly as before.
            Passing a list lets the parser be driven programmatically.

    Returns:
        The parsed namespace with the attributes ``user_agent``,
        ``preprefix``, ``hdx_site``, ``output_dir``, ``saved_dir``, ``save``,
        ``use_saved`` and ``what``.
    """
    parser = argparse.ArgumentParser(description="CBi Explorer")
    parser.add_argument("-ua", "--user_agent", default=None, help="user agent")
    parser.add_argument("-pp", "--preprefix", default=None, help="preprefix")
    parser.add_argument("-hs", "--hdx_site", default=None, help="HDX site to use")
    parser.add_argument("-od", "--output_dir", default="output", help="Output folder")
    parser.add_argument(
        "-sd", "--saved_dir", default="saved_data", help="Saved data folder"
    )
    parser.add_argument(
        "-sv", "--save", default=False, action="store_true", help="Save downloaded data"
    )
    parser.add_argument(
        "-usv", "--use_saved", default=False, action="store_true", help="Use saved data"
    )
    parser.add_argument(
        "-wh", "--what", default="ukraine", help="What to run eg. ukraine, turkey"
    )
    # argv=None makes argparse read sys.argv[1:], matching the old behaviour.
    return parser.parse_args(argv)
def main(
    output_dir,
    saved_dir,
    save,
    use_saved,
    whattorun,
    **ignore,
):
    """Recreate the run's output folder and hand control to ``cbi.start``.

    Both folder names are suffixed with ``_<whattorun>`` so that each run
    target gets its own output and saved-data locations.

    Args:
        output_dir: Base name of the output folder.
        saved_dir: Base name of the saved-data folder.
        save: Forwarded to ``Retrieve`` (CLI help: "Save downloaded data").
        use_saved: Forwarded to ``Retrieve`` (CLI help: "Use saved data").
        whattorun: Name of the run target, e.g. "ukraine" or "turkey".
        **ignore: Any further keyword arguments; accepted and not used.
    """
    logger.info(f"##### {lookup} version {VERSION:.1f} ####")
    settings = Configuration.read()

    # Always start from a fresh folder: drop whatever an earlier run left
    # behind (a missing folder is not an error), then create it again.
    run_output_dir = f"{output_dir}_{whattorun}"
    rmtree(run_output_dir, ignore_errors=True)
    mkdir(run_output_dir)

    run_saved_dir = f"{saved_dir}_{whattorun}"
    with Download() as downloader:
        data_retriever = Retrieve(
            downloader,
            settings["fallback_dir"],
            run_saved_dir,
            run_output_dir,
            save,
            use_saved,
        )
        # Naive UTC timestamp rendered as an ISO 8601 string.
        timestamp = datetime.utcnow().isoformat()
        start(settings, timestamp, data_retriever, run_output_dir, whattorun)
if __name__ == "__main__":
    args = parse_args()
    # NOTE(review): --user_agent, --preprefix and --hdx_site are accepted by
    # parse_args but are not forwarded to facade here — confirm this is
    # intentional.
    home_dir = expanduser("~")
    facade_options = {
        "hdx_read_only": True,
        "user_agent_config_yaml": join(home_dir, ".useragents.yml"),
        "user_agent_lookup": lookup,
        "project_config_yaml": join("config", "project_configuration.yml"),
        "output_dir": args.output_dir,
        "saved_dir": args.saved_dir,
        "save": args.save,
        "use_saved": args.use_saved,
        # The CLI option is called --what; main() receives it as whattorun.
        "whattorun": args.what,
    }
    facade(main, **facade_options)