-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathorchestrator.py
98 lines (73 loc) · 3.22 KB
/
orchestrator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
from datetime import datetime, timedelta
from dotenv import load_dotenv
from src.cleaners.cleaner import run as run_cleaner
from src.models.model_manager import run as run_analyzer
from src.scrapers.scraper import run as run_scraper
from src.utilities.logger import SmareLogger
from src.scrapers.kbb import run as run_kbb
# Populate the process environment (via python-dotenv) before the settings
# below are read from it.
load_dotenv()

# Time budget of each pipeline stage, in seconds. Each value is configured in
# minutes through an environment variable; the second argument is the fallback
# (in minutes) used when the variable is not set.
SCRAPER_DURATION = 60 * float(os.getenv("SCRAPE_MINUTES", 6))
CLEANER_DURATION = 60 * float(os.getenv("CLEAN_MINUTES", 2))
ANALYZER_DURATION = 60 * float(os.getenv("ANALYZE_MINUTES", 4))

# Version numbers handed to the scraper and cleaner entry points.
CL_SCRAPER_VERSION = 6
FB_SCRAPER_VERSION = 6
CLEANER_VERSION = 3

# Module-wide logger shared by every function in this file.
logger = SmareLogger()
def calculate_timestamp(seconds):
    """Return the local date-time that lies ``seconds`` seconds from now.

    Args:
        seconds: Offset from the current time, in seconds. It is forwarded
            unchanged to ``timedelta(seconds=...)``.

    Returns:
        A naive ``datetime`` equal to ``datetime.now()`` plus the offset, or
        ``None`` when computing it raised (the failure is logged as critical
        instead of being propagated).
    """
    try:
        deadline = datetime.now() + timedelta(seconds=seconds)
    except Exception as err:
        logger.critical(f"Orchestrator failed to generate module termination-timestamp. Error: {err}")
        # Callers receive None rather than an exception on failure.
        return None
    return deadline
def facebook(termination_timestamp=None):
    """Run the Facebook scraper until ``termination_timestamp``.

    Args:
        termination_timestamp: Passed to ``run_scraper`` as its first
            argument. When omitted (or ``None``), a timestamp seven days
            after the moment of the call is used.

    Any exception raised while running the scraper is logged as critical and
    suppressed rather than re-raised.
    """
    try:
        if termination_timestamp is None:
            # Compute the fallback on every call. Written as a default-argument
            # expression it was evaluated only once, when the module was
            # imported, so a long-lived process kept reusing the same
            # (eventually expired) deadline.
            termination_timestamp = calculate_timestamp(7 * 24 * 60 * 60)
        run_scraper(termination_timestamp, "facebook", FB_SCRAPER_VERSION)
    except Exception as e:
        logger.critical(f"Orchestrator failed runnning facebook scraper. Error: {e}")
def craigslist(termination_timestamp=None):
    """Run the Craigslist scraper until ``termination_timestamp``.

    Args:
        termination_timestamp: Passed to ``run_scraper`` as its first
            argument. When omitted (or ``None``), a timestamp seven days
            after the moment of the call is used.

    Any exception raised while running the scraper is logged as critical and
    suppressed rather than re-raised.
    """
    try:
        if termination_timestamp is None:
            # Compute the fallback on every call. Written as a default-argument
            # expression it was evaluated only once, when the module was
            # imported, so a long-lived process kept reusing the same
            # (eventually expired) deadline.
            termination_timestamp = calculate_timestamp(7 * 24 * 60 * 60)
        run_scraper(termination_timestamp, "craigslist", CL_SCRAPER_VERSION)
    except Exception as e:
        logger.critical(f"Orchestrator failed runnning craigslist scraper. Error: {e}")
def clean(termination_timestamp=None):
    """Run the cleaner module until ``termination_timestamp``.

    Args:
        termination_timestamp: Passed to ``run_cleaner`` as its first
            argument, followed by ``CLEANER_VERSION``. When omitted (or
            ``None``), a timestamp seven days after the moment of the call
            is used.

    Exceptions raised by ``run_cleaner`` are not caught here; they propagate
    to the caller.
    """
    if termination_timestamp is None:
        # Compute the fallback on every call. As a default-argument expression
        # it was evaluated only once, at import time, and then went stale.
        termination_timestamp = calculate_timestamp(7 * 24 * 60 * 60)
    run_cleaner(termination_timestamp, CLEANER_VERSION)
def model(termination_timestamp=None):
    """Run the analyzer (model manager) until ``termination_timestamp``.

    Args:
        termination_timestamp: Passed to ``run_analyzer`` as its only
            argument. When omitted (or ``None``), a timestamp seven days
            after the moment of the call is used.

    Exceptions raised by ``run_analyzer`` are not caught here; they propagate
    to the caller.
    """
    if termination_timestamp is None:
        # Compute the fallback on every call. As a default-argument expression
        # it was evaluated only once, at import time, and then went stale.
        termination_timestamp = calculate_timestamp(7 * 24 * 60 * 60)
    run_analyzer(termination_timestamp)
def smare(scraper_name):
    """Run the full pipeline: the chosen scraper, then the cleaner, then the analyzer.

    Args:
        scraper_name: ``"facebook"`` or ``"craigslist"``. Any other value is
            logged as critical and the function returns ``None`` without
            running the cleaner or the analyzer.

    Each stage receives its own deadline, computed from the matching
    ``*_DURATION`` constant immediately before the stage starts. A failure in
    one stage is logged as critical and does not stop the following stages.
    """
    # Stage 1: scrape with the requested source.
    try:
        if scraper_name == "facebook":
            run_selected_scraper = facebook
            logger.info("Starting SMARE with facebook...")
        elif scraper_name == "craigslist":
            run_selected_scraper = craigslist
            logger.info("Starting SMARE with craigslist...")
        else:
            logger.critical(f"Invalid scraper name specified '{scraper_name}'")
            return None

        scrape_deadline = calculate_timestamp(SCRAPER_DURATION)
        run_selected_scraper(scrape_deadline)
    except Exception as err:
        logger.critical(f"Orchestrator failed while runnning the scraper module. Error: {err}")

    # Stage 2: clean what was scraped.
    try:
        clean_deadline = calculate_timestamp(CLEANER_DURATION)
        clean(clean_deadline)
    except Exception as err:
        logger.critical(f"Orchestrator failed runnning the cleaner module. Error: {err}")

    # Stage 3: run the analyzer.
    try:
        analyze_deadline = calculate_timestamp(ANALYZER_DURATION)
        model(analyze_deadline)
    except Exception as err:
        logger.critical(f"Orchestrator failed runnning analyzer module (model manager). Error: {err}")
# `event` and `context` are accepted so this function can be used as an AWS Lambda handler.
def smare_craigslist(event="", context=""):
    """Entry point that runs the SMARE pipeline with the Craigslist scraper.

    Args:
        event: Not read by this function.
        context: Not read by this function.

    Any exception escaping ``smare`` is logged as critical and suppressed.
    """
    try:
        logger.debug("Attempting to start SMARE with Craigslist scraper")
        smare("craigslist")
    except Exception as err:
        logger.critical(f"SMARE failed running the craigslist pipeline. Error: {err}")
# `event` and `context` are accepted so this function can be used as an AWS Lambda handler.
def smare_facebook(event="", context=""):
    """Entry point that runs the SMARE pipeline with the Facebook scraper.

    Args:
        event: Not read by this function.
        context: Not read by this function.

    Any exception escaping ``smare`` is logged as critical and suppressed.
    """
    try:
        logger.debug("Attempting to start SMARE with Facebook scraper")
        smare("facebook")
    except Exception as err:
        logger.critical(f"SMARE failed running the facebook pipeline. Error: {err}")
def kbb():
    """Invoke the KBB scraper's ``run`` entry point.

    Whatever ``run_kbb`` returns is discarded; this function returns ``None``.
    Exceptions raised by ``run_kbb`` propagate to the caller.
    """
    run_kbb()