forked from CDLUC3/counter-processor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
executable file
·47 lines (38 loc) · 1.26 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python
import config
from models import *
import input_processor as ip
import output_processor as op
from upload import upload
import os
import glob
import sys
# import ipdb; ipdb.set_trace()
# Entry-point script: load the pending daily log files into the processing
# database, write the report in the configured format and exit with status 0.

# Create the processing database when its file does not exist yet; otherwise
# tidy up the existing one before adding more rows.
if not os.path.isfile(config.processing_database):
    DbActions.create_db()
else:
    DbActions.vacuum()  # cleans up DB indices for speed

the_filenames = config.filenames_to_process()

print(f'Running report for {config.start_time().isoformat()} to {config.end_time().isoformat()}')

# process the log lines into a sqlite database
print(f'{len(the_filenames)} daily log file(s) will be added to the database')
print(f'Last processed date: {config.last_processed_on()}')
for lf in the_filenames:
    with open(lf) as infile:
        print(f'processing {lf}')
        # Each line of the log file is wrapped in a LogLine and populated.
        for line in infile:
            ll = ip.LogLine(line)
            ll.populate()

# NOTE(review): this call takes no per-file argument, so it is placed once
# after all files have been read; confirm it was not meant to run per file.
config.update_log_processed_date()

print('')
DbActions.vacuum()  # cleanup indices, etc, maybe makes queries faster

# output for each unique identifier (that isn't robots)
if config.output_format == 'tsv':
    my_report = op.TsvReport()
    my_report.output()
elif config.output_format == 'json':
    my_report = op.JsonReport()
    my_report.output()
    # NOTE(review): the upload is assumed to apply to the JSON report only;
    # confirm the nesting. The explicit `== True` is kept on purpose so that a
    # non-bool truthy setting (e.g. the string 'False') cannot trigger an upload.
    if config.upload_to_hub == True:
        upload.send_to_datacite()
else:
    # Any other value used to fall through silently with no report at all;
    # say so on stderr while keeping the exit status unchanged.
    print(
        f'Unrecognized output_format {config.output_format!r}; no report was written',
        file=sys.stderr,
    )

sys.exit(0)