forked from ncezid-biome/HMAS-QC-Pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pipeline.py
executable file
·139 lines (117 loc) · 4.88 KB
/
pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env python
import logging, sys, os, argparse, shutil, errno
import re
sys.path.insert(0,r'./helper_scripts')
import config_checker, log_parser, file_remover
def find_tool(name):
    """Report whether an executable can be resolved through PATH.

    Parameters
    ----------
    name : str
        Name of the executable file to look for.

    Returns
    -------
    bool
        True if `shutil.which` resolves `name`, False otherwise.
    """
    resolved_path = shutil.which(name)
    return resolved_path is not None
def getErrorCode(error_message):
    """Extract the value of the ``return_code=<value>`` field from a message.

    Parameters
    ----------
    error_message : str
        Text to search, e.g. ``str(exc)`` of a caught RuntimeError.

    Returns
    -------
    int or None
        The integer that follows ``return_code=``. None is returned when the
        field holds the literal ``None``, when the field is missing, or when
        it is not followed by a whole integer.
    """
    # Capture only the literal None or a signed integer so that trailing
    # punctuation such as ")" or "," never reaches int().
    match = re.search(r"return_code=(None|[-+]?\d+)\b", error_message)
    if match is None:
        # No usable return code in the message; report "unknown" instead of
        # raising from inside the caller's exception handler.
        return None

    return_code = match.group(1)
    if return_code == 'None':
        return None
    return int(return_code)
def lookUpErrorCode(error_code):
    """Translate an error code into a human-readable description.

    Parameters
    ----------
    error_code
        Numeric code, or None. The sign is ignored when looking the
        code up, since the code is usually negative.

    Returns
    -------
    str
        The description stored for codes 1-15 (POSIX-UNIX system; note the
        error code is system-dependent), a fixed "unknown" message for any
        other non-zero code, or a fixed "no error" message when the code is
        0 or None.
    """
    # 0 and None are both falsy and share the same answer.
    if not error_code:
        return "There is either no error (code =0), or error_code = None"

    descriptions = {
        1: 'terminate a connection, or reload the configuration for daemons',
        2: 'interrupt the session from the dialogue station',
        3: 'terminate the session from the dialogue station',
        4: 'illegal instruction was executed',
        5: 'Trace/breakpoint trap',
        6: 'abnormal termination',
        7: 'error on the system bus',
        8: 'Erroneous arithmetic operation',
        9: 'immediately terminate the process',
        10: 'user-defined signal',
        11: 'segmentation fault due to illegal access of a memory segment',
        12: 'user-defined signal',
        13: 'writing into a pipe, and nobody is reading from it',
        14: 'the timer terminated (alarm)',
        15: 'terminate the process in a soft way',
    }
    magnitude = abs(error_code)
    return descriptions.get(
        magnitude, "unknown error code, it's not an unsigned 8 bits value"
    )
def main():
    """Run the Mothur QC pipeline on HMAS data from a configuration file.

    Steps, in order:

    1. Parse the required ``-c/--config`` argument and pass it to
       ``config_checker.main``.
    2. Configure file logging to ``hmas_qc_pipeline.log`` inside the
       configured ``output_dir``.
    3. Check that the ``mothur`` executable, the ``mothur_py`` module and the
       ``mpy_batch`` module are available; exit with status 1 otherwise.
    4. Call ``mpy_batch.main(config)``; a RuntimeError is logged together
       with a description of its return code.
    5. Pass the mothur log file to ``log_parser.parse`` and call
       ``file_remover.remove_vsearch_files``, whether or not step 4 raised.

    Raises
    ------
    SystemExit
        With code 1 when mothur, mothur_py or mpy_batch cannot be found.
    """
    parser = argparse.ArgumentParser(description='Run Mothur QC pipeline on HMAS data.')
    parser.add_argument('-c', '--config', metavar='', required=True,
                        help='Specify configuration file')
    args = parser.parse_args()
    config = config_checker.main(args.config)

    # basicConfig() does nothing when the root logger already has handlers,
    # so remove any that were installed earlier.
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    log_format = "%(levelname)s %(asctime)s - %(message)s"
    log_path = os.path.join(
        os.path.expanduser(config['file_inputs']['output_dir']),
        'hmas_qc_pipeline.log',
    )
    logging.basicConfig(filename=log_path, format=log_format, level=logging.DEBUG)
    logger = logging.getLogger()
    logger.info(f'The config file to be parsed is: {args.config}')

    if not find_tool('mothur'):
        logger.error('mothur not found on path. Is it installed?')
        logger.error('Program exited because Mothur could not be found.')
        sys.exit(1)
    logger.info('mothur is on path and is executable.')

    try:
        # Imported only to verify that the package is available.
        from mothur_py import Mothur  # noqa: F401
    except ModuleNotFoundError as e:
        print(e)
        logger.error('Unable to import mothur-py module. Is it installed and on PATH?')
        logger.error('Program exited because mothur_py could not be imported.')
        sys.exit(1)
    logger.info('mothur-py module is installed.')

    # Import mpy_batch separately from running it: the cleanup below reads
    # mpy_batch.MOTHUR_LOG_FILE, so it must never run with the name unbound.
    try:
        import mpy_batch
    except ModuleNotFoundError as e:
        print(e)
        logger.error(e)
        logger.error('Program exited because mpy_batch module could not be imported.')
        sys.exit(1)

    try:
        mpy_batch.main(config)
        logger.info(f'mothur-py executed on files listed in {args.config}')
    except RuntimeError as e:
        print(e)
        logger.error(e)
        # A message without a parsable return code must not raise a second
        # exception from inside this handler; treat it as "no code".
        try:
            error_code = getErrorCode(str(e))
        except (TypeError, ValueError):
            error_code = None
        logger.error(f'The return error code might indicate: {lookUpErrorCode(error_code)}')
        if not error_code:  # error_code = 0 or None; MOTHUR_ERROR_FLAG is True in this case
            print('Please check mothur logfile for details')
            logger.error('Please check mothur log file for details')
    finally:
        # Parse the mothur log file to remove the redundancy.
        mothur_log = mpy_batch.MOTHUR_LOG_FILE
        if os.access(mothur_log, os.R_OK):
            log_parser.parse(mothur_log)
        else:
            logger.error(f'mothur log file: {mothur_log} does not exist !')
        # Remove the temp files created by mothur's chimera.vsearch().
        file_remover.remove_vsearch_files(config)
# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()