-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix empty output files when no targets are found in inputs #51
Changes from all commits
73365e7
0159bd4
2db16fd
acf4335
0a8fe89
5eea6dc
f8cde66
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,18 +4,20 @@ | |
import argparse | ||
import logging | ||
import sys | ||
import os | ||
import re | ||
import os | ||
from typing import Optional, List, Any, Tuple | ||
|
||
import attr | ||
import pandas as pd | ||
|
||
from . import program_desc, __version__ | ||
from .const import SUBTYPE_SUMMARY_COLS, REGEX_FASTQ, REGEX_FASTA, JSON_EXT_TMPL | ||
from .subtype import Subtype | ||
from .subtype_stats import subtype_counts | ||
from .subtyper import \ | ||
query_contigs_ac, \ | ||
query_reads_ac | ||
subtype_contigs_samples, \ | ||
subtype_reads_samples | ||
from .metadata import read_metadata_table, merge_metadata_with_summary_results | ||
from .utils import \ | ||
genome_name_from_fasta_path, \ | ||
|
@@ -192,36 +194,36 @@ def main(): | |
scheme_subtype_counts = subtype_counts(scheme_fasta) | ||
logging.debug(args) | ||
subtyping_params = init_subtyping_params(args, scheme) | ||
input_genomes, reads = collect_inputs(args) | ||
if len(input_genomes) == 0 and len(reads) == 0: | ||
input_contigs, input_reads = collect_inputs(args) | ||
if len(input_contigs) == 0 and len(input_reads) == 0: | ||
raise Exception('No input files specified!') | ||
df_md = None | ||
if args.scheme_metadata: | ||
df_md = read_metadata_table(args.scheme_metadata) | ||
n_threads = args.threads | ||
|
||
subtype_results = [] # type: List[Subtype] | ||
dfs = [] # type: List[pd.DataFrame] | ||
if len(input_genomes) > 0: | ||
query_contigs_ac(subtype_results=subtype_results, | ||
dfs=dfs, | ||
input_genomes=input_genomes, | ||
scheme=scheme, | ||
scheme_name=scheme_name, | ||
subtyping_params=subtyping_params, | ||
scheme_subtype_counts=scheme_subtype_counts, | ||
n_threads=n_threads) | ||
if len(reads) > 0: | ||
query_reads_ac(subtype_results=subtype_results, | ||
dfs=dfs, | ||
reads=reads, | ||
scheme=scheme, | ||
scheme_name=scheme_name, | ||
subtyping_params=subtyping_params, | ||
scheme_subtype_counts=scheme_subtype_counts, | ||
n_threads=n_threads) | ||
subtype_results = [] # type: List[Tuple[Subtype, pd.DataFrame]] | ||
if len(input_contigs) > 0: | ||
contigs_results = subtype_contigs_samples(input_genomes=input_contigs, | ||
scheme=scheme, | ||
scheme_name=scheme_name, | ||
subtyping_params=subtyping_params, | ||
scheme_subtype_counts=scheme_subtype_counts, | ||
n_threads=n_threads) | ||
logging.info('Generated %s subtyping results from %s contigs samples', len(contigs_results), len(input_contigs)) | ||
subtype_results += contigs_results | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is subtype_results iterable? Did you mean subtype_results.append(contigs_results)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's a nice thing you can do in Python: you can concatenate two lists with `+=` (which extends the left-hand list in place) or with `+` (which builds a new list):
list1 = [1, 2, 3]
list1 += [4, 5]
assert list1 == [1,2,3,4,5]
#or
list1 = [1, 2, 3]
list2 = [4, 5]
list3 = list1 + list2
assert list3 == [1,2,3,4,5]
# list1 and list2 are unmodified
Hopefully that makes sense! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh OK, I see. That makes sense, thanks! |
||
if len(input_reads) > 0: | ||
reads_results = subtype_reads_samples(reads=input_reads, | ||
scheme=scheme, | ||
scheme_name=scheme_name, | ||
subtyping_params=subtyping_params, | ||
scheme_subtype_counts=scheme_subtype_counts, | ||
n_threads=n_threads) | ||
logging.info('Generated %s subtyping results from %s contigs samples', len(reads_results), len(input_reads)) | ||
subtype_results += reads_results | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
dfsummary = pd.DataFrame(subtype_results) | ||
dfs = [df for st, df in subtype_results] # type: List[pd.DataFrame] | ||
dfsummary = pd.DataFrame([attr.asdict(st) for st, df in subtype_results]) | ||
dfsummary = dfsummary[SUBTYPE_SUMMARY_COLS] | ||
|
||
if dfsummary['avg_tile_coverage'].isnull().all(): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be an "or" condition, so that the error is raised if either of these lists is empty? Or is it fine for just one of them to be empty?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We're allowing the user to specify a mix of file types: they can specify contigs (FASTA) and reads (FASTQ) in the same analysis. If both lists are empty, then no input whatsoever has been specified, so we can't do anything except let the user know they haven't specified anything to analyze.