Skip to content

Commit

Permalink
Merge pull request #260 from andersen-lab/autoadapt
Browse files Browse the repository at this point in the history
adds automatic adapt parameter selection via median error rate
  • Loading branch information
joshuailevy authored Nov 20, 2024
2 parents 05d8903 + 1f8fa8a commit 9495509
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 14 deletions.
39 changes: 30 additions & 9 deletions freyja/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,15 @@ def print_barcode_version(ctx, param, value):
help='Pathogen of interest.' +
'Not used if using --barcodes option.',
show_default=True)
@click.option('--autoadapt', default=False,
is_flag=True,
help='use error profile to set adapt',
show_default=True)
def demix(variants, depths, output, eps, barcodes, meta,
covcut, confirmedonly, depthcutoff, lineageyml,
adapt, a_eps, region_of_interest,
relaxedmrca, relaxedthresh, solver, pathogen):
relaxedmrca, relaxedthresh, solver, pathogen,
autoadapt):
"""
Generate relative lineage abundances from VARIANTS and DEPTHS
"""
Expand All @@ -110,9 +115,9 @@ def demix(variants, depths, output, eps, barcodes, meta,
# drop intra-lineage diversity naming (keeps separate barcodes)
indexSimplified = [dfi.split('_')[0] for dfi in df_barcodes.index]
df_barcodes = df_barcodes.loc[indexSimplified, :]
df_depth = pd.read_csv(depths, sep='\t', header=None, index_col=1)

if depthcutoff != 0:
df_depth = pd.read_csv(depths, sep='\t', header=None, index_col=1)
df_barcodes = collapse_barcodes(df_barcodes, df_depth, depthcutoff,
lineageyml, locDir, output,
relaxedmrca, relaxedthresh,
Expand All @@ -121,11 +126,20 @@ def demix(variants, depths, output, eps, barcodes, meta,
mapDict = buildLineageMap(meta)
print('building mix/depth matrices')
# assemble data from (possibly) mixed samples
mix, depths_, cov = build_mix_and_depth_arrays(variants, depths, muts,
covcut)
print('demixing')
if autoadapt:
mix, depths_, cov, adapt = build_mix_and_depth_arrays(variants,
depths,
muts,
covcut,
autoadapt)
else:
mix, depths_, cov, _ = build_mix_and_depth_arrays(variants,
depths,
muts,
covcut,
autoadapt)
df_barcodes, mix, depths_ = reindex_dfs(df_barcodes, mix, depths_)

print('demixing')
try:
sample_strains, abundances, error = solve_demixing_problem(df_barcodes,
mix,
Expand Down Expand Up @@ -507,10 +521,14 @@ def variants(bamfile, ref, variants, depths, refname, minq, annot, varthresh):
help='Pathogen of interest.' +
'Not used if using --barcodes option.',
show_default=True)
@click.option('--autoadapt', default=False,
is_flag=True,
help='use error profile to set adapt',
show_default=True)
def boot(variants, depths, output_base, eps, barcodes, meta,
nb, nt, boxplot, confirmedonly, lineageyml, depthcutoff,
rawboots, relaxedmrca, relaxedthresh, bootseed,
solver, pathogen):
solver, pathogen, autoadapt):
"""
Perform bootstrapping method for freyja using VARIANTS and DEPTHS
"""
Expand Down Expand Up @@ -546,8 +564,11 @@ def boot(variants, depths, output_base, eps, barcodes, meta,
print('building mix/depth matrices')
# assemble data from (possibly) mixed samples
covcut = 10 # set value, coverage estimate not returned to user from boot
mix, depths_, cov = build_mix_and_depth_arrays(variants, depths, muts,
covcut)
mix, depths_, cov, adapt = build_mix_and_depth_arrays(variants,
depths,
muts,
covcut,
autoadapt)
print('demixing')
df_barcodes, mix, depths_ = reindex_dfs(df_barcodes, mix, depths_)
lin_df, constell_df = perform_bootstrap(df_barcodes, mix, depths_,
Expand Down
22 changes: 19 additions & 3 deletions freyja/sample_deconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,31 @@ def buildLineageMap(locDir):
return mapDict


def build_mix_and_depth_arrays(fn, depthFn, muts, covcut):
def get_error_rate(muts, df0, df_depths0):
from freyja.convert_paths2barcodes import sortFun
sites = set([sortFun(m) for m in muts])
df_depths0 = df_depths0[~df_depths0.index.isin(sites)]
df0 = df0[~df0['POS'].isin(sites)]
df0 = df0[df0['ALT_FREQ'] > 0]
error_rate = df0['ALT_FREQ'].median()
return error_rate


def build_mix_and_depth_arrays(fn, depthFn, muts, covcut, autoadapt):
input_is_vcf = fn.lower().endswith('vcf')
if input_is_vcf:
df = read_snv_frequencies_vcf(fn, depthFn, muts)
else:
df = read_snv_frequencies_ivar(fn, depthFn, muts)

df = df[['REF', 'POS', 'ALT', 'ALT_FREQ', 'ALT_DP']]
# only works for substitutions, but that's what we get from usher tree
df_depth = pd.read_csv(depthFn, sep='\t', header=None, index_col=1)
df_depth = pd.read_csv(depthFn, sep='\t', header=None, index_col=1)[[3]]

if autoadapt:
adapt = get_error_rate(muts, df, df_depth)
else:
adapt = 0

df['mutName'] = df['REF'] + df['POS'].astype(str) + df['ALT']
df = df.drop_duplicates(subset='mutName')
Expand All @@ -77,7 +93,7 @@ def build_mix_and_depth_arrays(fn, depthFn, muts, covcut):
depths = pd.Series({kI: df_depth.loc[int(re.findall(r'\d+', kI)[0]), 3]
.astype(float) for kI in muts}, name=fn)
coverage = 100.*np.sum(df_depth.loc[:, 3] >= covcut)/df_depth.shape[0]
return mix, depths, coverage
return mix, depths, coverage, adapt


def read_snv_frequencies_ivar(fn, depthFn, muts):
Expand Down
6 changes: 4 additions & 2 deletions freyja/tests/test_deconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ def test_build_mix_and_depth_plus_reindex(self):
varFn = 'freyja/data/mixture.tsv'
depthFn = 'freyja/data/mixture.depth'
covcut = 10
mix, depths, cov = build_mix_and_depth_arrays(varFn, depthFn, muts,
covcut)
autoadapt = False
mix, depths, cov, adapt = build_mix_and_depth_arrays(varFn, depthFn,
muts, covcut,
autoadapt)
# just making sure the files we've read in are ready for use
self.assertTrue(ptypes.is_float_dtype(mix))
self.assertTrue(ptypes.is_float_dtype(depths))
Expand Down

0 comments on commit 9495509

Please sign in to comment.