Skip to content

Commit

Permalink
Merge pull request #239 from andersen-lab/barcode-format-change
Browse files: browse the repository at this point in the history
Switch the barcode file format from CSV to Feather, with a paired JSON file of lineage mutations
  • Loading branch information
joshuailevy authored Jun 10, 2024
2 parents f82a9bd + a1523ba commit e919990
Show file tree
Hide file tree
Showing 11 changed files with 246 additions and 117 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/update_barcodes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,15 @@ jobs:
- name: commit if output is different
run: |
echo "running git diff, committing if different"
if [[($(git status freyja/data/usher_barcodes.csv --porcelain | wc -c) -ne 0)]]; then
if [[($(git status freyja/data/usher_barcodes.feather --porcelain | wc -c) -ne 0)]]; then
echo "changes were made, updating barcodes"
git config --local user.name "$GITHUB_ACTOR"
git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
echo $(date +"%m_%d_%Y-%H-%M")> freyja/data/last_barcode_update.txt
git add freyja/data/last_barcode_update.txt
git add freyja/data/lineages.yml
git add freyja/data/usher_barcodes.csv
git add freyja/data/usher_barcodes.feather
git add freyja/data/lineage_mutations.json
git add freyja/data/curated_lineages.json
git commit -m "updating barcodes and metadata"
git push origin main
Expand Down
3 changes: 2 additions & 1 deletion ci/conda_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ scipy
epiweeks
pysam
biopython
seaborn
seaborn
pyarrow
69 changes: 49 additions & 20 deletions freyja/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


@click.group(context_settings={'show_default': True})
@click.version_option('1.5.0')
@click.version_option('1.5.1')
def cli():
pass

Expand Down Expand Up @@ -81,15 +81,13 @@ def demix(variants, depths, output, eps, barcodes, meta,
buildLineageMap,
map_to_constellation,
reindex_dfs, solve_demixing_problem)
from freyja.utils import (collapse_barcodes, handle_region_of_interest)
from freyja.utils import (collapse_barcodes,
load_barcodes,
handle_region_of_interest)
locDir = os.path.abspath(os.path.join(os.path.realpath(__file__),
os.pardir))
# option for custom barcodes
if barcodes != '':
df_barcodes = pd.read_csv(barcodes, index_col=0)
else:
df_barcodes = pd.read_csv(os.path.join(locDir,
'data/usher_barcodes.csv'), index_col=0)
df_barcodes = load_barcodes(barcodes)

if confirmedonly:
confirmed = [dfi for dfi in df_barcodes.index
if 'proposed' not in dfi and 'misc' not in dfi]
Expand Down Expand Up @@ -176,6 +174,7 @@ def update(outdir, noncl, buildlocal):
from freyja.updates import (convert_tree, download_barcodes,
download_tree, get_cl_lineages,
get_curated_lineage_data)
from freyja.convert_paths2barcodes import sortFun
locDir = os.path.abspath(os.path.join(os.path.realpath(__file__),
os.pardir))

Expand Down Expand Up @@ -221,7 +220,20 @@ def update(outdir, noncl, buildlocal):
df_barcodes = df_barcodes.loc[df_barcodes.index.isin(lineageNames)]
else:
print("Including lineages not yet in cov-lineages.")
df_barcodes.to_csv(os.path.join(locDir, 'usher_barcodes.csv'))
df_barcodes.reset_index().to_feather(
os.path.join(locDir, 'usher_barcodes.feather'))

dictMuts = {}
for lin in df.index:
muts = sorted([df.columns[m0] for m0, v in enumerate(df.loc[lin])
if v > 0], key=sortFun)
dictMuts[lin] = muts

import json
jpath = os.path.join(locDir, "lineage_mutations.json")
with open(jpath, "w") as outfile:
json.dump(dictMuts, outfile)

# delete files generated along the way that aren't needed anymore
print('Cleaning up')
os.remove(lineagePath)
Expand All @@ -247,7 +259,8 @@ def barcode_build(pb, outdir, noncl):
from freyja.convert_paths2barcodes import (check_mutation_chain,
convert_to_barcodes,
parse_tree_paths,
reversion_checking)
reversion_checking,
sortFun)
from freyja.updates import (convert_tree_custom,
get_cl_lineages,
get_curated_lineage_data)
Expand Down Expand Up @@ -284,15 +297,29 @@ def barcode_build(pb, outdir, noncl):
df_barcodes = df_barcodes.loc[df_barcodes.index.isin(lineageNames)]
else:
print("Including lineages not yet in cov-lineages.")
df_barcodes.to_csv(os.path.join(locDir, 'usher_barcodes.csv'))
# df_barcodes.to_csv(os.path.join(locDir, 'usher_barcodes.csv'))
df_barcodes.reset_index().to_feather(
os.path.join(locDir, 'usher_barcodes.feather'))
dictMuts = {}
for lin in df_barcodes.index:
muts = sorted([df_barcodes.columns[m0]
for m0, v in enumerate(df_barcodes.loc[lin])
if v > 0], key=sortFun)
dictMuts[lin] = muts

import json
jpath = os.path.join(locDir, "lineage_mutations.json")
with open(jpath, "w") as outfile:
json.dump(dictMuts, outfile)

# delete files generated along the way that aren't needed anymore
print('Cleaning up')
os.remove(lineagePath)


@cli.command()
@click.argument('lineage', type=str)
@click.option('--barcodes', default='data/usher_barcodes.csv',
@click.option('--barcodes', default='data/usher_barcodes.feather',
help='Path to custom barcode file', show_default=True)
@click.option('--annot', default=None,
help='Path to annotation file in gff3 format. '
Expand All @@ -311,10 +338,16 @@ def get_lineage_def(lineage, barcodes, annot, ref, output):
"""
from freyja.read_analysis_utils import parse_gff, translate_snps

if barcodes == 'data/usher_barcodes.csv':
if 'data/usher' in barcodes:
barcodes = os.path.join(locDir, barcodes)

df = pd.read_csv(barcodes, index_col=0)
if barcodes.endswith('csv'):
df = pd.read_csv(barcodes, index_col=0)
elif barcodes.endswith('feather'):
df = pd.read_feather(barcodes).set_index('index')
else:
raise ValueError('only csv and feather formats supported')

try:
target = df.loc[lineage]
except KeyError:
Expand Down Expand Up @@ -442,16 +475,12 @@ def boot(variants, depths, output_base, eps, barcodes, meta,
"""
Perform bootstrapping method for freyja using VARIANTS and DEPTHS
"""
from freyja.utils import load_barcodes
from freyja.sample_deconv import (build_mix_and_depth_arrays,
buildLineageMap,
perform_bootstrap,
reindex_dfs)
# option for custom barcodes
if barcodes != '':
df_barcodes = pd.read_csv(barcodes, index_col=0)
else:
df_barcodes = pd.read_csv(os.path.join(locDir,
'data/usher_barcodes.csv'), index_col=0)
df_barcodes = load_barcodes(barcodes)

if confirmedonly:
confirmed = [dfi for dfi in df_barcodes.index
Expand Down
2 changes: 1 addition & 1 deletion freyja/data/curated_lineages.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions freyja/data/lineage_mutations.json

Large diffs are not rendered by default.

Loading

0 comments on commit e919990

Please sign in to comment.