Skip to content
This repository has been archived by the owner on May 21, 2024. It is now read-only.

Commit

Permalink
ENH: add sample dict mapping of sample id to filepath for contig dir …
Browse files Browse the repository at this point in the history
…format (#57)
  • Loading branch information
colinvwood authored Nov 27, 2023
1 parent 6aa91a4 commit 87ca3db
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 0 deletions.
35 changes: 35 additions & 0 deletions q2_types_genomics/per_sample_data/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# ----------------------------------------------------------------------------

import os
import re
import subprocess

from q2_types.bowtie2 import Bowtie2IndexDirFmt
Expand Down Expand Up @@ -126,6 +127,40 @@ class ContigSequencesDirFmt(model.DirectoryFormat):
def sequences_path_maker(self, sample_id):
    '''
    Builds the contigs FASTA file name for the given sample id by
    substituting it into the ``<sample_id>_contigs.fasta`` template.
    '''
    filename_template = r'%s_contigs.fasta'
    return filename_template % sample_id

def sample_dict(self, relative=False):
    '''
    Returns a mapping of sample id to file path for each set of per-sample
    contigs in the directory format.

    Only directory entries named ``<sample_id>_contigs.fasta`` or
    ``<sample_id>_contigs.fa`` are included; entries whose name starts
    with a dot are skipped.

    Parameters
    ----------
    relative : bool
        Whether to return filepaths relative to the directory's location.
        Returns absolute filepaths by default.

    Returns
    -------
    dict
        Mapping of sample id -> filepath as described above. Sorted
        alphabetically by key.
    '''
    # The whole file name must match (fullmatch), the dot before the
    # extension is literal, and the sample id may be a single character.
    # The greedy group ends at the last '_contigs', which is always the one
    # directly in front of the extension.
    contigs_pattern = re.compile(
        r'(?P<sample_id>[^.].*)_contigs\.(?:fasta|fa)'
    )
    # Loop-invariant: only needed as the anchor for relative paths.
    base_dir = self.path.absolute()

    samples = {}
    for sample_path in self.path.iterdir():
        matched = contigs_pattern.fullmatch(sample_path.name)
        if matched is None:
            continue

        absolute_path = sample_path.absolute()
        if relative:
            location = absolute_path.relative_to(base_dir)
        else:
            location = absolute_path
        samples[matched.group('sample_id')] = str(location)

    # iterdir() yields entries in arbitrary order; sort for a stable result.
    return dict(sorted(samples.items()))


# borrowed from q2-phylogenomics
class BAMFormat(model.BinaryFileFormat):
Expand Down
22 changes: 22 additions & 0 deletions q2_types_genomics/per_sample_data/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# ----------------------------------------------------------------------------

import os
from pathlib import Path
import shutil
import string
import unittest
Expand Down Expand Up @@ -114,6 +115,27 @@ def test_contig_seqs_dirfmt(self):
shutil.copytree(filepath, self.temp_dir.name, dirs_exist_ok=True)
ContigSequencesDirFmt(self.temp_dir.name, mode='r').validate()

def test_contig_seqs_dirfmt_sample_dict(self):
    # Copy the 'contigs/' test data into the temporary directory and open
    # it as a read-only directory format.
    source_dir = self.get_data_path('contigs/')
    shutil.copytree(source_dir, self.temp_dir.name, dirs_exist_ok=True)
    contigs = ContigSequencesDirFmt(self.temp_dir.name, mode='r')

    # Expected file name for each sample id.
    filenames = {
        'sample1': 'sample1_contigs.fa',
        'sample2': 'sample2_contigs.fa',
        'sample3': 'sample3_contigs.fa'
    }

    # Default call: each value is the file name joined onto the format's
    # directory path.
    observed_full = contigs.sample_dict()
    expected_full = {
        sample_id: str(Path(contigs.path / filename))
        for sample_id, filename in filenames.items()
    }
    self.assertEqual(observed_full, expected_full)

    # relative=True: each value is the bare file name.
    observed_relative = contigs.sample_dict(relative=True)
    self.assertEqual(observed_relative, filenames)

@patch('subprocess.run', return_value=Mock(returncode=0))
def test_bam_dirmt(self, p):
filepath = self.get_data_path('bowtie/maps-single')
Expand Down

0 comments on commit 87ca3db

Please sign in to comment.