Skip to content

Load raw #119

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 27 commits into from
Jun 19, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
f3bd589
Add function to retrieve the filetype table
Jun 18, 2014
f68a4e5
Add click command to add raw data
Jun 18, 2014
18fb52d
Add convenience function to load raw data
Jun 18, 2014
5bc0f9c
Merge branch 'master' of https://github.com/biocore/qiita into load_raw
Jun 18, 2014
3e4576a
Add tests for get_filetypes
Jun 18, 2014
5f593d8
fixup test
Jun 18, 2014
f7b3fbd
Fix get_filetypes
Jun 18, 2014
c266b4b
Move env manipulation commands to qiita_env script
Jun 18, 2014
7e2a0ba
Add utility get_filepath_types
Jun 19, 2014
2efa3f5
Merge branch 'master' of https://github.com/biocore/qiita into load_raw
Jun 19, 2014
6599b8f
Merge branch 'master' of https://github.com/biocore/qiita into load_raw
Jun 19, 2014
c28f3fd
Update click script
Jun 19, 2014
1140aa3
Add command used by click interface for load_raw
Jun 19, 2014
5b952b2
Move load_raw_data_cmd to commands.py
Jun 19, 2014
9d955f1
Move _check_count to util, add tests
Jun 19, 2014
29f619f
Add fp_type option to click command
Jun 19, 2014
9e47260
Add underlying command for load_raw_data
Jun 19, 2014
40e4cf0
Add test for load_raw_data_cmd
Jun 19, 2014
a5a2731
Remove test files properly
Jun 19, 2014
6fb5b58
Merge branch 'master' of https://github.com/biocore/qiita into load_raw
Jun 19, 2014
914802e
Add negative control for load_raw
Jun 19, 2014
c85178e
Add "required" and integer type to study option
Jun 19, 2014
1f51f7b
Fix the argument list for load_raw call
Jun 19, 2014
61a4c69
Merge branch 'master' of https://github.com/biocore/qiita into load_raw
Jun 19, 2014
e8c03b1
Fix PEP8
Jun 19, 2014
be01068
Fix assertItemsEqual
Jun 19, 2014
065e635
Fix Python3 failures
Jun 19, 2014
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions qiita_db/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
# Python 3
from configparser import ConfigParser

from qiita_db.study import Study, StudyPerson
from qiita_db.user import User
from .study import Study, StudyPerson
from .user import User
from .util import get_filetypes, get_filepath_types
from .data import RawData


def make_study_from_cmd(owner, title, info):
Expand Down Expand Up @@ -55,3 +57,29 @@ def make_study_from_cmd(owner, title, info):
efo_ids = [x.strip() for x in efo_ids.split(',')]

Study.create(User(owner), title, efo_ids, infodict)


def load_raw_data_cmd(filepaths, filepath_types, filetype, study_ids):
    """Add new raw data by populating the relevant tables

    Parameters
    ----------
    filepaths : iterable of str
        Paths to the raw data files to add
    filepath_types : iterable of str
        The filepath type of each entry in `filepaths`, given in the same
        order. Each value is looked up in the mapping returned by
        `get_filepath_types`
    filetype : str
        The file type of the raw data. It is looked up in the mapping
        returned by `get_filetypes`
    study_ids : iterable of int
        Ids of the studies the raw data is attached to; each id is wrapped
        in a `Study` object

    Returns
    -------
    The value returned by `RawData.create`

    Raises
    ------
    ValueError
        If the number of filepaths and filepath_types differ
    """
    # Materialize both arguments so that any iterable (e.g. a generator) is
    # accepted, as documented; len() and the later zip need real sequences.
    filepaths = list(filepaths)
    filepath_types = list(filepath_types)

    if len(filepaths) != len(filepath_types):
        raise ValueError("Please pass exactly one filepath_type for each "
                         "and every filepath")

    # Translate the human readable names into their database ids
    filetypes_dict = get_filetypes()
    filetype_id = filetypes_dict[filetype]

    filepath_types_dict = get_filepath_types()
    filepath_type_ids = [filepath_types_dict[x] for x in filepath_types]

    studies = [Study(x) for x in study_ids]

    return RawData.create(filetype_id,
                          list(zip(filepaths, filepath_type_ids)),
                          studies)
76 changes: 75 additions & 1 deletion qiita_db/test/test_commands.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from os import remove, close
from os.path import exists, abspath, join, basename
from tempfile import mkstemp
from unittest import TestCase, main
from future.utils.six import StringIO
try:
Expand All @@ -7,9 +10,10 @@
# Python 3
from configparser import NoOptionError

from qiita_db.commands import make_study_from_cmd
from qiita_db.commands import make_study_from_cmd, load_raw_data_cmd
from qiita_db.study import StudyPerson
from qiita_db.user import User
from qiita_db.util import get_count, check_count, get_db_files_base_dir
from qiita_core.util import qiita_test_checker


Expand All @@ -35,6 +39,76 @@ def test_make_study_from_cmd(self):
with self.assertRaises(NoOptionError):
make_study_from_cmd('test@test.com', 'newstudy2', fh2)


@qiita_test_checker()
class TestLoadRawDataFromCmd(TestCase):
    """Tests for load_raw_data_cmd"""

    def setUp(self):
        # Create three temporary files standing in for the forward reads,
        # the reverse reads and the barcodes
        fd, self.forward_fp = mkstemp(suffix='_forward.fastq.gz')
        close(fd)
        fd, self.reverse_fp = mkstemp(suffix='_reverse.fastq.gz')
        close(fd)
        fd, self.barcodes_fp = mkstemp(suffix='_barcodes.fastq.gz')
        close(fd)

        with open(self.forward_fp, "w") as f:
            f.write("\n")
        with open(self.reverse_fp, "w") as f:
            f.write("\n")
        with open(self.barcodes_fp, "w") as f:
            f.write("\n")

        # Every path listed here is deleted in tearDown
        self.files_to_remove = []
        self.files_to_remove.append(self.forward_fp)
        self.files_to_remove.append(self.reverse_fp)
        self.files_to_remove.append(self.barcodes_fp)

        self.db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data')

    def tearDown(self):
        for fp in self.files_to_remove:
            if exists(fp):
                remove(fp)

    def test_load_data_from_cmd(self):
        filepaths = [self.forward_fp, self.reverse_fp, self.barcodes_fp]
        filepath_types = ['raw_sequences', 'raw_sequences', 'raw_barcodes']

        filetype = 'FASTQ'
        study_ids = [1]

        initial_raw_count = get_count('qiita.raw_data')
        initial_fp_count = get_count('qiita.filepath')
        initial_raw_fp_count = get_count('qiita.raw_filepath')
        initial_study_raw_data_count = get_count('qiita.study_raw_data')

        new = load_raw_data_cmd(filepaths, filepath_types, filetype,
                                study_ids)
        raw_data_id = new.id
        # Register the '<raw_data_id>_<basename>' paths under the raw_data
        # directory for cleanup (presumably where RawData.create places the
        # files -- confirm against RawData)
        self.files_to_remove.append(
            join(self.db_test_raw_dir,
                 '%d_%s' % (raw_data_id, basename(self.forward_fp))))
        self.files_to_remove.append(
            join(self.db_test_raw_dir,
                 '%d_%s' % (raw_data_id, basename(self.reverse_fp))))
        self.files_to_remove.append(
            join(self.db_test_raw_dir,
                 '%d_%s' % (raw_data_id, basename(self.barcodes_fp))))

        self.assertTrue(check_count('qiita.raw_data', initial_raw_count + 1))
        self.assertTrue(check_count('qiita.filepath',
                                    initial_fp_count + 3))
        self.assertTrue(check_count('qiita.raw_filepath',
                                    initial_raw_fp_count + 3))
        # Compare against the study_raw_data baseline, not the raw_data one
        self.assertTrue(check_count('qiita.study_raw_data',
                                    initial_study_raw_data_count + 1))

        # Ensure that the ValueError is raised when a filepath_type is not
        # provided for each and every filepath
        with self.assertRaises(ValueError):
            load_raw_data_cmd(filepaths, filepath_types[:-1], filetype,
                              study_ids)


CONFIG_1 = """[required]
timeseries_type_id = 1
metadata_complete = True
Expand Down
72 changes: 34 additions & 38 deletions qiita_db/test/test_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,115 +9,111 @@
from unittest import TestCase, main

from qiita_core.util import qiita_test_checker
from qiita_db.util import check_count


@qiita_test_checker()
class SetupTest(TestCase):
    """Tests that the test database has been successfully populated"""

    # check_count returns a boolean instead of asserting, so each result
    # must be wrapped in assertTrue for the test to be able to fail.

    def test_qitta_user(self):
        self.assertTrue(check_count("qiita.qiita_user", 3))

    def test_study_person(self):
        self.assertTrue(check_count("qiita.study_person", 3))

    def test_study(self):
        self.assertTrue(check_count("qiita.study", 1))

    def test_study_users(self):
        self.assertTrue(check_count("qiita.study_users", 1))

    def test_investigation(self):
        self.assertTrue(check_count("qiita.investigation", 1))

    def test_investigation_study(self):
        self.assertTrue(check_count("qiita.investigation_study", 1))

    def test_study_experimental_factor(self):
        self.assertTrue(check_count("qiita.study_experimental_factor", 1))

    def test_filepath(self):
        self.assertTrue(check_count("qiita.filepath", 9))

    def test_filepath_type(self):
        self.assertTrue(check_count("qiita.filepath_type", 8))

    def test_raw_data(self):
        self.assertTrue(check_count("qiita.raw_data", 2))

    def test_raw_filepath(self):
        self.assertTrue(check_count("qiita.raw_filepath", 4))

    def test_study_raw_data(self):
        self.assertTrue(check_count("qiita.study_raw_data", 2))

    def test_required_sample_info(self):
        self.assertTrue(check_count("qiita.required_sample_info", 27))

    def test_study_sample_columns(self):
        self.assertTrue(check_count("qiita.study_sample_columns", 23))

    def test_sample_1(self):
        self.assertTrue(check_count("qiita.sample_1", 27))

    def test_common_prep_info(self):
        self.assertTrue(check_count("qiita.common_prep_info", 27))

    def test_raw_data_prep_columns(self):
        self.assertTrue(check_count("qiita.raw_data_prep_columns", 19))

    def test_prep_1(self):
        self.assertTrue(check_count("qiita.prep_1", 27))

    def test_preprocessed_data(self):
        self.assertTrue(check_count("qiita.preprocessed_data", 2))

    def test_study_preprocessed_data(self):
        self.assertTrue(check_count("qiita.study_preprocessed_data", 2))

    def test_preprocessed_filepath(self):
        self.assertTrue(check_count("qiita.preprocessed_filepath", 2))

    def test_preprocessed_sequence_illumina_params(self):
        self.assertTrue(
            check_count("qiita.preprocessed_sequence_illumina_params", 2))

    def test_processed_data(self):
        self.assertTrue(check_count("qiita.processed_data", 1))

    def test_reference(self):
        self.assertTrue(check_count("qiita.reference", 1))

    def test_processed_params_uclust(self):
        self.assertTrue(check_count("qiita.processed_params_uclust", 1))

    def test_processed_filepath(self):
        self.assertTrue(check_count("qiita.processed_filepath", 1))

    # NOTE(review): this change lowered the expected counts for qiita.job
    # (3 -> 2) and qiita.analysis (2 -> 1). The previous unasserted calls
    # could not have caught a wrong value -- confirm both against the test
    # database.
    def test_job(self):
        self.assertTrue(check_count("qiita.job", 2))

    def test_analysis(self):
        self.assertTrue(check_count("qiita.analysis", 1))

    def test_analysis_job(self):
        self.assertTrue(check_count("qiita.analysis_job", 3))

    def test_analysis_filepath(self):
        self.assertTrue(check_count("qiita.analysis_filepath", 1))

    def test_analysis_sample(self):
        self.assertTrue(check_count("qiita.analysis_sample", 8))

    def test_analysis_users(self):
        self.assertTrue(check_count("qiita.analysis_users", 1))

    def test_job_results_filepath(self):
        self.assertTrue(check_count("qiita.job_results_filepath", 2))

if __name__ == '__main__':
main()
45 changes: 44 additions & 1 deletion qiita_db/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
from qiita_db.util import (exists_table, exists_dynamic_table, scrub_data,
compute_checksum, check_table_cols,
check_required_columns, convert_to_id,
get_table_cols)
get_table_cols, get_filetypes, get_filepath_types,
get_count, check_count)


@qiita_test_checker()
Expand Down Expand Up @@ -107,6 +108,48 @@ def test_convert_to_id_bad_value(self):
with self.assertRaises(IncompetentQiitaDeveloperError):
convert_to_id("FAKE", "filepath_type")

def test_get_filetypes(self):
    """get_filetypes maps names to ids by default and ids to names when
    keyed by filetype_id"""
    by_name = {'FASTA': 1, 'FASTQ': 2, 'SPECTRA': 3}
    self.assertEqual(get_filetypes(), by_name)

    # Invert the expected mapping for the id-keyed lookup
    by_id = dict((type_id, name) for name, type_id in by_name.items())
    self.assertEqual(get_filetypes(key='filetype_id'), by_id)

def test_get_filetypes_fail(self):
    """get_filetypes raises QiitaDBColumnError for an invalid key"""
    self.assertRaises(QiitaDBColumnError, get_filetypes, key='invalid')

def test_get_filepath_types(self):
    """get_filepath_types maps names to ids by default and ids to names
    when keyed by filepath_type_id"""
    # Expected names listed in id order, starting at 1
    names = ['raw_sequences', 'raw_barcodes', 'raw_spectra',
             'preprocessed_sequences', 'preprocessed_sequences_qual',
             'biom', 'tar', 'plain_text']
    by_name = {name: idx for idx, name in enumerate(names, 1)}
    self.assertEqual(get_filepath_types(), by_name)

    by_id = dict((idx, name) for name, idx in by_name.items())
    self.assertEqual(get_filepath_types(key='filepath_type_id'), by_id)

def test_get_filepath_types_fail(self):
    """get_filepath_types raises QiitaDBColumnError for an invalid key"""
    self.assertRaises(QiitaDBColumnError, get_filepath_types,
                      key='invalid')

def test_get_count(self):
    """get_count reports the number of rows in qiita.study_person"""
    observed = get_count('qiita.study_person')
    self.assertEqual(observed, 3)

def test_check_count(self):
    """check_count is True for the matching count and False otherwise"""
    table = 'qiita.study_person'
    matches = check_count(table, 3)
    self.assertTrue(matches)
    mismatches = check_count(table, 2)
    self.assertFalse(mismatches)


class UtilTests(TestCase):
"""Tests for the util functions that do not need to access the DB"""
Expand Down
Loading