Add a load_preprocessed_data command to the qiita_db CLI.

- qiita_db/commands.py: rename make_study_from_cmd to load_study_from_cmd
  and add load_preprocessed_data_from_cmd.
- qiita_db/util.py: add get_preprocessed_params_tables.
- scripts/qiita_db: rename the insert_study_to_db command function to
  load_study and add the load_preprocessed_data command.
- qiita_db/test/test_commands.py: cover load_preprocessed_data_from_cmd.

NOTE(review): indentation and blank lines were reconstructed from a
whitespace-collapsed copy of this patch, and the "index" blob ids predate the
review fixes; regenerate against the real tree before applying.

diff --git a/qiita_db/commands.py b/qiita_db/commands.py
index 835c943e1..7de070751 100644
--- a/qiita_db/commands.py
+++ b/qiita_db/commands.py
@@ -6,6 +6,9 @@
 # The full license is in the file LICENSE, distributed with this software.
 # -----------------------------------------------------------------------------

+from dateutil.parser import parse
+from os import listdir
+from os.path import join
 from functools import partial
 try:
     # Python 2
@@ -15,7 +18,6 @@
     from configparser import ConfigParser

 import pandas as pd
-from dateutil.parser import parse

 from .study import Study, StudyPerson
 from .user import User
@@ -24,8 +26,19 @@
 from .metadata_template import SampleTemplate


-def make_study_from_cmd(owner, title, info):
+def load_study_from_cmd(owner, title, info):
+    r"""Adds a study to the database

+    Parameters
+    ----------
+    owner : str
+        The email address of the owner of the study
+    title : str
+        The title of the study
+    info : file-like object
+        File-like object containing study information
+
+    """
     # Parse the configuration file
     config = ConfigParser()
     config.readfp(info)
@@ -71,6 +84,40 @@ def make_study_from_cmd(owner, title, info):
     Study.create(User(owner), title, efo_ids, infodict)


+def load_preprocessed_data_from_cmd(study_id, filedir, filepathtype,
+                                    params_table, params_id,
+                                    submitted_to_insdc):
+    r"""Adds preprocessed data to the database
+
+    Parameters
+    ----------
+    study_id : int
+        The study id to which the preprocessed data belongs
+    filedir : str
+        Directory path of the preprocessed data
+    filepathtype : str
+        The filepath_type of the preprocessed data
+    params_table : str
+        The name of the table which contains the parameters of the
+        preprocessing
+    params_id : int
+        The id of parameters in the params_table
+    submitted_to_insdc : bool
+        Has the data been submitted to insdc
+
+    Returns
+    -------
+    PreprocessedData
+        The object returned by PreprocessedData.create
+    """
+    fp_types_dict = get_filepath_types()
+    fp_type = fp_types_dict[filepathtype]
+    filepaths = [(join(filedir, fp), fp_type) for fp in listdir(filedir)]
+    return PreprocessedData.create(Study(study_id), params_table, params_id,
+                                   filepaths,
+                                   submitted_to_insdc=submitted_to_insdc)
+
+
 def sample_template_adder(sample_temp_path, study_id):
     r"""Adds a sample template to the database

@@ -79,7 +126,7 @@ def sample_template_adder(sample_temp_path, study_id):
     sample_temp_path : str
         Path to the sample template file
     study_id : int
-        The study id to wich the sample template belongs to
+        The study id to which the sample template belongs
     """
     sample_temp = pd.DataFrame.from_csv(sample_temp_path, sep='\t',
                                         infer_datetime_format=True)
diff --git a/qiita_db/test/test_commands.py b/qiita_db/test/test_commands.py
index 9e90f260b..ba222963c 100644
--- a/qiita_db/test/test_commands.py
+++ b/qiita_db/test/test_commands.py
@@ -8,7 +8,8 @@

 from os import remove, close
 from os.path import exists, join, basename
-from tempfile import mkstemp
+from tempfile import mkstemp, mkdtemp
+from shutil import rmtree
 from unittest import TestCase, main
 from future.utils.six import StringIO
 try:
@@ -18,12 +19,12 @@
     # Python 3
     from configparser import NoOptionError

-from qiita_db.commands import (make_study_from_cmd, load_raw_data_cmd,
-                               sample_template_adder, load_processed_data_cmd)
+from qiita_db.commands import (load_study_from_cmd, load_raw_data_cmd,
+                               sample_template_adder, load_processed_data_cmd,
+                               load_preprocessed_data_from_cmd)
 from qiita_db.study import Study, StudyPerson
 from qiita_db.user import User
 from qiita_db.util import get_count, check_count, get_db_files_base_dir
-from qiita_db.data import PreprocessedData
 from qiita_core.util import qiita_test_checker


@@ -38,7 +39,7 @@ def setUp(self):

     def test_make_study_from_cmd(self):
         fh = StringIO(self.config1)
-        make_study_from_cmd('test@test.com', 'newstudy', fh)
+        load_study_from_cmd('test@test.com', 'newstudy', fh)
         sql = ("select study_id from qiita.study where email = %s and "
                "study_title = %s")
         study_id = self.conn_handler.execute_fetchone(sql, ('test@test.com',
@@ -47,7 +48,52 @@ def test_make_study_from_cmd(self):

         fh2 = StringIO(self.config2)
         with self.assertRaises(NoOptionError):
-            make_study_from_cmd('test@test.com', 'newstudy2', fh2)
+            load_study_from_cmd('test@test.com', 'newstudy2', fh2)
+
+
+@qiita_test_checker()
+class TestImportPreprocessedData(TestCase):
+    def setUp(self):
+        self.tmpdir = mkdtemp()
+        fd, self.file1 = mkstemp(dir=self.tmpdir)
+        close(fd)
+        fd, self.file2 = mkstemp(dir=self.tmpdir)
+        close(fd)
+        with open(self.file1, "w") as f:
+            f.write("\n")
+        with open(self.file2, "w") as f:
+            f.write("\n")
+
+        self.files_to_remove = [self.file1, self.file2]
+        self.dirs_to_remove = [self.tmpdir]
+
+        self.db_test_ppd_dir = join(get_db_files_base_dir(),
+                                    'preprocessed_data')
+
+    def tearDown(self):
+        for fp in self.files_to_remove:
+            if exists(fp):
+                remove(fp)
+        for dp in self.dirs_to_remove:
+            if exists(dp):
+                rmtree(dp)
+
+    def test_import_preprocessed_data(self):
+        initial_ppd_count = get_count('qiita.preprocessed_data')
+        initial_fp_count = get_count('qiita.filepath')
+        ppd = load_preprocessed_data_from_cmd(
+            1, self.tmpdir, 'tar', 'preprocessed_sequence_illumina_params',
+            1, False)
+        self.files_to_remove.append(
+            join(self.db_test_ppd_dir,
+                 '%d_%s' % (ppd.id, basename(self.file1))))
+        self.files_to_remove.append(
+            join(self.db_test_ppd_dir,
+                 '%d_%s' % (ppd.id, basename(self.file2))))
+        self.assertEqual(ppd.id, 3)
+        self.assertTrue(check_count('qiita.preprocessed_data',
+                                    initial_ppd_count + 1))
+        self.assertTrue(check_count('qiita.filepath', initial_fp_count + 2))


 @qiita_test_checker()
diff --git a/qiita_db/util.py b/qiita_db/util.py
index 4b544ceef..3d55a9581 100644
--- a/qiita_db/util.py
+++ b/qiita_db/util.py
@@ -469,6 +469,24 @@ def check_count(table, exp_count):
     return obs_count == exp_count


+def get_preprocessed_params_tables():
+    """Returns a list of all tables starting with "preprocessed_"
+
+    Returns
+    -------
+    list of str
+        The names of the tables in the qiita schema whose name starts with
+        "preprocessed_"
+    """
+    # NOTE(review): the prefix alone may also match tables that do not hold
+    # preprocessing parameters; confirm against the schema
+    sql = ("SELECT table_name FROM information_schema.tables WHERE "
+           "table_schema = 'qiita' AND "
+           "SUBSTR(table_name, 1, 13) = 'preprocessed_'")
+    conn = SQLConnectionHandler()
+    return [row[0] for row in conn.execute_fetchall(sql)]
+
+
 def get_processed_params_tables():
     """Returns a list of all tables starting with "processed_params_"

diff --git a/scripts/qiita_db b/scripts/qiita_db
index 5b4e61a63..1fc70e658 100755
--- a/scripts/qiita_db
+++ b/scripts/qiita_db
@@ -11,9 +11,11 @@
 import click

 from qiita_db.util import (get_filetypes, get_filepath_types,
-                           get_processed_params_tables)
-from qiita_db.commands import (sample_template_adder, make_study_from_cmd,
-                               load_raw_data_cmd, load_processed_data_cmd)
+                           get_processed_params_tables,
+                           get_preprocessed_params_tables)
+from qiita_db.commands import (sample_template_adder, load_study_from_cmd,
+                               load_raw_data_cmd, load_processed_data_cmd,
+                               load_preprocessed_data_from_cmd)


 @click.group()
@@ -62,8 +64,8 @@ def load_raw_data(fp, fp_type, filetype, study):
               'interpretable as a datetime. If None, then the current date '
               'and time will be used.')
 def load_processed_data(fp, fp_type, processed_params_table,
-                            processed_params_id, preprocessed_data_id,
-                            processed_date):
+                        processed_params_id, preprocessed_data_id,
+                        processed_date):
     load_processed_data_cmd(fp, fp_type, processed_params_table,
                             processed_params_id, preprocessed_data_id,
                             processed_date)
@@ -75,8 +77,31 @@ def load_processed_data(fp, fp_type, processed_params_table,
 @click.option('--info', type=click.File(mode='r'),
               help="filepath of file with study information in python"
               "config file format")
-def insert_study_to_db(owner, title, info):
-    make_study_from_cmd(owner, title, info)
+def load_study(owner, title, info):
+    load_study_from_cmd(owner, title, info)
+
+
+@qiita_db.command()
+@click.option('--study_id', help="Study id associated with data",
+              required=True, type=int)
+@click.option('--params_table', help="Name of the parameters table for the "
+              "preprocessed data", required=True,
+              type=click.Choice(get_preprocessed_params_tables()))
+@click.option('--filedir', help="Directory containing preprocessed data",
+              required=True)
+@click.option('--filepathtype', help="Describes the contents of the input "
+              "files", required=True,
+              type=click.Choice(get_filepath_types().keys()))
+@click.option('--params_id', required=True, type=int,
+              help="id in the parameter table associated with the parameters")
+@click.option('--submitted_to_insdc', is_flag=True,
+              help="If provided, the preprocessed data have been submitted"
+              " to insdc")
+def load_preprocessed_data(study_id, filedir, filepathtype,
+                           params_table, params_id, submitted_to_insdc):
+    load_preprocessed_data_from_cmd(study_id, filedir, filepathtype,
+                                    params_table, params_id,
+                                    submitted_to_insdc)


 @qiita_db.command()