Skip to content

Ready for MERGE: Add command to import preprocessed data #122

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 20, 2014
Merged
48 changes: 45 additions & 3 deletions qiita_db/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from dateutil.parser import parse
from os import listdir
from os.path import join
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think during conflict resolution you moved these imports to the wrong place. pandas and dateutil are third-party imports, so they should come after the functools, os, and os.path imports, separated by a blank line, then another blank line, then the imports from qiita

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from functools import partial
try:
# Python 2
Expand All @@ -15,7 +18,6 @@
from configparser import ConfigParser

import pandas as pd
from dateutil.parser import parse

from .study import Study, StudyPerson
from .user import User
Expand All @@ -24,8 +26,19 @@
from .metadata_template import SampleTemplate


def make_study_from_cmd(owner, title, info):
def load_study_from_cmd(owner, title, info):
r"""Adds a study to the database

Parameters
----------
owner : str
The email address of the owner of the study_abstract
title : str
The title of the study_abstract
info : file-like object
File-like object containing study information

"""
# Parse the configuration file
config = ConfigParser()
config.readfp(info)
Expand Down Expand Up @@ -71,6 +84,35 @@ def make_study_from_cmd(owner, title, info):
Study.create(User(owner), title, efo_ids, infodict)


def load_preprocessed_data_from_cmd(study_id, filedir, filepathtype,
                                    params_table, params_id,
                                    submitted_to_insdc):
    r"""Adds preprocessed data to the database

    Every entry found in `filedir` is registered as a filepath of the new
    preprocessed data, all of them with the same filepath type.

    Parameters
    ----------
    study_id : int
        The study id to which the preprocessed data belongs
    filedir : str
        Directory path of the preprocessed data
    filepathtype : str
        The filepath_type of the preprocessed data. Must be one of the keys
        returned by `get_filepath_types`
    params_table : str
        The name of the table which contains the parameters of the
        preprocessing
    params_id : int
        The id of the parameters in the `params_table`
    submitted_to_insdc : bool
        Has the data been submitted to insdc

    Returns
    -------
    The object returned by `PreprocessedData.create`

    Raises
    ------
    KeyError
        If `filepathtype` is not a key of the `get_filepath_types` mapping
    """
    # Translate the human readable filepath type into the value stored in
    # the mapping returned by get_filepath_types
    fp_types_dict = get_filepath_types()
    fp_type = fp_types_dict[filepathtype]
    # listdir yields bare entry names, so anchor each one to filedir
    filepaths = [(join(filedir, fp), fp_type) for fp in listdir(filedir)]
    return PreprocessedData.create(Study(study_id), params_table, params_id,
                                   filepaths,
                                   submitted_to_insdc=submitted_to_insdc)


def sample_template_adder(sample_temp_path, study_id):
r"""Adds a sample template to the database

Expand All @@ -79,7 +121,7 @@ def sample_template_adder(sample_temp_path, study_id):
sample_temp_path : str
Path to the sample template file
study_id : int
The study id to wich the sample template belongs to
The study id to which the sample template belongs
"""
sample_temp = pd.DataFrame.from_csv(sample_temp_path, sep='\t',
infer_datetime_format=True)
Expand Down
58 changes: 52 additions & 6 deletions qiita_db/test/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from os import remove, close
from os.path import exists, join, basename
from tempfile import mkstemp
from tempfile import mkstemp, mkdtemp
from shutil import rmtree
from unittest import TestCase, main
from future.utils.six import StringIO
try:
Expand All @@ -18,12 +19,12 @@
# Python 3
from configparser import NoOptionError

from qiita_db.commands import (make_study_from_cmd, load_raw_data_cmd,
sample_template_adder, load_processed_data_cmd)
from qiita_db.commands import (load_study_from_cmd, load_raw_data_cmd,
sample_template_adder, load_processed_data_cmd,
load_preprocessed_data_from_cmd)
from qiita_db.study import Study, StudyPerson
from qiita_db.user import User
from qiita_db.util import get_count, check_count, get_db_files_base_dir
from qiita_db.data import PreprocessedData
from qiita_core.util import qiita_test_checker


Expand All @@ -38,7 +39,7 @@ def setUp(self):

def test_make_study_from_cmd(self):
fh = StringIO(self.config1)
make_study_from_cmd('test@test.com', 'newstudy', fh)
load_study_from_cmd('test@test.com', 'newstudy', fh)
sql = ("select study_id from qiita.study where email = %s and "
"study_title = %s")
study_id = self.conn_handler.execute_fetchone(sql, ('test@test.com',
Expand All @@ -47,7 +48,52 @@ def test_make_study_from_cmd(self):

fh2 = StringIO(self.config2)
with self.assertRaises(NoOptionError):
make_study_from_cmd('test@test.com', 'newstudy2', fh2)
load_study_from_cmd('test@test.com', 'newstudy2', fh2)


@qiita_test_checker()
class TestImportPreprocessedData(TestCase):
    """Exercises `load_preprocessed_data_from_cmd` on a scratch directory"""

    def setUp(self):
        # Scratch directory holding two files, each containing one newline
        self.tmpdir = mkdtemp()
        scratch_files = []
        for _ in range(2):
            handle, path = mkstemp(dir=self.tmpdir)
            close(handle)
            with open(path, "w") as out:
                out.write("\n")
            scratch_files.append(path)
        self.file1, self.file2 = scratch_files

        # Everything listed here is deleted again in tearDown
        self.files_to_remove = list(scratch_files)
        self.dirs_to_remove = [self.tmpdir]

        self.db_test_ppd_dir = join(get_db_files_base_dir(),
                                    'preprocessed_data')

    def tearDown(self):
        # Files first, then directories; entries already gone are ignored
        for path in self.files_to_remove:
            if not exists(path):
                continue
            remove(path)
        for directory in self.dirs_to_remove:
            if not exists(directory):
                continue
            rmtree(directory)

    def test_import_preprocessed_data(self):
        ppd_count_before = get_count('qiita.preprocessed_data')
        fp_count_before = get_count('qiita.filepath')

        ppd = load_preprocessed_data_from_cmd(
            1, self.tmpdir, 'tar', 'preprocessed_sequence_illumina_params',
            1, False)

        # Schedule '<id>_<basename>' under the preprocessed data directory
        # for clean up (presumably where the files end up after loading)
        self.files_to_remove.extend(
            join(self.db_test_ppd_dir, '%d_%s' % (ppd.id, basename(src)))
            for src in (self.file1, self.file2))

        self.assertEqual(ppd.id, 3)
        # One new preprocessed data row and one filepath row per input file
        ppd_count_ok = check_count('qiita.preprocessed_data',
                                   ppd_count_before + 1)
        self.assertTrue(ppd_count_ok)
        fp_count_ok = check_count('qiita.filepath', fp_count_before + 2)
        self.assertTrue(fp_count_ok)


@qiita_test_checker()
Expand Down
13 changes: 13 additions & 0 deletions qiita_db/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,19 @@ def check_count(table, exp_count):
return obs_count == exp_count


def get_preprocessed_params_tables():
    """Returns a list of all tables starting with "preprocessed_"

    Returns
    -------
    list of str
        The names of the tables in the 'qiita' schema whose name starts
        with "preprocessed_"
    """
    # NOTE(review): the filter only checks the "preprocessed_" prefix, so any
    # table with that prefix is returned, not just parameter tables - confirm
    # this is the intended set of choices.
    # Select table_name explicitly instead of relying on the column order of
    # information_schema.tables
    sql = ("SELECT table_name FROM information_schema.tables WHERE "
           "table_schema = 'qiita' AND "
           "SUBSTR(table_name, 1, 13) = 'preprocessed_'")
    conn = SQLConnectionHandler()
    return [x[0] for x in conn.execute_fetchall(sql)]


def get_processed_params_tables():
"""Returns a list of all tables starting with "processed_params_"

Expand Down
39 changes: 32 additions & 7 deletions scripts/qiita_db
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
import click

from qiita_db.util import (get_filetypes, get_filepath_types,
get_processed_params_tables)
from qiita_db.commands import (sample_template_adder, make_study_from_cmd,
load_raw_data_cmd, load_processed_data_cmd)
get_processed_params_tables,
get_preprocessed_params_tables)
from qiita_db.commands import (sample_template_adder, load_study_from_cmd,
load_raw_data_cmd, load_processed_data_cmd,
load_preprocessed_data_from_cmd)


@click.group()
Expand Down Expand Up @@ -62,8 +64,8 @@ def load_raw_data(fp, fp_type, filetype, study):
'interpretable as a datetime. If None, then the current date '
'and time will be used.')
def load_processed_data(fp, fp_type, processed_params_table,
processed_params_id, preprocessed_data_id,
processed_date):
processed_params_id, preprocessed_data_id,
processed_date):
load_processed_data_cmd(fp, fp_type, processed_params_table,
processed_params_id, preprocessed_data_id,
processed_date)
Expand All @@ -75,8 +77,31 @@ def load_processed_data(fp, fp_type, processed_params_table,
@click.option('--info', type=click.File(mode='r'),
help="filepath of file with study information in python"
"config file format")
def insert_study_to_db(owner, title, info):
make_study_from_cmd(owner, title, info)
def load_study(owner, title, info):
load_study_from_cmd(owner, title, info)


@qiita_db.command()
@click.option('--study_id', help="Study id associated with data",
              required=True, type=int)
@click.option('--params_table', help="Name of the paramaters table for the "
              "preprocessed data", required=True,
              type=click.Choice(get_preprocessed_params_tables()))
@click.option('--filedir', help="Directory containing preprocessed data",
              required=True)
@click.option('--filepathtype', help="Describes the contents of the input "
              "files", required=True,
              type=click.Choice(get_filepath_types().keys()))
@click.option('--params_id', required=True, type=int,
              help="id in the paramater table associated with the parameters")
@click.option('--submitted_to_insdc', is_flag=True,
              help="If provided, the preprocessed data have been submitted"
              " to insdc")
def load_preprocessed_data(study_id, filedir, filepathtype,
                           params_table, params_id, submitted_to_insdc):
    # Command line entry point: forwards the parsed options unchanged to
    # load_preprocessed_data_from_cmd.
    #
    # click passes each option as a keyword argument named after the option
    # itself, so the parameter must be called `params_table` to match
    # `--params_table`; any other name makes the call fail with a TypeError.
    #
    # `--submitted_to_insdc` is an optional boolean flag (False when absent),
    # which is why it is not marked as required. The two ids are converted to
    # int because load_preprocessed_data_from_cmd documents them as int.
    load_preprocessed_data_from_cmd(study_id, filedir, filepathtype,
                                    params_table,
                                    params_id, submitted_to_insdc)


@qiita_db.command()
Expand Down