Skip to content

Commit f3fefc6

Browse files
authored
minor improvements (#3067)
* minor improvements * fix test * adding @wasade suggestion
1 parent 94e01ca commit f3fefc6

File tree

2 files changed

+5
-30
lines changed

2 files changed

+5
-30
lines changed

qiita_db/metadata_template/base_metadata_template.py

Lines changed: 4 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,7 +1172,6 @@ def _common_to_dataframe_steps(self, samples=None):
11721172
"""
11731173
with qdb.sql_connection.TRN:
11741174
# Retrieve all the information from the database
1175-
cols = self.categories
11761175
sql = """SELECT sample_id, sample_values
11771176
FROM qiita.{0}
11781177
WHERE sample_id != '{1}'""".format(
@@ -1183,34 +1182,10 @@ def _common_to_dataframe_steps(self, samples=None):
11831182
sql += ' AND sample_id IN %s'
11841183
qdb.sql_connection.TRN.add(sql, [tuple(samples)])
11851184

1186-
# this query is going to return a tuple
1187-
# (sample_id, dict of columns/values); however it's important to
1188-
# notice that we can't assure that all column/values pairs are the
1189-
# same for all samples as we are not doing full bookkeeping of all
1190-
# the columns in all the samples. Thus, we have 2 options:
1191-
# 1. use dict() on the query result with pd.DataFrame.from_dict so
1192-
# pandas deals with this; but this takes a crazy amount of time,
1193-
# for more info google: "performance pandas from_dict"
1194-
# 2. generate a matrix rows/samples, cols/values and load them
1195-
# via pandas.DataFrame, which actually has good performance
1196-
data = []
1197-
for sid, values in qdb.sql_connection.TRN.execute_fetchindex():
1198-
# creating row of values, first insert sample id
1199-
vals = [sid]
1200-
# then loop over all the possible values making sure that if
1201-
# the column doesn't exist in that sample, it gets a None
1202-
for c in cols:
1203-
v = None
1204-
if c in values:
1205-
v = values[c]
1206-
vals.append(v)
1207-
# append the row to the full matrix
1208-
data.append(vals)
1209-
cols.insert(0, 'sample_id')
1210-
df = pd.DataFrame(data, columns=cols, dtype=str)
1211-
df.set_index('sample_id', inplace=True)
1212-
1213-
# Make sure that we are replacing np.NaN with None
1185+
data = qdb.sql_connection.TRN.execute_fetchindex()
1186+
df = pd.DataFrame([d for _, d in data], index=[i for i, _ in data],
1187+
dtype=str)
1188+
df.index.name = 'sample_id'
12141189
df.where((pd.notnull(df)), None)
12151190
id_column_name = 'qiita_%sid' % (self._table_prefix)
12161191
if id_column_name == 'qiita_sample_id':

scripts/qiita-recover-jobs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def _retrieve_queue_jobs():
4747
# looking for qiita jobs
4848
# i-1: the line before is the job name, which is the internal qiita job id
4949
job_names = [lines[i-1] for i, l in enumerate(lines)
50-
if 'Job_Owner = qiita@qiita.ucsd.edu' in l]
50+
if l.startswith(' Job_Owner = qiita')]
5151

5252
qiita_jids = []
5353
for job in job_names:

0 commit comments

Comments
 (0)