@@ -1172,7 +1172,6 @@ def _common_to_dataframe_steps(self, samples=None):
1172
1172
"""
1173
1173
with qdb .sql_connection .TRN :
1174
1174
# Retrieve all the information from the database
1175
- cols = self .categories
1176
1175
sql = """SELECT sample_id, sample_values
1177
1176
FROM qiita.{0}
1178
1177
WHERE sample_id != '{1}'""" .format (
@@ -1183,34 +1182,10 @@ def _common_to_dataframe_steps(self, samples=None):
1183
1182
sql += ' AND sample_id IN %s'
1184
1183
qdb .sql_connection .TRN .add (sql , [tuple (samples )])
1185
1184
1186
- # this query is going to return a tuple
1187
- # (sample_id, dict of columns/values); however it's important to
1188
- # notice that we can't assure that all column/values pairs are the
1189
- # same for all samples as we are not doing full bookkeeping of all
1190
- # the columns in all the samples. Thus, we have 2 options:
1191
- # 1. use dict() on the query result with pd.DataFrame.from_dict so
1192
- # pandas deals with this; but this takes a crazy amount of time,
1193
- # for more info google: "performance pandas from_dict"
1194
- # 2. generate a matrix rows/samples, cols/values and load them
1195
- # via pandas.DataFrame, which actually has good performace
1196
- data = []
1197
- for sid , values in qdb .sql_connection .TRN .execute_fetchindex ():
1198
- # creating row of values, first insert sample id
1199
- vals = [sid ]
1200
- # then loop over all the possible values making sure that if
1201
- # the column doesn't exist in that sample, it gets a None
1202
- for c in cols :
1203
- v = None
1204
- if c in values :
1205
- v = values [c ]
1206
- vals .append (v )
1207
- # append the row to the full matrix
1208
- data .append (vals )
1209
- cols .insert (0 , 'sample_id' )
1210
- df = pd .DataFrame (data , columns = cols , dtype = str )
1211
- df .set_index ('sample_id' , inplace = True )
1212
-
1213
- # Make sure that we are changing np.NaN by Nones
1185
+ data = qdb .sql_connection .TRN .execute_fetchindex ()
1186
+ df = pd .DataFrame ([d for _ , d in data ], index = [i for i , _ in data ],
1187
+ dtype = str )
1188
+ df .index .name = 'sample_id'
1214
1189
df .where ((pd .notnull (df )), None )
1215
1190
id_column_name = 'qiita_%sid' % (self ._table_prefix )
1216
1191
if id_column_name == 'qiita_sample_id' :
0 commit comments