|
8 | 8 |
|
9 | 9 | from unittest import TestCase, main
|
10 | 10 | import numpy.testing as npt
|
| 11 | +from tarfile import open as topen |
| 12 | +from os import remove |
| 13 | +from os.path import exists, join |
11 | 14 |
|
12 | 15 | import pandas as pd
|
13 | 16 |
|
|
22 | 25 | class MetaUtilTests(TestCase):
|
def setUp(self):
    """Prepare per-test state.

    Starts an empty list of file paths that ``tearDown`` will delete and
    remembers the currently configured portal so it can be put back once
    the test finishes.
    """
    # paths appended here by a test are removed in tearDown
    self.files_to_remove = []
    # snapshot of the portal in effect before the test runs
    self.old_portal = qiita_config.portal
25 | 29 |
|
def tearDown(self):
    """Undo per-test state.

    Puts back the portal saved by ``setUp`` and deletes every registered
    file that still exists on disk.
    """
    qiita_config.portal = self.old_portal
    # lazily filtered, so each path is checked right before it is removed
    leftovers = (path for path in self.files_to_remove if exists(path))
    for path in leftovers:
        remove(path)
28 | 35 |
|
29 | 36 | def _set_artifact_private(self):
|
30 | 37 | self.conn_handler.execute(
|
@@ -227,6 +234,164 @@ def test_update_redis_stats(self):
|
227 | 234 | redis_key = '%s:stats:%s' % (portal, k)
|
228 | 235 | self.assertEqual(f(redis_key), exp)
|
229 | 236 |
|
def test_generate_biom_and_metadata_release(self):
    """Check the tgz built by ``generate_biom_and_metadata_release``.

    The release is generated and inspected twice: first with the
    ``base_data_dir`` setting as stored, then with its trailing '/'
    toggled (added if missing, stripped if present) so that both forms
    of the setting are exercised. The original setting is always written
    back, even when the second round of checks fails.
    """
    level = 'private'
    qdb.meta_util.generate_biom_and_metadata_release(level)
    portal = qiita_config.portal
    working_dir = qiita_config.working_dir

    tgz = self._fetch_release_tgz(portal, level, working_dir)
    self.files_to_remove.append(tgz)
    self._check_release_tgz(tgz)

    # whatever the configuration was, we will change the settings so we
    # can test the other option when dealing with the end '/'
    with qdb.sql_connection.TRN:
        qdb.sql_connection.TRN.add(
            "SELECT base_data_dir FROM settings")
        obdr = qdb.sql_connection.TRN.execute_fetchlast()
        if obdr.endswith('/'):
            bdr = obdr[:-1]
        else:
            bdr = obdr + '/'

        qdb.sql_connection.TRN.add(
            "UPDATE settings SET base_data_dir = '%s'" % bdr)
        qdb.sql_connection.TRN.execute()

    try:
        qdb.meta_util.generate_biom_and_metadata_release(level)
        tgz = self._fetch_release_tgz(portal, level, working_dir)
        # registering the same path twice is harmless: tearDown only
        # removes files that still exist
        self.files_to_remove.append(tgz)
        self._check_release_tgz(tgz)
    finally:
        # returning configuration, even if the checks above failed, so
        # a failure here does not leak the modified setting
        with qdb.sql_connection.TRN:
            qdb.sql_connection.TRN.add(
                "UPDATE settings SET base_data_dir = '%s'" % obdr)
            qdb.sql_connection.TRN.execute()

def _fetch_release_tgz(self, portal, level, working_dir):
    """Return the release tgz path stored in redis, under working_dir."""
    # per the notes in the original test, md5sum and time are stored
    # next to the filepath, but only the filepath contents are checked
    tgz = r_client.get('%s:release:%s:%s' % (portal, level, 'filepath'))
    return join(working_dir, tgz)

def _take_from_release(self, names, name, times):
    """Assert `name` occurs in `names` and remove it, `times` times."""
    for _ in range(times):
        self.assertIn(name, names)
        names.remove(name)

def _check_release_tgz(self, tgz):
    """Assert the member names and the summary text file of `tgz`."""
    with topen(tgz, "r:gz") as tar:
        names = [ti.name for ti in tar]

    biom = 'processed_data/1_study_1001_closed_reference_otu_table.biom'
    silva = ('processed_data/1_study_1001_closed_reference_otu_table_Silva.'
             'biom')
    # yes, the first biom is there twice
    self._take_from_release(names, biom, 2)
    self._take_from_release(names, silva, 1)

    # file names might change due to updates and patches, so the info
    # files are located by prefix: take the first match and check/remove
    # the occurrences of that file
    preps = [f for f in names if f.startswith('templates/1_prep_1_')]
    self.assertTrue(preps, 'no prep info file found in %s' % tgz)
    fn_prep = preps[0]
    self._take_from_release(names, fn_prep, 3)

    samples = [f for f in names if f.startswith('templates/1_')]
    self.assertTrue(samples, 'no sample info file found in %s' % tgz)
    fn_sample = samples[0]
    self._take_from_release(names, fn_sample, 3)

    # now we should only have the text file
    self.assertEqual(len(names), 1)
    txt = names[0]

    with topen(tgz, "r:gz") as tar:
        txt_obs = tar.extractfile(txt).readlines()

    row = ('%s\t%s\t%s\t%d\t'
           'Pick closed-reference OTUs, Split libraries FASTQ\n')
    txt_exp = [
        'biom_fp\tsample_fp\tprep_fp\tqiita_artifact_id\tcommand\n',
        row % (biom, fn_sample, fn_prep, 4),
        row % (biom, fn_sample, fn_prep, 5),
        row % (silva, fn_sample, fn_prep, 6)]
    self.assertEqual(txt_obs, txt_exp)
| 394 | + |
230 | 395 |
|
231 | 396 | EXP_LAT_LONG = (
|
232 | 397 | '[[60.1102854322, 74.7123248382], [23.1218032799, 42.838497795],'
|
|
0 commit comments