diff --git a/aviary/modules/binning/scripts/finalise_stats.py b/aviary/modules/binning/scripts/finalise_stats.py index b16e3038..0b519cf7 100644 --- a/aviary/modules/binning/scripts/finalise_stats.py +++ b/aviary/modules/binning/scripts/finalise_stats.py @@ -85,8 +85,8 @@ def get_taxonomy(rename_columns="Bin Id"): taxa = get_taxonomy(checkm_output.columns[0]) - merged_out = pd.merge(checkm_output, coverage_file, on=[checkm_output.columns[0]]) - merged_out = pd.merge(merged_out, taxa, on=[checkm_output.columns[0]]) + merged_out = pd.merge(checkm_output, coverage_file, on=[checkm_output.columns[0]], how="left") + merged_out = pd.merge(merged_out, taxa, on=[merged_out.columns[0]], how="left") merged_out.to_csv(snakemake.output.bin_stats, sep='\t', index=False) checkm_minimal = checkm_output[["Bin Id", "Marker lineage", "# genomes", "# markers", "# marker sets", diff --git a/test/test_integration.py b/test/test_integration.py index e0e4e0a2..1623e7fa 100755 --- a/test/test_integration.py +++ b/test/test_integration.py @@ -77,7 +77,12 @@ def test_short_read_recovery(self): ) subprocess.run(cmd, shell=True, check=True) - self.assertTrue(os.path.isfile(f"{tmpdir}/aviary_out/bins/bin_info.tsv")) + bin_info_path = f"{tmpdir}/aviary_out/bins/bin_info.tsv" + self.assertTrue(os.path.isfile(bin_info_path)) + with open(bin_info_path) as f: + num_lines = sum(1 for _ in f) + self.assertTrue(num_lines > 1) + self.assertTrue(os.path.isfile(f"{tmpdir}/aviary_out/data/final_contigs.fasta")) self.assertTrue(os.path.islink(f"{tmpdir}/aviary_out/assembly/final_contigs.fasta")) @@ -97,7 +102,11 @@ def test_short_read_recovery_skip_binners(self): ) subprocess.run(cmd, shell=True, check=True) - self.assertTrue(os.path.isfile(f"{tmpdir}/aviary_out/bins/bin_info.tsv")) + bin_info_path = f"{tmpdir}/aviary_out/bins/bin_info.tsv" + self.assertTrue(os.path.isfile(bin_info_path)) + with open(bin_info_path) as f: + num_lines = sum(1 for _ in f) + self.assertTrue(num_lines > 1) self.assertFalse(os.path.isfile(f"{tmpdir}/aviary_out/data/final_contigs.fasta")) @@ -117,7 +126,12 @@ def test_short_read_recovery_skip_abundances(self): ) subprocess.run(cmd, shell=True, check=True) - self.assertTrue(os.path.isfile(f"{tmpdir}/aviary_out/bins/bin_info.tsv")) + bin_info_path = f"{tmpdir}/aviary_out/bins/bin_info.tsv" + self.assertTrue(os.path.isfile(bin_info_path)) + with open(bin_info_path) as f: + num_lines = sum(1 for _ in f) + self.assertEqual(num_lines, 3) + self.assertFalse(os.path.isfile(f"{tmpdir}/aviary_out/data/final_contigs.fasta"))