dib-lab · ctb · Feb 6, 2024 · Feb 11, 2024
diff --git a/Snakefile b/Snakefile
@@ -371,13 +371,14 @@ rule metag_x_genomes_prefetch:
     input:
         genomes = expand("sketches/genomes/{n}.sig.zip", n=GENOME_NAMES),
         metag="sketches/metag/{metag}.sig.zip",
-        bin = "scripts/calc-weighted-overlap.py",
     output:
         "outputs/prefetch/{metag}.x.genomes.{k}.csv",
     threads: 1
     shell: """
-        {input.bin} -k {wildcards.k} --genomes {input.genomes} \
-            --metagenomes {input.metag} -o {output}
+        sourmash scripts mgmanysearch -k {wildcards.k} \
+            --queries {input.genomes} \
+            --against {input.metag} \
+            -o {output}
     """
 
 rule metag_x_genomes_prefetch_summary:

diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,6 @@
+sourmash_plugin_containment_search>=0.4.3
+
+# mkdocs stuff
 pymdown-extensions
 mkdocs-material
 mkdocs-material-extensions

diff --git a/scripts/calc-weighted-overlap.py b/scripts/calc-weighted-overlap.py
diff --git a/scripts/summarize-weighted-overlap.py b/scripts/summarize-weighted-overlap.py
@@ -31,7 +31,7 @@ def main():
     sample_dfs = {}
     for sample in samples:
         sample_df = df[df['match_name'] == sample]
-        sample_df = sample_df[['query_name', 'f_unique_weighted']]
+        sample_df = sample_df[['query_name', 'f_match_weighted']]
         sample_dfs[sample] = sample_df
 
     # go through and merge, retaining the column named `containment`;
@@ -40,14 +40,14 @@ def main():
     # do the first sample:
     sample = samples.pop(0)
     combined_df = sample_dfs[sample]
-    combined_df.rename(columns={'f_unique_weighted': sample}, inplace=True)
+    combined_df.rename(columns={'f_match_weighted': sample}, inplace=True)
 
     # and then... the rest!
     while samples:
         sample = samples.pop(0)
         sample_df = sample_dfs[sample]
-        sample_df = sample_df[['query_name', 'f_unique_weighted']]
-        sample_df.rename(columns={'f_unique_weighted': sample},
+        sample_df = sample_df[['query_name', 'f_match_weighted']]
+        sample_df.rename(columns={'f_match_weighted': sample},
                          inplace=True)
         combined_df = combined_df.merge(sample_df, on='query_name',
                                         how='outer')