diff --git a/src/reports/index_wide_compendia_tests.py b/src/reports/index_wide_compendia_tests.py
new file mode 100644
index 00000000..b360193a
--- /dev/null
+++ b/src/reports/index_wide_compendia_tests.py
@@ -0,0 +1,72 @@
+"""
+There are some tests we would like to run that apply to the entire set of Babel compendia.
+
+To do this, our current strategy is to go through all of the Babel compendia and
+add the relevant information to a SQLite database. We can then query this
+database to look for duplication.
+"""
+import json
+import logging
+import sqlite3
+from pathlib import Path
+
+
+def report_on_index_wide_compendia_tests(compendia_files, sqlite_file, report_file):
+    # Touch the output files so Snakemake knows we're working.
+    Path(sqlite_file).touch()
+    Path(report_file).touch()
+
+    # Connect to the SQLite database that we will use to keep track of duplicates.
+    conn = sqlite3.connect(sqlite_file)
+    c = conn.cursor()
+
+    # Create a compendia table if it doesn't exist. Neither column can be a
+    # primary key: a preferred_curie recurs once per identifier in its clique,
+    # and a curie that recurs across cliques is exactly the duplication we
+    # want to detect.
+    c.execute('''CREATE TABLE IF NOT EXISTS compendia (
+        preferred_curie TEXT NOT NULL,
+        curie TEXT NOT NULL
+    )''')
+
+    # Go through all the compendia files in the order provided.
+    for compendia_file_index, compendia_file in enumerate(compendia_files):
+        # Go through every entry in each compendia_file.
+        logging.info(f"Reading {compendia_file} ({compendia_file_index + 1}/{len(compendia_files)})")
+
+        count_curies = 0
+        with open(compendia_file, 'r') as compendiafile:
+            for line in compendiafile:
+                entry = json.loads(line)
+                identifiers = entry['identifiers']
+
+                if len(identifiers) > 0:
+                    preferred_curie = identifiers[0]['i']
+                    for identifier in identifiers:
+                        curie = identifier['i']
+                        count_curies += 1
+                        c.execute("INSERT INTO compendia (preferred_curie, curie) VALUES (?, ?)", (preferred_curie, curie))
+
+        logging.info(f"Read {count_curies} CURIEs from {compendia_file} into SQLite database {sqlite_file}.")
+        conn.commit()
+
+    # Query the table to check that the data was inserted correctly.
+    c.execute("SELECT COUNT(*) FROM compendia")
+    record_count = c.fetchone()[0]
+    logging.info(f"SQLite database contains {record_count} records.")
+
+    # Write the report file.
+    with open(report_file, 'w') as reportfile:
+        c.execute("SELECT COUNT(curie) FROM compendia")
+        curie_count = c.fetchone()[0]
+
+        # Look for CURIEs mapped to multiple preferred_curies.
+        c.execute("SELECT curie, COUNT(DISTINCT preferred_curie), GROUP_CONCAT(DISTINCT preferred_curie) FROM compendia GROUP BY curie HAVING COUNT(DISTINCT preferred_curie) > 1 ORDER BY COUNT(DISTINCT preferred_curie) DESC;")
+        results = c.fetchall()
+        duplicates = [{'curie': row[0], 'count': row[1], 'preferred_curies': row[2].split(',')} for row in results]
+
+        json.dump({
+            'curie_count': curie_count,
+            'duplicates': duplicates
+        }, reportfile)
+
+    # Close the database connection.
+    conn.close()
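A quick way to convince yourself that the GROUP BY query above finds cross-clique duplicates: the following standalone sketch (not part of this diff; the toy CURIEs are invented) runs the same query against an in-memory SQLite database.

```python
import sqlite3

# Toy compendia table: CHEBI:100 is claimed by two different cliques.
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE compendia (preferred_curie TEXT NOT NULL, curie TEXT NOT NULL)")
conn.executemany("INSERT INTO compendia VALUES (?, ?)", [
    ("MESH:D001", "MESH:D001"),
    ("MESH:D001", "CHEBI:100"),
    ("UNII:ABC123", "UNII:ABC123"),
    ("UNII:ABC123", "CHEBI:100"),  # same CURIE, second clique -> duplicate
])

# The same duplicate-detection query used by the report.
query = ("SELECT curie, COUNT(DISTINCT preferred_curie), GROUP_CONCAT(DISTINCT preferred_curie) "
         "FROM compendia GROUP BY curie HAVING COUNT(DISTINCT preferred_curie) > 1")
for curie, count, preferred_curies in conn.execute(query):
    print(curie, count, preferred_curies.split(","))
# -> CHEBI:100 2 ['MESH:D001', 'UNII:ABC123'] (concatenation order may vary)
```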
+""" +import json +import logging +import sqlite3 +from pathlib import Path + + +def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file): + # Start writing to the report file so Snakemake knows we're working. + Path(report_file).touch() + Path(sqlite_file).touch() + + # Open the SQLite file that we will use to keep track of duplicates. + # Connect to the SQLite database + conn = sqlite3.connect(sqlite_file + '.db') + c = conn.cursor() + + # Create a compendia table if it doesn't exist + c.execute('''CREATE TABLE IF NOT EXISTS synonyms ( + curie TEXT NOT NULL PRIMARY KEY UNIQUE, + biolink_type TEXT, + preferred_name TEXT, + preferred_name_lc TEXT + )''') + + # Go through all the compendia files in the order provided. + for synonyms_file_index, synonyms_file in enumerate(synonym_files): + # Go through every entry in each synonyms_file + logging.info(f"Reading synonyms file {synonyms_file} ({synonyms_file_index + 1}/{len(synonym_files)})") + + count_entries = 0 + with open(synonyms_file, 'r') as synonymsfile: + for line in synonymsfile: + entry = json.loads(line) + count_entries += 1 + + curie = entry['curie'] + if len(entry['types']) > 0: + biolink_type = 'biolink:' + entry['types'][0] + preferred_name = entry['preferred_name'] + preferred_name_lc = preferred_name.lower() + + # This should give us an error if we see the same CURIE in multiple files. + c.execute("INSERT INTO synonyms (curie, biolink_type, preferred_name, preferred_name_lc) VALUES (?, ?, ?, ?)", + (curie, biolink_type, preferred_name, preferred_name_lc)) + + logging.info(f"Read {count_entries} entries from {synonyms_file}.") + conn.commit() + + # Count the number of curie values in the synonyms table in SQLite. + c.execute("SELECT COUNT(curie) FROM synonyms") + curie_count = c.fetchone() + + logging.info(f"{curie_count} CURIEs loaded into {sqlite_file}") + + with open(report_file, 'w') as reportfile: + c.execute("SELECT COUNT(curie) FROM synonyms") + curie_count = c.fetchone() + + # Look for identical preferred_name_lc values. + c.execute("SELECT preferred_name_lc, COUNT(preferred_name_lc), GROUP_CONCAT(DISTINCT curie) FROM synonyms GROUP BY preferred_name_lc HAVING COUNT(preferred_name_lc) > 1 ORDER BY COUNT(preferred_name_lc) DESC;") + results = c.fetchall() + duplicates = [{'preferred_name_lc': duplicate[0], 'count': duplicate[1], 'curies': duplicate[2].split(',')} for duplicate in results] + + json.dump({ + 'curie_count': curie_count, + 'duplicates': duplicates + }, reportfile) + + # Close the database connection + conn.close() diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile index 13d58438..4dc1c078 100644 --- a/src/snakefiles/reports.snakefile +++ b/src/snakefiles/reports.snakefile @@ -2,6 +2,8 @@ import os from src.reports.compendia_per_file_reports import assert_files_in_directory, \ generate_content_report_for_compendium, summarize_content_report_for_compendia +from src.reports.index_wide_synonym_tests import report_on_index_wide_synonym_tests +from src.reports.index_wide_compendia_tests import report_on_index_wide_compendia_tests # Some paths we will use at multiple times in these reports. 
diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile
index 13d58438..4dc1c078 100644
--- a/src/snakefiles/reports.snakefile
+++ b/src/snakefiles/reports.snakefile
@@ -2,6 +2,8 @@ import os
 
 from src.reports.compendia_per_file_reports import assert_files_in_directory, \
     generate_content_report_for_compendium, summarize_content_report_for_compendia
+from src.reports.index_wide_synonym_tests import report_on_index_wide_synonym_tests
+from src.reports.index_wide_compendia_tests import report_on_index_wide_compendia_tests
 
 # Some paths we will use at multiple times in these reports.
 compendia_path = config['output_directory'] + '/compendia'
@@ -90,6 +92,23 @@ rule generate_summary_content_report_for_compendia:
     run:
         summarize_content_report_for_compendia(input.expected_content_reports, output.report_path)
 
+rule test_compendia_for_duplication:
+    input:
+        compendia_files = expand("{compendia_path}/{compendium_file}", compendia_path=compendia_path, compendium_file=compendia_files),
+    output:
+        sqlite_file = config['output_directory']+'/reports/duplication/compendia.sqlite3',
+        report_path = config['output_directory']+'/reports/duplication/compendia_duplication_report.json',
+    run:
+        report_on_index_wide_compendia_tests(input.compendia_files, output.sqlite_file, output.report_path)
+
+rule test_synonyms_for_duplication:
+    input:
+        synonyms_files = expand("{synonyms_path}/{synonym_file}", synonyms_path=synonyms_path, synonym_file=synonyms_files),
+    output:
+        sqlite_file = config['output_directory']+'/reports/duplication/synonyms.sqlite3',
+        report_path = config['output_directory']+'/reports/duplication/synonym_duplication_report.json',
+    run:
+        report_on_index_wide_synonym_tests(input.synonyms_files, output.sqlite_file, output.report_path)
 
 # Check that all the reports were built correctly.
 rule all_reports:
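The two rules write their outputs under reports/duplication/, so a downstream rule (or a manual check) can read the JSON reports directly. As a sketch, the synonyms test can also be exercised outside Snakemake; the input and output paths below are invented for illustration:

```python
import json
from src.reports.index_wide_synonym_tests import report_on_index_wide_synonym_tests

# Hypothetical paths for a one-off manual run.
report_on_index_wide_synonym_tests(
    ["babel_outputs/synonyms/Anatomy.txt"],
    "/tmp/synonyms.sqlite3",
    "/tmp/synonym_duplication_report.json",
)

with open("/tmp/synonym_duplication_report.json") as reportfile:
    report = json.load(reportfile)
print(report["curie_count"], "CURIEs,", len(report["duplicates"]), "duplicated preferred names")
```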