diff --git a/pyperf/_compare.py b/pyperf/_compare.py
index a83ee2a1..f8d3ebd5 100644
--- a/pyperf/_compare.py
+++ b/pyperf/_compare.py
@@ -49,6 +49,12 @@ def format_geometric_mean(norm_means):
     return format_normalized_mean(geo_mean)
 
 
+def get_tags_for_result(result):
+    """Return the "tags" metadata of result.ref.benchmark, [] if unset."""
+    metadata = result.ref.benchmark.get_metadata()
+    return metadata.get("tags", [])
+
+
 class CompareResult(object):
     def __init__(self, ref, changed, min_speed=None):
         # CompareData object
@@ -242,6 +246,12 @@ def __init__(self, benchmarks, args):
 
         self.show_name = (len(grouped_by_name) > 1)
 
+        tags = set()
+        for results in self.all_results:
+            for result in results:
+                tags.update(get_tags_for_result(result))
+        self.tags = sorted(tags)  # sorted list of unique tag names
+
     def compare_benchmarks(self, name, benchmarks):
         min_speed = self.min_speed
 
@@ -258,11 +268,11 @@ def compare_benchmarks(self, name, benchmarks):
         return results
 
     @staticmethod
-    def display_not_signiticant(not_significant):
+    def display_not_significant(not_significant):
         print("Benchmark hidden because not significant (%s): %s"
               % (len(not_significant), ', '.join(not_significant)))
 
-    def compare_suites_table(self):
+    def compare_suites_table(self, all_results):
         if self.group_by_speed:
             def sort_key(results):
                 result = results[0]
@@ -280,7 +290,7 @@ def sort_key(results):
 
         rows = []
         not_significant = []
-        for results in self.all_results:
+        for results in all_results:
             row = [results.name]
 
             ref_bench = results[0].ref.benchmark
@@ -324,14 +334,14 @@ def sort_key(results):
         if not_significant:
             if rows:
                 print()
-            self.display_not_signiticant(not_significant)
+            self.display_not_significant(not_significant)
 
-    def compare_suites_by_speed(self):
+    def compare_suites_by_speed(self, all_results):
         not_significant = []
         slower = []
         faster = []
         same = []
-        for results in self.all_results:
+        for results in all_results:
             result = results[0]
             if not result.significant:
                 not_significant.append(results.name)
@@ -372,14 +382,14 @@ def sort_key(item):
         if not self.quiet and not_significant:
             if empty_line:
                 print()
-            self.display_not_signiticant(not_significant)
+            self.display_not_significant(not_significant)
 
-    def compare_suites_list(self):
+    def compare_suites_list(self, all_results):
         not_significant = []
         empty_line = False
-        last_index = (len(self.all_results) - 1)
+        last_index = (len(all_results) - 1)  # last entry of the iterated list
 
-        for index, results in enumerate(self.all_results):
+        for index, results in enumerate(all_results):
             significant = any(result.significant for result in results)
             lines = []
             for result in results:
@@ -406,7 +416,7 @@ def compare_suites_list(self):
         if not self.quiet and not_significant:
             if empty_line:
                 print()
-            self.display_not_signiticant(not_significant)
+            self.display_not_significant(not_significant)
 
     def list_ignored(self):
         for suite, hidden in self.benchmarks.group_by_name_ignored():
@@ -416,9 +426,7 @@ def list_ignored(self):
             print("Ignored benchmarks (%s) of %s: %s"
                   % (len(hidden), suite.filename, ', '.join(sorted(hidden_names))))
 
-    def compare_geometric_mean(self):
-        all_results = self.all_results
-
+    def compare_geometric_mean(self, all_results):
         # use a list since two filenames can be identical,
         # even if results are different
         all_norm_means = []
@@ -443,16 +451,42 @@ def compare_geometric_mean(self):
             geo_mean = format_geometric_mean(all_norm_means[0][1])
             print(f'Geometric mean: {geo_mean}')
 
-    def compare(self):
+    def compare_suites(self, results):
+        """Display the comparison of results in the configured format.
+
+        Call compare_suites_table() if self.table is set. Otherwise, call
+        compare_suites_by_speed() or compare_suites_list(), depending on
+        self.group_by_speed, then compare_geometric_mean().
+        """
         if self.table:
-            self.compare_suites_table()
+            self.compare_suites_table(results)
         else:
             if self.group_by_speed:
-                self.compare_suites_by_speed()
+                self.compare_suites_by_speed(results)
             else:
-                self.compare_suites_list()
+                self.compare_suites_list(results)
 
-            self.compare_geometric_mean()
+            self.compare_geometric_mean(results)
+
+    def compare(self):
+        """Display one comparison per tag, then the overall comparison.
+
+        Without any tag, only the overall comparison is displayed,
+        without a title. Ignored benchmarks are then listed unless
+        self.quiet is set.
+        """
+        if self.tags:
+            for tag in self.tags:
+                display_title(f"Benchmarks with tag '{tag}':")
+                # Select the groups whose first result carries the tag
+                all_results = [
+                    results for results in self.all_results
+                    if tag in get_tags_for_result(results[0])
+                ]
+                self.compare_suites(all_results)
+                print()
+            display_title("All benchmarks:")
+        self.compare_suites(self.all_results)
 
         if not self.quiet:
             self.list_ignored()
diff --git a/pyperf/_metadata.py b/pyperf/_metadata.py
index c8cdabdc..b7a7ba8e 100644
--- a/pyperf/_metadata.py
+++ b/pyperf/_metadata.py
@@ -42,6 +42,12 @@ def is_positive(value):
     return (value >= 0)
 
 
+def is_tags(value):
+    # Valid tags: a list of non-empty strings, none of them being 'all'
+    return isinstance(value, list) and all(
+        isinstance(tag, str) and tag not in ('all', '') for tag in value)
+
+
 def parse_load_avg(value):
     if isinstance(value, NUMBER_TYPES):
         return value
@@ -62,6 +68,7 @@ def format_noop(value):
 LOOPS = _MetadataInfo(format_number, (int,), is_strictly_positive, 'integer')
 WARMUPS = _MetadataInfo(format_number, (int,), is_positive, 'integer')
 SECONDS = _MetadataInfo(format_seconds, NUMBER_TYPES, is_positive, 'second')
+TAGS = _MetadataInfo(format_generic, (list,), is_tags, 'tag')
 
 # Registry of metadata keys
 METADATA = {
@@ -84,6 +91,7 @@ def format_noop(value):
     'recalibrate_loops': LOOPS,
     'calibrate_warmups': WARMUPS,
     'recalibrate_warmups': WARMUPS,
+    'tags': TAGS,
 }
 
 DEFAULT_METADATA_INFO = _MetadataInfo(format_generic, METADATA_VALUE_TYPES, None, None)
diff --git a/pyperf/tests/test_perf_cli.py b/pyperf/tests/test_perf_cli.py
index a559e5b7..dbd5a393 100644
--- a/pyperf/tests/test_perf_cli.py
+++ b/pyperf/tests/test_perf_cli.py
@@ -330,6 +330,83 @@ def test_compare_to_cli(self):
         """
         self.check_command(expected, 'compare_to', '--table', "--group-by-speed", py36, py37)
 
+    def test_compare_to_cli_tags(self):
+        py36 = os.path.join(TESTDIR, 'mult_list_py36_tags.json')
+        py37 = os.path.join(TESTDIR, 'mult_list_py37_tags.json')
+
+        # 2 files, list output: one section per tag, then all benchmarks
+        expected = """
+            Benchmarks with tag 'bar':
+            ==========================
+
+            [1,2]*1000: Mean +- std dev: [mult_list_py36_tags] 3.70 us +- 0.05 us -> [mult_list_py37_tags] 5.28 us +- 0.09 us: 1.42x slower
+            [1,2,3]*1000: Mean +- std dev: [mult_list_py36_tags] 4.61 us +- 0.13 us -> [mult_list_py37_tags] 6.05 us +- 0.11 us: 1.31x slower
+
+            Geometric mean: 1.37x slower
+
+            Benchmarks with tag 'foo':
+            ==========================
+
+            [1]*1000: Mean +- std dev: [mult_list_py36_tags] 2.13 us +- 0.06 us -> [mult_list_py37_tags] 2.09 us +- 0.04 us: 1.02x faster
+            [1,2]*1000: Mean +- std dev: [mult_list_py36_tags] 3.70 us +- 0.05 us -> [mult_list_py37_tags] 5.28 us +- 0.09 us: 1.42x slower
+
+            Geometric mean: 1.18x slower
+
+            All benchmarks:
+            ===============
+
+            [1]*1000: Mean +- std dev: [mult_list_py36_tags] 2.13 us +- 0.06 us -> [mult_list_py37_tags] 2.09 us +- 0.04 us: 1.02x faster
+            [1,2]*1000: Mean +- std dev: [mult_list_py36_tags] 3.70 us +- 0.05 us -> [mult_list_py37_tags] 5.28 us +- 0.09 us: 1.42x slower
+            [1,2,3]*1000: Mean +- std dev: [mult_list_py36_tags] 4.61 us +- 0.13 us -> [mult_list_py37_tags] 6.05 us +- 0.11 us: 1.31x slower
+
+            Geometric mean: 1.22x slower
+        """
+        self.check_command(expected, 'compare_to', py36, py37)
+
+        expected = """
+            Benchmarks with tag 'bar':
+            ==========================
+
+            +----------------+---------------------+-----------------------+
+            | Benchmark      | mult_list_py36_tags | mult_list_py37_tags   |
+            +================+=====================+=======================+
+            | [1,2]*1000     | 3.70 us             | 5.28 us: 1.42x slower |
+            +----------------+---------------------+-----------------------+
+            | [1,2,3]*1000   | 4.61 us             | 6.05 us: 1.31x slower |
+            +----------------+---------------------+-----------------------+
+            | Geometric mean | (ref)               | 1.37x slower          |
+            +----------------+---------------------+-----------------------+
+
+            Benchmarks with tag 'foo':
+            ==========================
+
+            +----------------+---------------------+-----------------------+
+            | Benchmark      | mult_list_py36_tags | mult_list_py37_tags   |
+            +================+=====================+=======================+
+            | [1]*1000       | 2.13 us             | 2.09 us: 1.02x faster |
+            +----------------+---------------------+-----------------------+
+            | [1,2]*1000     | 3.70 us             | 5.28 us: 1.42x slower |
+            +----------------+---------------------+-----------------------+
+            | Geometric mean | (ref)               | 1.18x slower          |
+            +----------------+---------------------+-----------------------+
+
+            All benchmarks:
+            ===============
+
+            +----------------+---------------------+-----------------------+
+            | Benchmark      | mult_list_py36_tags | mult_list_py37_tags   |
+            +================+=====================+=======================+
+            | [1]*1000       | 2.13 us             | 2.09 us: 1.02x faster |
+            +----------------+---------------------+-----------------------+
+            | [1,2]*1000     | 3.70 us             | 5.28 us: 1.42x slower |
+            +----------------+---------------------+-----------------------+
+            | [1,2,3]*1000   | 4.61 us             | 6.05 us: 1.31x slower |
+            +----------------+---------------------+-----------------------+
+            | Geometric mean | (ref)               | 1.22x slower          |
+            +----------------+---------------------+-----------------------+
+        """
+        self.check_command(expected, 'compare_to', '--table', py36, py37)
+
     def test_compare_to_cli_min_speed(self):
         py36 = os.path.join(TESTDIR, 'mult_list_py36.json')
         py37 = os.path.join(TESTDIR, 'mult_list_py37.json')