diff --git a/.circleci/config.yml b/.circleci/config.yml
index 44beab1d..73ce2a54 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -11,7 +11,7 @@ jobs:
       # Important: Don't change this otherwise we will stop testing the earliest
       # python version we have to support.
       - image: python:3.6-buster
-    resource_class: small
+    resource_class: medium
     parallelism: 6
     steps:
       - checkout # checkout source code to working directory
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 79ed31b7..e6bac2b0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,13 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.14.11](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.12) - 2022-08-05
+## [0.14.13](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.13) - 2022-08-10
+
+### Fixed
+- Validate Segmentation IOU being thresholded and non max suppressed.
+- Validate Segmentation metrics now ignore out of taxonomy indexes for metrics
+
+## [0.14.12](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.12) - 2022-08-05
 
 ### Added
 - Added auto-paginated `Slice.export_predictions_generator`
diff --git a/nucleus/metrics/segmentation_metrics.py b/nucleus/metrics/segmentation_metrics.py
index e3eea69b..7f66ba52 100644
--- a/nucleus/metrics/segmentation_metrics.py
+++ b/nucleus/metrics/segmentation_metrics.py
@@ -38,7 +38,7 @@ def __init__(
         prediction_filters: Optional[
             Union[ListOfOrAndFilters, ListOfAndFilters]
         ] = None,
-        iou_threshold: float = 0.5,
+        iou_threshold: Optional[float] = None,
     ):
         """Initializes PolygonMetric abstract object.
 
@@ -61,6 +61,8 @@ def __init__(
                 each describe a single column predicate. The list of inner predicates is
                 interpreted as a conjunction (AND), forming a more selective `and` multiple field predicate.
                 Finally, the most outer list combines these filters as a disjunction (OR).
+            iou_threshold: Threshold to consider detections under IOU to be false positives. None if no
+                non-max-suppression is supposed to happen.
         """
         # TODO -> add custom filtering to Segmentation(Annotation|Prediction).annotations.(metadata|label)
         super().__init__(annotation_filters, prediction_filters)
@@ -135,7 +137,7 @@ def _calculate_confusion_matrix(
         annotation_img,
         prediction,
         prediction_img,
-        iou_threshold,
+        iou_threshold=None,
     ) -> Tuple[np.ndarray, Set[int]]:
         """This calculates a confusion matrix with ground_truth_index X predicted_index summary
 
@@ -166,17 +168,19 @@
         confusion = self._filter_confusion_matrix(
             confusion, annotation, prediction
         )
-        confusion = non_max_suppress_confusion(confusion, iou_threshold)
-        false_positive = Segment(FALSE_POSITIVES, index=confusion.shape[0] - 1)
-        if annotation.annotations[-1].label != FALSE_POSITIVES:
-            annotation.annotations.append(false_positive)
-            if annotation.annotations is not prediction.annotations:
-                # Probably likely that this structure is re-used -> check if same list instance and only append once
-                # TODO(gunnar): Should this uniqueness be handled by the base class?
-                prediction.annotations.append(false_positive)
+        if iou_threshold is not None:
+            confusion = non_max_suppress_confusion(confusion, iou_threshold)
+            false_positive = Segment(
+                FALSE_POSITIVES, index=confusion.shape[0] - 1
+            )
+            if annotation.annotations[-1].label != FALSE_POSITIVES:
+                annotation.annotations.append(false_positive)
+                if annotation.annotations is not prediction.annotations:
+                    # Probably likely that this structure is re-used -> check if same list instance and only append once
+                    # TODO(gunnar): Should this uniqueness be handled by the base class?
+                    prediction.annotations.append(false_positive)
 
         # TODO(gunnar): Detect non_taxonomy classes for segmentation as well as instance segmentation
-        non_taxonomy_classes = set()
         if self._is_instance_segmentation(annotation, prediction):
             (
                 confusion,
@@ -198,6 +202,12 @@
             for segment in annotation.annotations
             if segment.label in missing_or_filtered_labels
         }
 
+        missing_indexes = (
+            set(range(confusion.shape[0]))
+            - set(a.index for a in annotation.annotations)
+            - set(a.index for a in prediction.annotations)
+        )
+        non_taxonomy_classes.update(missing_indexes)
         return confusion, non_taxonomy_classes
 
@@ -246,7 +256,6 @@ def __init__(
         prediction_filters: Optional[
             Union[ListOfOrAndFilters, ListOfAndFilters]
         ] = None,
-        iou_threshold: float = 0.5,
     ):
         """Initializes PolygonIOU object.
 
@@ -273,7 +282,6 @@
         super().__init__(
             annotation_filters,
             prediction_filters,
-            iou_threshold,
         )
 
     def _metric_impl(
@@ -288,18 +296,14 @@
             annotation_img,
             prediction,
             prediction_img,
-            self.iou_threshold,
         )
 
         with np.errstate(divide="ignore", invalid="ignore"):
-            tp = confusion[:-1, :-1]
-            fp = confusion[:, -1]
-            iou = np.diag(tp) / (
-                tp.sum(axis=1) + tp.sum(axis=0) + fp.sum() - np.diag(tp)
+            iou = np.diag(confusion) / (
+                confusion.sum(axis=1)
+                + confusion.sum(axis=0)
+                - np.diag(confusion)
             )
-            non_taxonomy_classes = non_taxonomy_classes - {
-                confusion.shape[1] - 1
-            }
             iou.put(list(non_taxonomy_classes), np.nan)
             mean_iou = np.nanmean(iou)
             return ScalarResult(value=mean_iou, weight=annotation_img.size)  # type: ignore
diff --git a/nucleus/metrics/segmentation_to_poly_metrics.py b/nucleus/metrics/segmentation_to_poly_metrics.py
index 6eabb355..da8f2084 100644
--- a/nucleus/metrics/segmentation_to_poly_metrics.py
+++ b/nucleus/metrics/segmentation_to_poly_metrics.py
@@ -252,7 +252,6 @@ def configure_metric(self):
             metric = SegmentationIOU(
                 self.annotation_filters,
                 self.prediction_filters,
-                self.iou_threshold,
             )
         else:
             metric = PolygonIOU(
diff --git a/pyproject.toml b/pyproject.toml
index f71f60ef..df6a8395 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ exclude = '''
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.14.12"
+version = "0.14.13"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license = "MIT"
 authors = ["Scale AI Nucleus Team "]
diff --git a/tests/metrics/test_segmentation.py b/tests/metrics/test_segmentation.py
index 5a2ac8f5..680bf4f8 100644
--- a/tests/metrics/test_segmentation.py
+++ b/tests/metrics/test_segmentation.py
@@ -90,7 +90,7 @@ def compose_input_variables(setup: SegmentationTestSetup):
 def test_segmentation_iou(setup):
     annotation, prediction, url_to_img = compose_input_variables(setup)
 
-    metric = SegmentationIOU(iou_threshold=setup.iou_threshold)
+    metric = SegmentationIOU()
     metric.loader = InMemoryLoader(url_to_img)
     result = metric(
         AnnotationList(segmentation_annotations=[annotation]),
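
For context on the numerical change above: the rewritten `_metric_impl` in `SegmentationIOU` computes per-class IOU directly from the full confusion matrix (diagonal over row sum plus column sum minus diagonal) and masks out-of-taxonomy indexes before averaging. The snippet below is a minimal standalone sketch of that computation; the 3x3 confusion matrix and the `non_taxonomy_classes` set are made-up example values, not data from the repository.

```python
import numpy as np

# Hypothetical 3-class confusion matrix in pixel counts
# (rows = ground truth index, columns = predicted index). Illustrative only.
confusion = np.array(
    [
        [50, 5, 0],
        [3, 40, 2],
        [0, 0, 10],
    ]
)
# Suppose index 2 is outside the annotation/prediction taxonomy.
non_taxonomy_classes = {2}

with np.errstate(divide="ignore", invalid="ignore"):
    # Per-class IOU: intersection (diagonal) over union (row sum + column sum - diagonal).
    iou = np.diag(confusion) / (
        confusion.sum(axis=1) + confusion.sum(axis=0) - np.diag(confusion)
    )

# Drop out-of-taxonomy indexes so they do not skew the mean.
iou.put(list(non_taxonomy_classes), np.nan)
mean_iou = np.nanmean(iou)
print(iou, mean_iou)
```

With these example values, class 2 is excluded and `mean_iou` comes out to roughly 0.83, the average of the two in-taxonomy IOUs (50/58 and 40/50).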