diff --git a/torchvision/models/detection/_utils.py b/torchvision/models/detection/_utils.py
index 3595114f24d..58d547479ae 100644
--- a/torchvision/models/detection/_utils.py
+++ b/torchvision/models/detection/_utils.py
@@ -41,8 +41,8 @@ def __call__(self, matched_idxs):
         pos_idx = []
         neg_idx = []
         for matched_idxs_per_image in matched_idxs:
-            positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1)
-            negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1)
+            positive = torch.stack(torch.where((matched_idxs_per_image >= 1) > 0), dim=1).squeeze(1)
+            negative = torch.stack(torch.where((matched_idxs_per_image == 0) > 0), dim=1).squeeze(1)
 
             num_pos = int(self.batch_size_per_image * self.positive_fraction)
             # protect against not enough positive examples
@@ -317,9 +317,11 @@ def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):
         # For each gt, find the prediction with which it has highest quality
         highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)
         # Find highest quality match available, even if it is low, including ties
-        gt_pred_pairs_of_highest_quality = torch.nonzero(
-            match_quality_matrix == highest_quality_foreach_gt[:, None]
-        )
+        gt_pred_pairs_of_highest_quality = torch.stack(
+            torch.where(
+                (match_quality_matrix == highest_quality_foreach_gt[:, None]) > 0
+            ), dim=1
+        ).squeeze(1)
         # Example gt_pred_pairs_of_highest_quality:
         #   tensor([[    0, 39796],
         #           [    1, 32055],
diff --git a/torchvision/models/detection/generalized_rcnn.py b/torchvision/models/detection/generalized_rcnn.py
index 1ee0542c9c6..cd846057590 100644
--- a/torchvision/models/detection/generalized_rcnn.py
+++ b/torchvision/models/detection/generalized_rcnn.py
@@ -86,7 +86,7 @@ def forward(self, images, targets=None):
                 degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
                 if degenerate_boxes.any():
                     # print the first degenrate box
-                    bb_idx = degenerate_boxes.any(dim=1).nonzero().view(-1)[0]
+                    bb_idx = torch.stack(torch.where((degenerate_boxes.any(dim=1)) > 0), dim=1).view(-1)[0]
                     degen_bb: List[float] = boxes[bb_idx].tolist()
                     raise ValueError("All bounding boxes should have positive height and width."
                                      " Found invaid box {} for target at index {}."
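The rewrite relies on torch.nonzero(cond) and torch.stack(torch.where(cond), dim=1) producing the same index tensor: single-argument torch.where returns one index tensor per dimension of the condition. A minimal sketch checking the equivalence for the 1-D sampler case and the 2-D matcher case above (toy tensors, not taken from the patch):

import torch

# 1-D mask, as in BalancedPositiveNegativeSampler.__call__ (values made up)
matched = torch.tensor([2, 0, 1, -1, 0])
old = torch.nonzero(matched >= 1).squeeze(1)
new = torch.stack(torch.where((matched >= 1) > 0), dim=1).squeeze(1)
assert torch.equal(old, new)  # both give tensor([0, 2])

# 2-D mask, as in Matcher.set_low_quality_matches_: torch.where yields one
# index tensor per dimension, so stacking along dim=1 gives (N, 2) pairs;
# the trailing .squeeze(1) is a no-op on that shape.
quality = torch.tensor([[0.1, 0.9], [0.5, 0.5]])
best, _ = quality.max(dim=1)
old = torch.nonzero(quality == best[:, None])
new = torch.stack(torch.where((quality == best[:, None]) > 0), dim=1).squeeze(1)
assert torch.equal(old, new)  # both give tensor([[0, 1], [1, 0], [1, 1]])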
diff --git a/torchvision/models/detection/roi_heads.py b/torchvision/models/detection/roi_heads.py
index 82ba6e8b5c0..0e7efe6c545 100644
--- a/torchvision/models/detection/roi_heads.py
+++ b/torchvision/models/detection/roi_heads.py
@@ -37,7 +37,7 @@ def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
     # get indices that correspond to the regression targets for
     # the corresponding ground truth labels, to be used with
     # advanced indexing
-    sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
+    sampled_pos_inds_subset = torch.stack(torch.where((labels > 0) > 0), dim=1).squeeze(1)
     labels_pos = labels[sampled_pos_inds_subset]
     N, num_classes = class_logits.shape
     box_regression = box_regression.reshape(N, -1, 4)
@@ -296,7 +296,7 @@ def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched
     keypoint_targets = torch.cat(heatmaps, dim=0)
     valid = torch.cat(valid, dim=0).to(dtype=torch.uint8)
-    valid = torch.nonzero(valid).squeeze(1)
+    valid = torch.stack(torch.where(valid > 0), dim=1).squeeze(1)
 
     # torch.mean (in binary_cross_entropy_with_logits) does'nt
     # accept empty tensors, so handle it sepaartely
@@ -604,7 +604,7 @@ def subsample(self, labels):
         for img_idx, (pos_inds_img, neg_inds_img) in enumerate(
             zip(sampled_pos_inds, sampled_neg_inds)
         ):
-            img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1)
+            img_sampled_inds = torch.stack(torch.where((pos_inds_img | neg_inds_img) > 0), dim=1).squeeze(1)
             sampled_inds.append(img_sampled_inds)
         return sampled_inds
 
@@ -700,7 +700,7 @@ def postprocess_detections(self,
             labels = labels.reshape(-1)
 
             # remove low scoring boxes
-            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
+            inds = torch.stack(torch.where((scores > self.score_thresh) > 0), dim=1).squeeze(1)
             boxes, scores, labels = boxes[inds], scores[inds], labels[inds]
 
             # remove empty boxes
@@ -784,7 +784,7 @@ def forward(self,
             mask_proposals = []
             pos_matched_idxs = []
             for img_id in range(num_images):
-                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
+                pos = torch.stack(torch.where((labels[img_id] > 0) > 0), dim=1).squeeze(1)
                 mask_proposals.append(proposals[img_id][pos])
                 pos_matched_idxs.append(matched_idxs[img_id][pos])
         else:
@@ -832,7 +832,7 @@ def forward(self,
             pos_matched_idxs = []
             assert matched_idxs is not None
             for img_id in range(num_images):
-                pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
+                pos = torch.stack(torch.where((labels[img_id] > 0) > 0), dim=1).squeeze(1)
                 keypoint_proposals.append(proposals[img_id][pos])
                 pos_matched_idxs.append(matched_idxs[img_id][pos])
         else:
diff --git a/torchvision/models/detection/rpn.py b/torchvision/models/detection/rpn.py
index 35cd224cfbe..2ca0b0f13b3 100644
--- a/torchvision/models/detection/rpn.py
+++ b/torchvision/models/detection/rpn.py
@@ -430,8 +430,8 @@ def compute_loss(self, objectness, pred_bbox_deltas, labels, regression_targets)
         """
         sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
-        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
-        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)
+        sampled_pos_inds = torch.stack(torch.where((torch.cat(sampled_pos_inds, dim=0)) > 0), dim=1).squeeze(1)
+        sampled_neg_inds = torch.stack(torch.where((torch.cat(sampled_neg_inds, dim=0)) > 0), dim=1).squeeze(1)
 
         sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)
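Every call site in roi_heads.py and rpn.py indexes with a 1-D mask, so the stacked result has shape (N, 1) and is squeezed back to a flat index tensor. The extra "> 0" is a no-op on masks that are already bool; it only does work for non-bool masks such as the uint8 valid tensor in keypointrcnn_loss. A toy check (made-up values, not from the test suite):

import torch

# bool mask: "(labels > 0) > 0" is the same mask, kept for uniformity
labels = torch.tensor([0, 3, 0, 1])
inds = torch.stack(torch.where((labels > 0) > 0), dim=1).squeeze(1)
assert torch.equal(inds, torch.nonzero(labels > 0).squeeze(1))  # tensor([1, 3])

# uint8 mask: here "> 0" actually converts the mask to bool first
valid = torch.tensor([1, 0, 2], dtype=torch.uint8)
print(torch.stack(torch.where(valid > 0), dim=1).squeeze(1))  # tensor([0, 2])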
diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py
index 6183d259212..5f7af8fb6ea 100644
--- a/torchvision/ops/boxes.py
+++ b/torchvision/ops/boxes.py
@@ -100,7 +100,7 @@ def remove_small_boxes(boxes: Tensor, min_size: float) -> Tensor:
     """
     ws, hs = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]
     keep = (ws >= min_size) & (hs >= min_size)
-    keep = keep.nonzero().squeeze(1)
+    keep = torch.stack(torch.where(keep > 0), dim=1).squeeze(1)
     return keep
diff --git a/torchvision/ops/poolers.py b/torchvision/ops/poolers.py
index 32734cff86a..9c0ea062640 100644
--- a/torchvision/ops/poolers.py
+++ b/torchvision/ops/poolers.py
@@ -22,7 +22,7 @@ def _onnx_merge_levels(levels: Tensor, unmerged_results: List[Tensor]) -> Tensor
                           first_result.size(2), first_result.size(3)),
                          dtype=dtype, device=device)
     for level in range(len(unmerged_results)):
-        index = (levels == level).nonzero().view(-1, 1, 1, 1)
+        index = torch.stack(torch.where((levels == level) > 0), dim=1).view(-1, 1, 1, 1)
         index = index.expand(index.size(0),
                              unmerged_results[level].size(1),
                              unmerged_results[level].size(2),
@@ -232,7 +232,7 @@ def forward(
         tracing_results = []
         for level, (per_level_feature, scale) in enumerate(zip(x_filtered, scales)):
-            idx_in_level = torch.nonzero(levels == level).squeeze(1)
+            idx_in_level = torch.stack(torch.where((levels == level) > 0), dim=1).squeeze(1)
             rois_per_level = rois[idx_in_level]
 
             result_idx_in_level = roi_align(
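In poolers.py the same rewrite feeds two different index shapes: a flat per-level gather in forward, and an (N, 1, 1, 1) scatter index in the ONNX merge path. A small sketch of the per-level gather, with made-up level assignments:

import torch

levels = torch.tensor([0, 1, 0, 2, 1])  # FPN level per RoI (made up)
for level in range(3):
    idx_in_level = torch.stack(torch.where((levels == level) > 0), dim=1).squeeze(1)
    print(level, idx_in_level)
# 0 tensor([0, 2])
# 1 tensor([1, 4])
# 2 tensor([3])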