facebookresearch · stephenroller · Apr 27, 2022 · Apr 26, 2022 · Apr 26, 2022 · Apr 26, 2022
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -139,20 +139,6 @@ commands:
             python -c 'import torch; print("Torch version:", torch.__version__)'
             python -m torch.utils.collect_env
 
-  installdetectrondeps:
-    description: Install opencv, vqa-maskrcnn-benchmark
-    steps:
-      - run:
-          name: Install opencv, vqa-maskrcnn-benchmark
-          command: |
-            if (! python -c 'import maskrcnn_benchmark')
-            then
-                python -m pip install yacs 'opencv-python~=4.3.0.00'
-                git clone https://gitlab.com/vedanuj/vqa-maskrcnn-benchmark.git maskbench
-                cd maskbench; git checkout 4c168a637f45dc69efed384c00a7f916f57b25b8 -b stable
-                python setup.py install; cd -
-            fi
-
   installcrowdsourcingdeps:
     description: Install Mephisto
     steps:
@@ -222,26 +208,26 @@ commands:
             - setupcuda
       - fixgit
       - restore_cache:
-          key: deps-20220328-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
+          key: deps-20220426-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
       - setup
       - installdeps
       - << parameters.more_installs >>
       - save_cache:
-          key: deps-20220328-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
+          key: deps-20220426-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
           paths:
             - "~/venv/bin"
             - "~/venv/lib"
       - findtests:
           marker: << parameters.marker >>
       - restore_cache:
-          key: data-20220328-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
+          key: data-20220426-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
       - run:
           name: Run tests
           no_output_timeout: 60m
           command: |
             coverage run -m pytest -m << parameters.marker >> << parameters.pytest_flags >> --junitxml=test-results/junit.xml
       - save_cache:
-          key: data-20220328-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
+          key: data-20220426-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
           paths:
             - "~/ParlAI/data"
       - codecov
@@ -258,12 +244,12 @@ commands:
       - checkout
       - fixgit
       - restore_cache:
-          key: deps-20220328-bw-{{ checksum "requirements.txt" }}
+          key: deps-20220426-bw-{{ checksum "requirements.txt" }}
       - setup
       - installdeps
       - installtorchgpu
       - save_cache:
-          key: deps-20220328-bw-{{ checksum "requirements.txt" }}
+          key: deps-20220426-bw-{{ checksum "requirements.txt" }}
           paths:
             - "~/venv/bin"
             - "~/venv/lib"
@@ -347,7 +333,6 @@ jobs:
       - runtests:
           more_installs:
             - installtorchgpu
-            - installdetectrondeps
           install_cuda: true
           cachename: gpu18
           marker: unit
@@ -360,7 +345,6 @@ jobs:
       - runtests:
           more_installs:
             - installtorchgpu
-            - installdetectrondeps
           install_cuda: true
           cachename: nightly
           marker: nightly_gpu

diff --git a/parlai/core/image_featurizers.py b/parlai/core/image_featurizers.py
@@ -12,7 +12,6 @@
 from parlai.core.opt import Opt
 import parlai.utils.logging as logging
 from parlai.utils.io import PathManager
-from parlai.zoo.detectron.build import build
 
 import os
 from PIL import Image
@@ -44,7 +43,6 @@
     'resnext101_32x16d_wsl_spatial': ['resnext101_32x16d_wsl', -2],
     'resnext101_32x32d_wsl_spatial': ['resnext101_32x32d_wsl', -2],
     'resnext101_32x48d_wsl_spatial': ['resnext101_32x48d_wsl', -2],
-    'faster_r_cnn_152_32x8d': ['', -1],
 }
 
 
@@ -72,8 +70,6 @@ def __init__(self, opt):
                 self._init_resnet_cnn()
             elif 'resnext' in self.image_mode:
                 self._init_resnext_cnn()
-            elif 'faster_r_cnn_152_32x8d' in self.image_mode:
-                self._init_faster_r_cnn()
             else:
                 raise RuntimeError(
                     'Image mode {} not supported'.format(self.image_mode)
@@ -84,7 +80,7 @@ def is_spatial(cls, image_mode: str):
         """
         Return if image mode has spatial dimensionality.
         """
-        return any([s in image_mode for s in ['spatial', 'faster_r_cnn']])
+        return any([s in image_mode for s in ['spatial']])
 
     def _init_transform(self):
         # initialize the transform function using torch vision.
@@ -158,12 +154,6 @@ def _init_resnext_cnn(self):
         if self.use_cuda:
             self.netCNN.cuda()
 
-    def _init_faster_r_cnn(self):
-        """
-        Initialize Detectron Model.
-        """
-        self.netCNN = DetectronFeatureExtractor(self.opt, self.use_cuda)
-
     def _image_mode_switcher(self):
         if self.image_mode not in IMAGE_MODE_SWITCHER:
             raise NotImplementedError(
@@ -190,7 +180,7 @@ def extract(self, image, path=None):
             with torch.no_grad():
                 feature = self.netCNN(transform)
         else:
-            feature = self.netCNN.get_detectron_features([image])[0]
+            raise RuntimeError("detectron support has been removed.")
         # save the feature
         if path is not None:
             import parlai.utils.torch as torch_utils
@@ -268,229 +258,3 @@ def load(self, path):
         else:
             with PathManager.open(new_path, 'rb') as f:
                 return torch.load(f)
-
-
-class DetectronFeatureExtractor:
-    """
-    Code adapted from https://github.com/facebookresearch/mmf/blob/main/tools/scripts/
-    features/extract_features_vmb.py.
-
-    Docstrings and type annotations added post hoc.
-    """
-
-    MAX_SIZE = 1333
-    MIN_SIZE = 800
-
-    def __init__(self, opt: Opt, use_cuda: bool = False):
-        self.opt = opt
-        self.use_cuda = use_cuda
-        self.num_features = 100
-
-        try:
-            import cv2
-
-            self.cv2 = cv2
-        except ImportError:
-            raise ImportError("Please install opencv: pip install opencv-python")
-        try:
-            import maskrcnn_benchmark  # noqa
-        except ImportError:
-            raise ImportError(
-                'Please install vqa-maskrcnn-benchmark to use faster_r_cnn_152_32x8d features: '
-                '1. git clone https://gitlab.com/vedanuj/vqa-maskrcnn-benchmark.git\n'
-                '2. cd vqa-maskrcnn-benchmark\n'
-                '3. git checkout 4c168a637f45dc69efed384c00a7f916f57b25b8 -b stable\n'
-                '4. python setup.py develop'
-            )
-        self._build_detection_model()
-
-    def _build_detection_model(self):
-        """
-        Build the detection model.
-
-        Builds a CNN using the vqa-maskrcnn-benchmark repository.
-        """
-        from maskrcnn_benchmark.config import cfg
-        from maskrcnn_benchmark.modeling.detector import build_detection_model
-        from maskrcnn_benchmark.utils.model_serialization import load_state_dict
-
-        dp = self.opt['datapath']
-        build(dp)
-        cfg_path = os.path.join(dp, 'models/detectron/detectron_config.yaml')
-        model_path = os.path.join(dp, 'models/detectron/detectron_model.pth')
-
-        cfg.merge_from_file(cfg_path)
-        cfg.freeze()
-
-        model = build_detection_model(cfg)
-        checkpoint = torch.load(model_path, map_location=torch.device("cpu"))
-
-        load_state_dict(model, checkpoint.pop("model"))
-
-        if self.use_cuda:
-            model.to("cuda")
-        model.eval()
-        self.detection_model = model
-
-    def _image_transform(
-        self, img: "Image"
-    ) -> Tuple[torch.Tensor, float, Dict[str, int]]:
-        """
-        Using Open-CV, perform image transform on a raw image.
-
-        :param img:
-            raw image to transform
-
-        :return (img, scale, info):
-            img: tensor representation of image
-            scale: scale of image WRT self.MIN_SIZE & self.MAX_SIZE
-            info: dict containing values for img width & height
-        """
-        im = np.array(img).astype(np.float32)
-
-        if im.shape[-1] > 3:
-            im = np.array(img.convert("RGB")).astype(np.float32)
-
-        # IndexError: too many indices for array, grayscale images
-        if len(im.shape) < 3:
-            im = np.repeat(im[:, :, np.newaxis], 3, axis=2)
-
-        im = im[:, :, ::-1]
-        im -= np.array([102.9801, 115.9465, 122.7717])
-        im_shape = im.shape
-        im_height = im_shape[0]
-        im_width = im_shape[1]
-        im_size_min = np.min(im_shape[0:2])
-        im_size_max = np.max(im_shape[0:2])
-
-        # Scale based on minimum size
-        im_scale = self.MIN_SIZE / im_size_min
-
-        # Prevent the biggest axis from being more than max_size
-        # If bigger, scale it down
-        if np.round(im_scale * im_size_max) > self.MAX_SIZE:
-            im_scale = self.MAX_SIZE / im_size_max
-
-        im = self.cv2.resize(
-            im,
-            None,
-            None,
-            fx=im_scale,
-            fy=im_scale,
-            interpolation=self.cv2.INTER_LINEAR,
-        )
-        img = torch.from_numpy(im).permute(2, 0, 1)
-
-        im_info = {"width": im_width, "height": im_height}
-
-        return img, im_scale, im_info
-
-    def _process_feature_extraction(
-        self,
-        output: torch.Tensor,
-        im_scales: List[float],
-        im_infos: List[Dict[str, int]],
-        feature_name: str = "fc6",
-        conf_thresh: int = 0,
-    ):
-        """
-        Post-process feature extraction from the detection model.
-
-        :param output:
-            output from the detection model
-        :param im_scales:
-            list of scales for the processed images
-        :param im_infos:
-            list of dicts containing width/height for images
-        :param feature_name:
-            which feature to extract for the image
-        :param conf_thresh:
-            threshold for bounding box scores (?)
-
-        :return (feature_list, info_list):
-            return list of processed image features, and list of information for each image
-        """
-        from maskrcnn_benchmark.layers import nms
-
-        batch_size = len(output[0]["proposals"])
-        n_boxes_per_image = [len(boxes) for boxes in output[0]["proposals"]]
-        score_list = output[0]["scores"].split(n_boxes_per_image)
-        score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
-        feats = output[0][feature_name].split(n_boxes_per_image)
-        cur_device = score_list[0].device
-
-        feat_list = []
-        info_list = []
-
-        for i in range(batch_size):
-            dets = output[0]["proposals"][i].bbox / im_scales[i]
-            scores = score_list[i]
-            max_conf = torch.zeros(scores.shape[0]).to(cur_device)
-            conf_thresh_tensor = torch.full_like(max_conf, conf_thresh)
-            start_index = 1
-            # Column 0 of the scores matrix is for the background class
-            for cls_ind in range(start_index, scores.shape[1]):
-                cls_scores = scores[:, cls_ind]
-                keep = nms(dets, cls_scores, 0.5)
-                max_conf[keep] = torch.where(
-                    # Better than max one till now and minimally greater
-                    # than conf_thresh
-                    (cls_scores[keep] > max_conf[keep])
-                    & (cls_scores[keep] > conf_thresh_tensor[keep]),
-                    cls_scores[keep],
-                    max_conf[keep],
-                )
-
-            sorted_scores, sorted_indices = torch.sort(max_conf, descending=True)
-            num_boxes = (sorted_scores[: self.num_features] != 0).sum()
-            keep_boxes = sorted_indices[: self.num_features]
-            feat_list.append(feats[i][keep_boxes])
-            bbox = output[0]["proposals"][i][keep_boxes].bbox / im_scales[i]
-            # Predict the class label using the scores
-            objects = torch.argmax(scores[keep_boxes][:, start_index:], dim=1)
-
-            info_list.append(
-                {
-                    "bbox": bbox.cpu().numpy(),
-                    "num_boxes": num_boxes.item(),
-                    "objects": objects.cpu().numpy(),
-                    "cls_prob": scores[keep_boxes][:, start_index:].cpu().numpy(),
-                    "image_width": im_infos[i]["width"],
-                    "image_height": im_infos[i]["height"],
-                }
-            )
-
-        return feat_list, info_list
-
-    def get_detectron_features(self, images: List["Image"]) -> List[torch.Tensor]:
-        """
-        Extract detectron features.
-
-        :param images:
-            a list of PIL Images
-
-        :return features:
-            return a list of features
-        """
-        from maskrcnn_benchmark.structures.image_list import to_image_list
-
-        img_tensor, im_scales, im_infos = [], [], []
-
-        for image in images:
-            im, im_scale, im_info = self._image_transform(image)
-            img_tensor.append(im)
-            im_scales.append(im_scale)
-            im_infos.append(im_info)
-
-        # Image dimensions should be divisible by 32, to allow convolutions
-        # in detector to work
-        current_img_list = to_image_list(img_tensor, size_divisible=32)
-        if self.use_cuda:
-            current_img_list = current_img_list.to("cuda")
-
-        with torch.no_grad():
-            output = self.detection_model(current_img_list)
-
-        features, _ = self._process_feature_extraction(output, im_scales, im_infos)
-
-        return features