diff --git a/.circleci/config.yml b/.circleci/config.yml
index 39245cf0261..943f3a4aa82 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -139,20 +139,6 @@ commands:
             python -c 'import torch; print("Torch version:", torch.__version__)'
             python -m torch.utils.collect_env
 
-  installdetectrondeps:
-    description: Install opencv, vqa-maskrcnn-benchmark
-    steps:
-      - run:
-          name: Install opencv, vqa-maskrcnn-benchmark
-          command: |
-            if (! python -c 'import maskrcnn_benchmark')
-            then
-              python -m pip install yacs 'opencv-python~=4.3.0.00'
-              git clone https://gitlab.com/vedanuj/vqa-maskrcnn-benchmark.git maskbench
-              cd maskbench; git checkout 4c168a637f45dc69efed384c00a7f916f57b25b8 -b stable
-              python setup.py install; cd -
-            fi
-
   installcrowdsourcingdeps:
     description: Install Mephisto
     steps:
@@ -222,26 +208,26 @@ commands:
       - setupcuda
       - fixgit
       - restore_cache:
-          key: deps-20220328-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
+          key: deps-20220426-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
       - setup
       - installdeps
       - << parameters.more_installs >>
       - save_cache:
-          key: deps-20220328-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
+          key: deps-20220426-<< parameters.cachename >>-{{ checksum "requirements.txt" }}
          paths:
            - "~/venv/bin"
            - "~/venv/lib"
      - findtests:
          marker: << parameters.marker >>
      - restore_cache:
-          key: data-20220328-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
+          key: data-20220426-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
      - run:
          name: Run tests
          no_output_timeout: 60m
          command: |
            coverage run -m pytest -m << parameters.marker >> << parameters.pytest_flags >> --junitxml=test-results/junit.xml
      - save_cache:
-          key: data-20220328-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
+          key: data-20220426-<< parameters.cachename >>-{{ checksum "teststorun.txt" }}
          paths:
            - "~/ParlAI/data"
      - codecov
@@ -258,12 +244,12 @@ commands:
       - checkout
       - fixgit
       - restore_cache:
-          key: deps-20220328-bw-{{ checksum "requirements.txt" }}
+          key: deps-20220426-bw-{{ checksum "requirements.txt" }}
       - setup
       - installdeps
       - installtorchgpu
       - save_cache:
-          key: deps-20220328-bw-{{ checksum "requirements.txt" }}
+          key: deps-20220426-bw-{{ checksum "requirements.txt" }}
           paths:
             - "~/venv/bin"
             - "~/venv/lib"
@@ -347,7 +333,6 @@ jobs:
       - runtests:
           more_installs:
             - installtorchgpu
-            - installdetectrondeps
           install_cuda: true
           cachename: gpu18
           marker: unit
@@ -360,7 +345,6 @@ jobs:
       - runtests:
           more_installs:
             - installtorchgpu
-            - installdetectrondeps
           install_cuda: true
           cachename: nightly
           marker: nightly_gpu
diff --git a/parlai/core/image_featurizers.py b/parlai/core/image_featurizers.py
index 53364fa341c..f0102defa20 100644
--- a/parlai/core/image_featurizers.py
+++ b/parlai/core/image_featurizers.py
@@ -9,16 +9,12 @@
 """
 
 import parlai.core.build_data as build_data
-from parlai.core.opt import Opt
 import parlai.utils.logging as logging
 from parlai.utils.io import PathManager
-from parlai.zoo.detectron.build import build
 
 import os
 from PIL import Image
-import numpy as np
 import torch
-from typing import Dict, Tuple, List
 from zipfile import ZipFile
 
 _greyscale = ' .,:;crsA23hHG#98&@'
@@ -44,7 +40,6 @@
     'resnext101_32x16d_wsl_spatial': ['resnext101_32x16d_wsl', -2],
     'resnext101_32x32d_wsl_spatial': ['resnext101_32x32d_wsl', -2],
     'resnext101_32x48d_wsl_spatial': ['resnext101_32x48d_wsl', -2],
-    'faster_r_cnn_152_32x8d': ['', -1],
 }
 
 
@@ -72,8 +67,6 @@ def __init__(self, opt):
                 self._init_resnet_cnn()
             elif 'resnext' in self.image_mode:
                 self._init_resnext_cnn()
-            elif 'faster_r_cnn_152_32x8d' in self.image_mode:
-                self._init_faster_r_cnn()
             else:
                 raise RuntimeError(
                     'Image mode {} not supported'.format(self.image_mode)
@@ -84,7 +77,7 @@ def is_spatial(cls, image_mode: str):
         """
         Return if image mode has spatial dimensionality.
         """
-        return any([s in image_mode for s in ['spatial', 'faster_r_cnn']])
+        return any([s in image_mode for s in ['spatial']])
 
     def _init_transform(self):
         # initialize the transform function using torch vision.
@@ -158,12 +151,6 @@ def _init_resnext_cnn(self):
         if self.use_cuda:
             self.netCNN.cuda()
 
-    def _init_faster_r_cnn(self):
-        """
-        Initialize Detectron Model.
-        """
-        self.netCNN = DetectronFeatureExtractor(self.opt, self.use_cuda)
-
     def _image_mode_switcher(self):
         if self.image_mode not in IMAGE_MODE_SWITCHER:
             raise NotImplementedError(
@@ -190,7 +177,7 @@ def extract(self, image, path=None):
             with torch.no_grad():
                 feature = self.netCNN(transform)
         else:
-            feature = self.netCNN.get_detectron_features([image])[0]
+            raise RuntimeError("detectron support has been removed.")
         # save the feature
         if path is not None:
             import parlai.utils.torch as torch_utils
@@ -268,229 +255,3 @@ def load(self, path):
         else:
             with PathManager.open(new_path, 'rb') as f:
                 return torch.load(f)
-
-
-class DetectronFeatureExtractor:
-    """
-    Code adapted from https://github.com/facebookresearch/mmf/blob/main/tools/scripts/
-    features/extract_features_vmb.py.
-
-    Docstrings and type annotations added post hoc.
-    """
-
-    MAX_SIZE = 1333
-    MIN_SIZE = 800
-
-    def __init__(self, opt: Opt, use_cuda: bool = False):
-        self.opt = opt
-        self.use_cuda = use_cuda
-        self.num_features = 100
-
-        try:
-            import cv2
-
-            self.cv2 = cv2
-        except ImportError:
-            raise ImportError("Please install opencv: pip install opencv-python")
-        try:
-            import maskrcnn_benchmark  # noqa
-        except ImportError:
-            raise ImportError(
-                'Please install vqa-maskrcnn-benchmark to use faster_r_cnn_152_32x8d features: '
-                '1. git clone https://gitlab.com/vedanuj/vqa-maskrcnn-benchmark.git\n'
-                '2. cd vqa-maskrcnn-benchmark\n'
-                '3. git checkout 4c168a637f45dc69efed384c00a7f916f57b25b8 -b stable\n'
-                '4. python setup.py develop'
-            )
-        self._build_detection_model()
-
-    def _build_detection_model(self):
-        """
-        Build the detection model.
-
-        Builds a CNN using the vqa-maskrcnn-benchmark repository.
-        """
-        from maskrcnn_benchmark.config import cfg
-        from maskrcnn_benchmark.modeling.detector import build_detection_model
-        from maskrcnn_benchmark.utils.model_serialization import load_state_dict
-
-        dp = self.opt['datapath']
-        build(dp)
-        cfg_path = os.path.join(dp, 'models/detectron/detectron_config.yaml')
-        model_path = os.path.join(dp, 'models/detectron/detectron_model.pth')
-
-        cfg.merge_from_file(cfg_path)
-        cfg.freeze()
-
-        model = build_detection_model(cfg)
-        checkpoint = torch.load(model_path, map_location=torch.device("cpu"))
-
-        load_state_dict(model, checkpoint.pop("model"))
-
-        if self.use_cuda:
-            model.to("cuda")
-        model.eval()
-        self.detection_model = model
-
-    def _image_transform(
-        self, img: "Image"
-    ) -> Tuple[torch.Tensor, float, Dict[str, int]]:
-        """
-        Using Open-CV, perform image transform on a raw image.
-
-        :param img:
-            raw image to transform
-
-        :return (img, scale, info):
-            img: tensor representation of image
-            scale: scale of image WRT self.MIN_SIZE & self.MAX_SIZE
-            info: dict containing values for img width & height
-        """
-        im = np.array(img).astype(np.float32)
-
-        if im.shape[-1] > 3:
-            im = np.array(img.convert("RGB")).astype(np.float32)
-
-        # IndexError: too many indices for array, grayscale images
-        if len(im.shape) < 3:
-            im = np.repeat(im[:, :, np.newaxis], 3, axis=2)
-
-        im = im[:, :, ::-1]
-        im -= np.array([102.9801, 115.9465, 122.7717])
-        im_shape = im.shape
-        im_height = im_shape[0]
-        im_width = im_shape[1]
-        im_size_min = np.min(im_shape[0:2])
-        im_size_max = np.max(im_shape[0:2])
-
-        # Scale based on minimum size
-        im_scale = self.MIN_SIZE / im_size_min
-
-        # Prevent the biggest axis from being more than max_size
-        # If bigger, scale it down
-        if np.round(im_scale * im_size_max) > self.MAX_SIZE:
-            im_scale = self.MAX_SIZE / im_size_max
-
-        im = self.cv2.resize(
-            im,
-            None,
-            None,
-            fx=im_scale,
-            fy=im_scale,
-            interpolation=self.cv2.INTER_LINEAR,
-        )
-        img = torch.from_numpy(im).permute(2, 0, 1)
-
-        im_info = {"width": im_width, "height": im_height}
-
-        return img, im_scale, im_info
-
-    def _process_feature_extraction(
-        self,
-        output: torch.Tensor,
-        im_scales: List[float],
-        im_infos: List[Dict[str, int]],
-        feature_name: str = "fc6",
-        conf_thresh: int = 0,
-    ):
-        """
-        Post-process feature extraction from the detection model.
-
-        :param output:
-            output from the detection model
-        :param im_scales:
-            list of scales for the processed images
-        :param im_infos:
-            list of dicts containing width/height for images
-        :param feature_name:
-            which feature to extract for the image
-        :param conf_thresh:
-            threshold for bounding box scores (?)
-
-        :return (feature_list, info_list):
-            return list of processed image features, and list of information for each image
-        """
-        from maskrcnn_benchmark.layers import nms
-
-        batch_size = len(output[0]["proposals"])
-        n_boxes_per_image = [len(boxes) for boxes in output[0]["proposals"]]
-        score_list = output[0]["scores"].split(n_boxes_per_image)
-        score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
-        feats = output[0][feature_name].split(n_boxes_per_image)
-        cur_device = score_list[0].device
-
-        feat_list = []
-        info_list = []
-
-        for i in range(batch_size):
-            dets = output[0]["proposals"][i].bbox / im_scales[i]
-            scores = score_list[i]
-            max_conf = torch.zeros(scores.shape[0]).to(cur_device)
-            conf_thresh_tensor = torch.full_like(max_conf, conf_thresh)
-            start_index = 1
-            # Column 0 of the scores matrix is for the background class
-            for cls_ind in range(start_index, scores.shape[1]):
-                cls_scores = scores[:, cls_ind]
-                keep = nms(dets, cls_scores, 0.5)
-                max_conf[keep] = torch.where(
-                    # Better than max one till now and minimally greater
-                    # than conf_thresh
-                    (cls_scores[keep] > max_conf[keep])
-                    & (cls_scores[keep] > conf_thresh_tensor[keep]),
-                    cls_scores[keep],
-                    max_conf[keep],
-                )
-
-            sorted_scores, sorted_indices = torch.sort(max_conf, descending=True)
-            num_boxes = (sorted_scores[: self.num_features] != 0).sum()
-            keep_boxes = sorted_indices[: self.num_features]
-            feat_list.append(feats[i][keep_boxes])
-            bbox = output[0]["proposals"][i][keep_boxes].bbox / im_scales[i]
-            # Predict the class label using the scores
-            objects = torch.argmax(scores[keep_boxes][:, start_index:], dim=1)
-
-            info_list.append(
-                {
-                    "bbox": bbox.cpu().numpy(),
-                    "num_boxes": num_boxes.item(),
-                    "objects": objects.cpu().numpy(),
-                    "cls_prob": scores[keep_boxes][:, start_index:].cpu().numpy(),
-                    "image_width": im_infos[i]["width"],
-                    "image_height": im_infos[i]["height"],
-                }
-            )
-
-        return feat_list, info_list
-
-    def get_detectron_features(self, images: List["Image"]) -> List[torch.Tensor]:
-        """
-        Extract detectron features.
-
-        :param images:
-            a list of PIL Images
-
-        :return features:
-            return a list of features
-        """
-        from maskrcnn_benchmark.structures.image_list import to_image_list
-
-        img_tensor, im_scales, im_infos = [], [], []
-
-        for image in images:
-            im, im_scale, im_info = self._image_transform(image)
-            img_tensor.append(im)
-            im_scales.append(im_scale)
-            im_infos.append(im_info)
-
-        # Image dimensions should be divisible by 32, to allow convolutions
-        # in detector to work
-        current_img_list = to_image_list(img_tensor, size_divisible=32)
-        if self.use_cuda:
-            current_img_list = current_img_list.to("cuda")
-
-        with torch.no_grad():
-            output = self.detection_model(current_img_list)
-
-        features, _ = self._process_feature_extraction(output, im_scales, im_infos)
-
-        return features
diff --git a/parlai/tasks/wizard_of_wikipedia/agents.py b/parlai/tasks/wizard_of_wikipedia/agents.py
index 45dc3fb9bbc..9b85a5bf6c3 100644
--- a/parlai/tasks/wizard_of_wikipedia/agents.py
+++ b/parlai/tasks/wizard_of_wikipedia/agents.py
@@ -284,6 +284,8 @@ class WizardDialogKnowledgeTeacher(WizardOfWikipediaTeacher):
 
     def __init__(self, opt, shared=None):
         self._init_attributes(opt)
+        if shared is None:
+            build(opt)
         if shared and 'rare_word_f1' in shared:
             self.rare_word_f1 = shared['rare_word_f1']
         elif self.label_type == 'response':
diff --git a/parlai/utils/testing.py b/parlai/utils/testing.py
index c1c1ab160a1..029e9d1e70b 100644
--- a/parlai/utils/testing.py
+++ b/parlai/utils/testing.py
@@ -54,15 +54,6 @@
 except ImportError:
     BPE_INSTALLED = False
 
-try:
-    import maskrcnn_benchmark  # noqa: F401
-    import cv2  # noqa: F401
-
-    DETECTRON_AVAILABLE = True
-except ImportError:
-    DETECTRON_AVAILABLE = False
-
-
 try:
     import fairseq  # noqa: F401
 
@@ -132,15 +123,6 @@ def skipUnlessVision(testfn, reason='torchvision not installed'):
     return unittest.skipUnless(VISION_AVAILABLE, reason)(testfn)
 
 
-def skipUnlessDetectron(
-    testfn, reason='maskrcnn_benchmark and/or opencv not installed'
-):
-    """
-    Decorate a test to skip unless maskrcnn_benchmark and opencv are installed.
-    """
-    return unittest.skipUnless(DETECTRON_AVAILABLE, reason)(testfn)
-
-
 def skipUnlessFairseq(testfn, reason='fairseq not installed'):
     """
     Decorate a test to skip unless fairseq is installed.
diff --git a/parlai/zoo/detectron/__init__.py b/parlai/zoo/detectron/__init__.py
deleted file mode 100644
index 240697e3247..00000000000
--- a/parlai/zoo/detectron/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright (c) Facebook, Inc. and its affiliates.
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
diff --git a/parlai/zoo/detectron/build.py b/parlai/zoo/detectron/build.py
deleted file mode 100644
index 1aeff9a206c..00000000000
--- a/parlai/zoo/detectron/build.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright (c) Facebook, Inc. and its affiliates.
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-
-"""
-Detectron Models used in
-"12-in-1: Multi-Task Vision and Language Representation Learning" (Lu et. al).
-
-See https://github.com/facebookresearch/vilbert-multi-task and
-specifically https://github.com/facebookresearch/vilbert-multi-task/tree/main/data
-for more details.
-"""
-
-import os
-from parlai.core.build_data import download_models, built
-
-BASE_PATH = 'https://dl.fbaipublicfiles.com/vilbert-multi-task'
-DETECTRON_MODEL_URL = (
-    'https://dl.fbaipublicfiles.com/vilbert-multi-task/detectron_model.pth'
-)
-DETECTRON_CONFIG_URL = (
-    'https://dl.fbaipublicfiles.com/vilbert-multi-task/detectron_config.yaml'
-)
-
-
-def build(datapath):
-    opt = {'datapath': datapath}
-    dpath = os.path.join(datapath, 'models', 'detectron')
-    fnames = ['detectron_model.pth', 'detectron_config.yaml']
-    version = '1.0'
-    if not built(dpath, version):
-        download_models(
-            opt,
-            fnames,
-            'detectron',
-            path=BASE_PATH,
-            version=version,
-            use_model_type=False,
-        )
diff --git a/parlai/zoo/model_list.py b/parlai/zoo/model_list.py
index 464dafb645e..09f2e66f888 100644
--- a/parlai/zoo/model_list.py
+++ b/parlai/zoo/model_list.py
@@ -1434,28 +1434,6 @@
             .4319
         """,  # The accuracy is low here because the task was labeled using a different classifier, zoo:style_gen/prev_curr_classifier/model
     },
-    {
-        "title": "Faster-R-CNN Detectron Features",
-        "id": "detectron",
-        "path": "zoo:detectron/detectron_model.pth",
-        "agent": "n/a",
-        "task": "n/a",
-        "project": 'https://github.com/facebookresearch/vilbert-multi-task',
-        "description": "Detectron Model for extracting 100x2048d object detection features. Model is from linked project website",
-        "example": "parlai display_data -t flickr30k --image-mode faster_r_cnn_152_32x8d -n 1 -v",
-        "result": """
-            [id]: flickr30k
-            [image_id]: 1000092795
-            [ image ]: tensor([[ 0.0000,  7.1329,  0.0000,  ...,  0.0000,  0.0000,  5.3357],
-                    [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
-                    [ 0.0000,  0.0000,  0.4687,  ...,  0.0000,  0.0000,  0.0000],
-                    ...,
-                    [ 0.0000,  3.0936,  3.4888,  ...,  0.0000,  0.0000,  0.0000],
-                    [ 3.8596,  0.0000,  0.0000,  ...,  0.0000,  4.3454,  0.0000],
-                    [ 0.0000,  7.9822,  1.0979,  ...,  3.5514,  0.0000, 15.3559]])
-            [labels]: Two young guys with shaggy hair look at their hands while hanging out in the yard.|Two young, White males are outside near many bushes.|Two men in green shirts are standing in a yard.|A man in a blue shirt standing in a garden.|Two friends enjoy time spent together.
-        """,
-    },
     {
         "title": "Multi-Modal BlenderBot (MMB DegenPos)",
         "id": "multimodal_blenderbot",
diff --git a/projects/multimodal_blenderbot/README.md b/projects/multimodal_blenderbot/README.md
index 06a1db60a6e..44f1c0232aa 100644
--- a/projects/multimodal_blenderbot/README.md
+++ b/projects/multimodal_blenderbot/README.md
@@ -1,3 +1,10 @@
+**WARNING**
+
+This project has been archived. If you need to use this project, please revert
+to ParlAI 1.4.1.
+
+------
+
 # Multi-Modal Open-Domain Dialogue
 
 Kurt Shuster, Eric Michael Smith, Da Ju, Jason Weston
diff --git a/tests/test_image_featurizers.py b/tests/test_image_featurizers.py
index 62b0cbc89e4..c0b9f4747f5 100644
--- a/tests/test_image_featurizers.py
+++ b/tests/test_image_featurizers.py
@@ -28,7 +28,6 @@
     "resnet152_spatial": torch.Size([1, 2048, 7, 7]),
     "resnext101_32x48d_wsl": torch.Size([2048]),
     "resnext101_32x48d_wsl_spatial": torch.Size([1, 2048, 7, 7]),
-    "faster_r_cnn_152_32x8d": torch.Size([100, 2048]),
 }
 
 
@@ -67,10 +66,6 @@ def test_resnet(self):
     def test_resnext(self):
         self._base_test_loader("resnext")
 
-    @testing_utils.skipUnlessDetectron
-    def test_faster_r_cnn(self):
-        self._base_test_loader("faster_r_cnn", True)
-
     def test_other_image_modes(self):
         """
         Test non-featurized image modes.