We use the same version of the 'Bottom-up and Top-down' feature extractor that is used in vilbert-multi-task. It is based on Vedanuj Goswami's mask-rcnn-benchmark fork.
Please follow the original installation guide.
# Fetch the detector checkpoint and its config from the vilbert-multi-task
# bucket, storing each under the local file name given to --output-document.
wget --output-document=model_final.pth \
  https://dl.fbaipublicfiles.com/vilbert-multi-task/detectron_model.pth
wget --output-document=e2e_faster_rcnn_X-152-32x8d-FPN_1x_MLP_2048_FPN_512_train.yaml \
  https://dl.fbaipublicfiles.com/vilbert-multi-task/detectron_config.yaml
# Grid features
# Each entry is "<DATASET>/<file>"; the file is downloaded from the bucket and
# saved into datasets/<DATASET>/features (the part before the first slash).
butd_url=https://ai2-vision-x-lxmert.s3-us-west-2.amazonaws.com/butd_features
for remote_path in \
  COCO/maskrcnn_train_grid8.h5 \
  COCO/maskrcnn_valid_grid8.h5 \
  COCO/maskrcnn_test_grid8.h5 \
  VG/maskrcnn_grid8.h5 \
  GQA/maskrcnn_grid8.h5 \
  NLVR2/maskrcnn_train_grid8.h5 \
  NLVR2/maskrcnn_valid_grid8.h5 \
  NLVR2/maskrcnn_test_grid8.h5; do
  wget "${butd_url}/${remote_path}" -P "datasets/${remote_path%%/*}/features"
done
# bounding box features (optional)
# Each entry is "<DATASET>/<file>"; the file is downloaded from the bucket and
# saved into datasets/<DATASET>/features (the part before the first slash).
butd_url=https://ai2-vision-x-lxmert.s3-us-west-2.amazonaws.com/butd_features
for remote_path in \
  COCO/maskrcnn_train_boxes36.h5 \
  COCO/maskrcnn_valid_boxes36.h5 \
  COCO/maskrcnn_test_boxes36.h5 \
  VG/maskrcnn_boxes36.h5 \
  GQA/maskrcnn_boxes36.h5 \
  NLVR2/maskrcnn_train_boxes36.h5 \
  NLVR2/maskrcnn_valid_boxes36.h5 \
  NLVR2/maskrcnn_test_boxes36.h5; do
  wget "${butd_url}/${remote_path}" -P "datasets/${remote_path%%/*}/features"
done
# The extraction scripts are invoked from inside feature_extraction/.
cd feature_extraction
# Grid features
# COCO and NLVR2 are run once per split; VG and GQA take no --split argument.
for split in train valid test; do
  python coco_extract_grid_feature.py --split "$split"
done
python VG_extract_grid_feature.py
python GQA_extract_grid_feature.py
for split in train valid test; do
  python nlvr2_extract_grid_feature.py --split "$split"
done
# bounding box features (optional)
# COCO and NLVR2 are run once per split; VG and GQA take no --split argument.
for split in train valid test; do
  python coco_extract_bbox_feature.py --split "$split"
done
python VG_extract_bbox_feature.py
python GQA_extract_bbox_feature.py
for split in train valid test; do
  python nlvr2_extract_bbox_feature.py --split "$split"
done
We use the faiss implementation of K-means clustering.
Please follow the original installation guide.
# Run K-means clustering via run_kmeans.py, passing mscoco_train as --src and
# three names (mscoco_train, mscoco_valid, vg) as --tgt.
# NOTE(review): the second --tgt entry was previously split into two words
# ("mscoco valid"); it is restored to mscoco_valid to match the mscoco_train
# naming. Confirm against the names run_kmeans.py accepts.
python run_kmeans.py --src=mscoco_train --tgt mscoco_train mscoco_valid vg