Commit: Updated version
CEWu committed Jul 31, 2023
1 parent 79bdc35 commit ce2b334
Showing 32 changed files with 400 additions and 2,888 deletions.
70 changes: 42 additions & 28 deletions README.md
@@ -1,86 +1,89 @@

# Prompt Tuning with Noisy Labels for Vision-Language Models
# Why Is Prompt Tuning for Vision-Language Models Robust to Noisy Labels? [ICCV 2023]



> [**Why Is Prompt Tuning for Vision-Language Models Robust to Noisy Labels?**](https://arxiv.org/abs/2307.11978)<br>
> Cheng-En Wu, Yu Tian, Haichao Yu, Heng Wang, Pedro Morgado, Yu Hen Hu, Linjie Yang
[![paper](https://img.shields.io/badge/arXiv-Paper-red.svg)](https://arxiv.org/abs/2307.11978)


## Introduction

This repo is the official implementation of **Prompt Tuning with Noisy Labels for Vision-Language Models**.
This repo is the official implementation of **Why Is Prompt Tuning for Vision-Language Models Robust to Noisy Labels?**.


## Install

The code is built on [CoOp](https://github.com/KaiyangZhou/CoOp) and [Dassl](https://github.com/KaiyangZhou/Dassl.pytorch) (commit `ac6e44194b2f90e325f477aadd6d9bc3a92ce255`), so you need to set up the Dassl environment first. You can follow the [instructions](https://github.com/KaiyangZhou/Dassl.pytorch#installation) to install *dassl* as well as *PyTorch*. After that, run `pip install -r requirements.txt` under `PTNL/` to install the few extra packages required by [CLIP](https://github.com/openai/CLIP).

**We also prepare all installation commands for you**:
**Set up the conda environment (recommended):**

```bash
############ install conda env ############
############ Conda Environment Installation ############

# Download miniconda script
# Fetch the miniconda script
export HOME=$PWD
wget -q https://repo.anaconda.com/miniconda/Miniconda3-py37_4.12.0-Linux-x86_64.sh -O miniconda.sh
sh miniconda.sh -b -p $HOME/miniconda3
rm miniconda.sh
export PATH=$HOME/miniconda3/bin:$PATH

# Set up conda
# Initialize conda
source $HOME/miniconda3/etc/profile.d/conda.sh
hash -r
conda config --set always_yes yes --set changeps1 yes

############ install Dassl ############
############ Dassl Installation ############

# Clone this repo
# Clone the Dassl repository
git clone https://github.com/KaiyangZhou/Dassl.pytorch.git
cd Dassl.pytorch/
git reset --hard ac6e44194b2f90e325f477aadd6d9bc3a92ce255

# Create a conda environment
# Establish a new conda environment
conda create -n dassl python=3.7

# Activate the environment
# Activate the new environment
conda activate dassl

# Install dependencies
# Install the required dependencies
pip install -r requirements.txt

# Install torch (version >= 1.11.0) and torchvision
# Install PyTorch (version 1.11.0 or above) and torchvision
conda install pytorch==1.11.0 torchvision==0.12.0 cudatoolkit=11.3 -c pytorch

# Install this library (no need to re-build if the source code is modified)
# Set up the Dassl library (No need to rebuild even if the source code changes)
python setup.py develop

############ install PTNL ############
############ PTNL Installation ############

# Enter the directory at the same level as Dassl
# Navigate back to the parent directory
cd ..

# Clone this repo
git clone https://github.com/tonyhuang2022/PTNL.git
# Clone the PTNL repository
git clone https://github.com/CEWu/PTNL
cd PTNL/

# Install CLIP dependencies
# Install necessary packages for CLIP
pip install -r requirements.txt

######## attention ########
# We have two soft links, and you can redirect them!
# The `data` is linked to the datasets, and the `temp_analyze_results_miltiple` is linked to the `info`.
# We strongly recommend that you create these two paths on the disk which has enough space, and then use
######## Note ########
# Two symbolic links, `data` and `temp_analyze_results_miltiple`, exist in the repository. It is recommended that these be pointed to locations with sufficient storage capacity.

rm data temp_analyze_results_miltiple # remove the existing file
rm data temp_analyze_results_miltiple # remove the existing links
ln -s ${your_data_path} ./data
ln -s ${your_temp_analyze_results_miltiple_path} ./temp_analyze_results_miltiple

# Finished
# Installation complete

```
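As a quick sanity check after installation, you can verify that the key packages resolve. This is a minimal sketch, assuming the `dassl` environment is active and that you run it from `PTNL/`, where the vendored `clip` module lives:

```bash
# Sanity check: confirm PyTorch, Dassl, and CLIP imports resolve
# (run from PTNL/ with the dassl env active; import names are assumptions)
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
python -c "import dassl, clip; print('dassl and clip import OK')"
```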


## Datasets

After that, you can follow the [CoOp Datasets Instructions](https://github.com/KaiyangZhou/CoOp/blob/main/DATASETS.md) to prepare the datasets.
Please follow the instructions at [CoOp Datasets Instructions](https://github.com/KaiyangZhou/CoOp/blob/main/DATASETS.md) to prepare all datasets.
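For reference, here is a hypothetical check of the layout under the `data` link after preparing one dataset (directory and file names are assumptions based on CoOp's DATASETS.md; adjust to your own setup):

```bash
# Hypothetical check after preparing Caltech101 per the CoOp instructions
ls data/caltech-101
# expected contents (assumption based on CoOp's DATASETS.md):
#   101_ObjectCategories/  split_zhou_Caltech101.json
```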

Then you are ready to run the code.

## Training

@@ -122,6 +125,17 @@ bash upl_test_existing_logits.sh sscaltech101 rn50_ep50 end 16 16 False True 2
* CLASS_EQULE True or False
* number of false positive training samples per class (see the annotated example below)
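The example command in the hunk above can be read positionally as follows (an illustrative annotation; the mapping follows the variable assignments in the script header):

```bash
# Positional arguments of upl_test_existing_logits.sh (illustrative values):
#   $1 dataset                   -> sscaltech101
#   $2 config file               -> rn50_ep50
#   $3 class token position      -> end
#   $4 number of context tokens  -> 16
#   $5 number of shots           -> 16
#   $6 class-specific context    -> False
#   $7 CLASS_EQULE               -> True
#   $8 false positives per class -> 2
bash upl_test_existing_logits.sh sscaltech101 rn50_ep50 end 16 16 False True 2
```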

## Citation
If you find our work beneficial for your research, please consider citing:
```
@inproceedings{wu2023ptnl,
    title={Why Is Prompt Tuning for Vision-Language Models Robust to Noisy Labels?},
    author={Wu, Cheng-En and Tian, Yu and Yu, Haichao and Wang, Heng and Morgado, Pedro and Hu, Yu Hen and Yang, Linjie},
    booktitle={ICCV},
    year={2023}
}
```

## Acknowledgment

This repository is based on [UPL](https://github.com/tonyhuang2022/UPL).
40 changes: 40 additions & 0 deletions configs/trainers/UPLTrainer/ViT_B_32_ep50.yaml
@@ -0,0 +1,40 @@
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]
  # TRANSFORMS: ["center_crop", "cutout", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 50
  LR_SCHEDULER: "cosine"
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

TEST:
  Analyze_Result_Path: './analysis_results_test/'
  FINAL_MODEL: "last_val"
  PER_CLASS_RESULT: True

MODEL:
  BACKBONE:
    NAME: "ViT-B/32"
  # PSEUDO_LABEL_MODELS: ['RN50', 'RN50x4', 'RN50x16', 'RN50x64', 'RN101', 'ViT-B-16', 'ViT-B-32', 'ViT-L-14']
  PSEUDO_LABEL_MODELS: ['RN50']

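For context, this new ViT-B/32 config should be selectable through the CFG positional of the training script; a hypothetical invocation (all values illustrative, assuming the script is run from `scripts/`):

```bash
# Hypothetical: train with the new config by passing its name as CFG ($2)
bash upl_train.sh sscaltech101 ViT_B_32_ep50 end 16 16 False True vit_b32_random_init 2
```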
7 changes: 3 additions & 4 deletions datasets/data_manager.py
@@ -113,7 +113,6 @@ def __init__(self,

try:
if self.dataset.sstrain:
# Except for the dataset source, everything else is the same as train
train_loader_sstrain = build_data_loader(
cfg,
sampler_type="SequentialSampler",
@@ -122,7 +121,7 @@ def __init__(self,
tfm=tfm_test,
is_train=False,
dataset_wrapper=dataset_wrapper,
tag='sstrain' # must be set at initialization to ensure all samples are loaded
tag='sstrain'
)
self.train_loader_sstrain = train_loader_sstrain

@@ -132,7 +131,7 @@ def __init__(self,
sampler_type="SequentialSampler",
data_source=self.dataset.train_x,
batch_size=cfg.DATALOADER.TRAIN_X.BATCH_SIZE,
tfm=tfm_test, # not used for training, so use the test-time transforms
tfm=tfm_test,
is_train=False,
dataset_wrapper=dataset_wrapper,
tag='sstrain'
@@ -161,7 +160,7 @@ def update_ssdateloader(self, predict_label_dict):
data_source=sstrain,
batch_size=self.cfg.DATALOADER.TRAIN_X.BATCH_SIZE,
n_domain=self.cfg.DATALOADER.TRAIN_X.N_DOMAIN,
n_ins=1, # n_ins instances per class
n_ins=1,
tfm=self.tfm_train,
is_train=True,
dataset_wrapper=self.dataset_wrapper,
1 change: 0 additions & 1 deletion datasets/datasetbase.py
@@ -151,7 +151,6 @@ def _convert_no_label(items):
train = _convert_no_label(split["train"])
return train

# dtd raises an error and needs separate handling; copy the handling function from the original database implementation

def add_label(self, predict_label_dict, dataset_name):
"""add label when training for self-supervised learning
7 changes: 7 additions & 0 deletions get_info.py
@@ -89,6 +89,7 @@ def extend_cfg(cfg):
cfg.TRAINER.UPLTrainer.CTX_INIT = "" # initialization words
cfg.TRAINER.UPLTrainer.PREC = "fp16" # fp16, fp32, amp
cfg.TRAINER.UPLTrainer.CLASS_TOKEN_POSITION = "end" # 'middle' or 'end' or 'front'
cfg.TRAINER.UPLTrainer.TAG = args.tag # tag for method


def setup_cfg(args):
@@ -207,6 +208,12 @@ def main(args):
parser.add_argument(
'--no-train', action='store_true', help='do not call trainer.train()'
)
parser.add_argument(
'--tag',
type=str,
default='',
help='tag for method'
)
parser.add_argument(
'opts',
default=None,
4 changes: 3 additions & 1 deletion scripts/get_info.sh
@@ -12,11 +12,12 @@ CTP=$3 # class token position (end or middle)
NCTX=$4 # number of context tokens
SHOTS=$5 # number of shots (1, 2, 4, 8, 16)
CSC=$6 # class-specific context (False or True)
TAG=$7 # note


for SEED in 1
do
DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_random_init/nctx${NCTX}_csc${CSC}_ctp${CTP}/seed${SEED}
DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_random_init/nctx${NCTX}_csc${CSC}_ctp${CTP}${TAG}/seed${SEED}

echo "Run this job and save the output to ${DIR}"
python get_info.py \
@@ -26,6 +27,7 @@ do
--dataset-config-file configs/datasets/${DATASET}.yaml \
--config-file configs/trainers/${TRAINER}/${CFG}.yaml \
--output-dir ${DIR} \
--tag ${TAG} \
TRAINER.UPLTrainer.N_CTX ${NCTX} \
TRAINER.UPLTrainer.CSC ${CSC} \
TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \
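With the new seventh positional (TAG), a run of the updated script might look like this (hypothetical values; the first six arguments follow the header comments shown above, and the script is assumed to be run from `scripts/`):

```bash
# Hypothetical: get_info.sh now takes TAG as its 7th argument
bash get_info.sh sscaltech101 rn50_ep50 end 16 16 False my_tag
```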
7 changes: 3 additions & 4 deletions scripts/upl_test_existing_logits.sh
@@ -14,6 +14,7 @@ SHOTS=$5 # number of shots (1, 2, 4, 8, 16)
CSC=$6 # class-specific context (False or True)
CLASS_EQULE=$7 # CLASS_EQULE True or False
FP=$8 # number of false positive training samples per class
TAG=$9 # note


DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_CONF_THRESHOLD_${CONF_THRESHOLD}_RN50_temp/nctx${NCTX}_csc${CSC}_fp${FP}/seed${SEED}
@@ -26,11 +27,9 @@ python upl_test.py \
--config-file configs/trainers/${TRAINER}/${CFG}.yaml \
--output-dir ${DIR} \
--num-fp ${FP} \
--tag ${TAG} \
TRAINER.UPLTrainer.N_CTX ${NCTX} \
TRAINER.UPLTrainer.CSC ${CSC} \
TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \
DATASET.NUM_SHOTS ${SHOTS} \
DATASET.CLASS_EQULE ${CLASS_EQULE}



DATASET.CLASS_EQULE ${CLASS_EQULE}
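Since this script now also reads TAG as its ninth positional, an updated call might append a tag (hypothetical values, following the eight-argument example in the README above):

```bash
# Hypothetical: the updated upl_test_existing_logits.sh with TAG as $9
bash upl_test_existing_logits.sh sscaltech101 rn50_ep50 end 16 16 False True 2 my_tag
```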
30 changes: 5 additions & 25 deletions scripts/upl_train.sh
@@ -16,10 +16,9 @@ CLASS_EQULE=$7 # CLASS_EQULE True or False
TAG=$8 # log tag (multiple_models_random_init or rn50_random_init)
FP=$9 # number of false positive training samples per class


for SEED in {1..16}
do
DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_${CONF_THRESHOLD}_${TAG}/nctx${NCTX}_csc${CSC}_ctp${CTP}/seed${SEED}
for SEED in {1..4}
do
DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_${CONF_THRESHOLD}_${TAG}/nctx${NCTX}_csc${CSC}_ctp${CTP}_fp${FP}/seed${SEED}
if [ -d "$DIR" ]; then
echo "Results are available in ${DIR}. Skip this job"
else
Expand All @@ -36,25 +35,6 @@ do
TRAINER.UPLTrainer.CSC ${CSC} \
TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \
DATASET.NUM_SHOTS ${SHOTS} \
DATASET.CLASS_EQULE ${CLASS_EQULE}
DATASET.CLASS_EQULE ${CLASS_EQULE}
fi
done


# #!/bin/bash

# cd ..

# # custom config
# DATA=./data
# TRAINER=ZeroshotCLIP
# DATASET=$1
# CFG=$2 # rn50, rn101, vit_b32 or vit_b16

# python sstrain.py \
# --root ${DATA} \
# --trainer ${TRAINER} \
# --dataset-config-file configs/datasets/${DATASET}.yaml \
# --config-file configs/trainers/HHTrainer/${CFG}.yaml \
# --output-dir output/${TRAINER}/${CFG}/zeroshot/${DATASET} \
# --eval-only
done
59 changes: 0 additions & 59 deletions scripts/upl_train_10.sh

This file was deleted.
