diff --git a/README.md b/README.md index a847db5b..491fbbd9 100644 --- a/README.md +++ b/README.md @@ -1,86 +1,89 @@ -# Prompt Tuning with Noisy Labels for Vision-Language Models +# Why Is Prompt Tuning for Vision-Language Models Robust to Noisy Labels? [ICCV 2023] + + + +> [**Why Is Prompt Tuning for Vision-Language Models Robust to Noisy Labels?**](https://arxiv.org/abs/2307.11978)
+> Cheng-En Wu, Yu Tian, Haichao Yu, Heng Wang, Pedro Morgado, Yu Hen Hu, Linjie Yang + +[![paper](https://img.shields.io/badge/arXiv-Paper-.svg)](https://arxiv.org/abs/2307.11978) ## Introduction -This repo is the official implementation of **Prompt Tuning Noisy with Labels for Vision-Language Models**. +This repo is the official implementation of **Why Is Prompt Tuning for Vision-Language Models Robust to Noisy Labels?**. ## Install -The code is built on the [CoOp](https://github.com/KaiyangZhou/CoOp) and [Dassl](https://github.com/KaiyangZhou/Dassl.pytorch) with commit `ac6e44194b2f90e325f477aadd6d9bc3a92ce255`, so you need to install the dassl environment first. You can follow the [instructions](https://github.com/KaiyangZhou/Dassl.pytorch#installation) to install *dassl* as well as *PyTorch*. After that, run `pip install -r requirements.txt` under `PTNL/` to install a few more packages required by [CLIP](https://github.com/openai/CLIP). - -**We also prepare all installation commands for you**: +**Setup conda environment (recommended).**: ```bash -############ install conda env ############ +############ Conda Environment Installation ############ -# Download miniconda script +# Fetch the miniconda script export HOME=$PWD wget -q https://repo.anaconda.com/miniconda/Miniconda3-py37_4.12.0-Linux-x86_64.sh -O miniconda.sh sh miniconda.sh -b -p $HOME/miniconda3 rm miniconda.sh export PATH=$HOME/miniconda3/bin:$PATH -# Set up conda +# Initialize conda source $HOME/miniconda3/etc/profile.d/conda.sh hash -r conda config --set always_yes yes --set changeps1 yes conda activate dassl -############ install Dassl ############ +############ Dassl Installation ############ -# Clone this repo +# Clone the Dassl repository git clone https://github.com/KaiyangZhou/Dassl.pytorch.git cd Dassl.pytorch/ git reset --hard ac6e44194b2f90e325f477aadd6d9bc3a92ce255 -# Create a conda environment +# Establish a new conda environment conda create -n dassl python=3.7 -# Activate the environment +# Activate the new environment conda activate dassl -# Install dependencies +# Install the required dependencies pip install -r requirements.txt -# Install torch (version >= 1.11.0) and torchvision +# Install PyTorch (version 1.11.0 or above) and torchvision conda install pytorch==1.11.0 torchvision==0.12.0 cudatoolkit=11.3 -c pytorch -# Install this library (no need to re-build if the source code is modified) +# Set up the Dassl library (No need to rebuild even if the source code changes) python setup.py develop -############ install PTNL ############ +############ PTNL Installation ############ -# Enter the directory at the same level as Dassl +# Navigate back to the parent directory cd .. -# Clone this repo -git clone https://github.com/tonyhuang2022/PTNL.git +# Clone the PTNL repository +git clone https://github.com/CEWu/PTNL cd PTNL/ -# Install CLIP dependencies +# Install necessary packages for CLIP pip install -r requirements.txt -######## attention ######## -# We have two soft links, and you can redirect them! -# The `data` is linked to the datasets, and the `temp_analyze_results_miltiple` is linked to the `info`. -# We strongly recommend that you create these two paths on the disk which has enough space, and then use +######## Note ######## +# Two symbolic links, `data` and `temp_analyze_results_miltiple`, exist in the repository. It is recommended that these be pointed to locations with sufficient storage capacity. -rm data temp_analyze_results_miltiple # remove the existing file +rm data temp_analyze_results_miltiple # remove the existing links ln -s ${your_data_path} ./data ln -s ${your_temp_analyze_results_miltiple_path} ./temp_analyze_results_miltiple -# Finished +# Installation complete + ``` ## Datasets -After that, you can follow the [CoOp Datasets Instructions](https://github.com/KaiyangZhou/CoOp/blob/main/DATASETS.md) to prepare the datasets. +Please follow the instructions at [CoOp Datasets Instructions](https://github.com/KaiyangZhou/CoOp/blob/main/DATASETS.md) to prepare all datasets. -Then, you can run the code ~ ## Training @@ -122,6 +125,17 @@ bash upl_test_existing_logits.sh sscaltech101 rn50_ep50 end 16 16 False True 2 * CLASS_EQULE True of False * number of false positive training samples per class +# Citation +If you find our work beneficial for your research, please consider citing: +``` +@inproceedings{hanoonavificlip, + title={Why Is Prompt Tuning for Vision-Language Models Robust to Noisy Labels?}, + author={Cheng-En Wu, Yu Tian, Haichao Yu, Heng Wang, Pedro Morgado, Yu Hen Hu, Linjie Yang}, + booktitle={ICCV}, + year={2023} +} +``` + ## Acknowlegment This repository is based on [UPL](https://github.com/tonyhuang2022/UPL). \ No newline at end of file diff --git a/configs/trainers/UPLTrainer/ViT_B_32_ep50.yaml b/configs/trainers/UPLTrainer/ViT_B_32_ep50.yaml new file mode 100644 index 00000000..2de8577e --- /dev/null +++ b/configs/trainers/UPLTrainer/ViT_B_32_ep50.yaml @@ -0,0 +1,40 @@ +DATALOADER: + TRAIN_X: + BATCH_SIZE: 32 + TEST: + BATCH_SIZE: 100 + NUM_WORKERS: 8 + +INPUT: + SIZE: (224, 224) + INTERPOLATION: "bicubic" + PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] + PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] + TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"] + # TRANSFORMS: ["center_crop", "normalize"]"cutout", + +OPTIM: + NAME: "sgd" + LR: 0.002 + MAX_EPOCH: 50 + LR_SCHEDULER: "cosine" + WARMUP_EPOCH: 1 + WARMUP_TYPE: "constant" + WARMUP_CONS_LR: 1e-5 +TRAIN: + +TRAIN: + PRINT_FREQ: 5 + +TEST: + Analyze_Result_Path: './analysis_results_test/' + FINAL_MODEL: "last_val" + PER_CLASS_RESULT: True + + +MODEL: + BACKBONE: + NAME: "ViT-B/32" + # PSEUDO_LABEL_MODELS: ['RN50', 'RN50x4', 'RN50x16', 'RN50x64', 'RN101', 'ViT-B-16', 'ViT-B-32', 'ViT-L-14'] + PSEUDO_LABEL_MODELS: ['RN50'] + \ No newline at end of file diff --git a/datasets/data_manager.py b/datasets/data_manager.py index bab8d876..d49a5500 100644 --- a/datasets/data_manager.py +++ b/datasets/data_manager.py @@ -113,7 +113,6 @@ def __init__(self, try: if self.dataset.sstrain: - # 除了dataset的source是不一样的,其他跟trian都是一样的 train_loader_sstrain = build_data_loader( cfg, sampler_type="SequentialSampler", @@ -122,7 +121,7 @@ def __init__(self, tfm=tfm_test, is_train=False, dataset_wrapper=dataset_wrapper, - tag='sstrain' # 初始化的时候需要设置这个来保证所有样本的载入 + tag='sstrain' ) self.train_loader_sstrain = train_loader_sstrain @@ -132,7 +131,7 @@ def __init__(self, sampler_type="SequentialSampler", data_source=self.dataset.train_x, batch_size=cfg.DATALOADER.TRAIN_X.BATCH_SIZE, - tfm=tfm_test, # 这个是不训练的,所以要用测试的配置, + tfm=tfm_test, is_train=False, dataset_wrapper=dataset_wrapper, tag='sstrain' @@ -161,7 +160,7 @@ def update_ssdateloader(self, predict_label_dict): data_source=sstrain, batch_size=self.cfg.DATALOADER.TRAIN_X.BATCH_SIZE, n_domain=self.cfg.DATALOADER.TRAIN_X.N_DOMAIN, - n_ins=1, # 每个类别n_ins个instance + n_ins=1, tfm=self.tfm_train, is_train=True, dataset_wrapper=self.dataset_wrapper, diff --git a/datasets/datasetbase.py b/datasets/datasetbase.py index 3e70fc68..7c4ef42a 100644 --- a/datasets/datasetbase.py +++ b/datasets/datasetbase.py @@ -151,7 +151,6 @@ def _convert_no_label(items): train = _convert_no_label(split["train"]) return train - # dtd会报错 需要单独处理,它里面有处理的函数,需要copy过来,详见原版的database def add_label(self, predict_label_dict, dataset_name): """add label when training for self-supervised learning diff --git a/get_info.py b/get_info.py index 08357d1b..589db2a2 100644 --- a/get_info.py +++ b/get_info.py @@ -89,6 +89,7 @@ def extend_cfg(cfg): cfg.TRAINER.UPLTrainer.CTX_INIT = "" # initialization words cfg.TRAINER.UPLTrainer.PREC = "fp16" # fp16, fp32, amp cfg.TRAINER.UPLTrainer.CLASS_TOKEN_POSITION = "end" # 'middle' or 'end' or 'front' + cfg.TRAINER.UPLTrainer.TAG = args.tag # #false positive training samples per class def setup_cfg(args): @@ -207,6 +208,12 @@ def main(args): parser.add_argument( '--no-train', action='store_true', help='do not call trainer.train()' ) + parser.add_argument( + '--tag', + type=str, + default='', + help='tag for method' + ) parser.add_argument( 'opts', default=None, diff --git a/scripts/get_info.sh b/scripts/get_info.sh index de0f99c8..63630709 100644 --- a/scripts/get_info.sh +++ b/scripts/get_info.sh @@ -12,11 +12,12 @@ CTP=$3 # class token position (end or middle) NCTX=$4 # number of context tokens SHOTS=$5 # number of shots (1, 2, 4, 8, 16) CSC=$6 # class-specific context (False or True) +TAG=$7 # note for SEED in 1 do - DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_random_init/nctx${NCTX}_csc${CSC}_ctp${CTP}/seed${SEED} + DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_random_init/nctx${NCTX}_csc${CSC}_ctp${CTP}${TAG}/seed${SEED} echo "Run this job and save the output to ${DIR}" python get_info.py \ @@ -26,6 +27,7 @@ do --dataset-config-file configs/datasets/${DATASET}.yaml \ --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ --output-dir ${DIR} \ + --tag ${TAG} \ TRAINER.UPLTrainer.N_CTX ${NCTX} \ TRAINER.UPLTrainer.CSC ${CSC} \ TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \ diff --git a/scripts/upl_test_existing_logits.sh b/scripts/upl_test_existing_logits.sh index c3b8e468..eff50ce3 100644 --- a/scripts/upl_test_existing_logits.sh +++ b/scripts/upl_test_existing_logits.sh @@ -14,6 +14,7 @@ SHOTS=$5 # number of shots (1, 2, 4, 8, 16) CSC=$6 # class-specific context (False or True) CLASS_EQULE=$7 # CLASS_EQULE True of False FP=$8 # number of false positive training samples per class +TAG=$9 # note DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_CONF_THRESHOLD_${CONF_THRESHOLD}_RN50_temp/nctx${NCTX}_csc${CSC}_fp${FP}/seed${SEED} @@ -26,11 +27,9 @@ python upl_test.py \ --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ --output-dir ${DIR} \ --num-fp ${FP} \ +--tag ${TAG} \ TRAINER.UPLTrainer.N_CTX ${NCTX} \ TRAINER.UPLTrainer.CSC ${CSC} \ TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \ DATASET.NUM_SHOTS ${SHOTS} \ -DATASET.CLASS_EQULE ${CLASS_EQULE} - - - +DATASET.CLASS_EQULE ${CLASS_EQULE} \ No newline at end of file diff --git a/scripts/upl_train.sh b/scripts/upl_train.sh index 75c8ca2d..fdcabb9a 100644 --- a/scripts/upl_train.sh +++ b/scripts/upl_train.sh @@ -16,10 +16,9 @@ CLASS_EQULE=$7 # CLASS_EQULE True of False TAG=$8 # log tag (multiple_models_random_init or rn50_random_init) FP=$9 # number of false positive training samples per class - -for SEED in {1..16} -do - DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_${CONF_THRESHOLD}_${TAG}/nctx${NCTX}_csc${CSC}_ctp${CTP}/seed${SEED} +for SEED in {1..4} +do + DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_${CONF_THRESHOLD}_${TAG}/nctx${NCTX}_csc${CSC}_ctp${CTP}_fp${FP}/seed${SEED} if [ -d "$DIR" ]; then echo "Results are available in ${DIR}. Skip this job" else @@ -36,25 +35,6 @@ do TRAINER.UPLTrainer.CSC ${CSC} \ TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \ DATASET.NUM_SHOTS ${SHOTS} \ - DATASET.CLASS_EQULE ${CLASS_EQULE} + DATASET.CLASS_EQULE ${CLASS_EQULE} fi -done - - -# #!/bin/bash - -# cd .. - -# # custom config -# DATA=./data -# TRAINER=ZeroshotCLIP -# DATASET=$1 -# CFG=$2 # rn50, rn101, vit_b32 or vit_b16 - -# python sstrain.py \ -# --root ${DATA} \ -# --trainer ${TRAINER} \ -# --dataset-config-file configs/datasets/${DATASET}.yaml \ -# --config-file configs/trainers/HHTrainer/${CFG}.yaml \ -# --output-dir output/${TRAINER}/${CFG}/zeroshot/${DATASET} \ -# --eval-only \ No newline at end of file +done \ No newline at end of file diff --git a/scripts/upl_train_10.sh b/scripts/upl_train_10.sh deleted file mode 100644 index 07803629..00000000 --- a/scripts/upl_train_10.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -cd .. - -# custom config -DATA=./data -TRAINER=UPLTrainer - -DATASET=$1 -CFG=$2 # config file -CTP=$3 # class token position (end or middle) -NCTX=$4 # number of context tokens -SHOTS=$5 # number of shots (1, 2, 4, 8, 16) -CSC=$6 # class-specific context (False or True) -CLASS_EQULE=$7 # CLASS_EQULE True of False -TAG=$8 # log tag (multiple_models_random_init or rn50_random_init) -FP=$9 # number of false positive training samples per class - -for SEED in {9..10} -do - DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_${CONF_THRESHOLD}_${TAG}/nctx${NCTX}_csc${CSC}_ctp${CTP}_fp${FP}/seed${SEED} - if [ -d "$DIR" ]; then - echo "Results are available in ${DIR}. Skip this job" - else - echo "Run this job and save the output to ${DIR}" - python upl_train.py \ - --root ${DATA} \ - --seed ${SEED} \ - --trainer ${TRAINER} \ - --dataset-config-file configs/datasets/${DATASET}.yaml \ - --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ - --output-dir ${DIR} \ - --num-fp ${FP} \ - TRAINER.UPLTrainer.N_CTX ${NCTX} \ - TRAINER.UPLTrainer.CSC ${CSC} \ - TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \ - DATASET.NUM_SHOTS ${SHOTS} \ - DATASET.CLASS_EQULE ${CLASS_EQULE} - fi -done - - -# #!/bin/bash - -# cd .. - -# # custom config -# DATA=./data -# TRAINER=ZeroshotCLIP -# DATASET=$1 -# CFG=$2 # rn50, rn101, vit_b32 or vit_b16 - -# python sstrain.py \ -# --root ${DATA} \ -# --trainer ${TRAINER} \ -# --dataset-config-file configs/datasets/${DATASET}.yaml \ -# --config-file configs/trainers/HHTrainer/${CFG}.yaml \ -# --output-dir output/${TRAINER}/${CFG}/zeroshot/${DATASET} \ -# --eval-only \ No newline at end of file diff --git a/scripts/upl_train_12.sh b/scripts/upl_train_12.sh deleted file mode 100644 index b9ad4f8d..00000000 --- a/scripts/upl_train_12.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -cd .. - -# custom config -DATA=./data -TRAINER=UPLTrainer - -DATASET=$1 -CFG=$2 # config file -CTP=$3 # class token position (end or middle) -NCTX=$4 # number of context tokens -SHOTS=$5 # number of shots (1, 2, 4, 8, 16) -CSC=$6 # class-specific context (False or True) -CLASS_EQULE=$7 # CLASS_EQULE True of False -TAG=$8 # log tag (multiple_models_random_init or rn50_random_init) -FP=$9 # number of false positive training samples per class - -for SEED in {11..12} -do - DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_${CONF_THRESHOLD}_${TAG}/nctx${NCTX}_csc${CSC}_ctp${CTP}_fp${FP}/seed${SEED} - if [ -d "$DIR" ]; then - echo "Results are available in ${DIR}. Skip this job" - else - echo "Run this job and save the output to ${DIR}" - python upl_train.py \ - --root ${DATA} \ - --seed ${SEED} \ - --trainer ${TRAINER} \ - --dataset-config-file configs/datasets/${DATASET}.yaml \ - --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ - --output-dir ${DIR} \ - --num-fp ${FP} \ - TRAINER.UPLTrainer.N_CTX ${NCTX} \ - TRAINER.UPLTrainer.CSC ${CSC} \ - TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \ - DATASET.NUM_SHOTS ${SHOTS} \ - DATASET.CLASS_EQULE ${CLASS_EQULE} - fi -done - - -# #!/bin/bash - -# cd .. - -# # custom config -# DATA=./data -# TRAINER=ZeroshotCLIP -# DATASET=$1 -# CFG=$2 # rn50, rn101, vit_b32 or vit_b16 - -# python sstrain.py \ -# --root ${DATA} \ -# --trainer ${TRAINER} \ -# --dataset-config-file configs/datasets/${DATASET}.yaml \ -# --config-file configs/trainers/HHTrainer/${CFG}.yaml \ -# --output-dir output/${TRAINER}/${CFG}/zeroshot/${DATASET} \ -# --eval-only \ No newline at end of file diff --git a/scripts/upl_train_14.sh b/scripts/upl_train_14.sh deleted file mode 100644 index da1e78e2..00000000 --- a/scripts/upl_train_14.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -cd .. - -# custom config -DATA=./data -TRAINER=UPLTrainer - -DATASET=$1 -CFG=$2 # config file -CTP=$3 # class token position (end or middle) -NCTX=$4 # number of context tokens -SHOTS=$5 # number of shots (1, 2, 4, 8, 16) -CSC=$6 # class-specific context (False or True) -CLASS_EQULE=$7 # CLASS_EQULE True of False -TAG=$8 # log tag (multiple_models_random_init or rn50_random_init) -FP=$9 # number of false positive training samples per class - -for SEED in {13..14} -do - DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_${CONF_THRESHOLD}_${TAG}/nctx${NCTX}_csc${CSC}_ctp${CTP}_fp${FP}/seed${SEED} - if [ -d "$DIR" ]; then - echo "Results are available in ${DIR}. Skip this job" - else - echo "Run this job and save the output to ${DIR}" - python upl_train.py \ - --root ${DATA} \ - --seed ${SEED} \ - --trainer ${TRAINER} \ - --dataset-config-file configs/datasets/${DATASET}.yaml \ - --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ - --output-dir ${DIR} \ - --num-fp ${FP} \ - TRAINER.UPLTrainer.N_CTX ${NCTX} \ - TRAINER.UPLTrainer.CSC ${CSC} \ - TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \ - DATASET.NUM_SHOTS ${SHOTS} \ - DATASET.CLASS_EQULE ${CLASS_EQULE} - fi -done - - -# #!/bin/bash - -# cd .. - -# # custom config -# DATA=./data -# TRAINER=ZeroshotCLIP -# DATASET=$1 -# CFG=$2 # rn50, rn101, vit_b32 or vit_b16 - -# python sstrain.py \ -# --root ${DATA} \ -# --trainer ${TRAINER} \ -# --dataset-config-file configs/datasets/${DATASET}.yaml \ -# --config-file configs/trainers/HHTrainer/${CFG}.yaml \ -# --output-dir output/${TRAINER}/${CFG}/zeroshot/${DATASET} \ -# --eval-only \ No newline at end of file diff --git a/scripts/upl_train_16.sh b/scripts/upl_train_16.sh deleted file mode 100644 index 06333e6b..00000000 --- a/scripts/upl_train_16.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -cd .. - -# custom config -DATA=./data -TRAINER=UPLTrainer - -DATASET=$1 -CFG=$2 # config file -CTP=$3 # class token position (end or middle) -NCTX=$4 # number of context tokens -SHOTS=$5 # number of shots (1, 2, 4, 8, 16) -CSC=$6 # class-specific context (False or True) -CLASS_EQULE=$7 # CLASS_EQULE True of False -TAG=$8 # log tag (multiple_models_random_init or rn50_random_init) -FP=$9 # number of false positive training samples per class - -for SEED in {15..16} -do - DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_${CONF_THRESHOLD}_${TAG}/nctx${NCTX}_csc${CSC}_ctp${CTP}_fp${FP}/seed${SEED} - if [ -d "$DIR" ]; then - echo "Results are available in ${DIR}. Skip this job" - else - echo "Run this job and save the output to ${DIR}" - python upl_train.py \ - --root ${DATA} \ - --seed ${SEED} \ - --trainer ${TRAINER} \ - --dataset-config-file configs/datasets/${DATASET}.yaml \ - --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ - --output-dir ${DIR} \ - --num-fp ${FP} \ - TRAINER.UPLTrainer.N_CTX ${NCTX} \ - TRAINER.UPLTrainer.CSC ${CSC} \ - TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \ - DATASET.NUM_SHOTS ${SHOTS} \ - DATASET.CLASS_EQULE ${CLASS_EQULE} - fi -done - - -# #!/bin/bash - -# cd .. - -# # custom config -# DATA=./data -# TRAINER=ZeroshotCLIP -# DATASET=$1 -# CFG=$2 # rn50, rn101, vit_b32 or vit_b16 - -# python sstrain.py \ -# --root ${DATA} \ -# --trainer ${TRAINER} \ -# --dataset-config-file configs/datasets/${DATASET}.yaml \ -# --config-file configs/trainers/HHTrainer/${CFG}.yaml \ -# --output-dir output/${TRAINER}/${CFG}/zeroshot/${DATASET} \ -# --eval-only \ No newline at end of file diff --git a/scripts/upl_train_2.sh b/scripts/upl_train_2.sh deleted file mode 100644 index 6a6bd91f..00000000 --- a/scripts/upl_train_2.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -cd .. - -# custom config -DATA=./data -TRAINER=UPLTrainer - -DATASET=$1 -CFG=$2 # config file -CTP=$3 # class token position (end or middle) -NCTX=$4 # number of context tokens -SHOTS=$5 # number of shots (1, 2, 4, 8, 16) -CSC=$6 # class-specific context (False or True) -CLASS_EQULE=$7 # CLASS_EQULE True of False -TAG=$8 # log tag (multiple_models_random_init or rn50_random_init) -FP=$9 # number of false positive training samples per class - -for SEED in {1..2} -do - DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_${CONF_THRESHOLD}_${TAG}/nctx${NCTX}_csc${CSC}_ctp${CTP}_fp${FP}/seed${SEED} - if [ -d "$DIR" ]; then - echo "Results are available in ${DIR}. Skip this job" - else - echo "Run this job and save the output to ${DIR}" - python upl_train.py \ - --root ${DATA} \ - --seed ${SEED} \ - --trainer ${TRAINER} \ - --dataset-config-file configs/datasets/${DATASET}.yaml \ - --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ - --output-dir ${DIR} \ - --num-fp ${FP} \ - TRAINER.UPLTrainer.N_CTX ${NCTX} \ - TRAINER.UPLTrainer.CSC ${CSC} \ - TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \ - DATASET.NUM_SHOTS ${SHOTS} \ - DATASET.CLASS_EQULE ${CLASS_EQULE} - fi -done - - -# #!/bin/bash - -# cd .. - -# # custom config -# DATA=./data -# TRAINER=ZeroshotCLIP -# DATASET=$1 -# CFG=$2 # rn50, rn101, vit_b32 or vit_b16 - -# python sstrain.py \ -# --root ${DATA} \ -# --trainer ${TRAINER} \ -# --dataset-config-file configs/datasets/${DATASET}.yaml \ -# --config-file configs/trainers/HHTrainer/${CFG}.yaml \ -# --output-dir output/${TRAINER}/${CFG}/zeroshot/${DATASET} \ -# --eval-only \ No newline at end of file diff --git a/scripts/upl_train_4.sh b/scripts/upl_train_4.sh deleted file mode 100644 index 80ec9e3b..00000000 --- a/scripts/upl_train_4.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -cd .. - -# custom config -DATA=./data -TRAINER=UPLTrainer - -DATASET=$1 -CFG=$2 # config file -CTP=$3 # class token position (end or middle) -NCTX=$4 # number of context tokens -SHOTS=$5 # number of shots (1, 2, 4, 8, 16) -CSC=$6 # class-specific context (False or True) -CLASS_EQULE=$7 # CLASS_EQULE True of False -TAG=$8 # log tag (multiple_models_random_init or rn50_random_init) -FP=$9 # number of false positive training samples per class - -for SEED in {3..4} -do - DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_${CONF_THRESHOLD}_${TAG}/nctx${NCTX}_csc${CSC}_ctp${CTP}_fp${FP}/seed${SEED} - if [ -d "$DIR" ]; then - echo "Results are available in ${DIR}. Skip this job" - else - echo "Run this job and save the output to ${DIR}" - python upl_train.py \ - --root ${DATA} \ - --seed ${SEED} \ - --trainer ${TRAINER} \ - --dataset-config-file configs/datasets/${DATASET}.yaml \ - --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ - --output-dir ${DIR} \ - --num-fp ${FP} \ - TRAINER.UPLTrainer.N_CTX ${NCTX} \ - TRAINER.UPLTrainer.CSC ${CSC} \ - TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \ - DATASET.NUM_SHOTS ${SHOTS} \ - DATASET.CLASS_EQULE ${CLASS_EQULE} - fi -done - - -# #!/bin/bash - -# cd .. - -# # custom config -# DATA=./data -# TRAINER=ZeroshotCLIP -# DATASET=$1 -# CFG=$2 # rn50, rn101, vit_b32 or vit_b16 - -# python sstrain.py \ -# --root ${DATA} \ -# --trainer ${TRAINER} \ -# --dataset-config-file configs/datasets/${DATASET}.yaml \ -# --config-file configs/trainers/HHTrainer/${CFG}.yaml \ -# --output-dir output/${TRAINER}/${CFG}/zeroshot/${DATASET} \ -# --eval-only \ No newline at end of file diff --git a/scripts/upl_train_6.sh b/scripts/upl_train_6.sh deleted file mode 100644 index ff25aad3..00000000 --- a/scripts/upl_train_6.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -cd .. - -# custom config -DATA=./data -TRAINER=UPLTrainer - -DATASET=$1 -CFG=$2 # config file -CTP=$3 # class token position (end or middle) -NCTX=$4 # number of context tokens -SHOTS=$5 # number of shots (1, 2, 4, 8, 16) -CSC=$6 # class-specific context (False or True) -CLASS_EQULE=$7 # CLASS_EQULE True of False -TAG=$8 # log tag (multiple_models_random_init or rn50_random_init) -FP=$9 # number of false positive training samples per class - -for SEED in {5..6} -do - DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_${CONF_THRESHOLD}_${TAG}/nctx${NCTX}_csc${CSC}_ctp${CTP}_fp${FP}/seed${SEED} - if [ -d "$DIR" ]; then - echo "Results are available in ${DIR}. Skip this job" - else - echo "Run this job and save the output to ${DIR}" - python upl_train.py \ - --root ${DATA} \ - --seed ${SEED} \ - --trainer ${TRAINER} \ - --dataset-config-file configs/datasets/${DATASET}.yaml \ - --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ - --output-dir ${DIR} \ - --num-fp ${FP} \ - TRAINER.UPLTrainer.N_CTX ${NCTX} \ - TRAINER.UPLTrainer.CSC ${CSC} \ - TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \ - DATASET.NUM_SHOTS ${SHOTS} \ - DATASET.CLASS_EQULE ${CLASS_EQULE} - fi -done - - -# #!/bin/bash - -# cd .. - -# # custom config -# DATA=./data -# TRAINER=ZeroshotCLIP -# DATASET=$1 -# CFG=$2 # rn50, rn101, vit_b32 or vit_b16 - -# python sstrain.py \ -# --root ${DATA} \ -# --trainer ${TRAINER} \ -# --dataset-config-file configs/datasets/${DATASET}.yaml \ -# --config-file configs/trainers/HHTrainer/${CFG}.yaml \ -# --output-dir output/${TRAINER}/${CFG}/zeroshot/${DATASET} \ -# --eval-only \ No newline at end of file diff --git a/scripts/upl_train_8.sh b/scripts/upl_train_8.sh deleted file mode 100644 index 60b2f32c..00000000 --- a/scripts/upl_train_8.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -cd .. - -# custom config -DATA=./data -TRAINER=UPLTrainer - -DATASET=$1 -CFG=$2 # config file -CTP=$3 # class token position (end or middle) -NCTX=$4 # number of context tokens -SHOTS=$5 # number of shots (1, 2, 4, 8, 16) -CSC=$6 # class-specific context (False or True) -CLASS_EQULE=$7 # CLASS_EQULE True of False -TAG=$8 # log tag (multiple_models_random_init or rn50_random_init) -FP=$9 # number of false positive training samples per class - -for SEED in {7..8} -do - DIR=./output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots_EQULE_${CLASS_EQULE}_${CONF_THRESHOLD}_${TAG}/nctx${NCTX}_csc${CSC}_ctp${CTP}_fp${FP}/seed${SEED} - if [ -d "$DIR" ]; then - echo "Results are available in ${DIR}. Skip this job" - else - echo "Run this job and save the output to ${DIR}" - python upl_train.py \ - --root ${DATA} \ - --seed ${SEED} \ - --trainer ${TRAINER} \ - --dataset-config-file configs/datasets/${DATASET}.yaml \ - --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ - --output-dir ${DIR} \ - --num-fp ${FP} \ - TRAINER.UPLTrainer.N_CTX ${NCTX} \ - TRAINER.UPLTrainer.CSC ${CSC} \ - TRAINER.UPLTrainer.CLASS_TOKEN_POSITION ${CTP} \ - DATASET.NUM_SHOTS ${SHOTS} \ - DATASET.CLASS_EQULE ${CLASS_EQULE} - fi -done - - -# #!/bin/bash - -# cd .. - -# # custom config -# DATA=./data -# TRAINER=ZeroshotCLIP -# DATASET=$1 -# CFG=$2 # rn50, rn101, vit_b32 or vit_b16 - -# python sstrain.py \ -# --root ${DATA} \ -# --trainer ${TRAINER} \ -# --dataset-config-file configs/datasets/${DATASET}.yaml \ -# --config-file configs/trainers/HHTrainer/${CFG}.yaml \ -# --output-dir output/${TRAINER}/${CFG}/zeroshot/${DATASET} \ -# --eval-only \ No newline at end of file diff --git a/scripts/upl_train_exp.sh b/scripts/upl_train_exp.sh new file mode 100644 index 00000000..2f6c6a69 --- /dev/null +++ b/scripts/upl_train_exp.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssdtd rn50_ep50 end 16 16 False True rn50_random_init 0 +CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssdtd rn50_ep50 end 16 16 False True rn50_random_init 2 +CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssdtd rn50_ep50 end 16 16 False True rn50_random_init 4 +CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssdtd rn50_ep50 end 16 16 False True rn50_random_init 8 + +CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 0 +CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 2 +CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 4 +CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 8 + diff --git a/scripts/upl_train_exp_gpu0.sh b/scripts/upl_train_exp_gpu0.sh deleted file mode 100644 index 89719e74..00000000 --- a/scripts/upl_train_exp_gpu0.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=0 bash upl_train_2.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 8 - - diff --git a/scripts/upl_train_exp_gpu1.sh b/scripts/upl_train_exp_gpu1.sh deleted file mode 100644 index 319f7d7b..00000000 --- a/scripts/upl_train_exp_gpu1.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=1 bash upl_train_4.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 8 \ No newline at end of file diff --git a/scripts/upl_train_exp_gpu2.sh b/scripts/upl_train_exp_gpu2.sh deleted file mode 100644 index f93bd3c4..00000000 --- a/scripts/upl_train_exp_gpu2.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=2 bash upl_train_6.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 8 \ No newline at end of file diff --git a/scripts/upl_train_exp_gpu3.sh b/scripts/upl_train_exp_gpu3.sh deleted file mode 100644 index 8dce4f71..00000000 --- a/scripts/upl_train_exp_gpu3.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=3 bash upl_train_8.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 8 \ No newline at end of file diff --git a/scripts/upl_train_exp_gpu4.sh b/scripts/upl_train_exp_gpu4.sh deleted file mode 100644 index 9f287deb..00000000 --- a/scripts/upl_train_exp_gpu4.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=4 bash upl_train_10.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 8 \ No newline at end of file diff --git a/scripts/upl_train_exp_gpu5.sh b/scripts/upl_train_exp_gpu5.sh deleted file mode 100644 index d0bd4302..00000000 --- a/scripts/upl_train_exp_gpu5.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=5 bash upl_train_12.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 8 \ No newline at end of file diff --git a/scripts/upl_train_exp_gpu6.sh b/scripts/upl_train_exp_gpu6.sh deleted file mode 100644 index a4f48efb..00000000 --- a/scripts/upl_train_exp_gpu6.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=6 bash upl_train_14.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 8 diff --git a/scripts/upl_train_exp_gpu7.sh b/scripts/upl_train_exp_gpu7.sh deleted file mode 100644 index 07f6ccfd..00000000 --- a/scripts/upl_train_exp_gpu7.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh sscaltech101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssoxford_pets rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssucf101 rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssoxford_flowers rn50_ep50 end 16 16 False True rn50_random_init 8 - -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 0 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 2 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 4 -CUDA_VISIBLE_DEVICES=7 bash upl_train_16.sh ssstanford_cars rn50_ep50 end 16 16 False True rn50_random_init 8 \ No newline at end of file diff --git a/trainers/loss.py b/trainers/loss.py new file mode 100644 index 00000000..e850410d --- /dev/null +++ b/trainers/loss.py @@ -0,0 +1,206 @@ +import math +import torch.nn.functional as F +import torch +import torch.nn as nn + +if torch.cuda.is_available(): + torch.backends.cudnn.benchmark = True + if torch.cuda.device_count() > 1: + device = torch.device('cuda:0') + else: + device = torch.device('cuda') +else: + device = torch.device('cpu') + +class GeneralizedCrossEntropy(nn.Module): + """Computes the generalized cross-entropy loss, from ` + "Generalized Cross Entropy Loss for Training Deep Neural Networks with Noisy Labels" + `_ + Args: + q: Box-Cox transformation parameter, :math:`\in (0,1]` + Shape: + - Input: the raw, unnormalized score for each class. + tensor of size :math:`(minibatch, C)`, with C the number of classes + - Target: the labels, tensor of size :math:`(minibatch)`, where each value + is :math:`0 \leq targets[i] \leq C-1` + - Output: scalar + """ + + def __init__(self, q: float = 0.7) -> None: + super().__init__() + self.q = q + self.epsilon = 1e-6 + self.softmax = nn.Softmax(dim=1) + + def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: + p = self.softmax(input) + p = p[torch.arange(p.shape[0]), target] + # Avoid undefined gradient for p == 0 by adding epsilon + p += self.epsilon + loss = (1 - p ** self.q) / self.q + return torch.mean(loss) + +class Unhinged(nn.Module): + """Computes the Unhinged (linear) loss, from + `"Learning with Symmetric Label Noise: The Importance of Being Unhinged" + `_ + Shape: + - Input: the raw, unnormalized score for each class. + tensor of size :math:`(minibatch, C)`, with C the number of classes + - Target: the labels, tensor of size :math:`(minibatch)`, where each value + is :math:`0 \leq targets[i] \leq C-1` + - Output: scalar + """ + + def __init__(self) -> None: + super().__init__() + self.softmax = nn.Softmax(dim=1) + + def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: + p = self.softmax(input) + p = p[torch.arange(p.shape[0]), target] + + loss = 1 - p + + return torch.mean(loss) + + +class PHuberCrossEntropy(nn.Module): + """Computes the partially Huberised (PHuber) cross-entropy loss, from + `"Can gradient clipping mitigate label noise?" + `_ + Args: + tau: clipping threshold, must be > 1 + Shape: + - Input: the raw, unnormalized score for each class. + tensor of size :math:`(minibatch, C)`, with C the number of classes + - Target: the labels, tensor of size :math:`(minibatch)`, where each value + is :math:`0 \leq targets[i] \leq C-1` + - Output: scalar + """ + + def __init__(self, tau: float = 10) -> None: + super().__init__() + self.tau = tau + + # Probability threshold for the clipping + self.prob_thresh = 1 / self.tau + # Negative of the Fenchel conjugate of base loss at tau + self.boundary_term = math.log(self.tau) + 1 + + self.softmax = nn.Softmax(dim=1) + + def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: + p = self.softmax(input) + p = p[torch.arange(p.shape[0]), target] + + loss = torch.empty_like(p) + clip = p <= self.prob_thresh + loss[clip] = -self.tau * p[clip] + self.boundary_term + loss[~clip] = -torch.log(p[~clip]) + + return torch.mean(loss) + + +class PHuberGeneralizedCrossEntropy(nn.Module): + """Computes the partially Huberised (PHuber) generalized cross-entropy loss, from + `"Can gradient clipping mitigate label noise?" + `_ + Args: + q: Box-Cox transformation parameter, :math:`\in (0,1]` + tau: clipping threshold, must be > 1 + Shape: + - Input: the raw, unnormalized score for each class. + tensor of size :math:`(minibatch, C)`, with C the number of classes + - Target: the labels, tensor of size :math:`(minibatch)`, where each value + is :math:`0 \leq targets[i] \leq C-1` + - Output: scalar + """ + + def __init__(self, q: float = 0.7, tau: float = 10) -> None: + super().__init__() + self.q = q + self.tau = tau + + # Probability threshold for the clipping + self.prob_thresh = tau ** (1 / (q - 1)) + # Negative of the Fenchel conjugate of base loss at tau + self.boundary_term = tau * self.prob_thresh + (1 - self.prob_thresh ** q) / q + + self.softmax = nn.Softmax(dim=1) + + def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: + p = self.softmax(input) + p = p[torch.arange(p.shape[0]), target] + + loss = torch.empty_like(p) + clip = p <= self.prob_thresh + loss[clip] = -self.tau * p[clip] + self.boundary_term + loss[~clip] = (1 - p[~clip] ** self.q) / self.q + + return torch.mean(loss) + + +class SCELoss(torch.nn.Module): + def __init__(self, alpha, beta, num_classes=10): + super(SCELoss, self).__init__() + self.device = device + self.alpha = alpha + self.beta = beta + self.num_classes = num_classes + self.cross_entropy = torch.nn.CrossEntropyLoss() + + def forward(self, pred, labels): + # CCE + ce = self.cross_entropy(pred, labels) + + # RCE + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1)) + + # Loss + loss = self.alpha * ce + self.beta * rce.mean() + return loss + + +class NormalizedCrossEntropy(torch.nn.Module): + def __init__(self, num_classes, scale=1.0): + super(NormalizedCrossEntropy, self).__init__() + self.device = device + self.num_classes = num_classes + self.scale = scale + + def forward(self, pred, labels): + pred = F.log_softmax(pred, dim=1) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + nce = -1 * torch.sum(label_one_hot * pred, dim=1) / (- pred.sum(dim=1)) + return self.scale * nce.mean() + +class ReverseCrossEntropy(torch.nn.Module): + def __init__(self, num_classes, scale=1.0): + super(ReverseCrossEntropy, self).__init__() + self.device = device + self.num_classes = num_classes + self.scale = scale + + def forward(self, pred, labels): + pred = F.softmax(pred, dim=1) + pred = torch.clamp(pred, min=1e-7, max=1.0) + label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device) + label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0) + rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1)) + return self.scale * rce.mean() + + +class NCEandRCE(torch.nn.Module): + def __init__(self, alpha, beta, num_classes): + super(NCEandRCE, self).__init__() + self.num_classes = num_classes + self.nce = NormalizedCrossEntropy(scale=alpha, num_classes=num_classes) + self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes) + + def forward(self, pred, labels): + return self.nce(pred, labels) + self.rce(pred, labels) diff --git a/trainers/upltrainer.py b/trainers/upltrainer.py index 146e15ef..e826a1a3 100644 --- a/trainers/upltrainer.py +++ b/trainers/upltrainer.py @@ -35,6 +35,7 @@ select_top_k_similarity, select_top_by_value, caculate_noise_rate_analyze, select_top_k_similarity_per_class_with_noisy_label) _tokenizer = _Tokenizer() +from trainers.loss import GeneralizedCrossEntropy CUSTOM_TEMPLATES = { @@ -286,6 +287,7 @@ def zero_shot_forward(self, image, device): class UPLTrainer(TrainerX): def __init__(self, cfg): super().__init__(cfg) + self.GCE_loss = GeneralizedCrossEntropy(q=0.5) self.gt_label_dict = self.get_gt_label(cfg) def check_cfg(self, cfg): assert cfg.TRAINER.UPLTrainer.PREC in ["fp16", "fp32", "amp"] @@ -334,6 +336,11 @@ def get_gt_label(self, cfg): "SSUCF101":"ucf101", "SSOxfordFlowers":"oxford_flowers", "SSStanfordCars":"stanford_cars", + "SSFGVCAircraft":"fgvc_aircraft", + "SSDescribableTextures":"dtd", + "SSEuroSAT":"eurosat", + "SSFood101":"food-101", + "SSSUN397":"sun397" } dataset_dir = dataset_map[self.cfg.DATASET.NAME] root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) @@ -376,7 +383,7 @@ def forward_backward(self, batch): self.scaler.update() else: output, image_features, text_features = self.model(image) - # loss = F.cross_entropy(output, label, self.class_weights) + # loss = self.GCE_loss(output, label) loss = F.cross_entropy(output, label) self.model_backward_and_update(loss) @@ -516,12 +523,11 @@ def test(self, split=None, trainer_list=None): for batch_idx, batch in enumerate(data_loader): input, label = self.parse_batch_test(batch) if trainer_list is None or len(trainer_list)==1: - # 如果不是ensemble的测试 output, image_features, text_features = self.model_inference(input) image_features_all.append(image_features) text_features_all.append(text_features) else: - # ensemble的测试 + # ensemble outputs = [t.model_inference(input)[0] for t in trainer_list] output = sum(outputs) / len(outputs) self.evaluator.process(output, label, self.per_image_txt_writer, self.per_class_txt_writer) @@ -565,10 +571,9 @@ def zero_shot_analyze(self, trainer_list=None): for batch_idx, batch in tqdm(enumerate(data_loader)): input, label, impath = self.parse_batch_test_with_impath(batch) if trainer_list is None or len(trainer_list)==1: - # 如果不是ensemble的测试 output, image_features, text_features = self.model.zero_shot_forward(input, self.device) else: - # ensemble的测试 + # ensemble outputs = [t.model.zero_shot_forward(input, self.device)[0] for t in trainer_list] output = sum(outputs) / len(outputs) outputs.append(output) @@ -635,10 +640,9 @@ def zero_shot_predict(self, trainer_list=None): for batch_idx, batch in tqdm(enumerate(data_loader)): input, label, impath = self.parse_batch_test_with_impath(batch) if trainer_list is None or len(trainer_list)==1: - # 如果不是ensemble的测试 output, image_features, text_features = self.model.zero_shot_forward(input, self.device) else: - # ensemble的测试 + # ensemble outputs = [t.model.zero_shot_forward(input, self.device)[0] for t in trainer_list] output = sum(outputs) / len(outputs) outputs.append(output) @@ -649,10 +653,8 @@ def zero_shot_predict(self, trainer_list=None): img_paths = np.concatenate(img_paths, axis=0) - # 尽力维持类别平衡 if self.cfg.DATASET.CLASS_EQULE is True: if self.cfg.DATASET.CONF_THRESHOLD > 0: - # 选择置信度大于一定值 & 选择 predict_label_dict_1, predict_conf_dict_1 = select_top_k_similarity_per_class(outputs, img_paths, K=self.cfg.DATASET.NUM_SHOTS) predict_label_dict_2, predict_conf_dict_2 = select_top_by_value(outputs, img_paths, conf_threshold=self.cfg.DATASET.CONF_THRESHOLD) @@ -725,10 +727,9 @@ def zero_shot_test(self, split=None, trainer_list=None): for batch_idx, batch in enumerate(data_loader): input, label, impath = self.parse_batch_test_with_impath(batch) if trainer_list is None or len(trainer_list)==1: - # 如果不是ensemble的测试 output, image_features, text_features = self.model.zero_shot_forward(input, self.device) else: - # ensemble的测试 + # ensemble outputs = [t.model.zero_shot_forward(input, self.device)[0] for t in trainer_list] output = sum(outputs) / len(outputs) self.evaluator.process(output, label, self.per_image_txt_writer, self.per_class_txt_writer) diff --git a/trainers/upltrainer_CLIP_lp.py b/trainers/upltrainer_CLIP_lp.py deleted file mode 100644 index 1c6325c4..00000000 --- a/trainers/upltrainer_CLIP_lp.py +++ /dev/null @@ -1,1074 +0,0 @@ -import imp -from random import sample -from dassl.engine import TRAINER_REGISTRY, TrainerX -import os.path as osp -import os -import time -import datetime -import numpy as np -from tqdm import tqdm -import json - -import torch -import torch.nn as nn -from torch.nn import functional as F -from torch.cuda.amp import GradScaler, autocast - -from dassl.engine import TRAINER_REGISTRY, TrainerX -from dassl.metrics import compute_accuracy -from dassl.utils import load_pretrained_weights, load_checkpoint -from dassl.utils import ( - MetricMeter, AverageMeter, tolist_if_not, count_num_param, load_checkpoint, - save_checkpoint, mkdir_if_missing, resume_from_checkpoint, - load_pretrained_weights -) -from dassl.optim import build_optimizer, build_lr_scheduler -from dassl.data import DataManager - -from clip import clip -from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer - -from datasets.data_manager import UPLDataManager -from evaluation.evaluator import UPLClassification -from .hhzsclip import ZeroshotCLIP -from .utils import (select_top_k_similarity_per_class, caculate_noise_rate, save_outputs, -select_top_k_similarity, select_top_by_value, caculate_noise_rate_analyze, read_wordnet_classnames) - -_tokenizer = _Tokenizer() -from sklearn.linear_model import LogisticRegression - - -CUSTOM_TEMPLATES = { - "OxfordPets": "a photo of a {}, a type of pet.", - "OxfordFlowers": "a photo of a {}, a type of flower.", - "FGVCAircraft": "a photo of a {}, a type of aircraft.", - "DescribableTextures": "{} texture.", - "EuroSAT": "a centered satellite photo of {}.", - "StanfordCars": "a photo of a {}.", - "Food101": "a photo of {}, a type of food.", - "SUN397": "a photo of a {}.", - "Caltech101": "a photo of a {}.", - "UCF101": "a photo of a person doing {}.", - "ImageNet": "a photo of a {}.", - "ImageNetSketch": "a photo of a {}.", - "ImageNetV2": "a photo of a {}.", - "ImageNetA": "a photo of a {}.", - "ImageNetR": "a photo of a {}.", - # semi-supervised templates - "SSOxfordPets": "a photo of a {}, a type of pet.", - "SSOxfordFlowers": "a photo of a {}, a type of flower.", - "SSFGVCAircraft": "a photo of a {}, a type of aircraft.", - "SSDescribableTextures": "{} texture.", - "SSEuroSAT": "a centered satellite photo of {}.", - "SSStanfordCars": "a photo of a {}.", - "SSFood101": "a photo of {}, a type of food.", - "SSSUN397": "a photo of a {}.", - "SSCaltech101": "a photo of a {}.", - "SSUCF101": "a photo of a person doing {}.", - "SSImageNet": "a photo of a {}.", -} - - -def load_clip_to_cpu(cfg): - backbone_name = cfg.MODEL.BACKBONE.NAME - url = clip._MODELS[backbone_name] - model_path = clip._download(url) - try: - # loading JIT archive - model = torch.jit.load(model_path, map_location="cpu").eval() - state_dict = None - - except RuntimeError: - state_dict = torch.load(model_path, map_location="cpu") - - model = clip.build_model(state_dict or model.state_dict()) - - return model - -class LabelSmoothLoss(nn.Module): - - def __init__(self, smoothing=0.0): - super(LabelSmoothLoss, self).__init__() - self.smoothing = smoothing - - def forward(self, input, target): - log_prob = F.log_softmax(input, dim=-1) - weight = input.new_ones(input.size()) * \ - self.smoothing / (input.size(-1) - 1.) - weight.scatter_(-1, target.unsqueeze(-1), (1. - self.smoothing)) - # pdb.set_trace() - loss = (-weight * log_prob).sum(dim=-1).mean() - return loss - - -class TextEncoder(nn.Module): - def __init__(self, clip_model): - super().__init__() - self.transformer = clip_model.transformer - self.positional_embedding = clip_model.positional_embedding - self.ln_final = clip_model.ln_final - self.text_projection = clip_model.text_projection - self.dtype = clip_model.dtype - - def forward(self, prompts, tokenized_prompts): - x = prompts + self.positional_embedding.type(self.dtype) - x = x.permute(1, 0, 2) # NLD -> LND - x = self.transformer(x) - x = x.permute(1, 0, 2) # LND -> NLD - x = self.ln_final(x).type(self.dtype) - - # x.shape = [batch_size, n_ctx, transformer.width] - # take features from the eot embedding (eot_token is the highest number in each sequence) - x = x[torch.arange(x.shape[0]), tokenized_prompts.argmax(dim=-1)] @ self.text_projection - - return x - - -class PromptLearner(nn.Module): - def __init__(self, cfg, classnames, clip_model): - super().__init__() - n_cls = len(classnames) - n_ctx = cfg.TRAINER.UPLTrainer.N_CTX - ctx_init = cfg.TRAINER.UPLTrainer.CTX_INIT - dtype = clip_model.dtype - ctx_dim = clip_model.ln_final.weight.shape[0] - clip_imsize = clip_model.visual.input_resolution - cfg_imsize = cfg.INPUT.SIZE[0] - assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" - - if ctx_init: - # use given words to initialize context vectors - ctx_init = ctx_init.replace("_", " ") - n_ctx = len(ctx_init.split(" ")) - prompt = clip.tokenize(ctx_init) - with torch.no_grad(): - embedding = clip_model.token_embedding(prompt).type(dtype) - ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] - prompt_prefix = ctx_init - - else: - # random initialization - if cfg.TRAINER.UPLTrainer.CSC: - print("Initializing class-specific contexts") - ctx_vectors = torch.empty(n_cls, n_ctx, ctx_dim, dtype=dtype) - else: - print("Initializing a generic context") - ctx_vectors = torch.empty(n_ctx, ctx_dim, dtype=dtype) - nn.init.normal_(ctx_vectors, std=0.02) - # nn.init.zeros_(ctx_vectors) - prompt_prefix = " ".join(["X"] * n_ctx) - - print(f'Initial context: "{prompt_prefix}"') - print(f"Number of context words (tokens): {n_ctx}") - - self.ctx = nn.Parameter(ctx_vectors) # to be optimized - - classnames = [name.replace("_", " ") for name in classnames] - name_lens = [len(_tokenizer.encode(name)) for name in classnames] - prompts = [prompt_prefix + " " + name + "." for name in classnames] - - tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) - with torch.no_grad(): - embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) - - # These token vectors will be saved when in save_model(), - # but they should be ignored in load_model() as we want to use - # those computed using the current class names - self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS - self.register_buffer("token_suffix", embedding[:, 1 + n_ctx :, :]) # CLS, EOS - - self.n_cls = n_cls - self.n_ctx = n_ctx - self.tokenized_prompts = tokenized_prompts # torch.Tensor - self.name_lens = name_lens - self.class_token_position = cfg.TRAINER.UPLTrainer.CLASS_TOKEN_POSITION - - def forward(self): - ctx = self.ctx - if ctx.dim() == 2: - ctx = ctx.unsqueeze(0).expand(self.n_cls, -1, -1) - - prefix = self.token_prefix - suffix = self.token_suffix - - if self.class_token_position == "end": - prompts = torch.cat( - [ - prefix, # (n_cls, 1, dim) - ctx, # (n_cls, n_ctx, dim) - suffix, # (n_cls, *, dim) - ], - dim=1, - ) - - elif self.class_token_position == "middle": - half_n_ctx = self.n_ctx // 2 - prompts = [] - for i in range(self.n_cls): - name_len = self.name_lens[i] - prefix_i = prefix[i : i + 1, :, :] - class_i = suffix[i : i + 1, :name_len, :] - suffix_i = suffix[i : i + 1, name_len:, :] - ctx_i_half1 = ctx[i : i + 1, :half_n_ctx, :] - ctx_i_half2 = ctx[i : i + 1, half_n_ctx:, :] - prompt = torch.cat( - [ - prefix_i, # (1, 1, dim) - ctx_i_half1, # (1, n_ctx//2, dim) - class_i, # (1, name_len, dim) - ctx_i_half2, # (1, n_ctx//2, dim) - suffix_i, # (1, *, dim) - ], - dim=1, - ) - prompts.append(prompt) - prompts = torch.cat(prompts, dim=0) - - elif self.class_token_position == "front": - prompts = [] - for i in range(self.n_cls): - name_len = self.name_lens[i] - prefix_i = prefix[i : i + 1, :, :] - class_i = suffix[i : i + 1, :name_len, :] - suffix_i = suffix[i : i + 1, name_len:, :] - ctx_i = ctx[i : i + 1, :, :] - prompt = torch.cat( - [ - prefix_i, # (1, 1, dim) - class_i, # (1, name_len, dim) - ctx_i, # (1, n_ctx, dim) - suffix_i, # (1, *, dim) - ], - dim=1, - ) - prompts.append(prompt) - prompts = torch.cat(prompts, dim=0) - - else: - raise ValueError - - return prompts - - -class CustomCLIP(nn.Module): - def __init__(self, cfg, classnames, clip_model): - super().__init__() - self.prompt_learner = PromptLearner(cfg, classnames, clip_model) - self.tokenized_prompts = self.prompt_learner.tokenized_prompts - self.image_encoder = clip_model.visual - self.text_encoder = TextEncoder(clip_model) - self.logit_scale = clip_model.logit_scale - self.dtype = clip_model.dtype - self.clip = clip_model - self.classnames = classnames - self.cfg = cfg - - def forward(self, image): - image_features = self.image_encoder(image.type(self.dtype)) - - prompts = self.prompt_learner() - tokenized_prompts = self.tokenized_prompts - text_features = self.text_encoder(prompts, tokenized_prompts) - - image_features = image_features / image_features.norm(dim=-1, keepdim=True) - text_features = text_features / text_features.norm(dim=-1, keepdim=True) - - - logit_scale = self.logit_scale.exp() - logits = logit_scale * image_features @ text_features.t() - - return logits, image_features, text_features - - def zero_shot_forward(self, image, device): - temp = CUSTOM_TEMPLATES[self.cfg.DATASET.NAME] - prompts = [temp.format(c.replace("_", " ")) for c in self.classnames] - prompts = torch.cat([clip.tokenize(p) for p in prompts]) - prompts = prompts.to(device) - with torch.no_grad(): - text_features = self.clip.encode_text(prompts) - text_features = text_features / text_features.norm(dim=-1, keepdim=True) - - image_features = self.clip.encode_image(image) - image_features = image_features / image_features.norm(dim=-1, keepdim=True) - logit_scale = self.clip.logit_scale.exp() - logits = logit_scale * image_features @ text_features.t() - return logits, image_features, text_features - - -@TRAINER_REGISTRY.register() -class UPLTrainer(TrainerX): - def __init__(self, cfg): - super().__init__(cfg) - self.gt_label_dict = self.get_gt_label(cfg) - def check_cfg(self, cfg): - assert cfg.TRAINER.UPLTrainer.PREC in ["fp16", "fp32", "amp"] - - def build_model(self): - cfg = self.cfg - classnames = self.dm.dataset.classnames - print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") - clip_model = load_clip_to_cpu(cfg) - - if cfg.TRAINER.UPLTrainer.PREC == "fp32" or cfg.TRAINER.UPLTrainer.PREC == "amp": - # CLIP's default precision is fp16 - clip_model.float() - - print("Building custom CLIP Image Encoder") - # print("Building resnet50") - self.model = CustomCLIP(cfg, classnames, clip_model) - # self.model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True) - # num_ftrs = self.model.fc.in_features - # self.model.fc = nn.Linear(num_ftrs, len(classnames)) - - # print("Turning off gradients in both the image and the text encoder") - # for name, param in self.model.named_parameters(): - # if "prompt_learner" not in name: - # param.requires_grad_(False) - - if cfg.MODEL.INIT_WEIGHTS: - load_pretrained_weights(self.model.prompt_learner, cfg.MODEL.INIT_WEIGHTS) - - self.model.to(self.device) - # # NOTE: only give prompt_learner to the optimizer - # self.optim = build_optimizer(self.model.prompt_learner, cfg.OPTIM) - # self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) - # self.register_model("prompt_learner", self.model.prompt_learner, self.optim, self.sched) - # NOTE: only give prompt_learner to the optimizer - self.optim = build_optimizer(self.model, cfg.OPTIM) - self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) - self.register_model("CLIP_image_encoder ", self.model, self.optim, self.sched) - - self.scaler = GradScaler() if cfg.TRAINER.UPLTrainer.PREC == "amp" else None - - # Note that multi-gpu training could be slow because CLIP's size is - # big, which slows down the copy operation in DataParallel - device_count = torch.cuda.device_count() - if device_count > 1: - print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!") - self.model = nn.DataParallel(self.model) - - - def get_gt_label(self, cfg): - dataset_map = { - "SSImageNet":"imagenet", - "SSCaltech101":"caltech-101", - "SSOxfordPets":"oxford_pets", - "SSUCF101":"ucf101", - "SSOxfordFlowers":"oxford_flowers", - "SSStanfordCars":"stanford_cars", - } - dataset_dir = dataset_map[self.cfg.DATASET.NAME] - root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) - dataset_dir = os.path.join(root, dataset_dir) - gt_labels = os.path.join(dataset_dir, "{}_GTlabels.json".format(self.cfg.DATASET.NAME)) - if os.path.exists(gt_labels): - with open(gt_labels, "rb") as f: - gt_label_dict = json.load(f) - print("Loaded training GT labels from {}".format(gt_labels)) - else: - print("Generating training GT labels to {}".format(gt_labels)) - gt_label_dict = {} - for batch_idx, batch in enumerate(self.train_loader_x): - input, label, impath = self.parse_batch_test_with_impath(batch) - for l, ip in zip(label, impath): - ip = './data/' + ip.split('/data/')[1] - gt_label_dict[ip] = l.item() - with open(gt_labels, "w") as outfile: - json.dump(gt_label_dict, outfile) - return gt_label_dict - - @torch.no_grad() - def linear_probe(self): - - is_ensemble = True - # Load the model - device = "cuda" if torch.cuda.is_available() else "cpu" - model, preprocess = clip.load('RN50', device) - - all_features_train = [] - all_labels_train = [] - - all_features_test = [] - all_labels_test = [] - - if not is_ensemble: - with torch.no_grad(): - for batch_idx, batch in enumerate(tqdm(self.train_loader_sstrain)): - images, labels = self.parse_batch_train(batch) - features = model.encode_image(images.to(device)) - - all_features_train.append(features) - all_labels_train.append(labels) - - all_features_train = torch.cat(all_features_train).cpu().numpy() - all_labels_train = torch.cat(all_labels_train).cpu().numpy() - - with torch.no_grad(): - for batch_idx, batch in enumerate(tqdm(self.test_loader)): - images, labels = self.parse_batch_test(batch) - features = model.encode_image(images.to(device)) - - all_features_test.append(features) - all_labels_test.append(labels) - - all_features_test = torch.cat(all_features_test).cpu().numpy() - all_labels_test = torch.cat(all_labels_test).cpu().numpy() - - - # Perform logistic regression - classifier = LogisticRegression(random_state=0, C=1e3, max_iter=1000, verbose=0) - classifier.fit(all_features_train, all_labels_train) - - # Evaluate using the logistic regression classifier - predictions = classifier.predict(all_features_test) - predict_proba = classifier.predict_proba(all_features_test) - accuracy = np.mean((all_labels_test == predictions).astype(np.float)) * 100. - print(f"Accuracy = {accuracy:.2f}") - - # Save test output - save_path = os.path.join('analysis_results_test_linear_probe', self.cfg.DATASET.NAME, 'fp'+str(self.cfg.TRAINER.UPLTrainer.NUM_FP), - str(self.cfg.OPTIM.MAX_EPOCH)+'_'+str(self.cfg.SEED)+'_'+str(self.cfg.DATASET.NUM_SHOTS)+'_random_init'+str(self.cfg.TRAINER.UPLTrainer.CLASS_TOKEN_POSITION)) - if not os.path.exists(save_path): - os.makedirs(save_path) - split ='test' - torch.save(predict_proba, os.path.join(save_path, '{}_logits.pt'.format(split))) - - # Save testing result - with open(os.path.join(save_path, 'accuracy_result.txt'), "w") as f: - print(f"Accuracy = {accuracy:.2f}", file=f) - - - if is_ensemble: - # Ensemble: - prob_end = [] - results_dict = {} - with torch.no_grad(): - for batch_idx, batch in enumerate(tqdm(self.test_loader)): - images, labels = self.parse_batch_test(batch) - features = model.encode_image(images.to(device)) - - all_features_test.append(features) - all_labels_test.append(labels) - all_labels_test = torch.cat(all_labels_test).cpu().numpy() - - for SHOTS in [16]: - print('SHOTS:', SHOTS, '\n\n\n\n\n') - for seed in range(1, 17): - try: - prob = torch.load('./analysis_results_test_linear_probe/{}/fp{}/50_{}_{}_random_initend/test_logits.pt'.format(self.cfg.DATASET.NAME, self.cfg.TRAINER.UPLTrainer.NUM_FP ,seed, SHOTS)) - prob_end.append(prob) - except: - print('loss') - - - print(len(prob_end), ' shots ensemble') - prob_test = sum(prob_end) / len(prob_end) - # Evaluate using the logistic regression classifier - predictions = np.argmax(prob_test, axis=1) - accuracy = np.mean((all_labels_test == predictions).astype(np.float)) * 100. - print(f"Ensemble Accuracy = {accuracy:.2f}") - - - def forward_backward(self, batch): - gt_label_list = [] - _, _, impath = self.parse_batch_test_with_impath(batch) - image, label = self.parse_batch_train(batch) - for ip in impath: - ip = './data/' + ip.split('/data/')[1] - gt_label = self.gt_label_dict[ip] - gt_label_list.append(gt_label) - gt_label = torch.tensor(gt_label_list, dtype=label.dtype).to(label.device) - prec = self.cfg.TRAINER.UPLTrainer.PREC - if prec == "amp": - with autocast(): - output, image_features, text_features = self.model(image) - # loss = F.cross_entropy(output, label, self.class_weights) - loss = F.cross_entropy(output, label) - self.optim.zero_grad() - self.scaler.scale(loss).backward() - self.scaler.step(self.optim) - self.scaler.update() - else: - # output, image_features, text_features = self.model(image) - output = self.model(image) - # loss = F.cross_entropy(output, label, self.class_weights) - loss = F.cross_entropy(output, label) - self.model_backward_and_update(loss) - - loss_summary = { - "loss": loss.item(), - "acc": compute_accuracy(output, label)[0].item(), - } - - if (self.batch_idx + 1) == self.num_batches: - self.update_lr() - - return loss_summary - - def parse_batch_train(self, batch): - input = batch["img"] - label = batch["label"] - input = input.to(self.device) - label = label.to(self.device) - return input, label - - def load_model(self, directory, epoch=None): - if not directory: - print("Note that load_model() is skipped as no pretrained model is given") - return - - names = self.get_model_names() - - # By default, the best model is loaded - model_file = "model-best.pth.tar" - - if epoch is not None: - model_file = "model.pth.tar-" + str(epoch) - - for name in names: - model_path = osp.join(directory, name, model_file) - - if not osp.exists(model_path): - raise FileNotFoundError('Model not found at "{}"'.format(model_path)) - - checkpoint = load_checkpoint(model_path) - state_dict = checkpoint["state_dict"] - epoch = checkpoint["epoch"] - - # Ignore fixed token vectors - if "token_prefix" in state_dict: - del state_dict["token_prefix"] - - if "token_suffix" in state_dict: - del state_dict["token_suffix"] - - print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) - # set strict=False - self._models[name].load_state_dict(state_dict, strict=False) - - def load_model_by_id(self, directory, model_id, epoch=None): - if not directory: - print( - 'Note that load_model() is skipped as no pretrained model is given' - ) - return - - names = self.get_model_names() - - # By default, the best model is loaded - model_file = 'model-best-{}.pth.tar'.format(model_id) - - if epoch is not None: - model_file = 'model.pth.tar-' + str(epoch) - - for name in names: - model_path = osp.join(directory, name, model_file) - - if not osp.exists(model_path): - raise FileNotFoundError( - 'Model not found at "{}"'.format(model_path) - ) - - checkpoint = load_checkpoint(model_path) - state_dict = checkpoint['state_dict'] - epoch = checkpoint['epoch'] - - # Ignore fixed token vectors - if 'token_prefix' in state_dict: - del state_dict['token_prefix'] - - if 'token_suffix' in state_dict: - del state_dict['token_suffix'] - - print( - 'Loading weights to {} ' - 'from "{}" (epoch = {})'.format(name, model_path, epoch) - ) - # set strict=False - self._models[name].load_state_dict(state_dict, strict=False) - - @torch.no_grad() - def test(self, split=None, trainer_list=None): - """A generic testing pipeline.""" - - self.set_model_mode("eval") - self.evaluator.reset() - - save_path = os.path.join(self.cfg.TEST.Analyze_Result_Path, self.cfg.DATASET.NAME, 'fp'+str(self.cfg.TRAINER.UPLTrainer.NUM_FP), - str(self.cfg.OPTIM.MAX_EPOCH)+'_'+str(self.cfg.SEED)+'_'+str(self.cfg.DATASET.NUM_SHOTS)+'_random_init'+str(self.cfg.TRAINER.UPLTrainer.CLASS_TOKEN_POSITION)) - if not os.path.exists(save_path): - os.makedirs(save_path) - - results_id = 0 - while os.path.exists(os.path.join(save_path, 'per_image_results_{}_{}.txt'.format(split, results_id))): - results_id += 1 - self.per_image_txt_writer = open(os.path.join(save_path, 'per_image_results_{}_{}.txt'.format(split, results_id)), 'w') - self.per_class_txt_writer = open(os.path.join(save_path, 'per_class_results_{}_{}.txt'.format(split, results_id)), 'w') - - if split is None: - split = self.cfg.TEST.SPLIT - - if split == "val" and self.val_loader is not None: - data_loader = self.val_loader - print("Do evaluation on {} set".format(split)) - elif split=="novel": - data_loader = self.test_novel_loader - print("Do evaluation on test novel set") - elif split=="base": - data_loader = self.test_base_loader - print("Do evaluation on test base set") - elif split=="all": - data_loader = self.test_loader - print("Do evaluation on test set") - else: - data_loader = self.test_loader - print("Do evaluation on test set") - - outputs_all = [] - label_all = [] - image_features_all = [] - text_features_all = [] - for batch_idx, batch in enumerate(data_loader): - input, label = self.parse_batch_test(batch) - if trainer_list is None or len(trainer_list)==1: - # 如果不是ensemble的测试 - # output, image_features, text_features = self.model_inference(input) - output = self.model_inference(input) - # image_features_all.append(image_features) - # text_features_all.append(text_features) - else: - # ensemble的测试 - outputs = [t.model_inference(input)[0] for t in trainer_list] - output = sum(outputs) / len(outputs) - self.evaluator.process(output, label, self.per_image_txt_writer, self.per_class_txt_writer) - outputs_all.append(output) - label_all.append(label) - results = self.evaluator.evaluate() - if split in ['all', 'train', 'test', 'novel', 'base']: - if len(outputs_all) != 0: - outputs_all = torch.cat(outputs_all, dim=0) - label_all = torch.cat(label_all, dim=0) - # image_features_all = torch.cat(image_features_all, dim=0) - # text_features_all = text_features_all[0] - # torch.save(image_features_all, os.path.join(save_path, '{}_v_features.pt'.format(split))) - # torch.save(image_features_all, os.path.join(save_path, '{}_targets.pt'.format(split))) - torch.save(outputs_all, os.path.join(save_path, '{}_logits.pt'.format(split))) - # torch.save(text_features_all, os.path.join(save_path, '{}_l_features.pt'.format(split))) - - - self.per_image_txt_writer.close() - self.per_class_txt_writer.close() - - - for k, v in results.items(): - tag = "{}/{}".format(split, k) - self.write_scalar(tag, v, self.epoch) - - return list(results.values())[0] - - @torch.no_grad() - def zero_shot_analyze(self, trainer_list=None): - """A generic predicting pipeline.""" - self.set_model_mode("eval") - self.model.eval() - self.evaluator.reset() - - data_loader = self.train_loader_sstrain - outputs = [] - image_features_list = [] - text_features_list = [] - img_paths = [] - from tqdm import tqdm - for batch_idx, batch in tqdm(enumerate(data_loader)): - input, label, impath = self.parse_batch_test_with_impath(batch) - if trainer_list is None or len(trainer_list)==1: - # 如果不是ensemble的测试 - output, image_features, text_features = self.model.zero_shot_forward(input, self.device) - else: - # ensemble的测试 - outputs = [t.model.zero_shot_forward(input, self.device)[0] for t in trainer_list] - output = sum(outputs) / len(outputs) - outputs.append(output) - image_features_list.append(image_features) - img_paths.append(impath) - sstrain_outputs = torch.cat(outputs, dim=0) - sstrain_img_paths = np.concatenate(img_paths, axis=0) - image_features = torch.cat(image_features_list, axis=0) - # text_features = torch.cat(text_features, axis=0) - print('image_features', image_features.shape) - print('text_features', text_features.shape) - predict_label_dict, _ = select_top_k_similarity_per_class(sstrain_outputs, sstrain_img_paths, -1, image_features, True, random_seed=self.cfg.SEED) - save_outputs(self.train_loader_x, self, predict_label_dict, self.cfg.DATASET.NAME, text_features, backbone_name=self.cfg.MODEL.BACKBONE.NAME) - caculate_noise_rate_analyze(predict_label_dict, train_loader=self.train_loader_x, trainer=self) - # pdb.set_trace() - return predict_label_dict - - - def load_from_exist_file(self, file_path, model_names): - logits = None - for model in model_names: - model_path = os.path.join(file_path, model) - logist_path = os.path.join(model_path, '{}_logits.pt'.format(self.cfg.DATASET.NAME)) - if logits is None: - logits = torch.load(logist_path) - else: - logits += torch.load(logist_path) - - info_path = os.path.join(model_path, '{}.json'.format(self.cfg.DATASET.NAME)) - info = json.load(open(info_path)) - items = [] - for c in info: - for img_path in info[c]: - item = info[c][img_path] - items.append([img_path, int(item[3])]) # 路径 序号 - sorted(items, key=(lambda x:x[1])) - sstrain_img_paths = np.array(items)[:,0] - - logits /= len(model_names) - predict_label_dict, predict_conf_dict = select_top_k_similarity_per_class(logits, sstrain_img_paths, K=self.cfg.DATASET.NUM_SHOTS, is_softmax=False, random_seed=self.cfg.SEED, gt_label_dict=self.gt_label_dict, num_fp=self.cfg.TRAINER.UPLTrainer.NUM_FP) - return predict_label_dict, predict_conf_dict - - @torch.no_grad() - def zero_shot_predict(self, trainer_list=None): - """A generic predicting pipeline.""" - self.set_model_mode("eval") - self.model.eval() - self.evaluator.reset() - - save_path = os.path.join(self.cfg.TEST.Analyze_Result_Path, self.cfg.DATASET.NAME, - str(self.cfg.OPTIM.MAX_EPOCH)+'_'+str(self.cfg.SEED)+'_'+str(self.cfg.DATASET.NUM_SHOTS)) - if not os.path.exists(save_path): - os.makedirs(save_path) - - data_loader = self.train_loader_sstrain - - outputs = [] - img_paths = [] - - - for batch_idx, batch in tqdm(enumerate(data_loader)): - input, label, impath = self.parse_batch_test_with_impath(batch) - if trainer_list is None or len(trainer_list)==1: - # 如果不是ensemble的测试 - output, image_features, text_features = self.model.zero_shot_forward(input, self.device) - else: - # ensemble的测试 - outputs = [t.model.zero_shot_forward(input, self.device)[0] for t in trainer_list] - output = sum(outputs) / len(outputs) - outputs.append(output) - img_paths.append(impath) - - - outputs = torch.cat(outputs, dim=0) - img_paths = np.concatenate(img_paths, axis=0) - - - # 尽力维持类别平衡 - if self.cfg.DATASET.CLASS_EQULE is True: - if self.cfg.DATASET.CONF_THRESHOLD > 0: - # 选择置信度大于一定值 & 选择 - predict_label_dict_1, predict_conf_dict_1 = select_top_k_similarity_per_class(outputs, img_paths, K=self.cfg.DATASET.NUM_SHOTS) - predict_label_dict_2, predict_conf_dict_2 = select_top_by_value(outputs, img_paths, conf_threshold=self.cfg.DATASET.CONF_THRESHOLD) - - print(len(predict_label_dict_1), 'predict_label_dict_1') - print(len(predict_label_dict_2), 'predict_label_dict_2') - - predict_label_dict = dict(predict_label_dict_1, **predict_label_dict_2) - predict_conf_dict = dict(predict_conf_dict_1, **predict_conf_dict_2) - caculate_noise_rate(predict_label_dict, train_loader=self.train_loader_x, trainer=self) - print('select {} samples'.format(len(predict_label_dict))) - - else: - print("K {} shots".format(self.cfg.DATASET.NUM_SHOTS)) - predict_label_dict, predict_conf_dict = select_top_k_similarity_per_class(outputs, img_paths, K=self.cfg.DATASET.NUM_SHOTS) - caculate_noise_rate(predict_label_dict, train_loader=self.train_loader_x, trainer=self) - print('select {} samples'.format(len(predict_label_dict))) - - else: - print("K", self.cfg.DATASET.NUM_SHOTS*text_features.shape[0]) - predict_label_dict, predict_conf_dict = select_top_k_similarity(outputs, img_paths, K=self.cfg.DATASET.NUM_SHOTS*text_features.shape[0]) - caculate_noise_rate(predict_label_dict, train_loader=self.train_loader_x, trainer=self) - print('select {} samples'.format(len(predict_label_dict))) - return predict_label_dict, predict_conf_dict - - @torch.no_grad() - def zero_shot_test(self, split=None, trainer_list=None): - """A generic predicting pipeline.""" - - self.set_model_mode("eval") - self.evaluator.reset() - - save_path = os.path.join(self.cfg.TEST.Analyze_Result_Path, self.cfg.DATASET.NAME, - str(self.cfg.OPTIM.MAX_EPOCH)+'_'+str(self.cfg.SEED)+'_'+str(self.cfg.DATASET.NUM_SHOTS)) - if not os.path.exists(save_path): - os.makedirs(save_path) - - results_id = 0 - while os.path.exists(os.path.join(save_path, 'per_image_results_{}_{}.txt'.format(split, results_id))): - results_id += 1 - self.per_image_txt_writer = open(os.path.join(save_path, 'per_image_results_{}_{}.txt'.format(split, results_id)), 'w') - self.per_class_txt_writer = open(os.path.join(save_path, 'per_class_results_{}_{}.txt'.format(split, results_id)), 'w') - - if split is None: - split = self.cfg.TEST.SPLIT - - save_path = os.path.join(self.cfg.TEST.Analyze_Result_Path, self.cfg.DATASET.NAME, - str(self.cfg.OPTIM.MAX_EPOCH)+'_'+str(self.cfg.SEED)+'_'+str(self.cfg.DATASET.NUM_SHOTS)) - if not os.path.exists(save_path): - os.makedirs(save_path) - - if split == "val" and self.val_loader is not None: - data_loader = self.val_loader - print("Do evaluation on {} set".format(split)) - elif split=="novel": - data_loader = self.test_novel_loader - print("Do evaluation on test novel set") - elif split=="base": - data_loader = self.test_base_loader - print("Do evaluation on test base set") - elif split=="all": - data_loader = self.test_loader - print("Do evaluation on test set") - elif split=="train": - data_loader = self.train_loader_x - print("Do evaluation on train set") - else: - data_loader = self.test_loader - print("Do evaluation on test set") - - for batch_idx, batch in enumerate(data_loader): - input, label, impath = self.parse_batch_test_with_impath(batch) - if trainer_list is None or len(trainer_list)==1: - # 如果不是ensemble的测试 - output, image_features, text_features = self.model.zero_shot_forward(input, self.device) - else: - # ensemble的测试 - outputs = [t.model.zero_shot_forward(input, self.device)[0] for t in trainer_list] - output = sum(outputs) / len(outputs) - self.evaluator.process(output, label, self.per_image_txt_writer, self.per_class_txt_writer) - results = self.evaluator.evaluate() - - for k, v in results.items(): - tag = "{}/{}".format(split, k) - self.write_scalar(tag, v, self.epoch) - - self.per_image_txt_writer.close() - self.per_class_txt_writer.close() - - return list(results.values())[0] - - - - def build_data_loader(self): - """Create essential data-related attributes. - - A re-implementation of this method must create the - same attributes (except self.dm). - """ - _, preprocess = clip.load(self.cfg.MODEL.BACKBONE.NAME) - # dm = UPLDataManager(self.cfg, custom_tfm_test=preprocess) - dm = UPLDataManager(self.cfg, custom_tfm_train=preprocess, custom_tfm_test=preprocess) - # _, preprocess = clip.load(self.cfg.MODEL.BACKBONE.NAME) - # dm = UPLDataManager(self.cfg, custom_tfm_test=preprocess) - - self.train_loader_x = dm.train_loader_x - self.train_loader_u = dm.train_loader_u # optional, can be None - self.val_loader = dm.val_loader # optional, can be None - self.test_loader = dm.test_loader - self.train_loader_sstrain = dm.train_loader_sstrain - self.num_classes = dm.num_classes - self.num_source_domains = dm.num_source_domains - self.lab2cname = dm.lab2cname # dict {label: classname} - - if self.cfg.DATALOADER.OPEN_SETTING: - self.test_novel_loader = dm.test_novel_loader - self.test_base_loader = dm.test_base_loader - - - self.dm = dm - - def sstrain_with_id(self, model_id): - self.sstrain(self.start_epoch, self.max_epoch, model_id) - - def sstrain(self, start_epoch, max_epoch, model_id): - """Generic training loops.""" - self.start_epoch = start_epoch - self.max_epoch = max_epoch - - self.before_train() - for self.epoch in range(self.start_epoch, self.max_epoch): - self.before_epoch() - self.run_epoch_with_sstrain() - self.after_epoch(model_id) - self.after_train(model_id) - - def run_epoch_with_sstrain(self): - self.set_model_mode("train") - losses = MetricMeter() - batch_time = AverageMeter() - data_time = AverageMeter() - self.num_batches = len(self.train_loader_sstrain) - - end = time.time() - for self.batch_idx, batch in enumerate(self.train_loader_sstrain): - data_time.update(time.time() - end) - loss_summary = self.forward_backward(batch) - batch_time.update(time.time() - end) - losses.update(loss_summary) - - if ( - self.batch_idx + 1 - ) % self.cfg.TRAIN.PRINT_FREQ == 0 or self.num_batches < self.cfg.TRAIN.PRINT_FREQ: - nb_remain = 0 - nb_remain += self.num_batches - self.batch_idx - 1 - nb_remain += ( - self.max_epoch - self.epoch - 1 - ) * self.num_batches - eta_seconds = batch_time.avg * nb_remain - eta = str(datetime.timedelta(seconds=int(eta_seconds))) - print( - "epoch [{0}/{1}][{2}/{3}]\t" - "time {batch_time.val:.3f} ({batch_time.avg:.3f})\t" - "data {data_time.val:.3f} ({data_time.avg:.3f})\t" - "eta {eta}\t" - "{losses}\t" - "lr {lr:.6e}".format( - self.epoch + 1, - self.max_epoch, - self.batch_idx + 1, - self.num_batches, - batch_time=batch_time, - data_time=data_time, - eta=eta, - losses=losses, - lr=self.get_current_lr(), - ) - ) - - n_iter = self.epoch * self.num_batches + self.batch_idx - for name, meter in losses.meters.items(): - self.write_scalar("train/" + name, meter.avg, n_iter) - self.write_scalar("train/lr", self.get_current_lr(), n_iter) - - end = time.time() - - def after_epoch(self, model_id): - last_epoch = (self.epoch + 1) == self.max_epoch - do_test = not self.cfg.TEST.NO_TEST - meet_checkpoint_freq = ( - (self.epoch + 1) % self.cfg.TRAIN.CHECKPOINT_FREQ == 0 - if self.cfg.TRAIN.CHECKPOINT_FREQ > 0 else False - ) - # if ((self.epoch + 1) % 5) == 0 and self.cfg.DATASET.NAME!="SSImageNet": - # curr_result = self.test(split="test") - if do_test and self.cfg.TEST.FINAL_MODEL == "best_val": - curr_result = self.test(split="val") - is_best = curr_result > self.best_result - if is_best: - self.best_result = curr_result - self.save_model( - self.epoch, - self.output_dir, - model_name="model-best-{}.pth.tar".format(model_id) - ) - - if meet_checkpoint_freq or last_epoch: - self.save_model( - self.epoch, - self.output_dir, - model_name="model-best-{}.pth.tar".format(model_id) - ) - - def after_train(self, model_id): - print("Finished training") - - do_test = not self.cfg.TEST.NO_TEST - if do_test: - if self.cfg.TEST.FINAL_MODEL == "best_val": - print("Deploy the model with the best val performance") - self.load_model_by_id(self.output_dir, model_id) - # self.test(split='novel') - # self.test(split='base') - # self.test(split='train') - self.test(split='test') - - - # Show elapsed time - elapsed = round(time.time() - self.time_start) - elapsed = str(datetime.timedelta(seconds=elapsed)) - print("Elapsed: {}".format(elapsed)) - - # Close writer - self.close_writer() - - def parse_batch_test(self, batch): - input = batch["img"] - label = batch["label"] - - input = input.to(self.device) - label = label.to(self.device) - - return input, label - - def parse_batch_test_with_impath(self, batch): - input = batch["img"] - label = batch["label"] - impath = batch["impath"] - - input = input.to(self.device) - label = label.to(self.device) - # impath = impath.to(self.device) - - return input, label, impath - - @torch.no_grad() - def test_with_existing_logits(self, logits, split='test'): - - - self.set_model_mode("eval") - self.evaluator.reset() - - save_path = os.path.join(self.cfg.TEST.Analyze_Result_Path, self.cfg.DATASET.NAME,'fp'+str(self.cfg.TRAINER.UPLTrainer.NUM_FP), - str(self.cfg.OPTIM.MAX_EPOCH)+'_'+str(self.cfg.SEED)+'_'+str(self.cfg.DATASET.NUM_SHOTS)+'_random_init'+str(self.cfg.TRAINER.UPLTrainer.CLASS_TOKEN_POSITION)) - if not os.path.exists(save_path): - os.makedirs(save_path) - - results_id = 0 - while os.path.exists(os.path.join(save_path, 'per_image_results_{}_{}.txt'.format(split, results_id))): - results_id += 1 - self.per_image_txt_writer = open(os.path.join(save_path, 'per_image_results_{}_{}.txt'.format(split, results_id)), 'w') - self.per_class_txt_writer = open(os.path.join(save_path, 'per_class_results_{}_{}.txt'.format(split, results_id)), 'w') - - if split is None: - split = self.cfg.TEST.SPLIT - - if split == "val" and self.val_loader is not None: - data_loader = self.val_loader - print("Do evaluation on {} set".format(split)) - elif split=="novel": - data_loader = self.test_novel_loader - print("Do evaluation on test novel set") - elif split=="base": - data_loader = self.test_base_loader - print("Do evaluation on test base set") - elif split=="all": - data_loader = self.test_loader - print("Do evaluation on test set") - else: - data_loader = self.test_loader - print("Do evaluation on test set") - - label_all = [] - for batch_idx, batch in enumerate(data_loader): - input, label = self.parse_batch_test(batch) - label_all.append(label) - label_all = torch.hstack(label_all) - print(label_all.shape) - - self.evaluator.process(logits, label_all, self.per_image_txt_writer, self.per_class_txt_writer) - results = self.evaluator.evaluate() - for k, v in results.items(): - tag = "{}/{}".format(split, k) - self.write_scalar(tag, v, self.epoch) - - return results diff --git a/trainers/upltrainer_r50.py b/trainers/upltrainer_r50.py deleted file mode 100644 index 0aa12c00..00000000 --- a/trainers/upltrainer_r50.py +++ /dev/null @@ -1,968 +0,0 @@ -import imp -from random import sample -from dassl.engine import TRAINER_REGISTRY, TrainerX -import os.path as osp -import os -import time -import datetime -import numpy as np -from tqdm import tqdm -import json - -import torch -import torch.nn as nn -from torch.nn import functional as F -from torch.cuda.amp import GradScaler, autocast - -from dassl.engine import TRAINER_REGISTRY, TrainerX -from dassl.metrics import compute_accuracy -from dassl.utils import load_pretrained_weights, load_checkpoint -from dassl.utils import ( - MetricMeter, AverageMeter, tolist_if_not, count_num_param, load_checkpoint, - save_checkpoint, mkdir_if_missing, resume_from_checkpoint, - load_pretrained_weights -) -from dassl.optim import build_optimizer, build_lr_scheduler -from dassl.data import DataManager - -from clip import clip -from clip.simple_tokenizer import SimpleTokenizer as _Tokenizer - -from datasets.data_manager import UPLDataManager -from evaluation.evaluator import UPLClassification -from .hhzsclip import ZeroshotCLIP -from .utils import (select_top_k_similarity_per_class, caculate_noise_rate, save_outputs, -select_top_k_similarity, select_top_by_value, caculate_noise_rate_analyze, select_top_k_similarity_per_class_with_noisy_label) - -_tokenizer = _Tokenizer() - - -CUSTOM_TEMPLATES = { - "OxfordPets": "a photo of a {}, a type of pet.", - "OxfordFlowers": "a photo of a {}, a type of flower.", - "FGVCAircraft": "a photo of a {}, a type of aircraft.", - "DescribableTextures": "{} texture.", - "EuroSAT": "a centered satellite photo of {}.", - "StanfordCars": "a photo of a {}.", - "Food101": "a photo of {}, a type of food.", - "SUN397": "a photo of a {}.", - "Caltech101": "a photo of a {}.", - "UCF101": "a photo of a person doing {}.", - "ImageNet": "a photo of a {}.", - "ImageNetSketch": "a photo of a {}.", - "ImageNetV2": "a photo of a {}.", - "ImageNetA": "a photo of a {}.", - "ImageNetR": "a photo of a {}.", - # semi-supervised templates - "SSOxfordPets": "a photo of a {}, a type of pet.", - "SSOxfordFlowers": "a photo of a {}, a type of flower.", - "SSFGVCAircraft": "a photo of a {}, a type of aircraft.", - "SSDescribableTextures": "{} texture.", - "SSEuroSAT": "a centered satellite photo of {}.", - "SSStanfordCars": "a photo of a {}.", - "SSFood101": "a photo of {}, a type of food.", - "SSSUN397": "a photo of a {}.", - "SSCaltech101": "a photo of a {}.", - "SSUCF101": "a photo of a person doing {}.", - "SSImageNet": "a photo of a {}.", -} - - -def load_clip_to_cpu(cfg): - backbone_name = cfg.MODEL.BACKBONE.NAME - url = clip._MODELS[backbone_name] - model_path = clip._download(url) - try: - # loading JIT archive - model = torch.jit.load(model_path, map_location="cpu").eval() - state_dict = None - - except RuntimeError: - state_dict = torch.load(model_path, map_location="cpu") - - model = clip.build_model(state_dict or model.state_dict()) - - return model - - -class TextEncoder(nn.Module): - def __init__(self, clip_model): - super().__init__() - self.transformer = clip_model.transformer - self.positional_embedding = clip_model.positional_embedding - self.ln_final = clip_model.ln_final - self.text_projection = clip_model.text_projection - self.dtype = clip_model.dtype - - def forward(self, prompts, tokenized_prompts): - x = prompts + self.positional_embedding.type(self.dtype) - x = x.permute(1, 0, 2) # NLD -> LND - x = self.transformer(x) - x = x.permute(1, 0, 2) # LND -> NLD - x = self.ln_final(x).type(self.dtype) - - # x.shape = [batch_size, n_ctx, transformer.width] - # take features from the eot embedding (eot_token is the highest number in each sequence) - x = x[torch.arange(x.shape[0]), tokenized_prompts.argmax(dim=-1)] @ self.text_projection - - return x - - -class PromptLearner(nn.Module): - def __init__(self, cfg, classnames, clip_model): - super().__init__() - n_cls = len(classnames) - n_ctx = cfg.TRAINER.UPLTrainer.N_CTX - ctx_init = cfg.TRAINER.UPLTrainer.CTX_INIT - dtype = clip_model.dtype - ctx_dim = clip_model.ln_final.weight.shape[0] - clip_imsize = clip_model.visual.input_resolution - cfg_imsize = cfg.INPUT.SIZE[0] - assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" - - if ctx_init: - # use given words to initialize context vectors - ctx_init = ctx_init.replace("_", " ") - n_ctx = len(ctx_init.split(" ")) - prompt = clip.tokenize(ctx_init) - with torch.no_grad(): - embedding = clip_model.token_embedding(prompt).type(dtype) - ctx_vectors = embedding[0, 1 : 1 + n_ctx, :] - prompt_prefix = ctx_init - - else: - # random initialization - if cfg.TRAINER.UPLTrainer.CSC: - print("Initializing class-specific contexts") - ctx_vectors = torch.empty(n_cls, n_ctx, ctx_dim, dtype=dtype) - else: - print("Initializing a generic context") - ctx_vectors = torch.empty(n_ctx, ctx_dim, dtype=dtype) - nn.init.normal_(ctx_vectors, std=0.02) - # nn.init.zeros_(ctx_vectors) - prompt_prefix = " ".join(["X"] * n_ctx) - - print(f'Initial context: "{prompt_prefix}"') - print(f"Number of context words (tokens): {n_ctx}") - - self.ctx = nn.Parameter(ctx_vectors) # to be optimized - - classnames = [name.replace("_", " ") for name in classnames] - name_lens = [len(_tokenizer.encode(name)) for name in classnames] - prompts = [prompt_prefix + " " + name + "." for name in classnames] - - tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts]) - with torch.no_grad(): - embedding = clip_model.token_embedding(tokenized_prompts).type(dtype) - - # These token vectors will be saved when in save_model(), - # but they should be ignored in load_model() as we want to use - # those computed using the current class names - self.register_buffer("token_prefix", embedding[:, :1, :]) # SOS - self.register_buffer("token_suffix", embedding[:, 1 + n_ctx :, :]) # CLS, EOS - - self.n_cls = n_cls - self.n_ctx = n_ctx - self.tokenized_prompts = tokenized_prompts # torch.Tensor - self.name_lens = name_lens - self.class_token_position = cfg.TRAINER.UPLTrainer.CLASS_TOKEN_POSITION - - def forward(self): - ctx = self.ctx - if ctx.dim() == 2: - ctx = ctx.unsqueeze(0).expand(self.n_cls, -1, -1) - - prefix = self.token_prefix - suffix = self.token_suffix - - if self.class_token_position == "end": - prompts = torch.cat( - [ - prefix, # (n_cls, 1, dim) - ctx, # (n_cls, n_ctx, dim) - suffix, # (n_cls, *, dim) - ], - dim=1, - ) - - elif self.class_token_position == "middle": - half_n_ctx = self.n_ctx // 2 - prompts = [] - for i in range(self.n_cls): - name_len = self.name_lens[i] - prefix_i = prefix[i : i + 1, :, :] - class_i = suffix[i : i + 1, :name_len, :] - suffix_i = suffix[i : i + 1, name_len:, :] - ctx_i_half1 = ctx[i : i + 1, :half_n_ctx, :] - ctx_i_half2 = ctx[i : i + 1, half_n_ctx:, :] - prompt = torch.cat( - [ - prefix_i, # (1, 1, dim) - ctx_i_half1, # (1, n_ctx//2, dim) - class_i, # (1, name_len, dim) - ctx_i_half2, # (1, n_ctx//2, dim) - suffix_i, # (1, *, dim) - ], - dim=1, - ) - prompts.append(prompt) - prompts = torch.cat(prompts, dim=0) - - elif self.class_token_position == "front": - prompts = [] - for i in range(self.n_cls): - name_len = self.name_lens[i] - prefix_i = prefix[i : i + 1, :, :] - class_i = suffix[i : i + 1, :name_len, :] - suffix_i = suffix[i : i + 1, name_len:, :] - ctx_i = ctx[i : i + 1, :, :] - prompt = torch.cat( - [ - prefix_i, # (1, 1, dim) - class_i, # (1, name_len, dim) - ctx_i, # (1, n_ctx, dim) - suffix_i, # (1, *, dim) - ], - dim=1, - ) - prompts.append(prompt) - prompts = torch.cat(prompts, dim=0) - - else: - raise ValueError - - return prompts - - -class CustomCLIP(nn.Module): - def __init__(self, cfg, classnames, clip_model): - super().__init__() - self.prompt_learner = PromptLearner(cfg, classnames, clip_model) - self.tokenized_prompts = self.prompt_learner.tokenized_prompts - self.image_encoder = clip_model.visual - self.text_encoder = TextEncoder(clip_model) - self.logit_scale = clip_model.logit_scale - self.dtype = clip_model.dtype - self.clip = clip_model - self.classnames = classnames - self.cfg = cfg - - def forward(self, image): - image_features = self.image_encoder(image.type(self.dtype)) - - prompts = self.prompt_learner() - tokenized_prompts = self.tokenized_prompts - text_features = self.text_encoder(prompts, tokenized_prompts) - - image_features = image_features / image_features.norm(dim=-1, keepdim=True) - text_features = text_features / text_features.norm(dim=-1, keepdim=True) - - - logit_scale = self.logit_scale.exp() - logits = logit_scale * image_features @ text_features.t() - - return logits, image_features, text_features - - - def zero_shot_forward(self, image, device): - temp = CUSTOM_TEMPLATES[self.cfg.DATASET.NAME] - prompts = [temp.format(c.replace("_", " ")) for c in self.classnames] - prompts = torch.cat([clip.tokenize(p) for p in prompts]) - prompts = prompts.to(device) - with torch.no_grad(): - text_features = self.clip.encode_text(prompts) - text_features = text_features / text_features.norm(dim=-1, keepdim=True) - - image_features = self.clip.encode_image(image) - image_features = image_features / image_features.norm(dim=-1, keepdim=True) - logit_scale = self.clip.logit_scale.exp() - logits = logit_scale * image_features @ text_features.t() - return logits, image_features, text_features - - - -@TRAINER_REGISTRY.register() -class UPLTrainer(TrainerX): - def __init__(self, cfg): - super().__init__(cfg) - self.gt_label_dict = self.get_gt_label(cfg) - def check_cfg(self, cfg): - assert cfg.TRAINER.UPLTrainer.PREC in ["fp16", "fp32", "amp"] - - def build_model(self): - cfg = self.cfg - classnames = self.dm.dataset.classnames - print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})") - clip_model = load_clip_to_cpu(cfg) - - if cfg.TRAINER.UPLTrainer.PREC == "fp32" or cfg.TRAINER.UPLTrainer.PREC == "amp": - # CLIP's default precision is fp16 - clip_model.float() - - print("Building resnet50") - # self.model = CustomCLIP(cfg, classnames, clip_model) - self.model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True) - # num_ftrs = self.model.fc.in_features - # self.model.fc = nn.Linear(num_ftrs, len(classnames)) - - # print("Turning off gradients in both the image and the text encoder") - # for name, param in self.model.named_parameters(): - # if "prompt_learner" not in name: - # param.requires_grad_(False) - - if cfg.MODEL.INIT_WEIGHTS: - load_pretrained_weights(self.model.prompt_learner, cfg.MODEL.INIT_WEIGHTS) - - self.model.to(self.device) - # # NOTE: only give prompt_learner to the optimizer - # self.optim = build_optimizer(self.model.prompt_learner, cfg.OPTIM) - # self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) - # self.register_model("prompt_learner", self.model.prompt_learner, self.optim, self.sched) - # NOTE: only give prompt_learner to the optimizer - self.optim = build_optimizer(self.model, cfg.OPTIM) - self.sched = build_lr_scheduler(self.optim, cfg.OPTIM) - self.register_model("resnet50", self.model, self.optim, self.sched) - - self.scaler = GradScaler() if cfg.TRAINER.UPLTrainer.PREC == "amp" else None - - # Note that multi-gpu training could be slow because CLIP's size is - # big, which slows down the copy operation in DataParallel - device_count = torch.cuda.device_count() - if device_count > 1: - print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!") - self.model = nn.DataParallel(self.model) - - - def get_gt_label(self, cfg): - dataset_map = { - "SSImageNet":"imagenet", - "SSCaltech101":"caltech-101", - "SSOxfordPets":"oxford_pets", - "SSUCF101":"ucf101", - "SSOxfordFlowers":"oxford_flowers", - "SSStanfordCars":"stanford_cars", - } - dataset_dir = dataset_map[self.cfg.DATASET.NAME] - root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT)) - dataset_dir = os.path.join(root, dataset_dir) - gt_labels = os.path.join(dataset_dir, "{}_GTlabels.json".format(self.cfg.DATASET.NAME)) - if os.path.exists(gt_labels): - with open(gt_labels, "rb") as f: - gt_label_dict = json.load(f) - print("Loaded training GT labels from {}".format(gt_labels)) - else: - print("Generating training GT labels to {}".format(gt_labels)) - gt_label_dict = {} - for batch_idx, batch in enumerate(self.train_loader_x): - input, label, impath = self.parse_batch_test_with_impath(batch) - for l, ip in zip(label, impath): - ip = './data/' + ip.split('/data/')[1] - gt_label_dict[ip] = l.item() - with open(gt_labels, "w") as outfile: - json.dump(gt_label_dict, outfile) - return gt_label_dict - - def forward_backward(self, batch): - gt_label_list = [] - _, _, impath = self.parse_batch_test_with_impath(batch) - image, label = self.parse_batch_train(batch) - for ip in impath: - ip = './data/' + ip.split('/data/')[1] - gt_label = self.gt_label_dict[ip] - gt_label_list.append(gt_label) - gt_label = torch.tensor(gt_label_list, dtype=label.dtype).to(label.device) - prec = self.cfg.TRAINER.UPLTrainer.PREC - if prec == "amp": - with autocast(): - output, image_features, text_features = self.model(image) - # loss = F.cross_entropy(output, label, self.class_weights) - loss = F.cross_entropy(output, label) - self.optim.zero_grad() - self.scaler.scale(loss).backward() - self.scaler.step(self.optim) - self.scaler.update() - else: - # output, image_features, text_features = self.model(image) - output = self.model(image) - # loss = F.cross_entropy(output, label, self.class_weights) - loss = F.cross_entropy(output, label) - self.model_backward_and_update(loss) - - loss_summary = { - "loss": loss.item(), - "acc": compute_accuracy(output, label)[0].item(), - } - - if (self.batch_idx + 1) == self.num_batches: - self.update_lr() - - return loss_summary - - def parse_batch_train(self, batch): - input = batch["img"] - label = batch["label"] - input = input.to(self.device) - label = label.to(self.device) - return input, label - - def load_model(self, directory, epoch=None): - if not directory: - print("Note that load_model() is skipped as no pretrained model is given") - return - - names = self.get_model_names() - - # By default, the best model is loaded - model_file = "model-best.pth.tar" - - if epoch is not None: - model_file = "model.pth.tar-" + str(epoch) - - for name in names: - model_path = osp.join(directory, name, model_file) - - if not osp.exists(model_path): - raise FileNotFoundError('Model not found at "{}"'.format(model_path)) - - checkpoint = load_checkpoint(model_path) - state_dict = checkpoint["state_dict"] - epoch = checkpoint["epoch"] - - # Ignore fixed token vectors - if "token_prefix" in state_dict: - del state_dict["token_prefix"] - - if "token_suffix" in state_dict: - del state_dict["token_suffix"] - - print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch)) - # set strict=False - self._models[name].load_state_dict(state_dict, strict=False) - - def load_model_by_id(self, directory, model_id, epoch=None): - if not directory: - print( - 'Note that load_model() is skipped as no pretrained model is given' - ) - return - - names = self.get_model_names() - - # By default, the best model is loaded - model_file = 'model-best-{}.pth.tar'.format(model_id) - - if epoch is not None: - model_file = 'model.pth.tar-' + str(epoch) - - for name in names: - model_path = osp.join(directory, name, model_file) - - if not osp.exists(model_path): - raise FileNotFoundError( - 'Model not found at "{}"'.format(model_path) - ) - - checkpoint = load_checkpoint(model_path) - state_dict = checkpoint['state_dict'] - epoch = checkpoint['epoch'] - - # Ignore fixed token vectors - if 'token_prefix' in state_dict: - del state_dict['token_prefix'] - - if 'token_suffix' in state_dict: - del state_dict['token_suffix'] - - print( - 'Loading weights to {} ' - 'from "{}" (epoch = {})'.format(name, model_path, epoch) - ) - # set strict=False - self._models[name].load_state_dict(state_dict, strict=False) - - @torch.no_grad() - def test(self, split=None, trainer_list=None): - """A generic testing pipeline.""" - - self.set_model_mode("eval") - self.evaluator.reset() - - save_path = os.path.join(self.cfg.TEST.Analyze_Result_Path, self.cfg.DATASET.NAME, 'fp'+str(self.cfg.TRAINER.UPLTrainer.NUM_FP), - str(self.cfg.OPTIM.MAX_EPOCH)+'_'+str(self.cfg.SEED)+'_'+str(self.cfg.DATASET.NUM_SHOTS)+'_random_init'+str(self.cfg.TRAINER.UPLTrainer.CLASS_TOKEN_POSITION)) - if not os.path.exists(save_path): - os.makedirs(save_path) - - results_id = 0 - while os.path.exists(os.path.join(save_path, 'per_image_results_{}_{}.txt'.format(split, results_id))): - results_id += 1 - self.per_image_txt_writer = open(os.path.join(save_path, 'per_image_results_{}_{}.txt'.format(split, results_id)), 'w') - self.per_class_txt_writer = open(os.path.join(save_path, 'per_class_results_{}_{}.txt'.format(split, results_id)), 'w') - - if split is None: - split = self.cfg.TEST.SPLIT - - if split == "val" and self.val_loader is not None: - data_loader = self.val_loader - print("Do evaluation on {} set".format(split)) - elif split=="novel": - data_loader = self.test_novel_loader - print("Do evaluation on test novel set") - elif split=="base": - data_loader = self.test_base_loader - print("Do evaluation on test base set") - elif split=="all": - data_loader = self.test_loader - print("Do evaluation on test set") - else: - data_loader = self.test_loader - print("Do evaluation on test set") - - outputs_all = [] - label_all = [] - image_features_all = [] - text_features_all = [] - for batch_idx, batch in enumerate(data_loader): - input, label = self.parse_batch_test(batch) - if trainer_list is None or len(trainer_list)==1: - # 如果不是ensemble的测试 - # output, image_features, text_features = self.model_inference(input) - output = self.model_inference(input) - # image_features_all.append(image_features) - # text_features_all.append(text_features) - else: - # ensemble的测试 - outputs = [t.model_inference(input)[0] for t in trainer_list] - output = sum(outputs) / len(outputs) - self.evaluator.process(output, label, self.per_image_txt_writer, self.per_class_txt_writer) - outputs_all.append(output) - label_all.append(label) - results = self.evaluator.evaluate() - if split in ['all', 'train', 'test', 'novel', 'base']: - if len(outputs_all) != 0: - outputs_all = torch.cat(outputs_all, dim=0) - label_all = torch.cat(label_all, dim=0) - # image_features_all = torch.cat(image_features_all, dim=0) - # text_features_all = text_features_all[0] - # torch.save(image_features_all, os.path.join(save_path, '{}_v_features.pt'.format(split))) - # torch.save(image_features_all, os.path.join(save_path, '{}_targets.pt'.format(split))) - torch.save(outputs_all, os.path.join(save_path, '{}_logits.pt'.format(split))) - # torch.save(text_features_all, os.path.join(save_path, '{}_l_features.pt'.format(split))) - - - self.per_image_txt_writer.close() - self.per_class_txt_writer.close() - - - for k, v in results.items(): - tag = "{}/{}".format(split, k) - self.write_scalar(tag, v, self.epoch) - - return list(results.values())[0] - - @torch.no_grad() - def zero_shot_analyze(self, trainer_list=None): - """A generic predicting pipeline.""" - self.set_model_mode("eval") - self.model.eval() - self.evaluator.reset() - - data_loader = self.train_loader_sstrain - outputs = [] - image_features_list = [] - text_features_list = [] - img_paths = [] - from tqdm import tqdm - for batch_idx, batch in tqdm(enumerate(data_loader)): - input, label, impath = self.parse_batch_test_with_impath(batch) - if trainer_list is None or len(trainer_list)==1: - # 如果不是ensemble的测试 - output, image_features, text_features = self.model.zero_shot_forward(input, self.device) - else: - # ensemble的测试 - outputs = [t.model.zero_shot_forward(input, self.device)[0] for t in trainer_list] - output = sum(outputs) / len(outputs) - outputs.append(output) - image_features_list.append(image_features) - img_paths.append(impath) - sstrain_outputs = torch.cat(outputs, dim=0) - sstrain_img_paths = np.concatenate(img_paths, axis=0) - image_features = torch.cat(image_features_list, axis=0) - # text_features = torch.cat(text_features, axis=0) - print('image_features', image_features.shape) - print('text_features', text_features.shape) - predict_label_dict, _ = select_top_k_similarity_per_class(sstrain_outputs, sstrain_img_paths, -1, image_features, True, random_seed=self.cfg.SEED) - save_outputs(self.train_loader_x, self, predict_label_dict, self.cfg.DATASET.NAME, text_features, backbone_name=self.cfg.MODEL.BACKBONE.NAME) - caculate_noise_rate_analyze(predict_label_dict, train_loader=self.train_loader_x, trainer=self) - return predict_label_dict - - - def load_from_exist_file(self, file_path, model_names): - logits = None - for model in model_names: - model_path = os.path.join(file_path, model) - logist_path = os.path.join(model_path, '{}_logits.pt'.format(self.cfg.DATASET.NAME)) - if logits is None: - logits = torch.load(logist_path) - else: - logits += torch.load(logist_path) - - info_path = os.path.join(model_path, '{}.json'.format(self.cfg.DATASET.NAME)) - info = json.load(open(info_path)) - items = [] - for c in info: - for img_path in info[c]: - item = info[c][img_path] - items.append([img_path, int(item[3])]) # 路径 序号 - sorted(items, key=(lambda x:x[1])) - sstrain_img_paths = np.array(items)[:,0] - - logits /= len(model_names) - predict_label_dict, predict_conf_dict = select_top_k_similarity_per_class_with_noisy_label(logits, sstrain_img_paths, - K=self.cfg.DATASET.NUM_SHOTS, is_softmax=False, - random_seed=self.cfg.SEED, gt_label_dict=self.gt_label_dict, - num_fp=self.cfg.TRAINER.UPLTrainer.NUM_FP) - return predict_label_dict, predict_conf_dict - - @torch.no_grad() - def zero_shot_predict(self, trainer_list=None): - """A generic predicting pipeline.""" - self.set_model_mode("eval") - self.model.eval() - self.evaluator.reset() - - save_path = os.path.join(self.cfg.TEST.Analyze_Result_Path, self.cfg.DATASET.NAME, - str(self.cfg.OPTIM.MAX_EPOCH)+'_'+str(self.cfg.SEED)+'_'+str(self.cfg.DATASET.NUM_SHOTS)) - if not os.path.exists(save_path): - os.makedirs(save_path) - - data_loader = self.train_loader_sstrain - - outputs = [] - img_paths = [] - - - for batch_idx, batch in tqdm(enumerate(data_loader)): - input, label, impath = self.parse_batch_test_with_impath(batch) - if trainer_list is None or len(trainer_list)==1: - # 如果不是ensemble的测试 - output, image_features, text_features = self.model.zero_shot_forward(input, self.device) - else: - # ensemble的测试 - outputs = [t.model.zero_shot_forward(input, self.device)[0] for t in trainer_list] - output = sum(outputs) / len(outputs) - outputs.append(output) - img_paths.append(impath) - - - outputs = torch.cat(outputs, dim=0) - img_paths = np.concatenate(img_paths, axis=0) - - - # 尽力维持类别平衡 - if self.cfg.DATASET.CLASS_EQULE is True: - if self.cfg.DATASET.CONF_THRESHOLD > 0: - # 选择置信度大于一定值 & 选择 - predict_label_dict_1, predict_conf_dict_1 = select_top_k_similarity_per_class(outputs, img_paths, K=self.cfg.DATASET.NUM_SHOTS) - predict_label_dict_2, predict_conf_dict_2 = select_top_by_value(outputs, img_paths, conf_threshold=self.cfg.DATASET.CONF_THRESHOLD) - - print(len(predict_label_dict_1), 'predict_label_dict_1') - print(len(predict_label_dict_2), 'predict_label_dict_2') - - predict_label_dict = dict(predict_label_dict_1, **predict_label_dict_2) - predict_conf_dict = dict(predict_conf_dict_1, **predict_conf_dict_2) - caculate_noise_rate(predict_label_dict, train_loader=self.train_loader_x, trainer=self) - print('select {} samples'.format(len(predict_label_dict))) - - else: - print("K {} shots".format(self.cfg.DATASET.NUM_SHOTS)) - predict_label_dict, predict_conf_dict = select_top_k_similarity_per_class(outputs, img_paths, K=self.cfg.DATASET.NUM_SHOTS) - caculate_noise_rate(predict_label_dict, train_loader=self.train_loader_x, trainer=self) - print('select {} samples'.format(len(predict_label_dict))) - - else: - print("K", self.cfg.DATASET.NUM_SHOTS*text_features.shape[0]) - predict_label_dict, predict_conf_dict = select_top_k_similarity(outputs, img_paths, K=self.cfg.DATASET.NUM_SHOTS*text_features.shape[0]) - caculate_noise_rate(predict_label_dict, train_loader=self.train_loader_x, trainer=self) - print('select {} samples'.format(len(predict_label_dict))) - return predict_label_dict, predict_conf_dict - - @torch.no_grad() - def zero_shot_test(self, split=None, trainer_list=None): - """A generic predicting pipeline.""" - - self.set_model_mode("eval") - self.evaluator.reset() - - save_path = os.path.join(self.cfg.TEST.Analyze_Result_Path, self.cfg.DATASET.NAME, - str(self.cfg.OPTIM.MAX_EPOCH)+'_'+str(self.cfg.SEED)+'_'+str(self.cfg.DATASET.NUM_SHOTS)) - if not os.path.exists(save_path): - os.makedirs(save_path) - - results_id = 0 - while os.path.exists(os.path.join(save_path, 'per_image_results_{}_{}.txt'.format(split, results_id))): - results_id += 1 - self.per_image_txt_writer = open(os.path.join(save_path, 'per_image_results_{}_{}.txt'.format(split, results_id)), 'w') - self.per_class_txt_writer = open(os.path.join(save_path, 'per_class_results_{}_{}.txt'.format(split, results_id)), 'w') - - if split is None: - split = self.cfg.TEST.SPLIT - - save_path = os.path.join(self.cfg.TEST.Analyze_Result_Path, self.cfg.DATASET.NAME, - str(self.cfg.OPTIM.MAX_EPOCH)+'_'+str(self.cfg.SEED)+'_'+str(self.cfg.DATASET.NUM_SHOTS)) - if not os.path.exists(save_path): - os.makedirs(save_path) - - if split == "val" and self.val_loader is not None: - data_loader = self.val_loader - print("Do evaluation on {} set".format(split)) - elif split=="novel": - data_loader = self.test_novel_loader - print("Do evaluation on test novel set") - elif split=="base": - data_loader = self.test_base_loader - print("Do evaluation on test base set") - elif split=="all": - data_loader = self.test_loader - print("Do evaluation on test set") - elif split=="train": - data_loader = self.train_loader_x - print("Do evaluation on train set") - else: - data_loader = self.test_loader - print("Do evaluation on test set") - - for batch_idx, batch in enumerate(data_loader): - input, label, impath = self.parse_batch_test_with_impath(batch) - if trainer_list is None or len(trainer_list)==1: - # 如果不是ensemble的测试 - output, image_features, text_features = self.model.zero_shot_forward(input, self.device) - else: - # ensemble的测试 - outputs = [t.model.zero_shot_forward(input, self.device)[0] for t in trainer_list] - output = sum(outputs) / len(outputs) - self.evaluator.process(output, label, self.per_image_txt_writer, self.per_class_txt_writer) - results = self.evaluator.evaluate() - - for k, v in results.items(): - tag = "{}/{}".format(split, k) - self.write_scalar(tag, v, self.epoch) - - self.per_image_txt_writer.close() - self.per_class_txt_writer.close() - - return list(results.values())[0] - - - - def build_data_loader(self): - """Create essential data-related attributes. - - A re-implementation of this method must create the - same attributes (except self.dm). - """ - _, preprocess = clip.load(self.cfg.MODEL.BACKBONE.NAME) - dm = UPLDataManager(self.cfg, custom_tfm_test=preprocess) - # _, preprocess = clip.load(self.cfg.MODEL.BACKBONE.NAME) - # dm = UPLDataManager(self.cfg, custom_tfm_test=preprocess) - - self.train_loader_x = dm.train_loader_x - self.train_loader_u = dm.train_loader_u # optional, can be None - self.val_loader = dm.val_loader # optional, can be None - self.test_loader = dm.test_loader - self.train_loader_sstrain = dm.train_loader_sstrain - self.num_classes = dm.num_classes - self.num_source_domains = dm.num_source_domains - self.lab2cname = dm.lab2cname # dict {label: classname} - - if self.cfg.DATALOADER.OPEN_SETTING: - self.test_novel_loader = dm.test_novel_loader - self.test_base_loader = dm.test_base_loader - - - self.dm = dm - - def sstrain_with_id(self, model_id): - self.sstrain(self.start_epoch, self.max_epoch, model_id) - - def sstrain(self, start_epoch, max_epoch, model_id): - """Generic training loops.""" - self.start_epoch = start_epoch - self.max_epoch = max_epoch - - self.before_train() - for self.epoch in range(self.start_epoch, self.max_epoch): - self.before_epoch() - self.run_epoch_with_sstrain() - self.after_epoch(model_id) - self.after_train(model_id) - - def run_epoch_with_sstrain(self): - self.set_model_mode("train") - losses = MetricMeter() - batch_time = AverageMeter() - data_time = AverageMeter() - self.num_batches = len(self.train_loader_sstrain) - - end = time.time() - for self.batch_idx, batch in enumerate(self.train_loader_sstrain): - data_time.update(time.time() - end) - loss_summary = self.forward_backward(batch) - batch_time.update(time.time() - end) - losses.update(loss_summary) - - if ( - self.batch_idx + 1 - ) % self.cfg.TRAIN.PRINT_FREQ == 0 or self.num_batches < self.cfg.TRAIN.PRINT_FREQ: - nb_remain = 0 - nb_remain += self.num_batches - self.batch_idx - 1 - nb_remain += ( - self.max_epoch - self.epoch - 1 - ) * self.num_batches - eta_seconds = batch_time.avg * nb_remain - eta = str(datetime.timedelta(seconds=int(eta_seconds))) - print( - "epoch [{0}/{1}][{2}/{3}]\t" - "time {batch_time.val:.3f} ({batch_time.avg:.3f})\t" - "data {data_time.val:.3f} ({data_time.avg:.3f})\t" - "eta {eta}\t" - "{losses}\t" - "lr {lr:.6e}".format( - self.epoch + 1, - self.max_epoch, - self.batch_idx + 1, - self.num_batches, - batch_time=batch_time, - data_time=data_time, - eta=eta, - losses=losses, - lr=self.get_current_lr(), - ) - ) - - n_iter = self.epoch * self.num_batches + self.batch_idx - for name, meter in losses.meters.items(): - self.write_scalar("train/" + name, meter.avg, n_iter) - self.write_scalar("train/lr", self.get_current_lr(), n_iter) - - end = time.time() - - def after_epoch(self, model_id): - last_epoch = (self.epoch + 1) == self.max_epoch - do_test = not self.cfg.TEST.NO_TEST - meet_checkpoint_freq = ( - (self.epoch + 1) % self.cfg.TRAIN.CHECKPOINT_FREQ == 0 - if self.cfg.TRAIN.CHECKPOINT_FREQ > 0 else False - ) - # if ((self.epoch + 1) % 5) == 0 and self.cfg.DATASET.NAME!="SSImageNet": - # curr_result = self.test(split="test") - if do_test and self.cfg.TEST.FINAL_MODEL == "best_val": - curr_result = self.test(split="val") - is_best = curr_result > self.best_result - if is_best: - self.best_result = curr_result - self.save_model( - self.epoch, - self.output_dir, - model_name="model-best-{}.pth.tar".format(model_id) - ) - - if meet_checkpoint_freq or last_epoch: - self.save_model( - self.epoch, - self.output_dir, - model_name="model-best-{}.pth.tar".format(model_id) - ) - - def after_train(self, model_id): - print("Finished training") - - do_test = not self.cfg.TEST.NO_TEST - if do_test: - if self.cfg.TEST.FINAL_MODEL == "best_val": - print("Deploy the model with the best val performance") - self.load_model_by_id(self.output_dir, model_id) - # self.test(split='novel') - # self.test(split='base') - # self.test(split='train') - self.test(split='test') - - - # Show elapsed time - elapsed = round(time.time() - self.time_start) - elapsed = str(datetime.timedelta(seconds=elapsed)) - print("Elapsed: {}".format(elapsed)) - - # Close writer - self.close_writer() - - def parse_batch_test(self, batch): - input = batch["img"] - label = batch["label"] - - input = input.to(self.device) - label = label.to(self.device) - - return input, label - - def parse_batch_test_with_impath(self, batch): - input = batch["img"] - label = batch["label"] - impath = batch["impath"] - - input = input.to(self.device) - label = label.to(self.device) - # impath = impath.to(self.device) - - return input, label, impath - - @torch.no_grad() - def test_with_existing_logits(self, logits, split='test'): - - - self.set_model_mode("eval") - self.evaluator.reset() - - save_path = os.path.join(self.cfg.TEST.Analyze_Result_Path, self.cfg.DATASET.NAME, - str(self.cfg.OPTIM.MAX_EPOCH)+'_'+str(self.cfg.SEED)+'_'+str(self.cfg.DATASET.NUM_SHOTS)+'_random_init'+str(self.cfg.TRAINER.UPLTrainer.CLASS_TOKEN_POSITION)) - if not os.path.exists(save_path): - os.makedirs(save_path) - - results_id = 0 - while os.path.exists(os.path.join(save_path, 'per_image_results_{}_{}.txt'.format(split, results_id))): - results_id += 1 - self.per_image_txt_writer = open(os.path.join(save_path, 'per_image_results_{}_{}.txt'.format(split, results_id)), 'w') - self.per_class_txt_writer = open(os.path.join(save_path, 'per_class_results_{}_{}.txt'.format(split, results_id)), 'w') - - if split is None: - split = self.cfg.TEST.SPLIT - - if split == "val" and self.val_loader is not None: - data_loader = self.val_loader - print("Do evaluation on {} set".format(split)) - elif split=="novel": - data_loader = self.test_novel_loader - print("Do evaluation on test novel set") - elif split=="base": - data_loader = self.test_base_loader - print("Do evaluation on test base set") - elif split=="all": - data_loader = self.test_loader - print("Do evaluation on test set") - else: - data_loader = self.test_loader - print("Do evaluation on test set") - - label_all = [] - for batch_idx, batch in enumerate(data_loader): - input, label = self.parse_batch_test(batch) - label_all.append(label) - label_all = torch.hstack(label_all) - print(label_all.shape) - - self.evaluator.process(logits, label_all, self.per_image_txt_writer, self.per_class_txt_writer) - results = self.evaluator.evaluate() - for k, v in results.items(): - tag = "{}/{}".format(split, k) - self.write_scalar(tag, v, self.epoch) - - return results diff --git a/trainers/utils.py b/trainers/utils.py index 431efef2..1b769673 100644 --- a/trainers/utils.py +++ b/trainers/utils.py @@ -6,9 +6,9 @@ import torch from sklearn.metrics import precision_recall_curve from collections import OrderedDict -import pdb import random import time +import pdb import math def plotLogitsMap(outputs, label, save_path, fig_title, max_lines=1000): @@ -20,11 +20,11 @@ def plotLogitsMap(outputs, label, save_path, fig_title, max_lines=1000): pred = outputs.max(1)[1] matches = pred.eq(label).float() output_m = np.sort(output_m) - output_m = output_m[:,::-1] # 从大到小排序 - output_m = output_m[:,:5] # 取前五个 + output_m = output_m[:,::-1] + output_m = output_m[:,:5] output_m_index = output_m[:,0].argsort() output_m = output_m[output_m_index] - output_m = output_m[::-1,:] # 按第一列从大到小排序 + output_m = output_m[::-1,:] matches = matches[output_m_index] matches = torch.flip(matches, dims=[0]) matches = matches.cpu().detach().numpy() @@ -64,19 +64,15 @@ def plotPRMap(outputs, label, save_path, fig_title): pred = outputs.max(1)[1] matches = pred.eq(label).float() output_m = np.sort(output_m) - output_m = output_m[:,::-1] # 从大到小排序 - output_m = output_m[:,:5] # 取前五个 + output_m = output_m[:,::-1] + output_m = output_m[:,:5] output_m_index = output_m[:,0].argsort() output_m = output_m[output_m_index] - output_m = output_m[::-1,:] # 按第一列从大到小排序 + output_m = output_m[::-1,:] matches = matches[output_m_index] matches = torch.flip(matches, dims=[0]) matches = matches.cpu().detach().numpy() - # print(output_m[:,0].shape, matches.shape) precision, recall, thresholds = precision_recall_curve(matches, output_m[:,0]) - # print(precision) - # print(recall) - # print(thresholds, len(thresholds)) plt.plot(recall, precision) step = 0 @@ -102,7 +98,7 @@ def select_top_k_similarity_per_class(outputs, img_paths, K=1, image_features=No img_paths = img_paths[output_m_max_id] output_m_max = output_m_max[output_m_max_id] output_ori = output_ori[output_m_max_id] - ids = (-output_m).argsort()[:, 0] # 获得每行的类别标签 + ids = (-output_m).argsort()[:, 0] if image_features is not None: image_features = image_features.cpu().detach() @@ -111,11 +107,11 @@ def select_top_k_similarity_per_class(outputs, img_paths, K=1, image_features=No predict_label_dict = {} predict_conf_dict = {} from tqdm import tqdm - for id in tqdm(list(set(ids.tolist()))): # 标签去重 + for id in tqdm(list(set(ids.tolist()))): index = np.where(ids==id) - conf_class = output_m_max[index] # 置信度 + conf_class = output_m_max[index] output_class = output_ori[index] - img_paths_class = img_paths[index] # 每个类别的路径 + img_paths_class = img_paths[index] if image_features is not None: img_features = image_features[index] @@ -153,9 +149,8 @@ def select_top_k_similarity_per_class_with_noisy_label(img_paths, K=1, random_se gt_class_label_dict[indx] = np.array([]) for ip, gt_label in gt_label_dict.items(): gt_class_label_dict[gt_label] = np.append(gt_class_label_dict[gt_label], np.array(ip)) - # np.array(gt_class_label_dict[gt_label.item()], (np.array(ip))) img_paths_dict = {k: v for v, k in enumerate(img_paths)} - + fp_ids_chosen = set() predict_label_dict = {} rng = np.random.default_rng(seed=random_seed) acc_rate_dict = {} @@ -164,30 +159,33 @@ def select_top_k_similarity_per_class_with_noisy_label(img_paths, K=1, random_se tp_gt_all_img_index_dict = {} fp_gt_all_img_index_dict = {} fp_gt_all_img_index_list = [] - for id in tqdm(list(set(ids))): # 标签去重 - split = int(math.ceil((len(gt_class_label_dict[id]) * (1-(num_fp/K))))) + for id in tqdm(list(set(ids))): + # noisy lebels - fix candidates for 16 shot samples + split = int(math.ceil((len(gt_class_label_dict[id]) * (0.5)))) + # noisy lebels - fix candidates for 16 shot samples gt_class_img_index = [] for img in list(gt_class_label_dict[id]): gt_class_img_index.append(img_paths_dict[img]) - if num_fp == 0: - tp_gt_all_img_index_dict[id] = gt_class_img_index[:] - else: - tp_gt_all_img_index_dict[id] = gt_class_img_index[:split] + # if num_fp == 0: + # tp_gt_all_img_index_dict[id] = gt_class_img_index[:] + # else: + tp_gt_all_img_index_dict[id] = gt_class_img_index[:split] fp_gt_all_img_index_dict[id] = gt_class_img_index[split:] fp_gt_all_img_index_list.extend(gt_class_img_index[split:]) fp_gt_all_img_index_set = set(fp_gt_all_img_index_list) # noisy lebels - split data into TP and FP sets - for id in tqdm(list(set(ids))): # 标签去重 + for id in tqdm(list(set(ids))): gt_class_img_index = [] for img in list(gt_class_label_dict[id]): gt_class_img_index.append(img_paths_dict[img]) # noisy lebels - randomly draw FP samples with their indice gt_class_img_index = tp_gt_all_img_index_dict[id] - fp_ids_set = fp_gt_all_img_index_set.difference(gt_class_img_index, fp_gt_all_img_index_dict[id]) + fp_ids_set = fp_gt_all_img_index_set.difference(gt_class_img_index, fp_gt_all_img_index_dict[id], fp_ids_chosen) fp_ids = random.choices(list(fp_ids_set), k=num_fp) + fp_ids_chosen.update(fp_ids) # noisy lebels - randomly draw FP samples with their indice - img_paths_class = img_paths[gt_class_img_index] # 每个类别的路径 + img_paths_class = img_paths[gt_class_img_index] if K >= 0: if len(img_paths_class) < K: is_replace=True @@ -201,7 +199,6 @@ def select_top_k_similarity_per_class_with_noisy_label(img_paths, K=1, random_se # noisy lebels - - dilute with FP samples print('---',id) print(img_paths_class) - total = 0 correct = 0 for img_path in (img_paths_class): @@ -238,17 +235,17 @@ def select_by_conf(outputs, img_paths, K=1, conf_threshold=None, is_softmax=True img_paths = img_paths[output_m_max_id] output_m_max = output_m_max[output_m_max_id] output_ori = output_ori[output_m_max_id] - ids = (-output_m).argsort()[:, 0] # 获得每行的类别标签 + ids = (-output_m).argsort()[:, 0] predict_label_dict = {} predict_conf_dict = {} from tqdm import tqdm - for id in tqdm(list(set(ids.tolist()))): # 标签去重 + for id in tqdm(list(set(ids.tolist()))): index = np.where(ids==id) - conf_class = output_m_max[index] # 置信度 + conf_class = output_m_max[index] output_class = output_ori[index] - img_paths_class = img_paths[index] # 每个类别的路径 + img_paths_class = img_paths[index] for img_path, conf in zip(img_paths_class, conf_class): if conf > conf_threshold: @@ -257,7 +254,6 @@ def select_by_conf(outputs, img_paths, K=1, conf_threshold=None, is_softmax=True return predict_label_dict, predict_conf_dict def select_top_k_similarity(outputs, img_paths, K=1, image_features=None, repeat=False): - # print(outputs.shape) outputs = torch.nn.Softmax(dim=1)(outputs) output_m = outputs.cpu().detach().numpy() output_ori = outputs.cpu().detach() @@ -265,10 +261,9 @@ def select_top_k_similarity(outputs, img_paths, K=1, image_features=None, repeat output_m_max_id = np.argsort(-output_m_max) output_m = output_m[output_m_max_id] img_paths = img_paths[output_m_max_id] - # output_m_max = output_m_max[output_m_max_id] output_ori = output_ori[output_m_max_id] - conf_class = output_m_max[output_m_max_id] # 置信度 - ids = (-output_m).argsort()[:, 0] # 获得每行的类别标签 + conf_class = output_m_max[output_m_max_id] + ids = (-output_m).argsort()[:, 0] if image_features is not None: image_features = image_features.cpu().detach() @@ -304,10 +299,9 @@ def select_top_by_value(outputs, img_paths, conf_threshold=0.95, image_features= output_m_max_id = np.argsort(-output_m_max) output_m = output_m[output_m_max_id] img_paths = img_paths[output_m_max_id] - # output_m_max = output_m_max[output_m_max_id] output_ori = output_ori[output_m_max_id] - conf_class = output_m_max[output_m_max_id] # 置信度 - ids = (-output_m).argsort()[:, 0] # 获得每行的类别标签 + conf_class = output_m_max[output_m_max_id] + ids = (-output_m).argsort()[:, 0] if image_features is not None: image_features = image_features.cpu().detach() @@ -337,8 +331,6 @@ def caculate_noise_rate(predict_label_dict, train_loader, trainer, sample_level= ip = './data/' + ip.split('/data/')[1] gt_label_dict[ip] = l - # print('gt_label_dict', len(gt_label_dict)) - # print('gt_label_dict', gt_label_dict) total = 0 correct = 0 for item in predict_label_dict: @@ -368,7 +360,7 @@ def caculate_noise_rate_analyze(predict_label_dict, train_loader, trainer, sampl print('Acc Rate {:.4f}'.format(correct/total)) -def save_outputs(train_loader, trainer, predict_label_dict, dataset_name, text_features, backbone_name=None): +def save_outputs(train_loader, trainer, predict_label_dict, dataset_name, text_features, backbone_name=None, tag='', seed =''): backbone_name = backbone_name.replace('/', '-') gt_pred_label_dict = {} for batch_idx, batch in enumerate(train_loader): @@ -417,20 +409,20 @@ def save_outputs(train_loader, trainer, predict_label_dict, dataset_name, text_f v_features = torch.vstack(v_features) logits_tensor = torch.vstack(logits_list) - if not os.path.exists('./analyze_results/{}/'.format(backbone_name)): - os.makedirs('./analyze_results/{}/'.format(backbone_name)) + if not os.path.exists('./analyze_results/{}{}/'.format(backbone_name, tag)): + os.makedirs('./analyze_results/{}{}/'.format(backbone_name, tag)) - torch.save(v_features, './analyze_results/{}/{}_v_feature.pt'.format(backbone_name, dataset_name)) - torch.save(text_features, './analyze_results/{}/{}_l_feature.pt'.format(backbone_name, dataset_name)) - torch.save(logits_tensor, './analyze_results/{}/{}_logits.pt'.format(backbone_name, dataset_name)) + torch.save(v_features, './analyze_results/{}{}/{}_v_feature.pt'.format(backbone_name, tag, dataset_name, seed)) + torch.save(text_features, './analyze_results/{}{}/{}_l_feature{}.pt'.format(backbone_name, tag, dataset_name, seed)) + torch.save(logits_tensor, './analyze_results/{}{}/{}_logits{}.pt'.format(backbone_name, tag, dataset_name, seed)) - with open("./analyze_results/{}/{}.json".format(backbone_name, dataset_name), "w") as outfile: + with open("./analyze_results/{}{}/{}{}.json".format(backbone_name, tag, dataset_name, seed), "w") as outfile: json.dump(v_distance_dict, outfile) + def select_top_k_similarity_per_class_with_high_conf(outputs, img_paths, K=1, image_features=None, repeat=False): - # print(outputs.shape) outputs = torch.nn.Softmax(dim=1)(outputs) output_m = outputs.cpu().detach().numpy() output_ori = outputs.cpu().detach() @@ -440,16 +432,14 @@ def select_top_k_similarity_per_class_with_high_conf(outputs, img_paths, K=1, im img_paths = img_paths[output_m_max_id] output_m_max = output_m_max[output_m_max_id] output_ori = output_ori[output_m_max_id] - ids = (-output_m).argsort()[:, 0] # 获得每行的类别标签 + ids = (-output_m).argsort()[:, 0] - # 计算类别平均置信度 class_avg_conf = {} - for id in list(set(ids.tolist())): # 标签去重 + for id in list(set(ids.tolist())): index = np.where(ids==id) - conf_class = output_m_max[index] # 置信度 + conf_class = output_m_max[index] class_avg_conf[id] = conf_class.sum() / conf_class.size - # print(class_avg_conf[id]) selected_ids = sorted(class_avg_conf.items(), key = lambda kv:(kv[1], kv[0]), reverse=True)[:int(0.8*len(class_avg_conf))] remain_ids = sorted(class_avg_conf.items(), key = lambda kv:(kv[1], kv[0]), reverse=True)[int(0.8*len(class_avg_conf)):] @@ -464,14 +454,12 @@ def select_top_k_similarity_per_class_with_high_conf(outputs, img_paths, K=1, im predict_label_dict = {} predict_conf_dict = {} - # selected_ids.append(0) - for id in selected_ids: # 标签去重 + for id in selected_ids: index = np.where(ids==id) - conf_class = output_m_max[index] # 置信度 + conf_class = output_m_max[index] output_class = output_ori[index] - img_paths_class = img_paths[index] # 每个类别的路径 - + img_paths_class = img_paths[index] if image_features is not None: img_features = image_features[index] if K >= 0: @@ -515,20 +503,20 @@ def select_top_k_similarity_per_class_with_low_conf(outputs, img_paths, conf_thr img_paths = img_paths[output_m_max_id] output_m_max = output_m_max[output_m_max_id] output_ori = output_ori[output_m_max_id] - ids = (-output_m).argsort()[:, 0] # 获得每行的类别标签 + ids = (-output_m).argsort()[:, 0] predict_label_dict = {} predict_conf_dict = {} no_sample_ids = [] - for id in remain_ids_list: # 标签去重 + for id in remain_ids_list: # print(id) is_id_have_sample = False index = np.where(ids==id) - conf_class = output_m_max[index] # 置信度 + conf_class = output_m_max[index] output_class = output_ori[index] - img_paths_class = img_paths[index] # 每个类别的路径 + img_paths_class = img_paths[index] if K >= 0: for img_path, conf in zip(img_paths_class[:K], conf_class[:K]): @@ -541,7 +529,6 @@ def select_top_k_similarity_per_class_with_low_conf(outputs, img_paths, conf_thr for img_path, conf in zip(img_paths_class, conf_class): predict_label_dict[img_path] = id predict_conf_dict[img_path] = conf - # print(is_id_have_sample) if is_id_have_sample is False: no_sample_ids.append(id) @@ -549,7 +536,6 @@ def select_top_k_similarity_per_class_with_low_conf(outputs, img_paths, conf_thr return predict_label_dict, predict_conf_dict, no_sample_ids def select_top_k_similarity_per_class_no_smaple(outputs, img_paths, no_sample_ids, K=16): - # print(outputs.shape) outputs = torch.nn.Softmax(dim=1)(outputs) output_m = outputs.cpu().detach().numpy() output_ori = outputs.cpu().detach() @@ -559,18 +545,18 @@ def select_top_k_similarity_per_class_no_smaple(outputs, img_paths, no_sample_id img_paths = img_paths[output_m_max_id] output_m_max = output_m_max[output_m_max_id] output_ori = output_ori[output_m_max_id] - ids = (-output_m).argsort()[:, 0] # 获得每行的类别标签 + ids = (-output_m).argsort()[:, 0] predict_label_dict = {} predict_conf_dict = {} - for id in no_sample_ids: # 标签去重 + for id in no_sample_ids: print(id) index = np.where(ids==id) - conf_class = output_m_max[index] # 置信度 + conf_class = output_m_max[index] output_class = output_ori[index] - img_paths_class = img_paths[index] # 每个类别的路径 + img_paths_class = img_paths[index] if K >= 0: for img_path, conf in zip(img_paths_class[:K], conf_class[:K]): @@ -581,20 +567,3 @@ def select_top_k_similarity_per_class_no_smaple(outputs, img_paths, no_sample_id predict_label_dict[img_path] = id predict_conf_dict[img_path] = conf return predict_label_dict, predict_conf_dict - - -def read_wordnet_classnames(classname_CLS, text_file): - """Return a list containing - class name, WordNet name,... - """ - classnames_dict = OrderedDict() - for CLS in classname_CLS: - classnames_dict[CLS] = None - with open(text_file, "r") as f: - lines = f.readlines() - for line in lines: - line = line.strip().split(",") - classnames_dict[line[0]] = ",".join(line[0:]) - classnames = [classnames_dict[key] for key in classnames_dict.keys()] - return classnames - diff --git a/upl_test.py b/upl_test.py index db6ea758..f4cb128b 100644 --- a/upl_test.py +++ b/upl_test.py @@ -146,11 +146,11 @@ def main(args): prob_end = [] results_dict = {} - for SHOTS in [16]: + for SHOTS in [4]: print('SHOTS:', SHOTS, '\n\n\n\n\n') - for seed in range(1, 17): + for seed in range(1, 5): try: - prob = torch.load('./analysis_results_test/{}/fp{}/50_{}_{}_random_initend/test_logits.pt'.format(cfg.DATASET.NAME, args.num_fp, seed, SHOTS)) + prob = torch.load('./analysis_results_test/{}/{}/fp{}/50_{}_{}_random_initend/test_logits.pt'.format(args.tag, cfg.DATASET.NAME, args.num_fp, seed, SHOTS)) prob_end.append(prob) except: print('loss') @@ -159,6 +159,9 @@ def main(args): prob_test = sum(prob_end) / len(prob_end) results_end = trainer_list[0].test_with_existing_logits(prob_test) + save_path = './analysis_results_test/{}/{}/fp{}/'.format(args.tag, cfg.DATASET.NAME, args.num_fp) + with open(os.path.join(save_path, 'ensemble_accuracy_result.txt'), "w") as f: + print('ensemble: {:.2f}'.format((results_end['accuracy'])), file=f) print('ensemble: {:.2f}'.format((results_end['accuracy']))) results_dict[len(prob_end)] = results_end['accuracy'] @@ -245,5 +248,11 @@ def main(args): nargs=argparse.REMAINDER, help='modify config options using the command-line' ) + parser.add_argument( + '--tag', + type=str, + default='', + help='tag for method' + ) args = parser.parse_args() main(args) diff --git a/upl_train.py b/upl_train.py index 4cae821f..07c2c1b3 100644 --- a/upl_train.py +++ b/upl_train.py @@ -145,7 +145,6 @@ def main(args): trainer.dm.update_ssdateloader(predict_label_dict) trainer.train_loader_sstrain = trainer.dm.train_loader_sstrain trainer.sstrain_with_id(model_id=i) - # trainer.linear_probe() if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -229,4 +228,4 @@ def main(args): help='modify config options using the command-line' ) args = parser.parse_args() - main(args) + main(args) \ No newline at end of file