You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Hi, HPSv2 is really nice work. However, when I try to reproduce the v2.1 benchmark, I cannot get the results reported in your README. Could you please tell me how to fix this? This is the code from my Jupyter notebook:
import torch
from PIL import Image
import hpsv2
from hpsv2.src.open_clip import create_model_and_transforms, get_tokenizer
import warnings
import argparse
import os
import requests
from clint.textui import progress
from typing import Union
import huggingface_hub
from hpsv2.utils import root_path, hps_version_map
#warnings.filterwarnings("ignore", category=UserWarning)
def _load_image_tensor(img):
    """Convert a file path or PIL image into a batched tensor on ``device``.

    Relies on the module-level ``preprocess_val`` transform and ``device``.

    Raises:
        TypeError: if ``img`` is neither a ``str`` path nor a ``PIL.Image.Image``.
    """
    if isinstance(img, str):
        # Close the file handle as soon as preprocessing has read the pixels,
        # so scoring many images in a loop does not accumulate open files.
        with Image.open(img) as opened:
            pixels = preprocess_val(opened)
    elif isinstance(img, Image.Image):
        pixels = preprocess_val(img)
    else:
        raise TypeError('The type of parameter img_path is illegal.')
    return pixels.unsqueeze(0).to(device=device, non_blocking=True)


def _hps_for(model, image, text):
    """Run ``model`` on one image/text pair and return the scalar score."""
    with torch.cuda.amp.autocast():
        outputs = model(image, text)
        image_features, text_features = outputs["image_features"], outputs["text_features"]
        logits_per_image = image_features @ text_features.T
        hps_score = torch.diagonal(logits_per_image).cpu().numpy()
    return hps_score[0]


def score(model, img_path, prompt) -> list:
    """Score one or more images against a single prompt.

    Args:
        model: model called as ``model(image, text)`` and returning a mapping
            with ``"image_features"`` and ``"text_features"`` entries.
        img_path: a file path (``str``), a ``PIL.Image.Image``, or a ``list``
            whose elements are either of those.
        prompt: the text prompt every image is scored against.

    Returns:
        A list with one score per input image, in input order. A single
        (non-list) input yields a one-element list; an empty list yields ``[]``.

    Raises:
        TypeError: if ``img_path`` (or an element of the list) is not a
            ``str`` or ``PIL.Image.Image``.

    Uses the module-level ``tokenizer``, ``preprocess_val`` and ``device``.
    """
    if isinstance(img_path, list):
        items = img_path
    elif isinstance(img_path, (str, Image.Image)):
        items = [img_path]
    else:
        raise TypeError('The type of parameter img_path is illegal.')

    if not items:
        # Nothing to score; skip tokenization and model work entirely.
        return []

    with torch.no_grad():
        # The prompt is the same for every image, so tokenize it only once.
        text = tokenizer([prompt]).to(device=device, non_blocking=True)
        return [_hps_for(model, _load_image_tensor(item), text) for item in items]
To make it easy to score each image individually, I split up the original code as follows:
# One-time setup: build the ViT-H-14 model, load the HPS checkpoint selected by
# `hps_version`, and expose `model`, `preprocess_val`, `tokenizer` and `device`
# as module-level names.
model_dict = {}
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model, preprocess_train, preprocess_val = create_model_and_transforms(
    'ViT-H-14',
    'laion2B-s32B-b79K',
    precision='amp',
    device=device,
    jit=False,
    force_quick_gelu=False,
    force_custom_text=False,
    force_patch_dropout=False,
    force_image_size=None,
    pretrained_image=False,
    image_mean=None,
    image_std=None,
    light_augmentation=True,
    aug_cfg={},
    output_dict=True,
    with_score_predictor=False,
    with_region_predictor=False,
)
model_dict['model'] = model
model_dict['preprocess_val'] = preprocess_val

hps_version = "v2.1"

# exist_ok=True replaces the separate existence check, which could race with
# another process creating the directory between the check and the call.
os.makedirs(root_path, exist_ok=True)

# Download (or reuse the cached copy of) the checkpoint for the chosen version.
cp = huggingface_hub.hf_hub_download("xswu/HPSv2", hps_version_map[hps_version])

# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# source you trust.
checkpoint = torch.load(cp, map_location=device)
model.load_state_dict(checkpoint['state_dict'])

tokenizer = get_tokenizer('ViT-H-14')
model = model.to(device)
model.eval()  # inference mode
Then I downloaded the test data and tried to reproduce the results for each category (for example, photo).
from numpy import *
# Benchmark one category: score every image in `root` against the prompt at the
# same position in `prompts`, then print the mean score.
prompts = ["A man taking a drink from a water fountain.", ...]
root = '/my_path/HPDv2/SDXL-refiner-0.9/photo'


def _image_order_key(filename):
    """Sort key that orders purely numeric stems by value ('2' before '10')."""
    stem = os.path.splitext(filename)[0]
    if stem.isdecimal():
        return (0, int(stem), filename)
    return (1, 0, filename)


# os.listdir returns entries in arbitrary order and includes hidden entries and
# sub-directories (e.g. `.ipynb_checkpoints`); any stray entry would shift the
# positional pairing between images and prompts, so keep regular files only.
imgs = sorted(
    (
        name
        for name in os.listdir(root)
        if not name.startswith('.') and os.path.isfile(os.path.join(root, name))
    ),
    key=_image_order_key,
)

# Fail up front, not part-way through the run, when images outnumber prompts.
if len(imgs) > len(prompts):
    raise ValueError(
        f"found {len(imgs)} images in {root!r} but only {len(prompts)} prompts"
    )

ret = []
for n, prompt in zip(imgs, prompts):
    print(n, prompt)
    s = score(model, os.path.join(root, n), prompt)
    ret.append(s)
# `mean` is provided by the `from numpy import *` line preceding this block.
print(mean(ret))
I get 31.52 (vs. the reported 33.26) for anime and 26.51 (vs. the reported 28.38) for photo.
The text was updated successfully, but these errors were encountered:
Hi, HPSv2 is really nice work. However, when I try to reproduce the v2.1 benchmark, I cannot get the results reported in your README. Could you please tell me how to fix this? This is the code from my Jupyter notebook:
To make it easy to score each image individually, I split up the original code as follows:
Then I downloaded the test data and tried to reproduce the results for each category (for example, photo).
I get 31.52 (vs. the reported 33.26) for anime and 26.51 (vs. the reported 28.38) for photo.
The text was updated successfully, but these errors were encountered: