Description
I tested videos from UCF101 on the trained VGG19 network (training accuracy reached 85%), but every prediction it returns is the wrong class.
What might be the reason? Thank you very much.
Here is my revised code:
```python
import torch
import numpy as np
#from network import C3D_model
import cv2
from torch import nn
from torchvision import transforms
from PIL import Image
import config
import os
from model import CNNEncoder, RNNDecoder
torch.backends.cudnn.benchmark = True
def CenterCrop(frame, size):
    h, w = np.shape(frame)[0:2]
    th, tw = size
    x1 = int(round((w - tw) / 2.))
    y1 = int(round((h - th) / 2.))
    frame = frame[y1:y1 + th, x1:x1 + tw, :]
    return np.array(frame).astype(np.uint8)
def center_crop(frame):
    # cropping is currently disabled; this only casts the frame to uint8
    # frame = frame[8:120, 30:142, :]
    return np.array(frame).astype(np.uint8)
def transform(img):
    # torchvision pipeline used during training (resize + ImageNet normalization)
    return transforms.Compose([
        transforms.Resize((config.img_w, config.img_h)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])(img)
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    with open('./data/ucf_labels.txt', 'r') as f:
        class_names = f.readlines()

    # init models
    # model = C3D_model.C3D(num_classes=101)
    model = nn.Sequential(
        CNNEncoder(**config.cnn_encoder_params),
        RNNDecoder(**config.rnn_decoder_params)
    )
    # load the most recently written checkpoint
    file_path = './checkpoints/VGG19'
    filenames = os.listdir(file_path)
    print(filenames)
    file_list = sorted(filenames, key=lambda x: os.path.getmtime(os.path.join(file_path, x)))
    checkpoint = torch.load(os.path.join(file_path, file_list[-1]), map_location=lambda storage, loc: storage)
    # print(checkpoint)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()
    # read video
    video = 'G:/dataset/UCF-101/PlayingPiano/v_PlayingPiano_g01_c03.avi'
    cap = cv2.VideoCapture(video)
    # cap = cv2.VideoCapture(0)  # webcam input; leaving this in would override the video file above
    retaining = True

    clip = []
    while retaining:
        retaining, frame = cap.read()
        if not retaining and frame is None:
            continue
        # tmp = transform(frame)
        # tmp_ = center_crop(cv2.resize(frame, (171, 128)))
        tmp_ = center_crop(cv2.resize(frame, (config.img_w, config.img_h)))
        # tmp_ = CenterCrop(frame, (config.img_w, config.img_h))
        tmp = tmp_ - np.array([[[90.0, 98.0, 102.0]]])  # per-channel mean subtraction
        clip.append(tmp)
        if len(clip) == 20:
            inputs = np.array(clip).astype(np.float32)
            inputs = np.expand_dims(inputs, axis=0)
            # inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
            inputs = torch.from_numpy(inputs).to(device)
            with torch.no_grad():
                outputs = model(inputs)
            probs = torch.nn.Softmax(dim=1)(outputs)
            label = torch.max(probs, 1)[1].detach().cpu().numpy()[0]

            cv2.putText(frame, class_names[label].split(' ')[-1].strip(), (20, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (0, 0, 255), 1)
            cv2.putText(frame, "prob: %.4f" % probs[0][label], (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (0, 0, 255), 1)
            clip.pop(0)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        cv2.imshow('result', frame)
        cv2.waitKey(30)

    cap.release()
    cv2.destroyAllWindows()
if __name__ == '__main__':
    main()
```
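For reference, the `transform()` defined above is never actually used in the loop (the call is commented out), so inference currently relies on the manual mean subtraction. Below is a minimal sketch of how each frame could instead be pushed through that torchvision pipeline, assuming the model was trained with it and that the encoder/decoder stack expects input shaped (batch, time, channels, height, width); `preprocess_frame` and `frames_to_batch` are just illustrative names, not part of my code.

```python
# Minimal sketch (assumption): feed frames through the torchvision transform
# defined above rather than the manual [90, 98, 102] mean subtraction.
import cv2
import torch
from PIL import Image

def preprocess_frame(frame_bgr, transform):
    # OpenCV delivers BGR uint8 arrays; PIL/torchvision expect RGB images
    rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    return transform(Image.fromarray(rgb))  # tensor of shape (3, H, W)

def frames_to_batch(frames, transform):
    # Stack 20 preprocessed frames into a (1, T, C, H, W) batch,
    # mirroring np.expand_dims(..., axis=0) in the loop above
    clip = torch.stack([preprocess_frame(f, transform) for f in frames])
    return clip.unsqueeze(0)
```

With that, something like `inputs = frames_to_batch(clip, transform).to(device)` would take the place of the NumPy conversion inside the loop.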