Description
I tested videos from UCF101 on the trained VGG19 network (training accuracy reached 85%), but every prediction it returns is the wrong class.
What might be the reason? Thank you very much.
Here is my revised code:
```python
import torch
import numpy as np
#from network import C3D_model
import cv2
from torch import nn
from torchvision import transforms
from PIL import Image
import config
import os
from model import CNNEncoder, RNNDecoder
torch.backends.cudnn.benchmark = True
def CenterCrop(frame, size):
    h, w = np.shape(frame)[0:2]
    th, tw = size
    x1 = int(round((w - tw) / 2.))
    y1 = int(round((h - th) / 2.))
    frame = frame[y1:y1 + th, x1:x1 + tw, :]
    return np.array(frame).astype(np.uint8)
def center_crop(frame):
    # cropping is currently disabled; this only casts the frame to uint8
    # frame = frame[8:120, 30:142, :]
    return np.array(frame).astype(np.uint8)
def transform(img):
    # torchvision pipeline used during training (resize + ImageNet normalization)
    return transforms.Compose([
        transforms.Resize((config.img_w, config.img_h)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])(img)
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    with open('./data/ucf_labels.txt', 'r') as f:
        class_names = f.readlines()

    # init models
    # model = C3D_model.C3D(num_classes=101)
    model = nn.Sequential(
        CNNEncoder(**config.cnn_encoder_params),
        RNNDecoder(**config.rnn_decoder_params)
    )
    # load the most recently written checkpoint
    file_path = './checkpoints/VGG19'
    filenames = os.listdir(file_path)
    print(filenames)
    file_list = sorted(filenames, key=lambda x: os.path.getmtime(os.path.join(file_path, x)))
    checkpoint = torch.load(os.path.join(file_path, file_list[-1]), map_location=lambda storage, loc: storage)
    # print(checkpoint)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()
    # read video
    video = 'G:/dataset/UCF-101/PlayingPiano/v_PlayingPiano_g01_c03.avi'
    cap = cv2.VideoCapture(video)
    # cap = cv2.VideoCapture(0)  # webcam input; leaving this in would override the video file above
    retaining = True

    clip = []
    while retaining:
        retaining, frame = cap.read()
        if not retaining and frame is None:
            continue
        # tmp = transform(frame)
        # tmp_ = center_crop(cv2.resize(frame, (171, 128)))
        tmp_ = center_crop(cv2.resize(frame, (config.img_w, config.img_h)))
        # tmp_ = CenterCrop(frame, (config.img_w, config.img_h))
        tmp = tmp_ - np.array([[[90.0, 98.0, 102.0]]])  # per-channel mean subtraction
        clip.append(tmp)
        if len(clip) == 20:
            inputs = np.array(clip).astype(np.float32)
            inputs = np.expand_dims(inputs, axis=0)
            # inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
            inputs = torch.from_numpy(inputs).to(device)
            with torch.no_grad():
                outputs = model(inputs)
            probs = torch.nn.Softmax(dim=1)(outputs)
            label = torch.max(probs, 1)[1].detach().cpu().numpy()[0]

            cv2.putText(frame, class_names[label].split(' ')[-1].strip(), (20, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (0, 0, 255), 1)
            cv2.putText(frame, "prob: %.4f" % probs[0][label], (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (0, 0, 255), 1)
            clip.pop(0)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        cv2.imshow('result', frame)
        cv2.waitKey(30)

    cap.release()
    cv2.destroyAllWindows()
if __name__ == '__main__':
    main()
```
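For reference, the `transform()` defined above is never actually used in the loop (the call is commented out), so inference currently relies on the manual mean subtraction. Below is a minimal sketch of how each frame could instead be pushed through that torchvision pipeline, assuming the model was trained with it and that the encoder/decoder stack expects input shaped (batch, time, channels, height, width); `preprocess_frame` and `frames_to_batch` are just illustrative names, not part of my code.

```python
# Minimal sketch (assumption): feed frames through the torchvision transform
# defined above rather than the manual [90, 98, 102] mean subtraction.
import cv2
import torch
from PIL import Image

def preprocess_frame(frame_bgr, transform):
    # OpenCV delivers BGR uint8 arrays; PIL/torchvision expect RGB images
    rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    return transform(Image.fromarray(rgb))  # tensor of shape (3, H, W)

def frames_to_batch(frames, transform):
    # Stack 20 preprocessed frames into a (1, T, C, H, W) batch,
    # mirroring np.expand_dims(..., axis=0) in the loop above
    clip = torch.stack([preprocess_frame(f, transform) for f in frames])
    return clip.unsqueeze(0)
```

With that, something like `inputs = frames_to_batch(clip, transform).to(device)` would take the place of the NumPy conversion inside the loop.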