
implementing MAML with MiniImageNet #115

Open

ligeng0197 opened this issue Sep 23, 2021 · 0 comments

I slightly changed the MAML_omniglot code to implement a version for MiniImageNet; however, my model trained with higher does not seem to improve at all during training.
This is how I did it:
First, I borrowed the dataset and dataloader from https://github.com/dragen1860/MAML-Pytorch.
Then I trained the model on 10000 tasks in total, testing every 500 tasks. The test accuracies are:
[0.20550000000000002, 0.24100000000000002, 0.20800000000000002, 0.2245, 0.223, 0.23199999999999998, 0.22749999999999995, 0.21049999999999996, 0.2245, 0.24650000000000002, 0.233, 0.23600000000000002, 0.22699999999999998, 0.22349999999999998, 0.253, 0.25249999999999995, 0.22650000000000003, 0.2345, 0.22900000000000006, 0.22099999999999997]
These values hover around the 20% chance level for 5-way classification, which makes me believe the model didn't learn anything useful from training. The detailed code is:

    import time
    import numpy as np
    import torch.nn.functional as F
    import higher
    from torch.optim import Adam, SGD
    from torch.utils.data import DataLoader
    from MiniImagenet import MiniImagenet  # dataset/loader borrowed from dragen1860/MAML-Pytorch

    stime = time.time()
    mini = MiniImagenet('miniimagenet/', mode='train', n_way=5, k_shot=1,
                        k_query=4, batchsz=2000, resize=84)
    mini_test = MiniImagenet('miniimagenet/', mode='test', n_way=5, k_shot=1,
                             k_query=4, batchsz=100, resize=84)
    db = DataLoader(mini, 1, shuffle=True, num_workers=8, pin_memory=True)
    model = Learner().cuda()
    meta_optim = Adam(model.parameters(), lr=0.01)
    INNER_TRAIN_NUMS = 5   # inner-loop adaptation steps during meta-training
    INNER_TEST_NUMS = 3    # inner-loop adaptation steps during meta-testing
    TASK_NUMS = 4          # tasks per meta-batch
    TEST_MIDDLE = 500      # run a test pass every TEST_MIDDLE tasks
    accs = []

    for step, (x_spt, y_spt, x_qry, y_qry) in enumerate(db):
        x_spt, y_spt, x_qry, y_qry = x_spt.cuda(), y_spt.cuda(), x_qry.cuda(), y_qry.cuda()
        inner_optim = SGD(model.parameters(), lr=0.01)
        with higher.innerloop_ctx(model, inner_optim, copy_initial_weights=False) as (fmodel, foptim):
            # Inner loop: adapt the functional model on the support set.
            for i in range(INNER_TRAIN_NUMS):
                logits = fmodel(x_spt.squeeze())
                loss = F.cross_entropy(logits, y_spt.squeeze())
                foptim.step(loss)
                acc = (logits.detach().argmax(dim=1) == y_spt.squeeze()).sum().item() / len(y_spt.squeeze())
                print("train support acc:", acc)

            # Outer loss: evaluate the adapted model on the query set and
            # accumulate gradients w.r.t. the initial parameters.
            qlogits = fmodel(x_qry.squeeze())
            loss = F.cross_entropy(qlogits, y_qry.squeeze())
            loss.backward()
            acc = (qlogits.detach().argmax(dim=1) == y_qry.squeeze()).sum().item() / len(y_qry.squeeze())
            print("{} train query acc:".format(step), acc)

        # Meta-update every TASK_NUMS tasks: average the accumulated gradients,
        # then take one Adam step on the initial parameters.
        if step % TASK_NUMS == 0:
            for p in model.parameters():
                p.grad.data = p.grad.data / TASK_NUMS
            meta_optim.step()
            meta_optim.zero_grad()

        if step % TEST_MIDDLE == 0:
            test_accs = []
            db_test = DataLoader(mini_test, 1, shuffle=True, num_workers=5, pin_memory=True)
            for test_step, (x_spt, y_spt, x_qry, y_qry) in enumerate(db_test):
                x_spt, y_spt, x_qry, y_qry = x_spt.cuda(), y_spt.cuda(), x_qry.cuda(), y_qry.cuda()
                inner_optim = SGD(model.parameters(), lr=0.01)
                # track_higher_grads=False: plain fine-tuning, no meta-gradients needed at test time.
                with higher.innerloop_ctx(model, inner_optim, copy_initial_weights=True,
                                          track_higher_grads=False) as (fmodel, foptim):
                    for i in range(INNER_TEST_NUMS):
                        logits = fmodel(x_spt.squeeze())
                        loss = F.cross_entropy(logits, y_spt.squeeze())
                        foptim.step(loss)
                        acc = (logits.detach().argmax(dim=1) == y_spt.squeeze()).sum().item() / len(y_spt.squeeze())
                        print("test support acc:", acc)
                    qlogits = fmodel(x_qry.squeeze())
                    loss = F.cross_entropy(qlogits, y_qry.squeeze())
                    acc = (qlogits.detach().argmax(dim=1) == y_qry.squeeze()).sum().item() / len(y_qry.squeeze())
                    print("test query acc:", acc)
                    test_accs.append(acc)
            print("TEST acc:", np.mean(test_accs))
            accs.append(np.mean(test_accs))

    print("test acc", accs)
    print("total time:", time.time() - stime)

and my model code is:

    import torch.nn as nn
    import torch.nn.functional as F

    class Learner(nn.Module):
        """4-layer conv net for 5-way MiniImageNet classification."""

        def __init__(self):
            super().__init__()
            # Four blocks of 3x3 conv -> BatchNorm -> ReLU -> 2x2 max-pool.
            self.conv1 = nn.Conv2d(3, 32, 3)
            self.batchNorm1 = nn.BatchNorm2d(32, momentum=1, affine=True)
            self.conv2 = nn.Conv2d(32, 32, 3)
            self.batchNorm2 = nn.BatchNorm2d(32, momentum=1, affine=True)
            self.conv3 = nn.Conv2d(32, 32, 3)
            self.batchNorm3 = nn.BatchNorm2d(32, momentum=1, affine=True)
            self.conv4 = nn.Conv2d(32, 32, 3)
            self.batchNorm4 = nn.BatchNorm2d(32, momentum=1, affine=True)

            # An 84x84 input comes out of the four blocks as 32 channels of 3x3.
            self.output = nn.Linear(32 * 9, 5)

        def forward(self, input):
            X = self.conv1(input)
            X = self.batchNorm1(X)
            X = F.relu(X)
            X = F.max_pool2d(X, kernel_size=(2, 2))

            X = self.conv2(X)
            X = self.batchNorm2(X)
            X = F.relu(X)
            X = F.max_pool2d(X, kernel_size=(2, 2))

            X = self.conv3(X)
            X = self.batchNorm3(X)
            X = F.relu(X)
            X = F.max_pool2d(X, kernel_size=(2, 2))

            X = self.conv4(X)
            X = self.batchNorm4(X)
            X = F.relu(X)
            X = F.max_pool2d(X, kernel_size=(2, 2))

            X = self.output(X.reshape(X.size(0), -1))
            return F.softmax(X, 1)  # these softmax outputs are fed to F.cross_entropy above
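
One thing I'm now unsure about: F.cross_entropy already applies log-softmax internally, while my forward returns F.softmax(X, 1), so the loss is effectively computed on top of already-softmaxed scores. A tiny self-contained check with fake data, just to illustrate that the two losses (and hence their gradients) differ:

    import torch
    import torch.nn.functional as F

    scores = torch.randn(4, 5)            # a fake batch of 5-way class scores
    targets = torch.tensor([0, 1, 2, 3])

    # Intended usage: raw scores (logits) go straight into cross_entropy.
    loss_logits = F.cross_entropy(scores, targets)
    # What my model does: softmax first, then cross_entropy on the probabilities.
    loss_probs = F.cross_entropy(F.softmax(scores, dim=1), targets)
    print(loss_logits.item(), loss_probs.item())  # the two values differ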

Does anyone have an idea what's wrong with my implementation? Thanks!
