Reinforcement Q-learning issue on GPU

When running on a GPU (i.e. `torch.cuda.is_available() == True`), I run into the following problem in the training loop:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-76-497fbef317fd> in <module>()
     50     for t in count():
     51         # Select and perform an action
---> 52         action = select_action(state)
     53         _, reward, done, _ = env.step(action[0,0])
     54         reward = torch.Tensor([reward])

<ipython-input-75-a53cf7c104da> in select_action(state)
     27     steps_done += 1
     28     if sample > eps_threshold:
---> 29         return model(Variable(state, volatile=True)).data.max(1)[1].cpu()
     30     else:
     31         return torch.LongTensor([[random.randrange(2)]])

/home/ubuntu/.local/lib/python2.7/site-packages/torch/nn/modules/module.pyc in __call__(self, *input, **kwargs)
    200 
    201     def __call__(self, *input, **kwargs):
--> 202         result = self.forward(*input, **kwargs)
    203         for hook in self._forward_hooks.values():
    204             hook_result = hook(self, input, result)

<ipython-input-73-58135f60e01c> in forward(self, x)
     11 
     12     def forward(self, x):
---> 13         x = F.relu(self.bn1(self.conv1(x)))
     14         x = F.relu(self.bn2(self.conv2(x)))
     15         x = F.relu(self.bn3(self.conv3(x)))

/home/ubuntu/.local/lib/python2.7/site-packages/torch/nn/modules/module.pyc in __call__(self, *input, **kwargs)
    200 
    201     def __call__(self, *input, **kwargs):
--> 202         result = self.forward(*input, **kwargs)
    203         for hook in self._forward_hooks.values():
    204             hook_result = hook(self, input, result)

/home/ubuntu/.local/lib/python2.7/site-packages/torch/nn/modules/conv.pyc in forward(self, input)
    235     def forward(self, input):
    236         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 237                         self.padding, self.dilation, self.groups)
    238 
    239 

/home/ubuntu/.local/lib/python2.7/site-packages/torch/nn/functional.pyc in conv2d(input, weight, bias, stride, padding, dilation, groups)
     35     f = ConvNd(_pair(stride), _pair(padding), _pair(dilation), False,
     36                _pair(0), groups)
---> 37     return f(input, weight, bias) if bias is not None else f(input, weight)
     38 
     39 

/home/ubuntu/.local/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in forward(self, input, weight, bias)
     33         if k == 3:
     34             input, weight = _view4d(input, weight)
---> 35         output = self._update_output(input, weight, bias)
     36         if k == 3:
     37             output, = _view3d(output)

/home/ubuntu/.local/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in _update_output(self, input, weight, bias)
     95 
     96         self._bufs = [[] for g in range(self.groups)]
---> 97         return self._thnn('update_output', input, weight, bias)
     98 
     99     def _grad_input(self, input, weight, grad_output):

/home/ubuntu/.local/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in _thnn(self, fn_name, input, weight, *args)
    154         impl = _thnn_convs[self.thnn_class_name(input)]
    155         if self.groups == 1:
--> 156             return impl[fn_name](self, self._bufs[0], input, weight, *args)
    157         else:
    158             res = []

/home/ubuntu/.local/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in call_update_output(self, bufs, input, weight, bias)
    232         args = parse_arguments(self, fn.arguments[5:], bufs, kernel_size)
    233         getattr(backend, fn.name)(backend.library_state, input, output, weight,
--> 234                                   bias, *args)
    235         return output
    236     return call_update_output

TypeError: FloatSpatialConvolutionMM_updateOutput received an invalid combination of arguments - got (int, torch.FloatTensor, torch.FloatTensor, torch.cuda.FloatTensor, torch.cuda.FloatTensor, torch.FloatTensor, torch.FloatTensor, long, long, int, int, int, int), but expected (int state, torch.FloatTensor input, torch.FloatTensor output, torch.FloatTensor weight, [torch.FloatTensor bias or None], torch.FloatTensor finput, torch.FloatTensor fgradInput, int kW, int kH, int dW, int dH, int padW, int padH)



Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Reinforcement Q-learning issue on GPU #36

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Reinforcement Q-learning issue on GPU #36

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions