Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Type conversion error for categorical distribution with missing data #3145

Closed
plison opened this issue Aug 9, 2018 · 2 comments
Closed

Type conversion error for categorical distribution with missing data #3145

plison opened this issue Aug 9, 2018 · 2 comments

Comments

@plison
Copy link

plison commented Aug 9, 2018

There is a strange conversion error (from Theano) that gets thrown when we provide a masked array (where the mask indicates missing data) to a categorical distribution:

import numpy as np  # required: np.array / np.ma / np.ones are used below
import pymc3 as pm
with pm.Model() as model:
    # Reproduction: exactly one observation (index 2) is masked as missing.
    data = np.array([0,1,1,0,2,0,0,0])
    data = np.ma.masked_array(data, mask=np.array([0,0,1,0,0,0,0,0]))
    diri = pm.Dirichlet("Dir", a=np.ones(3))
    cate = pm.Categorical("C", p=diri, observed=data)

    trace = pm.sample()
pm.summary(trace)

which stops with the following cryptic error:

TypeError: Cannot convert Type TensorType(int16, vector) (of Variable C_missing_shared__) into Type TensorType(int16, (True,)). You can try to manually convert C_missing_shared__ into a TensorType(int16, (True,)).

Here is the full trace:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-76-0bd03406628f> in <module>()
      8     cate = pm.Categorical("C", p=diri, observed=data)
      9 
---> 10     trace = pm.sample()
     11 pm.summary(trace)

~/anaconda3/lib/python3.6/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, nuts_kwargs, step_kwargs, progressbar, model, random_seed, live_plot, discard_tuned_samples, live_plot_kwargs, compute_convergence_checks, use_mmap, **kwargs)
    415                 step = assign_step_methods(model, step, step_kwargs=step_kwargs)
    416         else:
--> 417             step = assign_step_methods(model, step, step_kwargs=step_kwargs)
    418 
    419         if isinstance(step, list):

~/anaconda3/lib/python3.6/site-packages/pymc3/sampling.py in assign_step_methods(model, step, methods, step_kwargs)
    151             selected_steps[selected].append(var)
    152 
--> 153     return instantiate_steppers(model, steps, selected_steps, step_kwargs)
    154 
    155 

~/anaconda3/lib/python3.6/site-packages/pymc3/sampling.py in instantiate_steppers(model, steps, selected_steps, step_kwargs)
     72         args = step_kwargs.get(step_class.name, {})
     73         used_keys.add(step_class.name)
---> 74         step = step_class(vars=vars, **args)
     75         steps.append(step)
     76 

~/anaconda3/lib/python3.6/site-packages/pymc3/step_methods/hmc/nuts.py in __init__(self, vars, max_treedepth, early_max_treedepth, **kwargs)
    150         `pm.sample` to the desired number of tuning steps.
    151         """
--> 152         super(NUTS, self).__init__(vars, **kwargs)
    153 
    154         self.max_treedepth = max_treedepth

~/anaconda3/lib/python3.6/site-packages/pymc3/step_methods/hmc/base_hmc.py in __init__(self, vars, scaling, step_scale, is_cov, model, blocked, potential, integrator, dtype, Emax, target_accept, gamma, k, t0, adapt_step_size, step_rand, **theano_kwargs)
     61 
     62         super(BaseHMC, self).__init__(vars, blocked=blocked, model=model,
---> 63                                       dtype=dtype, **theano_kwargs)
     64 
     65         self.adapt_step_size = adapt_step_size

~/anaconda3/lib/python3.6/site-packages/pymc3/step_methods/arraystep.py in __init__(self, vars, model, blocked, dtype, **theano_kwargs)
    226 
    227         func = model.logp_dlogp_function(
--> 228             vars, dtype=dtype, **theano_kwargs)
    229 
    230         # handle edge case discovered in #2948

~/anaconda3/lib/python3.6/site-packages/pymc3/model.py in logp_dlogp_function(self, grad_vars, **kwargs)
    707         varnames = [var.name for var in grad_vars]
    708         extra_vars = [var for var in self.free_RVs if var.name not in varnames]
--> 709         return ValueGradFunction(self.logpt, grad_vars, extra_vars, **kwargs)
    710 
    711     @property

~/anaconda3/lib/python3.6/site-packages/pymc3/model.py in __init__(self, cost, grad_vars, extra_vars, dtype, casting, **kwargs)
    446 
    447         self._theano_function = theano.function(
--> 448             inputs, [self._cost_joined, grad], givens=givens, **kwargs)
    449 
    450     def set_extra_values(self, extra_vars):

~/anaconda3/lib/python3.6/site-packages/theano/compile/function.py in function(inputs, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input)
    315                    on_unused_input=on_unused_input,
    316                    profile=profile,
--> 317                    output_keys=output_keys)
    318     return fn

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in pfunc(params, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input, output_keys)
    447                                          rebuild_strict=rebuild_strict,
    448                                          copy_inputs_over=True,
--> 449                                          no_default_updates=no_default_updates)
    450     # extracting the arguments
    451     input_variables, cloned_extended_outputs, other_stuff = output_vars

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in rebuild_collect_shared(outputs, inputs, replace, updates, rebuild_strict, copy_inputs_over, no_default_updates)
    217         for v in outputs:
    218             if isinstance(v, Variable):
--> 219                 cloned_v = clone_v_get_shared_updates(v, copy_inputs_over)
    220                 cloned_outputs.append(cloned_v)
    221             elif isinstance(v, Out):

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in clone_v_get_shared_updates(v, copy_inputs_over)
     91             if owner not in clone_d:
     92                 for i in owner.inputs:
---> 93                     clone_v_get_shared_updates(i, copy_inputs_over)
     94 
     95                 clone_d[owner] = owner.clone_with_new_inputs(

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in clone_v_get_shared_updates(v, copy_inputs_over)
     91             if owner not in clone_d:
     92                 for i in owner.inputs:
---> 93                     clone_v_get_shared_updates(i, copy_inputs_over)
     94 
     95                 clone_d[owner] = owner.clone_with_new_inputs(

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in clone_v_get_shared_updates(v, copy_inputs_over)
     91             if owner not in clone_d:
     92                 for i in owner.inputs:
---> 93                     clone_v_get_shared_updates(i, copy_inputs_over)
     94 
     95                 clone_d[owner] = owner.clone_with_new_inputs(

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in clone_v_get_shared_updates(v, copy_inputs_over)
     91             if owner not in clone_d:
     92                 for i in owner.inputs:
---> 93                     clone_v_get_shared_updates(i, copy_inputs_over)
     94 
     95                 clone_d[owner] = owner.clone_with_new_inputs(

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in clone_v_get_shared_updates(v, copy_inputs_over)
     91             if owner not in clone_d:
     92                 for i in owner.inputs:
---> 93                     clone_v_get_shared_updates(i, copy_inputs_over)
     94 
     95                 clone_d[owner] = owner.clone_with_new_inputs(

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in clone_v_get_shared_updates(v, copy_inputs_over)
     91             if owner not in clone_d:
     92                 for i in owner.inputs:
---> 93                     clone_v_get_shared_updates(i, copy_inputs_over)
     94 
     95                 clone_d[owner] = owner.clone_with_new_inputs(

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in clone_v_get_shared_updates(v, copy_inputs_over)
     91             if owner not in clone_d:
     92                 for i in owner.inputs:
---> 93                     clone_v_get_shared_updates(i, copy_inputs_over)
     94 
     95                 clone_d[owner] = owner.clone_with_new_inputs(

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in clone_v_get_shared_updates(v, copy_inputs_over)
     91             if owner not in clone_d:
     92                 for i in owner.inputs:
---> 93                     clone_v_get_shared_updates(i, copy_inputs_over)
     94 
     95                 clone_d[owner] = owner.clone_with_new_inputs(

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in clone_v_get_shared_updates(v, copy_inputs_over)
     91             if owner not in clone_d:
     92                 for i in owner.inputs:
---> 93                     clone_v_get_shared_updates(i, copy_inputs_over)
     94 
     95                 clone_d[owner] = owner.clone_with_new_inputs(

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in clone_v_get_shared_updates(v, copy_inputs_over)
     91             if owner not in clone_d:
     92                 for i in owner.inputs:
---> 93                     clone_v_get_shared_updates(i, copy_inputs_over)
     94 
     95                 clone_d[owner] = owner.clone_with_new_inputs(

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in clone_v_get_shared_updates(v, copy_inputs_over)
     91             if owner not in clone_d:
     92                 for i in owner.inputs:
---> 93                     clone_v_get_shared_updates(i, copy_inputs_over)
     94 
     95                 clone_d[owner] = owner.clone_with_new_inputs(

~/anaconda3/lib/python3.6/site-packages/theano/compile/pfunc.py in clone_v_get_shared_updates(v, copy_inputs_over)
     94 
     95                 clone_d[owner] = owner.clone_with_new_inputs(
---> 96                     [clone_d[i] for i in owner.inputs], strict=rebuild_strict)
     97                 for old_o, new_o in zip(owner.outputs, clone_d[owner].outputs):
     98                     clone_d.setdefault(old_o, new_o)

~/anaconda3/lib/python3.6/site-packages/theano/gof/graph.py in clone_with_new_inputs(self, inputs, strict)
    240                 if strict:
    241                     # If compatible, casts new into curr.type
--> 242                     new_inputs[i] = curr.type.filter_variable(new)
    243                 else:
    244                     remake_node = True

~/anaconda3/lib/python3.6/site-packages/theano/tensor/type.py in filter_variable(self, other, allow_convert)
    232             dict(othertype=other.type,
    233                  other=other,
--> 234                  self=self))
    235 
    236     def value_validity_msg(self, a):

TypeError: Cannot convert Type TensorType(int16, vector) (of Variable C_missing_shared__) into Type TensorType(int16, (True,)). You can try to manually convert C_missing_shared__ into a TensorType(int16, (True,)).

Any idea what's happening?

@plison
Copy link
Author

plison commented Aug 13, 2018

So it seems that the error occurs because only one value is masked: sampling works correctly when more than one value is masked.

@junpenglao
Copy link
Member

Thanks for reporting back! In that case I will close this issue, as it is a duplicate of #3122.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants