Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Get errors for unhashable or mismatched types #7502

Merged
merged 5 commits into from
Apr 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 35 additions & 25 deletions src/python/pants/engine/native.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,9 @@ def wrapper(func):

class _FFISpecification(object):

def __init__(self, ffi):
def __init__(self, ffi, lib):
self._ffi = ffi
self._lib = lib

@memoized_classproperty
def _extern_fields(cls):
Expand Down Expand Up @@ -295,9 +296,7 @@ def extern_identify(self, context_handle, val):
"""
c = self._ffi.from_handle(context_handle)
obj = self._ffi.from_handle(val[0])
hash_ = hash(obj)
type_id = c.to_id(type(obj))
return (hash_, TypeId(type_id))
return c.identify(obj)

@_extern_decl('_Bool', ['ExternContext*', 'Handle*', 'Handle*'])
def extern_equals(self, context_handle, val1, val2):
Expand Down Expand Up @@ -419,36 +418,37 @@ def extern_create_exception(self, context_handle, msg_ptr, msg_len):
def extern_generator_send(self, context_handle, func, arg):
"""Given a generator, send it the given value and return a response."""
c = self._ffi.from_handle(context_handle)
response = self._ffi.new('PyGeneratorResponse*')
try:
res = c.from_value(func[0]).send(c.from_value(arg[0]))
if isinstance(res, Get):
# Get.
values = [res.subject]
products = [res.product]
tag = 2
response.tag = self._lib.Get
response.get = (
TypeId(c.to_id(res.product)),
c.to_value(res.subject),
c.identify(res.subject),
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a fantastic change. Is this relying on mostly the rust C representation of enums (with tag and (in this case) get fields), or is there some cffi magic going on to allow addressing unions in this deeply ergonomic way? Is there a link we could add documenting why this works to access our rust enums?

Copy link
Contributor

@cosmicexplorer cosmicexplorer Apr 7, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, I actually think we don't want to do this, as we probably want to colocate parsing the python response and producing the FFI object that represents it (so the below code is needless complexity for now). But I wrote this comment out anyway so I can link to it in #7367:

Could this part be moved into a helper method? Something like:

    @memoized_property
    def _generator_response_tags_enum(self):
       class _Tags(enum([self._lib.Get, self._lib.GetMulti, self._lib.Broke, self._lib.Throw])): pass
       return _Tags

    def _set_generator_response(self, response, c, input_res, tag):
      attr, value = self._generator_response_tags_enum(tag).resolve_for_enum_variant({
        self._lib.Get: lambda: ('get', (
            TypeId(c.to_id(input_res.product)),
            c.to_value(input_res.subject),
            c.identify(input_res.subject),
          ),
        # ...
        response.tag = tag
        setattr(response, attr, value)
      })

    def extern_generator_send(...):
      # ...
              if isinstance(res, Get):
                self._set_generator_response(response, c, res, self._lib.Get)
              # ...
      return response[0]

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a fantastic change. Is this relying on mostly the rust C representation of enums (with tag and (in this case) get fields), or is there some cffi magic going on to allow addressing unions in this deeply ergonomic way? Is there a link we could add documenting why this works to access our rust enums?

This is mostly relying on the C API that is generated by cbindgen. Kudos on integrating that.

elif type(res) in (tuple, list):
# GetMulti.
values = [g.subject for g in res]
products = [g.product for g in res]
tag = 3
response.tag = self._lib.GetMulti
response.get_multi = (
c.type_ids_buf([TypeId(c.to_id(g.product)) for g in res]),
c.vals_buf([c.to_value(g.subject) for g in res]),
c.identities_buf([c.identify(g.subject) for g in res]),
)
else:
# Break.
values = [res]
products = []
tag = 0
response.tag = self._lib.Broke
response.broke = (c.to_value(res),)
except Exception as e:
# Throw.
response.tag = self._lib.Throw
val = e
val._formatted_exc = traceback.format_exc()
values = [val]
products = []
tag = 1
response.throw = (c.to_value(val),)

return (
tag,
c.vals_buf([c.to_value(v) for v in values]),
c.type_ids_buf([TypeId(c.to_id(t)) for t in products])
)
return response[0]

@_extern_decl('PyResult', ['ExternContext*', 'Handle*', 'Handle**', 'uint64_t'])
def extern_call(self, context_handle, func, args_ptr, args_len):
Expand Down Expand Up @@ -521,6 +521,10 @@ def vals_buf(self, vals):
buf = self._ffi.new('Handle[]', vals)
return (buf, len(vals), self.to_value(buf))

def identities_buf(self, idents):
buf = self._ffi.new('Ident[]', idents)
return (buf, len(idents), self.to_value(buf))

def type_ids_buf(self, types):
buf = self._ffi.new('TypeId[]', types)
return (buf, len(types), self.to_value(buf))
Expand All @@ -545,6 +549,12 @@ def raise_or_return(self, pyresult):
def drop_handles(self, handles):
self._handles -= set(handles)

def identify(self, obj):
"""Return an Ident-shaped tuple for the given object."""
hash_ = hash(obj)
type_id = self.to_id(type(obj))
return (hash_, TypeId(type_id))

def to_id(self, typ):
type_id = id(typ)
self._types[type_id] = typ
Expand Down Expand Up @@ -589,14 +599,14 @@ def binary(self):
@memoized_property
def lib(self):
"""Load and return the native engine module."""
return self.ffi.dlopen(self.binary)
lib = self.ffi.dlopen(self.binary)
_FFISpecification(self.ffi, lib).register_cffi_externs()
return lib

@memoized_property
def ffi(self):
"""A CompiledCFFI handle as imported from the native engine python module."""
ffi = getattr(self._ffi_module, 'ffi')
_FFISpecification(ffi).register_cffi_externs()
return ffi
return getattr(self._ffi_module, 'ffi')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Considering that register_cffi_externs() modifies the ffi object and not the lib object, I would think we would want to keep that call in this method to avoid spooky mutation of the ffi object happening whenever we first happen to access the lib object.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, constructing the _FFISpecification inside of self.ffi triggered infinite recursion when I attempted to feed it self.lib (which required self.ffi, etc, etc).


@memoized_property
def ffi_lib(self):
Expand Down
179 changes: 98 additions & 81 deletions src/rust/engine/src/externs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,46 +209,36 @@ pub fn call(func: &Value, args: &[Value]) -> Result<Value, Failure> {
.into()
}

///
/// TODO: If we added support for inserting to the `Interns` using an `Ident`, `PyGeneratorResponse`
/// could directly return `Idents` during `Get` calls. This would also require splitting its fields
/// further to avoid needing to "identify" the result of a `PyGeneratorResponseType::Break`.
///
pub fn generator_send(generator: &Value, arg: &Value) -> Result<GeneratorResponse, Failure> {
let response =
with_externs(|e| (e.generator_send)(e.context, generator as &Handle, arg as &Handle));
match response.res_type {
PyGeneratorResponseType::Break => Ok(GeneratorResponse::Break(response.values.unwrap_one())),
PyGeneratorResponseType::Throw => Err(PyResult::failure_from(response.values.unwrap_one())),
PyGeneratorResponseType::Get => {
match response {
PyGeneratorResponse::Broke(h) => Ok(GeneratorResponse::Break(Value::new(h))),
PyGeneratorResponse::Throw(h) => Err(PyResult::failure_from(Value::new(h))),
PyGeneratorResponse::Get(product, handle, ident) => {
let mut interns = INTERNS.write();
let p = response.products.unwrap_one();
let v = response.values.unwrap_one();
let g = Get {
product: p,
subject: interns.insert(v),
product,
subject: interns.insert_with(Value::new(handle), ident),
};
Ok(GeneratorResponse::Get(g))
}
PyGeneratorResponseType::GetMulti => {
PyGeneratorResponse::GetMulti(products, handles, identities) => {
let mut interns = INTERNS.write();
let PyGeneratorResponse {
products: products_buf,
values: values_buf,
..
} = response;
let products = products_buf.to_vec();
let values = values_buf.to_vec();
let products = products.to_vec();
let identities = identities.to_vec();
let values = handles.to_vec();
assert_eq!(products.len(), values.len());
let continues: Vec<Get> = products
let gets: Vec<Get> = products
.into_iter()
.zip(values.into_iter())
.map(|(p, v)| Get {
.zip(identities.into_iter())
.map(|((p, v), i)| Get {
product: p,
subject: interns.insert(v),
subject: interns.insert_with(v, i),
})
.collect();
Ok(GeneratorResponse::GetMulti(continues))
Ok(GeneratorResponse::GetMulti(gets))
}
}
}
Expand Down Expand Up @@ -462,23 +452,18 @@ impl From<Result<(), String>> for PyResult {
}
}

// Only constructed from the python side.
// TODO: map this into a C enum with cbindgen and consume from python instead of using magic numbers
// in extern_generator_send() in native.py!
#[allow(dead_code)]
#[repr(u8)]
pub enum PyGeneratorResponseType {
Break = 0,
Throw = 1,
Get = 2,
GetMulti = 3,
}

///
/// The response from a call to extern_generator_send. Gets include Idents for their Handles
/// in order to avoid roundtripping to intern them, and to eagerly trigger errors for unhashable
/// types on the python side where possible.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good explanation.

///
#[repr(C)]
pub struct PyGeneratorResponse {
res_type: PyGeneratorResponseType,
values: HandleBuffer,
products: TypeIdBuffer,
pub enum PyGeneratorResponse {
Get(TypeId, Handle, Ident),
GetMulti(TypeIdBuffer, HandleBuffer, IdentBuffer),
// NB: Broke not Break because C keyword.
Broke(Handle),
Throw(Handle),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Much better!! Thank you for taking the time to figure out how to do this with cffi, it makes the code leagues more readable. It also obviates the need for that TODO without additional complexity in building the engine!

}

#[derive(Debug)]
Expand Down Expand Up @@ -509,11 +494,43 @@ pub enum GeneratorResponse {
/// the object's type.
///
#[repr(C)]
#[derive(Clone, Copy)]
pub struct Ident {
pub hash: i64,
pub type_id: TypeId,
}

pub trait RawBuffer<Raw, Output> {
fn ptr(&self) -> *mut Raw;
fn len(&self) -> u64;

///
/// A buffer-specific shallow clone operation (possibly just implemented via clone).
///
fn lift(t: &Raw) -> Output;

///
/// Returns a Vec copy of the buffer contents.
///
fn to_vec(&self) -> Vec<Output> {
with_vec(self.ptr(), self.len() as usize, |vec| {
vec.iter().map(Self::lift).collect()
})
}

///
/// Asserts that the buffer contains one item, and returns a copy of it.
///
fn unwrap_one(&self) -> Output {
assert!(
self.len() == 1,
"Expected exactly 1 item in Buffer, but had: {}",
self.len()
);
with_vec(self.ptr(), self.len() as usize, |vec| Self::lift(&vec[0]))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason this doesn't delegate to .to_vec()? Like:

  fn unwrap_one(&self) -> Output {
    assert_equal!(self.len(), 1, "Expected exactly 1 item in Buffer, but had: {}", self.len());
    self.to_vec()[0]
  }

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

with_vec doesn't actually "create" a vec: instead, it "views" some memory as a Vec via:

unsafe { Vec::from_raw_parts(c_ptr, c_len, c_len) }

That means that by avoiding calling to_vec(), we avoid actually copying the buffer containing the one item.

}
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is all really great! I really like that we can manipulate python collections in this structured, efficient, and safe way.

///
/// Points to an array containing a series of values allocated by Python.
///
Expand All @@ -529,34 +546,42 @@ pub struct HandleBuffer {
handle_: Handle,
}

impl HandleBuffer {
pub fn to_vec(&self) -> Vec<Value> {
with_vec(self.handles_ptr, self.handles_len as usize, |handle_vec| {
handle_vec
.iter()
.map(|h| Value::new(unsafe { h.clone_shallow() }))
.collect()
})
impl RawBuffer<Handle, Value> for HandleBuffer {
fn ptr(&self) -> *mut Handle {
self.handles_ptr
}

///
/// Asserts that the HandleBuffer contains one value, and returns it.
///
/// NB: Consider making generic and merging with TypeIdBuffer if we get a third copy.
///
pub fn unwrap_one(&self) -> Value {
assert!(
self.handles_len == 1,
"HandleBuffer contained more than one value: {}",
self.handles_len
);
with_vec(self.handles_ptr, self.handles_len as usize, |handle_vec| {
Value::new(unsafe { handle_vec.iter().next().unwrap().clone_shallow() })
})
fn len(&self) -> u64 {
self.handles_len
}

fn lift(t: &Handle) -> Value {
Value::new(unsafe { t.clone_shallow() })
}
}

#[repr(C)]
pub struct IdentBuffer {
idents_ptr: *mut Ident,
idents_len: u64,
// A Handle to hold the underlying array alive.
handle_: Handle,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this part isn't new, but could we add a comment on one of these fields explaining why handle_ needs to exist? Is it so python doesn't gc the pointer?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's correct. Since CI is green here, I'm going to go ahead and merge this one, but can revisit.

}

impl RawBuffer<Ident, Ident> for IdentBuffer {
fn ptr(&self) -> *mut Ident {
self.idents_ptr
}

fn len(&self) -> u64 {
self.idents_len
}

fn lift(t: &Ident) -> Ident {
*t
}
}

// Points to an array of TypeIds.
#[repr(C)]
pub struct TypeIdBuffer {
ids_ptr: *mut TypeId,
Expand All @@ -565,25 +590,17 @@ pub struct TypeIdBuffer {
handle_: Handle,
}

impl TypeIdBuffer {
pub fn to_vec(&self) -> Vec<TypeId> {
with_vec(self.ids_ptr, self.ids_len as usize, |vec| vec.clone())
impl RawBuffer<TypeId, TypeId> for TypeIdBuffer {
fn ptr(&self) -> *mut TypeId {
self.ids_ptr
}

///
/// Asserts that the TypeIdBuffer contains one TypeId, and returns it.
///
/// NB: Consider making generic and merging with HandleBuffer if we get a third copy.
///
pub fn unwrap_one(&self) -> TypeId {
assert!(
self.ids_len == 1,
"TypeIdBuffer contained more than one value: {}",
self.ids_len
);
with_vec(self.ids_ptr, self.ids_len as usize, |ids_vec| {
*ids_vec.iter().next().unwrap()
})
fn len(&self) -> u64 {
self.ids_len
}

fn lift(t: &TypeId) -> TypeId {
*t
}
}

Expand Down
10 changes: 7 additions & 3 deletions src/rust/engine/src/interning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,19 @@ impl Interns {
}

pub fn insert(&mut self, v: Value) -> Key {
let Ident { hash, type_id } = externs::identify(&v);
let ident = externs::identify(&v);
self.insert_with(v, ident)
}

pub fn insert_with(&mut self, v: Value, ident: Ident) -> Key {
let mut inserted = false;
let id_generator = self.id_generator;
let key = *self
.forward
.entry(InternKey(hash, v.clone()))
.entry(InternKey(ident.hash, v.clone()))
.or_insert_with(|| {
inserted = true;
Key::new(id_generator, type_id)
Key::new(id_generator, ident.type_id)
});
if inserted {
self.reverse.insert(key, v);
Expand Down
2 changes: 1 addition & 1 deletion src/rust/engine/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ use crate::externs::{
Buffer, BufferBuffer, CallExtern, CloneValExtern, CreateExceptionExtern, DropHandlesExtern,
EqualsExtern, EvalExtern, ExternContext, Externs, GeneratorSendExtern, GetTypeForExtern,
HandleBuffer, IdentifyExtern, LogExtern, ProjectIgnoringTypeExtern, ProjectMultiExtern, PyResult,
StoreBoolExtern, StoreBytesExtern, StoreF64Extern, StoreI64Extern, StoreTupleExtern,
RawBuffer, StoreBoolExtern, StoreBytesExtern, StoreF64Extern, StoreI64Extern, StoreTupleExtern,
StoreUtf8Extern, TypeIdBuffer, TypeToStrExtern, ValToStrExtern,
};
use crate::handles::Handle;
Expand Down
Loading