Skip to content

Commit

Permalink
Improve Get errors for unhashable or mismatched types (#7502)
Browse files Browse the repository at this point in the history
### Problem

The error for unhashable types used as `Get` params is unfortunate, because the call to `identify` fails and returns a null pointer to rust, which triggers an error... somewhere else. See #7499.

### Solution

As suggested in #7499, have `extern_generator_send` directly `identify` `Get` params, meaning that failures to hash take advantage of the fact that `extern_generator_send` is already fallible. This avoids a significant number of calls back to python to identify `Values` (an optimization discussed in #7318 and #7114).

### Result

Improved usability. Fixes #7499.
  • Loading branch information
Stu Hood authored Apr 10, 2019
1 parent 65ed665 commit d0c670f
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 114 deletions.
60 changes: 35 additions & 25 deletions src/python/pants/engine/native.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,9 @@ def wrapper(func):

class _FFISpecification(object):

def __init__(self, ffi):
def __init__(self, ffi, lib):
self._ffi = ffi
self._lib = lib

@memoized_classproperty
def _extern_fields(cls):
Expand Down Expand Up @@ -295,9 +296,7 @@ def extern_identify(self, context_handle, val):
"""
c = self._ffi.from_handle(context_handle)
obj = self._ffi.from_handle(val[0])
hash_ = hash(obj)
type_id = c.to_id(type(obj))
return (hash_, TypeId(type_id))
return c.identify(obj)

@_extern_decl('_Bool', ['ExternContext*', 'Handle*', 'Handle*'])
def extern_equals(self, context_handle, val1, val2):
Expand Down Expand Up @@ -419,36 +418,37 @@ def extern_create_exception(self, context_handle, msg_ptr, msg_len):
def extern_generator_send(self, context_handle, func, arg):
"""Given a generator, send it the given value and return a response."""
c = self._ffi.from_handle(context_handle)
response = self._ffi.new('PyGeneratorResponse*')
try:
res = c.from_value(func[0]).send(c.from_value(arg[0]))
if isinstance(res, Get):
# Get.
values = [res.subject]
products = [res.product]
tag = 2
response.tag = self._lib.Get
response.get = (
TypeId(c.to_id(res.product)),
c.to_value(res.subject),
c.identify(res.subject),
)
elif type(res) in (tuple, list):
# GetMulti.
values = [g.subject for g in res]
products = [g.product for g in res]
tag = 3
response.tag = self._lib.GetMulti
response.get_multi = (
c.type_ids_buf([TypeId(c.to_id(g.product)) for g in res]),
c.vals_buf([c.to_value(g.subject) for g in res]),
c.identities_buf([c.identify(g.subject) for g in res]),
)
else:
# Break.
values = [res]
products = []
tag = 0
response.tag = self._lib.Broke
response.broke = (c.to_value(res),)
except Exception as e:
# Throw.
response.tag = self._lib.Throw
val = e
val._formatted_exc = traceback.format_exc()
values = [val]
products = []
tag = 1
response.throw = (c.to_value(val),)

return (
tag,
c.vals_buf([c.to_value(v) for v in values]),
c.type_ids_buf([TypeId(c.to_id(t)) for t in products])
)
return response[0]

@_extern_decl('PyResult', ['ExternContext*', 'Handle*', 'Handle**', 'uint64_t'])
def extern_call(self, context_handle, func, args_ptr, args_len):
Expand Down Expand Up @@ -521,6 +521,10 @@ def vals_buf(self, vals):
buf = self._ffi.new('Handle[]', vals)
return (buf, len(vals), self.to_value(buf))

def identities_buf(self, idents):
buf = self._ffi.new('Ident[]', idents)
return (buf, len(idents), self.to_value(buf))

def type_ids_buf(self, types):
buf = self._ffi.new('TypeId[]', types)
return (buf, len(types), self.to_value(buf))
Expand All @@ -545,6 +549,12 @@ def raise_or_return(self, pyresult):
def drop_handles(self, handles):
self._handles -= set(handles)

def identify(self, obj):
"""Return an Ident-shaped tuple for the given object."""
hash_ = hash(obj)
type_id = self.to_id(type(obj))
return (hash_, TypeId(type_id))

def to_id(self, typ):
type_id = id(typ)
self._types[type_id] = typ
Expand Down Expand Up @@ -589,14 +599,14 @@ def binary(self):
@memoized_property
def lib(self):
"""Load and return the native engine module."""
return self.ffi.dlopen(self.binary)
lib = self.ffi.dlopen(self.binary)
_FFISpecification(self.ffi, lib).register_cffi_externs()
return lib

@memoized_property
def ffi(self):
"""A CompiledCFFI handle as imported from the native engine python module."""
ffi = getattr(self._ffi_module, 'ffi')
_FFISpecification(ffi).register_cffi_externs()
return ffi
return getattr(self._ffi_module, 'ffi')

@memoized_property
def ffi_lib(self):
Expand Down
179 changes: 98 additions & 81 deletions src/rust/engine/src/externs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,46 +209,36 @@ pub fn call(func: &Value, args: &[Value]) -> Result<Value, Failure> {
.into()
}

///
/// TODO: If we added support for inserting to the `Interns` using an `Ident`, `PyGeneratorResponse`
/// could directly return `Idents` during `Get` calls. This would also require splitting its fields
/// further to avoid needing to "identify" the result of a `PyGeneratorResponseType::Break`.
///
pub fn generator_send(generator: &Value, arg: &Value) -> Result<GeneratorResponse, Failure> {
let response =
with_externs(|e| (e.generator_send)(e.context, generator as &Handle, arg as &Handle));
match response.res_type {
PyGeneratorResponseType::Break => Ok(GeneratorResponse::Break(response.values.unwrap_one())),
PyGeneratorResponseType::Throw => Err(PyResult::failure_from(response.values.unwrap_one())),
PyGeneratorResponseType::Get => {
match response {
PyGeneratorResponse::Broke(h) => Ok(GeneratorResponse::Break(Value::new(h))),
PyGeneratorResponse::Throw(h) => Err(PyResult::failure_from(Value::new(h))),
PyGeneratorResponse::Get(product, handle, ident) => {
let mut interns = INTERNS.write();
let p = response.products.unwrap_one();
let v = response.values.unwrap_one();
let g = Get {
product: p,
subject: interns.insert(v),
product,
subject: interns.insert_with(Value::new(handle), ident),
};
Ok(GeneratorResponse::Get(g))
}
PyGeneratorResponseType::GetMulti => {
PyGeneratorResponse::GetMulti(products, handles, identities) => {
let mut interns = INTERNS.write();
let PyGeneratorResponse {
products: products_buf,
values: values_buf,
..
} = response;
let products = products_buf.to_vec();
let values = values_buf.to_vec();
let products = products.to_vec();
let identities = identities.to_vec();
let values = handles.to_vec();
assert_eq!(products.len(), values.len());
let continues: Vec<Get> = products
let gets: Vec<Get> = products
.into_iter()
.zip(values.into_iter())
.map(|(p, v)| Get {
.zip(identities.into_iter())
.map(|((p, v), i)| Get {
product: p,
subject: interns.insert(v),
subject: interns.insert_with(v, i),
})
.collect();
Ok(GeneratorResponse::GetMulti(continues))
Ok(GeneratorResponse::GetMulti(gets))
}
}
}
Expand Down Expand Up @@ -462,23 +452,18 @@ impl From<Result<(), String>> for PyResult {
}
}

// Only constructed from the python side.
// TODO: map this into a C enum with cbindgen and consume from python instead of using magic numbers
// in extern_generator_send() in native.py!
#[allow(dead_code)]
#[repr(u8)]
pub enum PyGeneratorResponseType {
Break = 0,
Throw = 1,
Get = 2,
GetMulti = 3,
}

///
/// The response from a call to extern_generator_send. Gets include Idents for their Handles
/// in order to avoid roundtripping to intern them, and to eagerly trigger errors for unhashable
/// types on the python side where possible.
///
#[repr(C)]
pub struct PyGeneratorResponse {
res_type: PyGeneratorResponseType,
values: HandleBuffer,
products: TypeIdBuffer,
pub enum PyGeneratorResponse {
Get(TypeId, Handle, Ident),
GetMulti(TypeIdBuffer, HandleBuffer, IdentBuffer),
// NB: Broke not Break because C keyword.
Broke(Handle),
Throw(Handle),
}

#[derive(Debug)]
Expand Down Expand Up @@ -509,11 +494,43 @@ pub enum GeneratorResponse {
/// the object's type.
///
#[repr(C)]
#[derive(Clone, Copy)]
pub struct Ident {
pub hash: i64,
pub type_id: TypeId,
}

pub trait RawBuffer<Raw, Output> {
fn ptr(&self) -> *mut Raw;
fn len(&self) -> u64;

///
/// A buffer-specific shallow clone operation (possibly just implemented via clone).
///
fn lift(t: &Raw) -> Output;

///
/// Returns a Vec copy of the buffer contents.
///
fn to_vec(&self) -> Vec<Output> {
with_vec(self.ptr(), self.len() as usize, |vec| {
vec.iter().map(Self::lift).collect()
})
}

///
/// Asserts that the buffer contains one item, and returns a copy of it.
///
fn unwrap_one(&self) -> Output {
assert!(
self.len() == 1,
"Expected exactly 1 item in Buffer, but had: {}",
self.len()
);
with_vec(self.ptr(), self.len() as usize, |vec| Self::lift(&vec[0]))
}
}

///
/// Points to an array containing a series of values allocated by Python.
///
Expand All @@ -529,34 +546,42 @@ pub struct HandleBuffer {
handle_: Handle,
}

impl HandleBuffer {
pub fn to_vec(&self) -> Vec<Value> {
with_vec(self.handles_ptr, self.handles_len as usize, |handle_vec| {
handle_vec
.iter()
.map(|h| Value::new(unsafe { h.clone_shallow() }))
.collect()
})
impl RawBuffer<Handle, Value> for HandleBuffer {
fn ptr(&self) -> *mut Handle {
self.handles_ptr
}

///
/// Asserts that the HandleBuffer contains one value, and returns it.
///
/// NB: Consider making generic and merging with TypeIdBuffer if we get a third copy.
///
pub fn unwrap_one(&self) -> Value {
assert!(
self.handles_len == 1,
"HandleBuffer contained more than one value: {}",
self.handles_len
);
with_vec(self.handles_ptr, self.handles_len as usize, |handle_vec| {
Value::new(unsafe { handle_vec.iter().next().unwrap().clone_shallow() })
})
fn len(&self) -> u64 {
self.handles_len
}

fn lift(t: &Handle) -> Value {
Value::new(unsafe { t.clone_shallow() })
}
}

#[repr(C)]
pub struct IdentBuffer {
idents_ptr: *mut Ident,
idents_len: u64,
// A Handle to hold the underlying array alive.
handle_: Handle,
}

impl RawBuffer<Ident, Ident> for IdentBuffer {
fn ptr(&self) -> *mut Ident {
self.idents_ptr
}

fn len(&self) -> u64 {
self.idents_len
}

fn lift(t: &Ident) -> Ident {
*t
}
}

// Points to an array of TypeIds.
#[repr(C)]
pub struct TypeIdBuffer {
ids_ptr: *mut TypeId,
Expand All @@ -565,25 +590,17 @@ pub struct TypeIdBuffer {
handle_: Handle,
}

impl TypeIdBuffer {
pub fn to_vec(&self) -> Vec<TypeId> {
with_vec(self.ids_ptr, self.ids_len as usize, |vec| vec.clone())
impl RawBuffer<TypeId, TypeId> for TypeIdBuffer {
fn ptr(&self) -> *mut TypeId {
self.ids_ptr
}

///
/// Asserts that the TypeIdBuffer contains one TypeId, and returns it.
///
/// NB: Consider making generic and merging with HandleBuffer if we get a third copy.
///
pub fn unwrap_one(&self) -> TypeId {
assert!(
self.ids_len == 1,
"TypeIdBuffer contained more than one value: {}",
self.ids_len
);
with_vec(self.ids_ptr, self.ids_len as usize, |ids_vec| {
*ids_vec.iter().next().unwrap()
})
fn len(&self) -> u64 {
self.ids_len
}

fn lift(t: &TypeId) -> TypeId {
*t
}
}

Expand Down
10 changes: 7 additions & 3 deletions src/rust/engine/src/interning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,19 @@ impl Interns {
}

pub fn insert(&mut self, v: Value) -> Key {
let Ident { hash, type_id } = externs::identify(&v);
let ident = externs::identify(&v);
self.insert_with(v, ident)
}

pub fn insert_with(&mut self, v: Value, ident: Ident) -> Key {
let mut inserted = false;
let id_generator = self.id_generator;
let key = *self
.forward
.entry(InternKey(hash, v.clone()))
.entry(InternKey(ident.hash, v.clone()))
.or_insert_with(|| {
inserted = true;
Key::new(id_generator, type_id)
Key::new(id_generator, ident.type_id)
});
if inserted {
self.reverse.insert(key, v);
Expand Down
2 changes: 1 addition & 1 deletion src/rust/engine/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ use crate::externs::{
Buffer, BufferBuffer, CallExtern, CloneValExtern, CreateExceptionExtern, DropHandlesExtern,
EqualsExtern, EvalExtern, ExternContext, Externs, GeneratorSendExtern, GetTypeForExtern,
HandleBuffer, IdentifyExtern, LogExtern, ProjectIgnoringTypeExtern, ProjectMultiExtern, PyResult,
StoreBoolExtern, StoreBytesExtern, StoreF64Extern, StoreI64Extern, StoreTupleExtern,
RawBuffer, StoreBoolExtern, StoreBytesExtern, StoreF64Extern, StoreI64Extern, StoreTupleExtern,
StoreUtf8Extern, TypeIdBuffer, TypeToStrExtern, ValToStrExtern,
};
use crate::handles::Handle;
Expand Down
Loading

0 comments on commit d0c670f

Please sign in to comment.