-
-
Notifications
You must be signed in to change notification settings - Fork 636
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve Get errors for unhashable or mismatched types #7502
Changes from all commits
e9c53be
701d799
b5e083d
9520bec
b44073c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -226,8 +226,9 @@ def wrapper(func): | |
|
||
class _FFISpecification(object): | ||
|
||
def __init__(self, ffi): | ||
def __init__(self, ffi, lib): | ||
self._ffi = ffi | ||
self._lib = lib | ||
|
||
@memoized_classproperty | ||
def _extern_fields(cls): | ||
|
@@ -295,9 +296,7 @@ def extern_identify(self, context_handle, val): | |
""" | ||
c = self._ffi.from_handle(context_handle) | ||
obj = self._ffi.from_handle(val[0]) | ||
hash_ = hash(obj) | ||
type_id = c.to_id(type(obj)) | ||
return (hash_, TypeId(type_id)) | ||
return c.identify(obj) | ||
|
||
@_extern_decl('_Bool', ['ExternContext*', 'Handle*', 'Handle*']) | ||
def extern_equals(self, context_handle, val1, val2): | ||
|
@@ -419,36 +418,37 @@ def extern_create_exception(self, context_handle, msg_ptr, msg_len): | |
def extern_generator_send(self, context_handle, func, arg): | ||
"""Given a generator, send it the given value and return a response.""" | ||
c = self._ffi.from_handle(context_handle) | ||
response = self._ffi.new('PyGeneratorResponse*') | ||
try: | ||
res = c.from_value(func[0]).send(c.from_value(arg[0])) | ||
if isinstance(res, Get): | ||
# Get. | ||
values = [res.subject] | ||
products = [res.product] | ||
tag = 2 | ||
response.tag = self._lib.Get | ||
response.get = ( | ||
TypeId(c.to_id(res.product)), | ||
c.to_value(res.subject), | ||
c.identify(res.subject), | ||
) | ||
elif type(res) in (tuple, list): | ||
# GetMulti. | ||
values = [g.subject for g in res] | ||
products = [g.product for g in res] | ||
tag = 3 | ||
response.tag = self._lib.GetMulti | ||
response.get_multi = ( | ||
c.type_ids_buf([TypeId(c.to_id(g.product)) for g in res]), | ||
c.vals_buf([c.to_value(g.subject) for g in res]), | ||
c.identities_buf([c.identify(g.subject) for g in res]), | ||
) | ||
else: | ||
# Break. | ||
values = [res] | ||
products = [] | ||
tag = 0 | ||
response.tag = self._lib.Broke | ||
response.broke = (c.to_value(res),) | ||
except Exception as e: | ||
# Throw. | ||
response.tag = self._lib.Throw | ||
val = e | ||
val._formatted_exc = traceback.format_exc() | ||
values = [val] | ||
products = [] | ||
tag = 1 | ||
response.throw = (c.to_value(val),) | ||
|
||
return ( | ||
tag, | ||
c.vals_buf([c.to_value(v) for v in values]), | ||
c.type_ids_buf([TypeId(c.to_id(t)) for t in products]) | ||
) | ||
return response[0] | ||
|
||
@_extern_decl('PyResult', ['ExternContext*', 'Handle*', 'Handle**', 'uint64_t']) | ||
def extern_call(self, context_handle, func, args_ptr, args_len): | ||
|
@@ -521,6 +521,10 @@ def vals_buf(self, vals): | |
buf = self._ffi.new('Handle[]', vals) | ||
return (buf, len(vals), self.to_value(buf)) | ||
|
||
def identities_buf(self, idents): | ||
buf = self._ffi.new('Ident[]', idents) | ||
return (buf, len(idents), self.to_value(buf)) | ||
|
||
def type_ids_buf(self, types): | ||
buf = self._ffi.new('TypeId[]', types) | ||
return (buf, len(types), self.to_value(buf)) | ||
|
@@ -545,6 +549,12 @@ def raise_or_return(self, pyresult): | |
def drop_handles(self, handles): | ||
self._handles -= set(handles) | ||
|
||
def identify(self, obj): | ||
"""Return an Ident-shaped tuple for the given object.""" | ||
hash_ = hash(obj) | ||
type_id = self.to_id(type(obj)) | ||
return (hash_, TypeId(type_id)) | ||
|
||
def to_id(self, typ): | ||
type_id = id(typ) | ||
self._types[type_id] = typ | ||
|
@@ -589,14 +599,14 @@ def binary(self): | |
@memoized_property | ||
def lib(self): | ||
"""Load and return the native engine module.""" | ||
return self.ffi.dlopen(self.binary) | ||
lib = self.ffi.dlopen(self.binary) | ||
_FFISpecification(self.ffi, lib).register_cffi_externs() | ||
return lib | ||
|
||
@memoized_property | ||
def ffi(self): | ||
"""A CompiledCFFI handle as imported from the native engine python module.""" | ||
ffi = getattr(self._ffi_module, 'ffi') | ||
_FFISpecification(ffi).register_cffi_externs() | ||
return ffi | ||
return getattr(self._ffi_module, 'ffi') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Considering that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately, constructing the |
||
|
||
@memoized_property | ||
def ffi_lib(self): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -209,46 +209,36 @@ pub fn call(func: &Value, args: &[Value]) -> Result<Value, Failure> { | |
.into() | ||
} | ||
|
||
/// | ||
/// TODO: If we added support for inserting to the `Interns` using an `Ident`, `PyGeneratorResponse` | ||
/// could directly return `Idents` during `Get` calls. This would also require splitting its fields | ||
/// further to avoid needing to "identify" the result of a `PyGeneratorResponseType::Break`. | ||
/// | ||
pub fn generator_send(generator: &Value, arg: &Value) -> Result<GeneratorResponse, Failure> { | ||
let response = | ||
with_externs(|e| (e.generator_send)(e.context, generator as &Handle, arg as &Handle)); | ||
match response.res_type { | ||
PyGeneratorResponseType::Break => Ok(GeneratorResponse::Break(response.values.unwrap_one())), | ||
PyGeneratorResponseType::Throw => Err(PyResult::failure_from(response.values.unwrap_one())), | ||
PyGeneratorResponseType::Get => { | ||
match response { | ||
PyGeneratorResponse::Broke(h) => Ok(GeneratorResponse::Break(Value::new(h))), | ||
PyGeneratorResponse::Throw(h) => Err(PyResult::failure_from(Value::new(h))), | ||
PyGeneratorResponse::Get(product, handle, ident) => { | ||
let mut interns = INTERNS.write(); | ||
let p = response.products.unwrap_one(); | ||
let v = response.values.unwrap_one(); | ||
let g = Get { | ||
product: p, | ||
subject: interns.insert(v), | ||
product, | ||
subject: interns.insert_with(Value::new(handle), ident), | ||
}; | ||
Ok(GeneratorResponse::Get(g)) | ||
} | ||
PyGeneratorResponseType::GetMulti => { | ||
PyGeneratorResponse::GetMulti(products, handles, identities) => { | ||
let mut interns = INTERNS.write(); | ||
let PyGeneratorResponse { | ||
products: products_buf, | ||
values: values_buf, | ||
.. | ||
} = response; | ||
let products = products_buf.to_vec(); | ||
let values = values_buf.to_vec(); | ||
let products = products.to_vec(); | ||
let identities = identities.to_vec(); | ||
let values = handles.to_vec(); | ||
assert_eq!(products.len(), values.len()); | ||
let continues: Vec<Get> = products | ||
let gets: Vec<Get> = products | ||
.into_iter() | ||
.zip(values.into_iter()) | ||
.map(|(p, v)| Get { | ||
.zip(identities.into_iter()) | ||
.map(|((p, v), i)| Get { | ||
product: p, | ||
subject: interns.insert(v), | ||
subject: interns.insert_with(v, i), | ||
}) | ||
.collect(); | ||
Ok(GeneratorResponse::GetMulti(continues)) | ||
Ok(GeneratorResponse::GetMulti(gets)) | ||
} | ||
} | ||
} | ||
|
@@ -462,23 +452,18 @@ impl From<Result<(), String>> for PyResult { | |
} | ||
} | ||
|
||
// Only constructed from the python side. | ||
// TODO: map this into a C enum with cbindgen and consume from python instead of using magic numbers | ||
// in extern_generator_send() in native.py! | ||
#[allow(dead_code)] | ||
#[repr(u8)] | ||
pub enum PyGeneratorResponseType { | ||
Break = 0, | ||
Throw = 1, | ||
Get = 2, | ||
GetMulti = 3, | ||
} | ||
|
||
/// | ||
/// The response from a call to extern_generator_send. Gets include Idents for their Handles | ||
/// in order to avoid roundtripping to intern them, and to eagerly trigger errors for unhashable | ||
/// types on the python side where possible. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good explanation. |
||
/// | ||
#[repr(C)] | ||
pub struct PyGeneratorResponse { | ||
res_type: PyGeneratorResponseType, | ||
values: HandleBuffer, | ||
products: TypeIdBuffer, | ||
pub enum PyGeneratorResponse { | ||
Get(TypeId, Handle, Ident), | ||
GetMulti(TypeIdBuffer, HandleBuffer, IdentBuffer), | ||
// NB: Broke not Break because C keyword. | ||
Broke(Handle), | ||
Throw(Handle), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Much better!! Thank you for taking the time to figure out how to do this with cffi, it makes the code leagues more readable. It also obviates the need for that TODO without additional complexity in building the engine! |
||
} | ||
|
||
#[derive(Debug)] | ||
|
@@ -509,11 +494,43 @@ pub enum GeneratorResponse { | |
/// the object's type. | ||
/// | ||
#[repr(C)] | ||
#[derive(Clone, Copy)] | ||
pub struct Ident { | ||
pub hash: i64, | ||
pub type_id: TypeId, | ||
} | ||
|
||
pub trait RawBuffer<Raw, Output> { | ||
fn ptr(&self) -> *mut Raw; | ||
fn len(&self) -> u64; | ||
|
||
/// | ||
/// A buffer-specific shallow clone operation (possibly just implemented via clone). | ||
/// | ||
fn lift(t: &Raw) -> Output; | ||
|
||
/// | ||
/// Returns a Vec copy of the buffer contents. | ||
/// | ||
fn to_vec(&self) -> Vec<Output> { | ||
with_vec(self.ptr(), self.len() as usize, |vec| { | ||
vec.iter().map(Self::lift).collect() | ||
}) | ||
} | ||
|
||
/// | ||
/// Asserts that the buffer contains one item, and returns a copy of it. | ||
/// | ||
fn unwrap_one(&self) -> Output { | ||
assert!( | ||
self.len() == 1, | ||
"Expected exactly 1 item in Buffer, but had: {}", | ||
self.len() | ||
); | ||
with_vec(self.ptr(), self.len() as usize, |vec| Self::lift(&vec[0])) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason this doesn't delegate to fn unwrap_one(&self) -> Output {
assert_equal!(self.len(), 1, "Expected exactly 1 item in Buffer, but had: {}", self.len());
self.to_vec()[0]
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That means that by avoiding calling |
||
} | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is all really great! I really like that we can manipulate python collections in this structured, efficient, and safe way. |
||
/// | ||
/// Points to an array containing a series of values allocated by Python. | ||
/// | ||
|
@@ -529,34 +546,42 @@ pub struct HandleBuffer { | |
handle_: Handle, | ||
} | ||
|
||
impl HandleBuffer { | ||
pub fn to_vec(&self) -> Vec<Value> { | ||
with_vec(self.handles_ptr, self.handles_len as usize, |handle_vec| { | ||
handle_vec | ||
.iter() | ||
.map(|h| Value::new(unsafe { h.clone_shallow() })) | ||
.collect() | ||
}) | ||
impl RawBuffer<Handle, Value> for HandleBuffer { | ||
fn ptr(&self) -> *mut Handle { | ||
self.handles_ptr | ||
} | ||
|
||
/// | ||
/// Asserts that the HandleBuffer contains one value, and returns it. | ||
/// | ||
/// NB: Consider making generic and merging with TypeIdBuffer if we get a third copy. | ||
/// | ||
pub fn unwrap_one(&self) -> Value { | ||
assert!( | ||
self.handles_len == 1, | ||
"HandleBuffer contained more than one value: {}", | ||
self.handles_len | ||
); | ||
with_vec(self.handles_ptr, self.handles_len as usize, |handle_vec| { | ||
Value::new(unsafe { handle_vec.iter().next().unwrap().clone_shallow() }) | ||
}) | ||
fn len(&self) -> u64 { | ||
self.handles_len | ||
} | ||
|
||
fn lift(t: &Handle) -> Value { | ||
Value::new(unsafe { t.clone_shallow() }) | ||
} | ||
} | ||
|
||
#[repr(C)] | ||
pub struct IdentBuffer { | ||
idents_ptr: *mut Ident, | ||
idents_len: u64, | ||
// A Handle to hold the underlying array alive. | ||
handle_: Handle, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know this part isn't new, but could we add a comment on one of these fields explaining why There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's correct. Since CI is green here, I'm going to go ahead and merge this one, but can revisit. |
||
} | ||
|
||
impl RawBuffer<Ident, Ident> for IdentBuffer { | ||
fn ptr(&self) -> *mut Ident { | ||
self.idents_ptr | ||
} | ||
|
||
fn len(&self) -> u64 { | ||
self.idents_len | ||
} | ||
|
||
fn lift(t: &Ident) -> Ident { | ||
*t | ||
} | ||
} | ||
|
||
// Points to an array of TypeIds. | ||
#[repr(C)] | ||
pub struct TypeIdBuffer { | ||
ids_ptr: *mut TypeId, | ||
|
@@ -565,25 +590,17 @@ pub struct TypeIdBuffer { | |
handle_: Handle, | ||
} | ||
|
||
impl TypeIdBuffer { | ||
pub fn to_vec(&self) -> Vec<TypeId> { | ||
with_vec(self.ids_ptr, self.ids_len as usize, |vec| vec.clone()) | ||
impl RawBuffer<TypeId, TypeId> for TypeIdBuffer { | ||
fn ptr(&self) -> *mut TypeId { | ||
self.ids_ptr | ||
} | ||
|
||
/// | ||
/// Asserts that the TypeIdBuffer contains one TypeId, and returns it. | ||
/// | ||
/// NB: Consider making generic and merging with HandleBuffer if we get a third copy. | ||
/// | ||
pub fn unwrap_one(&self) -> TypeId { | ||
assert!( | ||
self.ids_len == 1, | ||
"TypeIdBuffer contained more than one value: {}", | ||
self.ids_len | ||
); | ||
with_vec(self.ids_ptr, self.ids_len as usize, |ids_vec| { | ||
*ids_vec.iter().next().unwrap() | ||
}) | ||
fn len(&self) -> u64 { | ||
self.ids_len | ||
} | ||
|
||
fn lift(t: &TypeId) -> TypeId { | ||
*t | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a fantastic change. Is this relying on mostly the rust C representation of enums (with
tag
and (in this case)get
fields), or is there some cffi magic going on to allow addressing unions in this deeply ergonomic way? Is there a link we could add documenting why this works to access our rust enums?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case, I actually think we don't want to do this, as we probably want to colocate parsing the python response and producing the FFI object that represents it (so the below code is needless complexity for now). But I wrote this comment out anyway so I can link to it in #7367:
Could this part be moved into a helper method? Something like:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is mostly relying on the C API that is generated by cbindgen. Kudos on integrating that.