From e9c53be2e887e6c6715202a6a62696d8781e86a2 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Thu, 4 Apr 2019 13:58:13 -0700 Subject: [PATCH 1/5] Extract a RawBuffer trait for reuse in an upcoming new buffer. --- src/rust/engine/src/externs.rs | 88 +++++++++++++++++++--------------- src/rust/engine/src/lib.rs | 2 +- 2 files changed, 50 insertions(+), 40 deletions(-) diff --git a/src/rust/engine/src/externs.rs b/src/rust/engine/src/externs.rs index f34534bc01e..b563fbcc6f6 100644 --- a/src/rust/engine/src/externs.rs +++ b/src/rust/engine/src/externs.rs @@ -514,6 +514,37 @@ pub struct Ident { pub type_id: TypeId, } +pub trait RawBuffer { + fn ptr(&self) -> *mut T; + fn len(&self) -> u64; + + /// + /// A buffer-specific shallow clone operation (possibly just implemented via clone). + /// + fn lift(t: &T) -> O; + + /// + /// Returns a Vec copy of the buffer contents. + /// + fn to_vec(&self) -> Vec { + with_vec(self.ptr(), self.len() as usize, |vec| { + vec.iter().map(Self::lift).collect() + }) + } + + /// + /// Asserts that the buffer contains one item, and returns a copy of it. + /// + fn unwrap_one(&self) -> O { + assert!( + self.len() == 1, + "Expected exactly 1 item in Buffer, but had: {}", + self.len() + ); + with_vec(self.ptr(), self.len() as usize, |vec| Self::lift(&vec[0])) + } +} + /// /// Points to an array containing a series of values allocated by Python. /// @@ -529,30 +560,17 @@ pub struct HandleBuffer { handle_: Handle, } -impl HandleBuffer { - pub fn to_vec(&self) -> Vec { - with_vec(self.handles_ptr, self.handles_len as usize, |handle_vec| { - handle_vec - .iter() - .map(|h| Value::new(unsafe { h.clone_shallow() })) - .collect() - }) +impl RawBuffer for HandleBuffer { + fn ptr(&self) -> *mut Handle { + self.handles_ptr } - /// - /// Asserts that the HandleBuffer contains one value, and returns it. - /// - /// NB: Consider making generic and merging with TypeIdBuffer if we get a third copy. - /// - pub fn unwrap_one(&self) -> Value { - assert!( - self.handles_len == 1, - "HandleBuffer contained more than one value: {}", - self.handles_len - ); - with_vec(self.handles_ptr, self.handles_len as usize, |handle_vec| { - Value::new(unsafe { handle_vec.iter().next().unwrap().clone_shallow() }) - }) + fn len(&self) -> u64 { + self.handles_len + } + + fn lift(t: &Handle) -> Value { + Value::new(unsafe { t.clone_shallow() }) } } @@ -565,25 +583,17 @@ pub struct TypeIdBuffer { handle_: Handle, } -impl TypeIdBuffer { - pub fn to_vec(&self) -> Vec { - with_vec(self.ids_ptr, self.ids_len as usize, |vec| vec.clone()) +impl RawBuffer for TypeIdBuffer { + fn ptr(&self) -> *mut TypeId { + self.ids_ptr } - /// - /// Asserts that the TypeIdBuffer contains one TypeId, and returns it. - /// - /// NB: Consider making generic and merging with HandleBuffer if we get a third copy. - /// - pub fn unwrap_one(&self) -> TypeId { - assert!( - self.ids_len == 1, - "TypeIdBuffer contained more than one value: {}", - self.ids_len - ); - with_vec(self.ids_ptr, self.ids_len as usize, |ids_vec| { - *ids_vec.iter().next().unwrap() - }) + fn len(&self) -> u64 { + self.ids_len + } + + fn lift(t: &TypeId) -> TypeId { + *t } } diff --git a/src/rust/engine/src/lib.rs b/src/rust/engine/src/lib.rs index 7b2243e25f0..06182dd9f51 100644 --- a/src/rust/engine/src/lib.rs +++ b/src/rust/engine/src/lib.rs @@ -58,7 +58,7 @@ use crate::externs::{ Buffer, BufferBuffer, CallExtern, CloneValExtern, CreateExceptionExtern, DropHandlesExtern, EqualsExtern, EvalExtern, ExternContext, Externs, GeneratorSendExtern, GetTypeForExtern, HandleBuffer, IdentifyExtern, LogExtern, ProjectIgnoringTypeExtern, ProjectMultiExtern, PyResult, - StoreBoolExtern, StoreBytesExtern, StoreF64Extern, StoreI64Extern, StoreTupleExtern, + RawBuffer, StoreBoolExtern, StoreBytesExtern, StoreF64Extern, StoreI64Extern, StoreTupleExtern, StoreUtf8Extern, TypeIdBuffer, TypeToStrExtern, ValToStrExtern, }; use crate::handles::Handle; From 701d79997b0a84d4f95ba09bbf0d5b921a33f939 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Thu, 4 Apr 2019 14:40:06 -0700 Subject: [PATCH 2/5] Add a test that unhashable Get params do not trigger panics. --- .../pants_test/engine/test_scheduler.py | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/python/pants_test/engine/test_scheduler.py b/tests/python/pants_test/engine/test_scheduler.py index ad49a6a5788..6a55f5de372 100644 --- a/tests/python/pants_test/engine/test_scheduler.py +++ b/tests/python/pants_test/engine/test_scheduler.py @@ -108,10 +108,18 @@ def __init__(self, inner): @rule(A, [TypeCheckFailWrapper]) def a_typecheck_fail_test(wrapper): - # This `yield Get(A, B, ...)` will use the `nested_raise` rule defined above, but it won't get to - # the point of raising since the type check will fail at the Get. - supposedly_a = yield Get(A, B, wrapper.inner) - yield supposedly_a + # This `yield` would use the `nested_raise` rule, but it won't get to the point of raising since + # the type check will fail at the Get. + _ = yield Get(A, B, wrapper.inner) # noqa: F841 + yield A() + + +@rule(C, [TypeCheckFailWrapper]) +def c_unhashable(_): + # This `yield` would use the `nested_raise` rule, but it won't get to the point of raising since + # the hashability check will fail. + _ = yield Get(A, B, list()) # noqa: F841 + yield C() @contextmanager @@ -201,6 +209,7 @@ def rules(cls): RootRule(B), RootRule(TypeCheckFailWrapper), a_typecheck_fail_test, + c_unhashable, nested_raise, ] @@ -213,6 +222,12 @@ def test_get_type_match_failure(self): # `a_typecheck_fail_test` above expects `wrapper.inner` to be a `B`. self.scheduler.product_request(A, [Params(TypeCheckFailWrapper(A()))]) + def test_unhashable_failure(self): + """Test that unhashable Get(...) params result in a structured error.""" + expected_msg = """TypeError: unhashable type: 'list'""" + with assert_execution_error(self, expected_msg): + self.scheduler.product_request(C, [Params(TypeCheckFailWrapper(A()))]) + def test_trace_includes_rule_exception_traceback(self): # Execute a request that will trigger the nested raise, and then directly inspect its trace. request = self.scheduler.execution_request([A], [B()]) From b5e083d86ec1b7c15e56ca0af4d2ab371fec8ed0 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Thu, 4 Apr 2019 14:43:08 -0700 Subject: [PATCH 3/5] Identify all Get params before returning them in extern_generator_send. --- src/python/pants/engine/native.py | 19 +++++++++++--- src/rust/engine/src/externs.rs | 43 +++++++++++++++++++++++-------- src/rust/engine/src/interning.rs | 10 ++++--- 3 files changed, 55 insertions(+), 17 deletions(-) diff --git a/src/python/pants/engine/native.py b/src/python/pants/engine/native.py index 350f97fd72c..2f3d40745e6 100644 --- a/src/python/pants/engine/native.py +++ b/src/python/pants/engine/native.py @@ -295,9 +295,7 @@ def extern_identify(self, context_handle, val): """ c = self._ffi.from_handle(context_handle) obj = self._ffi.from_handle(val[0]) - hash_ = hash(obj) - type_id = c.to_id(type(obj)) - return (hash_, TypeId(type_id)) + return c.identify(obj) @_extern_decl('_Bool', ['ExternContext*', 'Handle*', 'Handle*']) def extern_equals(self, context_handle, val1, val2): @@ -424,16 +422,19 @@ def extern_generator_send(self, context_handle, func, arg): if isinstance(res, Get): # Get. values = [res.subject] + identities = [c.identify(res.subject)] products = [res.product] tag = 2 elif type(res) in (tuple, list): # GetMulti. values = [g.subject for g in res] + identities = [c.identify(g.subject) for g in res] products = [g.product for g in res] tag = 3 else: # Break. values = [res] + identities = [] products = [] tag = 0 except Exception as e: @@ -441,12 +442,14 @@ def extern_generator_send(self, context_handle, func, arg): val = e val._formatted_exc = traceback.format_exc() values = [val] + identities = [] products = [] tag = 1 return ( tag, c.vals_buf([c.to_value(v) for v in values]), + c.identities_buf(identities), c.type_ids_buf([TypeId(c.to_id(t)) for t in products]) ) @@ -521,6 +524,10 @@ def vals_buf(self, vals): buf = self._ffi.new('Handle[]', vals) return (buf, len(vals), self.to_value(buf)) + def identities_buf(self, idents): + buf = self._ffi.new('Ident[]', idents) + return (buf, len(idents), self.to_value(buf)) + def type_ids_buf(self, types): buf = self._ffi.new('TypeId[]', types) return (buf, len(types), self.to_value(buf)) @@ -545,6 +552,12 @@ def raise_or_return(self, pyresult): def drop_handles(self, handles): self._handles -= set(handles) + def identify(self, obj): + """Return an Ident-shaped tuple for the given object.""" + hash_ = hash(obj) + type_id = self.to_id(type(obj)) + return (hash_, TypeId(type_id)) + def to_id(self, typ): type_id = id(typ) self._types[type_id] = typ diff --git a/src/rust/engine/src/externs.rs b/src/rust/engine/src/externs.rs index b563fbcc6f6..38dd6590cf2 100644 --- a/src/rust/engine/src/externs.rs +++ b/src/rust/engine/src/externs.rs @@ -223,29 +223,27 @@ pub fn generator_send(generator: &Value, arg: &Value) -> Result { let mut interns = INTERNS.write(); let p = response.products.unwrap_one(); + let i = response.identities.unwrap_one(); let v = response.values.unwrap_one(); let g = Get { product: p, - subject: interns.insert(v), + subject: interns.insert_with(v, i), }; Ok(GeneratorResponse::Get(g)) } PyGeneratorResponseType::GetMulti => { let mut interns = INTERNS.write(); - let PyGeneratorResponse { - products: products_buf, - values: values_buf, - .. - } = response; - let products = products_buf.to_vec(); - let values = values_buf.to_vec(); + let products = response.products.to_vec(); + let identities = response.identities.to_vec(); + let values = response.values.to_vec(); assert_eq!(products.len(), values.len()); let continues: Vec = products .into_iter() .zip(values.into_iter()) - .map(|(p, v)| Get { + .zip(identities.into_iter()) + .map(|((p, v), i)| Get { product: p, - subject: interns.insert(v), + subject: interns.insert_with(v, i), }) .collect(); Ok(GeneratorResponse::GetMulti(continues)) @@ -478,6 +476,7 @@ pub enum PyGeneratorResponseType { pub struct PyGeneratorResponse { res_type: PyGeneratorResponseType, values: HandleBuffer, + identities: IdentBuffer, products: TypeIdBuffer, } @@ -509,6 +508,7 @@ pub enum GeneratorResponse { /// the object's type. /// #[repr(C)] +#[derive(Clone, Copy)] pub struct Ident { pub hash: i64, pub type_id: TypeId, @@ -574,7 +574,28 @@ impl RawBuffer for HandleBuffer { } } -// Points to an array of TypeIds. +#[repr(C)] +pub struct IdentBuffer { + idents_ptr: *mut Ident, + idents_len: u64, + // A Handle to hold the underlying array alive. + handle_: Handle, +} + +impl RawBuffer for IdentBuffer { + fn ptr(&self) -> *mut Ident { + self.idents_ptr + } + + fn len(&self) -> u64 { + self.idents_len + } + + fn lift(t: &Ident) -> Ident { + *t + } +} + #[repr(C)] pub struct TypeIdBuffer { ids_ptr: *mut TypeId, diff --git a/src/rust/engine/src/interning.rs b/src/rust/engine/src/interning.rs index f53956beb2e..868ac4d00ba 100644 --- a/src/rust/engine/src/interning.rs +++ b/src/rust/engine/src/interning.rs @@ -44,15 +44,19 @@ impl Interns { } pub fn insert(&mut self, v: Value) -> Key { - let Ident { hash, type_id } = externs::identify(&v); + let ident = externs::identify(&v); + self.insert_with(v, ident) + } + + pub fn insert_with(&mut self, v: Value, ident: Ident) -> Key { let mut inserted = false; let id_generator = self.id_generator; let key = *self .forward - .entry(InternKey(hash, v.clone())) + .entry(InternKey(ident.hash, v.clone())) .or_insert_with(|| { inserted = true; - Key::new(id_generator, type_id) + Key::new(id_generator, ident.type_id) }); if inserted { self.reverse.insert(key, v); From 9520bec18270a6f28c5fb0db94a8da9482356d3a Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Fri, 5 Apr 2019 21:55:40 -0700 Subject: [PATCH 4/5] Take advantage of cbindgen's ability to generate tagged unions! --- src/python/pants/engine/native.py | 51 +++++++++++++------------- src/rust/engine/src/externs.rs | 60 ++++++++++++------------------- 2 files changed, 47 insertions(+), 64 deletions(-) diff --git a/src/python/pants/engine/native.py b/src/python/pants/engine/native.py index 2f3d40745e6..14ae63b34ff 100644 --- a/src/python/pants/engine/native.py +++ b/src/python/pants/engine/native.py @@ -226,8 +226,9 @@ def wrapper(func): class _FFISpecification(object): - def __init__(self, ffi): + def __init__(self, ffi, lib): self._ffi = ffi + self._lib = lib @memoized_classproperty def _extern_fields(cls): @@ -417,41 +418,37 @@ def extern_create_exception(self, context_handle, msg_ptr, msg_len): def extern_generator_send(self, context_handle, func, arg): """Given a generator, send it the given value and return a response.""" c = self._ffi.from_handle(context_handle) + response = self._ffi.new('PyGeneratorResponse*') try: res = c.from_value(func[0]).send(c.from_value(arg[0])) if isinstance(res, Get): # Get. - values = [res.subject] - identities = [c.identify(res.subject)] - products = [res.product] - tag = 2 + response.tag = self._lib.Get + response.get = ( + TypeId(c.to_id(res.product)), + c.to_value(res.subject), + c.identify(res.subject), + ) elif type(res) in (tuple, list): # GetMulti. - values = [g.subject for g in res] - identities = [c.identify(g.subject) for g in res] - products = [g.product for g in res] - tag = 3 + response.tag = self._lib.GetMulti + response.get_multi = ( + c.type_ids_buf([TypeId(c.to_id(g.product)) for g in res]), + c.vals_buf([c.to_value(g.subject) for g in res]), + c.identities_buf([c.identify(g.subject) for g in res]), + ) else: # Break. - values = [res] - identities = [] - products = [] - tag = 0 + response.tag = self._lib.Broke + response.broke = (c.to_value(res),) except Exception as e: # Throw. + response.tag = self._lib.Throw val = e val._formatted_exc = traceback.format_exc() - values = [val] - identities = [] - products = [] - tag = 1 + response.throw = (c.to_value(val),) - return ( - tag, - c.vals_buf([c.to_value(v) for v in values]), - c.identities_buf(identities), - c.type_ids_buf([TypeId(c.to_id(t)) for t in products]) - ) + return response[0] @_extern_decl('PyResult', ['ExternContext*', 'Handle*', 'Handle**', 'uint64_t']) def extern_call(self, context_handle, func, args_ptr, args_len): @@ -602,14 +599,14 @@ def binary(self): @memoized_property def lib(self): """Load and return the native engine module.""" - return self.ffi.dlopen(self.binary) + lib = self.ffi.dlopen(self.binary) + _FFISpecification(self.ffi, lib).register_cffi_externs() + return lib @memoized_property def ffi(self): """A CompiledCFFI handle as imported from the native engine python module.""" - ffi = getattr(self._ffi_module, 'ffi') - _FFISpecification(ffi).register_cffi_externs() - return ffi + return getattr(self._ffi_module, 'ffi') @memoized_property def ffi_lib(self): diff --git a/src/rust/engine/src/externs.rs b/src/rust/engine/src/externs.rs index 38dd6590cf2..69b1067eb44 100644 --- a/src/rust/engine/src/externs.rs +++ b/src/rust/engine/src/externs.rs @@ -209,35 +209,27 @@ pub fn call(func: &Value, args: &[Value]) -> Result { .into() } -/// -/// TODO: If we added support for inserting to the `Interns` using an `Ident`, `PyGeneratorResponse` -/// could directly return `Idents` during `Get` calls. This would also require splitting its fields -/// further to avoid needing to "identify" the result of a `PyGeneratorResponseType::Break`. -/// pub fn generator_send(generator: &Value, arg: &Value) -> Result { let response = with_externs(|e| (e.generator_send)(e.context, generator as &Handle, arg as &Handle)); - match response.res_type { - PyGeneratorResponseType::Break => Ok(GeneratorResponse::Break(response.values.unwrap_one())), - PyGeneratorResponseType::Throw => Err(PyResult::failure_from(response.values.unwrap_one())), - PyGeneratorResponseType::Get => { + match response { + PyGeneratorResponse::Broke(h) => Ok(GeneratorResponse::Break(Value::new(h))), + PyGeneratorResponse::Throw(h) => Err(PyResult::failure_from(Value::new(h))), + PyGeneratorResponse::Get(product, handle, ident) => { let mut interns = INTERNS.write(); - let p = response.products.unwrap_one(); - let i = response.identities.unwrap_one(); - let v = response.values.unwrap_one(); let g = Get { - product: p, - subject: interns.insert_with(v, i), + product, + subject: interns.insert_with(Value::new(handle), ident), }; Ok(GeneratorResponse::Get(g)) } - PyGeneratorResponseType::GetMulti => { + PyGeneratorResponse::GetMulti(products, handles, identities) => { let mut interns = INTERNS.write(); - let products = response.products.to_vec(); - let identities = response.identities.to_vec(); - let values = response.values.to_vec(); + let products = products.to_vec(); + let identities = identities.to_vec(); + let values = handles.to_vec(); assert_eq!(products.len(), values.len()); - let continues: Vec = products + let gets: Vec = products .into_iter() .zip(values.into_iter()) .zip(identities.into_iter()) @@ -246,7 +238,7 @@ pub fn generator_send(generator: &Value, arg: &Value) -> Result> for PyResult { } } -// Only constructed from the python side. -// TODO: map this into a C enum with cbindgen and consume from python instead of using magic numbers -// in extern_generator_send() in native.py! -#[allow(dead_code)] -#[repr(u8)] -pub enum PyGeneratorResponseType { - Break = 0, - Throw = 1, - Get = 2, - GetMulti = 3, -} - +/// +/// The response from a call to extern_generator_send. Gets include Idents for their Handles +/// in order to avoid roundtripping to intern them, and to eagerly trigger errors for unhashable +/// types on the python side where possible. +/// #[repr(C)] -pub struct PyGeneratorResponse { - res_type: PyGeneratorResponseType, - values: HandleBuffer, - identities: IdentBuffer, - products: TypeIdBuffer, +pub enum PyGeneratorResponse { + Get(TypeId, Handle, Ident), + GetMulti(TypeIdBuffer, HandleBuffer, IdentBuffer), + // NB: Broke not Break because C keyword. + Broke(Handle), + Throw(Handle), } #[derive(Debug)] From b44073c5786055da9bbf83e9b5be7150e859995b Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Fri, 5 Apr 2019 22:12:37 -0700 Subject: [PATCH 5/5] Improve type parameter names. --- src/rust/engine/src/externs.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rust/engine/src/externs.rs b/src/rust/engine/src/externs.rs index 69b1067eb44..0563390b9eb 100644 --- a/src/rust/engine/src/externs.rs +++ b/src/rust/engine/src/externs.rs @@ -500,19 +500,19 @@ pub struct Ident { pub type_id: TypeId, } -pub trait RawBuffer { - fn ptr(&self) -> *mut T; +pub trait RawBuffer { + fn ptr(&self) -> *mut Raw; fn len(&self) -> u64; /// /// A buffer-specific shallow clone operation (possibly just implemented via clone). /// - fn lift(t: &T) -> O; + fn lift(t: &Raw) -> Output; /// /// Returns a Vec copy of the buffer contents. /// - fn to_vec(&self) -> Vec { + fn to_vec(&self) -> Vec { with_vec(self.ptr(), self.len() as usize, |vec| { vec.iter().map(Self::lift).collect() }) @@ -521,7 +521,7 @@ pub trait RawBuffer { /// /// Asserts that the buffer contains one item, and returns a copy of it. /// - fn unwrap_one(&self) -> O { + fn unwrap_one(&self) -> Output { assert!( self.len() == 1, "Expected exactly 1 item in Buffer, but had: {}",