diff --git a/guide/src/features.md b/guide/src/features.md index d801e2dd1e4..48beb5d016f 100644 --- a/guide/src/features.md +++ b/guide/src/features.md @@ -59,7 +59,9 @@ The feature has some unfinished refinements and performance improvements. To hel ### `experimental-inspect` -This feature adds the `pyo3::inspect` module, as well as `IntoPy::type_output` and `FromPyObject::type_input` APIs to produce Python type "annotations" for Rust types. +This feature embeds introspection data in the built binaries, which can then be retrieved using the `pyo3-introspection` crate to generate [type stubs](https://typing.readthedocs.io/en/latest/source/stubs.html). + +Also, this feature adds the `pyo3::inspect` module, as well as `IntoPy::type_output` and `FromPyObject::type_input` APIs to produce Python type "annotations" for Rust types. This is a first step towards adding first-class support for generating type annotations automatically in PyO3, however work is needed to finish this off. All feedback and offers of help welcome on [issue #2454](https://github.com/PyO3/pyo3/issues/2454). diff --git a/pyo3-introspection/src/introspection.rs b/pyo3-introspection/src/introspection.rs index 3097f273c77..208a3fd87b9 100644 --- a/pyo3-introspection/src/introspection.rs +++ b/pyo3-introspection/src/introspection.rs @@ -86,74 +86,72 @@ fn find_introspection_chunks_in_binary_object(path: &Path) -> Result> .context("The built library is not valid or not supported by our binary parser")? { Object::Elf(elf) => find_introspection_chunks_in_elf(&elf, &library_content), - Object::Mach(Mach::Binary(matcho)) => { - find_introspection_chunks_in_matcho(&matcho, &library_content) + Object::Mach(Mach::Binary(macho)) => { + find_introspection_chunks_in_macho(&macho, &library_content) } Object::Mach(Mach::Fat(multi_arch)) => { for arch in &multi_arch { match arch? 
{ - SingleArch::MachO(matcho) => { - return find_introspection_chunks_in_matcho(&matcho, &library_content) + SingleArch::MachO(macho) => { + return find_introspection_chunks_in_macho(&macho, &library_content) } SingleArch::Archive(_) => (), } } - bail!("No Match-o chunk found in the multi-arch Match-o container") + bail!("No Mach-o chunk found in the multi-arch Mach-o container") } Object::PE(pe) => find_introspection_chunks_in_pe(&pe, &library_content), _ => { - bail!("Only ELF, Match-o and PE containers can be introspected") + bail!("Only ELF, Mach-o and PE containers can be introspected") } } } fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Result> { - let pyo3_data_section_header = elf - .section_headers - .iter() - .find(|section| elf.shdr_strtab.get_at(section.sh_name).unwrap_or_default() == ".pyo3i0") - .context("No .pyo3i0 section found")?; - let sh_offset = - usize::try_from(pyo3_data_section_header.sh_offset).context("Section offset overflow")?; - let sh_size = - usize::try_from(pyo3_data_section_header.sh_size).context("Section len overflow")?; - if elf.is_64 { - read_section_with_ptr_and_len_64bits( - &library_content[sh_offset..sh_offset + sh_size], - 0, - library_content, - ) - } else { - read_section_with_ptr_and_len_32bits( - &library_content[sh_offset..sh_offset + sh_size], - 0, - library_content, - ) + let mut chunks = Vec::new(); + for sym in &elf.syms { + if is_introspection_symbol(elf.strtab.get_at(sym.st_name).unwrap_or_default()) { + let section_header = &elf.section_headers[sym.st_shndx]; + let data_offset = sym.st_value + section_header.sh_offset - section_header.sh_addr; + chunks.push(read_symbol_value_with_ptr_and_len( + &library_content[usize::try_from(data_offset).context("File offset overflow")?..], + 0, + library_content, + elf.is_64, + )?); + } } + Ok(chunks) } -fn find_introspection_chunks_in_matcho( - matcho: &MachO<'_>, +fn find_introspection_chunks_in_macho( + macho: &MachO<'_>, library_content: 
&[u8], ) -> Result> { - if !matcho.little_endian { - bail!("Only little endian Match-o binaries are supported"); + if !macho.little_endian { + bail!("Only little endian Mach-o binaries are supported"); } - let text_segment = matcho + + let sections = macho .segments - .iter() - .find(|s| s.segname == *b"__TEXT\0\0\0\0\0\0\0\0\0\0") - .context("No __TEXT segment found")?; - let (_, pyo3_data_section) = text_segment - .sections()? - .into_iter() - .find(|s| s.0.sectname == *b"__pyo3i0\0\0\0\0\0\0\0\0") - .context("No __pyo3i0 section found")?; - if matcho.is_64 { - read_section_with_ptr_and_len_64bits(pyo3_data_section, 0, library_content) - } else { - read_section_with_ptr_and_len_32bits(pyo3_data_section, 0, library_content) + .sections() + .flatten() + .map(|t| t.map(|s| s.0)) + .collect::, _>>()?; + let mut chunks = Vec::new(); + for (name, nlist) in macho.symbols().flatten() { + if is_introspection_symbol(name) { + let section = &sections[nlist.n_sect]; + let data_offset = nlist.n_value + u64::from(section.offset) - section.addr; + chunks.push(read_symbol_value_with_ptr_and_len( + &library_content[usize::try_from(data_offset).context("File offset overflow")?..], + 0, + library_content, + macho.is_64, + )?); + } } + Ok(chunks) } fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result> { @@ -167,77 +165,63 @@ fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Resul .context(".rdata virtual_address overflow")? - usize::try_from(rdata_data_section.pointer_to_raw_data) .context(".rdata pointer_to_raw_data overflow")?; - let pyo3_data_section = pe - .sections - .iter() - .find(|section| section.name().unwrap_or_default() == ".pyo3i0") - .context("No .pyo3i0 section found")?; - let pyo3_data = pyo3_data_section - .data(library_content)? 
- .context("Not able to find the .pyo3i0 section content")?; - if pe.is_64 { - read_section_with_ptr_and_len_64bits(&pyo3_data, rdata_shift, library_content) - } else { - read_section_with_ptr_and_len_32bits(&pyo3_data, rdata_shift, library_content) + + let mut chunks = Vec::new(); + for export in &pe.exports { + if is_introspection_symbol(export.name.unwrap_or_default()) { + chunks.push(read_symbol_value_with_ptr_and_len( + &library_content[export.offset.context("No symbol offset")?..], + rdata_shift, + library_content, + pe.is_64, + )?); + } } + Ok(chunks) } -fn read_section_with_ptr_and_len_32bits( - slice: &[u8], +fn read_symbol_value_with_ptr_and_len( + value_slice: &[u8], shift: usize, full_library_content: &[u8], -) -> Result> { - slice - .chunks_exact(8) - .filter_map(|element| { - let (ptr, len) = element.split_at(4); - let ptr = match usize::try_from(u32::from_le_bytes(ptr.try_into().unwrap())) { - Ok(ptr) => ptr, - Err(e) => return Some(Err(e).context("Pointer overflow")), - }; - let len = match usize::try_from(u32::from_le_bytes(len.try_into().unwrap())) { - Ok(ptr) => ptr, - Err(e) => return Some(Err(e).context("Length overflow")), - }; - if ptr == 0 || len == 0 { - // Workaround for PE - return None; - } - Some( - serde_json::from_slice(&full_library_content[ptr - shift..ptr - shift + len]) - .context("Failed to parse introspection chunk"), - ) - }) - .collect() + is_64: bool, +) -> Result { + let (ptr, len) = if is_64 { + let (ptr, len) = value_slice[..16].split_at(8); + let ptr = usize::try_from(u64::from_le_bytes( + ptr.try_into().context("Too short symbol value")?, + )) + .context("Pointer overflow")?; + let len = usize::try_from(u64::from_le_bytes( + len.try_into().context("Too short symbol value")?, + )) + .context("Length overflow")?; + (ptr, len) + } else { + let (ptr, len) = value_slice[..8].split_at(4); + let ptr = usize::try_from(u32::from_le_bytes( + ptr.try_into().context("Too short symbol value")?, + )) + .context("Pointer overflow")?; + 
let len = usize::try_from(u32::from_le_bytes( + len.try_into().context("Too short symbol value")?, + )) + .context("Length overflow")?; + (ptr, len) + }; + let chunk = &full_library_content[ptr - shift..ptr - shift + len]; + serde_json::from_slice(chunk).with_context(|| { + format!( + "Failed to parse introspection chunk: '{}'", + String::from_utf8_lossy(chunk) + ) + }) } -fn read_section_with_ptr_and_len_64bits( - slice: &[u8], - shift: usize, - full_library_content: &[u8], -) -> Result> { - slice - .chunks_exact(16) - .filter_map(|element| { - let (ptr, len) = element.split_at(8); - let ptr = match usize::try_from(u64::from_le_bytes(ptr.try_into().unwrap())) { - Ok(ptr) => ptr, - Err(e) => return Some(Err(e).context("Pointer overflow")), - }; - let len = match usize::try_from(u64::from_le_bytes(len.try_into().unwrap())) { - Ok(ptr) => ptr, - Err(e) => return Some(Err(e).context("Length overflow")), - }; - if ptr == 0 || len == 0 { - // Workaround for PE - return None; - } - Some( - serde_json::from_slice(&full_library_content[ptr - shift..ptr - shift + len]) - .context("Failed to parse introspection chunk"), - ) - }) - .collect() +fn is_introspection_symbol(name: &str) -> bool { + name.strip_prefix('_') + .unwrap_or(name) + .starts_with("PYO3_INTROSPECTION_0_") } #[derive(Deserialize)] diff --git a/pyo3-macros-backend/src/introspection.rs b/pyo3-macros-backend/src/introspection.rs index 5a9baaf6bfc..f2f2492b39f 100644 --- a/pyo3-macros-backend/src/introspection.rs +++ b/pyo3-macros-backend/src/introspection.rs @@ -7,8 +7,9 @@ use crate::utils::PyO3CratePath; use proc_macro2::{Span, TokenStream}; -use quote::{format_ident, quote, ToTokens}; +use quote::{format_ident, quote}; use std::collections::hash_map::DefaultHasher; +use std::collections::HashMap; use std::hash::{Hash, Hasher}; use std::sync::atomic::{AtomicUsize, Ordering}; use syn::Ident; @@ -20,29 +21,24 @@ pub fn module_introspection_code<'a>( name: &str, members: impl IntoIterator, ) -> TokenStream { - 
let mut to_concat = Vec::new(); - to_concat.push(quote! { "{\"type\":\"module\",\"id\":\"" }); - to_concat.push(quote! { _PYO3_INTROSPECTION_ID }); - to_concat.push(quote! { "\",\"name\":\""}); - to_concat.push(quote! { #name }); - to_concat.push(quote! { "\",\"members\":["}); - let mut start = true; - for member in members { - if start { - start = false; - } else { - to_concat.push(quote! { "," }); - } - to_concat.push(quote! { "\"" }); - to_concat.push(quote! { - #member::_PYO3_INTROSPECTION_ID - }); - to_concat.push(quote! { "\"" }); - } - to_concat.push(quote! { "]}" }); - let stub = stub_section(quote! { - #pyo3_crate_path::impl_::concat::const_concat!(#(#to_concat , )*) - }); + let stub = IntrospectionNode::Map( + [ + ("type", IntrospectionNode::String("module")), + ("id", IntrospectionNode::IntrospectionId(None)), + ("name", IntrospectionNode::String(name)), + ( + "members", + IntrospectionNode::List( + members + .into_iter() + .map(|member| IntrospectionNode::IntrospectionId(Some(member))) + .collect(), + ), + ), + ] + .into(), + ) + .emit(pyo3_crate_path); let introspection_id = introspection_id_const(); quote! { #stub @@ -55,15 +51,15 @@ pub fn class_introspection_code( ident: &Ident, name: &str, ) -> TokenStream { - let mut to_concat = Vec::new(); - to_concat.push(quote! { "{\"type\":\"class\",\"id\":\"" }); - to_concat.push(quote! { #ident::_PYO3_INTROSPECTION_ID }); - to_concat.push(quote! { "\",\"name\":\""}); - to_concat.push(quote! { #name }); - to_concat.push(quote! { "\"}" }); - let stub = stub_section(quote! { - #pyo3_crate_path::impl_::concat::const_concat!(#(#to_concat , )*) - }); + let stub = IntrospectionNode::Map( + [ + ("type", IntrospectionNode::String("class")), + ("id", IntrospectionNode::IntrospectionId(Some(ident))), + ("name", IntrospectionNode::String(name)), + ] + .into(), + ) + .emit(pyo3_crate_path); let introspection_id = introspection_id_const(); quote! 
{ #stub @@ -74,15 +70,15 @@ pub fn class_introspection_code( } pub fn function_introspection_code(pyo3_crate_path: &PyO3CratePath, name: &str) -> TokenStream { - let mut to_concat = Vec::new(); - to_concat.push(quote! { "{\"type\":\"function\",\"id\":\"" }); - to_concat.push(quote! { _PYO3_INTROSPECTION_ID }); - to_concat.push(quote! { "\",\"name\":\""}); - to_concat.push(quote! { #name }); - to_concat.push(quote! { "\"}" }); - let stub = stub_section(quote! { - #pyo3_crate_path::impl_::concat::const_concat!(#(#to_concat , )*) - }); + let stub = IntrospectionNode::Map( + [ + ("type", IntrospectionNode::String("function")), + ("id", IntrospectionNode::IntrospectionId(None)), + ("name", IntrospectionNode::String(name)), + ] + .into(), + ) + .emit(pyo3_crate_path); let introspection_id = introspection_id_const(); quote! { #stub @@ -90,19 +86,89 @@ pub fn function_introspection_code(pyo3_crate_path: &PyO3CratePath, name: &str) } } -fn stub_section(content: impl ToTokens) -> TokenStream { - let static_name = format_ident!("PYO3_INTRS_{}", unique_element_id()); - // #[no_mangle] is required to make sure some linkers like Linux ones do not mangle the section name too. - quote! 
{ - const _: () = { - #[used] - #[cfg(not(target_family = "wasm"))] - #[cfg_attr(any(target_os = "macos", target_os = "ios", target_os = "tvos", target_os = "watchos"), link_section = "__TEXT,__pyo3i0")] - #[cfg_attr(not(any(target_os = "macos", target_os = "ios", target_os = "tvos", target_os = "watchos")), link_section = ".pyo3i0")] - #[no_mangle] - static #static_name: &'static str = #content; - }; +enum IntrospectionNode<'a> { + String(&'a str), + IntrospectionId(Option<&'a Ident>), + Map(HashMap<&'static str, IntrospectionNode<'a>>), + List(Vec>), +} + +impl IntrospectionNode<'_> { + fn emit(&self, pyo3_crate_path: &PyO3CratePath) -> TokenStream { + let mut content = Vec::new(); + self.add_to_serialization(&mut content); + + let static_name = format_ident!("PYO3_INTROSPECTION_0_{}", unique_element_id()); + // #[no_mangle] keeps the static's symbol name unmangled so that it can be located in the built binary by its `PYO3_INTROSPECTION_0_` prefix. + quote! { + const _: () = { + #[used] + #[no_mangle] + static #static_name: &'static str = #pyo3_crate_path::impl_::concat::const_concat!(#(#content , )*); + }; + } + } + + fn add_to_serialization(&self, content: &mut Vec) { + match self { + Self::String(string) => { + let string = escape_json_string(string); + content.push(quote! { #string }); + } + Self::IntrospectionId(ident) => { + content.push(quote! { "\"" }); + content.push(if let Some(ident) = ident { + quote! { #ident::_PYO3_INTROSPECTION_ID} + } else { + quote! { _PYO3_INTROSPECTION_ID } + }); + content.push(quote! { "\"" }); + } + Self::Map(map) => { + content.push(quote! { "{" }); + for (i, (key, value)) in map.iter().enumerate() { + if i > 0 { + content.push(quote! { "," }); + } + let key = escape_json_string(key); + content.push(quote! { #key }); + content.push(quote! { ":" }); + value.add_to_serialization(content); + } + content.push(quote! { "}" }); + } + Self::List(list) => { + content.push(quote! 
{ "[" }); + for (i, value) in list.iter().enumerate() { + if i > 0 { + content.push(quote! 
{ "," }); + } + value.add_to_serialization(content); + } + content.push(quote! { "]" }); + } + } + } +} + +fn escape_json_string(s: &str) -> String { + let mut buffer = String::with_capacity(s.len() + 2); + buffer.push('"'); + for c in s.chars() { + match c { + '\\' => buffer.push_str("\\\\"), + '"' => buffer.push_str("\\\""), + c => { + if c < char::from(32) { + panic!("ASCII chars below 32 are not allowed") + } else { + buffer.push(c) + } + } + } } + buffer.push('"'); + buffer } fn introspection_id_const() -> TokenStream { diff --git a/pyo3-macros-backend/src/pyclass.rs b/pyo3-macros-backend/src/pyclass.rs index 5d5091b16f2..9000eff93a1 100644 --- a/pyo3-macros-backend/src/pyclass.rs +++ b/pyo3-macros-backend/src/pyclass.rs @@ -2253,7 +2253,7 @@ impl<'a> PyClassImplsBuilder<'a> { #[cfg(feature = "experimental-inspect")] fn impl_introspection(&self, ctx: &Ctx) -> TokenStream { - let Ctx { pyo3_path } = ctx; + let Ctx { pyo3_path, .. } = ctx; let name = get_class_python_name(self.cls, self.attr).to_string(); class_introspection_code(pyo3_path, self.cls, &name) } diff --git a/pytests/Cargo.toml b/pytests/Cargo.toml index 758764d8c1b..3ad3e7ab5ce 100644 --- a/pytests/Cargo.toml +++ b/pytests/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" publish = false [dependencies] -pyo3 = { path = "../", features = ["extension-module", "experimental-declarative-modules", "experimental-inspect"] } +pyo3 = { path = "../", features = ["extension-module", "experimental-inspect"] } [build-dependencies] pyo3-build-config = { path = "../pyo3-build-config" } diff --git a/pytests/src/pyclasses.rs b/pytests/src/pyclasses.rs index a6973feab2e..bfc0d8ec139 100644 --- a/pytests/src/pyclasses.rs +++ b/pytests/src/pyclasses.rs @@ -80,5 +80,7 @@ impl ClassWithDict { #[pymodule] pub mod pyclasses { #[pymodule_export] - use super::{AssertingBaseClass, ClassWithoutConstructor, EmptyClass, PyClassIter, ClassWithDict}; + use super::{ + AssertingBaseClass, ClassWithDict, ClassWithoutConstructor, 
EmptyClass, PyClassIter, + }; } diff --git a/pytests/stubs/pyclasses.pyi b/pytests/stubs/pyclasses.pyi index db688c368ef..86ae67e21f8 100644 --- a/pytests/stubs/pyclasses.pyi +++ b/pytests/stubs/pyclasses.pyi @@ -1,4 +1,5 @@ class AssertingBaseClass: ... +class ClassWithDict: ... class ClassWithoutConstructor: ... class EmptyClass: ... class PyClassIter: ...