diff --git a/core/src/ast/types.rs b/core/src/ast/types.rs index 74d50aa97..b2cfeaf2d 100644 --- a/core/src/ast/types.rs +++ b/core/src/ast/types.rs @@ -396,6 +396,8 @@ pub enum TypeName { pub enum StringEncoding { UnvalidatedUtf8, UnvalidatedUtf16, + /// The caller guarantees that they're passing valid UTF-8, under penalty of UB + Utf8, } impl TypeName { @@ -514,6 +516,11 @@ impl TypeName { )) .unwrap() } + TypeName::StrReference(lifetime, StringEncoding::Utf8) => syn::parse_str(&format!( + "{}str", + ReferenceDisplay(lifetime, &Mutability::Immutable) + )) + .unwrap(), TypeName::PrimitiveSlice(lifetime, mutability, name) => { let primitive_name = PRIMITIVE_TO_STRING.get(name).unwrap(); let formatted_str = format!( @@ -549,14 +556,16 @@ impl TypeName { let mutability = Mutability::from_syn(&r.mutability); let name = r.elem.to_token_stream().to_string(); - if name.starts_with("DiplomatStr") { + if name.starts_with("DiplomatStr") || name == "str" { if mutability.is_mutable() { - panic!("mutable `DiplomatStr*` references are disallowed"); + panic!("mutable string references are disallowed"); } if name == "DiplomatStr" { return TypeName::StrReference(lifetime, StringEncoding::UnvalidatedUtf8); } else if name == "DiplomatStr16" { return TypeName::StrReference(lifetime, StringEncoding::UnvalidatedUtf16); + } else if name == "str" { + return TypeName::StrReference(lifetime, StringEncoding::Utf8); } } if let syn::Type::Slice(slice) = &*r.elem { @@ -951,6 +960,13 @@ impl fmt::Display for TypeName { ReferenceDisplay(lifetime, &Mutability::Immutable) ) } + TypeName::StrReference(lifetime, StringEncoding::Utf8) => { + write!( + f, + "{}str", + ReferenceDisplay(lifetime, &Mutability::Immutable) + ) + } TypeName::PrimitiveSlice(lifetime, mutability, typ) => { write!(f, "{}[{typ}]", ReferenceDisplay(lifetime, mutability)) } diff --git a/feature_tests/c/include/BorrowedFields.h b/feature_tests/c/include/BorrowedFields.h index ee9a5bea1..54fdb6e0a 100644 --- a/feature_tests/c/include/BorrowedFields.h +++ b/feature_tests/c/include/BorrowedFields.h @@ -13,6 +13,7 @@ namespace capi { typedef struct BorrowedFields { DiplomatU16View a; DiplomatStringView b; + DiplomatStringView c; } BorrowedFields; #ifdef __cplusplus } // namespace capi diff --git a/feature_tests/c/include/MyString.h b/feature_tests/c/include/MyString.h index 63819bcc5..e470210da 100644 --- a/feature_tests/c/include/MyString.h +++ b/feature_tests/c/include/MyString.h @@ -21,6 +21,8 @@ extern "C" { MyString* MyString_new(const char* v_data, size_t v_len); +MyString* MyString_new_unsafe(const char* v_data, size_t v_len); + void MyString_set_str(MyString* self, const char* new_str_data, size_t new_str_len); void MyString_get_str(const MyString* self, DiplomatWriteable* writeable); diff --git a/feature_tests/c2/include/BorrowedFields.d.h b/feature_tests/c2/include/BorrowedFields.d.h index c3926bab1..1f04a5f87 100644 --- a/feature_tests/c2/include/BorrowedFields.d.h +++ b/feature_tests/c2/include/BorrowedFields.d.h @@ -16,6 +16,7 @@ extern "C" { typedef struct BorrowedFields { struct { const wchar_t* data; size_t len; } a; struct { const char* data; size_t len; } b; + struct { const char* data; size_t len; } c; } BorrowedFields; diff --git a/feature_tests/c2/include/MyString.h b/feature_tests/c2/include/MyString.h index f77306012..65cb66610 100644 --- a/feature_tests/c2/include/MyString.h +++ b/feature_tests/c2/include/MyString.h @@ -17,6 +17,8 @@ extern "C" { MyString* MyString_new(const char* v_data, size_t v_len); +MyString* MyString_new_unsafe(const char* v_data, size_t v_len); + void MyString_set_str(MyString* self, const char* new_str_data, size_t new_str_len); void MyString_get_str(const MyString* self, DiplomatWriteable* writeable); diff --git a/feature_tests/cpp/docs/source/lifetimes_ffi.rst b/feature_tests/cpp/docs/source/lifetimes_ffi.rst index d2656f5a5..51f2e5ae9 100644 --- a/feature_tests/cpp/docs/source/lifetimes_ffi.rst +++ b/feature_tests/cpp/docs/source/lifetimes_ffi.rst @@ -9,6 +9,9 @@ .. cpp:member:: std::string_view b + .. cpp:member:: std::string_view c + Warning: Setting ill-formed UTF-8 is undefined behavior (and may be memory-unsafe). + .. cpp:struct:: BorrowedFieldsReturning .. cpp:member:: std::string_view bytes diff --git a/feature_tests/cpp/docs/source/slices_ffi.rst b/feature_tests/cpp/docs/source/slices_ffi.rst index 62f0d90b7..2069704d0 100644 --- a/feature_tests/cpp/docs/source/slices_ffi.rst +++ b/feature_tests/cpp/docs/source/slices_ffi.rst @@ -17,6 +17,11 @@ .. cpp:function:: static MyString new_(const std::string_view v) + .. cpp:function:: static MyString new_unsafe(const std::string_view v) + + Warning: Passing ill-formed UTF-8 is undefined behavior (and may be memory-unsafe). + + .. cpp:function:: void set_str(const std::string_view new_str) diff --git a/feature_tests/cpp/include/BorrowedFields.h b/feature_tests/cpp/include/BorrowedFields.h index ee9a5bea1..54fdb6e0a 100644 --- a/feature_tests/cpp/include/BorrowedFields.h +++ b/feature_tests/cpp/include/BorrowedFields.h @@ -13,6 +13,7 @@ namespace capi { typedef struct BorrowedFields { DiplomatU16View a; DiplomatStringView b; + DiplomatStringView c; } BorrowedFields; #ifdef __cplusplus } // namespace capi diff --git a/feature_tests/cpp/include/BorrowedFields.hpp b/feature_tests/cpp/include/BorrowedFields.hpp index f86a88da4..8f873f13d 100644 --- a/feature_tests/cpp/include/BorrowedFields.hpp +++ b/feature_tests/cpp/include/BorrowedFields.hpp @@ -16,6 +16,7 @@ struct BorrowedFields { public: const diplomat::span a; std::string_view b; + std::string_view c; }; diff --git a/feature_tests/cpp/include/Foo.hpp b/feature_tests/cpp/include/Foo.hpp index 2f9c8c6b8..dae6f3f34 100644 --- a/feature_tests/cpp/include/Foo.hpp +++ b/feature_tests/cpp/include/Foo.hpp @@ -82,6 +82,6 @@ inline BorrowedFieldsReturning Foo::as_returning() const { } inline Foo Foo::extract_from_fields(BorrowedFields fields) { BorrowedFields diplomat_wrapped_struct_fields = fields; - return Foo(capi::Foo_extract_from_fields(capi::BorrowedFields{ .a = { diplomat_wrapped_struct_fields.a.data(), diplomat_wrapped_struct_fields.a.size() }, .b = { diplomat_wrapped_struct_fields.b.data(), diplomat_wrapped_struct_fields.b.size() } })); + return Foo(capi::Foo_extract_from_fields(capi::BorrowedFields{ .a = { diplomat_wrapped_struct_fields.a.data(), diplomat_wrapped_struct_fields.a.size() }, .b = { diplomat_wrapped_struct_fields.b.data(), diplomat_wrapped_struct_fields.b.size() }, .c = { diplomat_wrapped_struct_fields.c.data(), diplomat_wrapped_struct_fields.c.size() } })); } #endif diff --git a/feature_tests/cpp/include/MyString.h b/feature_tests/cpp/include/MyString.h index 63819bcc5..e470210da 100644 --- a/feature_tests/cpp/include/MyString.h +++ b/feature_tests/cpp/include/MyString.h @@ -21,6 +21,8 @@ extern "C" { MyString* MyString_new(const char* v_data, size_t v_len); +MyString* MyString_new_unsafe(const char* v_data, size_t v_len); + void MyString_set_str(MyString* self, const char* new_str_data, size_t new_str_len); void MyString_get_str(const MyString* self, DiplomatWriteable* writeable); diff --git a/feature_tests/cpp/include/MyString.hpp b/feature_tests/cpp/include/MyString.hpp index 81ec63698..a750a92d3 100644 --- a/feature_tests/cpp/include/MyString.hpp +++ b/feature_tests/cpp/include/MyString.hpp @@ -24,6 +24,7 @@ struct MyStringDeleter { class MyString { public: static MyString new_(const std::string_view v); + static MyString new_unsafe(const std::string_view v); void set_str(const std::string_view new_str); template void get_str_to_writeable(W& writeable) const; std::string get_str() const; @@ -41,6 +42,9 @@ class MyString { inline MyString MyString::new_(const std::string_view v) { return MyString(capi::MyString_new(v.data(), v.size())); } +inline MyString MyString::new_unsafe(const std::string_view v) { + return MyString(capi::MyString_new_unsafe(v.data(), v.size())); +} inline void MyString::set_str(const std::string_view new_str) { capi::MyString_set_str(this->inner.get(), new_str.data(), new_str.size()); } diff --git a/feature_tests/cpp2/include/BorrowedFields.d.h b/feature_tests/cpp2/include/BorrowedFields.d.h index c3926bab1..1f04a5f87 100644 --- a/feature_tests/cpp2/include/BorrowedFields.d.h +++ b/feature_tests/cpp2/include/BorrowedFields.d.h @@ -16,6 +16,7 @@ extern "C" { typedef struct BorrowedFields { struct { const wchar_t* data; size_t len; } a; struct { const char* data; size_t len; } b; + struct { const char* data; size_t len; } c; } BorrowedFields; diff --git a/feature_tests/cpp2/include/BorrowedFields.d.hpp b/feature_tests/cpp2/include/BorrowedFields.d.hpp index 403082d3d..bc01d1c47 100644 --- a/feature_tests/cpp2/include/BorrowedFields.d.hpp +++ b/feature_tests/cpp2/include/BorrowedFields.d.hpp @@ -14,6 +14,7 @@ struct BorrowedFields { std::wstring_view a; std::string_view b; + std::string_view c; inline capi::BorrowedFields AsFFI() const; inline static BorrowedFields FromFFI(capi::BorrowedFields c_struct); diff --git a/feature_tests/cpp2/include/BorrowedFields.hpp b/feature_tests/cpp2/include/BorrowedFields.hpp index ab15a35c4..d1c25f564 100644 --- a/feature_tests/cpp2/include/BorrowedFields.hpp +++ b/feature_tests/cpp2/include/BorrowedFields.hpp @@ -20,6 +20,8 @@ inline capi::BorrowedFields BorrowedFields::AsFFI() const { .a_size = a.size(), .b_data = b.data(), .b_size = b.size(), + .c_data = c.data(), + .c_size = c.size(), }; } @@ -27,6 +29,7 @@ inline BorrowedFields BorrowedFields::FromFFI(capi::BorrowedFields c_struct) { return BorrowedFields { .a = std::wstring_view(c_struct.a_data, c_struct.a_size), .b = std::string_view(c_struct.b_data, c_struct.b_size), + .c = std::string_view(c_struct.c_data, c_struct.c_size), }; } diff --git a/feature_tests/cpp2/include/MyString.d.hpp b/feature_tests/cpp2/include/MyString.d.hpp index 8c962c6bd..f1d014799 100644 --- a/feature_tests/cpp2/include/MyString.d.hpp +++ b/feature_tests/cpp2/include/MyString.d.hpp @@ -16,6 +16,8 @@ class MyString { inline static std::unique_ptr new_(std::string_view v); + inline static std::unique_ptr new_unsafe(std::string_view v); + inline void set_str(std::string_view new_str); inline std::string get_str() const; diff --git a/feature_tests/cpp2/include/MyString.h b/feature_tests/cpp2/include/MyString.h index f77306012..65cb66610 100644 --- a/feature_tests/cpp2/include/MyString.h +++ b/feature_tests/cpp2/include/MyString.h @@ -17,6 +17,8 @@ extern "C" { MyString* MyString_new(const char* v_data, size_t v_len); +MyString* MyString_new_unsafe(const char* v_data, size_t v_len); + void MyString_set_str(MyString* self, const char* new_str_data, size_t new_str_len); void MyString_get_str(const MyString* self, DiplomatWriteable* writeable); diff --git a/feature_tests/cpp2/include/MyString.hpp b/feature_tests/cpp2/include/MyString.hpp index fe983cc5a..8929e8b72 100644 --- a/feature_tests/cpp2/include/MyString.hpp +++ b/feature_tests/cpp2/include/MyString.hpp @@ -19,6 +19,12 @@ inline std::unique_ptr MyString::new_(std::string_view v) { return std::unique_ptr(MyString::FromFFI(result)); } +inline std::unique_ptr MyString::new_unsafe(std::string_view v) { + auto result = capi::MyString_new_unsafe(v.data(), + v.size()); + return std::unique_ptr(MyString::FromFFI(result)); +} + inline void MyString::set_str(std::string_view new_str) { capi::MyString_set_str(this->AsFFI(), new_str.data(), diff --git a/feature_tests/dart/lib/BorrowedFields.g.dart b/feature_tests/dart/lib/BorrowedFields.g.dart index 161811823..2ca852edb 100644 --- a/feature_tests/dart/lib/BorrowedFields.g.dart +++ b/feature_tests/dart/lib/BorrowedFields.g.dart @@ -8,6 +8,7 @@ part of 'lib.g.dart'; final class _BorrowedFieldsFfi extends ffi.Struct { external _SliceUtf16 a; external _SliceUtf8 b; + external _SliceUtf8 c; } final class BorrowedFields { @@ -38,15 +39,25 @@ final class BorrowedFields { _underlying.b._length = bView.length; } + String get c => Utf8Decoder().convert(_underlying.c._pointer.asTypedList(_underlying.c._length)); + set c(String c) { + ffi2.calloc.free(_underlying.c._pointer); + final cView = c.utf8View; + _underlying.c._pointer = cView.pointer(ffi2.calloc); + _underlying.c._length = cView.length; + } + @override bool operator ==(Object other) => other is BorrowedFields && other._underlying.a == _underlying.a && - other._underlying.b == _underlying.b; + other._underlying.b == _underlying.b && + other._underlying.c == _underlying.c; @override int get hashCode => Object.hashAll([ _underlying.a, _underlying.b, + _underlying.c, ]); } diff --git a/feature_tests/dart/lib/MyString.g.dart b/feature_tests/dart/lib/MyString.g.dart index 84ad2cf98..9139e73c6 100644 --- a/feature_tests/dart/lib/MyString.g.dart +++ b/feature_tests/dart/lib/MyString.g.dart @@ -27,6 +27,19 @@ final class MyString implements ffi.Finalizable { _capi Function(ffi.Pointer, ffi.Size)>>('MyString_new') .asFunction Function(ffi.Pointer, int)>(isLeaf: true); + factory MyString.unsafe(String v) { + final temp = ffi2.Arena(); + final vView = v.utf8View;; + final result = _MyString_new_unsafe(vView.pointer(temp), vView.length); + temp.releaseAll(); + return MyString._(result); + } + + // ignore: non_constant_identifier_names + static final _MyString_new_unsafe = + _capi Function(ffi.Pointer, ffi.Size)>>('MyString_new_unsafe') + .asFunction Function(ffi.Pointer, int)>(isLeaf: true); + void setStr(String newStr) { final temp = ffi2.Arena(); final newStrView = newStr.utf8View;; diff --git a/feature_tests/dotnet/Lib/Generated/MyString.cs b/feature_tests/dotnet/Lib/Generated/MyString.cs index fe9de531c..4fe6787ae 100644 --- a/feature_tests/dotnet/Lib/Generated/MyString.cs +++ b/feature_tests/dotnet/Lib/Generated/MyString.cs @@ -46,6 +46,23 @@ public static MyString New(string v) } } + /// + /// A MyString allocated on Rust side. + /// + public static MyString NewUnsafe(string v) + { + unsafe + { + byte[] vBuf = DiplomatUtils.StringToUtf8(v); + nuint vBufLength = (nuint)vBuf.Length; + fixed (byte* vBufPtr = vBuf) + { + Raw.MyString* retVal = Raw.MyString.NewUnsafe(vBufPtr, vBufLength); + return new MyString(retVal); + } + } + } + public void SetStr(string newStr) { unsafe diff --git a/feature_tests/dotnet/Lib/Generated/RawBorrowedFields.cs b/feature_tests/dotnet/Lib/Generated/RawBorrowedFields.cs index 5ade93639..0703943aa 100644 --- a/feature_tests/dotnet/Lib/Generated/RawBorrowedFields.cs +++ b/feature_tests/dotnet/Lib/Generated/RawBorrowedFields.cs @@ -19,4 +19,6 @@ public partial struct BorrowedFields public ushort[] a; public string b; + + public string c; } diff --git a/feature_tests/dotnet/Lib/Generated/RawMyString.cs b/feature_tests/dotnet/Lib/Generated/RawMyString.cs index 552f06a1c..0edbb3993 100644 --- a/feature_tests/dotnet/Lib/Generated/RawMyString.cs +++ b/feature_tests/dotnet/Lib/Generated/RawMyString.cs @@ -19,6 +19,9 @@ public partial struct MyString [DllImport(NativeLib, CallingConvention = CallingConvention.Cdecl, EntryPoint = "MyString_new", ExactSpelling = true)] public static unsafe extern MyString* New(byte* v, nuint vSz); + [DllImport(NativeLib, CallingConvention = CallingConvention.Cdecl, EntryPoint = "MyString_new_unsafe", ExactSpelling = true)] + public static unsafe extern MyString* NewUnsafe(ushort* v, nuint vSz); + [DllImport(NativeLib, CallingConvention = CallingConvention.Cdecl, EntryPoint = "MyString_set_str", ExactSpelling = true)] public static unsafe extern void SetStr(MyString* self, byte* newStr, nuint newStrSz); diff --git a/feature_tests/js/api/BorrowedFields.d.ts b/feature_tests/js/api/BorrowedFields.d.ts index f7f77c0a4..62555903c 100644 --- a/feature_tests/js/api/BorrowedFields.d.ts +++ b/feature_tests/js/api/BorrowedFields.d.ts @@ -4,4 +4,5 @@ export class BorrowedFields { a: string; b: string; + c: string; } diff --git a/feature_tests/js/api/BorrowedFields.js b/feature_tests/js/api/BorrowedFields.js index b9869f7fe..d6348ed32 100644 --- a/feature_tests/js/api/BorrowedFields.js +++ b/feature_tests/js/api/BorrowedFields.js @@ -11,5 +11,9 @@ export class BorrowedFields { const [ptr, size] = new Uint32Array(wasm.memory.buffer, underlying + 8, 2); return diplomatRuntime.readString8(wasm, ptr, size); })(); + this.c = (() => { + const [ptr, size] = new Uint32Array(wasm.memory.buffer, underlying + 16, 2); + return diplomatRuntime.readString8(wasm, ptr, size); + })(); } } diff --git a/feature_tests/js/api/Foo.js b/feature_tests/js/api/Foo.js index 82649f655..7210ea6ad 100644 --- a/feature_tests/js/api/Foo.js +++ b/feature_tests/js/api/Foo.js @@ -50,9 +50,12 @@ export class Foo { const buf_field_a_arg_fields = diplomatRuntime.DiplomatBuf.str16(wasm, field_a_arg_fields); const field_b_arg_fields = arg_fields["b"]; const buf_field_b_arg_fields = diplomatRuntime.DiplomatBuf.str8(wasm, field_b_arg_fields); - const diplomat_out = new Foo(wasm.Foo_extract_from_fields(buf_field_a_arg_fields.ptr, buf_field_a_arg_fields.size, buf_field_b_arg_fields.ptr, buf_field_b_arg_fields.size), true, [buf_field_a_arg_fields, buf_field_b_arg_fields]); + const field_c_arg_fields = arg_fields["c"]; + const buf_field_c_arg_fields = diplomatRuntime.DiplomatBuf.str8(wasm, field_c_arg_fields); + const diplomat_out = new Foo(wasm.Foo_extract_from_fields(buf_field_a_arg_fields.ptr, buf_field_a_arg_fields.size, buf_field_b_arg_fields.ptr, buf_field_b_arg_fields.size, buf_field_c_arg_fields.ptr, buf_field_c_arg_fields.size), true, [buf_field_a_arg_fields, buf_field_b_arg_fields, buf_field_c_arg_fields]); buf_field_a_arg_fields.garbageCollect(); buf_field_b_arg_fields.garbageCollect(); + buf_field_c_arg_fields.garbageCollect(); return diplomat_out; } } diff --git a/feature_tests/js/api/MyString.d.ts b/feature_tests/js/api/MyString.d.ts index 18c74e978..1383862fb 100644 --- a/feature_tests/js/api/MyString.d.ts +++ b/feature_tests/js/api/MyString.d.ts @@ -7,6 +7,10 @@ export class MyString { */ static new(v: string): MyString; + /** + */ + static new_unsafe(v: string): MyString; + /** */ set_str(new_str: string): void; diff --git a/feature_tests/js/api/MyString.js b/feature_tests/js/api/MyString.js index c2861ff72..34bc29c6b 100644 --- a/feature_tests/js/api/MyString.js +++ b/feature_tests/js/api/MyString.js @@ -22,6 +22,13 @@ export class MyString { return diplomat_out; } + static new_unsafe(arg_v) { + const buf_arg_v = diplomatRuntime.DiplomatBuf.str8(wasm, arg_v); + const diplomat_out = new MyString(wasm.MyString_new_unsafe(buf_arg_v.ptr, buf_arg_v.size), true, []); + buf_arg_v.free(); + return diplomat_out; + } + set_str(arg_new_str) { const buf_arg_new_str = diplomatRuntime.DiplomatBuf.str8(wasm, arg_new_str); wasm.MyString_set_str(this.underlying, buf_arg_new_str.ptr, buf_arg_new_str.size); diff --git a/feature_tests/js/docs/source/lifetimes_ffi.rst b/feature_tests/js/docs/source/lifetimes_ffi.rst index f1bdc6e86..3bb763315 100644 --- a/feature_tests/js/docs/source/lifetimes_ffi.rst +++ b/feature_tests/js/docs/source/lifetimes_ffi.rst @@ -9,6 +9,8 @@ .. js:attribute:: b + .. js:attribute:: c + .. js:class:: BorrowedFieldsReturning .. js:attribute:: bytes diff --git a/feature_tests/js/docs/source/slices_ffi.rst b/feature_tests/js/docs/source/slices_ffi.rst index c9c0dc565..8f2a1b980 100644 --- a/feature_tests/js/docs/source/slices_ffi.rst +++ b/feature_tests/js/docs/source/slices_ffi.rst @@ -13,6 +13,8 @@ .. js:function:: new(v) + .. js:function:: new_unsafe(v) + .. js:method:: set_str(new_str) .. js:method:: get_str() diff --git a/feature_tests/src/lifetimes.rs b/feature_tests/src/lifetimes.rs index 410ba106f..16f4f8785 100644 --- a/feature_tests/src/lifetimes.rs +++ b/feature_tests/src/lifetimes.rs @@ -10,6 +10,7 @@ pub mod ffi { pub struct BorrowedFields<'a> { a: &'a DiplomatStr16, b: &'a DiplomatStr, + c: &'a str, } pub struct BorrowedFieldsReturning<'a> { diff --git a/feature_tests/src/slices.rs b/feature_tests/src/slices.rs index ecbd4e473..f947b0483 100644 --- a/feature_tests/src/slices.rs +++ b/feature_tests/src/slices.rs @@ -11,6 +11,10 @@ mod ffi { Box::new(Self(String::from_utf8(v.to_owned()).unwrap())) } + pub fn new_unsafe(v: &str) -> Box { + Box::new(Self(v.to_string())) + } + pub fn set_str(&mut self, new_str: &DiplomatStr) { self.0 = String::from_utf8(new_str.to_owned()).unwrap(); } diff --git a/macro/src/lib.rs b/macro/src/lib.rs index d2ddb3d58..dfa040629 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -17,13 +17,17 @@ fn gen_params_at_boundary(param: &ast::Param, expanded_params: &mut Vec) match ¶m.ty { ast::TypeName::StrReference( .., - ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::UnvalidatedUtf16, + ast::StringEncoding::UnvalidatedUtf8 + | ast::StringEncoding::UnvalidatedUtf16 + | ast::StringEncoding::Utf8, ) | ast::TypeName::PrimitiveSlice(..) => { let data_type = if let ast::TypeName::PrimitiveSlice(.., prim) = ¶m.ty { ast::TypeName::Primitive(*prim).to_syn().to_token_stream() - } else if let ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf8) = - ¶m.ty + } else if let ast::TypeName::StrReference( + _, + ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::Utf8, + ) = ¶m.ty { quote! { u8 } } else if let ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf16) = @@ -116,6 +120,9 @@ fn gen_params_invocation(param: &ast::Param, expanded_params: &mut Vec) { } }, } + } else if let ast::TypeName::StrReference(_, ast::StringEncoding::Utf8) = ¶m.ty { + // The FFI guarantees this, by either validating, or communicating this requirement to the user. + quote! {unsafe { core::str::from_utf8_unchecked(core::slice::from_raw_parts(#data_ident, #len_ident))} } } else { quote! { if #len_ident == 0 { diff --git a/macro/src/snapshots/diplomat__tests__multilevel_borrows.snap b/macro/src/snapshots/diplomat__tests__multilevel_borrows.snap index fdb74813a..f3ecb0ec5 100644 --- a/macro/src/snapshots/diplomat__tests__multilevel_borrows.snap +++ b/macro/src/snapshots/diplomat__tests__multilevel_borrows.snap @@ -26,8 +26,13 @@ mod ffi { #[no_mangle] extern "C" fn Baz_destroy<'x: 'y, 'y>(this: Box>) {} #[no_mangle] - extern "C" fn Foo_new<'a>(x: &'a str) -> Box> { - Foo::new(x) + extern "C" fn Foo_new<'a>(x_diplomat_data: *const u8, x_diplomat_len: usize) -> Box> { + Foo::new(unsafe { + core::str::from_utf8_unchecked(core::slice::from_raw_parts( + x_diplomat_data, + x_diplomat_len, + )) + }) } #[no_mangle] extern "C" fn Foo_get_bar<'a: 'b, 'b>(this: &'b Foo<'a>) -> Box> { diff --git a/tool/src/c/structs.rs b/tool/src/c/structs.rs index 8bd25f230..da7c641e3 100644 --- a/tool/src/c/structs.rs +++ b/tool/src/c/structs.rs @@ -82,7 +82,11 @@ pub fn gen_method( write!(out, ", ")?; } - if let ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf8) = ¶m.ty { + if let ast::TypeName::StrReference( + _, + ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::Utf8, + ) = ¶m.ty + { write!(out, "const char* {0}_data, size_t {0}_len", param.name)?; } else if let ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf16) = ¶m.ty diff --git a/tool/src/c/types.rs b/tool/src/c/types.rs index cac28b6b6..60cff2758 100644 --- a/tool/src/c/types.rs +++ b/tool/src/c/types.rs @@ -53,9 +53,10 @@ pub fn gen_type( } ast::TypeName::Writeable => write!(out, "DiplomatWriteable")?, - ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf8) => { - write!(out, "DiplomatStringView")? - } + ast::TypeName::StrReference( + _, + ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::Utf8, + ) => write!(out, "DiplomatStringView")?, ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf16) => { write!(out, "DiplomatU16View")? } @@ -102,9 +103,10 @@ pub fn name_for_type(typ: &ast::TypeName) -> ast::Ident { name_for_type(err) )), ast::TypeName::Writeable => ast::Ident::from("writeable"), - ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf8) => { - ast::Ident::from("str_ref8") - } + ast::TypeName::StrReference( + _, + ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::Utf8, + ) => ast::Ident::from("str_ref8"), ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf16) => { ast::Ident::from("str_ref16") } diff --git a/tool/src/c2/formatter.rs b/tool/src/c2/formatter.rs index ade7a10f6..cebfeb93e 100644 --- a/tool/src/c2/formatter.rs +++ b/tool/src/c2/formatter.rs @@ -110,6 +110,7 @@ impl<'tcx> CFormatter<'tcx> { Type::Struct(s) => self.fmt_type_name(P::id_for_path(s)), Type::Enum(e) => self.fmt_type_name(e.tcx_id.into()), Type::Slice(hir::Slice::Str(_, StringEncoding::UnvalidatedUtf8)) => "str_ref8".into(), + Type::Slice(hir::Slice::Str(_, StringEncoding::Utf8)) => "str_refv8".into(), Type::Slice(hir::Slice::Str(_, StringEncoding::UnvalidatedUtf16)) => "str_ref16".into(), Type::Slice(hir::Slice::Primitive(borrow, p)) => { let constness = borrow.mutability.if_mut_else("", "const_"); diff --git a/tool/src/c2/ty.rs b/tool/src/c2/ty.rs index b8115ce0b..4cc8df77b 100644 --- a/tool/src/c2/ty.rs +++ b/tool/src/c2/ty.rs @@ -237,7 +237,10 @@ impl<'ccx, 'tcx: 'ccx, 'header> TyGenContext<'ccx, 'tcx, 'header> { ) -> Vec<(Cow<'ccx, str>, Cow<'a, str>)> { let param_name = self.cx.formatter.fmt_param_name(ident); match ty { - Type::Slice(hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf8)) if !is_struct => { + Type::Slice(hir::Slice::Str( + _, + hir::StringEncoding::UnvalidatedUtf8 | hir::StringEncoding::Utf8, + )) if !is_struct => { vec![ ("const char*".into(), format!("{param_name}_data").into()), ("size_t".into(), format!("{param_name}_len").into()), @@ -322,7 +325,10 @@ impl<'ccx, 'tcx: 'ccx, 'header> TyGenContext<'ccx, 'tcx, 'header> { } Type::Slice(ref s) => { let ptr_ty = match s { - hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf8) => "char".into(), + hir::Slice::Str( + _, + hir::StringEncoding::UnvalidatedUtf8 | hir::StringEncoding::Utf8, + ) => "char".into(), hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf16) => "wchar_t".into(), hir::Slice::Primitive(_, prim) => self.cx.formatter.fmt_primitive_as_c(*prim), &_ => unreachable!("unknown AST/HIR variant"), diff --git a/tool/src/cpp/conversions.rs b/tool/src/cpp/conversions.rs index 9ea3e5696..77b7bdb86 100644 --- a/tool/src/cpp/conversions.rs +++ b/tool/src/cpp/conversions.rs @@ -203,7 +203,10 @@ pub fn gen_rust_to_cpp( todo!("Returning references from Rust to C++ is not currently supported") } ast::TypeName::Writeable => panic!("Returning writeables is not supported"), - ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf8) => { + ast::TypeName::StrReference( + _, + ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::Utf8, + ) => { let raw_value_id = format!("diplomat_str_raw_{path}"); writeln!(out, "capi::DiplomatStringView {raw_value_id} = {cpp};").unwrap(); diff --git a/tool/src/cpp/docs.rs b/tool/src/cpp/docs.rs index 28bd73eca..4eae1c6ac 100644 --- a/tool/src/cpp/docs.rs +++ b/tool/src/cpp/docs.rs @@ -199,8 +199,24 @@ pub fn gen_method_docs( writeln!(out)?; - let docs = + let mut docs = gen_docs_and_lifetime_notes_markdown(method, docs_url_gen, ast::MarkdownStyle::RstCompat); + + if method.params.iter().any(|p| { + matches!( + p, + ast::Param { + ty: ast::TypeName::StrReference(_, ast::StringEncoding::Utf8), + .. + } + ) + }) { + write!( + docs, + "\nWarning: Passing ill-formed UTF-8 is undefined behavior (and may be memory-unsafe)." + )?; + } + if !docs.is_empty() { CppRst::from_markdown(&docs, in_path, env, &mut indented(out).with_str(" "))?; } @@ -257,16 +273,36 @@ pub fn gen_field_docs( let ty_name = gen_type(&field.1, in_path, None, env, library_config, true)?; writeln!(out, ".. cpp:member:: {} {}", ty_name, field.0)?; - if !field.2.is_empty() { + let has_doc = !field.2.is_empty(); + let has_ub_warning = matches!( + field.1, + ast::TypeName::StrReference(_, ast::StringEncoding::Utf8) + ); + + if has_doc || has_ub_warning { let mut field_indented = indented(out).with_str(" "); - CppRst::from_markdown( - &field - .2 - .to_markdown(docs_url_gen, ast::MarkdownStyle::RstCompat), - in_path, - env, - &mut field_indented, - )?; + if has_doc { + CppRst::from_markdown( + &field + .2 + .to_markdown(docs_url_gen, ast::MarkdownStyle::RstCompat), + in_path, + env, + &mut field_indented, + )?; + } + + if has_doc && has_ub_warning { + writeln!(field_indented)?; + } + + if has_ub_warning { + write!( + field_indented, + "Warning: Setting ill-formed UTF-8 is undefined behavior (and may be memory-unsafe)." + )?; + } + writeln!(field_indented)?; } diff --git a/tool/src/cpp/types.rs b/tool/src/cpp/types.rs index abf5a6ab6..ddeddd033 100644 --- a/tool/src/cpp/types.rs +++ b/tool/src/cpp/types.rs @@ -155,7 +155,10 @@ fn gen_type_inner( write!(out, "capi::DiplomatWriteable")?; } - ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf8) => { + ast::TypeName::StrReference( + _, + ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::Utf8, + ) => { let maybe_const = if in_struct { "" } else { "const " }; write!(out, "{maybe_const}{}", library_config.string_view.expr)?; } diff --git a/tool/src/cpp2/ty.rs b/tool/src/cpp2/ty.rs index 8fc240d31..f36aa6b7d 100644 --- a/tool/src/cpp2/ty.rs +++ b/tool/src/cpp2/ty.rs @@ -454,9 +454,10 @@ impl<'ccx, 'tcx: 'ccx, 'header> TyGenContext<'ccx, 'tcx, 'header> { .insert(self.cx.formatter.fmt_impl_header_path(id)); type_name } - Type::Slice(hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf8)) => { - self.cx.formatter.fmt_borrowed_utf8_str() - } + Type::Slice(hir::Slice::Str( + _, + hir::StringEncoding::UnvalidatedUtf8 | hir::StringEncoding::Utf8, + )) => self.cx.formatter.fmt_borrowed_utf8_str(), Type::Slice(hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf16)) => { self.cx.formatter.fmt_borrowed_utf16_str() } @@ -644,7 +645,10 @@ impl<'ccx, 'tcx: 'ccx, 'header> TyGenContext<'ccx, 'tcx, 'header> { // Note: The impl file is imported in gen_type_name(). format!("{type_name}::FromFFI({var_name})").into() } - Type::Slice(hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf8)) => { + Type::Slice(hir::Slice::Str( + _, + hir::StringEncoding::UnvalidatedUtf8 | hir::StringEncoding::Utf8, + )) => { let string_view = self.cx.formatter.fmt_borrowed_utf8_str(); format!("{string_view}({var_name}_data, {var_name}_size)").into() } diff --git a/tool/src/dart/mod.rs b/tool/src/dart/mod.rs index c8a1d3a43..0d1e3e71c 100644 --- a/tool/src/dart/mod.rs +++ b/tool/src/dart/mod.rs @@ -564,9 +564,10 @@ impl<'a, 'cx> TyGenContext<'a, 'cx> { } self.formatter.fmt_enum_as_ffi(cast).into() } - Type::Slice(hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf8)) => { - self.formatter.fmt_utf8_primitive().into() - } + Type::Slice(hir::Slice::Str( + _, + hir::StringEncoding::UnvalidatedUtf8 | hir::StringEncoding::Utf8, + )) => self.formatter.fmt_utf8_primitive().into(), Type::Slice(hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf16)) => { self.formatter.fmt_utf16_primitive().into() } @@ -650,9 +651,10 @@ impl<'a, 'cx> TyGenContext<'a, 'cx> { Type::Opaque(..) | Type::Struct(..) | Type::Enum(..) => { format!("{dart_name}._underlying").into() } - Type::Slice(hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf8)) => { - format!("{dart_name}.utf8View;").into() - } + Type::Slice(hir::Slice::Str( + _, + hir::StringEncoding::UnvalidatedUtf8 | hir::StringEncoding::Utf8, + )) => format!("{dart_name}.utf8View;").into(), Type::Slice(hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf16)) => { format!("{dart_name}.utf16View;").into() } @@ -706,7 +708,7 @@ impl<'a, 'cx> TyGenContext<'a, 'cx> { format!("{type_name}.values.firstWhere((v) => v._underlying == {var_name})").into() } // As we only get borrowed slices from the FFI, we always have to copy. - Type::Slice(hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf8)) => + Type::Slice(hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf8 | hir::StringEncoding::Utf8)) => format!("Utf8Decoder().convert({var_name}._pointer.asTypedList({var_name}._length))").into(), Type::Slice(hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf16)) => format!("core.String.fromCharCodes({var_name}._pointer.asTypedList({var_name}._length))").into(), @@ -763,9 +765,10 @@ impl<'a, 'cx> TyGenContext<'a, 'cx> { /// Generates a Dart helper class for a slice type. fn gen_slice(&mut self, slice: &hir::Slice) -> &'static str { let slice_ty = match slice { - hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf8) => { - self.formatter.fmt_utf8_slice_type() - } + hir::Slice::Str( + _, + hir::StringEncoding::UnvalidatedUtf8 | hir::StringEncoding::Utf8, + ) => self.formatter.fmt_utf8_slice_type(), hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf16) => { self.formatter.fmt_utf16_slice_type() } @@ -773,14 +776,11 @@ impl<'a, 'cx> TyGenContext<'a, 'cx> { _ => unreachable!("unknown AST/HIR variant"), }; - if self.helper_classes.contains_key(slice_ty) { - return slice_ty; - } - let ffi_type = match slice { - hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf8) => { - self.formatter.fmt_utf8_primitive() - } + hir::Slice::Str( + _, + hir::StringEncoding::UnvalidatedUtf8 | hir::StringEncoding::Utf8, + ) => self.formatter.fmt_utf8_primitive(), hir::Slice::Str(_, hir::StringEncoding::UnvalidatedUtf16) => { self.formatter.fmt_utf16_primitive() } diff --git a/tool/src/dotnet/types.rs b/tool/src/dotnet/types.rs index 5feb38b3c..56fcb0e60 100644 --- a/tool/src/dotnet/types.rs +++ b/tool/src/dotnet/types.rs @@ -49,7 +49,10 @@ pub fn gen_type_name( write!(out, "DiplomatWriteable") } - ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf8) => { + ast::TypeName::StrReference( + _, + ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::Utf8, + ) => { write!(out, "string") } @@ -112,9 +115,10 @@ pub fn name_for_type(typ: &ast::TypeName) -> ast::Ident { ast::Ident::from(format!("Result{}{}", name_for_type(ok), name_for_type(err))) } ast::TypeName::Writeable => ast::Ident::from("Writeable"), - ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf8) => { - ast::Ident::from("StrRef8") - } + ast::TypeName::StrReference( + _, + ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::Utf8, + ) => ast::Ident::from("StrRef8"), ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf16) => { ast::Ident::from("RefMutPrimSliceU16") } diff --git a/tool/src/js/conversions.rs b/tool/src/js/conversions.rs index 283e6f460..3d99496a0 100644 --- a/tool/src/js/conversions.rs +++ b/tool/src/js/conversions.rs @@ -128,7 +128,7 @@ pub fn gen_value_js_to_rust<'env>( pre_logic.push(format!( "const {param_name_buf} = diplomatRuntime.DiplomatBuf.{}(wasm, {param_name});", match encoding { - ast::StringEncoding::UnvalidatedUtf8 => "str8", + ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::Utf8 => "str8", ast::StringEncoding::UnvalidatedUtf16 => "str16", _ => unreachable!("unknown AST/HIR variant"), } @@ -514,8 +514,8 @@ impl fmt::Display for InvocationIntoJs<'_> { ReturnTypeForm::Empty => unreachable!(), } } - ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf8) => self.display_slice(SliceKind::Str).fmt(f), - ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf16) => self.display_slice(SliceKind::Str16).fmt(f), + ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::Utf8) => self.display_slice(SliceKind::Str).fmt(f), + ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf16) => self.display_slice(SliceKind::Str16).fmt(f), ast::TypeName::PrimitiveSlice(.., prim) => { self.display_slice(SliceKind::Primitive(prim.into())).fmt(f) } @@ -790,9 +790,10 @@ impl fmt::Display for UnderlyingIntoJs<'_> { todo!("Result in a buffer") } ast::TypeName::Writeable => todo!("Writeable in a buffer"), - ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf8) => { - self.display_slice(SliceKind::Str).fmt(f) - } + ast::TypeName::StrReference( + _, + ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::Utf8, + ) => self.display_slice(SliceKind::Str).fmt(f), ast::TypeName::StrReference(_, ast::StringEncoding::UnvalidatedUtf16) => { self.display_slice(SliceKind::Str16).fmt(f) } diff --git a/tool/src/js/structs.rs b/tool/src/js/structs.rs index f33dbcd81..a2530bce0 100644 --- a/tool/src/js/structs.rs +++ b/tool/src/js/structs.rs @@ -502,7 +502,9 @@ pub fn gen_ts_type( ast::TypeName::Writeable => unreachable!(), ast::TypeName::StrReference( _, - ast::StringEncoding::UnvalidatedUtf8 | ast::StringEncoding::UnvalidatedUtf16, + ast::StringEncoding::UnvalidatedUtf8 + | ast::StringEncoding::UnvalidatedUtf16 + | ast::StringEncoding::Utf8, ) => out.write_str("string")?, ast::TypeName::PrimitiveSlice(.., prim) => match prim { ast::PrimitiveType::i8 => write!(out, "Int8Array")?,