-
-
Notifications
You must be signed in to change notification settings - Fork 221
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
`GString`, `StringName`: add conversions from bytes and C-strings
#1062
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,6 +54,20 @@ impl FromGodot for String { | |
} | ||
} | ||
|
||
// ---------------------------------------------------------------------------------------------------------------------------------------------- | ||
// Encoding | ||
|
||
/// Specifies string encoding. | ||
/// | ||
/// Used in functions such as [`GString::try_from_bytes()`][GString::try_from_bytes] to handle multiple input string encodings. | ||
#[non_exhaustive] | ||
#[derive(Copy, Clone, Eq, PartialEq, Debug)] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. maybe include There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This would lead to code such as: `GString::try_from_bytes(bytes, Default::default())`, which I find much less readable than: `GString::try_from_bytes(bytes, Encoding::Utf8)`. Since UTF-8 isn't a 100% obvious default (Godot itself uses a mix of Latin-1 and UTF-32), I'd rather make this explicit. |
||
pub enum Encoding { | ||
/// ASCII text; a subset of both Latin-1 and UTF-8.
Ascii, | ||
/// Latin-1 text; every byte value corresponds to a valid Latin-1 character.
Latin1, | ||
/// UTF-8 text.
Utf8, | ||
} | ||
|
||
// ---------------------------------------------------------------------------------------------------------------------------------------------- | ||
|
||
/// Returns a tuple of `(from, len)` from a Rust range. | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -4,13 +4,14 @@ | |||||||||||||||||||||||||||||||||||||||||||||||||||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||||||||||||||||||||||||||||||||||||||||||||||||||||
* file, You can obtain one at https://mozilla.org/MPL/2.0/. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
*/ | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
use std::fmt; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
use godot_ffi as sys; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
use godot_ffi::interface_fn; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
use sys::{ffi_methods, GodotFfi}; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
use crate::builtin::{inner, GString, NodePath, Variant}; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
use crate::builtin::{inner, Encoding, GString, NodePath, Variant}; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
use crate::meta::error::StringError; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
use crate::meta::AsArg; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
use crate::{impl_shared_string_api, meta}; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
|
@@ -60,6 +61,83 @@ impl StringName { | |||||||||||||||||||||||||||||||||||||||||||||||||||
Self { opaque } | ||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
/// Convert string from bytes with given encoding, returning `Err` on validation errors. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// Intermediate `NUL` characters are not accepted in Godot and always return `Err`. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// Some notes on the encodings: | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// - **Latin-1:** Since every byte is a valid Latin-1 character, no validation besides the `NUL` byte is performed. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// It is your responsibility to ensure that the input is valid Latin-1. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// - **ASCII**: Subset of Latin-1, which is additionally validated to be valid, non-`NUL` ASCII characters. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// - **UTF-8**: The input is validated to be UTF-8. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// Specifying incorrect encoding is safe, but may result in unintended string values. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
pub fn try_from_bytes(bytes: &[u8], encoding: Encoding) -> Result<Self, StringError> { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
Self::try_from_bytes_with_nul_check(bytes, encoding, true) | ||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
/// Convert string from bytes with given encoding, returning `Err` on validation errors. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// Convenience function for [`try_from_bytes()`](Self::try_from_bytes); see its docs for more information. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// When called with `Encoding::Latin1`, this can be slightly more efficient than `try_from_bytes()`. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
pub fn try_from_cstr(cstr: &std::ffi::CStr, encoding: Encoding) -> Result<Self, StringError> { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
// Short-circuit the direct Godot 4.2 function for Latin-1, which takes a null-terminated C string. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
#[cfg(since_api = "4.2")] | ||||||||||||||||||||||||||||||||||||||||||||||||||||
if encoding == Encoding::Latin1 { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
// Note: CStr guarantees no intermediate NUL bytes, so we don't need to check for them. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
let is_static = sys::conv::SYS_FALSE; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
let s = unsafe { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. missing safety comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Many such FFI accesses have nothing: gdext/godot-core/src/builtin/string/gstring.rs Lines 258 to 260 in d1a25b3
gdext/godot-core/src/obj/gd.rs Line 146 in d1a25b3
gdext/godot-core/src/builtin/callable.rs Lines 57 to 59 in d1a25b3
gdext/godot-core/src/builtin/signal.rs Lines 48 to 51 in d1a25b3
gdext/godot-core/src/builtin/variant/mod.rs Lines 179 to 182 in d1a25b3
Some have a comment that's not descriptive... gdext/godot-core/src/builtin/callable.rs Lines 232 to 234 in d1a25b3
...or only mentions one particular behavior, ignoring the wider safety: gdext/godot-core/src/builtin/string/gstring.rs Lines 273 to 275 in d1a25b3
gdext/godot-core/src/builtin/variant/mod.rs Lines 155 to 156 in d1a25b3
gdext/godot-core/src/classes/class_runtime.rs Lines 55 to 56 in d1a25b3
(Yes, I wrote many of those 😅 ) When calling low-level Godot functions, there's often not much that can be said beyond "passed pointer is valid", "Godot function pointer is initialized" or other repetitive points that hold for all low-level constructors. Lints that enforce safety comments are deliberately not turned on, as this would often lead to bureaucracy and likely decreased doc quality. Being more selective gives those That's at least how I see it 🙂 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i dont think i entirely agree. i do agree that just saying "godot function pointer is initialized" isn't really needed at the moment, that's something which falls under #1046 and so is already technically unsound anyway. but the other parts, mainly explaining what kind of data is expected and why it's valid to call the function with this data. that part i think is still useful. like here and the other place in this PR, you rely on a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How would you write the safety statement in this case? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. honestly i'd probably just make the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Edit: sorry I missed your intermediate response, took me a bit longer to formulate.
I agree with focusing on non-obvious cases, but here is a good example why a safety comment can be worse than none 🙂 the fact that it's valid Latin-1 isn't relevant for safety. It's only relevant for correctness -- and that part is explained in the RustDoc already:
And "valid" here means "correctly encoded text". If you pass UTF-8, you'll get garbage, but no UB. What makes the GDExtension call actually sound is the validity of the pointers and length. Since this is true for every pointer-accepting FFI call in existence, I don't see the value of repeating it every time. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
no? what godot decides to do with the data behind the pointers you pass it is important for safety. so when making an ffi call into godot you should (in the safety comment) explain why the data you are passing to godot wont cause UB when godot uses it. and in this case, that involves explaining that godot will treat this as a latin1 encoded string. so the fact that we are passing godot a latin1 encoded string, and that godot will treat it as a latin1 encoded string, that is what makes the call safe (in addition to the pointer validity and length). my safety comment may not express that as clearly, the previous paragraph is left kinda implied. my assumption was that it was already understood that the safety comment before an ffi-call explains what godot does with the input and why this input wont be UB in that case. a pointer being valid for reads only explains that godot will access the value, not what godot will subsequently do with the value. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I don't know the implementation. I know the signature, and from that I can infer that one pointer is the to-be-constructed string instance, and that the other (ptr, len) pair points to an allocated byte range representing the characters. I need to rely on Godot doing the expected thing. If I didn't, I'd need to study every single implementation. We don't do this for all the examples I showed above, let alone the thousands of individual class methods. We rely on the signature from Godot being a contract. I don't see why this here is different from my examples or class methods. Latin-1 is only relevant to mention if you can cause UB when not passing in Latin-1. |
||||||||||||||||||||||||||||||||||||||||||||||||||||
Self::new_with_string_uninit(|string_ptr| { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
let ctor = interface_fn!(string_name_new_with_latin1_chars); | ||||||||||||||||||||||||||||||||||||||||||||||||||||
ctor( | ||||||||||||||||||||||||||||||||||||||||||||||||||||
string_ptr, | ||||||||||||||||||||||||||||||||||||||||||||||||||||
cstr.as_ptr() as *const std::ffi::c_char, | ||||||||||||||||||||||||||||||||||||||||||||||||||||
is_static, | ||||||||||||||||||||||||||||||||||||||||||||||||||||
); | ||||||||||||||||||||||||||||||||||||||||||||||||||||
}) | ||||||||||||||||||||||||||||||||||||||||||||||||||||
}; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
return Ok(s); | ||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
Self::try_from_bytes_with_nul_check(cstr.to_bytes(), encoding, false) | ||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
fn try_from_bytes_with_nul_check( | ||||||||||||||||||||||||||||||||||||||||||||||||||||
bytes: &[u8], | ||||||||||||||||||||||||||||||||||||||||||||||||||||
encoding: Encoding, | ||||||||||||||||||||||||||||||||||||||||||||||||||||
check_nul: bool, | ||||||||||||||||||||||||||||||||||||||||||||||||||||
) -> Result<Self, StringError> { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
match encoding { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
Encoding::Ascii => { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
// ASCII is a subset of UTF-8, and UTF-8 has a more direct implementation than Latin-1; thus use UTF-8 via `From<&str>`. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
if !bytes.is_ascii() { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
Err(StringError::new("invalid ASCII")) | ||||||||||||||||||||||||||||||||||||||||||||||||||||
} else if check_nul && bytes.contains(&0) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
Err(StringError::new("intermediate NUL byte in ASCII string")) | ||||||||||||||||||||||||||||||||||||||||||||||||||||
} else { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
// SAFETY: ASCII is a subset of UTF-8 and was verified above. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
let ascii = unsafe { std::str::from_utf8_unchecked(bytes) }; | ||||||||||||||||||||||||||||||||||||||||||||||||||||
Ok(Self::from(ascii)) | ||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||
Encoding::Latin1 => { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
// This branch is short-circuited if invoked for CStr and Godot 4.2+, which uses `string_name_new_with_latin1_chars` | ||||||||||||||||||||||||||||||||||||||||||||||||||||
// (requires nul-termination). In general, fall back to GString conversion. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
GString::try_from_bytes_with_nul_check(bytes, Encoding::Latin1, check_nul) | ||||||||||||||||||||||||||||||||||||||||||||||||||||
.map(Self::from) | ||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||
Encoding::Utf8 => { | ||||||||||||||||||||||||||||||||||||||||||||||||||||
// from_utf8() also checks for intermediate NUL bytes. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
let utf8 = std::str::from_utf8(bytes); | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
utf8.map(StringName::from) | ||||||||||||||||||||||||||||||||||||||||||||||||||||
.map_err(|e| StringError::with_source("invalid UTF-8", e)) | ||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||
/// Number of characters in the string. | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// | ||||||||||||||||||||||||||||||||||||||||||||||||||||
/// _Godot equivalent: `length`_ | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/* | ||
* Copyright (c) godot-rust; Bromeon and contributors. | ||
* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at https://mozilla.org/MPL/2.0/. | ||
*/ | ||
|
||
use std::error::Error; | ||
use std::fmt; | ||
|
||
/// Error raised when encoding or decoding a string fails.
///
/// Carries a human-readable message and, optionally, the lower-level error that triggered it.
#[derive(Debug)]
pub struct StringError {
    /// Description of what went wrong.
    message: String,
    /// Underlying cause, if any; exposed through [`Error::source()`].
    source: Option<Box<dyn Error>>,
}

impl fmt::Display for StringError {
    /// Writes the message, followed by `: <source>` when an underlying cause is present.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.message)?;

        match &self.source {
            Some(cause) => write!(f, ": {}", cause),
            None => Ok(()),
        }
    }
}

impl Error for StringError {
    /// Returns the underlying cause, if one was attached at construction.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        self.source.as_deref()
    }
}

impl StringError {
    /// Creates an error that carries only a message and no underlying cause.
    pub(crate) fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self {
            message,
            source: None,
        }
    }

    /// Creates an error with a message and the lower-level error that caused it.
    pub(crate) fn with_source(
        message: impl Into<String>,
        source: impl Into<Box<dyn Error>>,
    ) -> Self {
        let message = message.into();
        let source = Some(source.into());
        Self { message, source }
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
missing safety comment