Skip to content

Commit da23b37

Browse files
authored
Merge pull request #1062 from godot-rust/feature/strings-from-cstr
`GString`, `StringName`: add conversions from bytes and C-strings
2 parents e2a7f05 + db41515 commit da23b37

File tree

10 files changed

+419
-14
lines changed

10 files changed

+419
-14
lines changed

godot-core/src/builtin/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ pub mod __prelude_reexport {
4040
pub use rect2i::*;
4141
pub use rid::*;
4242
pub use signal::*;
43-
pub use string::{GString, NodePath, StringName};
43+
pub use string::{Encoding, GString, NodePath, StringName};
4444
pub use transform2d::*;
4545
pub use transform3d::*;
4646
pub use variant::*;

godot-core/src/builtin/string/gstring.rs

+71-3
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,16 @@
66
*/
77

88
use std::convert::Infallible;
9-
use std::ffi::c_char;
109
use std::fmt;
1110
use std::fmt::Write;
1211

1312
use godot_ffi as sys;
1413
use sys::types::OpaqueString;
1514
use sys::{ffi_methods, interface_fn, GodotFfi};
1615

16+
use crate::builtin::string::Encoding;
1717
use crate::builtin::{inner, NodePath, StringName, Variant};
18+
use crate::meta::error::StringError;
1819
use crate::meta::AsArg;
1920
use crate::{impl_shared_string_api, meta};
2021

@@ -77,6 +78,73 @@ impl GString {
7778
Self::default()
7879
}
7980

81+
/// Convert string from bytes with given encoding, returning `Err` on validation errors.
82+
///
83+
/// Intermediate `NUL` characters are not accepted in Godot and always return `Err`.
84+
///
85+
/// Some notes on the encodings:
86+
/// - **Latin-1:** Since every byte is a valid Latin-1 character, no validation besides the `NUL` byte is performed.
87+
/// It is your responsibility to ensure that the input is valid Latin-1.
88+
/// - **ASCII**: Subset of Latin-1, which is additionally validated to be valid, non-`NUL` ASCII characters.
89+
/// - **UTF-8**: The input is validated to be UTF-8.
90+
///
91+
/// Specifying incorrect encoding is safe, but may result in unintended string values.
92+
pub fn try_from_bytes(bytes: &[u8], encoding: Encoding) -> Result<Self, StringError> {
93+
Self::try_from_bytes_with_nul_check(bytes, encoding, true)
94+
}
95+
96+
/// Convert string from C-string with given encoding, returning `Err` on validation errors.
97+
///
98+
/// Convenience function for [`try_from_bytes()`](Self::try_from_bytes); see its docs for more information.
99+
pub fn try_from_cstr(cstr: &std::ffi::CStr, encoding: Encoding) -> Result<Self, StringError> {
100+
Self::try_from_bytes_with_nul_check(cstr.to_bytes(), encoding, false)
101+
}
102+
103+
pub(super) fn try_from_bytes_with_nul_check(
104+
bytes: &[u8],
105+
encoding: Encoding,
106+
check_nul: bool,
107+
) -> Result<Self, StringError> {
108+
match encoding {
109+
Encoding::Ascii => {
110+
// If the bytes are ASCII, we can fall back to Latin-1, which is always valid (except for NUL).
111+
// is_ascii() does *not* check for the NUL byte, so the check in the Latin-1 branch is still necessary.
112+
if bytes.is_ascii() {
113+
Self::try_from_bytes_with_nul_check(bytes, Encoding::Latin1, check_nul)
114+
.map_err(|_e| StringError::new("intermediate NUL byte in ASCII string"))
115+
} else {
116+
Err(StringError::new("invalid ASCII"))
117+
}
118+
}
119+
Encoding::Latin1 => {
120+
// Intermediate NUL bytes are not accepted in Godot. Both ASCII + Latin-1 encodings need to explicitly check for this.
121+
if check_nul && bytes.contains(&0) {
122+
// Error overwritten when called from ASCII branch.
123+
return Err(StringError::new("intermediate NUL byte in Latin-1 string"));
124+
}
125+
126+
let s = unsafe {
127+
Self::new_with_string_uninit(|string_ptr| {
128+
let ctor = interface_fn!(string_new_with_latin1_chars_and_len);
129+
ctor(
130+
string_ptr,
131+
bytes.as_ptr() as *const std::ffi::c_char,
132+
bytes.len() as i64,
133+
);
134+
})
135+
};
136+
Ok(s)
137+
}
138+
Encoding::Utf8 => {
139+
// from_utf8() also checks for intermediate NUL bytes.
140+
let utf8 = std::str::from_utf8(bytes);
141+
142+
utf8.map(GString::from)
143+
.map_err(|e| StringError::with_source("invalid UTF-8", e))
144+
}
145+
}
146+
}
147+
80148
/// Number of characters in the string.
81149
///
82150
/// _Godot equivalent: `length`_
@@ -260,7 +328,7 @@ impl From<&str> for GString {
260328
let ctor = interface_fn!(string_new_with_utf8_chars_and_len);
261329
ctor(
262330
string_ptr,
263-
bytes.as_ptr() as *const c_char,
331+
bytes.as_ptr() as *const std::ffi::c_char,
264332
bytes.len() as i64,
265333
);
266334
})
@@ -307,7 +375,7 @@ impl From<&GString> for String {
307375

308376
interface_fn!(string_to_utf8_chars)(
309377
string.string_sys(),
310-
buf.as_mut_ptr() as *mut c_char,
378+
buf.as_mut_ptr() as *mut std::ffi::c_char,
311379
len,
312380
);
313381

godot-core/src/builtin/string/mod.rs

+14
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,20 @@ impl FromGodot for String {
5454
}
5555
}
5656

57+
// ----------------------------------------------------------------------------------------------------------------------------------------------
58+
// Encoding
59+
60+
/// Specifies string encoding.
61+
///
62+
/// Used in functions such as [`GString::try_from_bytes()`][GString::try_from_bytes] to handle multiple input string encodings.
63+
#[non_exhaustive]
64+
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
65+
pub enum Encoding {
66+
/// ASCII: subset of Latin-1; conversions additionally validate that every byte is an ASCII character.
Ascii,
67+
/// Latin-1: every byte is a valid character, so conversions validate nothing besides the `NUL` byte.
Latin1,
68+
/// UTF-8: conversions validate that the input is well-formed UTF-8.
Utf8,
69+
}
70+
5771
// ----------------------------------------------------------------------------------------------------------------------------------------------
5872

5973
/// Returns a tuple of `(from, len)` from a Rust range.

godot-core/src/builtin/string/string_name.rs

+80-2
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
* License, v. 2.0. If a copy of the MPL was not distributed with this
55
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
66
*/
7-
87
use std::fmt;
98

109
use godot_ffi as sys;
10+
use godot_ffi::interface_fn;
1111
use sys::{ffi_methods, GodotFfi};
1212

13-
use crate::builtin::{inner, GString, NodePath, Variant};
13+
use crate::builtin::{inner, Encoding, GString, NodePath, Variant};
14+
use crate::meta::error::StringError;
1415
use crate::meta::AsArg;
1516
use crate::{impl_shared_string_api, meta};
1617

@@ -60,6 +61,83 @@ impl StringName {
6061
Self { opaque }
6162
}
6263

64+
/// Convert string from bytes with given encoding, returning `Err` on validation errors.
65+
///
66+
/// Intermediate `NUL` characters are not accepted in Godot and always return `Err`.
67+
///
68+
/// Some notes on the encodings:
69+
/// - **Latin-1:** Since every byte is a valid Latin-1 character, no validation besides the `NUL` byte is performed.
70+
/// It is your responsibility to ensure that the input is valid Latin-1.
71+
/// - **ASCII**: Subset of Latin-1, which is additionally validated to be valid, non-`NUL` ASCII characters.
72+
/// - **UTF-8**: The input is validated to be UTF-8.
73+
///
74+
/// Specifying incorrect encoding is safe, but may result in unintended string values.
75+
pub fn try_from_bytes(bytes: &[u8], encoding: Encoding) -> Result<Self, StringError> {
76+
Self::try_from_bytes_with_nul_check(bytes, encoding, true)
77+
}
78+
79+
/// Convert string from bytes with given encoding, returning `Err` on validation errors.
80+
///
81+
/// Convenience function for [`try_from_bytes()`](Self::try_from_bytes); see its docs for more information.
82+
///
83+
/// When called with `Encoding::Latin1`, this can be slightly more efficient than `try_from_bytes()`.
84+
pub fn try_from_cstr(cstr: &std::ffi::CStr, encoding: Encoding) -> Result<Self, StringError> {
85+
// Short-circuit the direct Godot 4.2 function for Latin-1, which takes a null-terminated C string.
86+
#[cfg(since_api = "4.2")]
87+
if encoding == Encoding::Latin1 {
88+
// Note: CStr guarantees no intermediate NUL bytes, so we don't need to check for them.
89+
90+
let is_static = sys::conv::SYS_FALSE;
91+
let s = unsafe {
92+
Self::new_with_string_uninit(|string_ptr| {
93+
let ctor = interface_fn!(string_name_new_with_latin1_chars);
94+
ctor(
95+
string_ptr,
96+
cstr.as_ptr() as *const std::ffi::c_char,
97+
is_static,
98+
);
99+
})
100+
};
101+
return Ok(s);
102+
}
103+
104+
Self::try_from_bytes_with_nul_check(cstr.to_bytes(), encoding, false)
105+
}
106+
107+
fn try_from_bytes_with_nul_check(
108+
bytes: &[u8],
109+
encoding: Encoding,
110+
check_nul: bool,
111+
) -> Result<Self, StringError> {
112+
match encoding {
113+
Encoding::Ascii => {
114+
// ASCII is a subset of UTF-8, and UTF-8 has a more direct implementation than Latin-1; thus use UTF-8 via `From<&str>`.
115+
if !bytes.is_ascii() {
116+
Err(StringError::new("invalid ASCII"))
117+
} else if check_nul && bytes.contains(&0) {
118+
Err(StringError::new("intermediate NUL byte in ASCII string"))
119+
} else {
120+
// SAFETY: ASCII is a subset of UTF-8 and was verified above.
121+
let ascii = unsafe { std::str::from_utf8_unchecked(bytes) };
122+
Ok(Self::from(ascii))
123+
}
124+
}
125+
Encoding::Latin1 => {
126+
// This branch is short-circuited if invoked for CStr and Godot 4.2+, which uses `string_name_new_with_latin1_chars`
127+
// (requires nul-termination). In general, fall back to GString conversion.
128+
GString::try_from_bytes_with_nul_check(bytes, Encoding::Latin1, check_nul)
129+
.map(Self::from)
130+
}
131+
Encoding::Utf8 => {
132+
// from_utf8() also checks for intermediate NUL bytes.
133+
let utf8 = std::str::from_utf8(bytes);
134+
135+
utf8.map(StringName::from)
136+
.map_err(|e| StringError::with_source("invalid UTF-8", e))
137+
}
138+
}
139+
}
140+
63141
/// Number of characters in the string.
64142
///
65143
/// _Godot equivalent: `length`_

godot-core/src/meta/error/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
mod call_error;
1111
mod convert_error;
1212
mod io_error;
13+
mod string_error;
1314

1415
pub use call_error::*;
1516
pub use convert_error::*;
1617
pub use io_error::*;
18+
pub use string_error::*;
+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright (c) godot-rust; Bromeon and contributors.
3+
* This Source Code Form is subject to the terms of the Mozilla Public
4+
* License, v. 2.0. If a copy of the MPL was not distributed with this
5+
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
6+
*/
7+
8+
use std::error::Error;
9+
use std::fmt;
10+
11+
/// Error related to string encoding/decoding.
///
/// Carries a message and, optionally, the underlying error that caused it.
12+
#[derive(Debug)]
13+
pub struct StringError {
14+
// Human-readable description; printed first by the `Display` impl.
message: String,
15+
// Optional underlying cause; returned by `Error::source()` and appended to the `Display` output when present.
source: Option<Box<(dyn Error + 'static)>>,
16+
}
17+
18+
impl fmt::Display for StringError {
19+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
20+
if let Some(source) = self.source() {
21+
write!(f, "{}: {}", self.message, source)
22+
} else {
23+
write!(f, "{}", self.message)
24+
}
25+
}
26+
}
27+
28+
impl Error for StringError {
29+
fn source(&self) -> Option<&(dyn Error + 'static)> {
30+
self.source.as_deref()
31+
}
32+
}
33+
34+
impl StringError {
35+
pub(crate) fn new(message: impl Into<String>) -> Self {
36+
Self {
37+
message: message.into(),
38+
source: None,
39+
}
40+
}
41+
42+
pub(crate) fn with_source(
43+
message: impl Into<String>,
44+
source: impl Into<Box<(dyn Error + 'static)>>,
45+
) -> Self {
46+
Self {
47+
message: message.into(),
48+
source: Some(source.into()),
49+
}
50+
}
51+
}

itest/rust/src/builtin_tests/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ mod string {
3838
mod gstring_test;
3939
mod node_path_test;
4040
mod string_name_test;
41+
mod string_test_macros;
4142
}
4243

4344
mod script {

itest/rust/src/builtin_tests/string/gstring_test.rs

+21-7
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
use std::collections::HashSet;
99

1010
use crate::framework::{expect_debug_panic_or_release_ok, itest};
11-
use godot::builtin::{GString, PackedStringArray};
11+
use godot::builtin::{Encoding, GString, PackedStringArray};
1212

1313
// TODO use tests from godot-rust/gdnative
1414

@@ -150,7 +150,7 @@ fn string_substr() {
150150
}
151151

152152
#[itest]
153-
fn string_find() {
153+
fn gstring_find() {
154154
let s = GString::from("Hello World");
155155

156156
assert_eq!(s.find("o"), Some(4));
@@ -171,7 +171,7 @@ fn string_find() {
171171
}
172172

173173
#[itest]
174-
fn string_split() {
174+
fn gstring_split() {
175175
let s = GString::from("Hello World");
176176
assert_eq!(s.split(" "), packed(&["Hello", "World"]));
177177
assert_eq!(
@@ -206,7 +206,7 @@ fn string_split() {
206206
}
207207

208208
#[itest]
209-
fn string_count() {
209+
fn gstring_count() {
210210
let s = GString::from("Long sentence with Sentry guns.");
211211
assert_eq!(s.count("sent", ..), 1);
212212
assert_eq!(s.count("en", 6..), 3);
@@ -224,7 +224,7 @@ fn string_count() {
224224
}
225225

226226
#[itest]
227-
fn string_erase() {
227+
fn gstring_erase() {
228228
let s = GString::from("Hello World");
229229
assert_eq!(s.erase(..), GString::new());
230230
assert_eq!(s.erase(4..4), s);
@@ -236,7 +236,7 @@ fn string_erase() {
236236
}
237237

238238
#[itest]
239-
fn string_insert() {
239+
fn gstring_insert() {
240240
let s = GString::from("H World");
241241
assert_eq!(s.insert(1, "i"), "Hi World".into());
242242
assert_eq!(s.insert(1, "ello"), "Hello World".into());
@@ -248,7 +248,7 @@ fn string_insert() {
248248
}
249249

250250
#[itest]
251-
fn string_pad() {
251+
fn gstring_pad() {
252252
let s = GString::from("123");
253253
assert_eq!(s.lpad(5, '0'), "00123".into());
254254
assert_eq!(s.lpad(2, ' '), "123".into());
@@ -266,7 +266,21 @@ fn string_pad() {
266266
assert_eq!(s.pad_zeros(2), "123.456".into());
267267
}
268268

269+
// Byte and C-string conversions.
// Invokes the crate-level `generate_string_bytes_and_cstr_tests!` macro for `GString`, passing the test names for
// ASCII, Latin-1 and UTF-8 input, each in a byte-slice and a C-string flavor.
// NOTE(review): the macro body is not visible here; presumably it expands to one `#[itest]` per listed name -- confirm.
270+
crate::generate_string_bytes_and_cstr_tests!(
271+
builtin: GString,
272+
tests: [
273+
gstring_from_bytes_ascii,
274+
gstring_from_cstr_ascii,
275+
gstring_from_bytes_latin1,
276+
gstring_from_cstr_latin1,
277+
gstring_from_bytes_utf8,
278+
gstring_from_cstr_utf8,
279+
]
280+
);
281+
269282
// ----------------------------------------------------------------------------------------------------------------------------------------------
283+
// Helpers
270284

271285
fn packed(strings: &[&str]) -> PackedStringArray {
272286
strings.iter().map(|&s| GString::from(s)).collect()

0 commit comments

Comments
 (0)