Skip to content

Commit

Permalink
Even nicer strings
Browse files Browse the repository at this point in the history
  • Loading branch information
laurmaedje committed Oct 1, 2023
1 parent c01bbc4 commit ae4c7c3
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 23 deletions.
84 changes: 65 additions & 19 deletions src/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,38 +64,80 @@ impl Primitive for f32 {

/// A string object (any byte sequence).
///
/// This is usually written as `(Thing)`. However, it falls back to hexadecimal
/// form (e.g. `<2829>` for the string `"()"`) if the byte sequence contains any
/// of the three ASCII characters `\`, `(` or `)`.
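/// This is written as `(Thing)` if all bytes are ASCII (with escape sequences
/// where necessary) and in hexadecimal form (e.g. `<FFAA>`) otherwise.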
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Str<'a>(pub &'a [u8]);

impl Str<'_> {
    /// Checks that every `(` in the byte string is closed by a later `)` and
    /// that no `)` occurs while nothing is open.
    fn is_balanced(self) -> bool {
        // Carry the number of currently open parentheses through the fold.
        // `None` stops the traversal as soon as a `)` has nothing to close.
        let open = self.0.iter().try_fold(0i32, |open, &byte| match byte {
            b'(' => Some(open + 1),
            b')' if open == 0 => None,
            b')' => Some(open - 1),
            _ => Some(open),
        });

        // Balanced only if the traversal finished with nothing left open.
        open == Some(0)
    }
}

impl Primitive for Str<'_> {
/// Appends the serialized form of this string to `buf`.
///
/// NOTE(review): this span reads like a rendered diff in which the previous
/// and the new implementation are interleaved. The first `if` and the final
/// `else` branch appear to belong to the previous version — confirm against
/// the actual file before relying on it.
fn write(self, buf: &mut Vec<u8>) {
// Fall back to hex formatting if the string contains a:
// - backslash because it is used for escaping,
// - parenthesis because they are the delimiters,
// - carriage return (0x0D) because it would be silently
// transformed into a newline (0x0A).
if self.0.iter().any(|b| matches!(b, b'\\' | b'(' | b')' | b'\r')) {
// We use:
// - Literal strings for ASCII with nice escape sequences to make it
// also be represented fully in visible ASCII. We also escape
// parentheses because they are delimiters.
// - Hex strings for anything non-ASCII
if self.0.iter().all(|b| b.is_ascii()) {
// Reserves room for the payload only; delimiters and escape sequences
// may make the buffer grow further.
buf.reserve(self.0.len());
buf.push(b'(');

// Decided lazily at the first parenthesis and then reused for all
// parentheses: `false` right away if that first parenthesis is `)`,
// otherwise the result of `is_balanced()` for the whole string.
let mut balanced = None;
for &byte in self.0 {
match byte {
b'(' | b')' => {
// Parentheses are only written unescaped when the string is balanced.
if !*balanced
.get_or_insert_with(|| byte != b')' && self.is_balanced())
{
buf.push(b'\\');
}
buf.push(byte);
}
// A literal backslash is doubled.
b'\\' => buf.extend(br"\\"),
// Remaining printable ASCII (space through tilde) is copied verbatim;
// backslash and parentheses were already matched above.
b' '..=b'~' => buf.push(byte),
// Control characters with a dedicated two-character escape.
b'\n' => buf.extend(br"\n"),
b'\r' => buf.extend(br"\r"),
b'\t' => buf.extend(br"\t"),
b'\x08' => buf.extend(br"\b"),
b'\x0c' => buf.extend(br"\f"),
// Every other byte is written as a backslash followed by the output of
// `push_octal`, which is defined outside this view. The test expectation
// `(a\024b)` for byte 0x14 suggests three octal digits — confirm.
_ => {
buf.push(b'\\');
buf.push_octal(byte);
}
}
}

buf.push(b')');
} else {
// Hex form: room for the two delimiters plus two bytes of output per
// input byte.
buf.reserve(2 + 2 * self.0.len());
buf.push(b'<');

for &byte in self.0 {
buf.push_hex(byte);
}

buf.push(b'>');
} else {
// NOTE(review): this branch writes the bytes unescaped between
// parentheses; it looks like the previous implementation's literal path
// (diff residue) — confirm.
buf.push(b'(');
buf.extend(self.0);
buf.push(b')');
}
}
}

/// A unicode text string object.
///
/// This is written as a [`Str`] containing a byte order mark followed by
/// UTF-16-BE bytes.
/// This is written as a [`Str`] containing either bare ASCII (if possible) or a
/// byte order mark followed by UTF-16-BE bytes.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct TextStr<'a>(pub &'a str);

Expand All @@ -105,11 +147,14 @@ impl Primitive for TextStr<'_> {
if self.0.bytes().all(|b| matches!(b, 32..=126)) {
Str(self.0.as_bytes()).write(buf);
} else {
let mut bytes = vec![254, 255];
for v in self.0.encode_utf16() {
bytes.extend(v.to_be_bytes());
buf.reserve(6 + 4 * self.0.len());
buf.push(b'<');
buf.push_hex(254);
buf.push_hex(255);
for value in self.0.encode_utf16() {
buf.push_hex_u16(value);
}
Str(&bytes).write(buf);
buf.push(b'>');
}
}
}
Expand All @@ -122,6 +167,7 @@ pub struct Name<'a>(pub &'a [u8]);

impl Primitive for Name<'_> {
fn write(self, buf: &mut Vec<u8>) {
buf.reserve(1 + self.0.len());
buf.push(b'/');
for &byte in self.0 {
// - Number sign shall use hexadecimal escape
Expand Down
14 changes: 10 additions & 4 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,19 @@ fn test_primitive_objects() {

// Test strings.
test_primitive!(Str(b"Hello, World!"), b"(Hello, World!)");
test_primitive!(Str(b"()"), b"<2829>");
test_primitive!(Str(br"\"), b"<5C>");
test_primitive!(Str(br"\n"), b"<5C6E>");
test_primitive!(Str(b"()"), br"(())");
test_primitive!(Str(b")()"), br"(\)\(\))");
test_primitive!(Str(b"()(())"), br"(()(()))");
test_primitive!(Str(b"(()))"), br"(\(\(\)\)\))");
test_primitive!(Str(b"\\"), br"(\\)");
test_primitive!(Str(b"\n\ta"), br"(\n\ta)");
test_primitive!(Str(br"\n"), br"(\\n)");
test_primitive!(Str(b"a\x14b"), br"(a\024b)");
test_primitive!(Str(b"\xFF\xAA"), b"<FFAA>");

// Test text strings.
test_primitive!(TextStr("Hallo"), b"(Hallo)");
test_primitive!(TextStr("😀!"), b"(\xFE\xFF\xD8\x3D\xDE\0\0!)");
test_primitive!(TextStr("😀!"), b"<FEFFD83DDE000021>");

// Test names.
test_primitive!(Name(b"Filter"), b"/Filter");
Expand Down

0 comments on commit ae4c7c3

Please sign in to comment.