Skip to content

Commit

Permalink
feat: add custom CowStr type
Browse files Browse the repository at this point in the history
Related issue: #20
  • Loading branch information
kmaasrud committed Apr 1, 2023
1 parent 14f6c41 commit 18363ea
Show file tree
Hide file tree
Showing 3 changed files with 219 additions and 16 deletions.
20 changes: 7 additions & 13 deletions src/attr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,11 @@ impl<'s> AttributeValue<'s> {
}

fn extend(&mut self, s: &'s str) {
match &mut self.raw {
CowStr::Borrowed(prev) => {
if prev.is_empty() {
*prev = s;
} else {
self.raw = format!("{} {}", prev, s).into();
}
}
CowStr::Owned(ref mut prev) => {
prev.push(' ');
prev.push_str(s);
}
if self.raw.is_empty() {
self.raw = s.into();
} else {
self.raw.push(' ');
self.raw.push_str(s);
}
}
}
Expand Down Expand Up @@ -171,7 +164,8 @@ impl<'s> Attributes<'s> {
if let Some(i) = attrs.iter().position(|(k, _)| *k == key) {
let prev = &mut attrs[i].1;
if key == "class" {
*prev = format!("{} {}", prev, val).into();
prev.raw.push(' ');
prev.raw.push_str(&val.raw);
} else {
*prev = val;
}
Expand Down
6 changes: 3 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
//! let events =
//! jotdown::Parser::new("a [link](https://example.com)").map(|e| match e {
//! Event::Start(Link(dst, ty), attrs) => {
//! Event::Start(Link(dst.replace(".com", ".net").into(), ty), attrs)
//! Event::Start(Link(dst.replace(".com", ".net"), ty), attrs)
//! }
//! e => e,
//! });
Expand All @@ -60,13 +60,13 @@ mod block;
mod inline;
mod lex;
mod span;
mod string;
mod tree;

use span::Span;

pub use attr::{AttributeValue, AttributeValueParts, Attributes};

type CowStr<'s> = std::borrow::Cow<'s, str>;
pub use string::CowStr;

/// A trait for rendering [`Event`]s to an output format.
///
Expand Down
209 changes: 209 additions & 0 deletions src/string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
use std::{borrow::Borrow, fmt::Display, ops::Deref, str::from_utf8};

// Largest CowStr variant is Owned(String). A String uses 3 words of memory, but a fourth word is
// needed to hold the tag (the tag takes a byte, but a full word is used for alignment reasons.)
// This means that the available space we have for an inline string is 4 words - 2 bytes: one
// byte for the tag and one byte for the length.
//
// NOTE: the inline length is stored in a `u8`, so every `len as u8` cast below relies on
// `MAX_INLINE_STR_LEN` never exceeding `u8::MAX`.
const MAX_INLINE_STR_LEN: usize = 4 * std::mem::size_of::<usize>() - 2;

/// A string that is either heap-allocated, borrowed from the input, or stored inline.
///
/// Equality, [`Display`] and [`Deref`] all operate on the string contents, so two values holding
/// the same text compare equal regardless of which variant stores it.
#[derive(Debug, Eq)]
pub enum CowStr<'s> {
    /// A heap-allocated string.
    Owned(String),
    /// A string slice borrowed for `'s`.
    Borrowed(&'s str),
    /// Up to `MAX_INLINE_STR_LEN` bytes of UTF-8 stored in place, followed by the number of bytes
    /// in use. Bytes past that length are unspecified and must never be read as text.
    Inlined([u8; MAX_INLINE_STR_LEN], u8),
}

impl<'s> CowStr<'s> {
    /// Replaces every non-overlapping occurrence of `from` with `to`, scanning left to right
    /// like [`str::replace`].
    ///
    /// Unlike [`str::replace`], an empty `from` leaves the string unchanged. The string is also
    /// returned untouched when it does not contain `from`. An `Owned` input stays `Owned`;
    /// `Borrowed` and `Inlined` inputs become `Inlined` when the result fits in the inline
    /// buffer and `Owned` otherwise.
    pub fn replace(self, from: &str, to: &str) -> Self {
        if from.is_empty() || !self.deref().contains(from) {
            return self;
        }

        if let CowStr::Owned(s) = &self {
            return CowStr::Owned(s.replace(from, to));
        }

        match Self::replace_inline(self.deref(), from, to) {
            Some(inlined) => inlined,
            // The result does not fit inline, fall back to a heap allocation.
            None => CowStr::Owned(self.deref().replace(from, to)),
        }
    }

    /// Appends the character `c`, spilling to the heap if the inline buffer would overflow.
    pub fn push(&mut self, c: char) {
        // A `char` encodes to at most four bytes of UTF-8.
        let mut utf8 = [0u8; 4];
        self.push_str(c.encode_utf8(&mut utf8));
    }

    /// Appends `s`.
    ///
    /// Appending an empty string is a no-op and keeps the current variant. Otherwise a
    /// `Borrowed` string becomes `Inlined` if the result fits in the inline buffer, and both
    /// `Borrowed` and `Inlined` strings become `Owned` if it does not.
    pub fn push_str(&mut self, s: &str) {
        if s.is_empty() {
            return;
        }

        match self {
            CowStr::Owned(this) => this.push_str(s),
            CowStr::Inlined(inner, len) => {
                let used = usize::from(*len);
                match Self::append_inline(inner, used, s) {
                    Some(end) => *len = end as u8,
                    None => {
                        let spilled = [self.deref(), s].concat();
                        *self = CowStr::Owned(spilled);
                    }
                }
            }
            CowStr::Borrowed(this) => {
                let prefix = *this;
                let mut inner = [0u8; MAX_INLINE_STR_LEN];
                let filled = Self::append_inline(&mut inner, 0, prefix)
                    .and_then(|used| Self::append_inline(&mut inner, used, s));
                *self = match filled {
                    Some(len) => CowStr::Inlined(inner, len as u8),
                    None => CowStr::Owned([prefix, s].concat()),
                };
            }
        }
    }

    /// Copies `s` into `buf` starting at byte offset `len`.
    ///
    /// Returns the new length, or `None` (leaving `buf` untouched) if `s` does not fit.
    fn append_inline(buf: &mut [u8; MAX_INLINE_STR_LEN], len: usize, s: &str) -> Option<usize> {
        let end = len.checked_add(s.len())?;
        buf.get_mut(len..end)?.copy_from_slice(s.as_bytes());
        Some(end)
    }

    /// Builds the result of replacing `from` with `to` in `haystack` directly in an inline
    /// buffer. Returns `None` if the result is longer than `MAX_INLINE_STR_LEN`.
    ///
    /// `from` must not be empty.
    fn replace_inline(haystack: &str, from: &str, to: &str) -> Option<Self> {
        let mut buf = [0u8; MAX_INLINE_STR_LEN];
        let mut len = 0;
        // Splitting on `from` yields the text between matches, so already replaced text is
        // never scanned again.
        for (i, piece) in haystack.split(from).enumerate() {
            if i > 0 {
                len = Self::append_inline(&mut buf, len, to)?;
            }
            len = Self::append_inline(&mut buf, len, piece)?;
        }
        Some(CowStr::Inlined(buf, len as u8))
    }
}

impl<'s> Deref for CowStr<'s> {
    type Target = str;

    /// Returns the string contents, whichever variant holds them.
    fn deref(&self) -> &Self::Target {
        match *self {
            Self::Owned(ref s) => s.borrow(),
            Self::Borrowed(s) => s,
            // NOTE: Inlined strings can only be constructed from strings or chars, which means they
            // are guaranteed to be valid UTF-8. We could consider unchecked conversion as well, but
            // a benchmark should be done before introducing unsafes.
            Self::Inlined(ref inner, len) => from_utf8(&inner[..usize::from(len)])
                .expect("inlined CowStr must hold valid UTF-8"),
        }
    }
}

impl<'s> AsRef<str> for CowStr<'s> {
    fn as_ref(&self) -> &str {
        self.deref()
    }
}

impl<'s> From<char> for CowStr<'s> {
    /// Stores the UTF-8 encoding of `value` inline.
    fn from(value: char) -> Self {
        let mut inner = [0u8; MAX_INLINE_STR_LEN];
        let len = value.encode_utf8(&mut inner).len();
        CowStr::Inlined(inner, len as u8)
    }
}

impl<'s> From<&'s str> for CowStr<'s> {
    /// Borrows `value` without copying.
    fn from(value: &'s str) -> Self {
        CowStr::Borrowed(value)
    }
}

impl<'s> From<String> for CowStr<'s> {
    /// Takes ownership of `value` without copying.
    fn from(value: String) -> Self {
        CowStr::Owned(value)
    }
}

impl<'s> Clone for CowStr<'s> {
    /// Clones the string. An `Owned` string short enough for the inline buffer is cloned as
    /// `Inlined`, which avoids a heap allocation.
    fn clone(&self) -> Self {
        match self {
            CowStr::Owned(s) => {
                let mut inner = [0u8; MAX_INLINE_STR_LEN];
                match Self::append_inline(&mut inner, 0, s) {
                    Some(len) => CowStr::Inlined(inner, len as u8),
                    None => CowStr::Owned(s.clone()),
                }
            }
            CowStr::Borrowed(s) => CowStr::Borrowed(s),
            CowStr::Inlined(inner, len) => CowStr::Inlined(*inner, *len),
        }
    }
}

impl<'s> PartialEq for CowStr<'s> {
    /// Compares string contents, ignoring which variant stores them.
    fn eq(&self, other: &Self) -> bool {
        self.deref() == other.deref()
    }
}

impl<'s> Display for CowStr<'s> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.deref())
    }
}

impl<'s, 'a> FromIterator<&'a str> for CowStr<'s> {
    /// Concatenates all string slices of `iter` into an `Owned` string.
    fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
        CowStr::Owned(iter.into_iter().collect())
    }
}

0 comments on commit 18363ea

Please sign in to comment.