Skip to content
This repository has been archived by the owner on Nov 26, 2024. It is now read-only.

Commit

Permalink
#58 figuring out bidi para
Browse files Browse the repository at this point in the history
  • Loading branch information
bennobuilder committed Mar 21, 2024
1 parent b3b6990 commit 890bae6
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 11 deletions.
41 changes: 31 additions & 10 deletions crates/attributed_string/src/bidi_para.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,36 @@ impl<'text> Iterator for BidiParagraphs<'text> {
// Advances the iterator and yields the next paragraph as a `(text, level)` pair.
// Returns `None` once `self.info` has no more paragraphs.
fn next(&mut self) -> Option<Self::Item> {
// `?` ends iteration as soon as the underlying paragraph iterator is exhausted.
let para = self.info.next()?;
// Slice of the full text covered by this paragraph.
let paragraph = &self.text[para.range];
// `para.range` includes the newline that splits the line, so remove it if present
let mut char_indices = paragraph.char_indices();
// Only the last character is inspected; `i` is its byte offset within `paragraph`.
if let Some(i) = char_indices.next_back().and_then(|(i, c)| {
// `BidiClass::B` is a Paragraph_Separator (various newline characters)
(bidi_class(c) == BidiClass::B).then_some(i)
}) {
// Trailing separator found: yield the paragraph text without it.
Some((&paragraph[0..i], para.level))
} else {
// No trailing separator: yield the paragraph text unchanged.
Some((paragraph, para.level))
}
// NOTE(review): this listing looks like a diff view with removed and added lines
// interleaved; the `if let … else` above is presumably the old body and the line
// below its replacement (the paragraph is returned with its separator intact).
// Confirm against the actual file before relying on either variant.
Some((paragraph, para.level))
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Walks the whole bidi pipeline for one mixed RTL/LTR paragraph:
    /// level resolution first, then visual reordering of a single line.
    #[test]
    fn e2e() {
        // Built with `concat!` so that editors and browsers which struggle with
        // bidirectional text still show the logical character order.
        let logical = concat!["א", "ב", "ג", "a", "b", "c",];
        let expected_visual = concat!["a", "b", "c", "ג", "ב", "א",];

        // `None` asks the algorithm to detect the paragraph level on its own.
        let info = BidiInfo::new(&logical, None);

        // The input contains no paragraph separator, so exactly one paragraph exists.
        assert_eq!(info.paragraphs.len(), 1);
        let paragraph = &info.paragraphs[0];

        // The first strong character is RTL, which gives the paragraph level 1.
        let level = paragraph.level;
        assert_eq!(level.number(), 1);
        assert!(level.is_rtl());

        // Reordering normally runs per wrapped line; here the single line simply
        // covers the entire paragraph.
        let whole_line = paragraph.range.clone();
        let visual = info.reorder_line(paragraph, whole_line);
        assert_eq!(visual, expected_visual);
    }
}
11 changes: 10 additions & 1 deletion crates/attributed_string/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ impl AttributedString {
(start..end, level)
});

// TODO: Bidi paragraphs do not directly identify level differences; they only split where a line break happens.
// But since we can already identify line breaks with unicode_linebreak, it's probably best
// if we identify the spans based on level differences within the BidiInfo struct?
for (para_range, para_level) in bidi_para_range_iter {
let mut start = para_range.start;
let para_text = &self.text[para_range.clone()];
Expand All @@ -66,6 +69,8 @@ impl AttributedString {
let break_class = unicode_linebreak::break_property(_char as u32);

match break_class {
// Handle line break
// TODO: Should the line breaks happen based on the bidi paragraphs?
BreakClass::Mandatory
| BreakClass::LineFeed
| BreakClass::NextLine
Expand Down Expand Up @@ -93,6 +98,8 @@ impl AttributedString {
)));
start = global_index + 1;
}

// Handle text segment separation
BreakClass::Space | BreakClass::ZeroWidthSpace => {
// Add text fragment token
if start != global_index {
Expand Down Expand Up @@ -155,7 +162,7 @@ mod tests {

#[test]
fn e2e() {
let text = String::from("Hello, world! שלום עולם! This is a mix of English and Hebrew.");
let text = String::from("Hello, world!\nשלום עולם!\nThis is a mix of English and Hebrew.");
let attrs_intervals = vec![
AttrsInterval {
start: 0,
Expand All @@ -179,6 +186,8 @@ mod tests {

attributed_string.tokenize();

println!("{:#?}", attributed_string);

assert_eq!(attributed_string.token_stream.is_empty(), false);
}
}

0 comments on commit 890bae6

Please sign in to comment.