Skip to content
This repository has been archived by the owner on Nov 26, 2024. It is now read-only.

Commit

Permalink
#58 added unicode-linebreak
Browse files Browse the repository at this point in the history
  • Loading branch information
bennobuilder committed Mar 19, 2024
1 parent 06ed10a commit f9c4a89
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 39 deletions.
9 changes: 7 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion crates/attributed_string/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@ fontdb = "0.16.0"
ordered-float = "4.2.0"
rustybuzz = "0.13.0"
ropey = "1.6.1"
rust-lapper = "1.1.0"
# rust-lapper = { git = "https://github.com/bennoinbeta/rust-lapper.git", rev = "ac47eb7" }
rust-lapper = { path = "/Users/benno/Desktop/workspace/contribution/code/rust-lapper" }
smallvec = { workspace = true }
strict-num = "0.2.0"
tiny-skia-path = { workspace = true }
unicode-bidi = "0.3"
unicode-linebreak = "0.1.5"
unicode-script = "0.5"
unicode-vo = "0.1"

Expand Down
6 changes: 6 additions & 0 deletions crates/attributed_string/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

## Emoji
- https://github.com/xi-editor/xi-editor/tree/master/rust/unicode/src

## Linebreak
- https://github.com/axelf4/unicode-linebreak
98 changes: 62 additions & 36 deletions crates/attributed_string/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use rust_lapper::Lapper;
use std::ops::Range;
use tiny_skia_path::{Path, Transform};
use token::{Token, TokenVariant};
use unicode_linebreak::BreakClass;
use usvg::{
database::FontsCache,
process_anchor,
Expand Down Expand Up @@ -38,60 +39,85 @@ struct AttributedString {

/// Specifies the writing mode for the text.
pub writing_mode: WritingMode,

pub width: f32,

pub height: f32,
}

impl AttributedString {
pub fn new(text: String, attribute_intervals: Vec<AttributeInterval>) -> Self {
// Merge overlapping intervals
let mut attribute_intervals = Lapper::new(attribute_intervals);
attribute_intervals.divide_overlaps_with(|overlaps| {
// TODO: Make the individual attributes optional (or similar) so that overlapping intervals can be merged
overlaps.get(0).unwrap().clone().clone()
});

Self {
text,
token_stream: Vec::new(),
attribute_intervals: Lapper::new(attribute_intervals),
attribute_intervals,
anchor: TextAnchor::Start,
text_flow: TextFlow::Linear,
writing_mode: WritingMode::LeftToRight,
width: 100.0,
height: 100.0,
}
}

pub fn tokanize(&mut self) {
pub fn tokenize(&mut self) {
let mut token_stream = Vec::new();

// Tokenize the text, considering spaces and line breaks
let chars: Vec<char> = self.text.chars().collect();
let mut start = 0;
for (index, match_str) in self
.text
.match_indices(|c: char| is_word_separator_char(c) || is_linebreak_char(c))
{
// Create a text fragment token for non-whitespace segments
if start != index {
token_stream.push(Token::new(
TokenVariant::TextFragment,
Range { start, end: index },
));
}

// Create a token for each space or line break
token_stream.push(match match_str.chars().next() {
Some(c) if is_word_separator_char(c) => Token::new(
TokenVariant::WordSeparator,
Range {
start: index,
end: index + match_str.len(),
},
),
Some(c) if is_linebreak_char(c) => Token::new(
TokenVariant::Linebreak,
Range {
start: index,
end: index + match_str.len(),
},
),
_ => continue, // Should never happen
});

start = index + match_str.len();
// Process each character for potential tokenization
for (index, _char) in chars.iter().enumerate() {
let break_class = unicode_linebreak::break_property(*_char as u32);

match break_class {
BreakClass::Mandatory | BreakClass::LineFeed | BreakClass::CarriageReturn => {
if start != index {
// Add text fragment token
token_stream.push(Token::new(
TokenVariant::TextFragment,
Range { start, end: index },
));
}
// Add line break token
token_stream.push(Token::new(
TokenVariant::Linebreak,
Range {
start: index,
end: index + 1,
},
));
start = index + 1;
}
BreakClass::Space | BreakClass::ZeroWidthSpace => {
if start != index {
// Add text fragment token
token_stream.push(Token::new(
TokenVariant::TextFragment,
Range { start, end: index },
));
}
// Add word separator token
token_stream.push(Token::new(
TokenVariant::WordSeparator,
Range {
start: index,
end: index + 1,
},
));
start = index + 1;
}
_ => {}
}
}

// Handle the last text fragment in the segment, if any
// Handle the last text fragment, if any
if start < self.text.len() {
token_stream.push(Token::new(
TokenVariant::TextFragment,
Expand Down Expand Up @@ -263,7 +289,7 @@ mod tests {
];
let mut attributed_string = AttributedString::new(text, attribute_intervals);

attributed_string.tokanize();
attributed_string.tokenize();
attributed_string.shape_glyphs(&mut fonts_cache, &fontdb);
attributed_string.apply_modifications();

Expand Down
3 changes: 3 additions & 0 deletions crates/attributed_string/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ impl Token {
clusters_length(&self.outlined_clusters)
}

// TODO: Does it make more sense to shape the glyphs from the attribute intervals
// instead of from the tokens (thus always having to query the corresponding attributes),
// now that we have guaranteed there are no overlapping attributes (due to `divide_overlaps_with`)?
pub fn shape_glyphs(
&mut self,
text: &String,
Expand Down

0 comments on commit f9c4a89

Please sign in to comment.