Skip to content

Commit

Permalink
feat: implement ink OCR analysis
Browse files Browse the repository at this point in the history
See #6
  • Loading branch information
msiemens committed Feb 27, 2021
1 parent fe5ab74 commit e9c5b8d
Show file tree
Hide file tree
Showing 11 changed files with 263 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/one/property/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,4 +203,8 @@ pub(crate) enum PropertyType {
EmbeddedInkOffsetVert = 0x140034A3,
EmbeddedInkSpaceWidth = 0x14001C27,
EmbeddedInkSpaceHeight = 0x14001C28,
InkAnalysis = 0x200035D7,
InkAnalysisReference = 0x240035D9,
InkAnalysisWordAlternatives = 0x1C0035DA,
InkAnalysisWordLanguageId = 0x100035DC,
}
25 changes: 25 additions & 0 deletions src/one/property_set/ink_analysis.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
use crate::errors::{ErrorKind, Result};
use crate::fsshttpb::data::exguid::ExGuid;
use crate::one::property::object_reference::ObjectReference;
use crate::one::property::PropertyType;
use crate::one::property_set::PropertySetId;
use crate::onestore::object::Object;

/// Raw property data of an ink analysis container object.
///
/// Produced by `parse` in this module.
#[derive(Debug)]
pub(crate) struct Data {
// Object IDs read from the `InkAnalysisReference` property; empty when the
// property is absent. Callers resolve each ID as an ink analysis paragraph.
pub(crate) paragraphs: Vec<ExGuid>,
}

/// Reads the ink analysis container property set from `object`.
///
/// The referenced paragraph IDs are taken from the `InkAnalysisReference`
/// property; a missing property yields an empty list.
///
/// # Errors
///
/// Returns `MalformedOneNoteFileData` when `object` is not an `InkAnalysis`
/// property set, and forwards any error from parsing the reference list.
pub(crate) fn parse(object: &Object) -> Result<Data> {
    let is_ink_analysis = object.id() == PropertySetId::InkAnalysis.as_jcid();
    if !is_ink_analysis {
        let message = format!("unexpected object type: 0x{:X}", object.id().0);
        return Err(ErrorKind::MalformedOneNoteFileData(message.into()).into());
    }

    let references = ObjectReference::parse_vec(PropertyType::InkAnalysisReference, object)?;

    Ok(Data {
        paragraphs: references.unwrap_or_default(),
    })
}
25 changes: 25 additions & 0 deletions src/one/property_set/ink_analysis_line.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
use crate::errors::{ErrorKind, Result};
use crate::fsshttpb::data::exguid::ExGuid;
use crate::one::property::object_reference::ObjectReference;
use crate::one::property::PropertyType;
use crate::one::property_set::PropertySetId;
use crate::onestore::object::Object;

/// Raw property data of an ink analysis line object.
///
/// Produced by `parse` in this module.
#[derive(Debug)]
pub(crate) struct Data {
// Object IDs read from the `InkAnalysisReference` property; empty when the
// property is absent. Callers resolve each ID as an ink analysis word.
pub(crate) words: Vec<ExGuid>,
}

/// Reads the ink analysis line property set from `object`.
///
/// The referenced word IDs are taken from the `InkAnalysisReference`
/// property; a missing property yields an empty list.
///
/// # Errors
///
/// Returns `MalformedOneNoteFileData` when `object` is not an
/// `InkAnalysisLine` property set, and forwards any error from parsing the
/// reference list.
pub(crate) fn parse(object: &Object) -> Result<Data> {
    let is_line = object.id() == PropertySetId::InkAnalysisLine.as_jcid();
    if !is_line {
        let message = format!("unexpected object type: 0x{:X}", object.id().0);
        return Err(ErrorKind::MalformedOneNoteFileData(message.into()).into());
    }

    let references = ObjectReference::parse_vec(PropertyType::InkAnalysisReference, object)?;

    Ok(Data {
        words: references.unwrap_or_default(),
    })
}
25 changes: 25 additions & 0 deletions src/one/property_set/ink_analysis_paragraph.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
use crate::errors::{ErrorKind, Result};
use crate::fsshttpb::data::exguid::ExGuid;
use crate::one::property::object_reference::ObjectReference;
use crate::one::property::PropertyType;
use crate::one::property_set::PropertySetId;
use crate::onestore::object::Object;

/// Raw property data of an ink analysis paragraph object.
///
/// Produced by `parse` in this module.
#[derive(Debug)]
pub(crate) struct Data {
// Object IDs read from the `InkAnalysisReference` property; empty when the
// property is absent. Callers resolve each ID as an ink analysis line.
pub(crate) lines: Vec<ExGuid>,
}

/// Reads the ink analysis paragraph property set from `object`.
///
/// The referenced line IDs are taken from the `InkAnalysisReference`
/// property; a missing property yields an empty list.
///
/// # Errors
///
/// Returns `MalformedOneNoteFileData` when `object` is not an
/// `InkAnalysisParagraph` property set, and forwards any error from parsing
/// the reference list.
pub(crate) fn parse(object: &Object) -> Result<Data> {
    let is_paragraph = object.id() == PropertySetId::InkAnalysisParagraph.as_jcid();
    if !is_paragraph {
        let message = format!("unexpected object type: 0x{:X}", object.id().0);
        return Err(ErrorKind::MalformedOneNoteFileData(message.into()).into());
    }

    let references = ObjectReference::parse_vec(PropertyType::InkAnalysisReference, object)?;

    Ok(Data {
        lines: references.unwrap_or_default(),
    })
}
49 changes: 49 additions & 0 deletions src/one/property_set/ink_analysis_word.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
use crate::errors::{ErrorKind, Result};
use crate::one::property::{simple, PropertyType};
use crate::one::property_set::PropertySetId;
use crate::onestore::object::Object;
use itertools::Itertools;
use widestring::U16String;

/// Raw property data of an ink analysis word object.
///
/// Produced by `parse` in this module.
#[derive(Debug)]
pub(crate) struct Data {
// Strings decoded from the `InkAnalysisWordAlternatives` property, which
// `parse` reads as little-endian UTF-16 separated by NUL code units.
pub(crate) alternatives: Vec<String>,
// Value of the `InkAnalysisWordLanguageId` property (read as u16 and
// widened); `None` when the property is absent.
pub(crate) language_code: Option<u32>,
}

/// Reads the ink analysis word property set from `object`.
///
/// The `InkAnalysisWordAlternatives` property is interpreted as little-endian
/// UTF-16 code units; NUL code units separate the individual alternatives and
/// empty segments are dropped. The language ID is optional.
///
/// # Errors
///
/// Returns `MalformedOneNoteFileData` when `object` is not an
/// `InkAnalysisWord` property set or when the alternatives property is
/// missing. Property parsing errors and invalid UTF-16 are forwarded.
pub(crate) fn parse(object: &Object) -> Result<Data> {
    let is_word = object.id() == PropertySetId::InkAnalysisWord.as_jcid();
    if !is_word {
        let message = format!("unexpected object type: 0x{:X}", object.id().0);
        return Err(ErrorKind::MalformedOneNoteFileData(message.into()).into());
    }

    let language_code =
        simple::parse_u16(PropertyType::InkAnalysisWordLanguageId, object)?.map(u32::from);

    let raw = simple::parse_vec(PropertyType::InkAnalysisWordAlternatives, object)?.ok_or_else(
        || ErrorKind::MalformedOneNoteFileData("ink analysis word has no alternatives".into()),
    )?;

    // Reassemble the byte stream into UTF-16 code units; a trailing odd byte
    // is ignored by `chunks_exact`.
    let code_units: Vec<u16> = raw
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .collect();

    let mut alternatives = Vec::new();
    for segment in code_units.split(|&unit| unit == 0) {
        if segment.is_empty() {
            continue;
        }

        alternatives.push(U16String::from_vec(segment.to_vec()).to_string()?);
    }

    Ok(Data {
        alternatives,
        language_code,
    })
}
8 changes: 8 additions & 0 deletions src/one/property_set/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ pub(crate) mod embedded_file_container;
pub(crate) mod embedded_file_node;
pub(crate) mod embedded_ink_container;
pub(crate) mod image_node;
pub(crate) mod ink_analysis;
pub(crate) mod ink_analysis_line;
pub(crate) mod ink_analysis_paragraph;
pub(crate) mod ink_analysis_word;
pub(crate) mod ink_container;
pub(crate) mod ink_data_node;
pub(crate) mod ink_stroke_node;
Expand Down Expand Up @@ -70,6 +74,10 @@ pub(crate) enum PropertySetId {
InkDataNode = 0x0002003B,
InkStrokeNode = 0x00020047,
StrokePropertiesNode = 0x00120048,
InkAnalysis = 0x00020054,
InkAnalysisParagraph = 0x00020055,
InkAnalysisLine = 0x00020056,
InkAnalysisWord = 0x00020057,
}

impl PropertySetId {
Expand Down
116 changes: 116 additions & 0 deletions src/onenote/ink_analysis.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
use crate::errors::{ErrorKind, Result};
use crate::fsshttpb::data::exguid::ExGuid;
use crate::one::property_set::{
ink_analysis, ink_analysis_line, ink_analysis_paragraph, ink_analysis_word,
};
use crate::onestore::object_space::ObjectSpace;
use itertools::Itertools;

/// The results of OCR analysis of ink handwriting.
///
/// The recognized content is organized as paragraphs, which contain lines,
/// which in turn contain words.
#[derive(Clone, Debug)]
pub struct InkAnalysis {
// Paragraphs in the order they are referenced by the container object.
pub(crate) paragraphs: Vec<InkAnalysisParagraph>,
}

impl InkAnalysis {
    /// The paragraphs found by the ink analysis.
    pub fn paragraphs(&self) -> &[InkAnalysisParagraph] {
        self.paragraphs.as_slice()
    }
}

/// A paragraph of an ink analysis result, made up of lines.
#[derive(Clone, Debug)]
pub struct InkAnalysisParagraph {
// Lines in the order they are referenced by the paragraph object.
pub(crate) lines: Vec<InkAnalysisLine>,
}

impl InkAnalysisParagraph {
    /// The lines that make up this paragraph.
    pub fn lines(&self) -> &[InkAnalysisLine] {
        self.lines.as_slice()
    }
}

/// A line of an ink analysis result, made up of words.
#[derive(Clone, Debug)]
pub struct InkAnalysisLine {
// Words in the order they are referenced by the line object.
pub(crate) words: Vec<InkAnalysisWord>,
}

impl InkAnalysisLine {
    /// The words that make up this line.
    pub fn words(&self) -> &[InkAnalysisWord] {
        self.words.as_slice()
    }
}

/// A single word of an ink analysis result.
///
/// A word carries one or more candidate texts together with an optional
/// language identifier.
#[derive(Clone, Debug)]
pub struct InkAnalysisWord {
    /// Language identifier stored with the word, if any.
    pub(crate) language_code: Option<u32>,
    /// Candidate texts, in the order they are stored in the file.
    pub(crate) alternatives: Vec<String>,
}

impl InkAnalysisWord {
    /// The candidate texts for this word.
    pub fn alternatives(&self) -> &[String] {
        &self.alternatives
    }

    /// The language identifier stored with this word, or `None` when the file
    /// does not specify one.
    ///
    /// NOTE(review): presumably a Windows language ID (LCID) — confirm against
    /// the OneNote file format specification.
    pub fn language_code(&self) -> Option<u32> {
        self.language_code
    }
}

/// Builds the ink analysis tree rooted at the container `ink_analysis_id`.
///
/// Looks up the container object in `space`, then resolves every paragraph it
/// references.
///
/// # Errors
///
/// Returns `MalformedOneNoteData` when the container object is not present in
/// `space`, and forwards the first error raised while parsing the container
/// or any of its paragraphs.
pub(crate) fn parse_ink_analysis(
    ink_analysis_id: ExGuid,
    space: &ObjectSpace,
) -> Result<InkAnalysis> {
    let container_object = match space.get_object(ink_analysis_id) {
        Some(object) => object,
        None => {
            return Err(
                ErrorKind::MalformedOneNoteData("ink analysis container is missing".into()).into(),
            )
        }
    };
    let paragraph_ids = ink_analysis::parse(container_object)?.paragraphs;

    // Stop at the first paragraph that fails to parse.
    let mut paragraphs = Vec::with_capacity(paragraph_ids.len());
    for paragraph_id in paragraph_ids {
        paragraphs.push(parse_ink_analysis_paragraph(paragraph_id, space)?);
    }

    Ok(InkAnalysis { paragraphs })
}

/// Resolves the paragraph object `paragraph_id` and all lines it references.
///
/// # Errors
///
/// Returns `MalformedOneNoteData` when the paragraph object is not present in
/// `space`, and forwards the first error raised while parsing the paragraph
/// or any of its lines.
fn parse_ink_analysis_paragraph(
    paragraph_id: ExGuid,
    space: &ObjectSpace,
) -> Result<InkAnalysisParagraph> {
    let object = match space.get_object(paragraph_id) {
        Some(object) => object,
        None => {
            return Err(
                ErrorKind::MalformedOneNoteData("ink analysis paragraph is missing".into()).into(),
            )
        }
    };
    let line_ids = ink_analysis_paragraph::parse(object)?.lines;

    // Stop at the first line that fails to parse.
    let mut lines = Vec::with_capacity(line_ids.len());
    for line_id in line_ids {
        lines.push(parse_ink_analysis_line(line_id, space)?);
    }

    Ok(InkAnalysisParagraph { lines })
}

/// Resolves the line object `line_id` and all words it references.
///
/// # Errors
///
/// Returns `MalformedOneNoteData` when the line object is not present in
/// `space`, and forwards the first error raised while parsing the line or any
/// of its words.
fn parse_ink_analysis_line(line_id: ExGuid, space: &ObjectSpace) -> Result<InkAnalysisLine> {
    let object = match space.get_object(line_id) {
        Some(object) => object,
        None => {
            return Err(
                ErrorKind::MalformedOneNoteData("ink analysis line is missing".into()).into(),
            )
        }
    };
    let word_ids = ink_analysis_line::parse(object)?.words;

    // Stop at the first word that fails to parse.
    let mut words = Vec::with_capacity(word_ids.len());
    for word_id in word_ids {
        words.push(parse_ink_analysis_word(word_id, space)?);
    }

    Ok(InkAnalysisLine { words })
}

/// Resolves the word object `word_id` into its public representation.
///
/// # Errors
///
/// Returns `MalformedOneNoteData` when the word object is not present in
/// `space`, and forwards any error raised while parsing the word's
/// properties.
fn parse_ink_analysis_word(word_id: ExGuid, space: &ObjectSpace) -> Result<InkAnalysisWord> {
    let object = match space.get_object(word_id) {
        Some(object) => object,
        None => {
            return Err(
                ErrorKind::MalformedOneNoteData("ink analysis word is missing".into()).into(),
            )
        }
    };

    // Move both fields out of the raw property data without copying.
    let ink_analysis_word::Data {
        alternatives,
        language_code,
    } = ink_analysis_word::parse(object)?;

    Ok(InkAnalysisWord {
        language_code,
        alternatives,
    })
}
1 change: 1 addition & 0 deletions src/onenote/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub(crate) mod content;
pub(crate) mod embedded_file;
pub(crate) mod image;
pub(crate) mod ink;
pub(crate) mod ink_analysis;
pub(crate) mod list;
pub(crate) mod note_tag;
pub(crate) mod notebook;
Expand Down
8 changes: 8 additions & 0 deletions src/onenote/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::errors::{ErrorKind, Result};
use crate::fsshttpb::data::exguid::ExGuid;
use crate::one::property::layout_alignment::LayoutAlignment;
use crate::one::property_set::{page_manifest_node, page_metadata, page_node, title_node};
use crate::onenote::ink_analysis::{parse_ink_analysis, InkAnalysis};
use crate::onenote::outline::{parse_outline, Outline};
use crate::onenote::page_content::{parse_page_content, PageContent};
use crate::onestore::object_space::ObjectSpace;
Expand All @@ -19,6 +20,7 @@ pub struct Page {
author: Option<String>,
height: Option<f32>,
contents: Vec<PageContent>,
ink_analysis: Option<InkAnalysis>,
}

impl Page {
Expand Down Expand Up @@ -163,12 +165,18 @@ pub(crate) fn parse_page(page_space: &ObjectSpace) -> Result<Page> {
.map(|content_id| parse_page_content(content_id, page_space))
.collect::<Result<_>>()?;

let ink_analysis = data
.ink_analysis
.map(|id| parse_ink_analysis(id, page_space))
.transpose()?;

Ok(Page {
title,
level,
author: data.author.map(|author| author.into_value()),
height: data.page_height,
contents,
ink_analysis,
})
}

Expand Down
1 change: 1 addition & 0 deletions tests/snapshots/lib__parse_notebook.snap
Original file line number Diff line number Diff line change
Expand Up @@ -98303,6 +98303,7 @@ Notebook {
},
),
],
ink_analysis: None,
},
],
},
Expand Down
1 change: 1 addition & 0 deletions tests/snapshots/lib__parse_section.snap
Original file line number Diff line number Diff line change
Expand Up @@ -98300,6 +98300,7 @@ Section {
},
),
],
ink_analysis: None,
},
],
},
Expand Down

0 comments on commit e9c5b8d

Please sign in to comment.