-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move unsafe handling code to plumbing module
This has given me the opportunity to review the safety of the unsafe code, and it was found lacking; it is now fixed in the plumbing module. #16 I've tried not to put naming opinions within the plumbing module. That is, things within it are named to match the C or C++ libraries of Leptonica and Tesseract as closely as possible. This addresses #17, at least within the plumbing module.
- Loading branch information
Showing
6 changed files
with
411 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
//! A direct but safe wrapper for `tesseract-sys`. It should stick as close as | ||
//! possible to the upstream API whilst avoiding unsafe behaviour. | ||
//! | ||
//! Are you interested in using this on its own? | ||
//! Raise an issue, and I'll split it into its own crate. | ||
mod pix; | ||
mod tess_base_api; | ||
mod tesseract_text; | ||
|
||
pub use self::pix::Pix; | ||
pub use self::pix::PixReadError; | ||
pub use self::pix::PixReadMemError; | ||
pub use self::tess_base_api::TessBaseAPI; | ||
pub use self::tess_base_api::TessBaseAPIGetUTF8TextError; | ||
pub use self::tess_base_api::TessBaseAPIInitError; | ||
pub use self::tess_base_api::TessBaseAPIRecogniseError; | ||
pub use self::tess_base_api::TessBaseAPISetImageSafetyError; | ||
pub use self::tess_base_api::TessBaseAPISetVariableError; | ||
pub use self::tesseract_text::TesseractText; | ||
|
||
/// Runs OCR on an image decoded from memory after setting an explicit source
/// resolution, and compares the output with the expected transcript.
#[test]
fn ocr_from_mem_with_ppi() -> Result<(), Box<dyn std::error::Error>> {
    use std::ffi::CString;

    // Decode the bundled TIFF straight from the bytes embedded at compile time.
    let image = Pix::read_mem(include_bytes!("../../img.tiff"))?;

    let mut api = TessBaseAPI::new();
    let language = CString::new("eng")?;
    api.init_2(None, Some(&language))?;
    api.set_image_2(&image);

    api.set_source_resolution(70);
    let recognised = api.get_utf8_text()?;
    assert_eq!(
        recognised.as_ref().to_str()?,
        include_str!("../../img.txt")
    );
    Ok(())
}
|
||
/// Exercises the longer call sequence: set a variable, initialise, load an
/// image from disk, run recognition explicitly, then read the text back.
#[test]
fn expanded_test() -> Result<(), Box<dyn std::error::Error>> {
    use std::ffi::CString;

    let mut api = TessBaseAPI::new();
    let variable = CString::new("tessedit_char_blacklist")?;
    let value = CString::new("z")?;
    api.set_variable(&variable, &value)?;
    api.init_2(None, None)?;

    // NOTE(review): this path is resolved at run time against the process's
    // working directory, unlike the `include_*!` fixtures which are relative
    // to this source file — confirm that `../img.png` is the intended location.
    let image_path = CString::new("../img.png")?;
    let image = Pix::read(&image_path)?;
    api.set_image_2(&image);
    api.recognize()?;

    let recognised = api.get_utf8_text()?;
    assert_eq!(
        recognised.as_ref().to_str()?,
        include_str!("../../img.txt")
    );
    Ok(())
}
|
||
/// Setting an image on an API that was never initialised must not crash;
/// recognition and text retrieval are both expected to report an error.
#[test]
fn setting_image_without_initializing_test() -> Result<(), PixReadMemError> {
    let mut api = TessBaseAPI::new();
    let image = Pix::read_mem(include_bytes!("../../img.tiff"))?;
    api.set_image_2(&image);

    let recognition = api.recognize();
    assert!(recognition.is_err());

    let text = api.get_utf8_text();
    assert!(text.is_err());
    Ok(())
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
extern crate leptonica_sys; | ||
extern crate thiserror; | ||
|
||
use self::leptonica_sys::{pixFreeData, pixRead, pixReadMem}; | ||
use self::thiserror::Error; | ||
use std::convert::AsRef; | ||
use std::ffi::CStr; | ||
|
||
/// Wrapper around Leptonica's [`Pix`](https://tpgit.github.io/Leptonica/struct_pix.html) structure | ||
pub struct Pix(*mut leptonica_sys::Pix); | ||
|
||
impl Drop for Pix { | ||
fn drop(&mut self) { | ||
unsafe { | ||
pixFreeData(self.0); | ||
} | ||
} | ||
} | ||
|
||
impl AsRef<*mut leptonica_sys::Pix> for Pix { | ||
fn as_ref(&self) -> &*mut leptonica_sys::Pix { | ||
&self.0 | ||
} | ||
} | ||
|
||
#[derive(Debug, Error)] | ||
#[error("Pix::read returned null")] | ||
pub struct PixReadError(); | ||
|
||
#[derive(Debug, Error)] | ||
#[error("Pix::read_mem returned null")] | ||
pub struct PixReadMemError(); | ||
|
||
impl Pix { | ||
/// Wrapper for [`pixRead`](https://tpgit.github.io/Leptonica/leptprotos_8h.html#a84634846cbb5e01df667d6e9241dfc53) | ||
/// | ||
/// Read an image from a filename | ||
pub fn read(filename: &CStr) -> Result<Self, PixReadError> { | ||
let ptr = unsafe { pixRead(filename.as_ptr()) }; | ||
if ptr.is_null() { | ||
Err(PixReadError {}) | ||
} else { | ||
Ok(Self(ptr)) | ||
} | ||
} | ||
|
||
/// Wrapper for [`pixReadMem`](https://tpgit.github.io/Leptonica/leptprotos_8h.html#a027a927dc3438192e3bdae8c219d7f6a) | ||
/// | ||
/// Read an image from memory | ||
pub fn read_mem(img: &[u8]) -> Result<Self, PixReadMemError> { | ||
let ptr = unsafe { pixReadMem(img.as_ptr(), img.len()) }; | ||
if ptr.is_null() { | ||
Err(PixReadMemError {}) | ||
} else { | ||
Ok(Self(ptr)) | ||
} | ||
} | ||
} | ||
|
||
/// Reading from the path `"fail"` must surface an error rather than a `Pix`.
#[test]
fn read_error_test() -> Result<(), Box<dyn std::error::Error>> {
    let bad_path = std::ffi::CString::new("fail")?;
    let outcome = Pix::read(&bad_path);
    assert!(outcome.is_err());
    Ok(())
}
|
||
/// Decoding an empty byte slice must surface an error rather than a `Pix`.
#[test]
fn read_mem_error_test() {
    let outcome = Pix::read_mem(&[]);
    assert!(outcome.is_err());
}
Oops, something went wrong.