Skip to content

Commit

Permalink
Fix options macro (#14)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonxslays authored Mar 13, 2024
1 parent bc736e1 commit 0253d0b
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 76 deletions.
2 changes: 1 addition & 1 deletion decancer_py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

from __future__ import annotations

__all__ = ("parse", "CuredString", "__version__")
__all__ = ("parse", "CuredString")

from .decancer_py import *
72 changes: 64 additions & 8 deletions decancer_py/decancer_py.pyi
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
from __future__ import annotations

__all__ = ("parse", "CuredString", "__version__")
__all__ = ("parse", "CuredString")
__version__: str

class CuredString:
"""A small wrapper around a string used for comparisons."""

def __eq__(self, other: str) -> bool: ...
def __ne__(self, other: str) -> bool: ...
def __eq__(self, other: object) -> bool: ...
def __ne__(self, other: object) -> bool: ...
def __contains__(self, other: str) -> bool: ...
def __bool__(self) -> bool: ...
def __str__(self) -> bool: ...
def __repr__(self) -> bool: ...
def __str__(self) -> str: ...
def __repr__(self) -> str: ...
def starts_with(self, other: str) -> bool:
"""Checks if this cured string starts with the other string.
Expand All @@ -21,6 +19,7 @@ class CuredString:
Returns:
True if the cured string starts with the other string, else False.
"""

def ends_with(self, other: str) -> bool:
"""Checks if this cured string ends with the other string.
Expand All @@ -30,6 +29,7 @@ class CuredString:
Returns:
True if the cured string ends with the other string, else False.
"""

def contains(self, other: str) -> bool:
"""Checks if this cured string contains the other string.
Expand All @@ -40,13 +40,69 @@ class CuredString:
True if the cured string contains the other string, else False.
"""

def parse(text: str) -> CuredString:
def parse(text: str, **options: bool) -> CuredString:
"""Parses a jank string into a less toxic string wrapped in a CuredString
object.
Args:
text: The text to parse.
Keyword Args:
**options: The options to enable or disable.
Returns:
The CuredString object to use for comparisons.
Available options:
- formatter: Use if all you want is text formatting.
- pure_homoglyph: Prevents parsing chars from major foreign
writing systems.
- retain_capitalization: Prevents changing all chars to lower
case.
- disable_bidi: Disables the Unicode bidirectional algorithm.
Only use this when you don't expect any right-to-left chars.
- retain_diacritics: Prevent parsing chars with diacritics or
accents.
- retain_japanese: Prevent parsing katakana and hiragana chars.
- retain_emojis: Prevent parsing emojis.
- retain_greek: Prevent parsing Greek chars.
- retain_cyrillic: Prevent parsing Cyrillic chars.
- retain_hebrew: Prevent parsing Hebrew chars.
- retain_arabic: Prevent parsing Arabic chars.
- retain_devanagari: Prevent parsing Devanagari chars.
- retain_bengali: Prevent parsing Bengali chars.
- retain_armenian: Prevent parsing Armenian chars.
- retain_gujarati: Prevent parsing Gujarati chars.
- retain_tamil: Prevent parsing Tamil chars.
- retain_thai: Prevent parsing Thai chars.
- retain_lao: Prevent parsing Lao chars.
- retain_burmese: Prevent parsing Burmese chars.
- retain_khmer: Prevent parsing Khmer chars.
- retain_mongolian: Prevent parsing Mongolian chars.
- retain_chinese: Prevent parsing Chinese chars.
- retain_korean: Prevent parsing Korean chars.
- retain_braille: Prevent parsing Braille chars.
"""
181 changes: 114 additions & 67 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,28 @@ impl CuredString {
self.0.contains(other)
}

fn __richcmp__(&self, other: &str, op: CompareOp) -> PyResult<bool> {
Ok(match op {
fn __richcmp__(&self, other: &str, op: CompareOp) -> bool {
match op {
CompareOp::Eq => self.0 == other,
CompareOp::Ne => self.0 != other,
_ => false,
})
}
}

fn __contains__(&self, other: &str) -> PyResult<bool> {
Ok(self.contains(other))
fn __contains__(&self, other: &str) -> bool {
self.contains(other)
}

fn __bool__(&self) -> PyResult<bool> {
Ok(!self.0.is_empty())
fn __bool__(&self) -> bool {
!self.0.is_empty()
}

fn __str__(&self) -> PyResult<&str> {
Ok(&self.0)
fn __str__(&self) -> &str {
&self.0
}

fn __repr__(&self) -> PyResult<String> {
Ok(format!("{:?}", self.0))
fn __repr__(&self) -> String {
format!("{:?}", self.0)
}
}

Expand All @@ -60,69 +60,117 @@ fn is_dict_key(dict: &PyDict, key: &'static str) -> bool {
false
}

macro_rules! options_override {
($dict:ident,$output:ident,$($option:ident),*) => {
$(
if is_dict_key($dict, stringify!($output)) {
$output = decancer::Options::$option();
fn kwargs_to_options(options: Option<&PyDict>) -> Options {
let mut result = Options::default();

match options {
None => result,
Some(dict) => {
if is_dict_key(dict, "pure_homoglyph") {
result = Options::pure_homoglyph();
}
)*
};
}

macro_rules! options {
($dict:ident,$output:ident,$($option:ident),*) => {
$(
if is_dict_key($dict, stringify!($output)) {
$output = $output.$option();
if is_dict_key(dict, "retain_capitalization") {
result = result.retain_capitalization();
}
)*
};
}

fn kwargs_to_options(options: Option<&PyDict>) -> Options {
match options {
Some(dict) => {
let mut output = Options::default();

options_override!(dict, output, formatter, pure_homoglyph);

options!(
dict,
output,
retain_capitalization,
disable_bidi,
retain_diacritics,
retain_japanese,
retain_emojis,
retain_greek,
retain_cyrillic,
retain_hebrew,
retain_arabic,
retain_devanagari,
retain_bengali,
retain_armenian,
retain_gujarati,
retain_tamil,
retain_thai,
retain_lao,
retain_burmese,
retain_khmer,
retain_mongolian,
retain_chinese,
retain_korean,
retain_braille
);

output
}
if is_dict_key(dict, "disable_bidi") {
result = result.disable_bidi();
}

if is_dict_key(dict, "retain_diacritics") {
result = result.retain_diacritics();
}

if is_dict_key(dict, "retain_japanese") {
result = result.retain_japanese();
}

if is_dict_key(dict, "retain_emojis") {
result = result.retain_emojis();
}

if is_dict_key(dict, "retain_greek") {
result = result.retain_greek();
}

if is_dict_key(dict, "retain_cyrillic") {
result = result.retain_cyrillic();
}

None => Options::default(),
if is_dict_key(dict, "retain_hebrew") {
result = result.retain_hebrew();
}

if is_dict_key(dict, "retain_arabic") {
result = result.retain_arabic();
}

if is_dict_key(dict, "retain_devanagari") {
result = result.retain_devanagari();
}


if is_dict_key(dict, "retain_bengali") {
result = result.retain_bengali();
}


if is_dict_key(dict, "retain_armenian") {
result = result.retain_armenian();
}


if is_dict_key(dict, "retain_gujarati") {
result = result.retain_gujarati();
}


if is_dict_key(dict, "retain_tamil") {
result = result.retain_tamil();
}


if is_dict_key(dict, "retain_thai") {
result = result.retain_thai();
}

if is_dict_key(dict, "retain_lao") {
result = result.retain_lao();
}

if is_dict_key(dict, "retain_burmese") {
result = result.retain_burmese();
}

if is_dict_key(dict, "retain_khmer") {
result = result.retain_khmer();
}

if is_dict_key(dict, "retain_mongolian") {
result = result.retain_mongolian();
}

if is_dict_key(dict, "retain_chinese") {
result = result.retain_chinese();
}

if is_dict_key(dict, "retain_korean") {
result = result.retain_korean();
}

if is_dict_key(dict, "retain_braille") {
result = result.retain_braille();
}

result
}
}
}

/// Parses a jank string into a less toxic lowercase string wrapped in a CuredString object.
#[pyfunction]
#[pyo3(signature = (text, **options))]
#[pyo3(text_signature = "(text: str, **options) -> CuredString")]
pub fn parse<'a>(text: String, options: Option<&PyDict>) -> PyResult<CuredString> {
match decancer::cure(&text, kwargs_to_options(options)) {
Expand All @@ -136,8 +184,7 @@ pub fn parse<'a>(text: String, options: Option<&PyDict>) -> PyResult<CuredString
/// The module we export to python
#[pymodule]
fn decancer_py(_py: Python, m: &PyModule) -> PyResult<()> {
m.add("__version__", std::env!("CARGO_PKG_VERSION"))?;

m.add_class::<CuredString>()?;
m.add_function(wrap_pyfunction!(parse, m)?)
m.add_function(wrap_pyfunction!(parse, m)?)?;
m.add("__version__", std::env!("CARGO_PKG_VERSION"))
}
11 changes: 11 additions & 0 deletions tests/test_decancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def test_contains() -> None:
assert YEET.contains("ee")
assert not YEET.contains("no")


def test_starts_with() -> None:
assert YEET.starts_with("ye")
assert not YEET.starts_with("et")
Expand Down Expand Up @@ -47,3 +48,13 @@ def test_dunder_contains() -> None:
def test_dunder_contains_invalid_type() -> None:
    """Using `in` with an integer on the left-hand side must raise TypeError."""
    with pytest.raises(TypeError):
        _ = 69 in YEET  # type: ignore


def test_chinese() -> None:
    """With no options passed, parsing "旧" yields a value equal to "18"."""
    assert parse("旧") == "18"


def test_retain_chinese() -> None:
    """With `retain_chinese=True`, parsing "旧" leaves the character as-is."""
    cured = parse("旧", retain_chinese=True)
    assert cured == "旧"

0 comments on commit 0253d0b

Please sign in to comment.