Removed Sastadev dependency and made it optional (#18)
oktaal authored Nov 29, 2022
1 parent d73bad4 commit 55fe733
Showing 6 changed files with 45 additions and 34 deletions.
12 changes: 12 additions & 0 deletions README.md
@@ -18,10 +18,18 @@ AuChAnn was specifically developed to enhance linguistic data in the form of a t
## Getting Started

You can install AuChAnn using pip:

```bash
pip install auchann
```

You can also optionally install [Sastadev](https://github.com/UUDigitalHumanitieslab/sastadev),
which is used to detect inflection errors.

```bash
pip install auchann[NL]
```

When installed, the program can be run interactively from the console using the command `auchann`.

## Import as Library
@@ -55,6 +63,10 @@ settings.calc_distance = lambda original, correction: editdistance.distance(orig
# Default method detects inflection errors
settings.detect_error = lambda original, correction: (1, "m") if original == "geloopt" and correction == "liep" else (0, None)

### Sastadev contains a helper function for Dutch which detects inflection errors
from sastadev.deregularise import detect_error
settings.detect_error = detect_error

# How many words could be split from one?
# e.g. das -> da(t) (i)s requires a lookahead of 2
# hoest -> hoe (i)s (he)t requires a lookahead of 3
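Putting the pieces of this diff together, a custom `detect_error` can now stand in for Sastadev entirely. The sketch below follows the `align_words(transcript, correction, settings)` call used in the updated `unit-tests/test_align.py`; the example words and the toy detector are illustrative only, not part of the commit.

```python
# Minimal sketch: align a transcript with its correction using a custom
# inflection-error detector, so the optional sastadev package is not needed.
from auchann.align_words import AlignmentSettings, align_words

settings = AlignmentSettings()


def detect_error(original: str, correction: str):
    # Report a morphological error code ("m") for one known irregular verb pair;
    # report no error otherwise (a toy stand-in for Sastadev's helper).
    if original == "slaapte" and correction == "sliep":
        return 1, "m"
    return 0, None


settings.detect_error = detect_error

alignment = align_words("ik slaapte", "ik sliep", settings)
print(alignment)  # the CHAT-annotated line
```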
37 changes: 10 additions & 27 deletions auchann/align_words.py
@@ -2,24 +2,8 @@
from enum import Enum, unique
from auchann.correct_parenthesize import correct_parenthesize
import auchann.data as data
from sastadev.deregularise import correctinflection
import editdistance

chat_errors = {
    'Overgeneralisation': 'm',
    'Lacking ge prefix': 'm',
    'Prefix ge without onset': 'm',
    'Wrong Overgeneralisation': 'm',
    'Wrong -en suffix': 'm'
}


def map_error(error_type: str) -> str:
    try:
        return chat_errors[error_type]
    except KeyError:
        return error_type


@unique
class TokenOperation(Enum):
@@ -165,18 +149,8 @@ def __calc_distance(original: str, correction: str) -> int:

            return distance

        def __detect_error(original: str, correction: str) -> Tuple[int, Optional[str]]:
            error = None
            for candidate, candidate_error in correctinflection(original):
                if candidate == correction:
                    error = map_error(candidate_error)
            if error is not None:
                return 1, cast(str, error)
            else:
                return 0, None

        self.__calc_distance = __calc_distance
        self.__detect_error = __detect_error
        self.__detect_error = data.detect_error

    @property
    def calc_distance(self):
@@ -225,6 +199,15 @@ def detect_error(self):

    @detect_error.setter
    def detect_error(self, method: Callable[[str, str], Tuple[int, Optional[str]]]):
        """Specify a method that compares a text with a correction and returns
        the desired editing distance and the CHAT error code.
        When no error is returned, the returned editing distance will be
        ignored.
        Args:
            method: Callable[[str, str], Tuple[int, Optional[str]]]:
                (transcribed text, correction) -> (editing distance, error code)
        """
        self.__detect_error = method
        self.distance_hash = {}

8 changes: 8 additions & 0 deletions auchann/data.py
@@ -754,3 +754,11 @@
'papap',
'e>[/]',
'wawawa']


try:
    from sastadev.deregularise import detect_error  # type: ignore[import]

except ImportError:
    def detect_error(_: str, __: str):
        return 0, None
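With this fallback in place the import of `auchann.data.detect_error` always succeeds; a quick hypothetical check (not part of the commit, result assumes Sastadev is absent):

```python
# Without the optional sastadev package, auchann.data.detect_error is the
# no-op fallback defined above and never reports an inflection error.
from auchann.data import detect_error

print(detect_error("geloopt", "liep"))  # -> (0, None) when sastadev is absent
```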
2 changes: 0 additions & 2 deletions requirements.txt
@@ -12,5 +12,3 @@ pyyaml==5.4.1
    # via pyyaml-include
pyyaml-include==1.2.post2
    # via auchann (setup.py)
sastadev==0.0.2
    # via auchann (setup.py)
8 changes: 5 additions & 3 deletions setup.py
@@ -5,7 +5,7 @@

setup(
    name='auchann',
    version='0.1.1',
    version='0.2.0',
    packages=find_packages(include=['auchann', 'auchann.*']),
    package_data={'auchann': ['py.typed']},
    description='The AuChAnn (Automatic CHAT Annotation) package can generate CHAT annotations based on transcript-correction pairs of utterances.',
@@ -18,9 +18,11 @@
    install_requires=[
        'chamd>=0.5.8',
        'editdistance',
        'pyyaml-include',
        'sastadev'
        'pyyaml-include'
    ],
    extras_require={
        'NL': ['sastadev>=0.03']
    },
    python_requires='>=3.7',
    zip_safe=True,
    entry_points={
12 changes: 10 additions & 2 deletions unit-tests/test_align.py
@@ -1,4 +1,4 @@
from auchann.align_words import align_words, align_split
from auchann.align_words import AlignmentSettings, align_words, align_split


def test_replace():
@@ -71,6 +71,14 @@ def test_multi_word():


def assertAlign(transcript_line: str, correction_line: str, expected_chat_line: str):
    alignment = align_words(transcript_line, correction_line)
    settings = AlignmentSettings()
    def detect_error(original: str, correction: str):
        if original == "slaapte" and correction == "sliep":
            return 1, "m"
        else:
            return 0, None
    settings.detect_error = detect_error

    alignment = align_words(transcript_line, correction_line, settings)
    chat_line = str(alignment)
    assert chat_line == expected_chat_line
