Skip to content

Commit

Permalink
Merge pull request #86 from KennethEnevoldsen/package_update
Browse files Browse the repository at this point in the history
Added pre-config workflows
  • Loading branch information
KennethEnevoldsen authored Jun 21, 2022
2 parents 2561880 + 3952884 commit 62eb860
Show file tree
Hide file tree
Showing 41 changed files with 273 additions and 235 deletions.
1 change: 1 addition & 0 deletions .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -e .
sudo apt-get install pandoc
- name: Build and Commit
uses: sphinx-notes/pages@master
Expand Down
31 changes: 31 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
default_stages: [commit, push]

repos:
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort
name: isort (python)
args: ["--profile", "black", "--filter-files"]

- repo: https://github.com/asottile/add-trailing-comma
rev: v2.2.3
hooks:
- id: add-trailing-comma

- repo: https://github.com/asottile/pyupgrade
rev: v2.34.0
hooks:
- id: pyupgrade

- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
language_version: python3.8

- repo: https://github.com/PyCQA/flake8
rev: 4.0.1
hooks:
- id: flake8
args: [--config, setup.cfg]
85 changes: 40 additions & 45 deletions augmenty/__init__.py
Original file line number Diff line number Diff line change
@@ -1,57 +1,52 @@
from .about import __version__, __download_url__, __title__ # noqa

from .util import augmenters, docs, texts, keyboards, load, meta # noqa
from .about import __download_url__, __title__, __version__ # noqa
from .augment_utilities import set_doc_level # noqa
from .augment_utilities import combine, repeat, yield_original # noqa

# import augmenters
from .character import create_char_random_augmenter_v1 # noqa
from .character import create_char_replace_augmenter_v1 # noqa
from .character import create_char_swap_augmenter_v1 # noqa
from .character import ( # noqa
create_char_replace_augmenter_v1,
create_char_swap_augmenter_v1,
create_char_random_augmenter_v1,
create_keystroke_error_augmenter_v1,
create_random_casing_augmenter_v1,
create_remove_spacing_augmenter_v1,
)
from .doc import create_paragraph_subset_augmenter_v1 # noqa
from .doc import create_spongebob_augmenter_v1 # noqa
from .doc import create_upper_casing_augmenter_v1 # noqa
from .lang import create_da_historical_noun_casing_augmenter_v1 # noqa
from .lang import create_da_æøå_replace_augmenter_v1 # noqa
from .lang import create_mk # noqa
from .lang import ( # noqa
create_qwerty_da,
create_qwerty_de,
create_qwerty_el,
create_qwerty_en,
create_qwerty_es,
create_qwerty_fr,
create_qwerty_it,
create_qwerty_lt,
create_qwerty_nb,
create_qwerty_nl,
create_qwerty_pl,
create_qwerty_pt,
create_qwerty_ro,
create_ru,
)
from .span import create_ent_augmenter_v1 # noqa
from .span import create_ent_format_augmenter_v1 # noqa
from .span import create_per_replace_augmenter_v1 # noqa
from .token import create_conditional_token_casing_augmenter_v1 # noqa
from .token import create_duplicate_token_augmenter_v1 # noqa
from .token import create_letter_spacing_augmenter_v1 # noqa
from .token import ( # noqa
create_conditional_token_casing_augmenter_v1,
create_token_swap_augmenter_v1,
create_random_synonym_insertion_augmenter_v1,
create_spacing_insertion_augmenter_v1,
create_letter_spacing_augmenter_v1,
create_wordnet_synonym_augmenter_v1,
create_token_replace_augmenter_v1,
create_starting_case_augmenter_v1,
create_token_insert_random_augmenter_v1,
create_token_insert_augmenter_v1,
create_random_synonym_insertion_augmenter_v1,
create_duplicate_token_augmenter_v1,
)
from .span import ( # noqa
create_per_replace_augmenter_v1,
create_ent_format_augmenter_v1,
create_ent_augmenter_v1,
)
from .doc import ( # noqa
create_upper_casing_augmenter_v1,
create_spongebob_augmenter_v1,
create_paragraph_subset_augmenter_v1,
)
from .lang import ( # noqa
create_ru,
create_qwerty_ro,
create_qwerty_pt,
create_qwerty_pl,
create_qwerty_nl,
create_qwerty_nb,
create_mk,
create_qwerty_lt,
create_qwerty_it,
create_qwerty_fr,
create_qwerty_es,
create_qwerty_en,
create_qwerty_el,
create_qwerty_de,
create_da_historical_noun_casing_augmenter_v1,
create_da_æøå_replace_augmenter_v1,
create_qwerty_da,
create_token_insert_random_augmenter_v1,
create_token_replace_augmenter_v1,
create_token_swap_augmenter_v1,
create_wordnet_synonym_augmenter_v1,
)

from .augment_utilities import combine, set_doc_level, yield_original, repeat # noqa
from .util import augmenters, docs, keyboards, load, meta, texts # noqa
8 changes: 4 additions & 4 deletions augmenty/augment_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@

import random
from functools import partial
from typing import Callable, Iterable, Iterator, List
from typing import Callable, Iterable, Iterator

from spacy.language import Language
from spacy.tokens import Doc
from spacy.training import Example


def combine(
augmenters: Iterable[Callable[[Language, Example], Iterator[Example]]]
augmenters: Iterable[Callable[[Language, Example], Iterator[Example]]],
) -> Callable[[Language, Example], Iterator[Example]]:
"""Combines a series of spaCy style augmenters.
Expand Down Expand Up @@ -84,7 +83,8 @@ def __augment(nlp: Language, example: Example):


def yield_original(
augmenter: Callable[[Language, Example], Iterator[Example]], doc_level: float = 1.0
augmenter: Callable[[Language, Example], Iterator[Example]],
doc_level: float = 1.0,
) -> Callable[[Language, Example], Iterator[Example]]:
"""Wraps an augmenter such that it yields both the original and the augmented example.
Expand Down
2 changes: 1 addition & 1 deletion augmenty/character/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .casing import create_random_casing_augmenter_v1 # noqa
from .replace import create_char_random_augmenter_v1 # noqa
from .replace import ( # noqa
create_char_random_augmenter_v1,
create_char_replace_augmenter_v1,
create_keystroke_error_augmenter_v1,
)
Expand Down
6 changes: 4 additions & 2 deletions augmenty/character/casing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import random
from functools import partial
from typing import Iterator, Callable
from typing import Callable, Iterator

import spacy
from spacy.language import Language
Expand Down Expand Up @@ -34,7 +34,9 @@ def create_random_casing_augmenter_v1(


def random_casing_augmenter_v1(
nlp: Language, example: Example, level: float
nlp: Language,
example: Example,
level: float,
) -> Iterator[Example]:
def __casing(c):
if random.random() < level:
Expand Down
3 changes: 2 additions & 1 deletion augmenty/character/replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ def create_char_random_augmenter_v1(

@spacy.registry.augmenters("char_replace.v1")
def create_char_replace_augmenter_v1(
level: float, replace: dict
level: float,
replace: dict,
) -> Callable[[Language, Example], Iterator[Example]]:
"""Creates an augmenter that replaces a character with a random character from
replace dict
Expand Down
5 changes: 2 additions & 3 deletions augmenty/character/spacing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
Augmenters for modifying spacing
"""

from typing import Callable, Iterator

from functools import partial
import random
from functools import partial
from typing import Callable, Iterator

import spacy
from spacy.language import Language
Expand Down
5 changes: 2 additions & 3 deletions augmenty/character/swap.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
"""


from typing import Callable, Iterator

from functools import partial
import random
from functools import partial
from typing import Callable, Iterator

import spacy
from spacy.language import Language
Expand Down
6 changes: 2 additions & 4 deletions augmenty/doc/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from .casing import (
create_spongebob_augmenter_v1,
create_upper_casing_augmenter_v1,
) # noqa
from .casing import create_spongebob_augmenter_v1 # noqa
from .casing import create_upper_casing_augmenter_v1 # noqa
from .subset import create_paragraph_subset_augmenter_v1 # noqa
2 changes: 1 addition & 1 deletion augmenty/doc/casing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import random
from functools import partial
from typing import Iterator, Callable
from typing import Callable, Iterator

import spacy
from spacy.language import Language
Expand Down
8 changes: 5 additions & 3 deletions augmenty/doc/subset.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import random
from functools import partial
from typing import Iterator, Callable, Union
from typing import Callable, Iterator, Union

import numpy as np
import spacy
from spacy.language import Language
from spacy.training import Example

from augmenty.augment_utilities import make_text_from_orth
import numpy as np


@spacy.registry.augmenters("paragraph_subset_augmenter.v1")
Expand Down Expand Up @@ -42,7 +43,8 @@ def create_paragraph_subset_augmenter_v1(
>>> "It have tons of different augmenters. " +
>>> " Augmenty is developed using spaCy."
>>> list(augmenty.texts([text], upper_case_augmenter, nlp))
["Augmenty is a wonderful tool for augmentation. Augmenty is developed using spaCy."]
["Augmenty is a wonderful tool for augmentation. Augmenty is developed using
spaCy."]
"""
return partial(
paragraph_subset_augmenter_v1,
Expand Down
7 changes: 2 additions & 5 deletions augmenty/lang/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
# import keyboard and augmenters
from .da import ( # noqa
create_qwerty_da,
create_da_æøå_replace_augmenter_v1,
create_da_historical_noun_casing_augmenter_v1,
)
from .da import create_da_historical_noun_casing_augmenter_v1 # noqa
from .da import create_da_æøå_replace_augmenter_v1, create_qwerty_da # noqa
from .de import create_qwerty_de # noqa
from .el import create_qwerty_el # noqa
from .en import create_qwerty_en # noqa
Expand Down
7 changes: 2 additions & 5 deletions augmenty/lang/da/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
from .augmenters import ( # noqa
create_da_historical_noun_casing_augmenter_v1,
create_da_æøå_replace_augmenter_v1,
)

from .augmenters import create_da_historical_noun_casing_augmenter_v1 # noqa
from .augmenters import create_da_æøå_replace_augmenter_v1 # noqa
from .keyboard import create_qwerty_da # noqa
4 changes: 3 additions & 1 deletion augmenty/lang/da/augmenters.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,7 @@ def conditional(token):
return False

return create_conditional_token_casing_augmenter_v1(
conditional=conditional, upper=True, level=level
conditional=conditional,
upper=True,
level=level,
)
8 changes: 3 additions & 5 deletions augmenty/span/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from .entities import ( # noqa
create_ent_augmenter_v1,
create_ent_format_augmenter_v1,
create_per_replace_augmenter_v1,
)
from .entities import create_ent_augmenter_v1 # noqa
from .entities import create_ent_format_augmenter_v1 # noqa
from .entities import create_per_replace_augmenter_v1 # noqa
12 changes: 6 additions & 6 deletions augmenty/span/entities.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import random
from functools import partial
from typing import Callable, Dict, Iterable, Iterator, List, Optional, Union, Generator
from typing import Callable, Dict, Generator, Iterable, Iterator, List, Optional, Union

import numpy as np

import spacy
from spacy.language import Language
from spacy.training import Example
from spacy.tokens import Token
from spacy.training import Example

from ..augment_utilities import make_text_from_orth

Expand Down Expand Up @@ -111,10 +110,10 @@ def ent_augmenter_v1(
np.array(head[: ent.start + offset]), # before
np.array(
[head[ent.root.i + offset]]
+ [ent.start + offset] * (len_ent - 1)
+ [ent.start + offset] * (len_ent - 1),
), # the entity
np.array(head[ent.end + offset :]), # after
]
],
)
offset += offset_

Expand Down Expand Up @@ -142,7 +141,8 @@ def ent_augmenter_v1(
@spacy.registry.augmenters("per_replace.v1")
def create_per_replace_augmenter_v1(
names: Dict[
str, List[str]
str,
List[str],
], # {"firstname": ["Kenneth", "Lasse"], "lastname": ["Enevoldsen", "Hansen"]}
patterns: List[List[str]], # ["firstname", "firstname", "lastname"]
level: float,
Expand Down
5 changes: 2 additions & 3 deletions augmenty/tests/fixtures.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import pytest

import spacy
from spacy.training import Example

from dacy.datasets import dane
from spacy.training import Example

from .books import BOOKS


# pipelines
@pytest.fixture()
def nlp_en():
Expand Down
12 changes: 6 additions & 6 deletions augmenty/tests/lang/test_da.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from augmenty.lang.da import create_da_æøå_replace_augmenter_v1
from augmenty.lang.da import create_da_historical_noun_casing_augmenter_v1

import augmenty

import pytest
from spacy.lang.da import Danish
from spacy.tokens import Doc

import pytest
import augmenty
from augmenty.lang.da import (
create_da_historical_noun_casing_augmenter_v1,
create_da_æøå_replace_augmenter_v1,
)


@pytest.fixture()
Expand Down
Loading

1 comment on commit 62eb860

@KennethEnevoldsen
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
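| File | Stmts | Miss | Cover | Missing |
|---|---|---|---|---|
| **augmenty** | | | | |
|    `__init__.py` | 22 | 0 | 100% | |
|    `augment_utilities.py` | 40 | 2 | 95% | 53–54 |
|    `keyboard.py` | 42 | 1 | 98% | 42 |
|    `util.py` | 41 | 1 | 98% | 73 |
| **augmenty/character** | | | | |
|    `__init__.py` | 5 | 0 | 100% | |
|    `casing.py` | 20 | 0 | 100% | |
|    `replace.py` | 34 | 0 | 100% | |
|    `spacing.py` | 20 | 0 | 100% | |
|    `swap.py` | 21 | 0 | 100% | |
| **augmenty/doc** | | | | |
|    `__init__.py` | 3 | 0 | 100% | |
|    `casing.py` | 27 | 0 | 100% | |
|    `subset.py` | 44 | 5 | 89% | 74, 93, 97, 99, 105 |
| **augmenty/lang** | | | | |
|    `__init__.py` | 16 | 0 | 100% | |
| **augmenty/lang/da** | | | | |
|    `__init__.py` | 3 | 0 | 100% | |
|    `augmenters.py` | 17 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/de** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/el** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/en** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/es** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/fr** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/it** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/lt** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/mk** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/nb** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/nl** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/pl** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/pt** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/ro** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/lang/ru** | | | | |
|    `__init__.py` | 1 | 0 | 100% | |
|    `keyboard.py` | 5 | 0 | 100% | |
| **augmenty/span** | | | | |
|    `__init__.py` | 3 | 0 | 100% | |
|    `entities.py` | 83 | 1 | 99% | 285 |
| **augmenty/token** | | | | |
|    `__init__.py` | 9 | 0 | 100% | |
|    `casing.py` | 42 | 1 | 98% | 66 |
|    `insert.py` | 103 | 6 | 94% | 76, 93–94, 198, 260, 278 |
|    `replace.py` | 103 | 6 | 94% | 83, 195, 199, 235, 281, 295 |
|    `spacing.py` | 39 | 0 | 100% | |
|    `static_embedding_util.py` | 35 | 1 | 97% | 50 |
|    `swap.py` | 62 | 4 | 94% | 81, 111, 122, 135 |
|    `wordnet_util.py` | 10 | 3 | 70% | 7–9 |
| **TOTAL** | 933 | 31 | 97% | |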

Please sign in to comment.