diff --git a/lemminflect/__init__.py b/lemminflect/__init__.py index 64e63b0..3d3b716 100644 --- a/lemminflect/__init__.py +++ b/lemminflect/__init__.py @@ -3,7 +3,7 @@ from .core.Inflections import Inflections from .core.Lemmatizer import Lemmatizer -__version__ = '0.2.2' +__version__ = '0.2.3' # Lemmatizer is a singleton so this will only instantiate and load the data diff --git a/lemminflect/resources/lemma_overrides.csv b/lemminflect/resources/lemma_overrides.csv index 7137fd0..4748390 100644 --- a/lemminflect/resources/lemma_overrides.csv +++ b/lemminflect/resources/lemma_overrides.csv @@ -1 +1,88 @@ # inflection,upos,lemma (note that upos must be capitalized) + +# The following pronouns are not in the lookup +all,NOUN,all +another,NOUN,another +any,NOUN,any +anybody,NOUN,anybody +anyone,NOUN,anyone +anything,NOUN,anything +aught,NOUN,aught +both,NOUN,both +each,NOUN,each +everybody,NOUN,everybody +everyone,NOUN,everyone +everything,NOUN,everything +he,NOUN,he +her,NOUN,her +hers,NOUN,hers +herself,NOUN,herself +him,NOUN,him +himself,NOUN,himself +his,NOUN,his +idem,NOUN,idem +it,NOUN,it +its,NOUN,its +itself,NOUN,itself +many,NOUN,many +me,NOUN,me +my,NOUN,my +myself,NOUN,myself +naught,NOUN,naught +neither,NOUN,neither +none,NOUN,none +our,NOUN,our +ours,NOUN,ours +ourself,NOUN,ourself +ourselves,NOUN,ourselves +she,NOUN,she +some,NOUN,some +somebody,NOUN,somebody +someone,NOUN,someone +something,NOUN,something +such,NOUN,such +suchlike,NOUN,suchlike +that,NOUN,that +thee,NOUN,thee +theirs,NOUN,theirs +their,NOUN,their +theirself,NOUN,theirself +theirselves,NOUN,theirselves +them,NOUN,them +themself,NOUN,themself +themselves,NOUN,themselves +these,NOUN,these +they,NOUN,they +thine,NOUN,thine +this,NOUN,this +those,NOUN,those +thou,NOUN,thou +thy,NOUN,thy +thyself,NOUN,thyself +us,NOUN,us +we,NOUN,we +what,NOUN,what +whatever,NOUN,whatever +whatnot,NOUN,whatnot +whether,NOUN,whether +which,NOUN,which +whichever,NOUN,whichever +whichsoever,NOUN,whichsoever 
+who,NOUN,who
+whoever,NOUN,whoever
+whom,NOUN,whom
+whomever,NOUN,whomever
+whomso,NOUN,whomso
+whomsoever,NOUN,whomsoever
+whose,NOUN,whose
+whosever,NOUN,whosever
+whosesoever,NOUN,whosesoever
+whoso,NOUN,whoso
+whosoever,NOUN,whosoever
+ye,NOUN,ye
+yon,NOUN,yon
+you,NOUN,you
+your,NOUN,your
+yours,NOUN,yours
+yourself,NOUN,yourself
+yourselves,NOUN,yourselves
diff --git a/tests/auto/LemmatizerTests.py b/tests/auto/LemmatizerTests.py
index c1f5c5a..d1758c2 100755
--- a/tests/auto/LemmatizerTests.py
+++ b/tests/auto/LemmatizerTests.py
@@ -210,6 +210,22 @@ def testProperNouns(self):
         token = self.nlp('The Axxlaskans went South.')[1]
         self.assertEqual(token._.lemma(lemmatize_oov=True), 'Axxlaskan')
 
+    def testPronouns(self):
+        # Keep in sync with the pronoun rows in lemma_overrides.csv: each is expected to map to itself as a NOUN.
+        pronouns = ['all', 'another', 'any', 'anybody', 'anyone', 'anything', 'aught', 'both', 'each',
+                    'everybody', 'everyone', 'everything', 'he', 'her', 'hers', 'herself', 'him',
+                    'himself', 'his', 'idem', 'it', 'its', 'itself', 'many', 'me', 'my', 'myself', 'naught',
+                    'neither', 'none', 'our', 'ours', 'ourself', 'ourselves', 'she', 'some', 'somebody',
+                    'someone', 'something', 'such', 'suchlike', 'that', 'thee', 'their', 'theirs',
+                    'theirself', 'theirselves', 'them', 'themself', 'themselves', 'these', 'they',
+                    'thine', 'this', 'those', 'thou', 'thy', 'thyself', 'us', 'we', 'what', 'whatever',
+                    'whatnot', 'whether', 'which', 'whichever', 'whichsoever', 'who', 'whoever', 'whom',
+                    'whomever', 'whomso', 'whomsoever', 'whose', 'whosever', 'whosesoever', 'whoso',
+                    'whosoever', 'ye', 'yon', 'you', 'your', 'yours', 'yourself', 'yourselves']
+        tests = [(p, 'NOUN', p) for p in pronouns]
+        self.runGetAllLemmasTests(tests)
+        self.runGetLemmaTests(tests)
+
     def testOverrides(self):
         # run the lemmatizer once to assure the overrides is loaded (ie.. lazy loading)
         lemminflect.getLemma('Alaskans', 'NOUN', lemmatize_oov=False)