Skip to content

Commit

Permalink
Merge pull request #1 from acdh-oeaw/dev
Browse files Browse the repository at this point in the history
new function added
  • Loading branch information
zozlak authored Feb 16, 2021
2 parents 5880cce + dab0d73 commit 9119c90
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 5 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@
/dist
/composer.lock
/vendor
env
.tox
*.pyc
22 changes: 19 additions & 3 deletions AcdhUriNormRules/AcdhUriNormRules.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@
import json
import pkg_resources
import re

def get_rules():
    """Load the URI normalization rules shipped with this package.

    The rules are read from the ``rules.json`` resource resolved relative
    to this module and parsed as JSON.

    :return: a list of regex patterns to normalize Authority IDs
    :rtype: list
    """
    raw = pkg_resources.resource_string(__name__, 'rules.json')
    # resource_string() returns bytes, but json.loads() only accepts bytes
    # from Python 3.6 on; decode explicitly so Python 3.5 keeps working.
    # 'utf-8-sig' also tolerates a leading byte order mark.
    return json.loads(raw.decode('utf-8-sig'))


def get_normalized_uri(uri, rules=None):
    """Return a normalized version of a normdata URI.

    Every rule is applied in order with :func:`re.sub`; the output of one
    rule is the input of the next one.

    :param uri: A normdata URI
    :type uri: str
    :param rules: Optional rules to apply, each providing a ``match`` regex
        and a ``replace`` string. When omitted, the rules returned by
        :func:`get_rules` are used. Passing them in lets callers that
        normalize many URIs load the rules only once.
    :type rules: list or None
    :return: The normalized URI
    :rtype: str
    """
    if rules is None:
        rules = get_rules()
    for rule in rules:
        uri = re.sub(rule['match'], rule['replace'], uri)
    return uri
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,14 @@ Rules are stored as a JSON in the `UriNormRules/rules.json` file.
```
* Use with
```Python
from AcdhUriNormRules import *
print(AcdhUriNormRules.getRules())
from AcdhUriNormRules import get_rules, get_normalized_uri
print(get_rules())

wrong_id = "http://sws.geonames.org/1232324343/linz.html"

good_id = get_normalized_uri(wrong_id)
print(good_id)
# "https://www.geonames.org/1232324343"
```

## PHP
Expand Down
Empty file added tests/__init__.py
Empty file.
27 changes: 27 additions & 0 deletions tests/test_normalizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import unittest
from AcdhUriNormRules.AcdhUriNormRules import get_rules, get_normalized_uri

# Each entry pairs an input URI with the normalized form expected for it.
SAMPLES = [
    ["http://sws.geonames.org/1232324343/linz.html", "https://www.geonames.org/1232324343"],
    ["http://d-nb.info/gnd/4074255-6/", "https://d-nb.info/gnd/4074255-6"],
    ["https://d-nb.info/gnd/4074255-6", "https://d-nb.info/gnd/4074255-6"],
]

class TestNormalizer(unittest.TestCase):
    """Checks for the rule loader and the URI normalizer."""

    def test__001_load_list(self):
        """get_rules() returns the rules as a list."""
        rules = get_rules()
        self.assertIsInstance(rules, list, "should be type 'list' ")

    def test__002_test_patterns(self):
        """Each SAMPLES entry normalizes to its expected URI."""
        for raw_uri, expected in SAMPLES:
            # subTest reports every failing sample together with its input
            # URI instead of stopping at the first mismatch.
            with self.subTest(uri=raw_uri):
                self.assertEqual(expected, get_normalized_uri(raw_uri))
8 changes: 8 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[tox]
envlist = py35, py36, py37, py38

[testenv]
setenv =
PYTHONPATH = {toxinidir}

commands = python setup.py test

0 comments on commit 9119c90

Please sign in to comment.