From 059e051333bb1f1bb819da2b685d828520458916 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 7 Nov 2022 16:09:47 -0500 Subject: [PATCH 01/11] First draft implementation of uncertain date model Co-authored-by: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> --- tests/test_undate.py | 15 +++++++++++++ undate/__init__.py | 0 undate/models.py | 0 undate/undate.py | 52 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+) create mode 100644 tests/test_undate.py create mode 100644 undate/__init__.py create mode 100644 undate/models.py create mode 100644 undate/undate.py diff --git a/tests/test_undate.py b/tests/test_undate.py new file mode 100644 index 0000000..f87fb30 --- /dev/null +++ b/tests/test_undate.py @@ -0,0 +1,15 @@ +from undate.undate import Undate, UndateInterval + +def test_single_date(): + assert str(Undate(2022, 11, 7)) == "2022-11-07" + assert str(Undate(2022, 11)) == "2022-11" + assert str(Undate(2022)) == "2022" + assert str(Undate(month=11, day=7)) == "--11-07" + +def test_range(): + # 2022 - 2023 + assert str(UndateInterval(Undate(2022), Undate(2023))) == "2022/2023" + # 2022 - 2023-05 + assert str(UndateInterval(Undate(2022), Undate(2023, 5))) == "2022/2023-05" + # 2022-11-01 to 2022-11-07 + assert str(UndateInterval(Undate(2022, 11, 1), Undate(2023, 11, 7))) == "2022-11-01/2023-11-07" diff --git a/undate/__init__.py b/undate/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/undate/models.py b/undate/models.py new file mode 100644 index 0000000..e69de29 diff --git a/undate/undate.py b/undate/undate.py new file mode 100644 index 0000000..9b6fa24 --- /dev/null +++ b/undate/undate.py @@ -0,0 +1,52 @@ +from datetime import date + +class Undate: + """Simple object for representing uncertain or partially unknown dates""" + + earliest = None + latest = None + label = None + + #: ISO format based on date precision + iso_format = { + "year": "%Y", + "month": "%m", + "day": "%d", + } + + def __init__(self, year=None, month=None, day=None): + # TODO: for unknowns, assume smallest possible value for earliest and + # largest valid for latest + self.earliest = date(year or self._default, month or self._default, day or self._default) + self.latest = self.earliest # TODO: needs to be a copy, not same object + # keep track of which values are known + self.known_values = { + "year": year is not None, + "month": month is not None, + "day": day is not None + } + + def __str__(self): + # serialize to iso format for simplicity, for now + date_parts = [] + # for each part of the date that is known, generate the string format + # then combine + for date_portion, known in self.known_values.items(): + if known: + date_parts.append(self.earliest.strftime(self.iso_format[date_portion])) + elif date_portion == "year": + # if not known but this is year, add '-' for --MM-DD unknown year format + date_parts.append("-") + return "-".join(date_parts) + + +class UndateInterval: + # date range between two undates + + def __init__(self, earliest, latest): + # for now, assume takes two undate objects + self.earliest = earliest + self.latest = latest + + def __str__(self): + return "%s/%s" % (self.earliest, self.latest) \ No newline at end of file From e28b268cd98655bea31b9463df14ca7ba0fa6037 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 7 Nov 2022 16:10:42 -0500 Subject: [PATCH 02/11] Add .gitignore for python files --- .gitignore | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b7757c8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ \ No newline at end of file From f9eff1dcaff075ad131751f831bf81213e52aee2 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 7 Nov 2022 16:52:11 -0500 Subject: [PATCH 03/11] Adjust earliest/latest date initialization --- undate/undate.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/undate/undate.py b/undate/undate.py index 9b6fa24..2ba57b2 100644 --- a/undate/undate.py +++ b/undate/undate.py @@ -1,4 +1,5 @@ -from datetime import date +import datetime +from calendar import monthrange class Undate: """Simple object for representing uncertain or partially unknown dates""" @@ -15,10 +16,15 @@ class Undate: } def __init__(self, year=None, month=None, day=None): - # TODO: for unknowns, assume smallest possible value for earliest and + # for unknowns, assume smallest possible value for earliest and # largest valid for latest - self.earliest = date(year or self._default, month or self._default, day or self._default) - self.latest = self.earliest # TODO: needs to be a copy, not same object + self.earliest = datetime.date(year or datetime.MINYEAR, month or 1, day or 1) + # if day is unknown but we have year and month, calculate max day + if day is None and year and month: + _, maxday = monthrange(year, month) + else: + maxday = 31 # ??? + self.latest = datetime.date(year or datetime.MAXYEAR, month or 12, day or maxday) # keep track of which values are known self.known_values = { "year": year is not None, From 309f0588a3b0e123b9fa4e4c9df13b46aa20db0c Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 7 Nov 2022 17:03:53 -0500 Subject: [PATCH 04/11] Remove file accidentally added to git --- undate/models.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 undate/models.py diff --git a/undate/models.py b/undate/models.py deleted file mode 100644 index e69de29..0000000 From 8c43781055fdd7d196033f0c8c865a876b472371 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 7 Nov 2022 17:05:30 -0500 Subject: [PATCH 05/11] Add preliminary developer documentation --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index ac19014..c473ff2 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,22 @@ # hackathon-2022 + Repository for the DHTech 2022 Hackathon + + +## Instructions to setup for development + +Use a recent version of python 3.x; recommended to use a virtualenv, e.g. +``` +python3 -m venv undate +source undate/bin/activate +``` + +Install dependencies: +``` +pip install pytest +``` + +Run unit tests: +``` +python -m pytest +``` \ No newline at end of file From 33e4c2b77d9aeccfc3cc825bcf22d85230846c5b Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 7 Nov 2022 17:25:27 -0500 Subject: [PATCH 06/11] Add support for open-ended date ranges --- tests/test_undate.py | 9 +++++++++ undate/undate.py | 11 ++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/test_undate.py b/tests/test_undate.py index f87fb30..b654e71 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -13,3 +13,12 @@ def test_range(): assert str(UndateInterval(Undate(2022), Undate(2023, 5))) == "2022/2023-05" # 2022-11-01 to 2022-11-07 assert str(UndateInterval(Undate(2022, 11, 1), Undate(2023, 11, 7))) == "2022-11-01/2023-11-07" + +def test_open_range(): + # 900 - + assert str(UndateInterval(Undate(900))) == "0900/" + # - 1900 + assert str(UndateInterval(latest=Undate(1900))) == "../1900" + # - 1900-12 + assert str(UndateInterval(latest=Undate(1900, 12))) == "../1900-12" + diff --git a/undate/undate.py b/undate/undate.py index 2ba57b2..8769293 100644 --- a/undate/undate.py +++ b/undate/undate.py @@ -2,13 +2,13 @@ from calendar import monthrange class Undate: - """Simple object for representing uncertain or partially unknown dates""" + """Simple object for representing uncertain, fuzzy or partially unknown dates""" earliest = None latest = None label = None - #: ISO format based on date precision + #: datetime strftime format for known part of date iso_format = { "year": "%Y", "month": "%m", @@ -47,12 +47,13 @@ def __str__(self): class UndateInterval: - # date range between two undates + # date range between two uncertain dates - def __init__(self, earliest, latest): + def __init__(self, earliest=None, latest=None): # for now, assume takes two undate objects self.earliest = earliest self.latest = latest def __str__(self): - return "%s/%s" % (self.earliest, self.latest) \ No newline at end of file + # using EDTF syntax for open ranges + return "%s/%s" % (self.earliest or "..", self.latest or "") \ No newline at end of file From 3a9246f16e920221f5c35dc4d90b25b902cfb32a Mon Sep 17 00:00:00 2001 From: Cole Crawford Date: Mon, 7 Nov 2022 17:31:30 -0500 Subject: [PATCH 07/11] Linting and dependencies --- .pre-commit-config.yaml | 15 +++++++++++++++ README.md | 18 +++++++++++++++--- requirements-dev.txt | 5 +++++ requirements.txt | 1 + tests/test_undate.py | 9 +++++++-- undate/undate.py | 9 ++++++--- 6 files changed, 49 insertions(+), 8 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 requirements-dev.txt create mode 100644 requirements.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..94823a3 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,15 @@ +files: \.py +repos: + - repo: https://github.com/psf/black + rev: 22.10.0 + hooks: + - id: black + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-case-conflict + - id: check-executables-have-shebangs + - id: debug-statements + - id: end-of-file-fixer + - id: mixed-line-ending + - id: trailing-whitespace \ No newline at end of file diff --git a/README.md b/README.md index c473ff2..86486eb 100644 --- a/README.md +++ b/README.md @@ -5,18 +5,30 @@ Repository for the DHTech 2022 Hackathon ## Instructions to setup for development +### Clone repo +``` +$ git clone git@github.com:dh-tech/hackathon-2022.git +cd hackathon-2022 +``` + +### Set up Python environment Use a recent version of python 3.x; recommended to use a virtualenv, e.g. ``` python3 -m venv undate source undate/bin/activate ``` -Install dependencies: +### Install dependencies +``` +$ pip install -r requirements-dev.txt +``` + +### Install pre-commit hooks ``` -pip install pytest +$ pre-commit install ``` -Run unit tests: +### Run unit tests ``` python -m pytest ``` \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..4e6d04e --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,5 @@ +-r requirements.txt + +black==22.10.0 +pre-commit==2.20.0 +pytest==7.2.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ + diff --git a/tests/test_undate.py b/tests/test_undate.py index b654e71..343d4fd 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,18 +1,24 @@ from undate.undate import Undate, UndateInterval + def test_single_date(): assert str(Undate(2022, 11, 7)) == "2022-11-07" assert str(Undate(2022, 11)) == "2022-11" assert str(Undate(2022)) == "2022" assert str(Undate(month=11, day=7)) == "--11-07" + def test_range(): # 2022 - 2023 assert str(UndateInterval(Undate(2022), Undate(2023))) == "2022/2023" # 2022 - 2023-05 assert str(UndateInterval(Undate(2022), Undate(2023, 5))) == "2022/2023-05" # 2022-11-01 to 2022-11-07 - assert str(UndateInterval(Undate(2022, 11, 1), Undate(2023, 11, 7))) == "2022-11-01/2023-11-07" + assert ( + str(UndateInterval(Undate(2022, 11, 1), Undate(2023, 11, 7))) + == "2022-11-01/2023-11-07" + ) + def test_open_range(): # 900 - @@ -21,4 +27,3 @@ def test_open_range(): assert str(UndateInterval(latest=Undate(1900))) == "../1900" # - 1900-12 assert str(UndateInterval(latest=Undate(1900, 12))) == "../1900-12" - diff --git a/undate/undate.py b/undate/undate.py index 8769293..528635b 100644 --- a/undate/undate.py +++ b/undate/undate.py @@ -1,6 +1,7 @@ import datetime from calendar import monthrange + class Undate: """Simple object for representing uncertain, fuzzy or partially unknown dates""" @@ -24,12 +25,14 @@ def __init__(self, year=None, month=None, day=None): _, maxday = monthrange(year, month) else: maxday = 31 # ??? - self.latest = datetime.date(year or datetime.MAXYEAR, month or 12, day or maxday) + self.latest = datetime.date( + year or datetime.MAXYEAR, month or 12, day or maxday + ) # keep track of which values are known self.known_values = { "year": year is not None, "month": month is not None, - "day": day is not None + "day": day is not None, } def __str__(self): @@ -56,4 +59,4 @@ def __init__(self, earliest=None, latest=None): def __str__(self): # using EDTF syntax for open ranges - return "%s/%s" % (self.earliest or "..", self.latest or "") \ No newline at end of file + return "%s/%s" % (self.earliest or "..", self.latest or "") From 3933917d3aff27fd01eaafeabc08bcb8b41a90dc Mon Sep 17 00:00:00 2001 From: Cole Crawford Date: Mon, 7 Nov 2022 17:34:25 -0500 Subject: [PATCH 08/11] Add basic linting / pytest CI --- .github/workflows/ci.yml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f6ca0ba --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,34 @@ +name: CI + +on: + push: + branches: + - "**" + paths: + - 'undate/**' + - 'tests/**' + +jobs: + qa: + runs-on: ubuntu-latest + defaults: + run: + working-directory: . + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.9 + uses: actions/setup-python@v4 + with: + python-version: 3.9 + cache: 'pip' + cache-dependency-path: '**/requirements-dev.txt' + - name: Install deps using Pip + run: pip install -r requirements-dev.txt + if: steps.python-cache.outputs.cache-hit != 'true' + - name: Run black + run: | + black src --check --diff + - name: Run unit tests + run: | + python -m pytest From b66f2d63296d719035b74661fdc56e2938f71c47 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 7 Nov 2022 17:44:30 -0500 Subject: [PATCH 09/11] Configure CI workflow to run on pull requests also --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f6ca0ba..89e5080 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,7 @@ on: paths: - 'undate/**' - 'tests/**' + pull_request: jobs: qa: From f5e755996bcd7230326f2d7a7263df050dd22fe2 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 7 Nov 2022 17:46:27 -0500 Subject: [PATCH 10/11] Tweak path for Black linter check in CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 89e5080..4a7f13b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,7 +29,7 @@ jobs: if: steps.python-cache.outputs.cache-hit != 'true' - name: Run black run: | - black src --check --diff + black undate --check --diff - name: Run unit tests run: | python -m pytest From 71855a56e54402b0bb7c62b7be7c1059a63e595b Mon Sep 17 00:00:00 2001 From: Malte Vogl Date: Tue, 8 Nov 2022 14:41:22 +0100 Subject: [PATCH 11/11] Update test_undate.py Fix CI --- tests/test_undate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_undate.py b/tests/test_undate.py index 343d4fd..673c96c 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -22,7 +22,7 @@ def test_range(): def test_open_range(): # 900 - - assert str(UndateInterval(Undate(900))) == "0900/" + assert str(UndateInterval(Undate(900))) == "900/" # - 1900 assert str(UndateInterval(latest=Undate(1900))) == "../1900" # - 1900-12