From e52c35e34899bc21a389aa7f4fe5084423cf538c Mon Sep 17 00:00:00 2001 From: AvidCoderr Date: Thu, 25 Jan 2024 12:04:54 -0500 Subject: [PATCH] Ci - Normalize accented text twice. (#143) * pre normalize, upversion node support in ci/cd, more test --- .github/workflows/ci.yml | 4 ++-- .github/workflows/dev.yml | 4 ++-- .github/workflows/main.yml | 4 ++-- CHANGELOG.md | 5 +++-- slugify/__version__.py | 2 +- slugify/slugify.py | 9 ++++++--- test.py | 4 ++++ 7 files changed, 20 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 36959b0..71bc219 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,9 +17,9 @@ jobs: python: [3.7, 3.8, 3.9, "3.10", 3.11, 3.12, pypy3.8] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - name: Install dependencies diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index c12b80a..88791a7 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -17,9 +17,9 @@ jobs: python: [3.7, 3.8, 3.9, "3.10", 3.11, 3.12, pypy3.8] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - name: Install dependencies diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index eb66dc5..7a9c77e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,9 +16,9 @@ jobs: python: [3.7, 3.8, 3.9, "3.10", 3.11, 3.12, pypy3.8] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - name: Install dependencies diff --git a/CHANGELOG.md b/CHANGELOG.md index 395e538..eb60bee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ ## Work in progress -- Added typing to API and expose `py.typed`. -- Formally support 3.12 +## 8.0.2 + +- Normalize text before converting to unicode. (@chuckyblack - thx) ## 8.0.1 diff --git a/slugify/__version__.py b/slugify/__version__.py index a558d9b..dbbff9f 100644 --- a/slugify/__version__.py +++ b/slugify/__version__.py @@ -5,4 +5,4 @@ __url__ = 'https://github.com/un33k/python-slugify' __license__ = 'MIT' __copyright__ = 'Copyright 2022 Val Neekman @ Neekware Inc.' -__version__ = '8.0.1' +__version__ = '8.0.2' diff --git a/slugify/slugify.py b/slugify/slugify.py index 21bdaeb..9242e3e 100644 --- a/slugify/slugify.py +++ b/slugify/slugify.py @@ -118,8 +118,11 @@ def slugify( # replace quotes with dashes - pre-process text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text) - # decode unicode - if not allow_unicode: + # normalize text, convert to unicode if required + if allow_unicode: + text = unicodedata.normalize('NFKC', text) + else: + text = unicodedata.normalize('NFKD', text) text = unidecode.unidecode(text) # ensure text is still in unicode @@ -144,7 +147,7 @@ def slugify( except Exception: pass - # translate + # re normalize text if allow_unicode: text = unicodedata.normalize('NFKC', text) else: diff --git a/test.py b/test.py index 931f38f..2534499 100644 --- a/test.py +++ b/test.py @@ -36,6 +36,10 @@ def test_phonetic_conversion_of_eastern_scripts(self): self.assertEqual(r, "ying-shi-ma") def test_accented_text(self): + txt = '𝐚́́𝕒́àáâäãąā' + r = slugify(txt) + self.assertEqual(r, "aaaaaaaaa") + txt = 'C\'est déjà l\'été.' r = slugify(txt) self.assertEqual(r, "c-est-deja-l-ete")