Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Key ordering with locale, take #2 #288

Merged
merged 3 commits into from
Jul 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,22 @@ Here is a more complex example:
ignore: |
*.ignore-trailing-spaces.yaml
ascii-art/*

Setting the locale
------------------

It is possible to set the ``locale`` option globally. This is passed to Python's
`locale.setlocale
<https://docs.python.org/3/library/locale.html#locale.setlocale>`_,
so an empty string ``""`` will use the system default locale, while e.g.
``"en_US.UTF-8"`` will use that.

Currently this only affects the ``key-ordering`` rule. The default will order
by Unicode code point number, while locales will sort case and accents
properly as well.

.. code-block:: yaml

extends: default

locale: en_US.UTF-8
42 changes: 38 additions & 4 deletions tests/rules/test_key_ordering.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import locale

from tests.common import RuleTestCase


Expand Down Expand Up @@ -103,14 +105,46 @@ def test_accents(self):
'haïr: true\n'
'hais: true\n', conf,
problem=(3, 1))
self.check('---\n'
'haïr: true\n'
'hais: true\n', conf,
problem=(3, 1))

def test_key_tokens_in_flow_sequences(self):
conf = 'key-ordering: enable'
self.check('---\n'
'[\n'
' key: value, mappings, in, flow: sequence\n'
']\n', conf)

def test_locale_case(self):
self.addCleanup(locale.setlocale, locale.LC_ALL, (None, None))
try:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except locale.Error:
self.skipTest('locale en_US.UTF-8 not available')
conf = ('key-ordering: enable')
self.check('---\n'
't-shirt: 1\n'
'T-shirt: 2\n'
't-shirts: 3\n'
'T-shirts: 4\n', conf)
self.check('---\n'
't-shirt: 1\n'
't-shirts: 2\n'
'T-shirt: 3\n'
'T-shirts: 4\n', conf,
problem=(4, 1))

def test_locale_accents(self):
self.addCleanup(locale.setlocale, locale.LC_ALL, (None, None))
try:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except locale.Error:
self.skipTest('locale en_US.UTF-8 not available')
conf = ('key-ordering: enable')
self.check('---\n'
'hair: true\n'
'haïr: true\n'
'hais: true\n'
'haïssable: true\n', conf)
self.check('---\n'
'hais: true\n'
'haïr: true\n', conf,
problem=(3, 1))
62 changes: 58 additions & 4 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ def setUpClass(cls):
# dos line endings yaml
'dos.yml': '---\r\n'
'dos: true',
# different key-ordering by locale
'c.yaml': '---\n'
'A: true\n'
'a: true',
'en.yaml': '---\n'
'a: true\n'
'A: true'
})

@classmethod
Expand All @@ -108,8 +115,10 @@ def test_find_files_recursively(self):
self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
os.path.join(self.wd, 'sub/directory.yaml/empty.yml'),
os.path.join(self.wd, 'sub/ok.yaml'),
Expand Down Expand Up @@ -146,6 +155,8 @@ def test_find_files_recursively(self):
self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
os.path.join(self.wd, 'sub/ok.yaml'),
os.path.join(self.wd, 'warn.yaml')]
Expand Down Expand Up @@ -175,8 +186,10 @@ def test_find_files_recursively(self):
self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 'no-yaml.json'),
os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
Expand All @@ -194,8 +207,10 @@ def test_find_files_recursively(self):
self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 'no-yaml.json'),
os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
Expand Down Expand Up @@ -315,6 +330,46 @@ def test_run_with_user_yamllint_config_file_in_env(self):
cli.run((os.path.join(self.wd, 'a.yaml'), ))
self.assertEqual(ctx.returncode, 1)

def test_run_with_locale(self):
# check for availability of locale, otherwise skip the test
# reset to default before running the test,
# as the first two runs don't use setlocale()
try:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except locale.Error:
self.skipTest('locale en_US.UTF-8 not available')
locale.setlocale(locale.LC_ALL, (None, None))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you briefly explain why you added this? Is the cleanup on line 334 redundant?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cleanup is still needed after the whole test ran.

Setting the locale to en_US is just a test for locale availability. But because the default without locale option (as in the first of the for test .runs below) doesn't set any locale, that en_US would still be active. Therefore we need to reset that to it's initial state first, then run the 2 tests with default settings, then the 2 with locale setting, then cleanup.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

I understood the second one (line 339), but I thought the cleanup wasn't needed, then. Actually it's still needed because yamllint calls locale.setlocale() too (in cli.py).

Could you add a tiny comment above self.addCleanup(), just to make sure we remember it? E.g. # Clean up after yamllint called locale.setlocale() in cli.py:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added some comments and moved the addCleanup to a more appropriate spot regarding the flow in this test.


# C + en.yaml should fail
with RunContext(self) as ctx:
cli.run(('-d', 'rules: { key-ordering: enable }',
os.path.join(self.wd, 'en.yaml')))
self.assertEqual(ctx.returncode, 1)

# C + c.yaml should pass
with RunContext(self) as ctx:
cli.run(('-d', 'rules: { key-ordering: enable }',
os.path.join(self.wd, 'c.yaml')))
self.assertEqual(ctx.returncode, 0)

# the next two runs use setlocale() inside,
# so we need to clean up afterwards
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

self.addCleanup(locale.setlocale, locale.LC_ALL, (None, None))

# en_US + en.yaml should pass
with RunContext(self) as ctx:
cli.run(('-d', 'locale: en_US.UTF-8\n'
'rules: { key-ordering: enable }',
os.path.join(self.wd, 'en.yaml')))
self.assertEqual(ctx.returncode, 0)

# en_US + c.yaml should fail
with RunContext(self) as ctx:
cli.run(('-d', 'locale: en_US.UTF-8\n'
'rules: { key-ordering: enable }',
os.path.join(self.wd, 'c.yaml')))
self.assertEqual(ctx.returncode, 1)

def test_run_version(self):
with RunContext(self) as ctx:
cli.run(('--version', ))
Expand Down Expand Up @@ -375,12 +430,11 @@ def test_run_non_ascii_file(self):

# Make sure the default localization conditions on this "system"
# support UTF-8 encoding.
loc = locale.getlocale()
try:
locale.setlocale(locale.LC_ALL, 'C.UTF-8')
locale.setlocale(locale.LC_ALL, (None, 'UTF-8'))
except locale.Error:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
self.addCleanup(locale.setlocale, locale.LC_ALL, loc)
self.skipTest('no UTF-8 locale available')
self.addCleanup(locale.setlocale, locale.LC_ALL, (None, None))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I now understand more how the whole locale module works, so I improved this test case (unrelated to my changes) as well. This should now work for any available UTF-8 locale, I guess - and also reset to the "default" from before better then with getlocale.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting: 'C.UTF-8'(None, 'UTF-8')

Thanks!


with RunContext(self) as ctx:
cli.run(('-f', 'parsable', path))
Expand Down
4 changes: 4 additions & 0 deletions yamllint/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import argparse
import io
import locale
import os
import platform
import sys
Expand Down Expand Up @@ -175,6 +176,9 @@ def run(argv=None):
print(e, file=sys.stderr)
sys.exit(-1)

if conf.locale is not None:
locale.setlocale(locale.LC_ALL, conf.locale)

max_level = 0

for file in find_files_recursively(args.files, conf):
Expand Down
8 changes: 8 additions & 0 deletions yamllint/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def __init__(self, content=None, file=None):
self.yaml_files = pathspec.PathSpec.from_lines(
'gitwildmatch', ['*.yaml', '*.yml', '.yamllint'])

self.locale = None

if file is not None:
with open(file) as f:
content = f.read()
Expand Down Expand Up @@ -111,6 +113,12 @@ def parse(self, raw_content):
self.yaml_files = pathspec.PathSpec.from_lines('gitwildmatch',
conf['yaml-files'])

if 'locale' in conf:
if not isinstance(conf['locale'], str):
raise YamlLintConfigError(
'invalid config: locale should be a string')
self.locale = conf['locale']

def validate(self):
for id in self.rules:
try:
Expand Down
25 changes: 22 additions & 3 deletions yamllint/rules/key_ordering.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@

"""
Use this rule to enforce alphabetical ordering of keys in mappings. The sorting
order uses the Unicode code point number. As a result, the ordering is
case-sensitive and not accent-friendly (see examples below).
order uses the Unicode code point number as a default. As a result, the
ordering is case-sensitive and not accent-friendly (see examples below).
This can be changed by setting the global ``locale`` option. This allows to
sort case and accents properly.

.. rubric:: Examples

Expand Down Expand Up @@ -63,8 +65,24 @@

- haïr: true
hais: true

#. With global option ``locale: "en_US.UTF-8"`` and rule ``key-ordering: {}``

as opposed to before, the following code snippet would now **PASS**:
::

- t-shirt: 1
T-shirt: 2
t-shirts: 3
T-shirts: 4
- hair: true
haïr: true
hais: true
haïssable: true
"""

from locale import strcoll

import yaml

from yamllint.linter import LintProblem
Expand Down Expand Up @@ -101,7 +119,8 @@ def check(conf, token, prev, next, nextnext, context):
# This check is done because KeyTokens can be found inside flow
# sequences... strange, but allowed.
if len(context['stack']) > 0 and context['stack'][-1].type == MAP:
if any(next.value < key for key in context['stack'][-1].keys):
if any(strcoll(next.value, key) < 0
for key in context['stack'][-1].keys):
yield LintProblem(
next.start_mark.line + 1, next.start_mark.column + 1,
'wrong ordering of key "%s" in mapping' % next.value)
Expand Down