Skip to content

Commit

Permalink
checks: Detect duplicated formats as well
Browse files Browse the repository at this point in the history
Utilize Counter instead of set to better handle repeated format strings
in both source and target.
  • Loading branch information
nijel committed Jan 24, 2023
1 parent 3ac0aef commit 6eb35fe
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 22 deletions.
2 changes: 2 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ Weblate 4.16

Not yet released.

* Format string checks now also detect duplicated formats.

`All changes in detail <https://github.com/WeblateOrg/weblate/milestone/89?closed=1>`__.

Weblate 4.15.1
Expand Down
31 changes: 17 additions & 14 deletions weblate/checks/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# SPDX-License-Identifier: GPL-3.0-or-later

import re
from collections import defaultdict
from collections import Counter, defaultdict
from typing import Optional, Pattern

from django.utils.functional import SimpleLazyObject
Expand Down Expand Up @@ -260,6 +260,7 @@ class BaseFormatCheck(TargetCheck):

regexp: Optional[Pattern[str]] = None
default_disabled = True
normalize_remove = None

def check_target_unit(self, sources, targets, unit):
"""Check single unit, handling plurals."""
Expand Down Expand Up @@ -313,7 +314,12 @@ def cleanup_string(self, text):
return text

def normalize(self, matches):
    """Discard the ``normalize_remove`` token from *matches*.

    When ``normalize_remove`` is ``None`` nothing is filtered and
    *matches* is returned as is.

    A ``Counter`` is modified in place (its entry for the token is
    dropped, whatever the count) and the same object is returned, so the
    caller's Counter changes too. Any other iterable is left untouched
    and a new filtered list is returned.
    """
    if self.normalize_remove is None:
        return matches
    if isinstance(matches, Counter):
        # pop() with a default tolerates the token being absent.
        matches.pop(self.normalize_remove, None)
        return matches
    return [m for m in matches if m != self.normalize_remove]

def extract_matches(self, string):
    """Return the cleaned first element of every ``regexp`` hit in *string*.

    Each item produced by ``regexp.findall`` is indexed at position 0 and
    passed through ``cleanup_string``; results keep the order of the hits.
    """
    cleaned = []
    for hit in self.regexp.findall(string):
        cleaned.append(self.cleanup_string(hit[0]))
    return cleaned
Expand All @@ -333,17 +339,13 @@ def check_format(self, source, target, ignore_missing, unit):
tgt_matches = self.extract_matches(target)

if not uses_position:
src_matches = set(src_matches)
tgt_matches = set(tgt_matches)
src_matches = Counter(src_matches)
tgt_matches = Counter(tgt_matches)

if src_matches != tgt_matches:
# Ignore mismatch in percent position
if self.normalize(src_matches) == self.normalize(tgt_matches):
return False
# We can ignore missing format strings
# for first of plurals
if ignore_missing and tgt_matches < src_matches:
return False
if not uses_position:
missing = sorted(src_matches - tgt_matches)
extra = sorted(tgt_matches - src_matches)
Expand All @@ -356,6 +358,10 @@ def check_format(self, source, target, ignore_missing, unit):
extra.append(tgt_matches[i])
missing.extend(src_matches[len(tgt_matches) :])
extra.extend(tgt_matches[len(src_matches) :])
# We can ignore missing format strings
# for first of plurals
if ignore_missing and missing and not extra:
return False
return {"missing": missing, "extra": extra}
return False

Expand Down Expand Up @@ -417,16 +423,15 @@ def get_description(self, check_obj):
class BasePrintfCheck(BaseFormatCheck):
"""Base class for printf based format checks."""

normalize_remove = "%"

def __init__(self):
# Run the parent initializer, then unpack the FLAG_RULES entry keyed by
# this check's enable_string into the regexp and the callable that
# is_position_based() delegates to.
super().__init__()
self.regexp, self._is_position_based = FLAG_RULES[self.enable_string]

def is_position_based(self, string):
# Delegate to the callable stored by __init__ and return its result.
return self._is_position_based(string)

def normalize(self, matches):
return [m for m in matches if m != "%"]

def format_string(self, string):
# Return string with a leading "%" prepended.
# NOTE(review): presumably restores the marker that extraction leaves
# out of each match - confirm against the regexp in FLAG_RULES.
return f"%{string}"

Expand Down Expand Up @@ -500,9 +505,7 @@ class SchemeFormatCheck(BasePrintfCheck):
check_id = "scheme_format"
name = _("Scheme format")
description = _("Scheme format string does not match source")

def normalize(self, matches):
return [m for m in matches if m != "~"]
normalize_remove = "~"

def format_string(self, string):
# Return string with a leading "~" prepended.
# NOTE(review): presumably restores the marker that extraction leaves
# out of each match - confirm against the regexp in FLAG_RULES.
return f"~{string}"
Expand Down
40 changes: 32 additions & 8 deletions weblate/checks/tests/test_format_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,26 @@ def test_description_nolocation(self):
"Following format strings are wrongly ordered: %d, %s",
)

def test_duplicated_format(self):
    """Repeated placeholders are compared by how often they occur."""
    # Target repeats %(classF)s one more time than the source.
    surplus = self.check.check_format(
        "%(LANGUAGE)s %(classF)s %(mailto)s %(classS)s %(mail)s",
        "%(classF)s %(LANGUAGE)s %(classF)s %(mailto)s %(classS)s %(mail)s",
        False,
        None,
    )
    self.assertEqual(surplus, {"missing": [], "extra": ["(classF)s"]})

    # Target has one occurrence of %(test)s fewer than the source.
    shortfall = self.check.check_format(
        "%(test)s%(test)s%(test)s%(test)s",
        "%(test)s%(test)s%(test)s",
        False,
        None,
    )
    self.assertEqual(shortfall, {"missing": ["(test)s"], "extra": []})


class PHPFormatCheckTest(CheckTestCase):
check = PHPFormatCheck()
Expand Down Expand Up @@ -337,8 +357,9 @@ def test_wrong_format(self):
self.assertTrue(self.check.check_format("%s string", "%c string", False, None))

def test_wrong_named_format(self):
self.assertTrue(
self.check.check_format("%10s string", "%20s string", False, None)
self.assertEqual(
self.check.check_format("%10s string", "%20s string", False, None),
{"missing": ["10s"], "extra": ["20s"]},
)

def test_reorder_format(self):
Expand All @@ -352,13 +373,14 @@ def test_locale_delimiter(self):
)

def test_ld_format(self):
self.assertFalse(
self.assertEqual(
self.check.check_format(
"%ld bytes (free %ld bytes, used %ld bytes)",
"%l octets (%l octets libres, %l octets utilisés)",
True,
None,
)
),
{"missing": ["ld", "ld", "ld"], "extra": ["l", "l", "l"]},
)

def test_parenthesis(self):
Expand Down Expand Up @@ -549,8 +571,9 @@ def test_missing_full_format(self):
)

def test_wrong_format(self):
self.assertTrue(
self.check.check_format("{0} string", "{1} string", False, None)
self.assertEqual(
self.check.check_format("{0} string", "{1} string", False, None),
{"missing": ["0"], "extra": ["1"]},
)

def test_missing_named_format_ignore(self):
Expand All @@ -560,8 +583,9 @@ def test_escaping_with_position(self):
self.assertFalse(self.check.check_format("{{ 0 }}", "string", False, None))

def test_wrong_attribute_format(self):
self.assertTrue(
self.check.check_format("{0} string", "{1} string", False, None)
self.assertEqual(
self.check.check_format("{0} string", "{1} string", False, None),
{"missing": ["0"], "extra": ["1"]},
)

def test_reordered_format(self):
Expand Down

0 comments on commit 6eb35fe

Please sign in to comment.