georgkrause · georgkrause · Nov 19, 2024 · Jul 26, 2024 · Nov 6, 2024
diff --git a/langcodes/__init__.py b/langcodes/__init__.py
@@ -674,14 +674,16 @@ def match_score(self, supported: 'Language') -> int:
         )
         return 100 - min(self.distance(supported), 100)
 
-    def distance(self, supported: 'Language') -> int:
+    def distance(self, supported: 'Language', ignore_script: bool = False) -> int:
         """
         Suppose that `self` is the language that the user desires, and
         `supported` is a language that is actually supported.
 
         This method returns a number from 0 to 134 measuring the 'distance'
         between the languages (lower numbers are better). This is not a
-        symmetric relation.
+        symmetric relation. If `ignore_script` is `True`, the script will
+        not be used in the comparison, possibly resulting in a smaller
+        'distance'.
 
         The language distance is not really about the linguistic similarity or
         history of the languages; instead, it's based largely on sociopolitical
@@ -703,9 +705,10 @@ def distance(self, supported: 'Language') -> int:
             desired_triple = ('und', 'Zzzz', 'ZZ')
         else:
             desired_complete = self.prefer_macrolanguage().maximize()
+
             desired_triple = (
                 desired_complete.language,
-                desired_complete.script,
+                None if ignore_script else desired_complete.script,
                 desired_complete.territory,
             )
 
@@ -717,9 +720,10 @@ def distance(self, supported: 'Language') -> int:
             supported_triple = ('und', 'Zzzz', 'ZZ')
         else:
             supported_complete = supported.prefer_macrolanguage().maximize()
+
             supported_triple = (
                 supported_complete.language,
-                supported_complete.script,
+                None if ignore_script else supported_complete.script,
                 supported_complete.territory,
             )
 
@@ -1648,7 +1652,7 @@ def tag_match_score(
     return desired_ld.match_score(supported_ld)
 
 
-def tag_distance(desired: Union[str, Language], supported: Union[str, Language]) -> int:
+def tag_distance(desired: Union[str, Language], supported: Union[str, Language], ignore_script: bool = False) -> int:
     """
     Tags that expand to the same thing when likely values are filled in get a
     distance of 0.
@@ -1792,13 +1796,19 @@ def tag_distance(desired: Union[str, Language], supported: Union[str, Language])
     >>> tag_distance('ja', 'ja-Latn-US-hepburn')
     54
 
+    If `ignore_script` is used, the script difference is ignored and a smaller
+    difference with lower score will be found.
+
+    >>> tag_distance('ja', 'ja-Latn-hepburn', ignore_script=True)
+    0
+
     >>> # You can read the Shavian script, right?
     >>> tag_distance('en', 'en-Shaw')
     54
     """
     desired_obj = Language.get(desired)
     supported_obj = Language.get(supported)
-    return desired_obj.distance(supported_obj)
+    return desired_obj.distance(supported_obj, ignore_script)
 
 
 def best_match(
@@ -1835,6 +1845,7 @@ def closest_match(
     desired_language: Union[str, Language],
     supported_languages: Sequence[str],
     max_distance: int = 25,
+    ignore_script: bool = False,
 ) -> Tuple[str, int]:
     """
     You have software that supports any of the `supported_languages`. You want
@@ -1854,6 +1865,9 @@ def closest_match(
     the wrong language. The documentation for `tag_distance` describes the
     distance values in more detail.
 
+    `ignore_script` makes the matching ignore scripts, allowing matches to be
+    found when they wouldn't otherwise be due to different scripts.
+
     When there is a tie for the best matching language, the first one in the
     tie will be used.
 
@@ -1871,6 +1885,9 @@ def closest_match(
 
     >>> closest_match('ja', ['ja-Latn-hepburn', 'en'])
     ('und', 1000)
+
+    >>> closest_match('ja', ['ja-Latn-hepburn', 'en'], ignore_script=True)
+    ('ja-Latn-hepburn', 0)
     """
     desired_language = str(desired_language)
 
@@ -1884,7 +1901,7 @@ def closest_match(
         return desired_language, 0
 
     match_distances = [
-        (supported, tag_distance(desired_language, supported))
+        (supported, tag_distance(desired_language, supported, ignore_script))
         for supported in supported_languages
     ]
     match_distances = [

diff --git a/langcodes/tests/test_issue_59.py b/langcodes/tests/test_issue_59.py
@@ -0,0 +1,21 @@
+from langcodes import closest_match
+
+
+def test_language_less_than():
+    spoken_language_1 = 'pa'
+    spoken_language_2 = 'pa-PK'
+    match = closest_match(
+        spoken_language_1, [spoken_language_2], ignore_script=True
+    )
+    print(match)
+    assert match[0] != "und"
+
+
+def test_language_more_than():
+    spoken_language_1 = 'pa-PK'
+    spoken_language_2 = 'pa'
+    match = closest_match(
+        spoken_language_1, [spoken_language_2], ignore_script=True
+    )
+    print(match)
+    assert match[0] != "und"