Merge pull request #206 from PabloLec/main

Test regex names capitalization
bee-san · Oct 11, 2021 · 4ac82e6 · 4ac82e6
2 parents 669682c + 55b3465
commit 4ac82e6
Show file tree

Hide file tree

Showing 4 changed files with 60 additions and 39 deletions.
diff --git a/pywhat/Data/regex.json b/pywhat/Data/regex.json
@@ -1575,7 +1575,7 @@
       ]
    },
    {
-      "Name": "Amazon Web Services Organization identifier",
+      "Name": "Amazon Web Services Organization Identifier",
       "Regex": "^(o-[a-z0-9]{10,32})$",
       "plural_name": false,
       "Description": null,
@@ -1746,7 +1746,7 @@
       ]
    },
    {
-      "Name": "Amazon Web Services EC2 Instance identifier",
+      "Name": "Amazon Web Services EC2 Instance Identifier",
       "Regex": "(?i)^(\\b[a-z]+-[a-z0-9]+)$",
       "plural_name": false,
       "Description": null,

diff --git a/tests/test_identifier.py b/tests/test_identifier.py
@@ -7,22 +7,6 @@
 r = identifier.Identifier()
 
 
-def test_check_keys_in_json():
-    database = load_regexes()
-
-    for entry in database:
-        keys = list(entry.keys())
-        entry_name = entry["Name"]
-
-        assert "Name" in keys, entry_name
-        assert "Regex" in keys, entry_name
-        assert "plural_name" in keys, entry_name
-        assert "Description" in keys, entry_name
-        assert "Rarity" in keys, entry_name
-        assert "URL" in keys, entry_name
-        assert "Tags" in keys, entry_name
-
-
 def test_identifier_works():
     out = r.identify("DANHz6EQVoWyZ9rER56DwTXHWUxfkv9k2o")
     assert (

diff --git a/tests/test_regex_formatting.py b/tests/test_regex_formatting.py
@@ -0,0 +1,56 @@
+import re
+
+from pywhat.helper import load_regexes
+
+database = load_regexes()  # loaded once at import; iterated by every test in this module
+
+
+def test_name_capitalization():
+    """Check that each word of every entry's "Name" is capitalized.
+
+    A word is exempt when it already contains an uppercase or numeric
+    character, or when, with the characters "(),." stripped, it is one of
+    "a", "of" or "etc".
+    """
+    lowercase_allowed = {"a", "of", "etc"}
+    strip_punctuation = str.maketrans("", "", "(),.")
+
+    def needs_capital(word):
+        # Words with any uppercase letter or digit are left alone.
+        if any(c.isupper() or c.isnumeric() for c in word):
+            return False
+        if word.translate(strip_punctuation) in lowercase_allowed:
+            return False
+        return word.title() != word
+
+    for entry in database:
+        entry_name = entry["Name"]
+        words = entry_name.split()
+        offenders = [word for word in words if needs_capital(word)]
+        # Title-case only the offending words for the hint: str.title() on the
+        # whole name would also rewrite exempt words ("EC2" -> "Ec2",
+        # "of" -> "Of") and suggest a name this test does not require.
+        expected = " ".join(
+            word.title() if needs_capital(word) else word for word in words
+        )
+
+        assert not offenders, (
+            f'Wrong capitalization in regex name: "{entry_name}"\n'
+            f'Expected: "{expected}"\n'
+            "Please capitalize the first letter of each word."
+        )
+
+
+def test_regex_format():
+    """Check that every entry's "Regex" follows the ^(regex)$ layout.
+
+    An optional leading '(?i)' is accepted. Anchors placed directly next to
+    a '|' (for example '^|' or '|$') are rejected.
+    """
+    # Compiled once; the patterns do not change between entries.
+    wrapped = re.compile(r"^(?:\(\?i\))?\^\(.*\)\$$")
+    inner_anchor = re.compile(r"\^\||\|\^|\$\|\^|\$\||\|\$")
+
+    for regex in database:
+        # Name the offending entry so a failure can be located in the database.
+        name = regex.get("Name", "<unnamed>")
+        pattern = regex["Regex"]
+
+        assert wrapped.search(pattern), (
+            f'"{name}": '
+            r"Please use ^(regex)$ regex format. If there is '\n' character, you have to escape it. If there is '(?i)', it is allowed and should be before the '^'."
+        )
+
+        assert inner_anchor.search(pattern) is None, (
+            f'"{name}": '
+            "Remove in-between boundaries. For example, '^|$' should only be '|'."
+        )
+
+
+def test_check_keys():
+    """Check that every database entry defines all of the required keys."""
+    required_keys = (
+        "Name",
+        "Regex",
+        "plural_name",
+        "Description",
+        "Rarity",
+        "URL",
+        "Tags",
+    )
+
+    for entry in database:
+        # Fall back to the whole entry as the label: indexing entry["Name"]
+        # directly would raise KeyError before a missing "Name" could be
+        # reported as an assertion failure.
+        entry_label = entry.get("Name", entry)
+
+        for key in required_keys:
+            assert key in entry, f'Missing key "{key}" in entry: {entry_label}'
+
+
+def test_sorted_by_rarity():
+    """Check that the entries appear in descending order of "Rarity"."""
+    rarities = []
+    for entry in database:
+        rarities.append(entry["Rarity"])
+
+    descending = sorted(rarities, reverse=True)
+
+    assert (
+        rarities == descending
+    ), "Regexes should be sorted by rarity in 'regex.json'. Regexes with rarity '1' are at the top of the file and '0' is at the bottom."
diff --git a/tests/test_regex_identifier.py b/tests/test_regex_identifier.py
@@ -42,25 +42,6 @@ def test_if_all_tests_exist():
         ), "No test for this regex found in 'test_regex_identifier.py'. Note that a test needs to assert the whole name."
 
 
-def test_regex_format():
-    for regex in database:
-        assert re.findall(
-            r"^(?:\(\?i\))?\^\(.*\)\$$", regex["Regex"]
-        ), r"Please use ^(regex)$ regex format. If there is '\n' character, you have to escape it. If there is '(?i)', it is allowed and should be before the '^'."
-
-        assert (
-            re.findall(r"\^\||\|\^|\$\|\^|\$\||\|\$", regex["Regex"]) == []
-        ), "Remove in-between boundaries. For example, '^|$' should only be '|'."
-
-
-def test_sorted_by_rarity():
-    rarity_num = [regex["Rarity"] for regex in database]
-
-    assert rarity_num == sorted(
-        rarity_num, reverse=True
-    ), "Regexes should be sorted by rarity in 'regex.json'. Regexes with rarity '1' are at the top of the file and '0' is at the bottom."
-
-
 def test_dogecoin():
     res = r.check(["DANHz6EQVoWyZ9rER56DwTXHWUxfkv9k2o"])
     _assert_match_first_item("Dogecoin (DOGE) Wallet Address", res)
@@ -664,12 +645,12 @@ def test_aws_secret_access_key():
 
 def test_aws_ec2_id():
     res = r.check(["i-1234567890abcdef0"])
-    assert "Amazon Web Services EC2 Instance identifier" in str(res)
+    assert "Amazon Web Services EC2 Instance Identifier" in str(res)
 
 
 def test_aws_org_id():
     res = r.check(["o-aa111bb222"])
-    assert "Amazon Web Services Organization identifier" in str(res)
+    assert "Amazon Web Services Organization Identifier" in str(res)
 
 
 def test_aws_sns():