Yelp · KevinHock · Sep 17, 2019 · Sep 10, 2019 · Sep 10, 2019 · Sep 17, 2019
diff --git a/README.md b/README.md
@@ -162,6 +162,8 @@ The current heuristic searches we implement out of the box include:
 
 * **RegexBasedDetector**: checks for any keys matching certain regular expressions (Artifactory, AWS, Slack, Stripe, Mailchimp).
 
+* **JwtTokenDetector**: checks for well-formed JSON Web Tokens (JWTs).
+
 See [detect_secrets/
 plugins](https://github.com/Yelp/detect-secrets/tree/master/detect_secrets/plugins)
 for more details.

diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py
@@ -335,6 +335,11 @@ class PluginOptions(object):
             disable_flag_text='--no-mailchimp-scan',
             disable_help_text='Disable scanning for Mailchimp keys',
         ),
+        PluginDescriptor(
+            classname='JwtTokenDetector',
+            disable_flag_text='--no-jwt-scan',
+            disable_help_text='Disable scanning for JWTs',
+        ),
     ]
 
     def __init__(self, parser):

diff --git a/detect_secrets/plugins/common/initialize.py b/detect_secrets/plugins/common/initialize.py
@@ -6,6 +6,7 @@
 from ..common.util import get_mapping_from_secret_type_to_class_name
 from ..high_entropy_strings import Base64HighEntropyString  # noqa: F401
 from ..high_entropy_strings import HexHighEntropyString     # noqa: F401
+from ..jwt import JwtTokenDetector                          # noqa: F401
 from ..keyword import KeywordDetector                       # noqa: F401
 from ..mailchimp import MailchimpDetector                   # noqa: F401
 from ..private_key import PrivateKeyDetector                # noqa: F401

diff --git a/detect_secrets/plugins/common/util.py b/detect_secrets/plugins/common/util.py
@@ -11,6 +11,7 @@
 from ..basic_auth import BasicAuthDetector                  # noqa: F401
 from ..high_entropy_strings import Base64HighEntropyString  # noqa: F401
 from ..high_entropy_strings import HexHighEntropyString     # noqa: F401
+from ..jwt import JwtTokenDetector                          # noqa: F401
 from ..keyword import KeywordDetector                       # noqa: F401
 from ..private_key import PrivateKeyDetector                # noqa: F401
 from ..slack import SlackDetector                           # noqa: F401

diff --git a/detect_secrets/plugins/jwt.py b/detect_secrets/plugins/jwt.py
@@ -0,0 +1,82 @@
+"""
+This plugin finds JSON Web Tokens (JWTs)
+"""
+from __future__ import absolute_import
+
+import base64
+import json
+import re
+
+from .base import RegexBasedDetector
+
+try:
+    # Python 2
+    from future_builtins import filter
+except ImportError:
+    # Python 3
+    pass
+
+
+class JwtTokenDetector(RegexBasedDetector):
+    """Detect strings that are structurally valid JSON Web Tokens.
+
+    Candidates come from the parent class's `secret_generator` and are kept
+    only when `is_formally_valid` accepts them. Signatures are never verified.
+    """
+    secret_type = 'JSON Web Token'
+    denylist = [
+        # A base64url-encoded JSON object such as {"alg": ...} starts with
+        # 'eyJ'. NOTE: the final quantifier is lazy and ends the pattern, so
+        # it always matches the empty string; a match therefore stops right
+        # after the dot that follows the second segment, if there is one.
+        re.compile(r'eyJ[A-Za-z0-9-_=]+\.[A-Za-z0-9-_=]+\.?[A-Za-z0-9-_.+/=]*?'),
+    ]
+
+    def secret_generator(self, string, *args, **kwargs):
+        """Return the parent's candidates that pass `is_formally_valid`.
+
+        All arguments are forwarded unchanged to the parent implementation.
+        """
+        return filter(
+            self.is_formally_valid,
+            super(JwtTokenDetector, self).secret_generator(string, *args, **kwargs),
+        )
+
+    @staticmethod
+    def is_formally_valid(token):
+        """Return True if `token` has the structure of a JWT.
+
+        The token must have at least two dot-separated segments. Every
+        segment must be base64url-decodable, and the first two (header and
+        claims) must also be UTF-8 encoded JSON objects.
+
+        :type token: str
+        :rtype: bool
+        """
+        parts = token.split('.')
+        if len(parts) < 2:
+            # A header without a claims segment is not a JWT.
+            return False
+
+        for idx, part in enumerate(parts):
+            try:
+                part = part.encode('ascii')
+                # JWTs omit base64 padding; restore it before decoding.
+                # https://github.com/magical/jwt-python/blob/2fd976b41111031313107792b40d5cfd1a8baf90/jwt.py#L49
+                # https://github.com/jpadilla/pyjwt/blob/3d47b0ea9e5d489f9c90ee6dde9e3d9d69244e3a/jwt/utils.py#L33
+                remainder = len(part) % 4
+                if remainder == 1:
+                    # No byte string encodes to 4k + 1 base64 characters.
+                    raise TypeError('Incorrect padding')
+                if remainder:
+                    part += b'=' * (4 - remainder)
+                b64_decoded = base64.urlsafe_b64decode(part)
+                if idx < 2:
+                    decoded_json = json.loads(b64_decoded.decode('utf-8'))
+                    if not isinstance(decoded_json, dict):
+                        # Header and claims must be JSON objects, not bare values.
+                        return False
+            except (TypeError, ValueError, UnicodeDecodeError):
+                return False
+
+        return True
diff --git a/tests/core/usage_test.py b/tests/core/usage_test.py
@@ -42,6 +42,7 @@ def test_consolidates_output_basic(self):
             'ArtifactoryDetector': {},
             'StripeDetector': {},
             'MailchimpDetector': {},
+            'JwtTokenDetector': {},
         }
         assert not hasattr(args, 'no_private_key_scan')
 

diff --git a/tests/main_test.py b/tests/main_test.py
@@ -94,6 +94,7 @@ def test_scan_string_basic(
                 Base64HighEntropyString: {}
                 BasicAuthDetector      : False
                 HexHighEntropyString   : {}
+                JwtTokenDetector       : False
                 KeywordDetector        : False
                 MailchimpDetector      : False
                 PrivateKeyDetector     : False
@@ -120,6 +121,7 @@ def test_scan_string_cli_overrides_stdin(self):
                 Base64HighEntropyString: False (2.585)
                 BasicAuthDetector      : False
                 HexHighEntropyString   : False (2.121)
+                JwtTokenDetector       : False
                 KeywordDetector        : False
                 MailchimpDetector      : False
                 PrivateKeyDetector     : False
@@ -254,6 +256,9 @@ def test_old_baseline_ignored_with_update_flag(
                         'hex_limit': 3,
                         'name': 'HexHighEntropyString',
                     },
+                    {
+                        'name': 'JwtTokenDetector',
+                    },
                     {
                         'name': 'KeywordDetector',
                     },
@@ -294,6 +299,9 @@ def test_old_baseline_ignored_with_update_flag(
                         'hex_limit': 3,
                         'name': 'HexHighEntropyString',
                     },
+                    {
+                        'name': 'JwtTokenDetector',
+                    },
                     {
                         'name': 'KeywordDetector',
                     },
@@ -387,6 +395,9 @@ def test_old_baseline_ignored_with_update_flag(
                     {
                         'name': 'BasicAuthDetector',
                     },
+                    {
+                        'name': 'JwtTokenDetector',
+                    },
                     {
                         'name': 'MailchimpDetector',
                     },
@@ -426,6 +437,9 @@ def test_old_baseline_ignored_with_update_flag(
                     {
                         'name': 'BasicAuthDetector',
                     },
+                    {
+                        'name': 'JwtTokenDetector',
+                    },
                     {
                         'name': 'MailchimpDetector',
                     },

diff --git a/tests/plugins/jwt_test.py b/tests/plugins/jwt_test.py
@@ -0,0 +1,48 @@
+from __future__ import absolute_import
+
+import pytest
+
+from detect_secrets.plugins.jwt import JwtTokenDetector
+
+
+class TestJwtTokenDetector(object):
+    """Checks which strings JwtTokenDetector reports as secrets."""
+
+    @pytest.mark.parametrize(
+        'payload, should_flag',
+        [
+            # valid jwt
+            ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', True),  # noqa: E501
+            # valid jwt - but header contains CR/LF-s
+            ('eyJ0eXAiOiJKV1QiLA0KImFsZyI6IkhTMjU2In0.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ', True),  # noqa: E501
+            # valid jwt - but claims contain bunch of LF newlines
+            ('eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJuYW1lIjoiSm9lIiwKInN0YXR1cyI6ImVtcGxveWVlIgp9', True),  # noqa: E501
+            # valid jwt - claims contain strings with unicode accents
+            ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IsWww6HFkcOtIMOWxZHDqcOoIiwiaWF0IjoxNTE2MjM5MDIyfQ.k5HibI_uLn_RTuPcaCNkaVaQH2y5q6GvJg8GPpGMRwQ', True),  # noqa: E501
+            # as unicode literal
+            (u'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', True),  # noqa: E501
+            # no signature - but still valid
+            ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ', True),  # noqa: E501
+            # decoded - invalid
+            ('{"alg":"HS256","typ":"JWT"}.{"name":"Jon Doe"}.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', False),  # noqa: E501
+            # invalid json - invalid (caught by regex)
+            ('bm90X3ZhbGlkX2pzb25fYXRfYWxs.bm90X3ZhbGlkX2pzb25fYXRfYWxs.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', False),  # noqa: E501
+            # missing claims - invalid
+            ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9', False),  # noqa: E501
+            # totally not a jwt
+            ('jwt', False),  # noqa: E501
+            # invalid json with random bytes
+            ('eyJhbasdGciOiJIUaddasdasfsasdasdzI1NiIasdsInR5cCI6IkpXVCasdJasd9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', False),  # noqa: E501
+            # invalid json in jwt header - invalid (caught by parsing)
+            ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', False),  # noqa: E501
+            # good by regex, but otherwise totally not JWT
+            ('eyJAAAA.eyJBBB', False),  # noqa: E501
+            ('eyJBB.eyJCC.eyJDDDD', False),  # noqa: E501
+        ],
+    )
+    def test_analyze_string(self, payload, should_flag):
+        """A flagged payload yields exactly one result; any other yields none."""
+        findings = JwtTokenDetector().analyze_string(payload, 1, 'mock_filename')
+
+        expected_count = 1 if should_flag else 0
+        assert len(findings) == expected_count
diff --git a/tests/pre_commit_hook_test.py b/tests/pre_commit_hook_test.py
@@ -191,6 +191,9 @@ def test_that_baseline_gets_updated(
                     'hex_limit': 3,
                     'name': 'HexHighEntropyString',
                 },
+                {
+                    'name': 'JwtTokenDetector',
+                },
                 {
                     'name': 'KeywordDetector',
                 },