From cdf8e7ea5a230bafb76cdc960f2dc3b1f9cbb476 Mon Sep 17 00:00:00 2001 From: Dristy Srivastava <58721149+dristysrivastava@users.noreply.github.com> Date: Thu, 19 Sep 2024 10:45:26 +0530 Subject: [PATCH] New field added client-secret for Azure client secret ID. (#555) * New field added azure-client-secret for Azure client secret ID. * Updated test cases --- docs/gh_pages/docs/entityclassifier.md | 1 + pebblo/entity_classifier/README.md | 1 + pebblo/entity_classifier/utils/config.py | 2 +- .../entity_classifier/utils/regex_pattern.py | 2 +- tests/entity_classifier/mock_response.py | 35 +- tests/entity_classifier/test_data.py | 55 ++- .../test_entity_classifier.py | 402 +++++++----------- 7 files changed, 221 insertions(+), 277 deletions(-) diff --git a/docs/gh_pages/docs/entityclassifier.md b/docs/gh_pages/docs/entityclassifier.md index 612b69c3..c21a9510 100644 --- a/docs/gh_pages/docs/entityclassifier.md +++ b/docs/gh_pages/docs/entityclassifier.md @@ -31,6 +31,7 @@ Below is the list of `entities` supported by Pebblo - 1. RSA Private Key 1. Google Account Private Key 1. Github Fine Grained Token +1. Azure Client Secret Key User can get details of classified entities for their loader source files in Pebblo report. diff --git a/pebblo/entity_classifier/README.md b/pebblo/entity_classifier/README.md index ab180196..6d2965cf 100644 --- a/pebblo/entity_classifier/README.md +++ b/pebblo/entity_classifier/README.md @@ -25,6 +25,7 @@ And following Secret Entities: 10. RSA Private Key 11. Google Account Private Key 12. Github Fine Grained Token +13. Azure Client Secret Key ## How to use Entity Classifier diff --git a/pebblo/entity_classifier/utils/config.py b/pebblo/entity_classifier/utils/config.py index 016bd83c..23da3e1a 100644 --- a/pebblo/entity_classifier/utils/config.py +++ b/pebblo/entity_classifier/utils/config.py @@ -11,7 +11,7 @@ "aws-access-key": ["aws_access_key", "aws_key", "access", "id", "api"], "aws-secret-key": ["aws_secret_key", "secret"], "azure-key-id": ["azure_key", "azure_key_id", "azure_id", "key"], - "azure-client-secret": ["azure_client_secret", "client", "secret"], + "azure-client-secret": ["azure_client_secret", "client-secret", "client_secret"], "google-api-key": ["google_api_key", "google_key", "google"], } diff --git a/pebblo/entity_classifier/utils/regex_pattern.py b/pebblo/entity_classifier/utils/regex_pattern.py index af3628b1..51ecbf63 100644 --- a/pebblo/entity_classifier/utils/regex_pattern.py +++ b/pebblo/entity_classifier/utils/regex_pattern.py @@ -12,6 +12,6 @@ "aws-access-key": r"""\b((?:AKIA|ABIA|ACCA|ASIA)[0-9A-Z]{16})\b""", "aws-secret-key": r"""\b([A-Za-z0-9+/]{40})[ \r\n'"\x60]""", "azure-key-id": r"""(?i)(%s).{0,20}([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})""", - "azure-client-secret": r"""\b(?i)(%s).{0,20}([a-z0-9_\.\-~]{34})\b""", + "azure-client-secret": r"""\b[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\b""", "google-api-key": r"""\bAIza[0-9A-Za-z\-_]{35}\b""", } diff --git a/tests/entity_classifier/mock_response.py b/tests/entity_classifier/mock_response.py index 266879de..28bbf118 100644 --- a/tests/entity_classifier/mock_response.py +++ b/tests/entity_classifier/mock_response.py @@ -1,28 +1,27 @@ mock_input_text1_anonymize_snippet_true = """ -<PERSON>'s SSN is <US_SSN>. +Sachin's SSN is <US_SSN>. ITIN number <US_ITIN> His AWS Access Key is: <AWS_ACCESS_KEY>. -And <PERSON> is: <GITHUB_TOKEN> +And Github Token is: <GITHUB_TOKEN> """ mock_input_text2_anonymize_snippet_true = """ Content -"<PERSON> board on <DATE_TIME> announced an interim dividend of Re 1 per equity share of the face value of Rs 2 each, i.e., a 50 per cent payout for <DATE_TIME> along with financial results for the <DATE_TIME> period of the company for <DATE_TIME>." -"<PERSON> reminded the board of the scheduled retreat coming up in <DATE_TIME>, and provided a drafted retreat schedule. The board provided feedback on the agenda and the consensus was that, outside of making a few minor changes, the committee should move forward as planned. No board action required." +"Wipros board on Friday, January 12 announced an interim dividend of Re 1 per equity share of the face value of Rs 2 each, i.e., a 50 per cent payout for the current financial year along with financial results for the October-December period of the company for the financial year ending March 2024." +"Roberts reminded the board of the scheduled retreat coming up in three months, and provided a drafted retreat schedule. The board provided feedback on the agenda and the consensus was that, outside of making a few minor changes, the committee should move forward as planned. No board action required." "Claims: An adaptive pacing system for implantable cardiac devices, comprising a pulse generator, multiple sensing electrodes, a microprocessor-based control unit, a wireless communication module, and memory for dynamically adjusting pacing parameters based on real-time physiological data. The system of claim 1, wherein the adaptive pacing algorithms include rate-responsive pacing based on physical activity. The system of claim 1, further comprising an external monitoring system for remote data access and modification of pacing parameters." -"<PERSON>'s SSN is <US_SSN>. His passport ID is 5484880UA. -<PERSON>'s driver's license number is <NRP>. -<PERSON>'s bank account number is 70048841700216300. -His <NRP> express credit card number is <CREDIT_CARD>. -His UK IBAN Code is <IBAN_CODE>. -ITIN number <US_ITIN>. -Azure client secret : c4cb6f91-15a7-4e6d-a824-abcdef012345. -AWS Access Key is: <AWS_ACCESS_KEY> -AWS Secret Key is : <AWS_SECRET_KEY> -Github Token is: <GITHUB_TOKEN> -Google API key: <PERSON><PERSON> is: <SLACK_TOKEN> -Azure Client Secret - c4cb6f91-15a7-4e6d-a824-abcdef012345 -<PERSON> - <SLACK_TOKEN> +"Sachin's SSN is <US_SSN>. His passport ID is 5484880UA. +Sachin's driver's license number is <US_DRIVER_LICENSE>. +Sachin's bank account number is <US_BANK_NUMBER>. +His American express credit card number is <CREDIT_CARD>. +His UK IBAN Code is <IBAN_CODE>. +ITIN number <US_ITIN>. +AWS Access Key is: <AWS_ACCESS_KEY> +AWS Secret Key is : <AWS_SECRET_KEY>Github Token is: <GITHUB_TOKEN> +Google API key: zaCELgL0imfnc8mVLWwsAawjYr4Rx-Af50DDqtlx +Slack Token is: <SLACK_TOKEN> +Slack Token - <SLACK_TOKEN> Google API key- KLzaSyB_tWrbmfWx8g2bzL7Vhq7znuTUn0JPKmY" -IP Address - <IP_ADDRESS> +My IP Address - <IP_ADDRESS> +Azure client_secret is <AZURE_CLIENT_SECRET> """ diff --git a/tests/entity_classifier/test_data.py b/tests/entity_classifier/test_data.py index 1f1fdcd2..c57004dd 100644 --- a/tests/entity_classifier/test_data.py +++ b/tests/entity_classifier/test_data.py @@ -10,22 +10,21 @@ "Wipros board on Friday, January 12 announced an interim dividend of Re 1 per equity share of the face value of Rs 2 each, i.e., a 50 per cent payout for the current financial year along with financial results for the October-December period of the company for the financial year ending March 2024." "Roberts reminded the board of the scheduled retreat coming up in three months, and provided a drafted retreat schedule. The board provided feedback on the agenda and the consensus was that, outside of making a few minor changes, the committee should move forward as planned. No board action required." "Claims: An adaptive pacing system for implantable cardiac devices, comprising a pulse generator, multiple sensing electrodes, a microprocessor-based control unit, a wireless communication module, and memory for dynamically adjusting pacing parameters based on real-time physiological data. The system of claim 1, wherein the adaptive pacing algorithms include rate-responsive pacing based on physical activity. The system of claim 1, further comprising an external monitoring system for remote data access and modification of pacing parameters." -"Sachin's SSN is 222-85-4836. His passport ID is 5484880UA. -Sachin's driver's license number is S9998888. -Sachin's bank account number is 70048841700216300. -His American express credit card number is 371449635398431. -His UK IBAN Code is AZ96AZEJ00000000001234567890. -ITIN number 993-77 0690. -Azure client secret : c4cb6f91-15a7-4e6d-a824-abcdef012345. -AWS Access Key is: AKIAQIPT4PDORIRTV6PH -AWS Secret Key is : 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 -Github Token is: ghp_hgu657yiujgwfrtigu3ver238765tyuhygvtrder6t7gyvhbuy5e676578976tyghy76578uygfyfgcyturtdf -Google API key: zaCELgL0imfnc8mVLWwsAawjYr4Rx-Af50DDqtlx -Slack Token is: xoxp-7676545380258-uygh -Azure Client Secret - c4cb6f91-15a7-4e6d-a824-abcdef012345 -Slack Token - xoxb-3204014939555-4519358291237-TTIf0243T8YFSAGEVr1wBrWE +"Sachin's SSN is 222-85-4836. His passport ID is 5484880UA. +Sachin's driver's license number is S9998888. +Sachin's bank account number is 70048841700216300. +His American express credit card number is 371449635398431. +His UK IBAN Code is AZ96AZEJ00000000001234567890. +ITIN number 993-77 0690. +AWS Access Key is: AKIAQIPT4PDORIRTV6PH +AWS Secret Key is : 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 +Github Token is: ghp_hgu657yiujgwfrtigu3ver238765tyuhygvtrder6t7gyvhbuy5e676578976tyghy76578uygfyfgcyturtdf +Google API key: zaCELgL0imfnc8mVLWwsAawjYr4Rx-Af50DDqtlx +Slack Token is: xoxp-7676545380258-uygh +Slack Token - xoxb-3204014939555-4519358291237-TTIf0243T8YFSAGEVr1wBrWE Google API key- KLzaSyB_tWrbmfWx8g2bzL7Vhq7znuTUn0JPKmY" -My IP Address - 10.55.60.61 +My IP Address - 10.55.60.61 +Azure client_secret is de1d4a2d-d9fa-44f1-84bb-4f73c004afda """ negative_data = """ @@ -33,3 +32,29 @@ His AWS Access Key is: AKIPT4PDORIRTV6PH. And Github Token is: ghpu657yiujgwfrtigu3ver238765tyuhygvtrder6t7gyvhbuy5e676578976tyghy76578uygfyfgcyturtdf """ + +tf_test_data = """ +variable "client_secret" { +} + +# We strongly recommend using the required_providers block to set the +# Azure Provider source and version being used +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = "~> 4.x" + } + } +} + +# Configure the Microsoft Azure Provider +provider "azurerm" { + features {} + + client_id = "00000000-0000-0000-0000-000000000000" + client_secret = "1131a1fc-8cee-4f3c-9b2f-6808f66f72a4" + tenant_id = "10000000-0000-0000-0000-000000000000" + subscription_id = "20000000-0000-0000-0000-000000000000" +} +""" diff --git a/tests/entity_classifier/test_entity_classifier.py b/tests/entity_classifier/test_entity_classifier.py index 25181766..c96d58c0 100644 --- a/tests/entity_classifier/test_entity_classifier.py +++ b/tests/entity_classifier/test_entity_classifier.py @@ -1,6 +1,3 @@ -from typing import List, Tuple -from unittest.mock import Mock, patch - import pytest from pebblo.entity_classifier.entity_classifier import EntityClassifier @@ -8,7 +5,12 @@ mock_input_text1_anonymize_snippet_true, mock_input_text2_anonymize_snippet_true, ) -from tests.entity_classifier.test_data import input_text1, input_text2, negative_data +from tests.entity_classifier.test_data import ( + input_text1, + input_text2, + negative_data, + tf_test_data, +) class TestAnonymizerResult: @@ -17,211 +19,23 @@ def __init__(self, entity_type): @pytest.fixture -def mocked_objects(): - with ( - patch( - "pebblo.entity_classifier.entity_classifier.AnalyzerEngine" - ) as mock_analyzer, - patch( - "pebblo.entity_classifier.entity_classifier.AnalyzerEngine" - ) as mock_anomyzer, - patch( - "pebblo.entity_classifier.utils.utils.add_custom_regex_analyzer_registry" - ) as mock_custom_registry, - ): - yield mock_analyzer, mock_anomyzer, mock_custom_registry - - -@pytest.fixture -def mocked_entity_classifier_response(mocker): - """ - Mocking entity classifier response - """ - mocker.patch( - "pebblo.entity_classifier.entity_classifier.EntityClassifier.analyze_response", - return_value=Mock(), - ) - - anonymize_response1: Tuple[list, str] = ( - [ - TestAnonymizerResult("GITHUB_TOKEN"), - TestAnonymizerResult("AWS_ACCESS_KEY"), - TestAnonymizerResult("US_ITIN"), - TestAnonymizerResult("US_SSN"), - ], - mock_input_text1_anonymize_snippet_true, - ) - anonymize_response2: Tuple[list, str] = ( - [ - TestAnonymizerResult("SLACK_TOKEN"), - TestAnonymizerResult("SLACK_TOKEN"), - TestAnonymizerResult("GITHUB_TOKEN"), - TestAnonymizerResult("AWS_SECRET_KEY"), - TestAnonymizerResult("AWS_ACCESS_KEY"), - TestAnonymizerResult("US_ITIN"), - TestAnonymizerResult("IBAN_CODE"), - TestAnonymizerResult("CREDIT_CARD"), - TestAnonymizerResult("US_SSN"), - TestAnonymizerResult("IP_ADDRESS"), - ], - mock_input_text2_anonymize_snippet_true, - ) - anonymize_negative_response: Tuple[list, str] = ( - [], - negative_data, - ) - mocker.patch( - "pebblo.entity_classifier.entity_classifier.EntityClassifier.anonymize_response", - side_effect=[ - anonymize_response1, - anonymize_response2, - anonymize_negative_response, - ], - ) - - analyzed_entities_response1: List[dict] = [ - {"entity_type": "US_SSN", "location": "17_28", "confidence_score": 0.85}, - {"entity_type": "US_ITIN", "location": "42_53", "confidence_score": 0.85}, - { - "entity_type": "AWS_ACCESS_KEY", - "location": "77_97", - "confidence_score": 0.8, - }, - { - "entity_type": "GITHUB_TOKEN", - "location": "120_210", - "confidence_score": 0.8, - }, - ] - analyzed_entities_response2: List[dict] = [ - {"entity_type": "US_SSN", "location": "17_25", "confidence_score": 0.85}, - {"entity_type": "US_ITIN", "location": "39_48", "confidence_score": 0.85}, - { - "entity_type": "AWS_ACCESS_KEY", - "location": "72_88", - "confidence_score": 0.8, - }, - { - "entity_type": "GITHUB_TOKEN", - "location": "111_125", - "confidence_score": 0.8, - }, - ] - analyzed_entities_response3: List[dict] = [ - { - "entity_type": "CREDIT_CARD", - "location": "1367_1382", - "confidence_score": 1.0, - }, - { - "entity_type": "IBAN_CODE", - "location": "1406_1434", - "confidence_score": 1.0, - }, - {"entity_type": "US_SSN", "location": "1178_1189", "confidence_score": 0.85}, - {"entity_type": "US_ITIN", "location": "1450_1461", "confidence_score": 0.85}, - { - "entity_type": "AWS_ACCESS_KEY", - "location": "1545_1565", - "confidence_score": 0.8, - }, - { - "entity_type": "AWS_SECRET_KEY", - "location": "1587_1628", - "confidence_score": 0.8, - }, - { - "entity_type": "GITHUB_TOKEN", - "location": "1646_1736", - "confidence_score": 0.8, - }, - { - "entity_type": "SLACK_TOKEN", - "location": "1812_1835", - "confidence_score": 0.8, - }, - { - "entity_type": "SLACK_TOKEN", - "location": "1911_1968", - "confidence_score": 0.8, - }, - {"entity_type": "IP_ADDRESS", "location": "1339_1355", "confidence_score": 0.8}, - ] - analyzed_entities_response4: List[dict] = [ - { - "entity_type": "CREDIT_CARD", - "location": "1178_1186", - "confidence_score": 1.0, - }, - { - "entity_type": "IBAN_CODE", - "location": "1364_1377", - "confidence_score": 1.0, - }, - {"entity_type": "US_SSN", "location": "1401_1412", "confidence_score": 0.85}, - {"entity_type": "US_ITIN", "location": "1428_1437", "confidence_score": 0.85}, - { - "entity_type": "AWS_ACCESS_KEY", - "location": "1521_1537", - "confidence_score": 0.8, - }, - { - "entity_type": "AWS_SECRET_KEY", - "location": "1559_1575", - "confidence_score": 0.8, - }, - { - "entity_type": "GITHUB_TOKEN", - "location": "1593_1607", - "confidence_score": 0.8, - }, - { - "entity_type": "SLACK_TOKEN", - "location": "1683_1696", - "confidence_score": 0.8, - }, - { - "entity_type": "SLACK_TOKEN", - "location": "1772_1785", - "confidence_score": 0.8, - }, - {"entity_type": "IP_ADDRESS", "location": "1339_1355", "confidence_score": 0.8}, - ] - analyzed_entities_negative_response1: List = [] - analyzed_entities_negative_response2: List = [] - mocker.patch( - "pebblo.entity_classifier.entity_classifier.EntityClassifier.get_analyzed_entities_response", - side_effect=[ - analyzed_entities_response1, - analyzed_entities_response2, - analyzed_entities_response3, - analyzed_entities_response4, - analyzed_entities_negative_response1, - analyzed_entities_negative_response2, - ], - ) - - -@pytest.fixture -def entity_classifier(mocked_objects): +def entity_classifier(): """ Create an instance of the EntityClassifier class """ return EntityClassifier() -def test_entity_classifier_init(mocked_objects) -> None: +def test_entity_classifier_init() -> None: """ Initiated Entity Classifier """ _ = EntityClassifier() -def test_presidio_entity_classifier_and_anonymizer( - entity_classifier, mocked_entity_classifier_response -): +def test_entity_classifier_and_anonymizer1(entity_classifier): """ - UTs for presidio_entity_classifier_and_anonymizer function + UT for presidio_entity_classifier_and_anonymizer function with input_text1 """ ( entities, @@ -285,34 +99,39 @@ def test_presidio_entity_classifier_and_anonymizer( assert entity_details == { "us-ssn": [ { - "location": "17_25", + "location": "17_31", "confidence_score": "HIGH", "entity_group": "pii-identification", } ], "us-itin": [ { - "location": "39_48", + "location": "45_60", "confidence_score": "HIGH", "entity_group": "pii-financial", } ], "aws-access-key": [ { - "location": "72_88", + "location": "84_106", "confidence_score": "HIGH", "entity_group": "secrets_and_tokens", } ], "github-token": [ { - "location": "111_125", + "location": "129_149", "confidence_score": "HIGH", "entity_group": "secrets_and_tokens", } ], } + +def test_entity_classifier_and_anonymizer2(entity_classifier): + """ + UT for presidio_entity_classifier_and_anonymizer function with input_text2 + """ ( entities, total_count, @@ -320,87 +139,111 @@ def test_presidio_entity_classifier_and_anonymizer( entity_details, ) = entity_classifier.presidio_entity_classifier_and_anonymizer(input_text2) assert entities == { - "slack-token": 2, - "github-token": 1, + "us-ssn": 1, + "us-drivers-license": 1, + "us-bank-account-number": 1, + "credit-card-number": 1, + "iban-code": 1, + "us-itin": 1, "aws-access-key": 1, "aws-secret-key": 1, - "us-itin": 1, - "iban-code": 1, - "credit-card-number": 1, - "us-ssn": 1, + "github-token": 1, + "slack-token": 2, "ip-address": 1, + "azure-client-secret": 1, } - assert total_count == 10 + assert total_count == 13 assert anonymized_text == input_text2 assert entity_details == { - "credit-card-number": [ + "us-ssn": [ { - "location": "1367_1382", + "location": "1178_1189", "confidence_score": "HIGH", + "entity_group": "pii-identification", + } + ], + "us-drivers-license": [ + { + "location": "1257_1265", + "confidence_score": "MEDIUM", + "entity_group": "pii-identification", + } + ], + "us-bank-account-number": [ + { + "location": "1299_1316", + "confidence_score": "MEDIUM", "entity_group": "pii-financial", } ], - "iban-code": [ + "credit-card-number": [ { - "location": "1406_1434", + "location": "1361_1376", "confidence_score": "HIGH", "entity_group": "pii-financial", } ], - "us-ssn": [ + "iban-code": [ { - "location": "1178_1189", + "location": "1398_1426", "confidence_score": "HIGH", - "entity_group": "pii-identification", + "entity_group": "pii-financial", } ], "us-itin": [ { - "location": "1450_1461", + "location": "1440_1451", "confidence_score": "HIGH", "entity_group": "pii-financial", } ], "aws-access-key": [ { - "location": "1545_1565", + "location": "1472_1492", "confidence_score": "HIGH", "entity_group": "secrets_and_tokens", } ], "aws-secret-key": [ { - "location": "1587_1628", + "location": "1513_1554", "confidence_score": "HIGH", "entity_group": "secrets_and_tokens", } ], "github-token": [ { - "location": "1646_1736", + "location": "1571_1661", "confidence_score": "HIGH", "entity_group": "secrets_and_tokens", } ], "slack-token": [ { - "location": "1812_1835", + "location": "1735_1758", "confidence_score": "HIGH", "entity_group": "secrets_and_tokens", }, { - "location": "1911_1968", + "location": "1773_1830", "confidence_score": "HIGH", "entity_group": "secrets_and_tokens", }, ], "ip-address": [ { - "location": "1339_1355", + "location": "1904_1915", "confidence_score": "HIGH", "entity_group": "pii-network", } ], + "azure-client-secret": [ + { + "location": "1939_1975", + "confidence_score": "HIGH", + "entity_group": "secrets_and_tokens", + } + ], } ( @@ -412,89 +255,118 @@ def test_presidio_entity_classifier_and_anonymizer( input_text2, anonymize_snippets=True ) assert entities == { - "slack-token": 2, - "github-token": 1, + "us-ssn": 1, + "us-drivers-license": 1, + "us-bank-account-number": 1, + "credit-card-number": 1, + "iban-code": 1, + "us-itin": 1, "aws-access-key": 1, "aws-secret-key": 1, - "us-itin": 1, - "iban-code": 1, - "credit-card-number": 1, - "us-ssn": 1, + "github-token": 1, + "slack-token": 2, "ip-address": 1, + "azure-client-secret": 1, } - assert total_count == 10 + assert total_count == 13 assert anonymized_text == mock_input_text2_anonymize_snippet_true assert entity_details == { - "credit-card-number": [ + "us-ssn": [ { - "location": "1178_1186", + "location": "1178_1192", "confidence_score": "HIGH", + "entity_group": "pii-identification", + } + ], + "us-drivers-license": [ + { + "location": "1260_1285", + "confidence_score": "MEDIUM", + "entity_group": "pii-identification", + } + ], + "us-bank-account-number": [ + { + "location": "1319_1341", + "confidence_score": "MEDIUM", "entity_group": "pii-financial", } ], - "iban-code": [ + "credit-card-number": [ { - "location": "1364_1377", + "location": "1386_1405", "confidence_score": "HIGH", "entity_group": "pii-financial", } ], - "us-ssn": [ + "iban-code": [ { - "location": "1401_1412", + "location": "1427_1444", "confidence_score": "HIGH", - "entity_group": "pii-identification", + "entity_group": "pii-financial", } ], "us-itin": [ { - "location": "1428_1437", + "location": "1458_1473", "confidence_score": "HIGH", "entity_group": "pii-financial", } ], "aws-access-key": [ { - "location": "1521_1537", + "location": "1494_1516", "confidence_score": "HIGH", "entity_group": "secrets_and_tokens", } ], "aws-secret-key": [ { - "location": "1559_1575", + "location": "1537_1559", "confidence_score": "HIGH", "entity_group": "secrets_and_tokens", } ], "github-token": [ { - "location": "1593_1607", + "location": "1576_1596", "confidence_score": "HIGH", "entity_group": "secrets_and_tokens", } ], "slack-token": [ { - "location": "1683_1696", + "location": "1670_1689", "confidence_score": "HIGH", "entity_group": "secrets_and_tokens", }, { - "location": "1772_1785", + "location": "1704_1723", "confidence_score": "HIGH", "entity_group": "secrets_and_tokens", }, ], "ip-address": [ { - "location": "1339_1355", + "location": "1797_1815", "confidence_score": "HIGH", "entity_group": "pii-network", } ], + "azure-client-secret": [ + { + "location": "1839_1866", + "confidence_score": "HIGH", + "entity_group": "secrets_and_tokens", + } + ], } + +def test_entity_classifier_and_anonymizer_negative_data(entity_classifier): + """ + UT for presidio_entity_classifier_and_anonymizer function with negative_data + """ ( entities, total_count, @@ -516,3 +388,49 @@ def test_presidio_entity_classifier_and_anonymizer( assert entities == {} assert total_count == 0 assert anonymized_text == negative_data + + +def test_entity_classifier_and_anonymizer_azure_secret(entity_classifier): + """ + UT for presidio_entity_classifier_and_anonymizer function with tf_test_data + """ + ( + entities, + total_count, + anonymized_text, + entity_details, + ) = entity_classifier.presidio_entity_classifier_and_anonymizer(tf_test_data) + assert entities == { + "azure-client-secret": 1, + } + assert total_count == 1 + assert anonymized_text == tf_test_data + assert entity_details == { + "azure-client-secret": [ + { + "location": "430_466", + "confidence_score": "HIGH", + "entity_group": "secrets_and_tokens", + } + ] + } + + ( + entities, + total_count, + anonymized_text, + entity_details, + ) = entity_classifier.presidio_entity_classifier_and_anonymizer(tf_test_data, True) + assert entities == { + "azure-client-secret": 1, + } + assert total_count == 1 + assert entity_details == { + "azure-client-secret": [ + { + "location": "430_457", + "confidence_score": "HIGH", + "entity_group": "secrets_and_tokens", + } + ] + }