From 5f7b9825cdf9367d8596a32b8d213a0287b73822 Mon Sep 17 00:00:00 2001
From: jlmwise <66651702+jlmwise@users.noreply.github.com>
Date: Thu, 11 Jun 2020 11:28:05 -0700
Subject: [PATCH] Add code sample and tests for redaction (#4037)

Add a DLP code sample for redacting text.
Code will be linked to this documentation: https://cloud.google.com/dlp/docs/deidentify-sensitive-data
---
 dlp/deid.py      | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
 dlp/deid_test.py |  8 +++++++
 2 files changed, 66 insertions(+)

diff --git a/dlp/deid.py b/dlp/deid.py
index d6afea08b430..af5213256962 100644
--- a/dlp/deid.py
+++ b/dlp/deid.py
@@ -83,6 +83,64 @@ def deidentify_with_mask(
 # [END dlp_deidentify_masking]
 
 
+# [START dlp_deidentify_redact]
+def deidentify_with_redact(
+    project,
+    input_str,
+    info_types,
+):
+    """Uses the Data Loss Prevention API to deidentify sensitive data in a
+    string by redacting matched input values.
+    Args:
+        project: The Google Cloud project id to use as a parent resource.
+        input_str: The string to deidentify (will be treated as text).
+        info_types: A list of strings representing info types to look for.
+    Returns:
+        None; the response from the API is printed to the terminal.
+    """
+    import google.cloud.dlp_v2
+
+    # Instantiate a client
+    dlp = google.cloud.dlp_v2.DlpServiceClient()
+
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
+
+    # Construct inspect configuration dictionary
+    inspect_config = {
+        "info_types": [{"name": info_type} for info_type in info_types]
+    }
+
+    # Construct deidentify configuration dictionary
+    deidentify_config = {
+        "info_type_transformations": {
+            "transformations": [
+                {
+                    "primitive_transformation": {
+                        "redact_config": {}  # no options; the match is removed
+                    }
+                }
+            ]
+        }
+    }
+
+    # Construct item
+    item = {"value": input_str}
+
+    # Call the API
+    response = dlp.deidentify_content(
+        parent,
+        inspect_config=inspect_config,
+        deidentify_config=deidentify_config,
+        item=item,
+    )
+
+    # Print out the results.
+    print(response.item.value)
+
+
+# [END dlp_deidentify_redact]
+
 # [START dlp_deidentify_replace]
 def deidentify_with_replace(
     project,
diff --git a/dlp/deid_test.py b/dlp/deid_test.py
index 0a2c53829613..a407fff97fc7 100644
--- a/dlp/deid_test.py
+++ b/dlp/deid_test.py
@@ -88,6 +88,14 @@ def test_deidentify_with_mask_masking_number_specified(capsys):
     assert "My SSN is *******27" in out
 
 
+def test_deidentify_with_redact(capsys):
+    deid.deidentify_with_redact(
+        GCLOUD_PROJECT, HARMFUL_STRING + "!", ["US_SOCIAL_SECURITY_NUMBER"]
+    )
+    out, _ = capsys.readouterr()
+    assert "My SSN is !" in out
+
+
 def test_deidentify_with_replace(capsys):
     deid.deidentify_with_replace(
         GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"],