Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(iast): ssrf vulnerability redacts the url query parameters correctly [backport 2.16] #11590

Open
wants to merge 1 commit into
base: 2.16
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions ddtrace/appsec/_iast/_evidence_redaction/_sensitive_handler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
import string

from ddtrace.internal.logger import get_logger
from ddtrace.settings.asm import config as asm_config
Expand All @@ -16,7 +17,15 @@

log = get_logger(__name__)

REDACTED_SOURCE_BUFFER = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
REDACTED_SOURCE_BUFFER = string.ascii_letters + string.digits
LEN_SOURCE_BUFFER = len(REDACTED_SOURCE_BUFFER)


def get_redacted_source(length):
full_repeats = length // LEN_SOURCE_BUFFER
remainder = length % LEN_SOURCE_BUFFER
result = REDACTED_SOURCE_BUFFER * full_repeats + REDACTED_SOURCE_BUFFER[:remainder]
return result


class SensitiveHandler:
Expand Down Expand Up @@ -218,7 +227,7 @@ def to_redacted_json(self, evidence_value, sensitive, tainted_ranges, sources):
if source_index < len(sources):
if not sources[source_index].redacted and self.is_sensible_source(sources[source_index]):
redacted_sources.append(source_index)
sources[source_index].pattern = REDACTED_SOURCE_BUFFER[: len(sources[source_index].value)]
sources[source_index].pattern = get_redacted_source(len(sources[source_index].value))
sources[source_index].redacted = True

if source_index in redacted_sources:
Expand Down Expand Up @@ -282,7 +291,7 @@ def redact_source(self, sources, redacted_sources, redacted_sources_context, sou
if source_index is not None:
if not sources[source_index].redacted:
redacted_sources.append(source_index)
sources[source_index].pattern = REDACTED_SOURCE_BUFFER[: len(sources[source_index].value)]
sources[source_index].pattern = get_redacted_source(len(sources[source_index].value))
sources[source_index].redacted = True

if source_index not in redacted_sources_context.keys():
Expand Down Expand Up @@ -334,12 +343,12 @@ def write_redacted_value_part(
sensitive_start = 0
sensitive = _value[sensitive_start : _source_redaction_context["end"] - offset]
index_of_part_value_in_pattern = source.value.find(sensitive)

pattern = (
placeholder[index_of_part_value_in_pattern : index_of_part_value_in_pattern + len(sensitive)]
if index_of_part_value_in_pattern > -1
else placeholder[_source_redaction_context["start"] : _source_redaction_context["end"]]
)

value_parts.append({"redacted": True, "source": source_index, "pattern": pattern})
_value = _value[len(pattern) :]
offset += len(pattern)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,33 @@


log = get_logger(__name__)
AUTHORITY = r"^(?:[^:]+:)?//([^@]+)@"
QUERY_FRAGMENT = r"[?#&]([^=&;]+)=([^?#&]+)"
pattern = re.compile(f"({AUTHORITY})|({QUERY_FRAGMENT})", re.IGNORECASE | re.MULTILINE)
AUTHORITY_PATTERN = re.compile(r"https?://([^@]+)(?=@)", re.IGNORECASE | re.MULTILINE)
QUERY_FRAGMENT_PATTERN = re.compile(r"[?#&]([^=&;]+)=([^?#&]+)", re.IGNORECASE | re.MULTILINE)


def url_sensitive_analyzer(evidence, name_pattern=None, value_pattern=None):
try:
ranges = []
regex_result = pattern.search(evidence.value)
def find_authority(ranges, evidence):
regex_result = AUTHORITY_PATTERN.search(evidence.value)
while regex_result is not None:
if isinstance(regex_result.group(1), str):
start = regex_result.start(1)
end = regex_result.start(1) + (len(regex_result.group(1)))
ranges.append({"start": start, "end": end})

while regex_result is not None:
if isinstance(regex_result.group(1), str):
end = regex_result.start() + (len(regex_result.group(0)) - 1)
start = end - len(regex_result.group(1))
ranges.append({"start": start, "end": end})
regex_result = AUTHORITY_PATTERN.search(evidence.value, regex_result.end())

if isinstance(regex_result.group(3), str):
end = regex_result.start() + len(regex_result.group(0))
start = end - len(regex_result.group(3))
ranges.append({"start": start, "end": end})

regex_result = pattern.search(evidence.value, regex_result.end())
def find_query_fragment(ranges, evidence):
regex_result = QUERY_FRAGMENT_PATTERN.search(evidence.value)
while regex_result is not None:
if isinstance(regex_result.group(2), str):
start = regex_result.start(2)
end = regex_result.start(2) + (len(regex_result.group(2)))
ranges.append({"start": start, "end": end})
regex_result = QUERY_FRAGMENT_PATTERN.search(evidence.value, regex_result.end())

return ranges
except Exception as e:
log.debug(e)

return []
def url_sensitive_analyzer(evidence, name_pattern=None, value_pattern=None):
ranges = []
find_authority(ranges, evidence)
find_query_fragment(ranges, evidence)
return ranges
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
Code Security: Ensure IAST SSRF vulnerability redacts the url query parameters correctly.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def test_cmdi_redaction_suite(evidence_input, sources_expected, vulnerabilities_
source["origin"] = origin_to_str(source["origin"])

assert vulnerability["type"] == VULN_CMDI
assert vulnerability["evidence"] == vulnerabilities_expected["evidence"]
assert source == sources_expected


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,5 @@ def test_header_injection_redaction_suite(
source["origin"] = origin_to_str(source["origin"])

assert vulnerability["type"] == VULN_HEADER_INJECTION
assert vulnerability["evidence"] == vulnerabilities_expected["evidence"]
assert source == sources_expected
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@


# FIXME: ideally all these should pass, through the key is that we don't leak any potential PII
_ignore_list = {46, 47}
_ignore_list = {
46,
}


@pytest.mark.parametrize(
Expand Down
46 changes: 40 additions & 6 deletions tests/appsec/iast/taint_sinks/test_ssrf_redacted.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@


@pytest.mark.parametrize(
"evidence_input, sources_expected, vulnerabilities_expected", list(get_parametrize(VULN_SSRF))[0:2]
"evidence_input, sources_expected, vulnerabilities_expected", list(get_parametrize(VULN_SSRF, ignore_list={9, 10}))
)
def test_ssrf_redaction_suite(evidence_input, sources_expected, vulnerabilities_expected, iast_context_defaults):
# TODO: fix get_parametrize(VULN_SSRF)[2:] replacements doesn't work correctly with params of SSRF
tainted_object = evidence_input_value = evidence_input.get("value", "")
if evidence_input_value:
tainted_object = _taint_pyobject_multiranges(
Expand All @@ -49,6 +48,7 @@ def test_ssrf_redaction_suite(evidence_input, sources_expected, vulnerabilities_
source["origin"] = origin_to_str(source["origin"])

assert vulnerability["type"] == VULN_SSRF
assert vulnerability["evidence"] == vulnerabilities_expected["evidence"]
assert source == sources_expected


Expand All @@ -71,15 +71,44 @@ def test_ssrf_redact_param(iast_context_defaults):
assert result["vulnerabilities"]
for v in result["vulnerabilities"]:
assert v["evidence"]["valueParts"] == [
{"value": "https://www.domain1.com/"},
{"redacted": True},
{"value": "https://www.domain1.com/?id="},
{"pattern": "abcdefgh", "redacted": True, "source": 0},
{"value": "&param2="},
{"redacted": True},
{"value": "&param3="},
{"redacted": True},
{"value": "&param3="},
{"redacted": True},
]


def test_ssrf_redact_params_log_url(iast_context_defaults):
url = (
"http://dovahkiin.dovahkiin.naal.ok/zin/los/vahriin/wahdein-vokul-mahfaeraak.ast-vaal-4ee5-aa01-3de356fd2cd8/snapshots/"
"f7591b64-9839-4be8-a3a9-e7ccf5c0e17e?schemaVersion=c27f6149-36b6-4f3a-bacb-ba4b8db36892"
)
tainted_source = taint_pyobject(pyobject=url, source_name="encoded_forward_api", source_value=url)

ev = Evidence(value=tainted_source)

loc = Location(path="foobar.py", line=35, spanId=123)
v = Vulnerability(type=VULN_SSRF, evidence=ev, location=loc)
report = IastSpanReporter(vulnerabilities={v})
report.add_ranges_to_evidence_and_extract_sources(v)
result = report.build_and_scrub_value_parts()

assert result["vulnerabilities"]
for v in result["vulnerabilities"]:
assert v["evidence"]["valueParts"] == [
{
"source": 0,
"value": "http://dovahkiin.dovahkiin.naal.ok/zin/los/vahriin/wahdein-vokul-mahfaeraak.ast-vaal-4ee5-aa01-3de356fd"
"2cd8/snapshots/f7591b64-9839-4be8-a3a9-e7ccf5c0e17e?schemaVersion=",
},
{"pattern": "TUVWXYZ0123456789abcdefghijklmnopqrs", "redacted": True, "source": 0},
]


def test_cmdi_redact_user_password(iast_context_defaults):
user_taint_range = taint_pyobject(pyobject="root", source_name="username", source_value="root")
password_taint_range = taint_pyobject(
Expand Down Expand Up @@ -107,7 +136,12 @@ def test_cmdi_redact_user_password(iast_context_defaults):
assert v["evidence"]["valueParts"] == [
{"value": "https://"},
{"pattern": "abcd", "redacted": True, "source": 0},
{"value": ":"},
{"redacted": True},
{"pattern": "abcdefghijklmnopqrs", "redacted": True, "source": 1},
{"value": "@domain1.com/?id=&param2=value2&param3=value3&param3=value3"},
{"value": "@domain1.com/?id=&param2="},
{"redacted": True},
{"value": "&param3="},
{"redacted": True},
{"value": "&param3="},
{"redacted": True},
]