Skip to content

Commit

Permalink
chore(asm): performance improvement (#5233)
Browse files Browse the repository at this point in the history
Avoid creating a new pyobject if the pyobject is not interned

## Checklist

- [x] Change(s) are motivated and described in the PR description.
- [x] Testing strategy is described if automated tests are not included
in the PR.
- [x] Risk is outlined (performance impact, potential for breakage,
maintainability, etc).
- [x] Change is maintainable (easy to change, telemetry, documentation).
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines)
are followed.
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/)).
- [x] Author is aware of the performance implications of this PR as
reported in the benchmarks PR comment.

## Reviewer Checklist

- [ ] Title is accurate.
- [ ] No unnecessary changes are introduced.
- [ ] Description motivates each change.
- [ ] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes unless absolutely necessary.
- [ ] Testing strategy adequately addresses listed risk(s).
- [ ] Change is maintainable (easy to change, telemetry, documentation).
- [ ] Release note makes sense to a user of the library.
- [ ] Reviewer is aware of, and discussed the performance implications
of this PR as reported in the benchmarks PR comment.
  • Loading branch information
gnufede authored Mar 1, 2023
1 parent b3194c7 commit 4372d46
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 16 deletions.
23 changes: 17 additions & 6 deletions ddtrace/appsec/iast/_taint_tracking/_taint_tracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,21 @@ static PyObject *clear_taint_mapping(PyObject *Py_UNUSED(module),
Py_RETURN_NONE;
}

static PyObject *new_pyobject_id(PyObject *tainted_object) {
static PyObject *new_pyobject_id(PyObject *tainted_object,
Py_ssize_t object_length) {
if (PyUnicode_Check(tainted_object)) {
if (PyUnicode_CHECK_INTERNED(tainted_object) == 0) { // SSTATE_NOT_INTERNED
Py_INCREF(tainted_object);
return tainted_object;
}
return PyUnicode_Join(empty_unicode,
Py_BuildValue("(OO)", tainted_object, empty_unicode));
} else if (object_length > 1) {
// Bytes and bytearrays with length > 1 are not interned
Py_INCREF(tainted_object);
return tainted_object;
} else if (PyBytes_Check(tainted_object)) {

return PyObject_CallFunctionObjArgs(
bytes_join, empty_bytes,
Py_BuildValue("(OO)", tainted_object, empty_bytes), NULL);
Expand All @@ -52,15 +62,15 @@ static PyObject *taint_pyobject(PyObject *Py_UNUSED(module), PyObject *args) {
T_input_info input_info;
PyArg_ParseTuple(args, "OO", &tainted_object, &input_info);
// DEV: could use PyUnicode_GET_LENGTH if we are only using unicode string
Py_ssize_t tainted_length = PyObject_Length(tainted_object);
if (tainted_length < 1) {
Py_ssize_t object_length = PyObject_Length(tainted_object);
if (object_length < 1) {
Py_INCREF(tainted_object);
return tainted_object;
}

Py_INCREF(input_info);
tainted_object = new_pyobject_id(tainted_object);
TaintMapping[tainted_object] = {{input_info, 0, tainted_length}};
tainted_object = new_pyobject_id(tainted_object, object_length);
TaintMapping[tainted_object] = {{input_info, 0, object_length}};
return tainted_object;
}

Expand All @@ -75,7 +85,8 @@ static PyObject *add_taint_pyobject(PyObject *Py_UNUSED(module),
Py_INCREF(tainted_object);
return tainted_object;
}
tainted_object = new_pyobject_id(tainted_object);
tainted_object =
new_pyobject_id(tainted_object, PyObject_Length(tainted_object));
if IS_TAINTED (op1)
TaintMapping[tainted_object] = TaintMapping[op1];
else
Expand Down
19 changes: 9 additions & 10 deletions tests/appsec/iast/aspects/test_add_aspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,10 @@ def test_add_aspect_type_error(obj1, obj2):
(3.5, 3.3, False),
(complex(2, 1), complex(3, 4), False),
("Hello ", "world", True),
(b"bye ", b"bye ", True),
("πŸ™€", "πŸ™€", True),
(b"bye ", b"".join((b"bye", b" ")), True),
("πŸ™€", "".join(("πŸ™€", "")), True),
("a", "a", True),
(b"a", b"a", True),
(b"Hi", b"", True),
(b"Hi ", b" world", True),
(["a"], ["b"], False),
Expand All @@ -72,18 +74,15 @@ def test_add_aspect_tainting_left_hand(obj1, obj2, should_be_tainted):
clear_taint_mapping()

if should_be_tainted:
new_obj1 = taint_pyobject(obj1, Input_info("test_add_aspect_tainting_left_hand", obj1, 0))
assert obj1 is not new_obj1
else:
new_obj1 = obj1
obj1 = taint_pyobject(obj1, Input_info("test_add_aspect_tainting_left_hand", obj1, 0))

result = ddtrace_aspects.add_aspect(new_obj1, obj2)
assert result == new_obj1 + obj2
result = ddtrace_aspects.add_aspect(obj1, obj2)
assert result == obj1 + obj2
if isinstance(obj2, (bytes, str, bytearray)) and len(obj2):
assert result is not new_obj1 + obj2
assert result is not obj1 + obj2
assert is_pyobject_tainted(result) == should_be_tainted
if should_be_tainted:
assert get_tainted_ranges(result) == get_tainted_ranges(new_obj1)
assert get_tainted_ranges(result) == get_tainted_ranges(obj1)


@pytest.mark.parametrize(
Expand Down

0 comments on commit 4372d46

Please sign in to comment.