Skip to content

Commit

Permalink
feat(appsec): ddwaf extension (#3027)
Browse files Browse the repository at this point in the history
This PR adds an extension for [libddwaf](https://github.com/DataDog/libddwaf) and a minimal AppSec module.

ddwaf is a C++ extension built using cython and linked statically with the libddwaf library, which is itself built with cmake. As a result, a C++ compiler, cmake, ninja and git are now required to build ddtrace from source. We expect most users to install ddtrace using binary wheels, so they should not notice this change. However, any errors during the compilation of this extension are ignored for now in order to avoid installation failures.

The AppSec module is disabled by default and can be enabled using the `DD_APPSEC_ENABLED` environment variable. It consists of a single processor subscribing to web spans and detecting common web scanners on 404 HTTP responses.
  • Loading branch information
nizox authored Feb 2, 2022
1 parent 665fed4 commit d668dac
Show file tree
Hide file tree
Showing 21 changed files with 6,368 additions and 17 deletions.
14 changes: 11 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ executors:
cimg_base:
docker:
- image: *cimg_base_image
resource_class: small
resource_class: medium
python39:
docker:
- image: *python39_image
Expand Down Expand Up @@ -284,9 +284,9 @@ jobs:
steps:
- checkout
- run: sudo apt-get update
- run: sudo apt-get install --yes clang-format gcc-10 python3 python3-setuptools python3-pip
- run: sudo apt-get install --yes clang-format gcc-10 g++-10 python3 python3-setuptools python3-pip
- run: scripts/cformat.sh
- run: DD_COMPILE_DEBUG=1 CC=gcc-10 pip -vvv install .
- run: DD_COMPILE_DEBUG=1 DD_TESTING_RAISE=1 CC=gcc-10 CXX=g++-10 pip -vvv install .

coverage_report:
executor: python39
Expand Down Expand Up @@ -342,6 +342,12 @@ jobs:
paths:
- "."

appsec:
<<: *contrib_job
steps:
- run_test:
pattern: 'appsec'

tracer:
<<: *contrib_job
steps:
Expand Down Expand Up @@ -1025,6 +1031,7 @@ requires_tests: &requires_tests
- starlette
- test_logging
- tracer
- appsec
- tornado
- urllib3
- vertica
Expand Down Expand Up @@ -1112,6 +1119,7 @@ workflows:
- test_logging: *requires_base_venvs
- tornado: *requires_base_venvs
- tracer: *requires_base_venvs
- appsec: *requires_base_venvs
- urllib3: *requires_base_venvs
- vertica: *requires_base_venvs
- wsgi: *requires_base_venvs
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ jobs:

- name: Install build dependencies
# Rust + Cargo are needed for Cryptography
run: apk add git gcc musl-dev libffi-dev openssl-dev bash rust cargo
run: apk add git gcc g++ musl-dev libffi-dev openssl-dev bash rust cargo

- name: Check source package
run: |
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ __pycache__/
*$py.class

# C extensions
ddtrace/appsec/_ddwaf.cpp
ddtrace/profiling/collector/_task.c
ddtrace/profiling/collector/_threading.c
ddtrace/profiling/collector/_traceback.c
Expand All @@ -14,6 +15,7 @@ ddtrace/internal/_encoding.c
ddtrace/internal/_rand.c
ddtrace/internal/_tagset.c
*.so
*.a

# Cython annotate HTML files
ddtrace/**/*.html
Expand Down
17 changes: 17 additions & 0 deletions ddtrace/appsec/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Fetches libddwaf from its git repository at a pinned tag and builds it as an
# external project. Only the static library is requested (shared build is
# switched off), and the install prefix is this source directory with
# libraries placed under "lib".
cmake_minimum_required(VERSION 3.14)
project(ddwaf)
include(ExternalProject)

# The build type and make program of the outer build are forwarded to the
# libddwaf build so both are configured consistently.
ExternalProject_Add(libddwaf
GIT_REPOSITORY https://github.com/DataDog/libddwaf.git
GIT_TAG 1.0.16
INSTALL_DIR ${CMAKE_SOURCE_DIR}
CMAKE_ARGS
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
-DLIBDDWAF_BUILD_SHARED=OFF
-DLIBDDWAF_BUILD_STATIC=ON
-DLIBDDWAF_MSVC_RUNTIME_LIBRARY=/MD
-DCMAKE_INSTALL_LIBDIR=lib
-DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
)
Empty file added ddtrace/appsec/__init__.py
Empty file.
262 changes: 262 additions & 0 deletions ddtrace/appsec/_ddwaf.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
from collections import deque
import sys
from typing import Tuple

import six


if six.PY3:
from typing import Mapping
from typing import Sequence
else:
from collections import Mapping, Sequence

from _libddwaf cimport DDWAF_LOG_LEVEL
from _libddwaf cimport DDWAF_OBJ_TYPE
from _libddwaf cimport ddwaf_context
from _libddwaf cimport ddwaf_context_destroy
from _libddwaf cimport ddwaf_context_init
from _libddwaf cimport ddwaf_destroy
from _libddwaf cimport ddwaf_get_version
from _libddwaf cimport ddwaf_handle
from _libddwaf cimport ddwaf_init
from _libddwaf cimport ddwaf_object
from _libddwaf cimport ddwaf_required_addresses
from _libddwaf cimport ddwaf_result
from _libddwaf cimport ddwaf_result_free
from _libddwaf cimport ddwaf_run
from _libddwaf cimport ddwaf_set_log_cb
from _libddwaf cimport ddwaf_version
from cpython.bytes cimport PyBytes_AsString
from cpython.bytes cimport PyBytes_Size
from cpython.exc cimport PyErr_Clear
from cpython.exc cimport PyErr_Occurred
from cpython.mem cimport PyMem_Free
from cpython.mem cimport PyMem_Realloc
from cpython.unicode cimport PyUnicode_AsEncodedString
from libc.stdint cimport uint32_t
from libc.stdint cimport uint64_t
from libc.stdint cimport uintptr_t
from libc.string cimport memset


# Default time budget for DDWaf.run(), expressed in milliseconds. run()
# multiplies the value by 1000 before handing it to ddwaf_run.
DEFAULT_DDWAF_TIMEOUT_MS=20


# Manual declaration of a CPython C-API function. In this module it is only
# called from the Python 3 branch of _string_to_bytes, to get a UTF-8 view of
# a text string together with its length.
cdef extern from "Python.h":
const char* PyUnicode_AsUTF8AndSize(object o, Py_ssize_t *size)


def version():
    # type: () -> Tuple[int, int, int]
    """Return the libddwaf version as a ``(major, minor, patch)`` tuple."""
    cdef ddwaf_version info
    # libddwaf fills the structure in place.
    ddwaf_get_version(&info)
    return info.major, info.minor, info.patch


cdef inline object _string_to_bytes(object string, const char **ptr, ssize_t *length):
    """
    Expose the bytes of a Python string through a C pointer and a length.

    :param string: a binary or text string.
    :param ptr: output, receives the address of the first byte.
    :param length: output, receives the number of bytes.
    :returns: the Python object that owns the memory ``ptr`` points to. The
        caller must keep a reference to it for as long as the pointer is used.
        This is ``string`` itself, or a newly encoded bytes object when the
        text had to be encoded explicitly.
    :raises RuntimeError: if ``string`` is neither binary nor text.
    """
    if isinstance(string, six.binary_type):
        ptr[0] = PyBytes_AsString(string)
        length[0] = PyBytes_Size(string)
        return string
    elif isinstance(string, six.text_type):
        # The fast path below is compiled out on Python 2 and callers pass
        # an uninitialized pointer, so reset it explicitly: the fallback test
        # must never read an indeterminate value.
        ptr[0] = NULL
        IF PY_MAJOR_VERSION >= 3:
            ptr[0] = PyUnicode_AsUTF8AndSize(string, length)
            if ptr[0] == NULL and PyErr_Occurred():
                # ignore exception from this function as we fallback to
                # PyUnicode_AsEncodedString
                PyErr_Clear()
        if ptr[0] == NULL:
            # Explicit encoding; "surrogatepass" lets lone surrogates through
            # instead of raising.
            string = PyUnicode_AsEncodedString(string, "utf-8", "surrogatepass")
            ptr[0] = PyBytes_AsString(string)
            length[0] = PyBytes_Size(string)
        return string
    raise RuntimeError


cdef class _Wrapper(object):
    """
    Wrapper to convert Python objects to ddwaf objects.

    libddwaf represents scalar and composite values using ddwaf objects. This
    wrapper converts Python objects to ddwaf objects by traversing all values
    and their children. By default, the number of objects is limited to avoid
    infinite loops. This limitation can be lifted on trusted data by setting
    `max_objects` to `None`.

    Under the hood, the wrapper uses an array of `ddwaf_object` allocated as a
    single buffer. Objects such as maps or arrays refer to other objects that
    are only part of this buffer. Strings are not copied, they live in the
    Python heap and are referenced by the wrapper to avoid garbage collection.
    """

    # Start of the ddwaf_object buffer (NULL until the first reservation).
    cdef ddwaf_object *_ptr
    # Python objects owning the memory referenced by string values and keys.
    cdef readonly object _string_refs
    # Capacity of the buffer, in number of ddwaf_object slots.
    cdef readonly ssize_t _size
    # Index of the first slot that has not been reserved yet.
    cdef readonly ssize_t _next_idx

    def __init__(self, value, max_objects=5000):
        """
        Convert ``value`` into ddwaf objects.

        :param value: the Python value to convert (string, number, mapping or
            sequence, possibly nested).
        :param max_objects: maximum number of values processed, or ``None``
            for no limit.
        """
        self._string_refs = []
        self._convert(value, max_objects)

    cdef ssize_t _reserve_obj(self, ssize_t n=1) except -1:
        """
        Reserve ``n`` contiguous slots and return the index of the first one.

        The buffer grows by 1.5 times its size plus a fixed amount until the
        request fits. Newly available slots are zero-filled.

        :raises MemoryError: if the buffer cannot be grown. The wrapper is
            left unchanged in that case.
        """
        cdef ssize_t idx, i, new_size
        cdef ddwaf_object *ptr
        cdef ddwaf_object *obj

        idx = self._next_idx
        if idx + n > self._size:
            # Work on a local copy of the capacity: it must only be committed
            # once the reallocation succeeded, otherwise a failed allocation
            # would leave `_size` larger than the real buffer.
            new_size = self._size
            while idx + n > new_size:
                # grow 1.5 the previous size + an initial fixed size until
                # it can accommodate at least n new objects
                new_size += (new_size >> 1) + 128
            ptr = <ddwaf_object *> PyMem_Realloc(self._ptr, new_size * sizeof(ddwaf_object))
            if ptr == NULL:
                raise MemoryError
            memset(ptr + idx, 0, (new_size - idx) * sizeof(ddwaf_object))
            if self._ptr != NULL and ptr != self._ptr:
                # we need to patch all array objects because they use pointers to other objects
                for i in range(idx):
                    obj = ptr + i
                    if (obj.type == DDWAF_OBJ_TYPE.DDWAF_OBJ_MAP or obj.type == DDWAF_OBJ_TYPE.DDWAF_OBJ_ARRAY) and obj.array != NULL:
                        obj.array = obj.array - self._ptr + ptr
            self._ptr = ptr
            self._size = new_size
        self._next_idx += n
        return idx

    cdef int _make_string(self, ssize_t idx, object string) except -1:
        """Turn slot ``idx`` into a string object referencing ``string``."""
        cdef const char * ptr
        cdef ssize_t length
        cdef ddwaf_object *obj

        # Keep the owner of the bytes alive for as long as the wrapper lives.
        self._string_refs.append(_string_to_bytes(string, &ptr, &length))

        obj = self._ptr + idx
        obj.type = DDWAF_OBJ_TYPE.DDWAF_OBJ_STRING
        obj.stringValue = ptr
        obj.nbEntries = length

    cdef void _make_array(self, ssize_t idx, ssize_t array_idx, ssize_t nb_entries):
        """Turn slot ``idx`` into an array of ``nb_entries`` slots starting at ``array_idx``."""
        cdef ddwaf_object *obj
        obj = self._ptr + idx
        obj.type = DDWAF_OBJ_TYPE.DDWAF_OBJ_ARRAY
        obj.array = self._ptr + array_idx
        obj.nbEntries = nb_entries

    cdef void _make_map(self, ssize_t idx, ssize_t array_idx, ssize_t nb_entries):
        """Turn slot ``idx`` into a map of ``nb_entries`` slots starting at ``array_idx``."""
        cdef ddwaf_object *obj
        obj = self._ptr + idx
        obj.type = DDWAF_OBJ_TYPE.DDWAF_OBJ_MAP
        obj.array = self._ptr + array_idx
        obj.nbEntries = nb_entries

    cdef int _set_parameter(self, ssize_t idx, object string) except -1:
        """Set the parameter name (map key) of slot ``idx`` to ``string``."""
        cdef const char * ptr
        cdef ssize_t length
        cdef ddwaf_object *obj

        # Keep the owner of the bytes alive for as long as the wrapper lives.
        self._string_refs.append(_string_to_bytes(string, &ptr, &length))

        obj = self._ptr + idx
        obj.parameterName = ptr
        obj.parameterNameLength = length

    cdef void _convert(self, value, max_objects) except *:
        """
        Convert ``value`` and its children breadth-first, without recursion.

        Pending work is a queue of ``(slot index, Python value)`` pairs. Slots
        are addressed by index rather than by pointer because reserving more
        slots may move the buffer.
        """
        cdef object stack
        cdef ssize_t i, j, n, idx, items_idx

        i = 0
        # The queue is bounded by `max_objects`: once full, appending to it
        # discards pending entries from the other end.
        stack = deque([(self._reserve_obj(), value)], maxlen=max_objects)
        while len(stack) and (max_objects is None or i < <ssize_t?> max_objects):
            idx, val = stack.popleft()

            # Numbers are passed to libddwaf as their string representation.
            if isinstance(val, (int, float)):
                val = str(val)

            if isinstance(val, (six.binary_type, six.text_type)):
                self._make_string(idx, val)

            elif isinstance(val, Mapping):
                n = len(val)
                items_idx = self._reserve_obj(n)
                self._make_map(idx, items_idx, n)
                # size of val must not change!! should not happen
                # while holding the GIL?
                for j, (k, v) in enumerate(six.iteritems(val)):
                    if not isinstance(k, (six.binary_type, six.text_type)):
                        if isinstance(k, (int, float)):
                            k = str(k)
                        else:
                            # Unsupported key type: the slot is left
                            # zero-filled and still counted in the map.
                            continue
                    self._set_parameter(items_idx + j, k)
                    stack.append((items_idx + j, v))

            elif isinstance(val, Sequence):
                n = len(val)
                items_idx = self._reserve_obj(n)
                self._make_array(idx, items_idx, n)
                stack.extend([(items_idx + j, val[j]) for j in range(n)])

            # Any other type leaves its slot zero-filled.
            i += 1

    def __repr__(self):
        return "<{0.__class__.__name__} for {0._next_idx} elements>".format(self)

    def __sizeof__(self):
        # Account for the object buffer on top of the instance itself.
        return super(_Wrapper, self).__sizeof__() + self._size * sizeof(ddwaf_object)

    def __dealloc__(self):
        PyMem_Free(self._ptr)


cdef class DDWaf(object):
    """
    A DDWaf instance performs a matching operation on provided data according
    to some rules.
    """

    # Handle returned by ddwaf_init (NULL if initialization failed).
    cdef ddwaf_handle _handle
    # _Wrapper holding the converted rules; kept referenced for the lifetime
    # of the instance.
    cdef object _rules

    def __init__(self, rules):
        """
        Create a WAF instance from ``rules``.

        :param rules: Python structure describing the rules; it is converted
            without any limit on the number of objects.
        :raises ValueError: if libddwaf rejects the rules.
        """
        cdef ddwaf_object* rule_objects
        self._rules = _Wrapper(rules, max_objects=None)
        rule_objects = (<_Wrapper?>self._rules)._ptr
        self._handle = ddwaf_init(rule_objects, NULL)
        if <void *> self._handle == NULL:
            raise ValueError("invalid rules")

    @property
    def required_data(self):
        """List of addresses reported by ``ddwaf_required_addresses``, as text."""
        cdef uint32_t size
        cdef const char* const* ptr

        # Defensive default in case the library does not write the size.
        size = 0
        addresses = []
        ptr = ddwaf_required_addresses(self._handle, &size)
        for i in range(size):
            addresses.append((<bytes> ptr[i]).decode("utf-8"))
        return addresses

    def run(self, data, timeout_ms=DEFAULT_DDWAF_TIMEOUT_MS):
        """
        Run the rules against ``data``.

        :param data: Python structure to inspect; converted with the default
            object limit of ``_Wrapper``.
        :param timeout_ms: time budget in milliseconds (multiplied by 1000
            before being passed to ``ddwaf_run``; presumably microseconds,
            to be confirmed against the libddwaf header).
        :returns: the result data decoded as UTF-8, or ``None`` when the run
            produced no data.
        :raises RuntimeError: if a context cannot be created.
        """
        cdef ddwaf_context ctx
        cdef ddwaf_result result

        # The `finally` clause below always frees `result`, including when the
        # conversion of `data` raises before ddwaf_run had a chance to fill
        # it. Zero it so that freeing never touches indeterminate memory.
        memset(&result, 0, sizeof(ddwaf_result))

        ctx = ddwaf_context_init(self._handle, NULL)
        if <void *> ctx == NULL:
            raise RuntimeError
        try:
            wrapper = _Wrapper(data)
            ddwaf_run(ctx, (<_Wrapper?>wrapper)._ptr, &result, <uint64_t?> timeout_ms * 1000)
            if result.data != NULL:
                return (<bytes> result.data).decode("utf-8")
        finally:
            ddwaf_result_free(&result)
            ddwaf_context_destroy(ctx)

    def __dealloc__(self):
        # The handle is NULL when __init__ raised or never ran.
        if <void *> self._handle != NULL:
            ddwaf_destroy(self._handle)
Loading

0 comments on commit d668dac

Please sign in to comment.