Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement more comprehensive SSRF mitigation #6362

Merged
merged 2 commits into from
Jun 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- \[API\] Invalid schema for the owner field in several endpoints (<https://github.com/opencv/cvat/pull/6343>)

### Security
- TDB
- More comprehensive SSRF mitigations were implemented.
Previously, on task creation it was prohibited to specify remote data URLs
with hosts that resolved to IP addresses in the private ranges.
Now, redirects to such URLs are also prohibited.
In addition, this restriction is now also applied to webhook URLs.
System administrators can allow or deny custom IP address ranges
with the `SMOKESCREEN_OPTS` environment variable.
(<https://github.com/opencv/cvat/pull/6362>).

## \[2.4.8] - 2023-06-22
### Fixed
Expand Down
8 changes: 8 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ RUN --mount=type=cache,target=/root/.cache/pip/http \
-r /tmp/cvat/requirements/${DJANGO_CONFIGURATION}.txt \
-w /tmp/wheelhouse

FROM golang:1.20.5 AS build-smokescreen

RUN git clone --depth=1 -b v0.0.4 https://github.com/stripe/smokescreen.git
RUN cd smokescreen && go build -o /tmp/smokescreen

FROM ${BASE_IMAGE}

ARG http_proxy
Expand Down Expand Up @@ -126,6 +131,9 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/* && \
echo 'application/wasm wasm' >> /etc/mime.types

# Install smokescreen
COPY --from=build-smokescreen /tmp/smokescreen /usr/local/bin/smokescreen

# Add a non-root user
ENV USER=${USER}
ENV HOME /home/${USER}
Expand Down
52 changes: 14 additions & 38 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
from distutils.dir_util import copy_tree
from urllib import parse as urlparse
from urllib import request as urlrequest
import ipaddress
import dns.resolver
import django_rq
import pytz

Expand All @@ -30,7 +28,7 @@
from cvat.apps.engine.media_extractors import (MEDIA_TYPES, ImageListReader, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter,
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort)
from cvat.apps.engine.utils import av_scan_paths, get_rq_job_meta
from cvat.utils.http import make_requests_session
from cvat.utils.http import make_requests_session, PROXIES_FOR_UNTRUSTED_URLS
from utils.dataset_manifest import ImageManifestManager, VideoManifestManager, is_manifest
from utils.dataset_manifest.core import VideoManifestValidator, is_dataset_manifest
from utils.dataset_manifest.utils import detect_related_images
Expand Down Expand Up @@ -354,45 +352,14 @@ def _validate_manifest(

return None

def _validate_url(url):
def _validate_ip_address(ip_address):
if not ip_address.is_global:
raise ValidationError('Non public IP address \'{}\' is provided!'.format(ip_address))

def _validate_scheme(url):
ALLOWED_SCHEMES = ['http', 'https']

parsed_url = urlparse.urlparse(url)

if parsed_url.scheme not in ALLOWED_SCHEMES:
raise ValueError('Unsupported URL sheme: {}. Only http and https are supported'.format(parsed_url.scheme))

try:
ip_address = ipaddress.ip_address(parsed_url.hostname)
_validate_ip_address(ip_address)
except ValueError as _:
ip_v4_records = None
ip_v6_records = None
try:
ip_v4_records = dns.resolver.query(parsed_url.hostname, 'A')
for record in ip_v4_records:
_validate_ip_address(ipaddress.ip_address(record.to_text()))
except ValidationError:
raise
except Exception as e:
slogger.glob.info('Cannot get A record for domain \'{}\': {}'.format(parsed_url.hostname, e))

try:
ip_v6_records = dns.resolver.query(parsed_url.hostname, 'AAAA')
for record in ip_v6_records:
_validate_ip_address(ipaddress.ip_address(record.to_text()))
except ValidationError:
raise
except Exception as e:
slogger.glob.info('Cannot get AAAA record for domain \'{}\': {}'.format(parsed_url.hostname, e))

if not ip_v4_records and not ip_v6_records:
raise ValidationError('Cannot resolve IP address for domain \'{}\''.format(parsed_url.hostname))

def _download_data(urls, upload_dir):
job = rq.get_current_job()
local_files = {}
Expand All @@ -402,18 +369,27 @@ def _download_data(urls, upload_dir):
name = os.path.basename(urlrequest.url2pathname(urlparse.urlparse(url).path))
if name in local_files:
raise Exception("filename collision: {}".format(name))
_validate_url(url)
_validate_scheme(url)
slogger.glob.info("Downloading: {}".format(url))
job.meta['status'] = '{} is being downloaded..'.format(url)
job.save_meta()

response = session.get(url, stream=True)
response = session.get(url, stream=True, proxies=PROXIES_FOR_UNTRUSTED_URLS)
if response.status_code == 200:
response.raw.decode_content = True
with open(os.path.join(upload_dir, name), 'wb') as output_file:
shutil.copyfileobj(response.raw, output_file)
else:
raise Exception("Failed to download " + url)
error_message = f"Failed to download {response.url}"
if url != response.url:
error_message += f" (redirected from {url})"

if response.status_code == 407:
error_message += "; likely attempt to access internal host"
elif response.status_code:
error_message += f"; HTTP error {response.status_code}"

raise Exception(error_message)

local_files[name] = True

Expand Down
3 changes: 2 additions & 1 deletion cvat/apps/webhooks/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
get_instance_diff, organization_id,
project_id)
from cvat.apps.organizations.models import Invitation, Membership, Organization
from cvat.utils.http import make_requests_session
from cvat.utils.http import make_requests_session, PROXIES_FOR_UNTRUSTED_URLS

from .event_type import EventTypeChoice, event_name
from .models import Webhook, WebhookDelivery, WebhookTypeChoice
Expand Down Expand Up @@ -55,6 +55,7 @@ def send_webhook(webhook, payload, redelivery=False):
headers=headers,
timeout=WEBHOOK_TIMEOUT,
stream=True,
proxies=PROXIES_FOR_UNTRUSTED_URLS,
)
status_code = response.status_code
response_body = response.raw.read(
Expand Down
2 changes: 2 additions & 0 deletions cvat/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,3 +690,5 @@ class CVAT_QUEUES(Enum):
ASSET_MAX_SIZE_MB = 2
ASSET_SUPPORTED_TYPES = ('image/jpeg', 'image/png', 'image/webp', 'image/gif', 'application/pdf', )
ASSET_MAX_COUNT_PER_GUIDE = 10

SMOKESCREEN_ENABLED = True
2 changes: 2 additions & 0 deletions cvat/settings/development.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,5 @@
DATABASES['default']['HOST'] = os.getenv('CVAT_POSTGRES_HOST', 'localhost')

QUALITY_CHECK_JOB_DELAY = 5

SMOKESCREEN_ENABLED = False
14 changes: 14 additions & 0 deletions cvat/utils/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,27 @@
#
# SPDX-License-Identifier: MIT

from django.conf import settings

import requests
import requests.utils

from cvat import __version__

_CVAT_USER_AGENT = f"CVAT/{__version__} {requests.utils.default_user_agent()}"

# Note that setting Session.proxies doesn't work correctly if an upstream proxy
# is specified via environment variables: <https://github.com/psf/requests/issues/2018>.
# Therefore, for robust operation, these need to be passed with every request via the
# proxies= parameter.
PROXIES_FOR_UNTRUSTED_URLS = None

if settings.SMOKESCREEN_ENABLED:
PROXIES_FOR_UNTRUSTED_URLS = {
'http': 'http://localhost:4750',
'https': 'http://localhost:4750',
}

def make_requests_session() -> requests.Session:
session = requests.Session()
session.headers['User-Agent'] = _CVAT_USER_AGENT
Expand Down
3 changes: 3 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ services:
CLICKHOUSE_HOST: clickhouse
CVAT_ANALYTICS: 1
CVAT_BASE_URL:
SMOKESCREEN_OPTS: ${SMOKESCREEN_OPTS:-}
entrypoint: /home/django/backend_entrypoint.sh
labels:
- traefik.enable=true
Expand Down Expand Up @@ -100,6 +101,7 @@ services:
DJANGO_LOG_SERVER_PORT: 80
no_proxy: clickhouse,grafana,vector,nuclio,opa,${no_proxy:-}
NUMPROCS: 2
SMOKESCREEN_OPTS: ${SMOKESCREEN_OPTS:-}
command: -c supervisord/worker.import.conf
volumes:
- cvat_data:/home/django/data
Expand Down Expand Up @@ -169,6 +171,7 @@ services:
DJANGO_LOG_SERVER_PORT: 80
no_proxy: clickhouse,grafana,vector,nuclio,opa,${no_proxy:-}
NUMPROCS: 1
SMOKESCREEN_OPTS: ${SMOKESCREEN_OPTS:-}
command: -c supervisord/worker.webhooks.conf
volumes:
- cvat_data:/home/django/data
Expand Down
2 changes: 1 addition & 1 deletion helm-chart/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.7.3
version: 0.8.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand Down
4 changes: 3 additions & 1 deletion helm-chart/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ Create the name of the service account to use
{{- end }}
{{- end }}

{{- define "cvat.nuclioEnv" }}
{{- define "cvat.sharedBackendEnv" }}
- name: SMOKESCREEN_OPTS
value: {{ .Values.smokescreen.opts | toJson }}
{{- if .Values.nuclio.enabled }}
- name: CVAT_SERVERLESS
value: "1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ spec:
secretKeyRef:
name: "{{ tpl (.Values.postgresql.secret.name) . }}"
key: password
{{ include "cvat.sharedBackendEnv" . | indent 10 }}
{{- with .Values.cvat.backend.worker.webhooks.additionalEnv }}
{{- toYaml . | nindent 10 }}
{{- end }}
Expand Down
2 changes: 1 addition & 1 deletion helm-chart/templates/cvat_backend/server/deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ spec:
secretKeyRef:
name: "{{ tpl (.Values.postgresql.secret.name) . }}"
key: password
{{ include "cvat.nuclioEnv" . | indent 10 }}
{{ include "cvat.sharedBackendEnv" . | indent 10 }}
{{- if .Values.analytics.enabled}}
- name: CVAT_ANALYTICS
value: "1"
Expand Down
2 changes: 1 addition & 1 deletion helm-chart/templates/cvat_backend/utils/deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ spec:
secretKeyRef:
name: "{{ tpl (.Values.postgresql.secret.name) . }}"
key: password
{{ include "cvat.nuclioEnv" . | indent 10 }}
{{ include "cvat.sharedBackendEnv" . | indent 10 }}
{{- if .Values.analytics.enabled}}
- name: CVAT_ANALYTICS
value: "1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ spec:
secretKeyRef:
name: "{{ tpl (.Values.postgresql.secret.name) . }}"
key: password
{{ include "cvat.nuclioEnv" . | indent 10 }}
{{ include "cvat.sharedBackendEnv" . | indent 10 }}
{{- if .Values.analytics.enabled}}
- name: CVAT_ANALYTICS
value: "1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ spec:
secretKeyRef:
name: "{{ tpl (.Values.postgresql.secret.name) . }}"
key: password
{{ include "cvat.nuclioEnv" . | indent 10 }}
{{ include "cvat.sharedBackendEnv" . | indent 10 }}
{{- if .Values.analytics.enabled}}
- name: CVAT_ANALYTICS
value: "1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ spec:
secretKeyRef:
name: "{{ tpl (.Values.postgresql.secret.name) . }}"
key: password
{{ include "cvat.nuclioEnv" . | indent 10 }}
{{ include "cvat.sharedBackendEnv" . | indent 10 }}
{{- if .Values.analytics.enabled}}
- name: CVAT_ANALYTICS
value: "1"
Expand Down
3 changes: 3 additions & 0 deletions helm-chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -385,3 +385,6 @@ traefik:
service:
externalIPs:
# - "192.168.49.2"

smokescreen:
opts: ''
3 changes: 3 additions & 0 deletions supervisord/server.conf
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,6 @@ numprocs=%(ENV_NUMPROCS)s
process_name=%(program_name)s-%(process_num)s
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0

[program:smokescreen]
command=smokescreen --listen-ip=127.0.0.1 %(ENV_SMOKESCREEN_OPTS)s
3 changes: 3 additions & 0 deletions supervisord/worker.import.conf
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,6 @@ command=bash -c "if [ \"${CLAM_AV}\" = 'yes' ]; then /usr/bin/freshclam -d \
-l %(ENV_HOME)s/logs/freshclam.log --foreground=true; fi"
numprocs=1
startsecs=0

[program:smokescreen]
command=smokescreen --listen-ip=127.0.0.1 %(ENV_SMOKESCREEN_OPTS)s
3 changes: 3 additions & 0 deletions supervisord/worker.webhooks.conf
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,6 @@ command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -i
"
environment=SSH_AUTH_SOCK="/tmp/ssh-agent.sock",VECTOR_EVENT_HANDLER="SynchronousLogstashHandler"
numprocs=%(ENV_NUMPROCS)s

[program:smokescreen]
command=smokescreen --listen-ip=127.0.0.1 %(ENV_SMOKESCREEN_OPTS)s
27 changes: 27 additions & 0 deletions tests/docker-compose.test_servers.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,31 @@
services:
cvat_worker_webhooks:
depends_on:
- webhook_receiver
entrypoint:
- /bin/bash
- -euc
# We want to exclude webhooks_receiver from SSRF protection,
# so that the server can access it.
# --allow-address doesn't allow hostnames, so we have to resolve
# the IP address ourselves.
- |
webhooks_ip_addr="$$(getent hosts webhooks | head -1 | awk '{ print $$1 }')"
export SMOKESCREEN_OPTS=--allow-address="$$webhooks_ip_addr"
exec /usr/bin/supervisord -c supervisord/worker.webhooks.conf

cvat_server:
depends_on:
- webhook_receiver
entrypoint:
- /bin/bash
- -euc
- |
webhooks_ip_addr="$$(getent hosts webhooks | head -1 | awk '{ print $$1 }')"
export SMOKESCREEN_OPTS=--allow-address="$$webhooks_ip_addr"
exec /home/django/backend_entrypoint.sh


webhook_receiver:
image: python:3.9-slim
restart: always
Expand Down
4 changes: 2 additions & 2 deletions tests/python/shared/fixtures/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@

CONTAINER_NAME_FILES = ["docker-compose.tests.yml"]

DC_FILES = [
DC_FILES = CONTAINER_NAME_FILES + [
"docker-compose.dev.yml",
"tests/docker-compose.file_share.yml",
"tests/docker-compose.minio.yml",
"tests/docker-compose.test_servers.yml",
] + CONTAINER_NAME_FILES
]


class Container(str, Enum):
Expand Down