Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DNM] Fix CodeQL reported vulnerabilities #2398

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions onadata/apps/logger/migrations/0001_pre-django-3-upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from onadata.libs.utils.model_tools import queryset_iterator
from onadata.libs.utils.logger_tools import create_xform_version
import taggit.managers
from hashlib import md5
from hashlib import sha256


def recalculate_xform_hash(apps, schema_editor): # pylint: disable=W0613
Expand All @@ -29,8 +29,8 @@ def recalculate_xform_hash(apps, schema_editor): # pylint: disable=W0613
counter = 0

for xform in queryset_iterator(xforms, 500):
hash_value = md5(xform.xml.encode("utf8")).hexdigest()
xform.hash = f"md5:{hash_value}"
hash_value = sha256(xform.xml.encode("utf8")).hexdigest()
xform.hash = f"sha256:{hash_value}"
xform.save(update_fields=["hash"])
counter += 1
if counter % 500 == 0:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.18 on 2023-04-02 20:06

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('logger', '0004_update_instance_geoms'),
]

operations = [
migrations.AlterField(
model_name='xform',
name='hash',
field=models.CharField(blank=True, default=None, max_length=128, null=True, verbose_name='Hash'),
),
]
4 changes: 2 additions & 2 deletions onadata/apps/logger/migrations/0051_auto_20180522_1118.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from __future__ import unicode_literals

from django.db import migrations
from hashlib import md5
from hashlib import sha256

from onadata.libs.utils.model_tools import queryset_iterator

Expand All @@ -23,7 +23,7 @@ def recalculate_xform_hash(apps, schema_editor): # pylint: disable=W0613
counter = 0

for xform in queryset_iterator(xforms, 500):
xform.hash = "md5:%s" % md5(xform.xml.encode("utf8")).hexdigest()
xform.hash = "sha256:%s" % sha256(xform.xml.encode("utf8")).hexdigest()
xform.save(update_fields=["hash"])
counter += 1
if counter % 500 == 0:
Expand Down
10 changes: 5 additions & 5 deletions onadata/apps/logger/models/attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,13 @@ def save(self, *args, **kwargs):

@property
def file_hash(self):
"""Returns the MD5 hash of the file."""
md5_hash = ""
"""Returns the sha256 hash of the file."""
sha256_hash = ""
if self.media_file.storage.exists(self.media_file.name):
md5_hash = hashlib.new(
"md5", self.media_file.read(), usedforsecurity=False
sha256_hash = hashlib.new(
"sha256", self.media_file.read(), usedforsecurity=False
).hexdigest()
return md5_hash
return sha256_hash

@property
def filename(self):
Expand Down
14 changes: 7 additions & 7 deletions onadata/apps/logger/models/xform.py
Original file line number Diff line number Diff line change
Expand Up @@ -889,7 +889,7 @@ class XForm(XFormMixin, BaseModel):
has_hxl_support = models.BooleanField(default=False)
last_updated_at = models.DateTimeField(auto_now=True)
hash = models.CharField(
_("Hash"), max_length=36, blank=True, null=True, default=None
_("Hash"), max_length=128, blank=True, null=True, default=None
)
# XForm was created as a merged dataset
is_merged_dataset = models.BooleanField(default=False)
Expand Down Expand Up @@ -962,22 +962,22 @@ def _set_title(self):

# Capture urls within form title
if re.search(
r"^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$", # noqa
r"^(?:http(s)?:\/\/)?[\w\-.]+(?:\.[\w\-.]+)+[\w\-.~:/?#[\]@!\$&'()*+,;=]+$", # noqa

Check failure

Code scanning / CodeQL

Inefficient regular expression

This part of the regular expression may cause exponential backtracking on strings starting with '-.' and containing many repetitions of '-.'.
self.title,
):
raise XLSFormError(_("Invalid title value; value shouldn't match a URL"))

self.title = title_xml

def get_hash(self):
"""Returns the MD5 hash of the forms XML content prefixed by 'md5:'"""
md5_hash = hashlib.new(
"md5", self.xml.encode("utf-8"), usedforsecurity=False
"""Returns the sha256 hash of the forms XML content prefixed by 'sha256:'"""
sha256_hash = hashlib.new(
"sha256", self.xml.encode("utf-8"), usedforsecurity=False
).hexdigest()
return f"md5:{md5_hash}"
return f"sha256:{sha256_hash}"

def set_hash(self):
"""Sets the MD5 hash of the form."""
"""Sets the sha256 hash of the form."""
self.hash = self.get_hash()

def _set_encrypted_field(self):
Expand Down
7 changes: 7 additions & 0 deletions onadata/apps/main/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,13 @@ def publish(self, user, id_string=None, created_by=None):
if cleaned_url:
self.validate(cleaned_url)
cleaned_xls_file = urlparse(cleaned_url)

if cleaned_xls_file.scheme not in ["http", "https"]:
raise forms.ValidationError("Invalid URL scheme")

if not cleaned_xls_file.netloc:
raise forms.ValidationError("Invalid URL")

cleaned_xls_file = "_".join(cleaned_xls_file.path.split("/")[-2:])
name, extension = os.path.splitext(cleaned_xls_file)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.18 on 2023-04-02 20:06

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('main', '0012_metadata_extra_data'),
]

operations = [
migrations.AlterField(
model_name='metadata',
name='file_hash',
field=models.CharField(blank=True, max_length=128, null=True),
),
]
8 changes: 4 additions & 4 deletions onadata/apps/main/models/meta_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ class MetaData(models.Model):
extra_data = models.JSONField(default=dict, blank=True, null=True)
data_file = models.FileField(upload_to=upload_to, blank=True, null=True)
data_file_type = models.CharField(max_length=255, blank=True, null=True)
file_hash = models.CharField(max_length=50, blank=True, null=True)
file_hash = models.CharField(max_length=128, blank=True, null=True)
date_created = models.DateTimeField(null=True, auto_now_add=True)
date_modified = models.DateTimeField(null=True, auto_now=True)
deleted_at = models.DateTimeField(null=True, default=None)
Expand Down Expand Up @@ -222,7 +222,7 @@ def hash(self):

def set_hash(self):
"""
Returns the md5 hash of the metadata file.
Returns the sha256 hash of the metadata file.
"""
if not self.data_file:
return None
Expand All @@ -238,9 +238,9 @@ def set_hash(self):
return ""
else:
file_hash = hashlib.new(
"md5", self.data_file.read(), usedforsecurity=False
"sha256", self.data_file.read(), usedforsecurity=False
).hexdigest()
self.file_hash = f"md5:{file_hash}"
self.file_hash = f"sha256:{file_hash}"

return self.file_hash

Expand Down
4 changes: 2 additions & 2 deletions onadata/libs/mixins/etags_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

Adds Etag headers to the viewset response.
"""
from hashlib import md5
from hashlib import sha256

MODELS_WITH_DATE_MODIFIED = (
"XForm",
Expand All @@ -30,7 +30,7 @@ class ETagsMixin:
def set_etag_header(self, etag_value, etag_hash=None):
"""Updates the response headers with Etag header"""
if etag_value:
etag_hash = md5(str(etag_value).encode("utf-8")).hexdigest()
etag_hash = sha256(str(etag_value).encode("utf-8")).hexdigest()
if etag_hash:
self.headers.update({"ETag": etag_hash})

Expand Down
10 changes: 5 additions & 5 deletions onadata/libs/serializers/xform_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ def get_url(self, obj):
@check_obj
def get_hash(self, obj):
"""
Returns MD5 hash based on last_submission_time for a media linked form.
Returns sha256 hash based on last_submission_time for a media linked form.
"""
filename = obj.data_value
hsh = obj.file_hash
Expand All @@ -674,14 +674,14 @@ def get_hash(self, obj):
xform = data_view.xform

if xform and xform.last_submission_time:
md5_hash = hashlib.new(
"md5",
sha256_hash = hashlib.new(
"sha256",
xform.last_submission_time.isoformat().encode("utf-8"),
usedforsecurity=False,
).hexdigest()
hsh = f"md5:{md5_hash}"
hsh = f"sha256:{sha256_hash}"

return f"{hsh or 'md5:'}"
return f"{hsh or 'sha256:'}"

@check_obj
def get_filename(self, obj):
Expand Down
38 changes: 24 additions & 14 deletions onadata/libs/utils/export_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,18 @@
SUPPORTED_INDEX_TAGS = ("[", "]", "(", ")", "{", "}", ".", "_")
EXPORT_QUERY_KEY = "query"
MAX_RETRIES = 3
SAFE_FILENAME_REGEX = re.compile(r"^[a-zA-Z0-9_-]{1,100}\.[a-zA-Z0-9]{1,10}$")
MAX_FILENAME_LENGTH = 256

# pylint: disable=invalid-name
User = get_user_model()


def md5hash(string):
def sha256hash(string):
"""
Return the MD5 hex digest of the given string.
Return the sha256 hex digest of the given string.
"""
return hashlib.md5(string).hexdigest()
return hashlib.sha256(string).hexdigest()


def get_export_options(options):
Expand Down Expand Up @@ -391,18 +393,26 @@ def increment_index_in_filename(filename):
filename should be in the form file.ext or file-2.ext - we check for the
dash and index and increment appropriately
"""
# check for an index i.e. dash then number then dot extension
regex = re.compile(r"(.+?)\-(\d+)(\..+)")
match = regex.match(filename)
if match:
basename = match.groups()[0]
index = int(match.groups()[1]) + 1
ext = match.groups()[2]
new_filename = ""

# Validate the input to prevent DoS attacks
if len(filename) > MAX_FILENAME_LENGTH:
raise Exception("Filename is too long.")
elif not SAFE_FILENAME_REGEX.match(filename):
raise Exception("Filename is not in a safe format.")
else:
index = 1
# split filename from ext
basename, ext = os.path.splitext(filename)
new_filename = f"{basename}-{index}{ext}"
# check for an index i.e. dash then number then dot extension
regex = re.compile(r"(.+?)\-(\d+)(\..+)")
match = regex.match(filename)

Check failure

Code scanning / CodeQL

Polynomial regular expression used on uncontrolled data

This [regular expression](1) that depends on a [user-provided value](2) may run slow on strings with many repetitions of 'a'.
if match:
basename = match.groups()[0]
index = int(match.groups()[1]) + 1
ext = match.groups()[2]
else:
index = 1
# split filename from ext
basename, ext = os.path.splitext(filename)
new_filename = f"{basename}-{index}{ext}"

return new_filename

Expand Down
8 changes: 4 additions & 4 deletions onadata/libs/utils/gravatar.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,24 @@
GRAVATAR_SIZE = str(60)


def email_md5(user):
def email_sha256(user):
"""Returns the hash of an email for the user"""
return hashlib.new(
"md5", user.email.lower().encode("utf-8"), usedforsecurity=False
"sha256", user.email.lower().encode("utf-8"), usedforsecurity=False
).hexdigest()


def get_gravatar_img_link(user):
"""Returns the Gravatar image URL"""
return (
GRAVATAR_ENDPOINT
+ email_md5(user)
+ email_sha256(user)
+ "?"
+ urlencode({"d": DEFAULT_GRAVATAR, "s": str(GRAVATAR_SIZE)})
)


def gravatar_exists(user):
"""Checks if the Gravatar URL exists"""
url = GRAVATAR_ENDPOINT + email_md5(user) + "?" + "d=404"
url = GRAVATAR_ENDPOINT + email_sha256(user) + "?" + "d=404"
return requests.get(url).status_code != 404