Skip to content

Commit 7245bdc

Browse files
kaustavb12xitij2000
authored andcommitted
feat: allow switching anonymous user ID hashing algorithm from shake to md5
The hashing algorithm has been changed in cd60646. However, there are Open edX operators who maintain backward compatibility of anonymous user IDs after past rotations of their Django secret key. For them, altering the hashing algorithm was a breaking change that made their analytics inconsistent. (cherry picked from commit 746e4fe) (cherry picked from commit ff6d92f)
1 parent dbe7b04 commit 7245bdc

File tree

4 files changed

+44
-2
lines changed

4 files changed

+44
-2
lines changed

cms/envs/common.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,17 @@
531531
# .. toggle_creation_date: 2023-03-31
532532
# .. toggle_tickets: https://github.com/openedx/edx-platform/pull/32015
533533
'DISABLE_ADVANCED_SETTINGS': False,
534+
535+
# .. toggle_name: FEATURES['ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID']
536+
# .. toggle_implementation: DjangoSetting
537+
# .. toggle_default: False
538+
# .. toggle_description: Whether to enable the legacy MD5 hashing algorithm to generate anonymous user id
539+
# instead of the newer SHAKE128 hashing algorithm
540+
# .. toggle_use_cases: open_edx
541+
# .. toggle_creation_date: 2022-08-08
542+
# .. toggle_target_removal_date: None
543+
# .. toggle_tickets: 'https://github.com/openedx/edx-platform/pull/30832'
544+
'ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID': False,
534545
}
535546

536547
# .. toggle_name: ENABLE_COPPA_COMPLIANCE

common/djangoapps/student/models/user.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,12 +150,21 @@ def anonymous_id_for_user(user, course_id):
150150
# function: Rotate at will, since the hashes are stored and
151151
# will not change.
152152
# include the secret key as a salt, and to make the ids unique across different LMS installs.
153-
hasher = hashlib.shake_128()
153+
legacy_hash_enabled = settings.FEATURES.get('ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID', False)
154+
if legacy_hash_enabled:
155+
# Use legacy MD5 algorithm if flag enabled
156+
hasher = hashlib.md5()
157+
else:
158+
hasher = hashlib.shake_128()
154159
hasher.update(settings.SECRET_KEY.encode('utf8'))
155160
hasher.update(str(user.id).encode('utf8'))
156161
if course_id:
157162
hasher.update(str(course_id).encode('utf-8'))
158-
anonymous_user_id = hasher.hexdigest(16)
163+
164+
if legacy_hash_enabled:
165+
anonymous_user_id = hasher.hexdigest()
166+
else:
167+
anonymous_user_id = hasher.hexdigest(16) # pylint: disable=too-many-function-args
159168

160169
try:
161170
AnonymousUserId.objects.create(

common/djangoapps/student/tests/tests.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,6 +1050,17 @@ def test_anonymous_id_secret_key_changes_result_in_diff_values_for_same_new_user
10501050
assert anonymous_id != new_anonymous_id
10511051
assert self.user == user_by_anonymous_id(new_anonymous_id)
10521052

1053+
def test_enable_legacy_hash_flag(self):
1054+
"""Test that different anonymous id returned if ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID enabled."""
1055+
CourseEnrollment.enroll(self.user, self.course.id)
1056+
anonymous_id = anonymous_id_for_user(self.user, self.course.id)
1057+
with patch.dict(settings.FEATURES, ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID=True):
1058+
# Recreate user object to clear cached anonymous id.
1059+
self.user = User.objects.get(pk=self.user.id)
1060+
AnonymousUserId.objects.filter(user=self.user).filter(course_id=self.course.id).delete()
1061+
new_anonymous_id = anonymous_id_for_user(self.user, self.course.id)
1062+
assert anonymous_id != new_anonymous_id
1063+
10531064

10541065
@skip_unless_lms
10551066
@patch('openedx.core.djangoapps.programs.utils.get_programs')

lms/envs/common.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,17 @@
10281028
# .. toggle_creation_date: 2022-06-06
10291029
# .. toggle_tickets: 'https://github.com/edx/edx-platform/pull/29538'
10301030
'DISABLE_ALLOWED_ENROLLMENT_IF_ENROLLMENT_CLOSED': False,
1031+
1032+
# .. toggle_name: FEATURES['ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID']
1033+
# .. toggle_implementation: DjangoSetting
1034+
# .. toggle_default: False
1035+
# .. toggle_description: Whether to enable the legacy MD5 hashing algorithm to generate anonymous user id
1036+
# instead of the newer SHAKE128 hashing algorithm
1037+
# .. toggle_use_cases: open_edx
1038+
# .. toggle_creation_date: 2022-08-08
1039+
# .. toggle_target_removal_date: None
1040+
# .. toggle_tickets: 'https://github.com/openedx/edx-platform/pull/30832'
1041+
'ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID': False,
10311042
}
10321043

10331044
# Specifies extra XBlock fields that should available when requested via the Course Blocks API

0 commit comments

Comments
 (0)