From e1ce1a42c3909f10cb77e6b1031f7f32fa669c9b Mon Sep 17 00:00:00 2001 From: Saleem Latif Date: Tue, 12 Nov 2024 13:33:45 +0500 Subject: [PATCH] fix: Fixed null email issue for leaderboard. --- CHANGELOG.rst | 5 ++ enterprise_data/__init__.py | 2 +- .../queries/fact_engagement_admin_dash.py | 49 ++++++++++++ .../tables/fact_engagement_admin_dash.py | 77 +++++++++++++++++-- .../api/v1/views/analytics_leaderboard.py | 10 ++- .../commands/pre_warm_analytics_cache.py | 9 ++- .../tests/test_pre_warm_analytics_cache.py | 19 +++-- enterprise_data/utils.py | 17 ++++ enterprise_data_roles/rules.py | 2 +- 9 files changed, 169 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5c12e4e4..f80db8bf 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,6 +16,11 @@ Unreleased ========================= +[10.2.0] - 2024-11-12 +--------------------- + * Fixed null email issue for leaderboard. + + [10.1.0] - 2024-10-29 --------------------- * Added management command to pre-warm analytics data. diff --git a/enterprise_data/__init__.py b/enterprise_data/__init__.py index 89f3ef50..8b2591f9 100644 --- a/enterprise_data/__init__.py +++ b/enterprise_data/__init__.py @@ -2,4 +2,4 @@ Enterprise data api application. This Django app exposes API endpoints used by enterprises. """ -__version__ = "10.1.0" +__version__ = "10.2.0" diff --git a/enterprise_data/admin_analytics/database/queries/fact_engagement_admin_dash.py b/enterprise_data/admin_analytics/database/queries/fact_engagement_admin_dash.py index dcaaa763..39f9a73b 100644 --- a/enterprise_data/admin_analytics/database/queries/fact_engagement_admin_dash.py +++ b/enterprise_data/admin_analytics/database/queries/fact_engagement_admin_dash.py @@ -163,6 +163,55 @@ def get_completion_data_for_leaderboard_query(email_list: list): GROUP BY email; """ + @staticmethod + def get_engagement_data_for_leaderboard_null_email_only_query(): + """ + Get the query to fetch the engagement data for leaderboard for NULL emails only. + + Query should fetch the engagement data for like learning time, session length of + the enterprise learners to show in the leaderboard. + + Returns: + (str): Query to fetch the engagement data for leaderboard. + """ + return """ + SELECT + email, + ROUND(SUM(learning_time_seconds) / 3600, 1) as learning_time_hours, + SUM(is_engaged) as session_count, + CASE + WHEN SUM(is_engaged) = 0 THEN 0.0 + ELSE ROUND(SUM(learning_time_seconds) / 3600 / SUM(is_engaged), 1) + END AS average_session_length + FROM fact_enrollment_engagement_day_admin_dash + WHERE enterprise_customer_uuid=%(enterprise_customer_uuid)s AND + (activity_date BETWEEN %(start_date)s AND %(end_date)s) AND + is_engaged = 1 AND + email is NULL + GROUP BY email; + """ + + @staticmethod + def get_completion_data_for_leaderboard_null_email_only_query(): + """ + Get the query to fetch the completions data for leaderboard for NULL emails. + + Query should fetch the completion data for like course completion count of + the enterprise learners to show in the leaderboard. + + Returns: + (list): Query to fetch the completions data for leaderboard. + """ + return """ + SELECT email, count(course_key) as course_completion_count + FROM fact_enrollment_admin_dash + WHERE enterprise_customer_uuid=%(enterprise_customer_uuid)s AND + (passed_date BETWEEN %(start_date)s AND %(end_date)s) AND + has_passed = 1 AND + email is NULL + GROUP BY email; + """ + @staticmethod def get_leaderboard_data_count_query(): """ diff --git a/enterprise_data/admin_analytics/database/tables/fact_engagement_admin_dash.py b/enterprise_data/admin_analytics/database/tables/fact_engagement_admin_dash.py index dce46711..3fcae103 100644 --- a/enterprise_data/admin_analytics/database/tables/fact_engagement_admin_dash.py +++ b/enterprise_data/admin_analytics/database/tables/fact_engagement_admin_dash.py @@ -5,6 +5,7 @@ from uuid import UUID from enterprise_data.cache.decorators import cache_it +from enterprise_data.utils import find_first from ..queries import FactEngagementAdminDashQueries from ..utils import run_query @@ -168,7 +169,14 @@ def get_engagement_time_series_data(self, enterprise_customer_uuid: UUID, start_ @cache_it() def _get_engagement_data_for_leaderboard( - self, enterprise_customer_uuid: UUID, start_date: date, end_date: date, limit: int, offset: int + self, + enterprise_customer_uuid: UUID, + start_date: date, + end_date: date, + limit: int, + offset: int, + include_null_email: bool, + ): """ Get the engagement data for leaderboard. @@ -182,11 +190,12 @@ def _get_engagement_data_for_leaderboard( end_date (date): The end date. limit (int): The maximum number of records to return. offset (int): The number of records to skip. + include_null_email (bool): If True, only fetch data for NULL emails. Returns: list[dict]: The engagement data for leaderboard. """ - return run_query( + engagements = run_query( query=self.queries.get_engagement_data_for_leaderboard_query(), params={ 'enterprise_customer_uuid': enterprise_customer_uuid, @@ -198,9 +207,27 @@ def _get_engagement_data_for_leaderboard( as_dict=True, ) + if include_null_email: + engagement_for_null_email = run_query( + query=self.queries.get_engagement_data_for_leaderboard_null_email_only_query(), + params={ + 'enterprise_customer_uuid': enterprise_customer_uuid, + 'start_date': start_date, + 'end_date': end_date, + }, + as_dict=True, + ) + engagements += engagement_for_null_email + return engagements + @cache_it() def _get_completion_data_for_leaderboard_query( - self, enterprise_customer_uuid: UUID, start_date: date, end_date: date, email_list: list + self, + enterprise_customer_uuid: UUID, + start_date: date, + end_date: date, + email_list: list, + include_null_email: bool, ): """ Get the completion data for leaderboard. @@ -213,11 +240,13 @@ def _get_completion_data_for_leaderboard_query( start_date (date): The start date. end_date (date): The end date. email_list (list): List of emails of the enterprise learners. + include_null_email (bool): If True, only fetch data for NULL emails. Returns: list[dict]: The engagement data for leaderboard. """ - return run_query( + + completions = run_query( query=self.queries.get_completion_data_for_leaderboard_query(email_list), params={ 'enterprise_customer_uuid': enterprise_customer_uuid, @@ -227,8 +256,28 @@ def _get_completion_data_for_leaderboard_query( as_dict=True, ) + if include_null_email: + completions_for_null_email = run_query( + query=self.queries.get_completion_data_for_leaderboard_null_email_only_query(), + params={ + 'enterprise_customer_uuid': enterprise_customer_uuid, + 'start_date': start_date, + 'end_date': end_date, + }, + as_dict=True, + ) + completions += completions_for_null_email + + return completions + def get_all_leaderboard_data( - self, enterprise_customer_uuid: UUID, start_date: date, end_date: date, limit: int, offset: int + self, + enterprise_customer_uuid: UUID, + start_date: date, + end_date: date, + limit: int, + offset: int, + total_count: int, ): """ Get the leaderboard data for the given enterprise customer. @@ -239,32 +288,48 @@ def get_all_leaderboard_data( end_date (date): The end date. limit (int): The maximum number of records to return. offset (int): The number of records to skip. + total_count (int): The total number of records. Returns: list[dict]: The leaderboard data. """ + include_null_email = False + # If this is the last or only page, we need to include NULL emails record. + if total_count <= offset + limit: + include_null_email = True + engagement_data = self._get_engagement_data_for_leaderboard( enterprise_customer_uuid=enterprise_customer_uuid, start_date=start_date, end_date=end_date, limit=limit, offset=offset, + include_null_email=include_null_email, ) # If there is no data, no need to proceed. if not engagement_data: return [] - engagement_data_dict = {engagement['email']: engagement for engagement in engagement_data} + engagement_data_dict = { + engagement['email']: engagement for engagement in engagement_data if engagement['email'] + } completion_data = self._get_completion_data_for_leaderboard_query( enterprise_customer_uuid=enterprise_customer_uuid, start_date=start_date, end_date=end_date, email_list=list(engagement_data_dict.keys()), + include_null_email=include_null_email, ) for completion in completion_data: email = completion['email'] engagement_data_dict[email]['course_completion_count'] = completion['course_completion_count'] + if include_null_email: + engagement_data_dict['None'] = find_first(engagement_data, lambda x: x['email'] is None) or {} + completion = find_first(completion_data, lambda x: x['email'] is None) or \ + {'course_completion_count': 'Unknown'} + engagement_data_dict['None']['course_completion_count'] = completion['course_completion_count'] + return list(engagement_data_dict.values()) @cache_it() diff --git a/enterprise_data/api/v1/views/analytics_leaderboard.py b/enterprise_data/api/v1/views/analytics_leaderboard.py index 5dc26274..618370e7 100644 --- a/enterprise_data/api/v1/views/analytics_leaderboard.py +++ b/enterprise_data/api/v1/views/analytics_leaderboard.py @@ -49,17 +49,18 @@ def list(self, request, enterprise_uuid): end_date = serializer.data.get('end_date', date.today()) page = serializer.data.get('page', 1) page_size = serializer.data.get('page_size', 100) - leaderboard = FactEngagementAdminDashTable().get_all_leaderboard_data( + total_count = FactEngagementAdminDashTable().get_leaderboard_data_count( enterprise_customer_uuid=enterprise_uuid, start_date=start_date, end_date=end_date, - limit=page_size, - offset=(page - 1) * page_size, ) - total_count = FactEngagementAdminDashTable().get_leaderboard_data_count( + leaderboard = FactEngagementAdminDashTable().get_all_leaderboard_data( enterprise_customer_uuid=enterprise_uuid, start_date=start_date, end_date=end_date, + limit=page_size, + offset=(page - 1) * page_size, + total_count=total_count, ) response_type = request.query_params.get('response_type', ResponseType.JSON.value) @@ -102,6 +103,7 @@ def _stream_serialized_data(enterprise_uuid, start_date, end_date, total_count, end_date=end_date, limit=page_size, offset=offset, + total_count=total_count, ) yield from leaderboard offset += page_size diff --git a/enterprise_data/management/commands/pre_warm_analytics_cache.py b/enterprise_data/management/commands/pre_warm_analytics_cache.py index 74a537c2..2eaac702 100644 --- a/enterprise_data/management/commands/pre_warm_analytics_cache.py +++ b/enterprise_data/management/commands/pre_warm_analytics_cache.py @@ -161,17 +161,18 @@ def __cache_engagement_data(enterprise_customer_uuid): start_date=start_date, end_date=end_date, ) - enterprise_engagement_table.get_all_leaderboard_data( + total_count = enterprise_engagement_table.get_leaderboard_data_count( enterprise_customer_uuid=enterprise_customer_uuid, start_date=start_date, end_date=end_date, - limit=page_size, - offset=0, ) - enterprise_engagement_table.get_leaderboard_data_count( + enterprise_engagement_table.get_all_leaderboard_data( enterprise_customer_uuid=enterprise_customer_uuid, start_date=start_date, end_date=end_date, + limit=page_size, + offset=0, + total_count=total_count, ) @staticmethod diff --git a/enterprise_data/management/commands/tests/test_pre_warm_analytics_cache.py b/enterprise_data/management/commands/tests/test_pre_warm_analytics_cache.py index 6d82ee6a..0c75d665 100644 --- a/enterprise_data/management/commands/tests/test_pre_warm_analytics_cache.py +++ b/enterprise_data/management/commands/tests/test_pre_warm_analytics_cache.py @@ -30,9 +30,18 @@ def setUp(self): get_enrollment_date_range_patcher.start() self.addCleanup(get_enrollment_date_range_patcher.stop) - @patch('enterprise_data.admin_analytics.database.tables.fact_engagement_admin_dash.run_query', MagicMock()) - @patch('enterprise_data.admin_analytics.database.tables.fact_enrollment_admin_dash.run_query', MagicMock()) - @patch('enterprise_data.admin_analytics.database.tables.skills_daily_rollup_admin_dash.run_query', MagicMock()) + @patch( + 'enterprise_data.admin_analytics.database.tables.fact_engagement_admin_dash.run_query', + MagicMock(return_value=[]) + ) + @patch( + 'enterprise_data.admin_analytics.database.tables.fact_enrollment_admin_dash.run_query', + MagicMock(return_value=[]) + ) + @patch( + 'enterprise_data.admin_analytics.database.tables.skills_daily_rollup_admin_dash.run_query', + MagicMock(return_value=[]) + ) @patch('enterprise_data.api.v1.views.analytics_enrollments.FactEnrollmentAdminDashTable.get_top_enterprises') @patch('enterprise_data.cache.decorators.cache.set') @patch('enterprise_data.cache.decorators.cache.get') @@ -47,5 +56,5 @@ def test_pre_warm_analytics_cache(self, mock_get_cache, mock_set_cache, mock_get call_command('pre_warm_analytics_cache') - assert mock_get_cache.call_count == 24 - assert mock_set_cache.call_count == 24 + assert mock_get_cache.call_count == 23 + assert mock_set_cache.call_count == 23 diff --git a/enterprise_data/utils.py b/enterprise_data/utils.py index e671e0a5..1e28a2ac 100644 --- a/enterprise_data/utils.py +++ b/enterprise_data/utils.py @@ -97,3 +97,20 @@ def primary_subject_truncate(x): return x else: return "other" + + +def find_first(iterable, condition): + """ + Find the first item in an iterable that satisfies the condition. + + Arguments: + iterable (iterable): The iterable to search. + condition (function): The condition to satisfy. + + Returns: + The first item that satisfies the condition, or None if no item satisfies the condition. + """ + try: + return next(item for item in iterable if condition(item)) + except StopIteration: + return None diff --git a/enterprise_data_roles/rules.py b/enterprise_data_roles/rules.py index 1a51f47b..40c02da5 100644 --- a/enterprise_data_roles/rules.py +++ b/enterprise_data_roles/rules.py @@ -57,5 +57,5 @@ def request_user_has_explicit_access(*args, **kwargs): rules.add_perm( 'can_access_enterprise', - request_user_has_implicit_access | request_user_has_explicit_access # pylint: disable=unsupported-binary-operation + request_user_has_implicit_access | request_user_has_explicit_access )