Merge branch 'staging' into 'master'
Staging -> Master

See merge request flagsmith/bullet-train-api!360
matthewelwell committed May 5, 2021
2 parents e6a670f + 45106b0 commit 2c9144d
Showing 51 changed files with 1,222 additions and 332 deletions.
2 changes: 1 addition & 1 deletion .isort.cfg
@@ -4,4 +4,4 @@ multi_line_output=3
include_trailing_comma=true
line_length=79
known_first_party=analytics,app,custom_auth,environments,integrations,organisations,projects,segments,users,webhooks,api,audit,e2etests,features,permissions,util
known_third_party=apiclient,app_analytics,axes,chargebee,core,coreapi,corsheaders,dj_database_url,django,djoser,drf_writable_nested,drf_yasg2,environs,google,influxdb_client,ordered_model,pyotp,pytest,pytz,requests,responses,rest_framework,rest_framework_nested,rest_framework_recursive,sentry_sdk,shortuuid,simple_history,six,telemetry,tests,trench,whitenoise
known_third_party=apiclient,app_analytics,axes,chargebee,core,coreapi,corsheaders,dj_database_url,django,django_lifecycle,djoser,drf_writable_nested,drf_yasg2,environs,google,influxdb_client,ordered_model,pyotp,pytest,pytz,requests,responses,rest_framework,rest_framework_nested,rest_framework_recursive,sentry_sdk,shortuuid,simple_history,six,telemetry,tests,trench,whitenoise
1 change: 1 addition & 0 deletions requirements.in
@@ -35,6 +35,7 @@ django-debug-toolbar==3.1.1
sentry-sdk==0.19.4
environs==9.2.0
analytics-python
django-lifecycle
drf-writable-nested
django-filter
dataclasses
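
django-lifecycle, added above, provides declarative lifecycle hooks on Django models. A minimal sketch of the hook style it enables, assuming a hypothetical Widget model that is not part of this diff:

from django.db import models
from django_lifecycle import LifecycleModelMixin, hook, AFTER_CREATE


class Widget(LifecycleModelMixin, models.Model):
    name = models.CharField(max_length=50)

    @hook(AFTER_CREATE)
    def log_creation(self):
        # runs once, after the initial save of a new instance
        print(f"created widget: {self.name}")
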
4 changes: 4 additions & 0 deletions requirements.txt
@@ -53,6 +53,8 @@ django-health-check==3.14.3
# via -r requirements.in
django-ipware==3.0.2
# via django-axes
django-lifecycle==0.9.0
# via -r requirements.in
django-ordered-model==3.4.1
# via -r requirements.in
django-ses==1.0.3
@@ -249,6 +251,8 @@ urllib3==1.25.11
# influxdb-client
# requests
# sentry-sdk
urlman==1.4.0
# via django-lifecycle
whitenoise==3.3.1
# via -r requirements.in
yubico-client==1.13.0
1 change: 1 addition & 0 deletions src/app/settings/common.py
@@ -106,6 +106,7 @@
"environments.identities",
"environments.identities.traits",
"features",
"features.multivariate",
"segments",
"e2etests",
"simple_history",
33 changes: 33 additions & 0 deletions src/environments/identities/helpers.py
@@ -1,3 +1,7 @@
import hashlib
import itertools
import typing

from integrations.amplitude.amplitude import AmplitudeWrapper
from integrations.heap.heap import HeapWrapper
from integrations.mixpanel.mixpanel import MixpanelWrapper
@@ -22,3 +26,32 @@ def identify_integrations(identity, all_feature_states):
user_id=identity.identifier, feature_states=all_feature_states
)
wrapper_instance.identify_user_async(data=user_data)


def get_hashed_percentage_for_object_ids(
object_ids: typing.Iterable[int], iterations: int = 1
) -> float:
"""
Given a list of object ids, get a floating point number between 0 and 1 based on
the hash of those ids. This should give the same value every time for any
list of ids.
:param object_ids: list of object ids to calculate the hash for
:param iterations: num times to include each id in the generated string to hash
:return: (float) number between 0 (inclusive) and 1 (exclusive)
"""

to_hash = ",".join(str(id_) for id_ in list(object_ids) * iterations)
hashed_value = hashlib.md5(to_hash.encode("utf-8"))
hashed_value_as_int = int(hashed_value.hexdigest(), base=16)
value = (hashed_value_as_int % 9999) / 9998

if value == 1:
# since we want a number between 0 (inclusive) and 1 (exclusive), in the
# unlikely case that we get the exact number 1, we call the method again
# and increase the number of iterations to ensure we get a different result
return get_hashed_percentage_for_object_ids(
object_ids=object_ids, iterations=iterations + 1
)

return value
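
A short usage sketch (the ids here are illustrative): the function is deterministic, so a given segment / identity pair always lands in the same bucket, which is what makes percentage-split rollouts stable across requests:

segment_id, identity_id = 1, 42
p = get_hashed_percentage_for_object_ids([segment_id, identity_id])
assert 0 <= p < 1
assert p == get_hashed_percentage_for_object_ids([segment_id, identity_id])
# a 30% percentage-split rule would then include this identity when p
# falls below the 0.3 threshold (exact comparison per the segment logic)
in_split = p <= 0.3
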
19 changes: 15 additions & 4 deletions src/environments/identities/models.py
@@ -1,12 +1,14 @@
import hashlib
import typing

from django.db import models
from django.db.models import Q
from django.db.models import Q, Prefetch
from django.utils.encoding import python_2_unicode_compatible

from environments.models import Environment
from environments.identities.traits.models import Trait
from features.models import FeatureState
from features.multivariate.models import MultivariateFeatureStateValue


@python_2_unicode_compatible
@@ -61,11 +63,20 @@ def get_all_feature_states(self, traits: typing.List[Trait] = None):
"feature_state_value",
"feature_segment",
"feature_segment__segment",
"identity",
]

# When the project's hide_disabled_flags setting is enabled, exclude disabled features from the list
all_flags = FeatureState.objects.select_related(*select_related_args).filter(
full_query
all_flags = (
FeatureState.objects.select_related(*select_related_args)
.prefetch_related(
Prefetch(
"multivariate_feature_state_values",
queryset=MultivariateFeatureStateValue.objects.select_related(
"multivariate_feature_option"
),
)
)
.filter(full_query)
)

# iterate over all the flags and build a dictionary keyed on feature with the highest priority flag
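
The Prefetch above avoids an N+1 query pattern: without it, accessing multivariate_feature_state_values on each of N feature states issues N extra queries, whereas the prefetch batches them into one (with each value's multivariate_feature_option joined via select_related). A sketch of the resulting access pattern:

# 1 query for the feature states + 1 batched query for all of their
# multivariate values; iterating then hits only the prefetched cache
for feature_state in all_flags:
    for mv_value in feature_state.multivariate_feature_state_values.all():
        option = mv_value.multivariate_feature_option  # no extra query
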
126 changes: 125 additions & 1 deletion src/environments/identities/tests/test_helpers.py
@@ -1,9 +1,15 @@
import hashlib
import itertools
import typing
from unittest import mock
from unittest.case import TestCase

import pytest

from environments.identities.helpers import identify_integrations
from environments.identities.helpers import (
identify_integrations,
get_hashed_percentage_for_object_ids,
)
from environments.identities.models import Identity
from environments.models import Environment
from integrations.amplitude.models import AmplitudeConfiguration
@@ -75,3 +81,121 @@ def test_identify_integrations_segment_and_amplitude_called(

mock_segment_wrapper.assert_called()
mock_amplitude_wrapper.assert_called()


def test_get_hashed_percentage_for_object_ids_is_number_between_0_inc_and_1_exc():
assert 1 > get_hashed_percentage_for_object_ids([12, 93]) >= 0


def test_get_hashed_percentage_for_object_ids_is_the_same_each_time():
# Given
object_ids = [30, 73]

# When
result_1 = get_hashed_percentage_for_object_ids(object_ids)
result_2 = get_hashed_percentage_for_object_ids(object_ids)

# Then
assert result_1 == result_2


def test_percentage_value_is_unique_for_different_identities():
# Given
first_object_ids = [14, 106]
second_object_ids = [53, 200]

# When
result_1 = get_hashed_percentage_for_object_ids(first_object_ids)
result_2 = get_hashed_percentage_for_object_ids(second_object_ids)

# Then
assert result_1 != result_2


def test_get_hashed_percentage_for_object_ids_should_be_evenly_distributed():
"""
This test checks that the percentage values returned by the helper function
are evenly distributed across [0, 1).
Note that since the values are effectively random, the check is not guaranteed
to pass every time; however, it should pass roughly 99 times out of 100. It
becomes more accurate as test_sample is increased and / or num_test_buckets is
decreased.
"""
test_sample = 500 # number of ids to sample in each list
num_test_buckets = 50 # split the sample into 'buckets' to check that the values are evenly distributed
test_bucket_size = int(test_sample ** 2 / num_test_buckets)  # values holds all ordered pairs, i.e. test_sample ** 2 entries
error_factor = 0.1

# Given
object_id_pairs = itertools.product(range(test_sample), range(test_sample))

# When
values = sorted(
get_hashed_percentage_for_object_ids(pair) for pair in object_id_pairs
)

# Then
for i in range(num_test_buckets):
bucket_start = i * test_bucket_size
bucket_end = (i + 1) * test_bucket_size
bucket_value_limit = min(
(i + 1) / num_test_buckets + error_factor * ((i + 1) / num_test_buckets),
1,
)

assert all(
[value <= bucket_value_limit for value in values[bucket_start:bucket_end]]
)
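
A worked instance of the bucket bound: for the first bucket (i = 0),

bucket_value_limit = min(1 / 50 + 0.1 * (1 / 50), 1)  # = 0.022

so the smallest bucket of sorted values must sit at or below 0.022; the limit grows linearly with i and caps out at 1 for the final buckets.
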


@mock.patch("environments.identities.helpers.hashlib")
def test_get_hashed_percentage_does_not_return_1(mock_hashlib):
"""
Quite complex test to ensure that the function will never return 1.
To achieve this, we mock the hashlib module to return a magic mock so that we can
subsequently mock the hexdigest method to return known strings. These strings are
chosen such that they can be converted (via `int(s, base=16)`) to known integers.
"""

# Given
object_ids = [12, 93]

# -- SETTING UP THE MOCKS --
# hash strings specifically created to return specific values when converted to
# integers via int(s, base=16). Note that the reverse function was created
# courtesy of https://code.i-harness.com/en/q/1f7c41
hash_string_to_return_1 = "270e"
hash_string_to_return_0 = "270f"
hashed_values = [hash_string_to_return_0, hash_string_to_return_1]

def hexdigest_side_effect():
return hashed_values.pop()

mock_hash = mock.MagicMock()
mock_hashlib.md5.return_value = mock_hash

mock_hash.hexdigest.side_effect = hexdigest_side_effect

# -- FINISH SETTING UP THE MOCKS --

# When
# we get the hashed percentage value for the given object ids
value = get_hashed_percentage_for_object_ids(object_ids)

# Then
# The value is 0 as defined by the mock data
assert value == 0

# and the md5 function was called twice
# (i.e. the get_hashed_percentage_for_object_ids function was also called twice)
call_list = mock_hashlib.md5.call_args_list
assert len(call_list) == 2

# the first call, with a string (in bytes) that contains each object id once
expected_bytes_1 = ",".join(str(id_) for id_ in object_ids).encode("utf-8")
assert call_list[0][0][0] == expected_bytes_1

# the second call, with a string (in bytes) that contains each object id twice
expected_bytes_2 = ",".join(str(id_) for id_ in object_ids * 2).encode("utf-8")
assert call_list[1][0][0] == expected_bytes_2
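
For reference, the arithmetic behind the two magic hex strings:

assert int("270e", base=16) == 9998  # 9998 % 9999 == 9998 -> 9998 / 9998 == 1.0
assert int("270f", base=16) == 9999  # 9999 % 9999 == 0    -> 0 / 9998 == 0.0

Since hashed_values.pop() consumes the list from the end, the first md5 call yields "270e" (value 1.0, forcing the recursive retry with iterations=2) and the second yields "270f" (value 0.0, the asserted result).
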
2 changes: 1 addition & 1 deletion src/environments/identities/tests/test_models.py
@@ -4,7 +4,7 @@
from environments.identities.traits.models import Trait
from environments.models import FLOAT, Environment
from features.models import Feature, FeatureSegment, FeatureState, FeatureStateValue
from features.utils import BOOLEAN, INTEGER, STRING
from features.value_types import INTEGER, STRING, BOOLEAN
from organisations.models import Organisation
from projects.models import Project
from segments.models import (
9 changes: 7 additions & 2 deletions src/environments/identities/tests/test_views.py
@@ -7,6 +7,7 @@
from rest_framework import status
from rest_framework.test import APIClient, APITestCase

from environments.identities.helpers import get_hashed_percentage_for_object_ids
from environments.identities.models import Identity
from environments.identities.traits.models import Trait
from environments.models import Environment
@@ -477,7 +478,9 @@ def test_identities_endpoint_returns_value_for_segment_if_rule_type_percentage_s
segment=segment, type=SegmentRule.ALL_RULE
)

identity_percentage_value = segment.get_identity_percentage_value(self.identity)
identity_percentage_value = get_hashed_percentage_for_object_ids(
[segment.id, self.identity.id]
)
Condition.objects.create(
operator=models.PERCENTAGE_SPLIT,
value=(identity_percentage_value + (1 - identity_percentage_value) / 2)
@@ -522,7 +525,9 @@ def test_identities_endpoint_returns_default_value_if_rule_type_percentage_split
segment=segment, type=SegmentRule.ALL_RULE
)

identity_percentage_value = segment.get_identity_percentage_value(self.identity)
identity_percentage_value = get_hashed_percentage_for_object_ids(
[segment.id, self.identity.id]
)
Condition.objects.create(
operator=models.PERCENTAGE_SPLIT,
value=identity_percentage_value / 2,
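
The threshold choices in these two tests pin the outcome deterministically: given the identity's hashed percentage p, the first test sets the rule value to p + (1 - p) / 2 (the midpoint between p and 1, so p falls strictly inside the split), while the second uses p / 2 (so p falls strictly outside it).
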
2 changes: 1 addition & 1 deletion src/environments/identities/traits/constants.py
@@ -1,3 +1,3 @@
from features.utils import INTEGER, STRING, BOOLEAN, FLOAT
from features.value_types import INTEGER, STRING, BOOLEAN, FLOAT

ACCEPTED_TRAIT_VALUE_TYPES = [INTEGER, STRING, BOOLEAN, FLOAT]
2 changes: 1 addition & 1 deletion src/environments/identities/traits/fields.py
@@ -1,7 +1,7 @@
from rest_framework import serializers

from environments.identities.traits.constants import ACCEPTED_TRAIT_VALUE_TYPES
from features.utils import STRING
from features.value_types import STRING
import logging

logger = logging.getLogger(__name__)
Expand Down
13 changes: 10 additions & 3 deletions src/environments/identities/views.py
@@ -160,13 +160,18 @@ def post(self, request):

# we need to serialize the response again to ensure that the
# trait values are serialized correctly
response_serializer = IdentifyWithTraitsSerializer(instance=instance)
response_serializer = IdentifyWithTraitsSerializer(
instance=instance,
context={"identity": instance.get("identity")}, # todo: improve this
)
return Response(response_serializer.data)

def _get_single_feature_state_response(self, identity, feature_name):
for feature_state in identity.get_all_feature_states():
if feature_state.feature.name == feature_name:
serializer = FeatureStateSerializerFull(feature_state)
serializer = FeatureStateSerializerFull(
feature_state, context={"identity": identity}
)
return Response(data=serializer.data, status=status.HTTP_200_OK)

return Response(
@@ -182,7 +187,9 @@ def _get_all_feature_states_for_user_response(self, identity, trait_models=None)
:return: Response containing lists of both serialized flags and traits
"""
all_feature_states = identity.get_all_feature_states()
serialized_flags = FeatureStateSerializerFull(all_feature_states, many=True)
serialized_flags = FeatureStateSerializerFull(
all_feature_states, many=True, context={"identity": identity}
)
serialized_traits = TraitSerializerBasic(
identity.identity_traits.all(), many=True
)
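
Passing the identity through the serializer context lets field-level logic vary the serialized value per identity, e.g. resolving a multivariate value deterministically. A minimal DRF sketch of the pattern; the serializer and the model helper here are hypothetical, not part of this diff:

from rest_framework import serializers


class ExampleFeatureStateSerializer(serializers.Serializer):
    feature_state_value = serializers.SerializerMethodField()

    def get_feature_state_value(self, instance):
        # context supplied at construction time is available on self.context
        identity = self.context.get("identity")
        # hypothetical helper resolving the value for this identity
        return instance.get_feature_state_value(identity=identity)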