Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add option to omit anonymous users from index and identify events #306

Merged
merged 2 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions contract-tests/client_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(self, tag, config):
opts["all_attributes_private"] = events.get("allAttributesPrivate", False)
opts["private_attributes"] = events.get("globalPrivateAttributes", {})
_set_optional_time_prop(events, "flushIntervalMs", opts, "flush_interval")
opts["omit_anonymous_contexts"] = events.get("omitAnonymousContexts", False)
else:
opts["send_events"] = False

Expand Down
3 changes: 2 additions & 1 deletion contract-tests/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ def status():
'polling-gzip',
'inline-context',
'anonymous-redaction',
'evaluation-hooks'
'evaluation-hooks',
'omit-anonymous-contexts'
]
}
return (json.dumps(body), 200, {'Content-type': 'application/json'})
Expand Down
12 changes: 11 additions & 1 deletion ldclient/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,8 @@ def __init__(self,
big_segments: Optional[BigSegmentsConfig]=None,
application: Optional[dict]=None,
hooks: Optional[List[Hook]]=None,
enable_event_compression: bool=False):
enable_event_compression: bool=False,
omit_anonymous_contexts: bool=False):
"""
:param sdk_key: The SDK key for your LaunchDarkly account. This is always required.
:param base_uri: The base URL for the LaunchDarkly server. Most users should use the default
Expand Down Expand Up @@ -243,6 +244,7 @@ def __init__(self,
:param application: Optional properties for setting application metadata. See :py:attr:`~application`
:param hooks: Hooks provide entrypoints which allow for observation of SDK functions.
:param enable_event_compression: Whether or not to enable GZIP compression for outgoing events.
:param omit_anonymous_contexts: Sets whether anonymous contexts should be omitted from index and identify events.
"""
self.__sdk_key = sdk_key

Expand Down Expand Up @@ -277,6 +279,7 @@ def __init__(self,
self.__application = validate_application_info(application or {}, log)
self.__hooks = [hook for hook in hooks if isinstance(hook, Hook)] if hooks else []
self.__enable_event_compression = enable_event_compression
self.__omit_anonymous_contexts = omit_anonymous_contexts
self._data_source_update_sink: Optional[DataSourceUpdateSink] = None

def copy_with_new_sdk_key(self, new_sdk_key: str) -> 'Config':
Expand Down Expand Up @@ -466,6 +469,13 @@ def hooks(self) -> List[Hook]:
def enable_event_compression(self) -> bool:
return self.__enable_event_compression

@property
def omit_anonymous_contexts(self) -> bool:
    """
    Whether anonymous contexts are left out of index and identify events.

    This simply reports the value that was stored on the configuration
    when it was constructed.
    """
    return self.__omit_anonymous_contexts

@property
def data_source_update_sink(self) -> Optional[DataSourceUpdateSink]:
"""
Expand Down
22 changes: 22 additions & 0 deletions ldclient/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,28 @@ def anonymous(self) -> bool:
"""
return self.__anonymous

def without_anonymous_contexts(self) -> Context:
    """
    Builds a context from the non-anonymous parts of this one.

    Multi-kind context:

    Every anonymous single-kind context is dropped and a new multi-kind
    context is created from the ones that remain. If nothing remains
    (every single-kind context was anonymous) the result is an invalid
    context.

    Single-kind context:

    A non-anonymous context comes back as is and unmodified; an anonymous
    one yields an invalid context.
    """
    # Treat a single-kind context as a one-element collection so both
    # shapes share the same filtering path.
    if self.__multi is None:
        candidates = [self]
    else:
        candidates = self.__multi

    retained = [candidate for candidate in candidates if not candidate.anonymous]

    # create_multi decides what the survivors amount to, including the
    # "nothing left" case.
    return Context.create_multi(*retained)

def get(self, attribute: str) -> Any:
"""
Looks up the value of any attribute of the context by name.
Expand Down
46 changes: 32 additions & 14 deletions ldclient/impl/events/event_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,13 @@
from email.utils import parsedate
import json
from threading import Event, Lock, Thread
from typing import Any, List, Optional, Dict
from typing import Any, List, Optional, Dict, Callable
import time
import uuid
import queue
import urllib3
import gzip
from ldclient.config import Config
from datetime import timedelta
from random import Random

from ldclient.context import Context
Expand Down Expand Up @@ -341,6 +340,7 @@ def __init__(self, inbox, config, http_client, diagnostic_accumulator=None):
self._deduplicated_contexts = 0
self._diagnostic_accumulator = None if config.diagnostic_opt_out else diagnostic_accumulator
self._sampler = Sampler(Random())
self._omit_anonymous_contexts = config.omit_anonymous_contexts

self._flush_workers = FixedThreadPool(__MAX_FLUSH_THREADS__, "ldclient.flush")
self._diagnostic_flush_workers = None if self._diagnostic_accumulator is None else FixedThreadPool(1, "ldclient.diag_flush")
Expand Down Expand Up @@ -387,7 +387,6 @@ def _process_event(self, event: EventInput):
# Decide whether to add the event to the payload. Feature events may be added twice, once for
# the event (if tracked) and once for debugging.
context = None # type: Optional[Context]
can_add_index = True
full_event = None # type: Any
debug_event = None # type: Optional[DebugEvent]
sampling_ratio = 1 if event.sampling_ratio is None else event.sampling_ratio
Expand All @@ -401,31 +400,50 @@ def _process_event(self, event: EventInput):
if self._should_debug_event(event):
debug_event = DebugEvent(event)
elif isinstance(event, EventInputIdentify):
context = event.context
if self._omit_anonymous_contexts:
context = event.context.without_anonymous_contexts()
if not context.valid:
return

event = EventInputIdentify(event.timestamp, context, event.sampling_ratio)

full_event = event
can_add_index = False # an index event would be redundant if there's an identify event
elif isinstance(event, EventInputCustom):
context = event.context
full_event = event
elif isinstance(event, MigrationOpEvent):
full_event = event

# For each context we haven't seen before, we add an index event - unless this is already
# an identify event.
if context is not None:
already_seen = self._context_keys.put(context.fully_qualified_key, True)
if can_add_index:
if already_seen:
self._deduplicated_contexts += 1
else:
self._outbox.add_event(IndexEvent(event.timestamp, context))
self._get_indexable_context(event, lambda c: self._outbox.add_event(IndexEvent(event.timestamp, c)))

if full_event and self._sampler.sample(sampling_ratio):
self._outbox.add_event(full_event)

if debug_event and self._sampler.sample(sampling_ratio):
self._outbox.add_event(debug_event)

def _get_indexable_context(self, event: EventInput, block: Callable[[Context], None]):
    """
    Calls ``block`` with the context that should be indexed for ``event``, if any.

    Nothing is passed to ``block`` when the event has no context, when the
    context is invalid (after anonymous contexts are stripped, if that
    option is enabled), when the context key was already recorded, or when
    the event is an identify or migration-op event.

    :param event: the event whose context is being considered
    :param block: callback that receives the context to index
    """
    candidate = event.context
    if candidate is None:
        return

    if self._omit_anonymous_contexts:
        candidate = candidate.without_anonymous_contexts()

    if not candidate.valid:
        return

    # The key is recorded for every valid context, including those of
    # identify and migration-op events that never reach ``block``.
    seen_before = self._context_keys.put(candidate.fully_qualified_key, True)
    if seen_before:
        self._deduplicated_contexts += 1
        return

    if isinstance(event, (EventInputIdentify, MigrationOpEvent)):
        return

    block(candidate)



def _should_debug_event(self, event: EventInputEvaluation):
if event.flag is None:
return False
Expand Down
59 changes: 59 additions & 0 deletions ldclient/testing/impl/events/test_event_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,37 @@ def test_context_is_filtered_in_identify_event():
assert len(output) == 1
check_identify_event(output[0], e, formatter.format_context(context))


def test_omit_anonymous_contexts_suppresses_identify_event():
    """An identify event for a purely anonymous context produces no output."""
    with DefaultTestProcessor(omit_anonymous_contexts=True) as ep:
        anonymous = Context.builder('userkey').name('Red').anonymous(True).build()
        ep.send_event(EventInputIdentify(timestamp, anonymous))

        # The flush helper is expected to trip its own assertion when
        # there is nothing to collect; getting past it means events leaked.
        try:
            flush_and_get_events(ep)
        except AssertionError:
            pass
        else:
            pytest.fail("Expected no events")


def test_omit_anonymous_contexts_strips_anonymous_contexts_correctly():
    """An identify event for a mixed multi-context keeps only the non-anonymous part."""
    with DefaultTestProcessor(omit_anonymous_contexts=True) as ep:
        anon_a = Context.builder('a').kind('a').anonymous(True).build()
        anon_b = Context.builder('b').kind('b').anonymous(True).build()
        visible = Context.builder('c').kind('c').anonymous(False).build()
        multi = Context.multi_builder().add(anon_a).add(anon_b).add(visible).build()

        identify = EventInputIdentify(timestamp, multi)
        ep.send_event(identify)

        events = flush_and_get_events(ep)
        assert len(events) == 1

        expected_context = EventContextFormatter(True, []).format_context(visible)
        check_identify_event(events[0], identify, expected_context)


def test_individual_feature_event_is_queued_with_index_event():
with DefaultTestProcessor() as ep:
e = EventInputEvaluation(timestamp, context, flag.key, flag, 1, 'value', None, 'default', None, True)
Expand All @@ -248,6 +279,34 @@ def test_individual_feature_event_is_queued_with_index_event():
check_summary_event(output[2])


def test_omit_anonymous_context_emits_feature_event_without_index():
    """An evaluation for an anonymous context yields feature and summary events only."""
    with DefaultTestProcessor(omit_anonymous_contexts=True) as ep:
        anonymous = Context.builder('a').anonymous(True).build()
        evaluation = EventInputEvaluation(
            timestamp, anonymous, flag.key, flag, 1, 'value', None, 'default', None, True
        )
        ep.send_event(evaluation)

        events = flush_and_get_events(ep)
        assert len(events) == 2

        feature_event, summary_event = events
        check_feature_event(feature_event, evaluation)
        check_summary_event(summary_event)


def test_omit_anonymous_context_strips_anonymous_from_index_event():
    """The index event for a mixed multi-context carries only the non-anonymous part."""
    with DefaultTestProcessor(omit_anonymous_contexts=True) as ep:
        anon_a = Context.builder('a').kind('a').anonymous(True).build()
        anon_b = Context.builder('b').kind('b').anonymous(True).build()
        visible = Context.builder('c').kind('c').anonymous(False).build()
        multi = Context.multi_builder().add(anon_a).add(anon_b).add(visible).build()

        evaluation = EventInputEvaluation(
            timestamp, multi, flag.key, flag, 1, 'value', None, 'default', None, True
        )
        ep.send_event(evaluation)

        events = flush_and_get_events(ep)
        assert len(events) == 3

        index_event, feature_event, summary_event = events
        # Only the non-anonymous context should appear in the index event.
        check_index_event(index_event, evaluation, visible.to_dict())
        check_feature_event(feature_event, evaluation)
        check_summary_event(summary_event)


def test_individual_feature_event_is_ignored_for_0_sampling_ratio():
with DefaultTestProcessor() as ep:
e = EventInputEvaluation(timestamp, context, flag_with_0_sampling_ratio.key, flag_with_0_sampling_ratio, 1, 'value', None, 'default', None, True)
Expand Down
62 changes: 49 additions & 13 deletions ldclient/testing/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,22 +110,22 @@ def test_get_built_in_attribute_by_name(self):
assert c.get('kind') == 'b'
assert c.get('name') == 'c'
assert c.get('anonymous') is True

def test_get_unknown_attribute(self):
    """Looking up an attribute that was never set gives ``None``."""
    context = Context.create('a')
    looked_up = context.get('b')
    assert looked_up is None

def test_private_attributes(self):
    """Private attributes are empty by default and keep the order they were added in."""
    plain = Context.create('a')
    assert list(plain.private_attributes) == []

    with_private = Context.builder('a').private('b', '/c/d').private('e').build()
    expected = ['b', '/c/d', 'e']
    assert list(with_private.private_attributes) == expected

def test_fully_qualified_key(self):
    """The fully qualified key prefixes a custom kind and escapes ``%`` and ``:``."""
    cases = [
        (Context.create('key1'), 'key1'),
        (Context.create('key1', 'kind1'), 'kind1:key1'),
        (Context.create('key%with:things', 'kind1'), 'kind1:key%25with%3Athings'),
    ]
    for context, expected in cases:
        assert context.fully_qualified_key == expected

def test_builder_from_context(self):
c1 = Context.builder('a').kind('kind1').name('b').set('c', True).private('d').build()
b = Context.builder_from_context(c1)
Expand Down Expand Up @@ -167,7 +167,7 @@ def _assert_contexts_from_factory_equal(fn):
Context.create_multi(Context.create('a', 'kind1'), Context.create('b', 'kind2'))
assert Context.create_multi(Context.create('a', 'kind1'), Context.create('b', 'kind2')) != \
Context.create('a', 'kind1')

_assert_contexts_from_factory_equal(lambda: Context.create('invalid', 'kind'))
assert Context.create('invalid', 'kind') != Context.create_multi() # different errors

Expand Down Expand Up @@ -195,10 +195,10 @@ def test_json_decoding(self):
Context.builder('key1').kind('kind1').anonymous(True).build()
assert Context.from_dict({'kind': 'kind1', 'key': 'key1', '_meta': {'privateAttributes': ['b']}}) == \
Context.builder('key1').kind('kind1').private('b').build()

assert Context.from_dict({'kind': 'multi', 'kind1': {'key': 'key1'}, 'kind2': {'key': 'key2'}}) == \
Context.create_multi(Context.create('key1', 'kind1'), Context.create('key2', 'kind2'))

assert_context_invalid(Context.from_dict({'kind': 'kind1'}))
assert_context_invalid(Context.from_dict({'kind': 'kind1', 'key': 3}))
assert_context_invalid(Context.from_dict({'kind': 'multi'}))
Expand Down Expand Up @@ -256,34 +256,70 @@ class TestContextErrors:
def test_key_empty_string(self):
    """An empty key is rejected by both the factory and the builder."""
    via_factory = Context.create('')
    assert_context_invalid(via_factory)

    via_builder = Context.builder('').build()
    assert_context_invalid(via_builder)

@pytest.mark.parametrize('kind', ['kind', 'multi', 'b$c', ''])
def test_kind_invalid_strings(self, kind):
    """Each listed kind string is rejected by both the factory and the builder."""
    via_factory = Context.create('a', kind)
    assert_context_invalid(via_factory)

    via_builder = Context.builder('a').kind(kind).build()
    assert_context_invalid(via_builder)

def test_create_multi_with_no_contexts(self):
    """``create_multi`` with no arguments yields an invalid context."""
    empty_multi = Context.create_multi()
    assert_context_invalid(empty_multi)

def test_multi_builder_with_no_contexts(self):
    """Building a multi-context without adding anything yields an invalid context."""
    empty_multi = Context.multi_builder().build()
    assert_context_invalid(empty_multi)

def test_create_multi_with_duplicate_kind(self):
    """Two contexts of the same kind cannot be combined with ``create_multi``."""
    first = Context.create('a', 'kind1')
    second = Context.create('b', 'kind1')

    combined = Context.create_multi(first, second)

    assert_context_invalid(combined)

def test_multi_builder_with_duplicate_kind(self):
    """Two contexts of the same kind cannot be combined with the multi-builder."""
    first = Context.create('a', 'kind1')
    second = Context.create('b', 'kind1')

    combined = Context.multi_builder().add(first).add(second).build()

    assert_context_invalid(combined)

def test_create_multi_with_invalid_context(self):
    """An invalid member makes the whole ``create_multi`` result invalid."""
    valid_member = Context.create('a', 'kind1')
    invalid_member = Context.create('')

    combined = Context.create_multi(valid_member, invalid_member)

    assert_context_invalid(combined)

def test_multi_builder_with_invalid_context(self):
    """An invalid member makes the whole multi-builder result invalid."""
    valid_member = Context.create('a', 'kind1')
    invalid_member = Context.create('')

    combined = Context.multi_builder().add(valid_member).add(invalid_member).build()

    assert_context_invalid(combined)


class TestAnonymousRedaction:
    """Covers ``Context.without_anonymous_contexts`` for single- and multi-kind contexts."""

    def test_redacting_anonoymous_leads_to_invalid_context(self):
        """A lone anonymous context has nothing left once it is redacted."""
        original = Context.builder('a').anonymous(True).build()
        c = original.without_anonymous_contexts()

        assert_context_invalid(c)

    def test_redacting_non_anonymous_does_not_change_context(self):
        """A non-anonymous single-kind context comes back equal to the original."""
        original = Context.builder('a').anonymous(False).build()
        c = original.without_anonymous_contexts()

        assert_context_valid(c)
        assert c == original

    def test_can_find_non_anonymous_contexts_from_multi(self):
        """Only the non-anonymous member of a mixed multi-context survives."""
        anon = Context.builder('a').anonymous(True).build()
        nonanon = Context.create('b', 'kind2')
        mc = Context.create_multi(anon, nonanon)

        filtered = mc.without_anonymous_contexts()

        assert_context_valid(filtered)
        assert filtered.individual_context_count == 1
        assert filtered.key == 'b'
        assert filtered.kind == 'kind2'

    def test_can_filter_all_from_multi(self):
        """A multi-context made only of anonymous members redacts to an invalid context."""
        # Each member needs its own kind: two contexts sharing a kind make
        # create_multi return an invalid context up front, and the final
        # assertion would then pass without any filtering having happened.
        a = Context.builder('a').kind('kind1').anonymous(True).build()
        b = Context.builder('b').kind('kind2').anonymous(True).build()
        mc = Context.create_multi(a, b)
        assert_context_valid(mc)

        filtered = mc.without_anonymous_contexts()

        assert_context_invalid(filtered)
Loading