Skip to content

Commit afb7486

Browse files
authored
strict init timeout and some other fixes (#363)
- detailed reason return enum value instead of the enum - remove uninitialized check that returns uninitialized eval details. this early return omits possible info about local override/unsupported config - new option overall_init_timeout to time the entire init process - moved anything that can throw during init to the class's initialize so the class can still be constructed - lock on some flakey unit tests - add timeout to initialization details and error logs - kong: statsig-io/kong#2786
1 parent 33e8166 commit afb7486

14 files changed

+141
-83
lines changed

statsig/evaluation_details.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,5 @@ def __init__(self, config_sync_time: int, init_time: int,
3535

3636
def detailed_reason(self):
3737
if self.reason == EvaluationReason.none:
38-
return f"{self.source}"
39-
return f"{self.source}:{self.reason}"
38+
return f"{self.source.value}"
39+
return f"{self.source.value}:{self.reason.value}"

statsig/evaluator.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,14 @@ class _Evaluator:
2121
def __init__(self, spec_store: _SpecStore):
2222
self._spec_store = spec_store
2323

24-
self._country_lookup = CountryLookup()
24+
self._country_lookup: Optional[CountryLookup] = None
2525
self._gate_overrides: Dict[str, dict] = {}
2626
self._config_overrides: Dict[str, dict] = {}
2727
self._layer_overrides: Dict[str, dict] = {}
2828

29+
def initialize(self):
30+
self._country_lookup = CountryLookup()
31+
2932
def override_gate(self, gate, value, user_id=None):
3033
gate_overrides = self._gate_overrides.get(gate)
3134
if gate_overrides is None:
@@ -195,9 +198,6 @@ def check_gate(self, user, gate, end_result=None, is_nested=False):
195198
if override is not None:
196199
return override
197200

198-
if self._spec_store.init_source == DataSource.UNINITIALIZED:
199-
return _ConfigEvaluation(
200-
evaluation_details=self._create_evaluation_details())
201201
eval_gate = self._spec_store.get_gate(gate)
202202
if eval_gate is None:
203203
globals.logger.debug(f"Gate {gate} not found in the store. Are you sure the gate name is correct?")
@@ -212,10 +212,6 @@ def get_config(self, user, config_name, is_exp=False):
212212
if override is not None:
213213
return override
214214

215-
if self._spec_store.init_source == DataSource.UNINITIALIZED:
216-
return _ConfigEvaluation(
217-
evaluation_details=self._create_evaluation_details())
218-
219215
eval_config = self._spec_store.get_config(config_name)
220216
if eval_config is None:
221217
globals.logger.debug(
@@ -231,10 +227,6 @@ def get_layer(self, user, layer_name):
231227
if override is not None:
232228
return override
233229

234-
if self._spec_store.init_source == DataSource.UNINITIALIZED:
235-
return _ConfigEvaluation(evaluation_details=
236-
self._create_evaluation_details())
237-
238230
eval_layer = self._spec_store.get_layer(layer_name)
239231
if eval_layer is None:
240232
globals.logger.debug(f"Layer {layer_name} not found in the store. Are you sure the layer name is correct?")
@@ -376,6 +368,8 @@ def __evaluate_condition(self, user, condition, end_result):
376368
if value is None:
377369
ip = self.__get_from_user(user, "ip")
378370
if ip is not None and field == "country":
371+
if not self._country_lookup:
372+
self._country_lookup = CountryLookup()
379373
value = self._country_lookup.lookupStr(ip)
380374
if value is None:
381375
return False

statsig/spec_store.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from . import globals
88
from .constants import Const
9-
from .diagnostics import Context, Diagnostics, Marker
9+
from .diagnostics import Diagnostics, Marker
1010
from .evaluation_details import EvaluationReason, DataSource
1111
from .sdk_configs import _SDK_Configs
1212
from .spec_updater import SpecUpdater
@@ -83,7 +83,6 @@ def initialize(self):
8383
)
8484

8585
self.spec_updater.download_id_lists(for_initialize=True)
86-
8786
self.spec_updater.start_background_threads()
8887
self.spec_updater.initialized = True
8988

@@ -343,13 +342,6 @@ def _log_process(self, msg, process=None):
343342
process = "Initialize" if not self.spec_updater.initialized else "Config Sync"
344343
globals.logger.log_process(process, msg)
345344

346-
def _get_current_context(self):
347-
return (
348-
Context.INITIALIZE
349-
if not self.spec_updater.initialized
350-
else Context.CONFIG_SYNC
351-
)
352-
353345
def _get_initialize_strategy(self) -> List[DataSource]:
354346
try:
355347
if self._options.initialize_sources is not None:

statsig/spec_updater.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def __init__(
4343
self._statsig_metadata = statsig_metadata
4444
self._background_download_configs = None
4545
self._background_download_id_lists = None
46-
self._config_sync_strategies = self._get_sync_dcs_strategies()
46+
self._config_sync_strategies = [DataSource.NETWORK]
4747
self._dcs_process_lock = threading.Lock()
4848
if options.out_of_sync_threshold_in_s is not None:
4949
self._enforce_sync_fallback_threshold_in_ms: Optional[float] = options.out_of_sync_threshold_in_s * 1000
@@ -56,6 +56,7 @@ def __init__(
5656
self.dcs_listener: Optional[Callable] = None
5757
self.id_lists_listener: Optional[Callable] = None
5858
self.data_adapter = data_adapter
59+
self._config_sync_strategies = self._get_sync_dcs_strategies()
5960

6061
def get_config_spec(self, source: DataSource, for_initialize=False):
6162
try:

statsig/statsig_context.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@ class InitContext:
1010
error: Optional[Exception]
1111
source: DataSource
1212
store_populated: bool
13+
timed_out: bool
1314

1415
def __init__(self):
1516
self.start_time = int(time.time() * 1000)
1617
self.success = False
1718
self.error = None
1819
self.source = DataSource.UNINITIALIZED
1920
self.store_populated = False
21+
self.timed_out = False

statsig/statsig_errors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,7 @@ class StatsigRuntimeError(RuntimeError):
88

99
class StatsigNameError(NameError):
1010
pass
11+
12+
13+
class StatsigTimeoutError(TimeoutError):
14+
pass

statsig/statsig_network.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def __init__(
8080
):
8181
self.sdk_key = sdk_key
8282
self.error_boundary = error_boundary
83-
self.statsig_options = options
83+
self.options = options
8484
self.diagnostics = diagnostics
8585
self.shutdown_event = shutdown_event
8686
self.statsig_metadata = statsig_metadata
@@ -91,7 +91,7 @@ def __init__(
9191
self.id_list_worker: IStatsigNetworkWorker = defaultHttpWorker
9292
self.log_event_worker: IStatsigNetworkWorker = defaultHttpWorker
9393
self.http_worker: IStatsigNetworkWorker = defaultHttpWorker
94-
for endpoint, config in options.proxy_configs.items():
94+
for endpoint, config in self.options.proxy_configs.items():
9595
protocol = config.protocol
9696
if protocol == NetworkProtocol.GRPC:
9797
self.load_grpc_worker(endpoint, config)
@@ -107,7 +107,7 @@ def load_grpc_websocket_worker(self, endpoint: NetworkEndpoint, config: ProxyCon
107107
grpc_webhook_worker = grpc_webhook_worker_class(
108108
self.sdk_key,
109109
config,
110-
self.statsig_options,
110+
self.options,
111111
self.error_boundary,
112112
self.diagnostics,
113113
self.shutdown_event,
@@ -156,7 +156,7 @@ def get_dcs(
156156
log_on_exception: Optional[bool] = False,
157157
init_timeout: Optional[int] = None,
158158
):
159-
if self.statsig_options.local_mode:
159+
if self.options.local_mode:
160160
globals.logger.warning("Local mode is enabled. Not fetching DCS.")
161161
return
162162
self.dcs_worker.get_dcs(on_complete, since_time, log_on_exception, init_timeout)
@@ -168,14 +168,14 @@ def get_dcs_fallback(
168168
log_on_exception: Optional[bool] = False,
169169
init_timeout: Optional[int] = None,
170170
):
171-
if self.statsig_options.local_mode:
171+
if self.options.local_mode:
172172
globals.logger.warning("Local mode is enabled. Not fetching DCS with fallback.")
173173
return
174-
dcs_proxy = self.statsig_options.proxy_configs.get(NetworkEndpoint.DOWNLOAD_CONFIG_SPECS)
174+
dcs_proxy = self.options.proxy_configs.get(NetworkEndpoint.DOWNLOAD_CONFIG_SPECS)
175175
is_proxy_dcs = (
176176
dcs_proxy
177177
and dcs_proxy.proxy_address != STATSIG_CDN
178-
or self.statsig_options.api_for_download_config_specs != STATSIG_CDN
178+
or self.options.api_for_download_config_specs != STATSIG_CDN
179179
)
180180
if is_proxy_dcs:
181181
self.http_worker.get_dcs_fallback(on_complete, since_time, log_on_exception, init_timeout)
@@ -186,7 +186,7 @@ def get_id_lists(
186186
log_on_exception: Optional[bool] = False,
187187
init_timeout: Optional[int] = None,
188188
):
189-
if self.statsig_options.local_mode:
189+
if self.options.local_mode:
190190
globals.logger.warning("Local mode is enabled. Not fetching ID Lists.")
191191
return
192192
self.id_list_worker.get_id_lists(on_complete, log_on_exception, init_timeout)
@@ -197,42 +197,42 @@ def get_id_lists_fallback(
197197
log_on_exception: Optional[bool] = False,
198198
init_timeout: Optional[int] = None,
199199
):
200-
if self.statsig_options.local_mode:
200+
if self.options.local_mode:
201201
globals.logger.warning("Local mode is enabled. Not fetching ID Lists with fallback.")
202202
return
203-
if not self.statsig_options.fallback_to_statsig_api:
203+
if not self.options.fallback_to_statsig_api:
204204
return
205-
id_list_proxy = self.statsig_options.proxy_configs.get(
205+
id_list_proxy = self.options.proxy_configs.get(
206206
NetworkEndpoint.GET_ID_LISTS
207207
)
208-
id_list_api_override = self.statsig_options.api_for_get_id_lists
208+
id_list_api_override = self.options.api_for_get_id_lists
209209
is_id_lists_proxy = id_list_api_override != STATSIG_API or (
210210
id_list_proxy and id_list_proxy.proxy_address != STATSIG_API)
211211
if is_id_lists_proxy:
212212
self.http_worker.get_id_lists_fallback(on_complete, log_on_exception, init_timeout)
213213

214214
def get_id_list(self, on_complete: Any, url, headers, log_on_exception=False):
215-
if self.statsig_options.local_mode:
215+
if self.options.local_mode:
216216
globals.logger.warning("Local mode is enabled. Not fetching ID List.")
217217
return
218218
self.http_worker.get_id_list(on_complete, url, headers, log_on_exception)
219219

220220
def log_events(self, payload, headers=None, log_on_exception=False, retry=0):
221-
if self.statsig_options.local_mode:
221+
if self.options.local_mode:
222222
globals.logger.warning("Local mode is enabled. Not logging events.")
223223
return None
224224
return self.log_event_worker.log_events(
225225
payload, headers=headers, log_on_exception=log_on_exception, retry=retry
226226
)
227227

228228
def listen_for_dcs(self, listeners: IStreamingListeners, fallback: Callable):
229-
if self.statsig_options.local_mode:
229+
if self.options.local_mode:
230230
globals.logger.warning("Local mode is enabled. Not listening for DCS.")
231231
return
232232
if isinstance(self.dcs_worker, IStatsigWebhookWorker):
233233
self.dcs_worker.start_listen_for_config_spec(listeners)
234234
interval = (
235-
self.statsig_options.rulesets_sync_interval
235+
self.options.rulesets_sync_interval
236236
or DEFAULT_RULESET_SYNC_INTERVAL
237237
)
238238
callbacks = StreamingFallback(
@@ -244,7 +244,7 @@ def listen_for_dcs(self, listeners: IStreamingListeners, fallback: Callable):
244244
self.dcs_worker.register_fallback_cb(callbacks)
245245

246246
def listen_for_id_lists(self, listeners: IStreamingListeners):
247-
if self.statsig_options.local_mode:
247+
if self.options.local_mode:
248248
globals.logger.warning("Local mode is enabled. Not listening for ID Lists.")
249249
return
250250
if isinstance(self.id_list_worker, IStatsigWebhookWorker):

statsig/statsig_options.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ def __init__(
107107
initialize_sources: Optional[List[DataSource]] = None,
108108
config_sync_sources: Optional[List[DataSource]] = None,
109109
output_logger_level: Optional[LogLevel] = LogLevel.WARNING,
110+
overall_init_timeout: Optional[float] = None,
110111
):
111112
self.data_store = data_store
112113
self._environment: Union[None, dict] = None
@@ -152,6 +153,7 @@ def __init__(
152153
self.initialize_sources = initialize_sources
153154
self.config_sync_sources = config_sync_sources
154155
self.output_logger_level = output_logger_level
156+
self.overall_init_timeout = overall_init_timeout
155157
self._logging_copy: Dict[str, Any] = {}
156158
self._set_logging_copy()
157159
self._attributes_changed = False
@@ -218,5 +220,7 @@ def _set_logging_copy(self):
218220
logging_copy["event_queue_size"] = self.event_queue_size
219221
if self.retry_queue_size != DEFAULT_RETRY_QUEUE_SIZE:
220222
logging_copy["retry_queue_size"] = self.retry_queue_size
223+
if self.overall_init_timeout is not None:
224+
logging_copy["overall_init_timeout"] = self.overall_init_timeout
221225
self._logging_copy = logging_copy
222226
self._attributes_changed = False

statsig/statsig_server.py

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import dataclasses
22
import threading
33
import time
4+
from concurrent.futures import ThreadPoolExecutor
45
from typing import Optional, Union, Tuple
56

67
from . import globals
@@ -15,7 +16,7 @@
1516
from .spec_store import _SpecStore, EntityType
1617
from .statsig_context import InitContext
1718
from .statsig_error_boundary import _StatsigErrorBoundary
18-
from .statsig_errors import StatsigNameError, StatsigRuntimeError, StatsigValueError
19+
from .statsig_errors import StatsigNameError, StatsigRuntimeError, StatsigValueError, StatsigTimeoutError
1920
from .statsig_event import StatsigEvent
2021
from .statsig_logger import _StatsigLogger
2122
from .statsig_metadata import _StatsigMetadata
@@ -37,14 +38,16 @@ class InitializeDetails:
3738
init_success: bool
3839
store_populated: bool
3940
error: Optional[Exception]
41+
timed_out: bool
4042

4143
def __init__(self, duration: int, source: str, init_success: bool, store_populated: bool,
42-
error: Optional[Exception]):
44+
error: Optional[Exception], timed_out: bool = False):
4345
self.duration = duration
4446
self.source = source
4547
self.init_success = init_success
4648
self.error = error
4749
self.store_populated = store_populated
50+
self.timed_out = timed_out
4851

4952

5053
class StatsigServer:
@@ -61,6 +64,7 @@ class StatsigServer:
6164
_evaluator: _Evaluator
6265

6366
def __init__(self) -> None:
67+
self._executor = ThreadPoolExecutor(max_workers=1)
6468
self._initialized = False
6569

6670
self._errorBoundary = _StatsigErrorBoundary()
@@ -109,12 +113,15 @@ def _post_init_logging(self, options: StatsigOptions, init_details: InitializeDe
109113
globals.logger.error(
110114
"Statsig SDK instance initialized, but config store is not populated. The SDK is using default values for evaluation.")
111115
else:
112-
globals.logger.error("Statsig SDK instance Initialized failed!")
116+
if init_details.timed_out:
117+
globals.logger.error("Statsig SDK instance initialization timed out.")
118+
else:
119+
globals.logger.error("Statsig SDK instance Initialized failed!")
113120

114121
def _initialize_impl(self, sdk_key: str, options: Optional[StatsigOptions]):
115122
threw_error = False
123+
init_context = InitContext()
116124
try:
117-
init_context = InitContext()
118125
diagnostics = Diagnostics()
119126
diagnostics.add_marker(Marker().overall().start())
120127

@@ -157,13 +164,28 @@ def _initialize_impl(self, sdk_key: str, options: Optional[StatsigOptions]):
157164
self._evaluator = _Evaluator(self._spec_store)
158165
self._sampling_key_set = TTLSet(self.__shutdown_event)
159166

160-
self._spec_store.initialize()
161-
self._initialized = True
167+
init_timeout = options.overall_init_timeout
168+
if init_timeout is not None:
169+
future = self._executor.submit(self._initialize_instance)
170+
try:
171+
future.result(timeout=init_timeout)
172+
except Exception as e:
173+
raise StatsigTimeoutError() from e
174+
else:
175+
self._initialize_instance()
162176

163177
except (StatsigValueError, StatsigNameError, StatsigRuntimeError) as e:
164178
threw_error = True
165179
raise e
166180

181+
except StatsigTimeoutError:
182+
globals.logger.warning(
183+
"Statsig SDK instance initialization timed out. Initializing store in the background.")
184+
self._executor.submit(self._initialize_instance)
185+
threw_error = True
186+
init_context.timed_out = True
187+
self._initialized = True
188+
167189
except Exception as e:
168190
threw_error = True
169191
self._errorBoundary.log_exception("initialize", e)
@@ -176,7 +198,13 @@ def _initialize_impl(self, sdk_key: str, options: Optional[StatsigOptions]):
176198
init_context.success = not threw_error
177199

178200
return InitializeDetails(int(time.time() * 1000) - init_context.start_time, init_context.source,
179-
init_context.success, init_context.store_populated, init_context.error)
201+
init_context.success, init_context.store_populated, init_context.error,
202+
init_context.timed_out)
203+
204+
def _initialize_instance(self):
205+
self._evaluator.initialize()
206+
self._spec_store.initialize()
207+
self._initialized = True
180208

181209
def is_store_populated(self):
182210
try:
@@ -423,6 +451,7 @@ def flush(self):
423451

424452
def shutdown(self):
425453
def task():
454+
globals.logger.info("Shutting down Statsig SDK instance.")
426455
self.__shutdown_event.set()
427456
self._logger.shutdown()
428457
self._spec_store.shutdown()

0 commit comments

Comments
 (0)