Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OPIK-277] [SDK] Flaky unit-tests on Windows #846

Merged
merged 4 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ async def ascore(
def _parse_model_output(self, content: str) -> score_result.ScoreResult:
try:
dict_content = json.loads(content)
score: float = dict_content["context_precision_score"]
score: float = float(dict_content["context_precision_score"])

if not (0.0 <= score <= 1.0):
score = 0.5
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ async def ascore(
def _parse_model_output(self, content: str) -> score_result.ScoreResult:
try:
dict_content = json.loads(content)
score: float = dict_content["context_recall_score"]
score: float = float(dict_content["context_recall_score"])

if not (0.0 <= score <= 1.0):
score = 0.5
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ async def ascore(
def _parse_model_output(self, content: str) -> score_result.ScoreResult:
try:
dict_content = json.loads(content)
score = dict_content["score"]
score = float(dict_content["score"])
return score_result.ScoreResult(
name=self.name,
value=score,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ async def ascore(
def _parse_model_output(self, content: str) -> score_result.ScoreResult:
try:
dict_content = json.loads(content)
score: float = dict_content["score"]
score: float = float(dict_content["score"])

if not (0.0 <= score <= 1.0):
score = 0.5
Expand Down
66 changes: 48 additions & 18 deletions sdks/python/tests/unit/decorator/test_tracker_outputs.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
import mock
import threading
import asyncio
import threading
from typing import Dict

import mock
import pytest
from opik.decorator import tracker
from opik import context_storage, opik_context
from opik.api_objects import opik_client
from opik.api_objects import trace

from opik import context_storage, opik_context
from opik.api_objects import opik_client, trace
from opik.decorator import tracker
from ...testlib import (
SpanModel,
TraceModel,
FeedbackScoreModel,
ANY_BUT_NONE,
ANY_STRING,
FeedbackScoreModel,
SpanModel,
TraceModel,
assert_equal,
)

Expand Down Expand Up @@ -477,12 +478,23 @@ async def async_f(x):


def test_track__nested_calls_in_separate_threads__3_traces_in_result(fake_backend):
ID_STORAGE: Dict[str, str] = {}

@tracker.track(capture_output=True)
def f_inner(y, thread_id):
ID_STORAGE[f"f_inner-trace-id-{thread_id}"] = (
opik_context.get_current_trace_data().id
)
ID_STORAGE[f"f_inner-span-id-{thread_id}"] = (
opik_context.get_current_span_data().id
)
return f"inner-output-from-{thread_id}"

@tracker.track(capture_output=True)
def f_outer(x):
ID_STORAGE["f_outer-trace-id"] = opik_context.get_current_trace_data().id
ID_STORAGE["f_outer-span-id"] = opik_context.get_current_span_data().id

t1 = threading.Thread(target=f_inner, args=("inner-input-1", "thread-1"))
t2 = threading.Thread(target=f_inner, args=("inner-input-2", "thread-2"))
t1.start()
Expand All @@ -497,15 +509,15 @@ def f_outer(x):

EXPECTED_TRACE_TREES = [
TraceModel(
id=ANY_BUT_NONE,
id=ID_STORAGE["f_outer-trace-id"],
name="f_outer",
input={"x": "outer-input"},
output={"output": "outer-output"},
start_time=ANY_BUT_NONE,
end_time=ANY_BUT_NONE,
spans=[
SpanModel(
id=ANY_BUT_NONE,
id=ID_STORAGE["f_outer-span-id"],
name="f_outer",
input={"x": "outer-input"},
output={"output": "outer-output"},
Expand All @@ -516,15 +528,15 @@ def f_outer(x):
],
),
TraceModel(
id=ANY_BUT_NONE,
id=ID_STORAGE["f_inner-trace-id-thread-1"],
name="f_inner",
input={"y": "inner-input-1", "thread_id": "thread-1"},
output={"output": "inner-output-from-thread-1"},
start_time=ANY_BUT_NONE,
end_time=ANY_BUT_NONE,
spans=[
SpanModel(
id=ANY_BUT_NONE,
id=ID_STORAGE["f_inner-span-id-thread-1"],
name="f_inner",
input={"y": "inner-input-1", "thread_id": "thread-1"},
output={"output": "inner-output-from-thread-1"},
Expand All @@ -535,15 +547,15 @@ def f_outer(x):
],
),
TraceModel(
id=ANY_BUT_NONE,
id=ID_STORAGE["f_inner-trace-id-thread-2"],
name="f_inner",
input={"y": "inner-input-2", "thread_id": "thread-2"},
output={"output": "inner-output-from-thread-2"},
start_time=ANY_BUT_NONE,
end_time=ANY_BUT_NONE,
spans=[
SpanModel(
id=ANY_BUT_NONE,
id=ID_STORAGE["f_inner-span-id-thread-2"],
name="f_inner",
input={"y": "inner-input-2", "thread_id": "thread-2"},
output={"output": "inner-output-from-thread-2"},
Expand All @@ -557,9 +569,27 @@ def f_outer(x):

assert len(fake_backend.trace_trees) == 3

assert_equal(EXPECTED_TRACE_TREES[0], fake_backend.trace_trees[0])
assert_equal(EXPECTED_TRACE_TREES[1], fake_backend.trace_trees[1])
assert_equal(EXPECTED_TRACE_TREES[2], fake_backend.trace_trees[2])
trace_outer = EXPECTED_TRACE_TREES[0]
trace_inner_thread1 = EXPECTED_TRACE_TREES[1]
trace_inner_thread2 = EXPECTED_TRACE_TREES[2]

trace_backend_outer = [
trace for trace in fake_backend.trace_trees if trace.id == trace_outer.id
][0]
trace_backend_inner_thread1 = [
trace
for trace in fake_backend.trace_trees
if trace.id == trace_inner_thread1.id
][0]
trace_backend_inner_thread2 = [
trace
for trace in fake_backend.trace_trees
if trace.id == trace_inner_thread2.id
][0]

assert_equal(expected=trace_outer, actual=trace_backend_outer)
assert_equal(expected=trace_inner_thread1, actual=trace_backend_inner_thread1)
assert_equal(expected=trace_inner_thread2, actual=trace_backend_inner_thread2)


def test_track__single_generator_function_tracked__generator_exhausted__happyflow(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_flushing_thread__batcher_is_flushed__every_time_flush_interval_time_pas
batcher.add("some-value-to-make-batcher-not-empty")
flush_callback.assert_not_called()

time.sleep(FLUSH_INTERVAL + 0.01)
time.sleep(FLUSH_INTERVAL + 0.1)
# flush interval has passed after batcher was created, batcher is ready to be flushed
# (0.1 is added because thread probation interval is 0.1 and it's already made it first check)
flush_callback.assert_called_once()
Expand Down
Loading