From 537bdb3d27da595925a8c550df4bc2da07759038 Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Fri, 18 Jun 2021 08:48:15 -0400 Subject: [PATCH 01/13] wip: pagination for persons on clickhouse funnels --- ee/clickhouse/sql/funnels/funnel.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ee/clickhouse/sql/funnels/funnel.py b/ee/clickhouse/sql/funnels/funnel.py index 7201a8070759c..6f467d76edc00 100644 --- a/ee/clickhouse/sql/funnels/funnel.py +++ b/ee/clickhouse/sql/funnels/funnel.py @@ -22,3 +22,32 @@ ORDER BY max_step {top_level_groupby} ASC ; """ + +FUNNEL_PERSONS_SQL = """ +SELECT max_step {top_level_groupby}, count(1) as cnt, id +FROM +( + SELECT + pid.person_id as id, + {extra_select} + windowFunnel({within_time})(toUInt64(toUnixTimestamp64Micro(timestamp)), + {steps} + ) as max_step + FROM + events + JOIN ( + SELECT person_id, distinct_id FROM ({latest_distinct_id_sql}) WHERE team_id = %(team_id)s + ) as pid + ON pid.distinct_id = events.distinct_id + WHERE + team_id = %(team_id)s {filters} {parsed_date_from} {parsed_date_to} + AND event IN %(events)s + GROUP BY pid.person_id {extra_groupby} +) +WHERE max_step = {current_step} +GROUP BY max_step, id {top_level_groupby} +ORDER BY max_step {top_level_groupby} ASC +limit 50 +offset {offset} +; +""" From 605f9def9ef204092dacb4f68f67f8fb7b8c8f19 Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Sat, 19 Jun 2021 09:57:40 -0400 Subject: [PATCH 02/13] wip: added offset support for getting a list of persons; added support for conversion window; --- ee/clickhouse/queries/clickhouse_funnel.py | 38 +++++--- ee/clickhouse/queries/test/test_funnel.py | 88 ++++++++++++++++++- ee/clickhouse/sql/funnels/funnel.py | 9 +- .../filters/mixins/funnel_window_days.py | 9 +- 4 files changed, 125 insertions(+), 19 deletions(-) diff --git a/ee/clickhouse/queries/clickhouse_funnel.py b/ee/clickhouse/queries/clickhouse_funnel.py index c7daac8ebc220..8bc7f51f40f11 100644 --- a/ee/clickhouse/queries/clickhouse_funnel.py +++ b/ee/clickhouse/queries/clickhouse_funnel.py @@ -7,9 +7,10 @@ from ee.clickhouse.queries.clickhouse_funnel_base import ClickhouseFunnelBase from ee.clickhouse.queries.clickhouse_funnel_trends import ClickhouseFunnelTrends from ee.clickhouse.queries.util import parse_timestamps -from ee.clickhouse.sql.funnels.funnel import FUNNEL_SQL +from ee.clickhouse.sql.funnels.funnel import FUNNEL_PERSONS_SQL, FUNNEL_SQL from ee.clickhouse.sql.person import GET_LATEST_PERSON_DISTINCT_ID_SQL from posthog.constants import TRENDS_LINEAR +from posthog.models.filters.mixins.funnel_window_days import FunnelWindowDaysMixin from posthog.utils import relative_date_parse @@ -24,6 +25,9 @@ def run(self, *args, **kwargs) -> List[Dict[str, Any]]: # Format of this is [step order, person count (that reached that step), array of person uuids] results = self._exec_query() + if self._filter.offset > 0: + return results + steps = [] relevant_people = [] total_people = 0 @@ -60,16 +64,24 @@ def _exec_query(self) -> List[Tuple]: ) self.params.update(prop_filter_params) steps = [self._build_steps_query(entity, index) for index, entity in enumerate(self._filter.entities)] - query = FUNNEL_SQL.format( - team_id=self._team.id, - steps=", ".join(steps), - filters=prop_filters.replace("uuid IN", "events.uuid IN", 1), - parsed_date_from=parsed_date_from, - parsed_date_to=parsed_date_to, - top_level_groupby="", - extra_select="", - extra_groupby="", - within_time="6048000000000000", - latest_distinct_id_sql=GET_LATEST_PERSON_DISTINCT_ID_SQL, - ) + + format_properties = { + "team_id": self._team.id, + "steps": ", ".join(steps), + "filters": prop_filters.replace("uuid IN", "events.uuid IN", 1), + "parsed_date_from": parsed_date_from, + "parsed_date_to": parsed_date_to, + "top_level_groupby": "", + "extra_select": "", + "extra_groupby": "", + "within_time": FunnelWindowDaysMixin.microseconds_from_days(self._filter.funnel_window_days), + "latest_distinct_id_sql": GET_LATEST_PERSON_DISTINCT_ID_SQL, + "offset": self._filter.offset, + } + + if self._filter.offset == 0: + query = FUNNEL_SQL.format(**format_properties) + else: + query = FUNNEL_PERSONS_SQL.format(**format_properties) + return sync_execute(query, self.params) diff --git a/ee/clickhouse/queries/test/test_funnel.py b/ee/clickhouse/queries/test/test_funnel.py index 60931793f7de0..b084bd52ca121 100644 --- a/ee/clickhouse/queries/test/test_funnel.py +++ b/ee/clickhouse/queries/test/test_funnel.py @@ -1,11 +1,20 @@ +from datetime import datetime from uuid import uuid4 from ee.clickhouse.models.event import create_event from ee.clickhouse.queries.clickhouse_funnel import ClickhouseFunnel from ee.clickhouse.util import ClickhouseTestMixin +from posthog.constants import INSIGHT_FUNNELS, TRENDS_FUNNEL +from posthog.models import Filter +from posthog.models.filters.mixins.funnel_window_days import FunnelWindowDaysMixin from posthog.models.person import Person from posthog.queries.test.test_funnel import funnel_test_factory +FORMAT_TIME = "%Y-%m-%d 00:00:00" +MAX_STEP_COLUMN = 0 +COUNT_COLUMN = 1 +PERSON_ID_COLUMN = 2 + def _create_person(**kwargs): person = Person.objects.create(**kwargs) @@ -18,4 +27,81 @@ def _create_event(**kwargs): class TestFunnel(ClickhouseTestMixin, funnel_test_factory(ClickhouseFunnel, _create_event, _create_person)): # type: ignore - pass + def setUp(self): + self._create_sample_data() + super().setUp() + + def _create_sample_data(self): + for i in range(250): + _create_person(distinct_ids=[f"user_{i}"], team=self.team) + _create_event(event="step one", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-01 00:00:00") + _create_event(event="step two", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-03 00:00:00") + _create_event(event="step three", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-05 00:00:00") + + def test_basic_offset(self): + data = { + "insight": INSIGHT_FUNNELS, + "display": TRENDS_FUNNEL, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 00:00:00", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + filter = Filter(data=data) + results = ClickhouseFunnel(filter, self.team)._exec_query() + self.assertEqual(1, len(results)) + self.assertEqual(3, results[0][MAX_STEP_COLUMN]) + self.assertEqual(250, results[0][COUNT_COLUMN]) + self.assertEqual(100, len(results[0][PERSON_ID_COLUMN])) + + filter_offset = Filter(data={**data, "offset": 100,}) + results = ClickhouseFunnel(filter_offset, self.team).run() + self.assertEqual(100, len(results)) + + filter_offset = Filter(data={**data, "offset": 200,}) + results = ClickhouseFunnel(filter_offset, self.team).run() + self.assertEqual(50, len(results)) + + def test_funnel_window_days_to_microseconds(self): + one_day = FunnelWindowDaysMixin.microseconds_from_days(1) + two_days = FunnelWindowDaysMixin.microseconds_from_days(2) + three_days = FunnelWindowDaysMixin.microseconds_from_days(3) + + self.assertEqual(86400000000, one_day) + self.assertEqual(172800000000, two_days) + self.assertEqual(259200000000, three_days) + + def test_basic_conversion_window(self): + data = { + "insight": INSIGHT_FUNNELS, + "display": TRENDS_FUNNEL, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 00:00:00", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + filter = Filter(data={**data, "funnel_window_days": 1,}) + results = ClickhouseFunnel(filter, self.team)._exec_query() + self.assertEqual(1, len(results)) + self.assertEqual(1, results[0][MAX_STEP_COLUMN]) + self.assertEqual(250, results[0][COUNT_COLUMN]) + self.assertEqual(100, len(results[0][PERSON_ID_COLUMN])) + + filter = Filter(data={**data, "funnel_window_days": 2,}) + results = ClickhouseFunnel(filter, self.team)._exec_query() + self.assertEqual(1, len(results)) + self.assertEqual(2, results[0][MAX_STEP_COLUMN]) + self.assertEqual(250, results[0][COUNT_COLUMN]) + self.assertEqual(100, len(results[0][PERSON_ID_COLUMN])) diff --git a/ee/clickhouse/sql/funnels/funnel.py b/ee/clickhouse/sql/funnels/funnel.py index 6f467d76edc00..37d2c831b2563 100644 --- a/ee/clickhouse/sql/funnels/funnel.py +++ b/ee/clickhouse/sql/funnels/funnel.py @@ -40,14 +40,17 @@ ) as pid ON pid.distinct_id = events.distinct_id WHERE - team_id = %(team_id)s {filters} {parsed_date_from} {parsed_date_to} + team_id = %(team_id)s + {filters} + {parsed_date_from} + {parsed_date_to} AND event IN %(events)s GROUP BY pid.person_id {extra_groupby} ) -WHERE max_step = {current_step} +WHERE max_step > 0 GROUP BY max_step, id {top_level_groupby} ORDER BY max_step {top_level_groupby} ASC -limit 50 +limit 100 offset {offset} ; """ diff --git a/posthog/models/filters/mixins/funnel_window_days.py b/posthog/models/filters/mixins/funnel_window_days.py index 460cf534b1a19..07e3487ae2705 100644 --- a/posthog/models/filters/mixins/funnel_window_days.py +++ b/posthog/models/filters/mixins/funnel_window_days.py @@ -15,5 +15,10 @@ def funnel_window_days_to_dict(self): @staticmethod def milliseconds_from_days(days): - second, minute, hour, day = [1000, 60, 60, 24] - return second * minute * hour * day * days + milliseconds, seconds, minutes, hours = [1000, 60, 60, 24] + return milliseconds * seconds * minutes * hours * days + + @staticmethod + def microseconds_from_days(days): + microseconds = 1000 + return microseconds * FunnelWindowDaysMixin.milliseconds_from_days(days) From a9a5f126a899e74ea149667e6556d3181305837a Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Sat, 19 Jun 2021 14:23:12 -0400 Subject: [PATCH 03/13] fixed mypy exception --- ee/clickhouse/queries/clickhouse_funnel.py | 6 +++--- posthog/queries/base.py | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/ee/clickhouse/queries/clickhouse_funnel.py b/ee/clickhouse/queries/clickhouse_funnel.py index 8bc7f51f40f11..30ecc0b8b9c2a 100644 --- a/ee/clickhouse/queries/clickhouse_funnel.py +++ b/ee/clickhouse/queries/clickhouse_funnel.py @@ -1,8 +1,8 @@ -from typing import Any, Dict, List, Tuple +from typing import List, Tuple from django.utils import timezone -from ee.clickhouse.client import format_sql, sync_execute +from ee.clickhouse.client import sync_execute from ee.clickhouse.models.property import parse_prop_clauses from ee.clickhouse.queries.clickhouse_funnel_base import ClickhouseFunnelBase from ee.clickhouse.queries.clickhouse_funnel_trends import ClickhouseFunnelTrends @@ -15,7 +15,7 @@ class ClickhouseFunnel(ClickhouseFunnelBase): - def run(self, *args, **kwargs) -> List[Dict[str, Any]]: + def run(self, *args, **kwargs): if len(self._filter.entities) == 0: return [] diff --git a/posthog/queries/base.py b/posthog/queries/base.py index 37b854af0a85d..1f155e8cc88b0 100644 --- a/posthog/queries/base.py +++ b/posthog/queries/base.py @@ -1,5 +1,3 @@ -import copy -from datetime import timedelta from typing import Any, Callable, Dict, List, Optional, Union, cast from dateutil.relativedelta import relativedelta From e9523088b5f369c5782e33ef29d6bcc0cb707eb9 Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Sun, 20 Jun 2021 08:19:04 -0400 Subject: [PATCH 04/13] helper function to insert data for local testing --- ee/clickhouse/demo.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ee/clickhouse/demo.py b/ee/clickhouse/demo.py index f9fd1a9668019..fad3843d8227b 100644 --- a/ee/clickhouse/demo.py +++ b/ee/clickhouse/demo.py @@ -1,8 +1,10 @@ +import uuid from typing import Dict, List from uuid import uuid4 from ee.clickhouse.models.event import create_event from ee.clickhouse.models.session_recording_event import create_session_recording_event +from posthog.models import EventDefinition, Person, Team def bulk_create_events(events: List[Dict], **kw): @@ -13,3 +15,24 @@ def bulk_create_events(events: List[Dict], **kw): def bulk_create_session_recording_events(events: List[Dict], **kw): for data in events: create_session_recording_event(**data, **kw, uuid=uuid4()) # type: ignore + + +def insert_localdev_data(team_id=1, number=250): + team = Team.objects.get(id=team_id) + + EventDefinition.objects.get_or_create(team=team, name="step one") + EventDefinition.objects.get_or_create(team=team, name="step two") + EventDefinition.objects.get_or_create(team=team, name="step three") + EventDefinition.objects.get_or_create(team=team, name="step four") + EventDefinition.objects.get_or_create(team=team, name="step five") + + for i in range(number): + try: + Person.objects.create(distinct_ids=[f"user_{i}"], team=team) + except Exception as e: + print(str(e)) + create_event(uuid.uuid4(), "step one", team, f"user_{i}", "2021-05-01 00:00:00") + create_event(uuid.uuid4(), "step two", team, f"user_{i}", "2021-05-03 00:00:00") + create_event(uuid.uuid4(), "step three", team, f"user_{i}", "2021-05-05 00:00:00") + create_event(uuid.uuid4(), "step four", team, f"user_{i}", "2021-05-07 00:00:00") + create_event(uuid.uuid4(), "step five", team, f"user_{i}", "2021-05-09 00:00:00") From ceda12e3687219e12f03a20ecf88b602ab2a5e07 Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Sun, 20 Jun 2021 09:25:23 -0400 Subject: [PATCH 05/13] moved generate code into separate class for more functionality later --- ee/clickhouse/demo.py | 23 ------------- ee/clickhouse/generate_local.py | 58 +++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 23 deletions(-) create mode 100644 ee/clickhouse/generate_local.py diff --git a/ee/clickhouse/demo.py b/ee/clickhouse/demo.py index fad3843d8227b..f9fd1a9668019 100644 --- a/ee/clickhouse/demo.py +++ b/ee/clickhouse/demo.py @@ -1,10 +1,8 @@ -import uuid from typing import Dict, List from uuid import uuid4 from ee.clickhouse.models.event import create_event from ee.clickhouse.models.session_recording_event import create_session_recording_event -from posthog.models import EventDefinition, Person, Team def bulk_create_events(events: List[Dict], **kw): @@ -15,24 +13,3 @@ def bulk_create_events(events: List[Dict], **kw): def bulk_create_session_recording_events(events: List[Dict], **kw): for data in events: create_session_recording_event(**data, **kw, uuid=uuid4()) # type: ignore - - -def insert_localdev_data(team_id=1, number=250): - team = Team.objects.get(id=team_id) - - EventDefinition.objects.get_or_create(team=team, name="step one") - EventDefinition.objects.get_or_create(team=team, name="step two") - EventDefinition.objects.get_or_create(team=team, name="step three") - EventDefinition.objects.get_or_create(team=team, name="step four") - EventDefinition.objects.get_or_create(team=team, name="step five") - - for i in range(number): - try: - Person.objects.create(distinct_ids=[f"user_{i}"], team=team) - except Exception as e: - print(str(e)) - create_event(uuid.uuid4(), "step one", team, f"user_{i}", "2021-05-01 00:00:00") - create_event(uuid.uuid4(), "step two", team, f"user_{i}", "2021-05-03 00:00:00") - create_event(uuid.uuid4(), "step three", team, f"user_{i}", "2021-05-05 00:00:00") - create_event(uuid.uuid4(), "step four", team, f"user_{i}", "2021-05-07 00:00:00") - create_event(uuid.uuid4(), "step five", team, f"user_{i}", "2021-05-09 00:00:00") diff --git a/ee/clickhouse/generate_local.py b/ee/clickhouse/generate_local.py new file mode 100644 index 0000000000000..819b569f20e20 --- /dev/null +++ b/ee/clickhouse/generate_local.py @@ -0,0 +1,58 @@ +import uuid + +from ee.clickhouse.client import sync_execute +from ee.clickhouse.models.event import create_event +from posthog.models import EventDefinition, Person, Team + + +class GenerateLocal: + _team: None + _number: None + + def __init__(self, team_id=1, number=250): + self._team = Team.objects.get(id=team_id) + self._number = number + + def generate(self): + self._insert_persons() + self._insert_person_distinct_ids() + self._insert_event_definitions() + self._insert_events() + + def destroy(self): + # TODO + # You'll need to manually clean up the clickhouse database + pass + + def _insert_event_definitions(self): + EventDefinition.objects.get_or_create(team=self._team, name="step one") + EventDefinition.objects.get_or_create(team=self._team, name="step two") + EventDefinition.objects.get_or_create(team=self._team, name="step three") + EventDefinition.objects.get_or_create(team=self._team, name="step four") + EventDefinition.objects.get_or_create(team=self._team, name="step five") + + def _insert_persons(self): + for i in range(self._number): + try: + Person.objects.create(distinct_ids=[f"user_{i}"], team=self._team) + except Exception as e: + print(str(e)) + + def _insert_person_distinct_ids(self): + values = [] + for i in range(self._number): + values.append(f"('user_{i}', generateUUIDv4(), {self._team.id}, now())") + + sql = f""" + insert into person_distinct_id (distinct_id, person_id, team_id, _timestamp) values {",".join(values)}; + """ + + sync_execute(sql) + + def _insert_events(self): + for i in range(self._number): + create_event(uuid.uuid4(), "step one", self._team, f"user_{i}", "2021-05-01 00:00:00") + create_event(uuid.uuid4(), "step two", self._team, f"user_{i}", "2021-05-03 00:00:00") + create_event(uuid.uuid4(), "step three", self._team, f"user_{i}", "2021-05-05 00:00:00") + create_event(uuid.uuid4(), "step four", self._team, f"user_{i}", "2021-05-07 00:00:00") + create_event(uuid.uuid4(), "step five", self._team, f"user_{i}", "2021-05-09 00:00:00") From 1a29fdb0b5ba9f2d7cd145b7630b1e9bb8785ab9 Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Sun, 20 Jun 2021 09:31:26 -0400 Subject: [PATCH 06/13] corrected person_distinct_id to use the person id from postgres --- ee/clickhouse/generate_local.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ee/clickhouse/generate_local.py b/ee/clickhouse/generate_local.py index 819b569f20e20..e2c1dd1dc876c 100644 --- a/ee/clickhouse/generate_local.py +++ b/ee/clickhouse/generate_local.py @@ -15,7 +15,6 @@ def __init__(self, team_id=1, number=250): def generate(self): self._insert_persons() - self._insert_person_distinct_ids() self._insert_event_definitions() self._insert_events() @@ -34,17 +33,15 @@ def _insert_event_definitions(self): def _insert_persons(self): for i in range(self._number): try: - Person.objects.create(distinct_ids=[f"user_{i}"], team=self._team) + person = Person.objects.create(distinct_ids=[f"user_{i}"], team=self._team) + self._insert_person_distinct_ids(person.id) except Exception as e: print(str(e)) - def _insert_person_distinct_ids(self): - values = [] - for i in range(self._number): - values.append(f"('user_{i}', generateUUIDv4(), {self._team.id}, now())") - + def _insert_person_distinct_ids(self, user_id, person_id): sql = f""" - insert into person_distinct_id (distinct_id, person_id, team_id, _timestamp) values {",".join(values)}; + insert into person_distinct_id (distinct_id, person_id, team_id, _timestamp) values + ('user_{user_id}', '{person_id}', '{self._team.id}', now()); """ sync_execute(sql) From 1990907fcd2203fbc164649cc012d1661a57f483 Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Sun, 20 Jun 2021 10:24:11 -0400 Subject: [PATCH 07/13] minor corrections to generate local class along with addition of data cleanup via destroy() method --- ee/clickhouse/generate_local.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/ee/clickhouse/generate_local.py b/ee/clickhouse/generate_local.py index e2c1dd1dc876c..3a1b40a525ffd 100644 --- a/ee/clickhouse/generate_local.py +++ b/ee/clickhouse/generate_local.py @@ -1,5 +1,7 @@ import uuid +from django.db import connection + from ee.clickhouse.client import sync_execute from ee.clickhouse.models.event import create_event from posthog.models import EventDefinition, Person, Team @@ -19,9 +21,14 @@ def generate(self): self._insert_events() def destroy(self): - # TODO - # You'll need to manually clean up the clickhouse database - pass + # You'll need to manually clean up the clickhouse database by: + # 1. docker compose -f ee/docker-compose.ch.yml down clickhouse zookeeper kafka + # 2. DEBUG=1;DJANGO_SETTINGS_MODULE=posthog.settings;PRIMARY_DB=clickhouse;CLICKHOUSE_HOST=clickhouse;CLICKHOUSE_DATABASE=posthog;CLICKHOUSE_SECURE=false;CLICKHOUSE_VERIFY=false python migrate.py migrate_clickhouse + + with connection.cursor() as cursor: + cursor.execute("delete from posthog_persondistinctid where distinct_id like 'user_%%'") + cursor.execute("delete from posthog_person where properties->> 'name' like 'user_%'") + cursor.execute("delete from posthog_eventdefinition where name like 'step %'") def _insert_event_definitions(self): EventDefinition.objects.get_or_create(team=self._team, name="step one") @@ -31,23 +38,25 @@ def _insert_event_definitions(self): EventDefinition.objects.get_or_create(team=self._team, name="step five") def _insert_persons(self): - for i in range(self._number): + for i in range(1, self._number + 1): try: - person = Person.objects.create(distinct_ids=[f"user_{i}"], team=self._team) - self._insert_person_distinct_ids(person.id) + person = Person.objects.create( + distinct_ids=[f"user_{i}"], team=self._team, properties={"name": f"user_{i}"} + ) + self._insert_person_distinct_ids(f"user_{i}", person.uuid) except Exception as e: print(str(e)) - def _insert_person_distinct_ids(self, user_id, person_id): + def _insert_person_distinct_ids(self, distinct_id, person_uuid): sql = f""" insert into person_distinct_id (distinct_id, person_id, team_id, _timestamp) values - ('user_{user_id}', '{person_id}', '{self._team.id}', now()); + ('{distinct_id}', '{person_uuid}', '{self._team.id}', now()); """ sync_execute(sql) def _insert_events(self): - for i in range(self._number): + for i in range(1, self._number + 1): create_event(uuid.uuid4(), "step one", self._team, f"user_{i}", "2021-05-01 00:00:00") create_event(uuid.uuid4(), "step two", self._team, f"user_{i}", "2021-05-03 00:00:00") create_event(uuid.uuid4(), "step three", self._team, f"user_{i}", "2021-05-05 00:00:00") From 7daeaaa9a6ca262162951e27d557a1447ef519da Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Mon, 21 Jun 2021 08:34:49 -0400 Subject: [PATCH 08/13] reduce the number of persons who make it to each step --- ee/clickhouse/generate_local.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ee/clickhouse/generate_local.py b/ee/clickhouse/generate_local.py index 3a1b40a525ffd..83d441148c8d8 100644 --- a/ee/clickhouse/generate_local.py +++ b/ee/clickhouse/generate_local.py @@ -56,9 +56,19 @@ def _insert_person_distinct_ids(self, distinct_id, person_uuid): sync_execute(sql) def _insert_events(self): - for i in range(1, self._number + 1): + step_one = self._number + 1 + step_two = round(step_one / 2) + step_three = round(step_one / 3) + step_four = round(step_one / 4) + step_five = round(step_one / 5) + + for i in range(1, step_one): create_event(uuid.uuid4(), "step one", self._team, f"user_{i}", "2021-05-01 00:00:00") + for i in range(1, step_two): create_event(uuid.uuid4(), "step two", self._team, f"user_{i}", "2021-05-03 00:00:00") + for i in range(1, step_three): create_event(uuid.uuid4(), "step three", self._team, f"user_{i}", "2021-05-05 00:00:00") + for i in range(1, step_four): create_event(uuid.uuid4(), "step four", self._team, f"user_{i}", "2021-05-07 00:00:00") + for i in range(1, step_five): create_event(uuid.uuid4(), "step five", self._team, f"user_{i}", "2021-05-09 00:00:00") From fba259e3649663fbcbd60afd6dbc02786bed7d6f Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Mon, 21 Jun 2021 16:53:28 -0400 Subject: [PATCH 09/13] moved funnel queries to a new folder for better organization; separated funnel_persons and funnel_trends_persons into individual classes; --- ee/clickhouse/queries/__init__.py | 3 +- ee/clickhouse/queries/clickhouse_funnel.py | 87 ------------ .../queries/clickhouse_funnel_base.py | 44 ------ ee/clickhouse/queries/funnels/__init__.py | 0 ee/clickhouse/queries/funnels/base.py | 127 ++++++++++++++++++ ee/clickhouse/queries/funnels/funnel.py | 7 + .../queries/funnels/funnel_persons.py | 7 + .../funnel_trends.py} | 8 +- .../queries/funnels/funnel_trends_persons.py | 6 + .../queries/funnels/test/__init__.py | 0 .../queries/funnels/test/test_funnel.py | 26 ++++ .../test/test_funnel_persons.py} | 19 ++- .../{ => funnels}/test/test_funnel_trends.py | 2 +- ee/clickhouse/views/insights.py | 5 +- ee/clickhouse/views/person.py | 9 ++ 15 files changed, 202 insertions(+), 148 deletions(-) delete mode 100644 ee/clickhouse/queries/clickhouse_funnel.py delete mode 100644 ee/clickhouse/queries/clickhouse_funnel_base.py create mode 100644 ee/clickhouse/queries/funnels/__init__.py create mode 100644 ee/clickhouse/queries/funnels/base.py create mode 100644 ee/clickhouse/queries/funnels/funnel.py create mode 100644 ee/clickhouse/queries/funnels/funnel_persons.py rename ee/clickhouse/queries/{clickhouse_funnel_trends.py => funnels/funnel_trends.py} (96%) create mode 100644 ee/clickhouse/queries/funnels/funnel_trends_persons.py create mode 100644 ee/clickhouse/queries/funnels/test/__init__.py create mode 100644 ee/clickhouse/queries/funnels/test/test_funnel.py rename ee/clickhouse/queries/{test/test_funnel.py => funnels/test/test_funnel_persons.py} (83%) rename ee/clickhouse/queries/{ => funnels}/test/test_funnel_trends.py (99%) diff --git a/ee/clickhouse/queries/__init__.py b/ee/clickhouse/queries/__init__.py index 15e5acf5b26b5..4da9ab069eb4f 100644 --- a/ee/clickhouse/queries/__init__.py +++ b/ee/clickhouse/queries/__init__.py @@ -1,4 +1,5 @@ -from .clickhouse_funnel import ClickhouseFunnel +from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnel + from .clickhouse_paths import ClickhousePaths from .clickhouse_retention import ClickhouseRetention from .clickhouse_session_recording import SessionRecording diff --git a/ee/clickhouse/queries/clickhouse_funnel.py b/ee/clickhouse/queries/clickhouse_funnel.py deleted file mode 100644 index 30ecc0b8b9c2a..0000000000000 --- a/ee/clickhouse/queries/clickhouse_funnel.py +++ /dev/null @@ -1,87 +0,0 @@ -from typing import List, Tuple - -from django.utils import timezone - -from ee.clickhouse.client import sync_execute -from ee.clickhouse.models.property import parse_prop_clauses -from ee.clickhouse.queries.clickhouse_funnel_base import ClickhouseFunnelBase -from ee.clickhouse.queries.clickhouse_funnel_trends import ClickhouseFunnelTrends -from ee.clickhouse.queries.util import parse_timestamps -from ee.clickhouse.sql.funnels.funnel import FUNNEL_PERSONS_SQL, FUNNEL_SQL -from ee.clickhouse.sql.person import GET_LATEST_PERSON_DISTINCT_ID_SQL -from posthog.constants import TRENDS_LINEAR -from posthog.models.filters.mixins.funnel_window_days import FunnelWindowDaysMixin -from posthog.utils import relative_date_parse - - -class ClickhouseFunnel(ClickhouseFunnelBase): - def run(self, *args, **kwargs): - if len(self._filter.entities) == 0: - return [] - - if self._filter.display == TRENDS_LINEAR: - return ClickhouseFunnelTrends(self._filter, self._team).run() - else: - # Format of this is [step order, person count (that reached that step), array of person uuids] - results = self._exec_query() - - if self._filter.offset > 0: - return results - - steps = [] - relevant_people = [] - total_people = 0 - - for step in reversed(self._filter.entities): - # Clickhouse step order starts at one, hence the +1 - result_step = [x for x in results if step.order + 1 == x[0]] - if len(result_step) > 0: - total_people += result_step[0][1] - relevant_people += result_step[0][2] - steps.append(self._serialize_step(step, total_people, relevant_people[0:100])) - - return steps[::-1] #  reverse - - def _exec_query(self) -> List[Tuple]: - prop_filters, prop_filter_params = parse_prop_clauses( - self._filter.properties, - self._team.pk, - prepend="global", - allow_denormalized_props=True, - filter_test_accounts=self._filter.filter_test_accounts, - ) - - # format default dates - data = {} - if not self._filter._date_from: - data.update({"date_from": relative_date_parse("-7d")}) - if not self._filter._date_to: - data.update({"date_to": timezone.now()}) - self._filter = self._filter.with_data(data) - - parsed_date_from, parsed_date_to, _ = parse_timestamps( - filter=self._filter, table="events.", team_id=self._team.pk - ) - self.params.update(prop_filter_params) - steps = [self._build_steps_query(entity, index) for index, entity in enumerate(self._filter.entities)] - - format_properties = { - "team_id": self._team.id, - "steps": ", ".join(steps), - "filters": prop_filters.replace("uuid IN", "events.uuid IN", 1), - "parsed_date_from": parsed_date_from, - "parsed_date_to": parsed_date_to, - "top_level_groupby": "", - "extra_select": "", - "extra_groupby": "", - "within_time": FunnelWindowDaysMixin.microseconds_from_days(self._filter.funnel_window_days), - "latest_distinct_id_sql": GET_LATEST_PERSON_DISTINCT_ID_SQL, - "offset": self._filter.offset, - } - - if self._filter.offset == 0: - query = FUNNEL_SQL.format(**format_properties) - else: - query = FUNNEL_PERSONS_SQL.format(**format_properties) - - return sync_execute(query, self.params) diff --git a/ee/clickhouse/queries/clickhouse_funnel_base.py b/ee/clickhouse/queries/clickhouse_funnel_base.py deleted file mode 100644 index c89fb4374a546..0000000000000 --- a/ee/clickhouse/queries/clickhouse_funnel_base.py +++ /dev/null @@ -1,44 +0,0 @@ -from ee.clickhouse.models.action import format_action_filter -from ee.clickhouse.models.property import parse_prop_clauses -from posthog.constants import TREND_FILTER_TYPE_ACTIONS -from posthog.models import Action, Entity, Filter, Team -from posthog.queries.funnel import Funnel - - -class ClickhouseFunnelBase(Funnel): - _filter: Filter - _team: Team - - def __init__(self, filter: Filter, team: Team) -> None: - self._filter = filter - self._team = team - self.params = { - "team_id": self._team.pk, - "events": [], # purely a speed optimization, don't need this for filtering - } - - def _build_steps_query(self, entity: Entity, index: int) -> str: - filters = self._build_filters(entity, index) - if entity.type == TREND_FILTER_TYPE_ACTIONS: - action = Action.objects.get(pk=entity.id) - for action_step in action.steps.all(): - self.params["events"].append(action_step.event) - action_query, action_params = format_action_filter(action, "step_{}".format(index)) - if action_query == "": - return "" - - self.params.update(action_params) - content_sql = "{actions_query} {filters}".format(actions_query=action_query, filters=filters,) - else: - self.params["events"].append(entity.id) - content_sql = "event = '{event}' {filters}".format(event=entity.id, filters=filters) - return content_sql - - def _build_filters(self, entity: Entity, index: int) -> str: - prop_filters, prop_filter_params = parse_prop_clauses( - entity.properties, self._team.pk, prepend=str(index), allow_denormalized_props=True - ) - self.params.update(prop_filter_params) - if entity.properties: - return prop_filters - return "" diff --git a/ee/clickhouse/queries/funnels/__init__.py b/ee/clickhouse/queries/funnels/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ee/clickhouse/queries/funnels/base.py b/ee/clickhouse/queries/funnels/base.py new file mode 100644 index 0000000000000..898eaecdf3060 --- /dev/null +++ b/ee/clickhouse/queries/funnels/base.py @@ -0,0 +1,127 @@ +from abc import ABC, abstractmethod +from typing import List, Tuple + +from django.utils import timezone + +from ee.clickhouse.client import sync_execute +from ee.clickhouse.models.action import format_action_filter +from ee.clickhouse.models.property import parse_prop_clauses +from ee.clickhouse.queries.util import parse_timestamps +from ee.clickhouse.sql.person import GET_LATEST_PERSON_DISTINCT_ID_SQL +from posthog.constants import TREND_FILTER_TYPE_ACTIONS +from posthog.models import Action, Entity, Filter, Team +from posthog.models.filters.mixins.funnel_window_days import FunnelWindowDaysMixin +from posthog.queries.funnel import Funnel +from posthog.utils import relative_date_parse + + +class ClickhouseFunnelBase(ABC, Funnel): + _filter: Filter + _team: Team + + def __init__(self, filter: Filter, team: Team) -> None: + self._filter = filter + self._team = team + self.params = { + "team_id": self._team.pk, + "events": [], # purely a speed optimization, don't need this for filtering + } + + def run(self, *args, **kwargs): + if len(self._filter.entities) == 0: + return [] + + # if self._filter.display == TRENDS_LINEAR: + # return ClickhouseFunnelTrends(self._filter, self._team).run() + # else: + + # Format of this is [step order, person count (that reached that step), array of person uuids] + results = self._exec_query() + + if self._filter.offset > 0: + return results + + steps = [] + relevant_people = [] + total_people = 0 + + for step in reversed(self._filter.entities): + # Clickhouse step order starts at one, hence the +1 + result_step = [x for x in results if step.order + 1 == x[0]] + if len(result_step) > 0: + total_people += result_step[0][1] + relevant_people += result_step[0][2] + steps.append(self._serialize_step(step, total_people, relevant_people[0:100])) + + return steps[::-1] #  reverse + + def _exec_query(self) -> List[Tuple]: + prop_filters, prop_filter_params = parse_prop_clauses( + self._filter.properties, + self._team.pk, + prepend="global", + allow_denormalized_props=True, + filter_test_accounts=self._filter.filter_test_accounts, + ) + + # format default dates + data = {} + if not self._filter._date_from: + data.update({"date_from": relative_date_parse("-7d")}) + if not self._filter._date_to: + data.update({"date_to": timezone.now()}) + self._filter = self._filter.with_data(data) + + parsed_date_from, parsed_date_to, _ = parse_timestamps( + filter=self._filter, table="events.", team_id=self._team.pk + ) + self.params.update(prop_filter_params) + steps = [self._build_steps_query(entity, index) for index, entity in enumerate(self._filter.entities)] + + format_properties = { + "team_id": self._team.id, + "steps": ", ".join(steps), + "filters": prop_filters.replace("uuid IN", "events.uuid IN", 1), + "parsed_date_from": parsed_date_from, + "parsed_date_to": parsed_date_to, + "top_level_groupby": "", + "extra_select": "", + "extra_groupby": "", + "within_time": FunnelWindowDaysMixin.microseconds_from_days(self._filter.funnel_window_days), + "latest_distinct_id_sql": GET_LATEST_PERSON_DISTINCT_ID_SQL, + "offset": self._filter.offset, + } + + query = self.get_query(format_properties) + + return sync_execute(query, self.params) + + def _build_steps_query(self, entity: Entity, index: int) -> str: + filters = self._build_filters(entity, index) + if entity.type == TREND_FILTER_TYPE_ACTIONS: + action = Action.objects.get(pk=entity.id) + for action_step in action.steps.all(): + self.params["events"].append(action_step.event) + action_query, action_params = format_action_filter(action, "step_{}".format(index)) + if action_query == "": + return "" + + self.params.update(action_params) + content_sql = "{actions_query} {filters}".format(actions_query=action_query, filters=filters,) + else: + self.params["events"].append(entity.id) + content_sql = "event = '{event}' {filters}".format(event=entity.id, filters=filters) + return content_sql + + def _build_filters(self, entity: Entity, index: int) -> str: + prop_filters, prop_filter_params = parse_prop_clauses( + entity.properties, self._team.pk, prepend=str(index), allow_denormalized_props=True + ) + self.params.update(prop_filter_params) + if entity.properties: + return prop_filters + return "" + + @abstractmethod + def get_query(self, format_properties): + pass diff --git a/ee/clickhouse/queries/funnels/funnel.py b/ee/clickhouse/queries/funnels/funnel.py new file mode 100644 index 0000000000000..76cd6479bd01d --- /dev/null +++ b/ee/clickhouse/queries/funnels/funnel.py @@ -0,0 +1,7 @@ +from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase +from ee.clickhouse.sql.funnels.funnel import FUNNEL_SQL + + +class ClickhouseFunnel(ClickhouseFunnelBase): + def get_query(self, format_properties): + return FUNNEL_SQL.format(**format_properties) diff --git a/ee/clickhouse/queries/funnels/funnel_persons.py b/ee/clickhouse/queries/funnels/funnel_persons.py new file mode 100644 index 0000000000000..760382b45112d --- /dev/null +++ b/ee/clickhouse/queries/funnels/funnel_persons.py @@ -0,0 +1,7 @@ +from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase +from ee.clickhouse.sql.funnels.funnel import FUNNEL_PERSONS_SQL + + +class ClickhouseFunnelPersons(ClickhouseFunnelBase): + def get_query(self, format_properties): + return FUNNEL_PERSONS_SQL.format(**format_properties) diff --git a/ee/clickhouse/queries/clickhouse_funnel_trends.py b/ee/clickhouse/queries/funnels/funnel_trends.py similarity index 96% rename from ee/clickhouse/queries/clickhouse_funnel_trends.py rename to ee/clickhouse/queries/funnels/funnel_trends.py index e13c8705ad3b1..159bb4dea8bd0 100644 --- a/ee/clickhouse/queries/clickhouse_funnel_trends.py +++ b/ee/clickhouse/queries/funnels/funnel_trends.py @@ -2,7 +2,7 @@ from ee.clickhouse.client import sync_execute from ee.clickhouse.models.property import parse_prop_clauses -from ee.clickhouse.queries.clickhouse_funnel_base import ClickhouseFunnelBase +from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase from ee.clickhouse.queries.util import get_time_diff, get_trunc_func_ch, parse_timestamps from ee.clickhouse.sql.events import NULL_SQL_FUNNEL_TRENDS from ee.clickhouse.sql.funnels.funnel_trend import FUNNEL_TREND_SQL @@ -18,6 +18,9 @@ class ClickhouseFunnelTrends(ClickhouseFunnelBase): def run(self): + if len(self._filter.entities) == 0: + return [] + summary = self.perform_query() ui_response = self._get_ui_response(summary) return ui_response @@ -124,3 +127,6 @@ def _determine_complete(self, timestamp): compare_timestamp = timestamp.date() if type(timestamp) is datetime else timestamp is_incomplete = compare_timestamp > completed_end return not is_incomplete + + def get_query(self, format_properties): + pass diff --git a/ee/clickhouse/queries/funnels/funnel_trends_persons.py b/ee/clickhouse/queries/funnels/funnel_trends_persons.py new file mode 100644 index 0000000000000..8034448bbb26d --- /dev/null +++ b/ee/clickhouse/queries/funnels/funnel_trends_persons.py @@ -0,0 +1,6 @@ +from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase + + +class ClickhouseFunnelPersons(ClickhouseFunnelBase): + def get_query(self, format_properties): + pass diff --git a/ee/clickhouse/queries/funnels/test/__init__.py b/ee/clickhouse/queries/funnels/test/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ee/clickhouse/queries/funnels/test/test_funnel.py b/ee/clickhouse/queries/funnels/test/test_funnel.py new file mode 100644 index 0000000000000..130dc03b177f5 --- /dev/null +++ b/ee/clickhouse/queries/funnels/test/test_funnel.py @@ -0,0 +1,26 @@ +from uuid import uuid4 + +from ee.clickhouse.models.event import create_event +from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnel +from ee.clickhouse.util import ClickhouseTestMixin +from posthog.models.person import Person +from posthog.queries.test.test_funnel import funnel_test_factory + +FORMAT_TIME = "%Y-%m-%d 00:00:00" +MAX_STEP_COLUMN = 0 +COUNT_COLUMN = 1 +PERSON_ID_COLUMN = 2 + + +def _create_person(**kwargs): + person = Person.objects.create(**kwargs) + return Person(id=person.uuid, uuid=person.uuid) + + +def _create_event(**kwargs): + kwargs.update({"event_uuid": uuid4()}) + create_event(**kwargs) + + +class TestFunnel(ClickhouseTestMixin, funnel_test_factory(ClickhouseFunnel, _create_event, _create_person)): # type: ignore + pass diff --git a/ee/clickhouse/queries/test/test_funnel.py b/ee/clickhouse/queries/funnels/test/test_funnel_persons.py similarity index 83% rename from ee/clickhouse/queries/test/test_funnel.py rename to ee/clickhouse/queries/funnels/test/test_funnel_persons.py index b084bd52ca121..5c70de710cb68 100644 --- a/ee/clickhouse/queries/test/test_funnel.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_persons.py @@ -1,14 +1,14 @@ -from datetime import datetime from uuid import uuid4 from ee.clickhouse.models.event import create_event -from ee.clickhouse.queries.clickhouse_funnel import ClickhouseFunnel +from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnel +from ee.clickhouse.queries.funnels.funnel_persons import ClickhouseFunnelPersons from ee.clickhouse.util import ClickhouseTestMixin from posthog.constants import INSIGHT_FUNNELS, TRENDS_FUNNEL from posthog.models import Filter from posthog.models.filters.mixins.funnel_window_days import FunnelWindowDaysMixin from posthog.models.person import Person -from posthog.queries.test.test_funnel import funnel_test_factory +from posthog.test.base import APIBaseTest FORMAT_TIME = "%Y-%m-%d 00:00:00" MAX_STEP_COLUMN = 0 @@ -26,7 +26,7 @@ def _create_event(**kwargs): create_event(**kwargs) -class TestFunnel(ClickhouseTestMixin, funnel_test_factory(ClickhouseFunnel, _create_event, _create_person)): # type: ignore +class TestFunnel(ClickhouseTestMixin, APIBaseTest): # type: ignore def setUp(self): self._create_sample_data() super().setUp() @@ -54,18 +54,15 @@ def test_basic_offset(self): } filter = Filter(data=data) - results = ClickhouseFunnel(filter, self.team)._exec_query() - self.assertEqual(1, len(results)) - self.assertEqual(3, results[0][MAX_STEP_COLUMN]) - self.assertEqual(250, results[0][COUNT_COLUMN]) - self.assertEqual(100, len(results[0][PERSON_ID_COLUMN])) + results = ClickhouseFunnelPersons(filter, self.team)._exec_query() + self.assertEqual(100, len(results)) filter_offset = Filter(data={**data, "offset": 100,}) - results = ClickhouseFunnel(filter_offset, self.team).run() + results = ClickhouseFunnelPersons(filter_offset, self.team).run() self.assertEqual(100, len(results)) filter_offset = Filter(data={**data, "offset": 200,}) - results = ClickhouseFunnel(filter_offset, self.team).run() + results = ClickhouseFunnelPersons(filter_offset, self.team).run() self.assertEqual(50, len(results)) def test_funnel_window_days_to_microseconds(self): diff --git a/ee/clickhouse/queries/test/test_funnel_trends.py b/ee/clickhouse/queries/funnels/test/test_funnel_trends.py similarity index 99% rename from ee/clickhouse/queries/test/test_funnel_trends.py rename to ee/clickhouse/queries/funnels/test/test_funnel_trends.py index 9a4484689ba5f..fdf2671207677 100644 --- a/ee/clickhouse/queries/test/test_funnel_trends.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_trends.py @@ -2,7 +2,7 @@ from uuid import uuid4 from ee.clickhouse.models.event import create_event -from ee.clickhouse.queries.clickhouse_funnel_trends import ClickhouseFunnelTrends +from ee.clickhouse.queries.funnels.funnel_trends import ClickhouseFunnelTrends from ee.clickhouse.util import ClickhouseTestMixin from posthog.constants import INSIGHT_FUNNELS, TRENDS_LINEAR from posthog.models.filters import Filter diff --git a/ee/clickhouse/views/insights.py b/ee/clickhouse/views/insights.py index f5eb5492bc29f..07f11e72bf97e 100644 --- a/ee/clickhouse/views/insights.py +++ b/ee/clickhouse/views/insights.py @@ -1,20 +1,19 @@ -from typing import Any, Dict, List +from typing import Any, Dict from rest_framework.decorators import action from rest_framework.request import Request from rest_framework.response import Response -from ee.clickhouse.queries.clickhouse_funnel import ClickhouseFunnel from ee.clickhouse.queries.clickhouse_paths import ClickhousePaths from ee.clickhouse.queries.clickhouse_retention import ClickhouseRetention from ee.clickhouse.queries.clickhouse_stickiness import ClickhouseStickiness +from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnel from ee.clickhouse.queries.sessions.clickhouse_sessions import ClickhouseSessions from ee.clickhouse.queries.trends.clickhouse_trends import ClickhouseTrends from ee.clickhouse.queries.util import get_earliest_timestamp from posthog.api.insight import InsightViewSet from posthog.constants import INSIGHT_FUNNELS, INSIGHT_PATHS, INSIGHT_SESSIONS, INSIGHT_STICKINESS, TRENDS_STICKINESS from posthog.decorators import cached_function -from posthog.models import Event from posthog.models.filters import Filter from posthog.models.filters.path_filter import PathFilter from posthog.models.filters.retention_filter import RetentionFilter diff --git a/ee/clickhouse/views/person.py b/ee/clickhouse/views/person.py index 24f58d3b60785..a2176bdf4805f 100644 --- a/ee/clickhouse/views/person.py +++ b/ee/clickhouse/views/person.py @@ -1,4 +1,5 @@ from rest_framework import request, response +from rest_framework.decorators import action from rest_framework.exceptions import NotFound from ee.clickhouse.models.person import delete_person @@ -16,6 +17,14 @@ class ClickhousePersonViewSet(PersonViewSet): retention_class = ClickhouseRetention stickiness_class = ClickhouseStickiness + @action(methods=["GET"], detail=False) + def funnel(self, request: request.Request, **kwargs) -> response.Response: + pass + + @action(methods=["GET"], detail=False) + def funnel_trends(self, request: request.Request, **kwargs) -> response.Response: + pass + def destroy(self, request: request.Request, pk=None, **kwargs): # type: ignore try: person = Person.objects.get(team=self.team, pk=pk) From 142e7b697f82324038e819bf99ada7442a3df1ff Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Tue, 22 Jun 2021 15:42:08 -0400 Subject: [PATCH 10/13] funnel persons and tests --- ee/clickhouse/queries/funnels/base.py | 5 +-- .../queries/funnels/funnel_persons.py | 8 ++++ .../queries/funnels/funnel_trends_persons.py | 2 +- ee/clickhouse/views/person.py | 14 ++++-- .../test/test_clickhouse_funnel_person.py | 45 +++++++++++++++++++ posthog/models/person.py | 16 +++++++ 6 files changed, 83 insertions(+), 7 deletions(-) create mode 100644 ee/clickhouse/views/test/test_clickhouse_funnel_person.py diff --git a/ee/clickhouse/queries/funnels/base.py b/ee/clickhouse/queries/funnels/base.py index 898eaecdf3060..26138bdbdf5ba 100644 --- a/ee/clickhouse/queries/funnels/base.py +++ b/ee/clickhouse/queries/funnels/base.py @@ -37,10 +37,9 @@ def run(self, *args, **kwargs): # Format of this is [step order, person count (that reached that step), array of person uuids] results = self._exec_query() + return self._format_results(results) - if self._filter.offset > 0: - return results - + def _format_results(self, results): steps = [] relevant_people = [] total_people = 0 diff --git a/ee/clickhouse/queries/funnels/funnel_persons.py b/ee/clickhouse/queries/funnels/funnel_persons.py index 760382b45112d..0c0eda528c621 100644 --- a/ee/clickhouse/queries/funnels/funnel_persons.py +++ b/ee/clickhouse/queries/funnels/funnel_persons.py @@ -1,7 +1,15 @@ from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase from ee.clickhouse.sql.funnels.funnel import FUNNEL_PERSONS_SQL +from posthog.models import Person class ClickhouseFunnelPersons(ClickhouseFunnelBase): def get_query(self, format_properties): return FUNNEL_PERSONS_SQL.format(**format_properties) + + def _format_results(self, results): + formatted_results = [] + for row in results: + distinct_ids, email = Person.get_distinct_ids_and_email_by_id(row[2], self._team.id) + formatted_results.append({"max_step": row[0], "distinct_ids": distinct_ids, "email": email}) + return formatted_results diff --git a/ee/clickhouse/queries/funnels/funnel_trends_persons.py b/ee/clickhouse/queries/funnels/funnel_trends_persons.py index 8034448bbb26d..1690eb056a55e 100644 --- a/ee/clickhouse/queries/funnels/funnel_trends_persons.py +++ b/ee/clickhouse/queries/funnels/funnel_trends_persons.py @@ -1,6 +1,6 @@ from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase -class ClickhouseFunnelPersons(ClickhouseFunnelBase): +class ClickhouseFunnelTrendsPersons(ClickhouseFunnelBase): def get_query(self, format_properties): pass diff --git a/ee/clickhouse/views/person.py b/ee/clickhouse/views/person.py index a2176bdf4805f..a44950976d310 100644 --- a/ee/clickhouse/views/person.py +++ b/ee/clickhouse/views/person.py @@ -5,9 +5,11 @@ from ee.clickhouse.models.person import delete_person from ee.clickhouse.queries.clickhouse_retention import ClickhouseRetention from ee.clickhouse.queries.clickhouse_stickiness import ClickhouseStickiness +from ee.clickhouse.queries.funnels.funnel_persons import ClickhouseFunnelPersons +from ee.clickhouse.queries.funnels.funnel_trends_persons import ClickhouseFunnelTrendsPersons from ee.clickhouse.queries.trends.lifecycle import ClickhouseLifecycle from posthog.api.person import PersonViewSet -from posthog.models import Event, Person +from posthog.models import Event, Filter, Person # TODO: Move grabbing all this to Clickhouse. See WIP-people-from-clickhouse branch. @@ -19,11 +21,17 @@ class ClickhousePersonViewSet(PersonViewSet): @action(methods=["GET"], detail=False) def funnel(self, request: request.Request, **kwargs) -> response.Response: - pass + filter = Filter(request=request) + team = request.user.team + results = ClickhouseFunnelPersons(filter, team).run() + return response.Response(data=results) @action(methods=["GET"], detail=False) def funnel_trends(self, request: request.Request, **kwargs) -> response.Response: - pass + filter = Filter(request=request) + team = request.user.team + results = ClickhouseFunnelTrendsPersons(filter, team).run() + return response.Response(data=results) def destroy(self, request: request.Request, pk=None, **kwargs): # type: ignore try: diff --git a/ee/clickhouse/views/test/test_clickhouse_funnel_person.py b/ee/clickhouse/views/test/test_clickhouse_funnel_person.py new file mode 100644 index 0000000000000..f2e74430c080b --- /dev/null +++ b/ee/clickhouse/views/test/test_clickhouse_funnel_person.py @@ -0,0 +1,45 @@ +import json +import urllib.parse +from urllib.parse import urlencode + +from rest_framework import status + +from ee.clickhouse.generate_local import GenerateLocal +from ee.clickhouse.util import ClickhouseTestMixin +from posthog.constants import INSIGHT_FUNNELS, TRENDS_FUNNEL +from posthog.test.base import APIBaseTest + + +class TestFunnelPerson(ClickhouseTestMixin, APIBaseTest): + def setUp(self): + super().setUp() + GenerateLocal().generate() + + def test_basic_pagination(self): + request_data = { + "insight": INSIGHT_FUNNELS, + "display": TRENDS_FUNNEL, + "interval": "day", + "actions": json.dumps([]), + "events": json.dumps( + [{"id": "step one", "order": 0}, {"id": "step two", "order": 1}, {"id": "step three", "order": 2},] + ), + "properties": json.dumps([]), + "funnel_window_days": 14, + "filter_test_accounts": "false", + "new_entity": json.dumps([]), + "date_from": "2021-05-01", + "date_to": "2021-05-10", + } + + response = self.client.get("/api/person/funnel/", data=request_data) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(100, len(response.data)) + + response = self.client.get("/api/person/funnel/", data={**request_data, "offset": 100}) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(100, len(response.data)) + + response = self.client.get("/api/person/funnel/", data={**request_data, "offset": 200}) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(50, len(response.data)) diff --git a/posthog/models/person.py b/posthog/models/person.py index 673701e94f4ba..b50dd94219c08 100644 --- a/posthog/models/person.py +++ b/posthog/models/person.py @@ -79,6 +79,22 @@ def merge_people(self, people_to_merge: List["Person"]): # Has an index on properties -> email, built concurrently # See migration 0121 + @staticmethod + def get_distinct_ids_and_email_by_ids(person_ids, team_id): + decorated = [] + for person_id in person_ids: + distinct_ids, email = Person.get_distinct_ids_and_email_by_id(person_id, team_id) + decorated.append({"distinct_ids": distinct_ids, "email": email}) + return decorated + + @staticmethod + def get_distinct_ids_and_email_by_id(person_id, team_id): + person = Person.objects.get(uuid=person_id) + distinct_ids = PersonDistinctId.objects.filter(person_id=person.id, team_id=team_id) + flat_distinct_ids = [row.distinct_id for row in distinct_ids] + email = person.properties.get("email", "") + return flat_distinct_ids, email + class PersonDistinctId(models.Model): class Meta: From ac6d60114c4d33246d7cd17babcf0b075bc151ce Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Tue, 22 Jun 2021 16:04:18 -0400 Subject: [PATCH 11/13] invoke the funnel or funnel trends class respectively --- ee/clickhouse/queries/funnels/base.py | 6 +----- posthog/tasks/update_cache.py | 6 +++++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ee/clickhouse/queries/funnels/base.py b/ee/clickhouse/queries/funnels/base.py index 26138bdbdf5ba..da770a0e94579 100644 --- a/ee/clickhouse/queries/funnels/base.py +++ b/ee/clickhouse/queries/funnels/base.py @@ -31,15 +31,11 @@ def run(self, *args, **kwargs): if len(self._filter.entities) == 0: return [] - # if self._filter.display == TRENDS_LINEAR: - # return ClickhouseFunnelTrends(self._filter, self._team).run() - # else: - - # Format of this is [step order, person count (that reached that step), array of person uuids] results = self._exec_query() return self._format_results(results) def _format_results(self, results): + # Format of this is [step order, person count (that reached that step), array of person uuids] steps = [] relevant_people = [] total_people = 0 diff --git a/posthog/tasks/update_cache.py b/posthog/tasks/update_cache.py index dc8dc46b2aff3..56e295b00ae13 100644 --- a/posthog/tasks/update_cache.py +++ b/posthog/tasks/update_cache.py @@ -19,6 +19,7 @@ INSIGHT_SESSIONS, INSIGHT_STICKINESS, INSIGHT_TRENDS, + TRENDS_LINEAR, TRENDS_STICKINESS, ) from posthog.decorators import CacheType @@ -151,7 +152,10 @@ def _calculate_funnel(filter: Filter, key: str, team_id: int) -> List[Dict[str, dashboard_items.update(refreshing=True) if is_clickhouse_enabled(): - insight_class = import_from("ee.clickhouse.queries.clickhouse_funnel", "ClickhouseFunnel") + if filter.display == TRENDS_LINEAR: + insight_class = import_from("ee.clickhouse.queries.funnels.funnel_trends", "ClickhouseFunnelTrends") + else: + insight_class = import_from("ee.clickhouse.queries.funnels.funnel", "ClickhouseFunnel") else: insight_class = import_from("posthog.queries.funnel", "Funnel") From 474ba429e75bbfa01d02f90aa8b68736ce5f875f Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Wed, 23 Jun 2021 10:17:37 -0400 Subject: [PATCH 12/13] mypy corrections and PR feedback --- ee/clickhouse/generate_local.py | 4 ++-- ee/clickhouse/queries/funnels/funnel_persons.py | 2 +- ee/clickhouse/queries/funnels/test/test_funnel_persons.py | 2 +- ee/clickhouse/sql/funnels/funnel.py | 6 +++--- ee/clickhouse/views/person.py | 6 ++++++ ee/clickhouse/views/test/test_clickhouse_funnel_person.py | 6 +++--- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/ee/clickhouse/generate_local.py b/ee/clickhouse/generate_local.py index 83d441148c8d8..ab5aeead3b54e 100644 --- a/ee/clickhouse/generate_local.py +++ b/ee/clickhouse/generate_local.py @@ -8,8 +8,8 @@ class GenerateLocal: - _team: None - _number: None + _team: Team + _number: int def __init__(self, team_id=1, number=250): self._team = Team.objects.get(id=team_id) diff --git a/ee/clickhouse/queries/funnels/funnel_persons.py b/ee/clickhouse/queries/funnels/funnel_persons.py index 0c0eda528c621..02cee79410d69 100644 --- a/ee/clickhouse/queries/funnels/funnel_persons.py +++ b/ee/clickhouse/queries/funnels/funnel_persons.py @@ -10,6 +10,6 @@ def get_query(self, format_properties): def _format_results(self, results): formatted_results = [] for row in results: - distinct_ids, email = Person.get_distinct_ids_and_email_by_id(row[2], self._team.id) + distinct_ids, email = Person.get_distinct_ids_and_email_by_id(row[1], self._team.id) formatted_results.append({"max_step": row[0], "distinct_ids": distinct_ids, "email": email}) return formatted_results diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_persons.py b/ee/clickhouse/queries/funnels/test/test_funnel_persons.py index 5c70de710cb68..dbd37d7ba6617 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel_persons.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_persons.py @@ -26,7 +26,7 @@ def _create_event(**kwargs): create_event(**kwargs) -class TestFunnel(ClickhouseTestMixin, APIBaseTest): # type: ignore +class TestFunnel(ClickhouseTestMixin, APIBaseTest): def setUp(self): self._create_sample_data() super().setUp() diff --git a/ee/clickhouse/sql/funnels/funnel.py b/ee/clickhouse/sql/funnels/funnel.py index 37d2c831b2563..8869360d67459 100644 --- a/ee/clickhouse/sql/funnels/funnel.py +++ b/ee/clickhouse/sql/funnels/funnel.py @@ -24,7 +24,7 @@ """ FUNNEL_PERSONS_SQL = """ -SELECT max_step {top_level_groupby}, count(1) as cnt, id +SELECT max_step, id FROM ( SELECT @@ -48,8 +48,8 @@ GROUP BY pid.person_id {extra_groupby} ) WHERE max_step > 0 -GROUP BY max_step, id {top_level_groupby} -ORDER BY max_step {top_level_groupby} ASC +GROUP BY max_step, id +ORDER BY max_step ASC limit 100 offset {offset} ; diff --git a/ee/clickhouse/views/person.py b/ee/clickhouse/views/person.py index a44950976d310..ccc03aa32b1d0 100644 --- a/ee/clickhouse/views/person.py +++ b/ee/clickhouse/views/person.py @@ -21,6 +21,9 @@ class ClickhousePersonViewSet(PersonViewSet): @action(methods=["GET"], detail=False) def funnel(self, request: request.Request, **kwargs) -> response.Response: + if request.user.is_anonymous or not request.user.team: + return response.Response(data=[]) + filter = Filter(request=request) team = request.user.team results = ClickhouseFunnelPersons(filter, team).run() @@ -28,6 +31,9 @@ def funnel(self, request: request.Request, **kwargs) -> response.Response: @action(methods=["GET"], detail=False) def funnel_trends(self, request: request.Request, **kwargs) -> response.Response: + if request.user.is_anonymous or not request.user.team: + return response.Response(data=[]) + filter = Filter(request=request) team = request.user.team results = ClickhouseFunnelTrendsPersons(filter, team).run() diff --git a/ee/clickhouse/views/test/test_clickhouse_funnel_person.py b/ee/clickhouse/views/test/test_clickhouse_funnel_person.py index f2e74430c080b..19721aed6d7c7 100644 --- a/ee/clickhouse/views/test/test_clickhouse_funnel_person.py +++ b/ee/clickhouse/views/test/test_clickhouse_funnel_person.py @@ -34,12 +34,12 @@ def test_basic_pagination(self): response = self.client.get("/api/person/funnel/", data=request_data) self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(100, len(response.data)) + self.assertEqual(100, len(response.json())) response = self.client.get("/api/person/funnel/", data={**request_data, "offset": 100}) self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(100, len(response.data)) + self.assertEqual(100, len(response.json())) response = self.client.get("/api/person/funnel/", data={**request_data, "offset": 200}) self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(50, len(response.data)) + self.assertEqual(50, len(response.json())) From 4fb5a432abe18f9a435108c5c2bde6982e2f6bd4 Mon Sep 17 00:00:00 2001 From: Buddy Williams Date: Wed, 23 Jun 2021 15:13:18 -0400 Subject: [PATCH 13/13] corrected tests --- ee/clickhouse/generate_local.py | 38 +++++++++---------- ee/clickhouse/views/person.py | 9 ++++- .../test/test_clickhouse_funnel_person.py | 21 ++++++---- posthog/api/utils.py | 20 +++++++++- 4 files changed, 58 insertions(+), 30 deletions(-) diff --git a/ee/clickhouse/generate_local.py b/ee/clickhouse/generate_local.py index ab5aeead3b54e..257a179f9acb3 100644 --- a/ee/clickhouse/generate_local.py +++ b/ee/clickhouse/generate_local.py @@ -8,12 +8,12 @@ class GenerateLocal: - _team: Team - _number: int + team: Team + number: int - def __init__(self, team_id=1, number=250): - self._team = Team.objects.get(id=team_id) - self._number = number + def __init__(self, team, number=250): + self.team = team + self.number = number def generate(self): self._insert_persons() @@ -31,17 +31,17 @@ def destroy(self): cursor.execute("delete from posthog_eventdefinition where name like 'step %'") def _insert_event_definitions(self): - EventDefinition.objects.get_or_create(team=self._team, name="step one") - EventDefinition.objects.get_or_create(team=self._team, name="step two") - EventDefinition.objects.get_or_create(team=self._team, name="step three") - EventDefinition.objects.get_or_create(team=self._team, name="step four") - EventDefinition.objects.get_or_create(team=self._team, name="step five") + EventDefinition.objects.get_or_create(team=self.team, name="step one") + EventDefinition.objects.get_or_create(team=self.team, name="step two") + EventDefinition.objects.get_or_create(team=self.team, name="step three") + EventDefinition.objects.get_or_create(team=self.team, name="step four") + EventDefinition.objects.get_or_create(team=self.team, name="step five") def _insert_persons(self): - for i in range(1, self._number + 1): + for i in range(1, self.number + 1): try: person = Person.objects.create( - distinct_ids=[f"user_{i}"], team=self._team, properties={"name": f"user_{i}"} + distinct_ids=[f"user_{i}"], team=self.team, properties={"name": f"user_{i}"} ) self._insert_person_distinct_ids(f"user_{i}", person.uuid) except Exception as e: @@ -50,25 +50,25 @@ def _insert_persons(self): def _insert_person_distinct_ids(self, distinct_id, person_uuid): sql = f""" insert into person_distinct_id (distinct_id, person_id, team_id, _timestamp) values - ('{distinct_id}', '{person_uuid}', '{self._team.id}', now()); + ('{distinct_id}', '{person_uuid}', '{self.team.id}', now()); """ sync_execute(sql) def _insert_events(self): - step_one = self._number + 1 + step_one = self.number + 1 step_two = round(step_one / 2) step_three = round(step_one / 3) step_four = round(step_one / 4) step_five = round(step_one / 5) for i in range(1, step_one): - create_event(uuid.uuid4(), "step one", self._team, f"user_{i}", "2021-05-01 00:00:00") + create_event(uuid.uuid4(), "step one", self.team, f"user_{i}", "2021-05-01 00:00:00") for i in range(1, step_two): - create_event(uuid.uuid4(), "step two", self._team, f"user_{i}", "2021-05-03 00:00:00") + create_event(uuid.uuid4(), "step two", self.team, f"user_{i}", "2021-05-03 00:00:00") for i in range(1, step_three): - create_event(uuid.uuid4(), "step three", self._team, f"user_{i}", "2021-05-05 00:00:00") + create_event(uuid.uuid4(), "step three", self.team, f"user_{i}", "2021-05-05 00:00:00") for i in range(1, step_four): - create_event(uuid.uuid4(), "step four", self._team, f"user_{i}", "2021-05-07 00:00:00") + create_event(uuid.uuid4(), "step four", self.team, f"user_{i}", "2021-05-07 00:00:00") for i in range(1, step_five): - create_event(uuid.uuid4(), "step five", self._team, f"user_{i}", "2021-05-09 00:00:00") + create_event(uuid.uuid4(), "step five", self.team, f"user_{i}", "2021-05-09 00:00:00") diff --git a/ee/clickhouse/views/person.py b/ee/clickhouse/views/person.py index ccc03aa32b1d0..179f3ae9b40eb 100644 --- a/ee/clickhouse/views/person.py +++ b/ee/clickhouse/views/person.py @@ -9,6 +9,7 @@ from ee.clickhouse.queries.funnels.funnel_trends_persons import ClickhouseFunnelTrendsPersons from ee.clickhouse.queries.trends.lifecycle import ClickhouseLifecycle from posthog.api.person import PersonViewSet +from posthog.api.utils import format_next_absolute_url, format_next_url from posthog.models import Event, Filter, Person @@ -27,7 +28,9 @@ def funnel(self, request: request.Request, **kwargs) -> response.Response: filter = Filter(request=request) team = request.user.team results = ClickhouseFunnelPersons(filter, team).run() - return response.Response(data=results) + + next_url = format_next_absolute_url(request, filter.offset, 100) if len(results) > 99 else None + return response.Response(data={"results": results, "next": next_url}) @action(methods=["GET"], detail=False) def funnel_trends(self, request: request.Request, **kwargs) -> response.Response: @@ -37,7 +40,9 @@ def funnel_trends(self, request: request.Request, **kwargs) -> response.Response filter = Filter(request=request) team = request.user.team results = ClickhouseFunnelTrendsPersons(filter, team).run() - return response.Response(data=results) + + next_url = format_next_absolute_url(request, filter.offset, 100) if len(results) > 99 else None + return response.Response(data={"results": results, "next": next_url}) def destroy(self, request: request.Request, pk=None, **kwargs): # type: ignore try: diff --git a/ee/clickhouse/views/test/test_clickhouse_funnel_person.py b/ee/clickhouse/views/test/test_clickhouse_funnel_person.py index 19721aed6d7c7..492b45735a6e7 100644 --- a/ee/clickhouse/views/test/test_clickhouse_funnel_person.py +++ b/ee/clickhouse/views/test/test_clickhouse_funnel_person.py @@ -1,6 +1,4 @@ import json -import urllib.parse -from urllib.parse import urlencode from rest_framework import status @@ -13,7 +11,7 @@ class TestFunnelPerson(ClickhouseTestMixin, APIBaseTest): def setUp(self): super().setUp() - GenerateLocal().generate() + GenerateLocal(self.team).generate() def test_basic_pagination(self): request_data = { @@ -34,12 +32,19 @@ def test_basic_pagination(self): response = self.client.get("/api/person/funnel/", data=request_data) self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(100, len(response.json())) + j = response.json() + next = j["next"] + self.assertEqual(100, len(j["results"])) - response = self.client.get("/api/person/funnel/", data={**request_data, "offset": 100}) + response = self.client.get(next) self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(100, len(response.json())) + j = response.json() + next = j["next"] + self.assertEqual(100, len(j["results"])) + self.assertNotEqual(None, next) - response = self.client.get("/api/person/funnel/", data={**request_data, "offset": 200}) + response = self.client.get(next) self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(50, len(response.json())) + j = response.json() + self.assertEqual(50, len(j["results"])) + self.assertEqual(None, j["next"]) diff --git a/posthog/api/utils.py b/posthog/api/utils.py index fabecdd264337..78cbff2140e88 100644 --- a/posthog/api/utils.py +++ b/posthog/api/utils.py @@ -22,9 +22,11 @@ def format_next_url(request: request.Request, offset: int, page_size: int): if not next_url: return None + new_offset = str(offset + page_size) + if "offset" in next_url: next_url = next_url[1:] - next_url = next_url.replace("offset=" + str(offset), "offset=" + str(offset + page_size)) + next_url = next_url.replace(f"offset={str(offset)}", f"offset={new_offset}") else: next_url = request.build_absolute_uri( "{}{}offset={}".format(next_url, "&" if "?" in next_url else "?", offset + page_size) @@ -32,6 +34,22 @@ def format_next_url(request: request.Request, offset: int, page_size: int): return next_url +def format_next_absolute_url(request: request.Request, offset: int, page_size: int): + next_url = request.get_raw_uri() + + if not next_url: + return None + + new_offset = str(offset + page_size) + + if "offset" in next_url: + next_url = next_url.replace(f"offset={str(offset)}", f"offset={new_offset}") + else: + next_url = next_url + ("&" if "?" in next_url else "?") + f"offset={new_offset}" + + return next_url + + def get_token(data, request) -> Tuple[Optional[str], bool]: token = None if request.method == "GET":