-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
APIv2: visit:country_name, visit:region_name, visit:city_name dimensions (#4328)
* Add data migration for creating and syncing location_data table and dictionary
* Migration to populate location data
* Daily cron to refresh location dataset if changed
* Add support for visit:country_name, visit:region_name and visit:city_name dimensions. Under the hood this relies on a `location_data` table in ClickHouse being regularly synced with the plausible/location repo, and on dictionary lookups used in ALIAS columns
* Update queue name
* Update documentation
* Explicit structs
* Improve docs further
* Migration comment
* Add queues
* Add error when already loaded
* Test for filtering by new dimensions
* Update deps
* dimension -> select_dimension
* Update a test
- Loading branch information
Showing
21 changed files
with
376 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
defmodule Plausible.ClickhouseLocationData do
  @moduledoc """
  Schema for storing location id <-> translation mappings in ClickHouse.

  Rows are read indirectly via the `location_data_dict` dictionary, which is
  used by ALIAS columns in the `events_v2`, `sessions_v2` and
  `imported_locations` tables.
  """
  use Ecto.Schema

  # The table has no single-column primary key, so Ecto's default `:id` key is disabled.
  @primary_key false
  schema "location_data" do
    # Kind of location the row describes.
    field :type, Ch, type: "LowCardinality(String)"
    # Identifier of the location, stored as a string.
    field :id, :string
    # Human-readable location name.
    field :name, :string
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
defmodule Plausible.DataMigration.LocationsSync do
  @moduledoc """
  ClickHouse data migration that stores location names in ClickHouse.

  Only run when `Location.version()` changes: either as a migration or in cron.

  The migration:

  1. Truncates the existing `location_data` table (if it exists)
  2. Creates the table (if needed)
  3. Inserts fresh data from the Location module
  4. (Re-)Creates the dictionary that reads location data from the table
  5. Creates ALIAS columns in `events_v2`, `sessions_v2` and `imported_locations`
     to make reading location names easy
  6. Updates the table comment of `location_data` to record the last version synced

  Note that the dictionary is large enough to cache the whole dataset in memory,
  making lookups fast.

  This migration is intended to be idempotent and rerunnable - if run multiple
  times, it should always leave things in the same state as if run once.

  SQL files available at: priv/data_migrations/LocationsSync/sql
  """
  alias Plausible.ClickhouseLocationData

  use Plausible.DataMigration, dir: "LocationsSync", repo: Plausible.IngestRepo

  # Each entry is {alias column name, location type, source column}.
  @native_columns [
    {"country_name", "country", "country_code"},
    {"region_name", "subdivision", "subdivision1_code"},
    {"city_name", "city", "city_geoname_id"}
  ]

  @imported_columns [
    {"country_name", "country", "country"},
    {"region_name", "subdivision", "region"},
    {"city_name", "city", "city"}
  ]

  # Expanded at compile time into one map per (table, alias column) pair,
  # in the order the ALTER statements are issued.
  @columns Enum.flat_map(
             [
               {"events_v2", @native_columns},
               {"sessions_v2", @native_columns},
               {"imported_locations", @imported_columns}
             ],
             fn {table, specs} ->
               Enum.map(specs, fn {column_name, type, input_column} ->
                 %{
                   table: table,
                   column_name: column_name,
                   type: type,
                   input_column: input_column
                 }
               end)
             end
           )

  # Returns `true` unless the comment stored on `location_data` equals the
  # current `Location.version()`. Any unexpected query result counts as out of date.
  def out_of_date?() do
    with {:ok, %{rows: [[synced_version]]}} <- run_sql("get-location-data-table-comment") do
      synced_version != Location.version()
    else
      _ -> true
    end
  end

  # Runs the full sync described in the moduledoc. Every SQL step is asserted
  # to succeed, so a failing step raises a `MatchError`.
  def run() do
    cluster? = Plausible.MigrationUtils.clustered_table?("sessions_v2")

    {:ok, _} = run_sql("truncate-location-data-table", cluster?: cluster?)
    {:ok, _} = run_sql("create-location-data-table", cluster?: cluster?)

    @repo.insert_all(ClickhouseLocationData, location_rows())

    {:ok, _} =
      run_sql("update-location-data-dictionary",
        cluster?: cluster?,
        dictionary_connection_params: Plausible.MigrationUtils.dictionary_connection_params()
      )

    Enum.each(@columns, fn %{
                             table: table,
                             column_name: column_name,
                             type: type,
                             input_column: input_column
                           } ->
      {:ok, _} =
        run_sql("add-alias-column",
          cluster?: cluster?,
          table: table,
          column_name: column_name,
          type: type,
          input_column: input_column
        )
    end)

    {:ok, _} =
      run_sql("update-location-data-table-comment",
        cluster?: cluster?,
        version: Location.version()
      )
  end

  # Builds all rows to insert: countries first, then subdivisions, then cities.
  defp location_rows do
    Enum.concat([
      Enum.map(Location.Country.all(), &country_row/1),
      Enum.map(Location.Subdivision.all(), &subdivision_row/1),
      Enum.map(Location.City.all(), &city_row/1)
    ])
  end

  defp country_row(%Location.Country{alpha_2: alpha_2, name: name}) do
    %{type: "country", id: alpha_2, name: name}
  end

  defp subdivision_row(%Location.Subdivision{code: code, name: name}) do
    %{type: "subdivision", id: code, name: name}
  end

  # City ids are integers in the Location dataset; the table stores ids as strings.
  defp city_row(%Location.City{id: id, name: name}) do
    %{type: "city", id: Integer.to_string(id), name: name}
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
defmodule Plausible.Workers.LocationsSync do
  @moduledoc false

  use Plausible.Repo
  use Oban.Worker, queue: :locations_sync

  alias Plausible.DataMigration.LocationsSync, as: LocationsMigration

  # Re-runs the locations data migration when it reports being out of date.
  # The job always completes with `:ok`, whether or not a sync was needed.
  @impl Oban.Worker
  def perform(_job) do
    resync_if_stale()
    :ok
  end

  defp resync_if_stale do
    if LocationsMigration.out_of_date?(), do: LocationsMigration.run()
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5 changes: 5 additions & 0 deletions
5
priv/data_migrations/LocationsSync/sql/add-alias-column.sql.eex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
ALTER TABLE <%= @table %> <%= if @cluster? do %>ON CLUSTER '{cluster}'<% end %>
    ADD COLUMN IF NOT EXISTS <%= @column_name %> String
    ALIAS dictGet('location_data_dict', 'name', tuple('<%= @type %>', <%= @input_column %>))
13 changes: 13 additions & 0 deletions
13
priv/data_migrations/LocationsSync/sql/create-location-data-table.sql.eex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
CREATE TABLE IF NOT EXISTS location_data <%= if @cluster? do %>ON CLUSTER '{cluster}'<% end %>
(
    -- type: kind of location (country, subdivision or city)
    `type` LowCardinality(String),
    -- id: location identifier stored as a string
    `id` String,
    -- name: human-readable location name
    `name` String
)
<%= if @cluster? do %>
ENGINE = ReplicatedMergeTree('/clickhouse/{cluster}/tables/{shard}/plausible_prod/location_data', '{replica}')
<% else %>
ENGINE = MergeTree()
<% end %>
ORDER BY (type, id)
SETTINGS index_granularity = 128
1 change: 1 addition & 0 deletions
1
priv/data_migrations/LocationsSync/sql/get-location-data-table-comment.sql.eex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
SELECT comment
FROM system.tables
WHERE database = currentDatabase()
  AND table = 'location_data'
1 change: 1 addition & 0 deletions
1
priv/data_migrations/LocationsSync/sql/truncate-location-data-table.sql.eex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
TRUNCATE TABLE IF EXISTS location_data
<%= if @cluster? do %>ON CLUSTER '{cluster}'<% end %>
11 changes: 11 additions & 0 deletions
11
priv/data_migrations/LocationsSync/sql/update-location-data-dictionary.sql.eex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
CREATE OR REPLACE DICTIONARY location_data_dict <%= if @cluster? do %>ON CLUSTER '{cluster}'<% end %>
(
    `type` String,
    `id` String,
    `name` String
)
PRIMARY KEY type, id
SOURCE(CLICKHOUSE(TABLE location_data <%= @dictionary_connection_params %>))
LIFETIME(0)
LAYOUT(complex_key_cache(size_in_cells 500000))
3 changes: 3 additions & 0 deletions
3
priv/data_migrations/LocationsSync/sql/update-location-data-table-comment.sql.eex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
ALTER TABLE location_data <%= if @cluster? do %>ON CLUSTER '{cluster}'<% end %>
    MODIFY COMMENT '<%= @version %>'
18 changes: 18 additions & 0 deletions
18
priv/ingest_repo/migrations/20240709181437_populate_location_data.exs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
defmodule Plausible.IngestRepo.Migrations.PopulateLocationData do
  use Ecto.Migration

  # Loads the Location dataset (if not loaded yet) and runs the locations sync.
  def up do
    ensure_locations_loaded()
    Plausible.DataMigration.LocationsSync.run()
  end

  # The sync cannot be undone, so rolling back always raises.
  def down, do: raise("Irreversible")

  # An ArgumentError from `Location.load_all/0` is treated as "already loaded"
  # and ignored; any other exception propagates.
  defp ensure_locations_loaded do
    Location.load_all()
  rescue
    ArgumentError -> nil
  end
end
Oops, something went wrong.