Skip to content

Commit

Permalink
1) Entities are now created as SQL statements only - this is reflected in the SQL files that are run with docker-entrypoint-initdb.d
Browse files Browse the repository at this point in the history

2) Entities are created as views by default (so no config is necessary in the entity creation)
3) Fixed Airflow worker stability
4) Reverted the numpy version to prevent an error when installing dot in the Airflow worker
  • Loading branch information
JanPeterDatakind committed Apr 21, 2023
1 parent 3420d83 commit 2577ab9
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 41 deletions.
24 changes: 7 additions & 17 deletions db/dot/4-upload_sample_dot_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,19 @@ INSERT INTO dot.entity_categories VALUES('ZAG', 'Zagreb airport flights');
INSERT INTO dot.entity_categories VALUES('ETH', 'Ethiopian Airlines');

-- configured entities - db views of the data we want to scan
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'all_flight_data', 'ALL', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select *
from {{ schema }}.flight_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1', 'zagreb_flight_data', 'ZAG', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select *
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'all_flight_data', 'ALL', 'select *
from public.flight_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1', 'zagreb_flight_data', 'ZAG', 'select *
from {{ schema }}.flight_data WHERE origin_airport=''Zagreb airport'' ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1', 'ethiopia_airlines_data', 'ETH', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select *
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'ethiopia_airlines_data', 'ETH', 'select *
from {{ schema }}.flight_data WHERE airline=''Ethiopian Airlines'' ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1', 'all_airports_data', 'ALL', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select *
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'all_airports_data', 'ALL', 'select *
from {{ schema }}.airport_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1', 'airlines_data', 'ALL', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select DISTINCT airline
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'airlines_data', 'ALL', 'select DISTINCT(airline)
from {{ schema }}.flight_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');


Expand Down
35 changes: 18 additions & 17 deletions db/dot/5-db_connections.sql
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
CREATE SCHEMA IF NOT EXISTS dot_config;
--CREATE SCHEMA IF NOT EXISTS dot_config;

CREATE TABLE dot_config.dot(
output_schema_suffix VARCHAR(255) NOT NULL DEFAULT 'test'
);
--CREATE TABLE dot_config.dot(
--output_schema_suffix VARCHAR(255) NOT NULL DEFAULT 'test'
--);

INSERT INTO dot_config.dot(output_schema_suffix)
VALUES('tests');
--INSERT INTO dot_config.dot(output_schema_suffix)
--VALUES('tests');

CREATE TABLE dot_config.dot_db_config(
connection_name VARCHAR(255) PRIMARY KEY,
type VARCHAR(255),
CREATE TABLE airflow.connection(
id INTEGER PRIMARY KEY,
conn_id VARCHAR(255),
conn_type VARCHAR(255),
description VARCHAR(255),
host VARCHAR(255),
user_name VARCHAR(255),
schema VARCHAR(255),
login VARCHAR(255),
password VARCHAR(255),
port VARCHAR(255),
dbname VARCHAR(255),
schema VARCHAR(255),
threads VARCHAR(255)
is_encrypted BOOLEAN,
is_extra_encrypted BOOLEAN,
extra VARCHAR(255)
);

INSERT INTO dot_config.dot_db_config(connection_name,type,host,user_name,password,port,dbname,schema,threads)
VALUES('dot_db','postgres','dot_db','postgres','','5432','dot_db','dot','4');
INSERT INTO dot_config.dot_db_config(connection_name,type,host,user_name,password,port,dbname,schema,threads)
VALUES('ScanProject1','postgres','dot_db','postgres','','5432','dot_db','public','4');
-- Seed the connection row for the DOT database itself.
-- Targets airflow.connection (the table created above); the former
-- dot_config.dot_db_config table is no longer created by this script.
-- The column list matches the eight supplied values one-to-one;
-- description is left out and therefore stays NULL.
INSERT INTO airflow.connection (
    id,
    conn_id,
    conn_type,
    host,
    schema,
    login,
    password,
    port
)
VALUES (
    1,
    'dot_db',
    'postgres',
    'dot_db',
    'dot',
    'postgres',
    '',
    '5432'
);
-- ToDo: Hash the password column
5 changes: 2 additions & 3 deletions docker/docker-compose-with-airflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,10 @@ x-airflow-common:
environment:
&airflow-common-env
AIRFLOW__CORE__EXECUTOR: CeleryExecutor
AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://postgres:${POSTGRES_PASSWORD}@dot_db:5432/dot_db?options=-csearch_path=airflow
AIRFLOW__CELERY__RESULT_BACKEND: postgresql+psycopg2://postgres:${POSTGRES_PASSWORD}@dot_db:5432/dot_db?options=-csearch_path=airflow
AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://postgres:${POSTGRES_PASSWORD}@dot_db:5432/dot_db?options=-csearch_path=airflow
AIRFLOW__DATABASE__RESULT_BACKEND: postgresql+psycopg2://postgres:${POSTGRES_PASSWORD}@dot_db:5432/dot_db?options=-csearch_path=airflow
AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
AIRFLOW__DATABASE__SQL_ALCHEMY_SCHEMA: 'airflow'
AIRFLOW__CORE__SQL_ALCHEMY_SCHEMA: 'airflow'
AIRFLOW__CORE__FERNET_KEY: ''
AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
AIRFLOW__CORE__ENABLE_XCOM_PICKLING: 'true'
Expand Down
2 changes: 1 addition & 1 deletion dot/requirements_dot.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ mock==4.0.3
nbformat==5.1.3
nest-asyncio==1.5.4
notebook==6.4.12
numpy==1.22.0
numpy==1.21.6
oauthlib==3.2.2
openpyxl==3.0.9
pandas==1.3.4
Expand Down
6 changes: 4 additions & 2 deletions dot/utils/configuration_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,11 @@ def generate_master_config_files(project_id, logger=logging.Logger):
"models:",
" dbt_model_1:",
" core:",
f" +schema: '{output_schema_suffix}'",
f" +schema: '{output_schema_suffix}'\n"
f" materialized: view",
" test:",
f" +schema: '{output_schema_suffix}'",
f" +schema: '{output_schema_suffix}'\n"
f" materialized: view",
]
)
with open(output_file, "a") as f:
Expand Down
2 changes: 1 addition & 1 deletion dot/utils/configuration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def load_config_from_db(project_id: str):
project_schema = row['project_schema']
except Exception as e:
raise Exception(f"Looks like the project_id '{project_id}' has not been set up. "
f"Please check the projects in appsmith or under the dot.projects table in the dot_db and try again.")
f"Please check the projects in the UI or under the dot.projects table in the dot_db and try again.")

project = project_id + "_db"
new_entry = {project: {'type': 'postgres', 'host': 'dot_db', 'user': 'postgres', 'pass': os.getenv("POSTGRES_PASSWORD"), 'port': 5432, 'dbname': 'dot_db', 'schema': project_schema, 'threads': 4}}
Expand Down

0 comments on commit 2577ab9

Please sign in to comment.