Skip to content

Commit

Permalink
82 sqlserver query execution (#157)
Browse files Browse the repository at this point in the history
* getting started with sqlserver query execution

* remove stmt.compile

* Extends SQLConnector and alters generate_query such that sql server query generation works

* gets tests passing

* increase wait time for integration containers

* Refactors SQLServer query logic into separate class, adds test

* remove unneeded code

* refactor SQLServer to override format_clause_for_query

* formatting

* rename field_name to string_path

* lint

* more formatting

* removing debug params, small lint

* no need to test for log preservation for mssql specifically

* delete privacy request results created in test
  • Loading branch information
eastandwestwind authored Jan 21, 2022
1 parent aef287b commit 6ef11d2
Show file tree
Hide file tree
Showing 11 changed files with 547 additions and 66 deletions.
17 changes: 15 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ server-shell: compose-build
@docker-compose run $(IMAGE_NAME) /bin/bash

# Open an interactive bash shell in the main image with the services from
# docker-compose.integration-test.yml running in the background.
# Note: does not bring up external connectors such as redshift or snowflake.
integration-shell: compose-build
	@echo "Bringing up main image and images for integration testing"
	@docker-compose -f docker-compose.yml -f docker-compose.integration-test.yml up -d
# Same 20s grace period the pytest-integration-* targets use before running
# the mssql setup script against the freshly started containers.
	@echo "Waiting 20s for integration containers to be ready..."
	@sleep 20
	@echo "Running additional setup for mssql integration tests"
# No -it here: nothing is typed at this step, and requesting a TTY makes
# `docker exec` fail when stdin is not a terminal.
	@docker exec fidesops python tests/integration_tests/mssql_setup.py
	@docker-compose -f docker-compose.yml -f docker-compose.integration-test.yml run $(IMAGE_NAME) /bin/bash

integration-env: compose-build
Expand Down Expand Up @@ -113,8 +120,8 @@ pytest-integration-access: compose-build
@docker-compose -f docker-compose.yml -f docker-compose.integration-test.yml build
@echo "Bringing up the integration environment..."
@docker-compose -f docker-compose.yml -f docker-compose.integration-test.yml up -d
@echo "Waiting 15s for integration containers to be ready..."
@sleep 15
@echo "Waiting 20s for integration containers to be ready..."
@sleep 20
@echo "Running additional setup for mssql integration tests"
@docker exec fidesops python tests/integration_tests/mssql_setup.py
@echo "Running pytest integration tests..."
Expand All @@ -126,6 +133,12 @@ pytest-integration-access: compose-build
pytest-integration-erasure: compose-build
@echo "Building additional Docker images for integration tests..."
@docker-compose -f docker-compose.yml -f docker-compose.integration-test.yml build
@echo "Bringing up the integration environment..."
@docker-compose -f docker-compose.yml -f docker-compose.integration-test.yml up -d
@echo "Waiting 20s for integration containers to be ready..."
@sleep 20
@echo "Running additional setup for mssql integration tests"
@docker exec fidesops python tests/integration_tests/mssql_setup.py
@echo "Running pytest integration tests..."
@docker-compose -f docker-compose.yml -f docker-compose.integration-test.yml \
run $(IMAGE_NAME) \
Expand Down
210 changes: 210 additions & 0 deletions data/dataset/mssql_example_test_dataset.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
dataset:
- fides_key: mssql_example_test_dataset
name: Microsoft SQLServer Example Test Dataset
description: Example of a Microsoft SQLServer dataset containing a variety of related tables like customers, products, addresses, etc.
collections:
- name: address
fields:
- name: city
data_categories: [user.provided.identifiable.contact.city]
- name: house
data_categories: [user.provided.identifiable.contact.street]
- name: id
data_categories: [system.operations]
fidesops_meta:
primary_key: True
- name: state
data_categories: [user.provided.identifiable.contact.state]
- name: street
data_categories: [user.provided.identifiable.contact.street]
- name: zip
data_categories: [user.provided.identifiable.contact.postal_code]

- name: customer
fields:
- name: address_id
data_categories: [system.operations]
fidesops_meta:
references:
- dataset: mssql_example_test_dataset
field: address.id
direction: to
- name: created
data_categories: [system.operations]
- name: email
data_categories: [user.provided.identifiable.contact.email]
fidesops_meta:
identity: email
data_type: string
- name: id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
primary_key: True
- name: name
data_categories: [user.provided.identifiable.name]

- name: employee
fields:
- name: address_id
data_categories: [system.operations]
fidesops_meta:
references:
- dataset: mssql_example_test_dataset
field: address.id
direction: to
- name: email
data_categories: [user.provided.identifiable.contact.email]
fidesops_meta:
identity: email
data_type: string
- name: id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
primary_key: True
- name: name
data_categories: [user.provided.identifiable.name]

- name: login
fields:
- name: customer_id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
references:
- dataset: mssql_example_test_dataset
field: customer.id
direction: from
- name: id
data_categories: [system.operations]
- name: time
data_categories: [user.derived.nonidentifiable.sensor]

- name: orders
fields:
- name: customer_id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
references:
- dataset: mssql_example_test_dataset
field: customer.id
direction: from
- name: id
data_categories: [system.operations]
fidesops_meta:
primary_key: True
- name: shipping_address_id
data_categories: [system.operations]
fidesops_meta:
references:
- dataset: mssql_example_test_dataset
field: address.id
direction: to

# order_item
- name: order_item
fields:
- name: order_id
data_categories: [system.operations]
fidesops_meta:
references:
- dataset: mssql_example_test_dataset
field: orders.id
direction: from
- name: product_id
data_categories: [system.operations]
fidesops_meta:
references:
- dataset: mssql_example_test_dataset
field: product.id
direction: to
- name: quantity
data_categories: [system.operations]

- name: payment_card
fields:
- name: billing_address_id
data_categories: [system.operations]
fidesops_meta:
references:
- dataset: mssql_example_test_dataset
field: address.id
direction: to
- name: ccn
data_categories: [user.provided.identifiable.financial.account_number]
- name: code
data_categories: [user.provided.identifiable.financial]
- name: customer_id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
references:
- dataset: mssql_example_test_dataset
field: customer.id
direction: from
- name: id
data_categories: [system.operations]
- name: name
data_categories: [user.provided.identifiable.financial]
- name: preferred
data_categories: [user.provided.nonidentifiable]

- name: product
fields:
- name: id
data_categories: [system.operations]
- name: name
data_categories: [system.operations]
- name: price
data_categories: [system.operations]

- name: report
fields:
- name: email
data_categories: [user.provided.identifiable.contact.email]
fidesops_meta:
identity: email
data_type: string
- name: id
data_categories: [system.operations]
- name: month
data_categories: [system.operations]
- name: name
data_categories: [system.operations]
- name: total_visits
data_categories: [system.operations]
- name: year
data_categories: [system.operations]

- name: service_request
fields:
- name: alt_email
data_categories: [user.provided.identifiable.contact.email]
fidesops_meta:
identity: email
data_type: string
- name: closed
data_categories: [system.operations]
- name: email
data_categories: [system.operations]
fidesops_meta:
identity: email
data_type: string
- name: employee_id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
references:
- dataset: mssql_example_test_dataset
field: employee.id
direction: from
- name: id
data_categories: [system.operations]
- name: opened
data_categories: [system.operations]

- name: visit
fields:
- name: email
data_categories: [user.provided.identifiable.contact.email]
fidesops_meta:
identity: email
data_type: string
- name: last_visit
data_categories: [system.operations]
83 changes: 39 additions & 44 deletions data/sql/mssql_example.sql
Original file line number Diff line number Diff line change
@@ -1,84 +1,79 @@
USE master;

-- CREATE USER IF NOT EXISTS 'sa'@'mssql_example' IDENTIFIED BY 'Mssql_pw1';
-- GRANT ALL PRIVILEGES ON *.* TO 'sa'@'mssql_example' ;
-- GRANT ALL PRIVILEGES ON *.* TO 'sa'@'%' ;
-- FLUSH PRIVILEGES;

DROP DATABASE IF EXISTS mssql_example;
CREATE DATABASE mssql_example;
USE mssql_example;

DROP TABLE IF EXISTS report;
DROP TABLE IF EXISTS service_request;
DROP TABLE IF EXISTS login;
DROP TABLE IF EXISTS visit;
DROP TABLE IF EXISTS order_item;
DROP TABLE IF EXISTS orders;
DROP TABLE IF EXISTS payment_card;
DROP TABLE IF EXISTS employee;
DROP TABLE IF EXISTS customer;
DROP TABLE IF EXISTS address;
DROP TABLE IF EXISTS product;
DROP TABLE IF EXISTS composite_pk_test;
DROP TABLE IF EXISTS type_link_test;
DROP TABLE IF EXISTS dbo.report;
DROP TABLE IF EXISTS dbo.service_request;
DROP TABLE IF EXISTS dbo.login;
DROP TABLE IF EXISTS dbo.visit;
DROP TABLE IF EXISTS dbo.order_item;
DROP TABLE IF EXISTS dbo.orders;
DROP TABLE IF EXISTS dbo.payment_card;
DROP TABLE IF EXISTS dbo.employee;
DROP TABLE IF EXISTS dbo.customer;
DROP TABLE IF EXISTS dbo.address;
DROP TABLE IF EXISTS dbo.product;
DROP TABLE IF EXISTS dbo.composite_pk_test;
DROP TABLE IF EXISTS dbo.type_link_test;


CREATE TABLE product ( id INT PRIMARY KEY, name CHARACTER VARYING(100), price MONEY);
CREATE TABLE dbo.product ( id INT PRIMARY KEY, name CHARACTER VARYING(100), price MONEY);

CREATE TABLE address ( id BIGINT PRIMARY KEY, house INT, street CHARACTER VARYING(100), city CHARACTER VARYING(100), state CHARACTER VARYING(100), zip CHARACTER VARYING(100));
CREATE TABLE dbo.address ( id BIGINT PRIMARY KEY, house INT, street CHARACTER VARYING(100), city CHARACTER VARYING(100), state CHARACTER VARYING(100), zip CHARACTER VARYING(100));

CREATE TABLE customer ( id INT PRIMARY KEY, email CHARACTER VARYING(100), name CHARACTER VARYING(100), created DATETIME, address_id BIGINT);
CREATE TABLE dbo.customer ( id INT PRIMARY KEY, email CHARACTER VARYING(100), name CHARACTER VARYING(100), created DATETIME, address_id BIGINT);

CREATE TABLE employee ( id INT PRIMARY KEY, email CHARACTER VARYING(100), name CHARACTER VARYING(100), address_id BIGINT);
CREATE TABLE dbo.employee ( id INT PRIMARY KEY, email CHARACTER VARYING(100), name CHARACTER VARYING(100), address_id BIGINT);

CREATE TABLE payment_card ( id CHARACTER VARYING(100) PRIMARY KEY, name CHARACTER VARYING(100), ccn BIGINT, code SMALLINT, preferred BIT, customer_id INT, billing_address_id BIGINT);
CREATE TABLE dbo.payment_card ( id CHARACTER VARYING(100) PRIMARY KEY, name CHARACTER VARYING(100), ccn BIGINT, code SMALLINT, preferred BIT, customer_id INT, billing_address_id BIGINT);

CREATE TABLE orders ( id CHARACTER VARYING(100) PRIMARY KEY, customer_id INT, shipping_address_id BIGINT, payment_card_id CHARACTER VARYING(100));
CREATE TABLE dbo.orders ( id CHARACTER VARYING(100) PRIMARY KEY, customer_id INT, shipping_address_id BIGINT, payment_card_id CHARACTER VARYING(100));

CREATE TABLE order_item ( order_id CHARACTER VARYING(100), item_no SMALLINT, product_id INT, quantity SMALLINT, CONSTRAINT order_item_pk PRIMARY KEY (order_id, item_no));
CREATE TABLE dbo.order_item ( order_id CHARACTER VARYING(100), item_no SMALLINT, product_id INT, quantity SMALLINT, CONSTRAINT order_item_pk PRIMARY KEY (order_id, item_no));

CREATE TABLE visit ( email CHARACTER VARYING(100), last_visit DATETIME, CONSTRAINT visit_pk PRIMARY KEY (email, last_visit));
CREATE TABLE dbo.visit ( email CHARACTER VARYING(100), last_visit DATETIME, CONSTRAINT visit_pk PRIMARY KEY (email, last_visit));

CREATE TABLE login ( id INT PRIMARY KEY, customer_id INT, time DATETIME);
CREATE TABLE dbo.login ( id INT PRIMARY KEY, customer_id INT, time DATETIME);

CREATE TABLE service_request ( id CHARACTER VARYING(100) PRIMARY KEY, email CHARACTER VARYING(100), alt_email CHARACTER VARYING(100), opened DATE, closed DATE, employee_id INT);
CREATE TABLE dbo.service_request ( id CHARACTER VARYING(100) PRIMARY KEY, email CHARACTER VARYING(100), alt_email CHARACTER VARYING(100), opened DATE, closed DATE, employee_id INT);

CREATE TABLE report ( id INT PRIMARY KEY, email CHARACTER VARYING(100), name CHARACTER VARYING(100), year INT, month INT, total_visits INT);
CREATE TABLE dbo.report ( id INT PRIMARY KEY, email CHARACTER VARYING(100), name CHARACTER VARYING(100), year INT, month INT, total_visits INT);

CREATE TABLE composite_pk_test ( id_a INT NOT NULL, id_b INT NOT NULL, description VARCHAR(100), customer_id INT, PRIMARY KEY(id_a, id_b));
CREATE TABLE dbo.composite_pk_test ( id_a INT NOT NULL, id_b INT NOT NULL, description VARCHAR(100), customer_id INT, PRIMARY KEY(id_a, id_b));

INSERT INTO composite_pk_test VALUES (1,10,'linked to customer 1',1), (1,11,'linked to customer 2',2), (2,10,'linked to customer 3',3);
INSERT INTO dbo.composite_pk_test VALUES (1,10,'linked to customer 1',1), (1,11,'linked to customer 2',2), (2,10,'linked to customer 3',3);

CREATE TABLE type_link_test ( id CHARACTER VARYING(100) PRIMARY KEY, name CHARACTER VARYING(100));
CREATE TABLE dbo.type_link_test ( id CHARACTER VARYING(100) PRIMARY KEY, name CHARACTER VARYING(100));

-- Populate tables with some public data
INSERT INTO product VALUES (1, 'Example Product 1', '$10.00'), (2, 'Example Product 2', '$20.00'), (3, 'Example Product 3', '$50.00');
INSERT INTO dbo.product VALUES (1, 'Example Product 1', '$10.00'), (2, 'Example Product 2', '$20.00'), (3, 'Example Product 3', '$50.00');

INSERT INTO address VALUES (1, '123', 'Example Street', 'Exampletown', 'NY', '12345'), (2, '4', 'Example Lane', 'Exampletown', 'NY', '12321'), (3, '555', 'Example Ave', 'Example City', 'NY', '12000'), (4, '1111', 'Example Place', 'Example Mountain', 'TX', '54321');
INSERT INTO dbo.address VALUES (1, '123', 'Example Street', 'Exampletown', 'NY', '12345'), (2, '4', 'Example Lane', 'Exampletown', 'NY', '12321'), (3, '555', 'Example Ave', 'Example City', 'NY', '12000'), (4, '1111', 'Example Place', 'Example Mountain', 'TX', '54321');


INSERT INTO customer VALUES (1, 'customer-1@example.com', 'John Customer', '2020-04-01 11:47:42', 1), (2, 'customer-2@example.com', 'Jill Customer', '2020-04-01 11:47:42', 2), (3, 'jane@example.com', 'Jane Customer', '2020-04-01 11:47:42', 4);
INSERT INTO dbo.customer VALUES (1, 'customer-1@example.com', 'John Customer', '2020-04-01 11:47:42', 1), (2, 'customer-2@example.com', 'Jill Customer', '2020-04-01 11:47:42', 2), (3, 'jane@example.com', 'Jane Customer', '2020-04-01 11:47:42', 4);


INSERT INTO employee VALUES (1, 'employee-1@example.com', 'Jack Employee', 3), (2, 'employee-2@example.com', 'Jane Employee', 3);
INSERT INTO dbo.employee VALUES (1, 'employee-1@example.com', 'Jack Employee', 3), (2, 'employee-2@example.com', 'Jane Employee', 3);

INSERT INTO payment_card VALUES ('pay_aaa-aaa', 'Example Card 1', 123456789, 321, 1, 1, 1), ('pay_bbb-bbb', 'Example Card 2', 987654321, 123, 0, 2, 1), ('pay_ccc-ccc', 'Example Card 3', 373719391, 222, 0, 3, 4);
INSERT INTO dbo.payment_card VALUES ('pay_aaa-aaa', 'Example Card 1', 123456789, 321, 1, 1, 1), ('pay_bbb-bbb', 'Example Card 2', 987654321, 123, 0, 2, 1), ('pay_ccc-ccc', 'Example Card 3', 373719391, 222, 0, 3, 4);


INSERT INTO orders VALUES ('ord_aaa-aaa', 1, 2, 'pay_aaa-aaa'), ('ord_bbb-bbb', 2, 1, 'pay_bbb-bbb'), ('ord_ccc-ccc', 1, 1, 'pay_aaa-aaa'), ('ord_ddd-ddd', 1, 1, 'pay_bbb-bbb'), ('ord_ddd-eee', 3, 4, 'pay-ccc-ccc');
INSERT INTO dbo.orders VALUES ('ord_aaa-aaa', 1, 2, 'pay_aaa-aaa'), ('ord_bbb-bbb', 2, 1, 'pay_bbb-bbb'), ('ord_ccc-ccc', 1, 1, 'pay_aaa-aaa'), ('ord_ddd-ddd', 1, 1, 'pay_bbb-bbb'), ('ord_ddd-eee', 3, 4, 'pay-ccc-ccc');


INSERT INTO order_item VALUES ('ord_aaa-aaa', 1, 1, 1), ('ord_bbb-bbb', 1, 1, 1), ('ord_ccc-ccc', 1, 1, 1), ('ord_ccc-ccc', 2, 2, 1), ('ord_ddd-ddd', 1, 1, 1), ('ord_eee-eee', 3, 4, 3);
INSERT INTO dbo.order_item VALUES ('ord_aaa-aaa', 1, 1, 1), ('ord_bbb-bbb', 1, 1, 1), ('ord_ccc-ccc', 1, 1, 1), ('ord_ccc-ccc', 2, 2, 1), ('ord_ddd-ddd', 1, 1, 1), ('ord_eee-eee', 3, 4, 3);


INSERT INTO visit VALUES ('customer-1@example.com', '2021-01-06 01:00:00'), ('customer-2@example.com', '2021-01-06 01:00:00');
INSERT INTO dbo.visit VALUES ('customer-1@example.com', '2021-01-06 01:00:00'), ('customer-2@example.com', '2021-01-06 01:00:00');

INSERT INTO login VALUES (1, 1, '2021-01-01 01:00:00'), (2, 1, '2021-01-02 01:00:00'), (3, 1, '2021-01-03 01:00:00'), (4, 1, '2021-01-04 01:00:00'), (5, 1, '2021-01-05 01:00:00'), (6, 1, '2021-01-06 01:00:00'), (7, 2, '2021-01-06 01:00:00'), (8, 3, '2021-01-06 01:00:00');
INSERT INTO dbo.login VALUES (1, 1, '2021-01-01 01:00:00'), (2, 1, '2021-01-02 01:00:00'), (3, 1, '2021-01-03 01:00:00'), (4, 1, '2021-01-04 01:00:00'), (5, 1, '2021-01-05 01:00:00'), (6, 1, '2021-01-06 01:00:00'), (7, 2, '2021-01-06 01:00:00'), (8, 3, '2021-01-06 01:00:00');


INSERT INTO service_request VALUES ('ser_aaa-aaa', 'customer-1@example.com', 'customer-1-alt@example.com', '2021-01-01', '2021-01-03', 1), ('ser_bbb-bbb', 'customer-2@example.com', null, '2021-01-04', null, 1), ('ser_ccc-ccc', 'customer-3@example.com', null, '2021-01-05', '2020-01-07', 1), ('ser_ddd-ddd', 'customer-3@example.com', null, '2021-05-05', '2020-05-08', 2);
INSERT INTO dbo.service_request VALUES ('ser_aaa-aaa', 'customer-1@example.com', 'customer-1-alt@example.com', '2021-01-01', '2021-01-03', 1), ('ser_bbb-bbb', 'customer-2@example.com', null, '2021-01-04', null, 1), ('ser_ccc-ccc', 'customer-3@example.com', null, '2021-01-05', '2020-01-07', 1), ('ser_ddd-ddd', 'customer-3@example.com', null, '2021-05-05', '2020-05-08', 2);

INSERT INTO report VALUES (1, 'admin-account@example.com', 'Monthly Report', 2021, 8, 100), (2, 'admin-account@example.com', 'Monthly Report', 2021, 9, 100), (3, 'admin-account@example.com', 'Monthly Report', 2021, 10, 100), (4, 'admin-account@example.com', 'Monthly Report', 2021, 11, 100);
INSERT INTO dbo.report VALUES (1, 'admin-account@example.com', 'Monthly Report', 2021, 8, 100), (2, 'admin-account@example.com', 'Monthly Report', 2021, 9, 100), (3, 'admin-account@example.com', 'Monthly Report', 2021, 10, 100), (4, 'admin-account@example.com', 'Monthly Report', 2021, 11, 100);

INSERT INTO type_link_test VALUES ('1', 'name1'), ('2', 'name2');
INSERT INTO dbo.type_link_test VALUES ('1', 'name1'), ('2', 'name2');
Loading

0 comments on commit 6ef11d2

Please sign in to comment.