Skip to content
This repository has been archived by the owner on Nov 30, 2022. It is now read-only.

Commit

Permalink
163 yaml datasets (#284)
Browse files Browse the repository at this point in the history
* 163 Initial commit showing that yaml dataset can be saved.

* 163 Refactor the yaml dataset endpoint and added pytests.

* 163 Exclude yaml dataset endpoint from open api spec.

Because this endpoint needs direct access to the request, the OpenAPI spec would not be generated correctly for it. To include the endpoint in the spec, extend the OpenAPI schema: https://fastapi.tiangolo.com/advanced/extending-openapi/

* 163 Formatted with Black.

* 163 Fixes for mypy.

* 163 Added happy path test for patching yaml dataset.

* 163 Respond with 415 if the wrong content type is specified in a PATCH to the yaml dataset endpoint.

* 163 Respond with 400 when the dataset yaml cannot be loaded.

* 163 Added example of a YAML PATCH request for dataset. Named: Create/Update Postgres Dataset YAML

* 163 Added example of a YAML PATCH request for dataset. Named: Create/Update Postgres Dataset YAML

* 163 Test patching multiple datasets at a time.

* 163 Return specific yaml error in http response.

* 163 Ran make black.

* 163 Trying to get linter to pass.

Co-authored-by: Nelson Schoenbrot <nelson@ethyca.com>
  • Loading branch information
nschoenbrot and Nelson Schoenbrot authored May 18, 2022
1 parent f0f7476 commit 889a950
Show file tree
Hide file tree
Showing 7 changed files with 973 additions and 24 deletions.
225 changes: 225 additions & 0 deletions data/dataset/example_test_dataset.invalid
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
# Example dataset manifest describing a set of related BigQuery test tables.
# NOTE(review): this content is committed as `example_test_dataset.invalid`, and
# the same commit adds a 400 response for dataset YAML that cannot be loaded, so
# the file is presumably malformed on purpose. Confirm before "repairing" its
# structure.
dataset:
# Key and human-readable name/description of the single dataset entry.
* fides_key: bigquery_example_test_dataset
name: BigQuery Example Test Dataset
description: Example of a BigQuery dataset containing a variety of related tables like customers, products, addresses, etc.
# One entry per table follows.
collections:
# address: city, house, state, street and zip contact fields; `id` is flagged as
# the primary key.
# NOTE(review): `house` uses the same `...contact.street` category as `street`;
# confirm that is intended.
* name: address
fields:
* name: city
data_categories: [user.provided.identifiable.contact.city]
* name: house
data_categories: [user.provided.identifiable.contact.street]
* name: id
data_categories: [system.operations]
fidesops_meta:
primary_key: True
* name: state
data_categories: [user.provided.identifiable.contact.state]
* name: street
data_categories: [user.provided.identifiable.contact.street]
* name: zip
data_categories: [user.provided.identifiable.contact.postal_code]

# customer: `address_id` references `address.id` (direction: to); `email` is
# flagged as the `email` identity; `id` is the primary key; `name` is declared as
# a string with length 40.
* name: customer
fields:
* name: address_id
data_categories: [system.operations]
fidesops_meta:
references:
* dataset: bigquery_example_test_dataset
field: address.id
direction: to
* name: created
data_categories: [system.operations]
* name: email
data_categories: [user.provided.identifiable.contact.email]
fidesops_meta:
identity: email
data_type: string
* name: id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
primary_key: True
* name: name
data_categories: [user.provided.identifiable.name]
fidesops_meta:
data_type: string
length: 40

# employee: same shape as `customer` minus `created`; `address_id` references
# `address.id` (direction: to), `email` is flagged as the `email` identity, and
# `id` is the primary key. `name` declares a string type but no length.
* name: employee
fields:
* name: address_id
data_categories: [system.operations]
fidesops_meta:
references:
* dataset: bigquery_example_test_dataset
field: address.id
direction: to
* name: email
data_categories: [user.provided.identifiable.contact.email]
fidesops_meta:
identity: email
data_type: string
* name: id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
primary_key: True
* name: name
data_categories: [user.provided.identifiable.name]
fidesops_meta:
data_type: string

# login: `customer_id` references `customer.id` (direction: from); `id` is the
# primary key; `time` is categorised as non-identifiable sensor data.
* name: login
fields:
* name: customer_id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
references:
* dataset: bigquery_example_test_dataset
field: customer.id
direction: from
* name: id
data_categories: [system.operations]
fidesops_meta:
primary_key: True
* name: time
data_categories: [user.derived.nonidentifiable.sensor]

# orders: `customer_id` references `customer.id` (direction: from);
# `shipping_address_id` references `address.id` (direction: to); `id` is the
# primary key.
* name: orders
fields:
* name: customer_id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
references:
* dataset: bigquery_example_test_dataset
field: customer.id
direction: from
* name: id
data_categories: [system.operations]
fidesops_meta:
primary_key: True
* name: shipping_address_id
data_categories: [system.operations]
fidesops_meta:
references:
* dataset: bigquery_example_test_dataset
field: address.id
direction: to

# order_item: `order_id` references `orders.id` (direction: from) and
# `product_id` references `product.id` (direction: to); `quantity` carries no
# extra metadata. No field in this collection is flagged as a primary key.
* name: order_item
fields:
* name: order_id
data_categories: [system.operations]
fidesops_meta:
references:
* dataset: bigquery_example_test_dataset
field: orders.id
direction: from
* name: product_id
data_categories: [system.operations]
fidesops_meta:
references:
* dataset: bigquery_example_test_dataset
field: product.id
direction: to
* name: quantity
data_categories: [system.operations]

# payment_card: `billing_address_id` references `address.id` (direction: to) and
# `customer_id` references `customer.id` (direction: from); `id` is the primary
# key. `ccn`, `code` and `name` are categorised as financial data; `preferred` is
# categorised as non-identifiable.
* name: payment_card
fields:
* name: billing_address_id
data_categories: [system.operations]
fidesops_meta:
references:
* dataset: bigquery_example_test_dataset
field: address.id
direction: to
* name: ccn
data_categories: [user.provided.identifiable.financial.account_number]
* name: code
data_categories: [user.provided.identifiable.financial]
* name: customer_id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
references:
* dataset: bigquery_example_test_dataset
field: customer.id
direction: from
* name: id
data_categories: [system.operations]
fidesops_meta:
primary_key: True
* name: name
data_categories: [user.provided.identifiable.financial]
* name: preferred
data_categories: [user.provided.nonidentifiable]

# product: `id` (primary key), `name` and `price`, all categorised as
# `system.operations`; no references to other collections.
* name: product
fields:
* name: id
data_categories: [system.operations]
fidesops_meta:
primary_key: True
* name: name
data_categories: [system.operations]
* name: price
data_categories: [system.operations]

# report: `email` is flagged as the `email` identity; `id` is the primary key;
# the remaining fields (`month`, `name`, `total_visits`, `year`) are categorised
# as `system.operations`.
* name: report
fields:
* name: email
data_categories: [user.provided.identifiable.contact.email]
fidesops_meta:
identity: email
data_type: string
* name: id
data_categories: [system.operations]
fidesops_meta:
primary_key: True
* name: month
data_categories: [system.operations]
* name: name
data_categories: [system.operations]
* name: total_visits
data_categories: [system.operations]
* name: year
data_categories: [system.operations]

# service_request: both `alt_email` and `email` are flagged as the `email`
# identity; `employee_id` references `employee.id` (direction: from); `id` is the
# primary key.
# NOTE(review): `email` is an identity field yet is categorised as
# `system.operations`, unlike `alt_email`; confirm this is deliberate.
* name: service_request
fields:
* name: alt_email
data_categories: [user.provided.identifiable.contact.email]
fidesops_meta:
identity: email
data_type: string
* name: closed
data_categories: [system.operations]
* name: email
data_categories: [system.operations]
fidesops_meta:
identity: email
data_type: string
* name: employee_id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
references:
* dataset: bigquery_example_test_dataset
field: employee.id
direction: from
* name: id
data_categories: [system.operations]
fidesops_meta:
primary_key: True
* name: opened
data_categories: [system.operations]

# visit: `email` is flagged as the `email` identity; `last_visit` is categorised
# as `system.operations`. No field in this collection is flagged as a primary key.
* name: visit
fields:
* name: email
data_categories: [user.provided.identifiable.contact.email]
fidesops_meta:
identity: email
data_type: string
* name: last_visit
data_categories: [system.operations]
Loading

0 comments on commit 889a950

Please sign in to comment.