Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data Category Visualization #136

Merged
merged 25 commits into from
Oct 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
a02e61e
Initial Data Category Visualization
brentonmallen1 Oct 4, 2021
26fe41c
Nested Dictionary Functionality
brentonmallen1 Oct 4, 2021
1a14ca3
Nested Dictionary HTML
brentonmallen1 Oct 4, 2021
9da5bdd
HTML Templating and Serving Dependencies
brentonmallen1 Oct 5, 2021
b141666
Revert "Nested Dictionary HTML"
brentonmallen1 Oct 5, 2021
c1e2b22
Revert "Revert "Nested Dictionary HTML""
brentonmallen1 Oct 5, 2021
d9b44f5
Temp side fastapi server
brentonmallen1 Oct 7, 2021
7d30383
Add Default resources to Fideslang (#137)
ThomasLaPiana Oct 7, 2021
ea192e8
Initial Data Category Visualization
brentonmallen1 Oct 4, 2021
5a5fd5d
Check in to rebase in master changes
brentonmallen1 Oct 8, 2021
b3996cd
Move Category Viz to API
brentonmallen1 Oct 8, 2021
9f3e611
Some Cleanup
brentonmallen1 Oct 8, 2021
4366215
Add Unit Tests
brentonmallen1 Oct 8, 2021
77b7a89
Formatting, Linting, Typing
brentonmallen1 Oct 11, 2021
322c4a4
Appease the benevolent pylint gods
brentonmallen1 Oct 11, 2021
72c9a73
Merge branch 'main' into dataset_ui
Oct 11, 2021
8045cdb
fix the docker docs page
Oct 11, 2021
ceab8fa
redo the visualization endpoint to make it generalizable to any resou…
Oct 11, 2021
9dc1e1a
rename some stuff, fix the default taxonomy
Oct 11, 2021
39eb395
pylint fix
Oct 11, 2021
9c9ac85
Expanded doc strings
brentonmallen1 Oct 11, 2021
c511a5a
Programmatic Figure Titles
brentonmallen1 Oct 11, 2021
d97b83d
fix the tests
Oct 11, 2021
f8ea4fc
fix the tests again
Oct 11, 2021
3a54690
update module-level docstrings
Oct 11, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions docs/fides/docs/getting_started/docker.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,18 @@ The following commands should all be run from the top-level `fides` directory (w
...
Building docs
...
Check for new migrations to run...
...
root@1a742083cedf:/fides/fidesctl#
```

2. `fidesctl init-db` -> Builds the required images, spins up the database, and runs the initialization scripts:
1. `fidesctl init-db` -> Builds the required images, spins up the database, and runs the initialization scripts:

```bash
~/git/fides% fidesctl init-db
INFO [alembic.runtime.migration] Context impl PostgresqlImpl.
INFO [alembic.runtime.migration] Will assume transactional DDL.
```


3. `fidesctl ping` -> This confirms that your `fidesctl` CLI can reach the server and everything is ready to go!
1. `fidesctl ping` -> This confirms that your `fidesctl` CLI can reach the server and everything is ready to go!

```bash
root@796cfde906f1:/fides/fidesctl# fidesctl ping
Expand Down
2 changes: 2 additions & 0 deletions fidesctl/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ click==7.1.2
colorama==0.4.4
deepdiff==5.5.0
fastapi[all]==0.68.1
pandas==1.3.3
plotly==5.3.1
PyJWT==2.1.0
psycopg2-binary==2.9.1
pydantic==1.8.2
Expand Down
5 changes: 4 additions & 1 deletion fidesctl/src/fidesapi/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import uvicorn
from fastapi import FastAPI

from fidesapi import crud, db_session
from fidesapi import crud, db_session, visualize
from fidesctl.core.config import get_config

app = FastAPI()
Expand All @@ -17,6 +17,9 @@ def configure_routes() -> None:
"Include all of the routers not defined here."
for router in crud.routers:
app.include_router(router)
# add router for the category viz endpoints
for router in visualize.routers:
app.include_router(router)


def configure_db(database_url: str) -> None:
Expand Down
77 changes: 77 additions & 0 deletions fidesctl/src/fidesapi/visualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
API endpoints for displaying hierarchical data representations.
"""
from enum import Enum
from typing import Union

from fastapi import APIRouter, HTTPException
from fastapi.responses import HTMLResponse

from fidesctl.core import visualize
from fideslang import DEFAULT_TAXONOMY, model_map

# pylint: disable=redefined-outer-name,cell-var-from-loop

VISUALIZABLE_RESOURCE_TYPES = ["data_category", "data_qualifier", "data_use"]


class FigureTypeEnum(str, Enum):
    """
    Closed set of figure types that a visualization can be requested as.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (for example ``FigureTypeEnum.SANKEY == "sankey"``).
    """

    # Flow diagram
    SANKEY = "sankey"
    # Radial hierarchy chart
    SUNBURST = "sunburst"
    # Plain nested HTML list
    TEXT = "text"


def get_resource_type(router: APIRouter) -> str:
    """
    Derive a resource type name from an API router's prefix.

    Args:
        router: Api router whose ``prefix`` attribute is read

    Returns:
        The prefix with its first character removed
        (e.g. a prefix of "/data_category" gives "data_category")
    """
    prefix = router.prefix
    # Routers in this module are built with a "/<resource_type>" prefix,
    # so dropping the first character leaves the bare resource type.
    return prefix[1:]


routers = []
for resource_type in VISUALIZABLE_RESOURCE_TYPES:
    # Programmatically define one router (and one endpoint) per resource type
    RESOURCE_MODEL_NAME = model_map[resource_type].__name__
    router = APIRouter(
        tags=["Visualize", RESOURCE_MODEL_NAME],
        prefix=f"/{resource_type}",
    )

    # The resource type is bound as a parameter default so that each handler
    # keeps the value of its own loop iteration instead of the last one.
    @router.get("/visualize/{figure_type}")
    async def get_visualization(
        figure_type: FigureTypeEnum, resource_type: str = get_resource_type(router)
    ) -> Union[HTMLResponse, HTTPException]:
        """
        Visualize the hierarchy of a supported resource type.
        Args:
            figure_type: type of figure, by name, to generate
            resource_type: hierarchy source. one of ["data_category", "data_qualifier", "data_use"]

        Returns:
            Html for the requested figure.

        Raises:
            HTTPException: status code 400 when an invalid figure type or an
                unsupported resource type is provided
        """
        # Errors must be raised, not returned: a returned HTTPException is
        # treated as an ordinary response value and never becomes a 400.
        if figure_type not in ["sankey", "sunburst", "text"]:
            raise HTTPException(
                status_code=400,
                detail=f"{figure_type} is not a valid figure type. Valid options: [sankey, sunburst, text]",
            )
        # resource_type has a default, so it is also accepted from the request;
        # reject unknown values before using it as a dictionary key.
        if resource_type not in VISUALIZABLE_RESOURCE_TYPES:
            raise HTTPException(
                status_code=400,
                detail=f"{resource_type} is not a visualizable resource type. Valid options: {VISUALIZABLE_RESOURCE_TYPES}",
            )
        taxonomy = DEFAULT_TAXONOMY.dict()[resource_type]
        if figure_type == "sunburst":
            figure = visualize.sunburst_plot(taxonomy, resource_type)
        elif figure_type == "sankey":
            figure = visualize.sankey_plot(taxonomy, resource_type)
        else:
            figure = visualize.nested_categories_to_html_list(taxonomy, resource_type)
        return HTMLResponse(figure)

    routers += [router]
188 changes: 188 additions & 0 deletions fidesctl/src/fidesctl/core/visualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""
Creates data visualizations for hierarchical Fides resource types.
"""

from typing import Generator, List, Dict

import plotly.express as px
import plotly.graph_objects as go

FIDES_KEY_NAME = "fides_key"
FIDES_PARENT_NAME = "parent_key"


def sunburst_plot(
    categories: List[dict], resource_type: str, json_out: bool = False
) -> str:
    """
    Create a sunburst plot from a list of taxonomy members
    Reference: https://plotly.com/python/sunburst-charts/
    Args:
        categories: list of the dictionaries for each taxonomy member
        resource_type: the name of the resource type
        json_out: Flag to return a json representation of the visualization

    Returns:
        Json representation of the figure if `json_out` is True, html otherwise
    """

    # Colour every member by the first segment of its dotted key. The colour is
    # added to shallow copies so the caller's dictionaries are left untouched.
    colored_categories = [
        {**category, "color": category[FIDES_KEY_NAME].split(".")[0]}
        for category in categories
    ]

    fig = px.sunburst(
        colored_categories,
        names=FIDES_KEY_NAME,
        parents=FIDES_PARENT_NAME,
        color="color",
    )
    fig.update_layout(
        title_text=f'Fides {resource_type.replace("_", " ").title()} Hierarchy',
        font_size=10,
    )

    if json_out:
        return fig.to_json()
    return fig.to_html()


def sankey_plot(
    categories: List[dict], resource_type: str, json_out: bool = False
) -> str:
    """
    Build a sankey diagram linking every taxonomy member to its parent
    Reference: https://plotly.com/python/sankey-diagram/
    Args:
        categories: list of the dictionaries for each taxonomy member
        resource_type: the name of the resource type
        json_out: Flag to return a json representation of the visualization

    Returns:
        Json representation of the figure if `json_out` is True, html otherwise
    """

    # Map each member key to its position in the list; that position is the node index
    node_index = {
        entry[FIDES_KEY_NAME]: position for position, entry in enumerate(categories)
    }

    link_sources = []
    link_targets = []
    for entry in categories:
        parent_key = entry.get(FIDES_PARENT_NAME)
        if not parent_key:
            # no parent (missing or empty) means there is no link to draw
            continue
        link_sources.append(node_index[parent_key])
        link_targets.append(node_index[entry[FIDES_KEY_NAME]])

    nodes = {
        "pad": 15,
        "thickness": 20,
        "line": {"color": "black", "width": 0.5},
        "label": list(node_index),
        "color": "blue",  # Maybe make this 'ethyca blue'?
        "hovertemplate": "%{label}",
    }
    # NOTE(review): the target node indices are also passed as the link values;
    # confirm this weighting is intended.
    links = {"source": link_sources, "target": link_targets, "value": link_targets}

    fig = go.Figure(
        data=[go.Sankey(valueformat=".1f", valuesuffix="%", node=nodes, link=links)]
    )

    figure_title = f'Fides {resource_type.replace("_", " ").title()} Hierarchy'
    fig.update_layout(title_text=figure_title, font_size=10)

    return fig.to_json() if json_out else fig.to_html()


def convert_categories_to_nested_dict(categories: List[dict]) -> dict:
    """
    Convert a list of taxonomy members into a hierarchical nested dictionary.
    Leaf nodes will have an empty dictionary as the value.

    e.g.:

    {Parent1:
        {
            Child1: {},
            Child2: {},
            Parent2: {
                Child3: {}
            }
        }
    }

    The result does not depend on the order of `categories`: a child listed
    before its parent still ends up nested beneath it, and a parent listed
    after its children keeps them.

    Args:
        categories : list of dictionaries containing each entry from a catalog yaml file

    Returns:
        Nested dictionary keyed by the dot-separated segments of each member's key
    """

    def create_hierarchical_dict(data: dict, keys: List) -> None:
        """
        Create a nested dictionary given a list of strings as a key path
        Args:
            data: Dictionary to contain the nested dictionary as it's built
            keys: List of keys that equates to the 'path' down the nested dictionary

        Returns:
            None
        """
        for key in keys:
            # setdefault creates a missing level without clobbering an existing
            # one, and we always step into it so the next key lands one level down
            data = data.setdefault(key, {})

    nested_output: Dict[str, dict] = {}
    for category in categories:
        if FIDES_PARENT_NAME not in category:
            # entries without a parent field are placed at the top level as-is
            nested_output.setdefault(category[FIDES_KEY_NAME], {})
        else:
            node_path = category[FIDES_KEY_NAME].split(".")
            create_hierarchical_dict(nested_output, node_path)
    return nested_output


def nested_categories_to_html_list(
    categories: List[dict], resource_type: str, indent: int = 1
) -> str:
    """
    Render the taxonomy hierarchy as an HTML heading followed by nested list markup
    Args:
        categories: list of the dictionaries for each taxonomy member
        resource_type: the name of the resource type
        indent: spacing multiplier

    Returns:
        HTML string: an <h2> title line followed by the <li>/<ul> lines of the hierarchy
    """

    def nest_to_html(nested_dict: dict, indent_factor: int) -> Generator:
        """
        Yield the html lines for one level of the hierarchy
        Args:
            nested_dict: nested dictionary for keys to convert to html list object
            indent_factor: spacing multiplier

        Returns:
            Generator of HTML fragments: one <li> per key, each followed by a
            <ul> block holding that key's children
        """
        padding = " " * indent_factor
        for name, children in nested_dict.items():
            yield f"{padding}<li>{name}</li>"
            if not isinstance(children, dict):
                continue
            # children are rendered one indentation level deeper
            inner = "\n".join(nest_to_html(children, indent_factor + 1))
            yield f"{padding}<ul>\n{inner}\n{padding}</ul>"

    hierarchy = convert_categories_to_nested_dict(categories)
    heading = f'<h2>Fides {resource_type.replace("_", " ").title()} Hierarchy</h2>'
    list_markup = "\n".join(nest_to_html(hierarchy, indent))
    return f"{heading}\n{list_markup}"
2 changes: 1 addition & 1 deletion fidesctl/src/fideslang/default_taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@
],
data_qualifier=[
DataQualifier(
fides_key="Aggregated",
fides_key="aggregated",
organization_fides_key="default_organization",
name="Aggregated Data",
description="Statistical data that does not contain individually identifying information but includes information about groups of individuals that renders individual identification impossible.",
Expand Down
4 changes: 1 addition & 3 deletions fidesctl/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,7 @@ def resources_dict():
name="Test Policy",
description="Test Policy",
data_categories=models.PrivacyRule(inclusion="NONE", values=[]),
data_uses=models.PrivacyRule(
inclusion="NONE", values=["provide.system"]
),
data_uses=models.PrivacyRule(inclusion="NONE", values=["provide.system"]),
data_subjects=models.PrivacyRule(inclusion="ANY", values=[]),
data_qualifier="unlinked_pseudonymized_data",
action="REJECT",
Expand Down
Loading