Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: FE ML Feature get_feature_metadata #1147

Merged
merged 14 commits into from
May 25, 2021
79 changes: 77 additions & 2 deletions frontend/amundsen_application/api/metadata/v0.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
from amundsen_application.models.user import load_user, dump_user

from amundsen_application.api.utils.metadata_utils import is_table_editable, marshall_table_partial, \
marshall_table_full, marshall_dashboard_partial, marshall_dashboard_full, marshall_lineage_table, TableUri
marshall_table_full, marshall_dashboard_partial, marshall_dashboard_full, marshall_feature_full, \
marshall_lineage_table, TableUri
from amundsen_application.api.utils.request_utils import get_query_param, request_metadata, request_search


Expand All @@ -31,6 +32,7 @@
TAGS_ENDPOINT = '/tags/'
USER_ENDPOINT = '/user'
DASHBOARD_ENDPOINT = '/dashboard'
FEATURE_ENDPOINT = '/feature'


def _get_table_endpoint() -> str:
Expand All @@ -47,6 +49,13 @@ def _get_dashboard_endpoint() -> str:
return dashboard_endpoint


def _get_feature_endpoint() -> str:
    """
    Build the metadata service endpoint used for feature resources
    :return: the configured METADATASERVICE_BASE joined with FEATURE_ENDPOINT
    :raises Exception: if METADATASERVICE_BASE is configured as None
    """
    metadata_service_base = app.config['METADATASERVICE_BASE']
    # Validate the base *before* concatenating: the joined string can never be
    # None, so checking it afterwards would never fire and a None base would
    # surface as a bare TypeError instead of this configuration error.
    if metadata_service_base is None:
        raise Exception('An request endpoint for feature resources must be configured')
    return metadata_service_base + FEATURE_ENDPOINT


@metadata_blueprint.route('/popular_tables', methods=['GET'])
def popular_tables() -> Response:
"""
Expand Down Expand Up @@ -91,7 +100,7 @@ def popular_tables() -> Response:
def get_table_metadata() -> Response:
"""
call the metadata service endpoint and return matching results
:return: a json output containing a table metdata object as 'tableData'
:return: a json output containing a table metadata object as 'tableData'

Schema Defined Here: https://github.com/lyft/amundsenmetadatalibrary/blob/master/metadata_service/api/table.py
TODO: Define type for this
Expand Down Expand Up @@ -842,3 +851,69 @@ def get_column_lineage() -> Response:
except Exception as e:
payload = jsonify({'msg': 'Encountered exception: ' + str(e)})
return make_response(payload, HTTPStatus.INTERNAL_SERVER_ERROR)


@metadata_blueprint.route('/feature', methods=['GET'])
def get_feature_metadata() -> Response:
    """
    Flask handler for GET /feature: look up a feature in the metadata service
    :return: a json output containing a feature metadata object as 'featureData'
             and a 'msg'; the HTTP status is taken from the 'status_code' entry
             of the lookup result, falling back to 500 when it is absent
    """
    try:
        query_args = request.args
        key = get_query_param(query_args, 'key')
        # 'index' and 'source' are optional query params; they are forwarded
        # unchanged (None when not supplied).
        item_index = query_args.get('index', None)
        item_source = query_args.get('source', None)

        lookup = _get_feature_metadata(feature_key=key, index=item_index, source=item_source)
        body = jsonify(lookup)
        http_status = lookup.get('status_code', HTTPStatus.INTERNAL_SERVER_ERROR)
        return make_response(body, http_status)
    except Exception as e:
        # Any failure above is logged and reported as a 500 with an empty
        # 'featureData' payload.
        message = 'Encountered exception: ' + str(e)
        logging.exception(message)
        error_body = jsonify({'featureData': {}, 'msg': message})
        return make_response(error_body, HTTPStatus.INTERNAL_SERVER_ERROR)


@action_logging
def _get_feature_metadata(*, feature_key: str, index: int, source: str) -> Dict[str, Any]:
    """
    Request a single feature from the metadata service and marshall the result
    :param feature_key: key of the feature, appended to the feature endpoint url
    :param index: not read in this body; presumably consumed by the
                  action_logging decorator - TODO confirm
    :param source: not read in this body; presumably consumed by the
                   action_logging decorator - TODO confirm
    :return: a dict with 'featureData', 'msg' and 'status_code' entries;
             'featureData' stays empty unless the request and marshalling succeed
    """
    payload = {
        'featureData': {},
        'msg': '',
    }

    try:
        url = '{0}/{1}'.format(_get_feature_endpoint(), feature_key)
        response = request_metadata(url=url)
    except ValueError as e:
        # envoy client BadResponse is a subclass of ValueError
        message = 'Encountered exception: ' + str(e)
        payload.update({
            'msg': message,
            'status_code': getattr(e, 'code', HTTPStatus.INTERNAL_SERVER_ERROR),
        })
        logging.exception(message)
        return payload

    status_code = response.status_code
    payload['status_code'] = status_code

    if status_code != HTTPStatus.OK:
        message = 'Encountered error: Metadata request failed'
        payload['msg'] = message
        logging.error(message)
        return payload

    try:
        raw_feature: dict = response.json()

        # NOTE(review): 'key' is injected here (and the marshall_* helpers exist)
        # because the frontend needs fields that the metadata/search responses
        # do not include. Reviewers discussed adding such fields to the upstream
        # responses instead, and whether the frontend should call the
        # search/metadata services directly rather than through this Flask
        # layer; no change was requested as part of this PR.
        raw_feature['key'] = feature_key

        payload['featureData'] = marshall_feature_full(raw_feature)
        payload['msg'] = 'Success'
    except Exception as e:
        message = 'Encountered exception: ' + str(e)
        payload['msg'] = message
        logging.exception(message)
        # The exception is not re-raised: the failure is reported through
        # 'status_code' (the exception's 'code' if it has one, otherwise 500).
        payload['status_code'] = getattr(e, 'code', HTTPStatus.INTERNAL_SERVER_ERROR)

    return payload
24 changes: 24 additions & 0 deletions frontend/amundsen_application/api/utils/metadata_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import Any, Dict, List

from amundsen_common.models.dashboard import DashboardSummary, DashboardSummarySchema
from amundsen_common.models.feature import Feature, FeatureSchema
from amundsen_common.models.popular_table import PopularTable, PopularTableSchema
from amundsen_common.models.table import Table, TableSchema
from amundsen_application.models.user import load_user, dump_user
Expand Down Expand Up @@ -253,3 +254,26 @@ def _get_partition_data(watermarks: Dict) -> Dict:
return {
'is_partitioned': False
}


def marshall_feature_full(feature_dict: Dict) -> Dict:
    """
    Forms the full version of a feature Dict, with additional and sanitized fields
    :param feature_dict: Feature Dict from metadata service
    :return: Feature Dict with sanitized fields
    """
    feature_schema = FeatureSchema()
    # Load into a Feature and dump it back out so the result has the shape
    # produced by FeatureSchema rather than the raw service payload.
    loaded_feature: Feature = feature_schema.load(feature_dict)
    marshalled: Dict[str, Any] = feature_schema.dump(loaded_feature)

    # TODO do we need an 'is_editable' flag for Features? Tables compute one
    # via is_table_editable(schema, name); nothing equivalent is set here.

    marshalled['owners'] = list(map(_map_user_object_to_schema, marshalled['owners']))
    marshalled['programmatic_descriptions'] = _convert_prog_descriptions(
        marshalled['programmatic_descriptions']
    )

    return marshalled
116 changes: 115 additions & 1 deletion frontend/tests/unit/api/metadata/test_v0.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from amundsen_application import create_app
from amundsen_application.api.metadata.v0 import TABLE_ENDPOINT, LAST_INDEXED_ENDPOINT,\
POPULAR_TABLES_ENDPOINT, TAGS_ENDPOINT, USER_ENDPOINT, DASHBOARD_ENDPOINT
POPULAR_TABLES_ENDPOINT, TAGS_ENDPOINT, USER_ENDPOINT, DASHBOARD_ENDPOINT, FEATURE_ENDPOINT
from amundsen_application.config import MatchRuleObject

from amundsen_application.tests.test_utils import TEST_USER_ID
Expand Down Expand Up @@ -326,6 +326,75 @@ def setUp(self) -> None:
],
'dashboard': [],
}
self.mock_feature_metadata = {
'partition_column': {
'name': 'ds',
'description': 'This is a test',
'col_type': 'bigint',
'sort_order': 0,
'badges': [
{'category': 'column', 'badge_name': 'partition_column'}
]
},
'entity': 'rider',
'key': 'test_feature_group/test_feature_name/1.4',
'availability': ['hive'],
'last_updated_timestamp': 1563872712,
'owners': [],
'name': 'test_feature_name',
'description': 'This is a test',
'status': None,
'programmatic_descriptions': [
{'source': 'c_1', 'text': 'description c'},
{'source': 'a_1', 'text': 'description a'},
{'source': 'b_1', 'text': 'description b'}
],
'owner_tags': [],
'data_type': 'bigint',
'feature_group': 'test_feature_group',
'version': '1.4',
'tags': [],
'watermarks': [
{'watermark_type': 'low_watermark', 'partition_key': 'ds', 'partition_value': '', 'create_time': ''},
{'watermark_type': 'high_watermark', 'partition_key': 'ds', 'partition_value': '', 'create_time': ''}
],
'badges': [{'category': 'data', 'badge_name': 'pii'}],
}
self.expected_feature_metadata = {
'partition_column': {
'name': 'ds',
'description': 'This is a test',
'col_type': 'bigint',
'sort_order': 0,
'badges': [
{'category': 'column', 'badge_name': 'partition_column'}
]
},
'entity': 'rider',
'key': 'test_feature_group/test_feature_name/1.4',
'created_timestamp': None,
'availability': ['hive'],
'last_updated_timestamp': 1563872712,
'owners': [],
'name': 'test_feature_name',
'description': 'This is a test',
'status': None,
'programmatic_descriptions': [
{'source': 'c_1', 'text': 'description c'},
{'source': 'a_1', 'text': 'description a'},
{'source': 'b_1', 'text': 'description b'}
],
'owner_tags': [],
'data_type': 'bigint',
'feature_group': 'test_feature_group',
'version': '1.4',
'tags': [],
'watermarks': [
{'watermark_type': 'low_watermark', 'partition_key': 'ds', 'partition_value': '', 'create_time': ''},
{'watermark_type': 'high_watermark', 'partition_key': 'ds', 'partition_value': '', 'create_time': ''}
],
'badges': [{'category': 'data', 'badge_name': 'pii'}],
}
self.mock_dashboard_metadata = {
"badges": [],
"chart_names": [],
Expand Down Expand Up @@ -1120,3 +1189,48 @@ def test_get_related_dashboards_failure(self) -> None:
'status_code': 400
}
self.assertEqual(response.json, expected)

@responses.activate
def test_get_feature_metadata_failure(self) -> None:
    """
    Test get_feature_metadata API failure: the metadata service answers 400
    and the endpoint reports the failure with an empty 'featureData'
    :return:
    """
    feature_path = '/test_feature_group/test_feature_name/1.4'
    metadata_url = local_app.config['METADATASERVICE_BASE'] + FEATURE_ENDPOINT + feature_path
    responses.add(
        responses.GET,
        metadata_url,
        json=self.mock_feature_metadata,
        status=HTTPStatus.BAD_REQUEST,
    )

    with local_app.test_client() as client:
        request_args = dict(key='test_feature_group/test_feature_name/1.4')
        response = client.get('/api/metadata/v0/feature', query_string=request_args)
        body = json.loads(response.data)

        self.assertEqual(
            body,
            {
                'featureData': {},
                'msg': 'Encountered error: Metadata request failed',
                'status_code': 400
            },
        )

@responses.activate
def test_get_feature_metadata_success(self) -> None:
    """
    Test successful get_feature_metadata request: the metadata service answers
    200 and the endpoint returns 200 with a populated 'featureData'
    :return:
    """
    feature_path = '/test_feature_group/test_feature_name/1.4'
    metadata_url = local_app.config['METADATASERVICE_BASE'] + FEATURE_ENDPOINT + feature_path
    responses.add(
        responses.GET,
        metadata_url,
        json=self.mock_feature_metadata,
        status=HTTPStatus.OK,
    )

    with local_app.test_client() as client:
        request_args = dict(key='test_feature_group/test_feature_name/1.4')
        response = client.get('/api/metadata/v0/feature', query_string=request_args)
        body = json.loads(response.data)

        self.assertEqual(response.status_code, HTTPStatus.OK)
        # NOTE: assertCountEqual iterates its arguments, so for two dicts this
        # compares the sets of keys only, not the values stored under them.
        self.assertCountEqual(body.get('featureData'), self.expected_feature_metadata)