amundsen-io · feng-tao · Jan 27, 2021 · Jan 21, 2021 · Jan 22, 2021 · Jan 27, 2021
@@ -2,10 +2,11 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import logging
+import json
 
 from http import HTTPStatus
 from typing import Tuple, Any
-
+from ast import literal_eval
 from flasgger import swag_from
 from flask_restful import Resource, reqparse
 from search_service.proxy import get_proxy_client
@@ -58,7 +59,7 @@ def post(self) -> Tuple[Any, int]:
          :param data: list of data objects to be indexed in Elasticsearch
          :return: name of new index
          """
-        self.parser.add_argument('data', required=True)
+        self.parser.add_argument('data', required=True, action='append')
         args = self.parser.parse_args()
 
         try:
@@ -78,11 +79,15 @@ def put(self) -> Tuple[Any, int]:
         :param data: list of data objects to be indexed in Elasticsearch
         :return: name of index
         """
-        self.parser.add_argument('data', required=True)
+        self.parser.add_argument('data', required=True, action='append')
         args = self.parser.parse_args()
 
         try:
-            data = self.schema(many=True, strict=False).loads(args.get('data')).data
+            table_dict_list = []
+            for table_str in args.get('data'):
+                table_dict_list.append(literal_eval(table_str))
+            table_list_json = json.dumps(table_dict_list)
+            data = self.schema(many=True, strict=False).loads(table_list_json).data
             results = self.proxy.update_document(data=data, index=args.get('index'))
             return results, HTTPStatus.OK
         except RuntimeError as e:

@@ -11,19 +11,18 @@ parameters:
       type: string
       default: 'table_search_index'
     required: false
-  - name: body
-    in: body
-    schema:
-      type: object
-      name: data
-      properties:
-        data:
-          type: array
-          description: 'List of tables'
-          items:
-            $ref: '#/components/schemas/TableFields'
-    description: 'Tables to create'
-    required: true
+requestBody:
+  content:
+    'application/json':
+      schema:
+        type: object
+        properties:
+          data:
+            type: array
+            items:
+              $ref: '#/components/schemas/TableFields'
+  description: 'Tables to create'
+  required: true
 responses:
   200:
     description: Empty json response

@@ -11,19 +11,18 @@ parameters:
       type: string
       default: 'table_search_index'
     required: false
-  - name: body
-    in: body
-    schema:
-      type: object
-      name: data
-      properties:
-        data:
-          type: array
-          description: 'List of tables'
-          items:
-            $ref: '#/components/schemas/TableFields'
-    description: 'Tables to update'
-    required: true
+requestBody:
+  content:
+    'application/json':
+      schema:
+        type: object
+        properties:
+          data:
+            type: array
+            items:
+              $ref: '#/components/schemas/TableFields'
+  description: 'Tables to update'
+  required: true
 responses:
   200:
     description: Empty json response

@@ -11,19 +11,18 @@ parameters:
       type: string
       default: user_search_index
     required: false
-  - name: body
-    in: body
-    schema:
-      type: object
-      name: data
-      properties:
-        data:
-          type: array
-          description: 'List of users'
-          items:
-            $ref: '#/components/schemas/UserFields'
-    description: 'Users to create'
-    required: true
+requestBody:
+  content:
+    'application/json':
+      schema:
+        type: object
+        properties:
+          data:
+            type: array
+            items:
+              $ref: '#/components/schemas/UserFields'
+  description: 'Users to create'
+  required: true
 responses:
   200:
     description: Empty json response

@@ -11,19 +11,18 @@ parameters:
       type: string
       default: user_search_index
     required: false
-  - name: body
-    in: body
-    schema:
-      type: object
-      name: data
-      properties:
-        data:
-          type: array
-          description: 'List of users'
-          items:
-            $ref: '#/components/schemas/UserFields'
-    description: 'Users to update'
-    required: true
+requestBody:
+  content:
+    'application/json':
+      schema:
+        type: object
+        properties:
+          data:
+            type: array
+            items:
+              $ref: '#/components/schemas/UserFields'
+  description: 'Users to update'
+  required: true
 responses:
   200:
     description: Empty json response

@@ -52,6 +52,10 @@ components:
     TableFields:
       type: object
       properties:
+        id:
+          type: string
+          description: 'elasticsearch doc id'
+          example: 'M81jD3cBdULZTSY96PSh'
         name:
           type: string
           description: 'name of table'
@@ -82,16 +86,54 @@ components:
               type: string
           description: 'list of column names'
           example: ['col1', 'col2']
+        column_descriptions:
+          type: array
+          items:
+            type: string
+          description: 'list of column descriptions'
+          example: ['column description1', 'column description2']
+        programmatic_descriptions:
+          type: array
+          items:
+            type: string
+          description: 'list of programmatic descriptions'
+          example: ['programmatic description1', 'programmatic description2']
         tags:
           type: array
           items:
-              type: string
+            type: object
+            properties:
+              tag_name:
+                type: string
           description: 'list of table tags'
-          example: ['tag2', 'tag1']
+          example: [{'tag_name': 'tag1'}, {'tag_name': 'tag2'}]
+        badges:
+          type: array
+          items:
+            type: object
+            properties:
+              tag_name:
+                type: string
+          description: 'list of table badges'
+          example: [{'tag_name': 'badge1'}, {'tag_name': 'badge2'}]
         last_updated_timestamp:
           type: integer
           description: 'table last updated time'
           example: 1568814420
+        display_name:
+          type: string
+          description: 'table display name'
+          example: 'display_name'
+        total_usage:
+          type: integer
+          description: 'total usage'
+          example: 0
+        schema_description:
+          type: string
+          description: 'schema description'
+          example: 'schema description'
     DashboardFields:
       type: object
       properties:
@@ -130,6 +172,10 @@ components:
     UserFields:
       type: object
       properties:
+        id:
+          type: string
+          description: 'elasticsearch doc id'
+          example: 'M81jD3cBdULZTSY96PSh'
         name:
           type: string
           description: 'user name'

@@ -8,38 +8,46 @@
 
 from .base import Base
 from search_service.models.tag import Tag
+import time
 
 
 @attr.s(auto_attribs=True, kw_only=True)
 class Table(Base):
     """
     This represents the part of a table stored in the search proxy
     """
-    database: str
-    cluster: str
-    schema: str
-    name: str
-    key: str
+    id: str
+    database: Optional[str] = None
+    cluster: Optional[str] = None
+    schema: Optional[str] = None
+    name: Optional[str] = None
+    key: Optional[str] = None
     display_name: Optional[str] = None
-    tags: List[Tag]
-    badges: List[Tag]
+    tags: List[Tag] = []
+    badges: List[Tag] = []
     description: Optional[str] = None
-    last_updated_timestamp: int
+    last_updated_timestamp: int = int(time.time())
     # The following properties are lightly-transformed properties from the normal table object:
-    column_names: List[str]
+    column_names: List[str] = []
     column_descriptions: List[str] = []
     programmatic_descriptions: List[str] = []
     # The following are search-only properties:
     total_usage: int = 0
     schema_description: Optional[str] = attr.ib(default=None)
 
     def get_id(self) -> str:
-        # uses the table key as the document id in ES
-        return self.key
+        return self.id
+
+    def get_attrs_dict(self) -> dict:
+        attrs_dict = self.__dict__.copy()
+        attrs_dict['tags'] = [str(tag) for tag in self.tags]
+        attrs_dict['badges'] = [str(tag) for tag in self.badges]
+        return attrs_dict
 
     @classmethod
     def get_attrs(cls) -> Set:
         return {
+            'id',
             'name',
             'key',
             'description',

@@ -13,6 +13,9 @@ class Tag:
     def __init__(self, tag_name: str):
         self.tag_name = tag_name
 
+    def __str__(self) -> str:
+        return self.tag_name
+
 
 class TagSchema(AttrsSchema):
     class Meta:

@@ -17,14 +17,16 @@ class User(Base, CommonUser):
     This represents the part of a user stored in the search proxy
     """
     manager_email: Optional[str] = None
+    id: str
 
     def get_id(self) -> str:
-        # uses the user email as the document id in ES
-        return self.email if self.email else ''
+        # uses the id attribute as the document id in ES
+        return self.id
 
     @classmethod
     def get_attrs(cls) -> Set:
         return {
+            'id',
             'full_name',
             'first_name',
             'last_name',

@@ -92,7 +92,8 @@ def _prepare_tables(self, response: EntityCollection, enhance_metadata: bool = F
 
             badges: List[Tag] = tags
 
-            table = Table(name=entity_name,
+            table = Table(id=f"{entity.typeName}://{db_cluster}.{db_name}/{entity_name}",
+                          name=entity_name,
                           key=f"{entity.typeName}://{db_cluster}.{db_name}/{entity_name}",
                           description=entity_attrs.get('description'),
                           cluster=db_cluster,

@@ -116,6 +116,8 @@ def _get_search_result(self, page_index: int,
 
         for hit in response:
             try:
+                es_metadata = hit.__dict__.get('meta', {})
+
                 # ES hit: {'_d_': {'key': xxx...}
                 es_payload = hit.__dict__.get('_d_', {})
                 if not es_payload:
@@ -124,6 +126,7 @@ def _get_search_result(self, page_index: int,
                 for attr, val in es_payload.items():
                     if attr in model.get_attrs():
                         result[attr] = self._get_instance(attr=attr, val=val)
+                result['id'] = self._get_instance(attr='id', val=es_metadata['id'])
 
                 results.append(model(**result))
             except Exception:
@@ -590,7 +593,7 @@ def _build_update_actions(self, data: List[Table], index_key: str) -> List[Dict[
 
         for item in data:
             actions.append({'update': {'_index': index_key, '_type': item.get_type(), '_id': item.get_id()}})
-            actions.append({'doc': item.__dict__})
+            actions.append({'doc': item.get_attrs_dict()})
         return actions
 
     def _build_delete_actions(self, data: List[str], index_key: str, type: str) -> List[Dict[str, Any]]:

@@ -33,7 +33,7 @@ def test_post(self, get_proxy: MagicMock, RequestParser: MagicMock) -> None:
     @patch('search_service.api.document.get_proxy_client')
     def test_put(self, get_proxy: MagicMock, RequestParser: MagicMock) -> None:
         mock_proxy = get_proxy.return_value = Mock()
-        RequestParser().parse_args.return_value = dict(data='{}', index='fake_index')
+        RequestParser().parse_args.return_value = dict(data=[], index='fake_index')
 
         response = DocumentTablesAPI().put()
         self.assertEqual(list(response)[1], HTTPStatus.OK)

@@ -33,7 +33,7 @@ def test_post(self, get_proxy: MagicMock, RequestParser: MagicMock) -> None:
     @patch('search_service.api.document.get_proxy_client')
     def test_put(self, get_proxy: MagicMock, RequestParser: MagicMock) -> None:
         mock_proxy = get_proxy.return_value = Mock()
-        RequestParser().parse_args.return_value = dict(data='{}', index='fake_index')
+        RequestParser().parse_args.return_value = dict(data=[], index='fake_index')
 
         response = DocumentUsersAPI().put()
         self.assertEqual(list(response)[1], HTTPStatus.OK)