diff --git a/README.md b/README.md index 4366464..c425d65 100644 --- a/README.md +++ b/README.md @@ -32,12 +32,16 @@ Required Parameters: Timestamp of requesting API. Optional Parameters: - -u PWC_USER, --pwc_user PWC_USER + -pu PWC_USER, --pwc_user PWC_USER Paper with code repository user name. - -p PWC_PASSWORD, --pwc_password PWC_PASSWORD + -pp PWC_PASSWORD, --pwc_password PWC_PASSWORD Paper with code repository password. - -t GITHUB_ACC_TOKEN, --github_acc_token GITHUB_ACC_TOKEN + -gt GITHUB_ACC_TOKEN, --github_acc_token GITHUB_ACC_TOKEN Github access token. + -yk YOUTUBE_DEV_KEY, --youtube_dev_key YOUTUBE_DEV_KEY + Youtube developer key. + -yntp Y_NEXT_PAGE_TOKEN, --y_next_page_token Y_NEXT_PAGE_TOKEN + Next page token for Youtube API. ```
@@ -51,4 +55,5 @@ api_request = APIRequest(source, query, init_idx, count) api_request.pwc_auth_info = ('user_name', 'password') api_request.github_acc_token = 'token' -``` \ No newline at end of file +api_request.youtube_developer_key = 'your_key' +``` diff --git a/build.py b/build.py index cb91233..dd34c76 100644 --- a/build.py +++ b/build.py @@ -18,3 +18,4 @@ def set_properties(project): project.build_depends_on("mock") project.build_depends_on("requests") project.build_depends_on("pygithub") + project.build_depends_on("google-api-python-client") diff --git a/requirements.txt b/requirements.txt index 656236f..c118007 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ PyGithub==1.43.8 -pybuilder \ No newline at end of file +pybuilder +requests +google-api-python-client \ No newline at end of file diff --git a/src/main/python/mlsearch/api_requester.py b/src/main/python/mlsearch/api_requester.py index 33076c2..3077ba5 100644 --- a/src/main/python/mlsearch/api_requester.py +++ b/src/main/python/mlsearch/api_requester.py @@ -2,6 +2,7 @@ from mlsearch.protocol import Protocol from github import Github from requests.auth import HTTPBasicAuth +import googleapiclient.discovery import json import requests # import scholarly @@ -10,7 +11,7 @@ class APIRequest(): """For handling the different Valid API requests.""" - def __init__(self, source, query, init_idx, count): + def __init__(self, source, query, init_idx, count, y_next_page_token=None): """ Initialization for the class. @@ -18,10 +19,12 @@ def __init__(self, source, query, init_idx, count): :param query: The query for searching. :param init_idx: The initial pagination index. :param count: The number of records to be fetched. + :param y_next_page_token: The current page token for youtube API. 
""" self.params = {'query':query, 'init_idx':init_idx, - 'count':count, 'source': source} + 'count':count, 'source': source, + 'y_next_page_token': y_next_page_token} self.params_model = {'query':str, 'init_idx':int, 'count':int} # Load the configuration file @@ -32,7 +35,8 @@ def __init__(self, source, query, init_idx, count): self.data = { 'response_code': 201, 'content': None, - 'has_next_page': False} + 'has_next_page': False, + 'y_next_page_token': None} @property def github_acc_token(self): @@ -43,6 +47,14 @@ def github_acc_token(self, access_token): if access_token: self._config.GITHUB_ACC_TOKEN = access_token + @property + def youtube_developer_key(self): + return self._config.YOUTUBE_DEVELOPER_KEY + + @youtube_developer_key.setter + def youtube_developer_key(self, developer_key): + if developer_key: + self._config.YOUTUBE_DEVELOPER_KEY = developer_key @property def pwc_auth_info(self): @@ -70,7 +82,6 @@ def _validate_params(self): raise TypeError( f'Invalid type for {item}. {typ} is expected but ' f'{type(self.params[item])} is given.') - if self.params['source'] not in self._config.VALID_API_SOURCE: raise ValueError( f"Invalid value for {self.params['source']}. 
" @@ -118,8 +129,8 @@ def _fetch_github(self) -> [Protocol]: } results.append(Protocol(data)) - self.data['response_code'] = 200 - self.data['content'] = [proto.to_JSON() for proto in results] + self.data['response_code'] = 200 + self.data['content'] = [proto.to_JSON() for proto in results] def _fetch_paperwithcode(self) -> [Protocol]: """Fetch Paper with Code Repository""" @@ -161,7 +172,59 @@ def _fetch_paperwithcode(self) -> [Protocol]: self.data['content'] = [proto.to_JSON() for proto in results] self.data['response_code'] = query_result.status_code - + + def _fetch_youtube(self, y_next_page_token=None) -> [Protocol]: + """Fetch the Youtube Repository""" + results = [] + youtube = googleapiclient.discovery.build( + self._config.YOUTUBE_SERVICE_NAME, + self._config.YOUTUBE_API_VERSION, + developerKey = self._config.YOUTUBE_DEVELOPER_KEY) + request = youtube.search().list( + part=self._config.YOUTUBE_PART, + maxResults=self.params['count'], + order=self._config.YOUTUBE_ORDER, + q=self.params['query'], + safeSearch=self._config.YOUTUBE_SAFESEARCH, + pageToken=y_next_page_token + ) + response = request.execute() + + if 'items' in response and len(response['items']) > 0: + for item in response['items']: + data = { + 'video_id': item.get( + 'id', dict({'videoId': None}) + ).get('videoId', None), + 'title': item.get( + 'snippet', dict({'title': None}) + ).get('title', None), + 'description': item.get( + 'snippet',dict({'description': None}) + ).get('description', None), + 'channel_id': item.get( + 'snippet',dict({'channelId': None}) + ).get('channelId', None), + 'channel_title': item.get( + 'snippet',dict({'channelTitle': None}) + ).get('channelTitle', None), + 'live_broadcast_content': item.get( + 'snippet',dict({'liveBroadcastContent': None}) + ).get('liveBroadcastContent', None), + 'published_datetime': item.get( + 'snippet',dict({'publishedAt': None}) + ).get('publishedAt', None), + 'thumbnails': item.get( + 'snippet',dict({'thumbnails': None}) + 
).get('thumbnails', None), + 'source': self.params.get('source', ''), + } + results.append(Protocol(data)) + self.data['y_next_page_token'] = response.get('nextPageToken', None) + self.data['content'] = [proto.to_JSON() for proto in results] + self.data['has_next_page'] = response.get('pageInfo', dict({'totalResults':0})).get('totalResults', 0) > 0 + self.data['response_code'] = 200 + def fetch_data(self) -> json: """Fetch the data from designated API source.""" @@ -170,7 +233,10 @@ def fetch_data(self) -> json: self._fetch_paperwithcode() if self.params.get('source', '') == 'github': - responses = self._fetch_github() + self._fetch_github() + + if self.params.get('source', '') == 'youtube': + self._fetch_youtube(self.params.get('y_next_page_token', None)) # TODO: Implement the function for Coursera. However, this function # may be handled by the backend server. @@ -181,4 +247,4 @@ def fetch_data(self) -> json: self.data['response_code'] = 500 self.data['content'] = str(ex) - return self.data \ No newline at end of file + return self.data diff --git a/src/main/python/mlsearch/config.py b/src/main/python/mlsearch/config.py index fdbc95a..c53574f 100644 --- a/src/main/python/mlsearch/config.py +++ b/src/main/python/mlsearch/config.py @@ -13,4 +13,12 @@ class Config(object): GITHUB_URL = os.environ.get('GITHUB_URL') or "in:readme+in:description" # AIP Source - VALID_API_SOURCE = ['paperwithcode', 'github', 'coursera'] \ No newline at end of file + VALID_API_SOURCE = ['paperwithcode', 'github', 'coursera', 'youtube'] + + # Youtube configuration + YOUTUBE_SERVICE_NAME = os.environ.get('YOUTUBE_SERVICE_NAME') or "youtube" + YOUTUBE_API_VERSION = os.environ.get('YOUTUBE_API_VERSION') or "v3" + YOUTUBE_DEVELOPER_KEY = os.environ.get('YOUTUBE_DEVELOPER_KEY') or None + YOUTUBE_ORDER = os.environ.get('YOUTUBE_ORDER') or "relevance" + YOUTUBE_SAFESEARCH = os.environ.get('YOUTUBE_SAFESEARCH') or "strict" + YOUTUBE_PART = os.environ.get('YOUTUBE_PART') or "snippet" \ No newline 
at end of file diff --git a/src/main/python/mlsearch/helper.py b/src/main/python/mlsearch/helper.py index e53bef8..111cf5f 100644 --- a/src/main/python/mlsearch/helper.py +++ b/src/main/python/mlsearch/helper.py @@ -40,7 +40,8 @@ def parse_parameters(event): :return: dict( 'query', 'init_idx', 'count', 'source', - 'cookies', 'timestamp') + 'cookies', 'timestamp', + 'y_next_page_token') """ try: param = dict() @@ -50,6 +51,7 @@ def parse_parameters(event): param['source'] = event['source'] param['cookies'] = event['cookies'] param['timestamp'] = event['timestamp'] + param['y_next_page_token'] = event['y_next_page_token'] if param['init_idx'] >= 0 and param['count'] > 0: return param @@ -57,4 +59,4 @@ def parse_parameters(event): return dict() except: - return dict() \ No newline at end of file + return dict() diff --git a/src/main/python/mlsearch/protocol.py b/src/main/python/mlsearch/protocol.py index 05c67aa..826cc46 100644 --- a/src/main/python/mlsearch/protocol.py +++ b/src/main/python/mlsearch/protocol.py @@ -25,7 +25,13 @@ def __init__(self, kwargs): 'partners_v1', 'instructors_v1', # Source Flag - 'source' + 'source', + + # Youtube + 'video_id', + 'channel_id', 'channel_title', + 'live_broadcast_content', 'published_datetime', + 'thumbnails', ] for param in kwargs: @@ -54,6 +60,12 @@ def __init__(self, kwargs): self.instructors_v1 = kwargs.get('instructors_v1', None) self.source = kwargs.get('source', None) self.pwc_url = kwargs.get('pwc_url', None) + self.video_id = kwargs.get('video_id', None) + self.channel_id = kwargs.get('channel_id', None) + self.channel_title = kwargs.get('channel_title', None) + self.live_broadcast_content = kwargs.get('live_broadcast_content', None) + self.published_datetime = kwargs.get('published_datetime', None) + self.thumbnails = kwargs.get('thumbnails', dict()) def to_JSON(self): """Transform the Protocol object to JSON object.""" diff --git a/src/main/scripts/mlsearch b/src/main/scripts/mlsearch index 085c385..0d1b98e 100644 
--- a/src/main/scripts/mlsearch +++ b/src/main/scripts/mlsearch @@ -19,9 +19,11 @@ ap.add_argument('-c', '--count', required=True, help="Total number of results to ap.add_argument('-s', '--source', required=True, help="Source API to be looking for.") ap.add_argument('-ck', '--cookies', required=True, help="Cookies of current user.") ap.add_argument('-tm', '--timestamp', required=True, help="Timestamp of requesting API.") -ap.add_argument('-u', '--pwc_user', required=False, help="Paper with code repository user name.") -ap.add_argument('-p', '--pwc_password', required=False, help="Paper with code repository password.") -ap.add_argument('-t', '--github_acc_token', required=False, help="Github access token.") +ap.add_argument('-pu', '--pwc_user', required=False, help="Paper with code repository user name.") +ap.add_argument('-pp', '--pwc_password', required=False, help="Paper with code repository password.") +ap.add_argument('-gt', '--github_acc_token', required=False, help="Github access token.") +ap.add_argument('-yk', '--youtube_dev_key', required=False, help="Youtube developer key.") +ap.add_argument('-yntp', '--y_next_page_token', required=False, help="Next page token for Youtube API.") args = vars(ap.parse_args()) def main(event): @@ -35,30 +37,41 @@ def main(event): param_names = [ 'query', 'init_idx', 'count', 'source', - 'cookies', 'timestamp'] + 'cookies', 'timestamp', + 'y_next_page_token'] response_msg = hp.response('success', 200) - if hp.is_valid_parameters(event, param_names): params = hp.parse_parameters(event) if params.values(): - api_request = APIRequest(params['source'], params['query'], params['init_idx'], params['count']) + api_request = APIRequest( + params['source'], + params['query'], + params['init_idx'], + params['count'], + params['y_next_page_token']) if 'pwc_user'in event and 'pwc_password' in event: api_request.pwc_auth_info = (event['pwc_user'], event['pwc_password']) if 'github_acc_token' in event: api_request.github_acc_token = 
event['github_acc_token'] + if 'youtube_developer_key' in event: + api_request.youtube_developer_key = event['youtube_developer_key'] data = api_request.fetch_data() response_msg = hp.response( message=data.get('content',''), status_code=data.get('response_code'), headers=headers, - optional_attributes={'has_next_page': data.get('has_next_page', False)}) + optional_attributes={ + 'has_next_page': data.get('has_next_page', False), + 'y_next_page_token': data.get('y_next_page_token', None)}) + return response_msg response_msg = hp.response('Invalid parameters.', 400) return response_msg - except (ValueError, TypeError): - response_msg = hp.response('Invalid parameters.', 400) + except (ValueError, TypeError) as ex: + response_msg = hp.response(str(ex), 400) + return response_msg except Exception as ex: response_msg = hp.response(str(ex), 500) @@ -71,7 +84,8 @@ if __name__ == "__main__": 'count': args['count'], 'source': args['source'], 'cookies': args['cookies'], - 'timestamp': args['timestamp'] + 'timestamp': args['timestamp'], + 'y_next_page_token': args['y_next_page_token'] } if args['pwc_user']: @@ -80,7 +94,9 @@ if __name__ == "__main__": event['pwc_password'] = args['pwc_password'] if args['github_acc_token']: event['github_acc_token'] = args['github_acc_token'] + if args['youtube_dev_key']: + event['youtube_developer_key'] = args['youtube_dev_key'] result = main(event) pp = pprint.PrettyPrinter(indent=2) - pp.pprint(result) \ No newline at end of file + pp.pprint(result)