diff --git a/README.md b/README.md
index 4366464..c425d65 100644
--- a/README.md
+++ b/README.md
@@ -32,12 +32,16 @@ Required Parameters:
Timestamp of requesting API.
Optional Parameters:
- -u PWC_USER, --pwc_user PWC_USER
+ -pu PWC_USER, --pwc_user PWC_USER
Paper with code repository user name.
- -p PWC_PASSWORD, --pwc_password PWC_PASSWORD
+ -pp PWC_PASSWORD, --pwc_password PWC_PASSWORD
Paper with code repository password.
- -t GITHUB_ACC_TOKEN, --github_acc_token GITHUB_ACC_TOKEN
+ -gt GITHUB_ACC_TOKEN, --github_acc_token GITHUB_ACC_TOKEN
Github access token.
+ -yk YOUTUBE_DEV_KEY, --youtube_dev_key YOUTUBE_DEV_KEY
+ Youtube developer key.
+ -yntp Y_NEXT_PAGE_TOKEN, --y_next_page_token Y_NEXT_PAGE_TOKEN
+ Next page token for Youtube API.
```
@@ -51,4 +55,5 @@ api_request = APIRequest(source, query,
init_idx, count)
api_request.pwc_auth_info = ('user_name', 'password')
api_request.github_acc_token = 'token'
-```
\ No newline at end of file
+api_request.youtube_developer_key = 'your_key'
+```
diff --git a/build.py b/build.py
index cb91233..dd34c76 100644
--- a/build.py
+++ b/build.py
@@ -18,3 +18,4 @@ def set_properties(project):
project.build_depends_on("mock")
project.build_depends_on("requests")
project.build_depends_on("pygithub")
+ project.build_depends_on("google-api-python-client")
diff --git a/requirements.txt b/requirements.txt
index 656236f..c118007 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,4 @@
PyGithub==1.43.8
-pybuilder
\ No newline at end of file
+pybuilder
+requests
+google-api-python-client
\ No newline at end of file
diff --git a/src/main/python/mlsearch/api_requester.py b/src/main/python/mlsearch/api_requester.py
index 33076c2..3077ba5 100644
--- a/src/main/python/mlsearch/api_requester.py
+++ b/src/main/python/mlsearch/api_requester.py
@@ -2,6 +2,7 @@
from mlsearch.protocol import Protocol
from github import Github
from requests.auth import HTTPBasicAuth
+import googleapiclient.discovery
import json
import requests
# import scholarly
@@ -10,7 +11,7 @@
class APIRequest():
"""For handling the different Valid API requests."""
- def __init__(self, source, query, init_idx, count):
+ def __init__(self, source, query, init_idx, count, y_next_page_token=None):
"""
Initialization for the class.
@@ -18,10 +19,12 @@ def __init__(self, source, query, init_idx, count):
:param query: The query for searching.
:param init_idx: The initial pagination index.
:param count: The number of records to be fetched.
+ :param y_next_page_token: The current page token for youtube API.
"""
self.params = {'query':query, 'init_idx':init_idx,
- 'count':count, 'source': source}
+ 'count':count, 'source': source,
+ 'y_next_page_token': y_next_page_token}
self.params_model = {'query':str, 'init_idx':int,
'count':int}
# Load the configuration file
@@ -32,7 +35,8 @@ def __init__(self, source, query, init_idx, count):
self.data = {
'response_code': 201,
'content': None,
- 'has_next_page': False}
+ 'has_next_page': False,
+ 'y_next_page_token': None}
@property
def github_acc_token(self):
@@ -43,6 +47,14 @@ def github_acc_token(self, access_token):
if access_token:
self._config.GITHUB_ACC_TOKEN = access_token
+    @property
+    def youtube_developer_key(self):
+        """Youtube developer key currently held by the configuration."""
+        current_key = self._config.YOUTUBE_DEVELOPER_KEY
+        return current_key
+
+    @youtube_developer_key.setter
+    def youtube_developer_key(self, developer_key):
+        """Store the key on the configuration; falsy values are ignored."""
+        if not developer_key:
+            # Keep whatever key the configuration already carries.
+            return
+        self._config.YOUTUBE_DEVELOPER_KEY = developer_key
@property
def pwc_auth_info(self):
@@ -70,7 +82,6 @@ def _validate_params(self):
raise TypeError(
f'Invalid type for {item}. {typ} is expected but '
f'{type(self.params[item])} is given.')
-
if self.params['source'] not in self._config.VALID_API_SOURCE:
raise ValueError(
f"Invalid value for {self.params['source']}. "
@@ -118,8 +129,8 @@ def _fetch_github(self) -> [Protocol]:
}
results.append(Protocol(data))
- self.data['response_code'] = 200
- self.data['content'] = [proto.to_JSON() for proto in results]
+ self.data['response_code'] = 200
+ self.data['content'] = [proto.to_JSON() for proto in results]
def _fetch_paperwithcode(self) -> [Protocol]:
"""Fetch Paper with Code Repository"""
@@ -161,7 +172,59 @@ def _fetch_paperwithcode(self) -> [Protocol]:
self.data['content'] = [proto.to_JSON() for proto in results]
self.data['response_code'] = query_result.status_code
-
+
+    def _fetch_youtube(self, y_next_page_token=None) -> [Protocol]:
+        """
+        Fetch video search results from the Youtube Data API.
+
+        The outcome is written to ``self.data`` ('content',
+        'y_next_page_token', 'has_next_page' and 'response_code');
+        nothing is returned. The return annotation is kept identical to
+        the sibling ``_fetch_*`` methods.
+
+        :param y_next_page_token: Page token forwarded to the API as
+            ``pageToken`` (default None).
+        """
+        youtube = googleapiclient.discovery.build(
+            self._config.YOUTUBE_SERVICE_NAME,
+            self._config.YOUTUBE_API_VERSION,
+            developerKey=self._config.YOUTUBE_DEVELOPER_KEY)
+        request = youtube.search().list(
+            part=self._config.YOUTUBE_PART,
+            maxResults=self.params['count'],
+            order=self._config.YOUTUBE_ORDER,
+            q=self.params['query'],
+            safeSearch=self._config.YOUTUBE_SAFESEARCH,
+            pageToken=y_next_page_token
+        )
+        response = request.execute()
+
+        # Protocol field name -> key inside the item's 'snippet' object.
+        snippet_fields = {
+            'title': 'title',
+            'description': 'description',
+            'channel_id': 'channelId',
+            'channel_title': 'channelTitle',
+            'live_broadcast_content': 'liveBroadcastContent',
+            'published_datetime': 'publishedAt',
+            'thumbnails': 'thumbnails',
+        }
+        source = self.params.get('source', '')
+
+        results = []
+        for item in response.get('items') or []:
+            # 'id' / 'snippet' may be missing or null; fall back to an
+            # empty dict so every field simply resolves to None.
+            item_id = item.get('id') or {}
+            snippet = item.get('snippet') or {}
+            data = {'video_id': item_id.get('videoId')}
+            for field, key in snippet_fields.items():
+                data[field] = snippet.get(key)
+            data['source'] = source
+            results.append(Protocol(data))
+
+        next_page_token = response.get('nextPageToken', None)
+        self.data['y_next_page_token'] = next_page_token
+        self.data['content'] = [proto.to_JSON() for proto in results]
+        # A further page exists only when the API returns a token for it.
+        # pageInfo.totalResults describes the whole result set, so it
+        # stays positive on the last page and cannot signal "has next".
+        self.data['has_next_page'] = bool(next_page_token)
+        self.data['response_code'] = 200
+
+
def fetch_data(self) -> json:
"""Fetch the data from designated API source."""
@@ -170,7 +233,10 @@ def fetch_data(self) -> json:
self._fetch_paperwithcode()
if self.params.get('source', '') == 'github':
- responses = self._fetch_github()
+ self._fetch_github()
+
+ if self.params.get('source', '') == 'youtube':
+ self._fetch_youtube(self.params.get('y_next_page_token', None))
# TODO: Implement the function for Coursera. However, this function
# may be handled by the backend server.
@@ -181,4 +247,4 @@ def fetch_data(self) -> json:
self.data['response_code'] = 500
self.data['content'] = str(ex)
- return self.data
\ No newline at end of file
+ return self.data
diff --git a/src/main/python/mlsearch/config.py b/src/main/python/mlsearch/config.py
index fdbc95a..c53574f 100644
--- a/src/main/python/mlsearch/config.py
+++ b/src/main/python/mlsearch/config.py
@@ -13,4 +13,12 @@ class Config(object):
GITHUB_URL = os.environ.get('GITHUB_URL') or "in:readme+in:description"
# AIP Source
- VALID_API_SOURCE = ['paperwithcode', 'github', 'coursera']
\ No newline at end of file
+ VALID_API_SOURCE = ['paperwithcode', 'github', 'coursera', 'youtube']
+
+ # Youtube configuration
+ YOUTUBE_SERVICE_NAME = os.environ.get('YOUTUBE_SERVICE_NAME') or "youtube"
+ YOUTUBE_API_VERSION = os.environ.get('YOUTUBE_API_VERSION') or "v3"
+ YOUTUBE_DEVELOPER_KEY = os.environ.get('YOUTUBE_DEVELOPER_KEY') or None
+ YOUTUBE_ORDER = os.environ.get('YOUTUBE_ORDER') or "relevance"
+ YOUTUBE_SAFESEARCH = os.environ.get('YOUTUBE_SAFESEARCH') or "strict"
+ YOUTUBE_PART = os.environ.get('YOUTUBE_PART') or "snippet"
\ No newline at end of file
diff --git a/src/main/python/mlsearch/helper.py b/src/main/python/mlsearch/helper.py
index e53bef8..111cf5f 100644
--- a/src/main/python/mlsearch/helper.py
+++ b/src/main/python/mlsearch/helper.py
@@ -40,7 +40,8 @@ def parse_parameters(event):
:return: dict(
'query', 'init_idx',
'count', 'source',
- 'cookies', 'timestamp')
+ 'cookies', 'timestamp',
+ 'y_next_page_token')
"""
try:
param = dict()
@@ -50,6 +51,7 @@ def parse_parameters(event):
param['source'] = event['source']
param['cookies'] = event['cookies']
param['timestamp'] = event['timestamp']
+ param['y_next_page_token'] = event['y_next_page_token']
if param['init_idx'] >= 0 and param['count'] > 0:
return param
@@ -57,4 +59,4 @@ def parse_parameters(event):
return dict()
except:
- return dict()
\ No newline at end of file
+ return dict()
diff --git a/src/main/python/mlsearch/protocol.py b/src/main/python/mlsearch/protocol.py
index 05c67aa..826cc46 100644
--- a/src/main/python/mlsearch/protocol.py
+++ b/src/main/python/mlsearch/protocol.py
@@ -25,7 +25,13 @@ def __init__(self, kwargs):
'partners_v1', 'instructors_v1',
# Source Flag
- 'source'
+ 'source',
+
+ # Youtube
+ 'video_id',
+ 'channel_id', 'channel_title',
+ 'live_broadcast_content', 'published_datetime',
+ 'thumbnails',
]
for param in kwargs:
@@ -54,6 +60,12 @@ def __init__(self, kwargs):
self.instructors_v1 = kwargs.get('instructors_v1', None)
self.source = kwargs.get('source', None)
self.pwc_url = kwargs.get('pwc_url', None)
+ self.video_id = kwargs.get('video_id', None)
+ self.channel_id = kwargs.get('channel_id', None)
+ self.channel_title = kwargs.get('channel_title', None)
+ self.live_broadcast_content = kwargs.get('live_broadcast_content', None)
+ self.published_datetime = kwargs.get('published_datetime', None)
+ self.thumbnails = kwargs.get('thumbnails', dict())
def to_JSON(self):
"""Transform the Protocol object to JSON object."""
diff --git a/src/main/scripts/mlsearch b/src/main/scripts/mlsearch
index 085c385..0d1b98e 100644
--- a/src/main/scripts/mlsearch
+++ b/src/main/scripts/mlsearch
@@ -19,9 +19,11 @@ ap.add_argument('-c', '--count', required=True, help="Total number of results to
ap.add_argument('-s', '--source', required=True, help="Source API to be looking for.")
ap.add_argument('-ck', '--cookies', required=True, help="Cookies of current user.")
ap.add_argument('-tm', '--timestamp', required=True, help="Timestamp of requesting API.")
-ap.add_argument('-u', '--pwc_user', required=False, help="Paper with code repository user name.")
-ap.add_argument('-p', '--pwc_password', required=False, help="Paper with code repository password.")
-ap.add_argument('-t', '--github_acc_token', required=False, help="Github access token.")
+ap.add_argument('-pu', '--pwc_user', required=False, help="Paper with code repository user name.")
+ap.add_argument('-pp', '--pwc_password', required=False, help="Paper with code repository password.")
+ap.add_argument('-gt', '--github_acc_token', required=False, help="Github access token.")
+ap.add_argument('-yk', '--youtube_dev_key', required=False, help="Youtube developer key.")
+ap.add_argument('-yntp', '--y_next_page_token', required=False, help="Next page token for Youtube API.")
args = vars(ap.parse_args())
def main(event):
@@ -35,30 +37,41 @@ def main(event):
param_names = [
'query', 'init_idx',
'count', 'source',
- 'cookies', 'timestamp']
+ 'cookies', 'timestamp',
+ 'y_next_page_token']
response_msg = hp.response('success', 200)
-
if hp.is_valid_parameters(event, param_names):
params = hp.parse_parameters(event)
if params.values():
- api_request = APIRequest(params['source'], params['query'], params['init_idx'], params['count'])
+ api_request = APIRequest(
+ params['source'],
+ params['query'],
+ params['init_idx'],
+ params['count'],
+ params['y_next_page_token'])
if 'pwc_user'in event and 'pwc_password' in event:
api_request.pwc_auth_info = (event['pwc_user'], event['pwc_password'])
if 'github_acc_token' in event:
api_request.github_acc_token = event['github_acc_token']
+ if 'youtube_developer_key' in event:
+ api_request.youtube_developer_key = event['youtube_developer_key']
data = api_request.fetch_data()
response_msg = hp.response(
message=data.get('content',''),
status_code=data.get('response_code'),
headers=headers,
- optional_attributes={'has_next_page': data.get('has_next_page', False)})
+ optional_attributes={
+ 'has_next_page': data.get('has_next_page', False),
+ 'y_next_page_token': data.get('y_next_page_token', None)})
+
return response_msg
response_msg = hp.response('Invalid parameters.', 400)
return response_msg
- except (ValueError, TypeError):
- response_msg = hp.response('Invalid parameters.', 400)
+ except (ValueError, TypeError) as ex:
+ response_msg = hp.response(str(ex), 400)
+ return response_msg
except Exception as ex:
response_msg = hp.response(str(ex), 500)
@@ -71,7 +84,8 @@ if __name__ == "__main__":
'count': args['count'],
'source': args['source'],
'cookies': args['cookies'],
- 'timestamp': args['timestamp']
+ 'timestamp': args['timestamp'],
+ 'y_next_page_token': args['y_next_page_token']
}
if args['pwc_user']:
@@ -80,7 +94,9 @@ if __name__ == "__main__":
event['pwc_password'] = args['pwc_password']
if args['github_acc_token']:
event['github_acc_token'] = args['github_acc_token']
+ if args['youtube_dev_key']:
+ event['youtube_developer_key'] = args['youtube_dev_key']
result = main(event)
pp = pprint.PrettyPrinter(indent=2)
- pp.pprint(result)
\ No newline at end of file
+ pp.pprint(result)