Skip to content

Commit

Permalink
Merge pull request #4 from Machine-Learning-Tokyo/feature/add_youtube…
Browse files Browse the repository at this point in the history
…_api

Feature/add youtube api
  • Loading branch information
saihtaungkham authored Apr 14, 2020
2 parents 8c1176a + 1c07c84 commit 1a0ba11
Show file tree
Hide file tree
Showing 8 changed files with 141 additions and 29 deletions.
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,16 @@ Required Parameters:
Timestamp of requesting API.
Optional Parameters:
-u PWC_USER, --pwc_user PWC_USER
-pu PWC_USER, --pwc_user PWC_USER
Paper with code repository user name.
-p PWC_PASSWORD, --pwc_password PWC_PASSWORD
-pp PWC_PASSWORD, --pwc_password PWC_PASSWORD
Paper with code repository password.
-t GITHUB_ACC_TOKEN, --github_acc_token GITHUB_ACC_TOKEN
-gt GITHUB_ACC_TOKEN, --github_acc_token GITHUB_ACC_TOKEN
Github access token.
-yk YOUTUBE_DEV_KEY, --youtube_dev_key YOUTUBE_DEV_KEY
Youtube developer key.
-yntp Y_NEXT_PAGE_TOKEN, --y_next_page_token Y_NEXT_PAGE_TOKEN
Next page token for Youtube API.
```

<hr>
Expand All @@ -51,4 +55,5 @@ api_request = APIRequest(source, query,
init_idx, count)
api_request.pwc_auth_info = ('user_name', 'password')
api_request.github_acc_token = 'token'
```
api_request.youtube_developer_key = 'your_key'
```
1 change: 1 addition & 0 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ def set_properties(project):
project.build_depends_on("mock")
project.build_depends_on("requests")
project.build_depends_on("pygithub")
project.build_depends_on("google-api-python-client")
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
PyGithub==1.43.8
pybuilder
pybuilder
requests
google-api-python-client
84 changes: 75 additions & 9 deletions src/main/python/mlsearch/api_requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from mlsearch.protocol import Protocol
from github import Github
from requests.auth import HTTPBasicAuth
import googleapiclient.discovery
import json
import requests
# import scholarly
Expand All @@ -10,18 +11,20 @@
class APIRequest():
"""For handling the different Valid API requests."""

def __init__(self, source, query, init_idx, count):
def __init__(self, source, query, init_idx, count, y_next_page_token=None):
"""
Initialization for the class.
:param source: The API request destination.
:param query: The query for searching.
:param init_idx: The initial pagination index.
:param count: The number of records to be fetched.
:param y_next_page_token: The current page token for youtube API.
"""

self.params = {'query':query, 'init_idx':init_idx,
'count':count, 'source': source}
'count':count, 'source': source,
'y_next_page_token': y_next_page_token}
self.params_model = {'query':str, 'init_idx':int,
'count':int}
# Load the configuration file
Expand All @@ -32,7 +35,8 @@ def __init__(self, source, query, init_idx, count):
self.data = {
'response_code': 201,
'content': None,
'has_next_page': False}
'has_next_page': False,
'y_next_page_token': None}

@property
def github_acc_token(self):
Expand All @@ -43,6 +47,14 @@ def github_acc_token(self, access_token):
if access_token:
self._config.GITHUB_ACC_TOKEN = access_token

@property
def youtube_developer_key(self):
"""Return the Youtube developer key currently held by self._config."""
return self._config.YOUTUBE_DEVELOPER_KEY

@youtube_developer_key.setter
def youtube_developer_key(self, developer_key):
"""
Store a Youtube developer key on self._config.
:param developer_key: The key to store; a falsy value (None, '')
is ignored and the existing key is kept.
"""
if developer_key:
self._config.YOUTUBE_DEVELOPER_KEY = developer_key

@property
def pwc_auth_info(self):
Expand Down Expand Up @@ -70,7 +82,6 @@ def _validate_params(self):
raise TypeError(
f'Invalid type for {item}. {typ} is expected but '
f'{type(self.params[item])} is given.')

if self.params['source'] not in self._config.VALID_API_SOURCE:
raise ValueError(
f"Invalid value for {self.params['source']}. "
Expand Down Expand Up @@ -118,8 +129,8 @@ def _fetch_github(self) -> [Protocol]:
}
results.append(Protocol(data))

self.data['response_code'] = 200
self.data['content'] = [proto.to_JSON() for proto in results]
self.data['response_code'] = 200
self.data['content'] = [proto.to_JSON() for proto in results]

def _fetch_paperwithcode(self) -> [Protocol]:
"""Fetch Paper with Code Repository"""
Expand Down Expand Up @@ -161,7 +172,59 @@ def _fetch_paperwithcode(self) -> [Protocol]:
self.data['content'] = [proto.to_JSON() for proto in results]

self.data['response_code'] = query_result.status_code


def _fetch_youtube(self, y_next_page_token=None) -> [Protocol]:
    """
    Search Youtube for the configured query and store the results.

    Builds a Youtube API client from the values on ``self._config``,
    runs a single ``search().list`` request for ``self.params['query']``
    and writes the outcome into ``self.data`` ('content',
    'y_next_page_token', 'has_next_page' and 'response_code').
    Nothing is returned; callers read ``self.data`` afterwards.

    :param y_next_page_token: Page token taken from a previous response,
        or None to request the first page.
    """
    youtube = googleapiclient.discovery.build(
        self._config.YOUTUBE_SERVICE_NAME,
        self._config.YOUTUBE_API_VERSION,
        developerKey=self._config.YOUTUBE_DEVELOPER_KEY)
    request = youtube.search().list(
        part=self._config.YOUTUBE_PART,
        maxResults=self.params['count'],
        order=self._config.YOUTUBE_ORDER,
        q=self.params['query'],
        safeSearch=self._config.YOUTUBE_SAFESEARCH,
        pageToken=y_next_page_token
    )
    response = request.execute()

    results = []
    # A missing or empty 'items' entry simply produces an empty content list.
    for item in response.get('items') or []:
        # 'id' and 'snippet' are nested objects; fall back to an empty
        # dict so an absent section yields None fields instead of raising.
        item_id = item.get('id') or {}
        snippet = item.get('snippet') or {}
        data = {
            'video_id': item_id.get('videoId', None),
            'title': snippet.get('title', None),
            'description': snippet.get('description', None),
            'channel_id': snippet.get('channelId', None),
            'channel_title': snippet.get('channelTitle', None),
            'live_broadcast_content': snippet.get(
                'liveBroadcastContent', None),
            'published_datetime': snippet.get('publishedAt', None),
            'thumbnails': snippet.get('thumbnails', None),
            'source': self.params.get('source', ''),
        }
        results.append(Protocol(data))

    next_page_token = response.get('nextPageToken', None)
    self.data['y_next_page_token'] = next_page_token
    self.data['content'] = [proto.to_JSON() for proto in results]
    # Another page exists only when the API hands back a token for it;
    # pageInfo.totalResults stays positive on the last page as well, so
    # it cannot be used to decide this.
    self.data['has_next_page'] = bool(next_page_token)
    self.data['response_code'] = 200

def fetch_data(self) -> json:
"""Fetch the data from designated API source."""

Expand All @@ -170,7 +233,10 @@ def fetch_data(self) -> json:
self._fetch_paperwithcode()

if self.params.get('source', '') == 'github':
responses = self._fetch_github()
self._fetch_github()

if self.params.get('source', '') == 'youtube':
self._fetch_youtube(self.params.get('y_next_page_token', None))

# TODO: Implement the function for Coursera. However, this function
# may be handled by the backend server.
Expand All @@ -181,4 +247,4 @@ def fetch_data(self) -> json:
self.data['response_code'] = 500
self.data['content'] = str(ex)

return self.data
return self.data
10 changes: 9 additions & 1 deletion src/main/python/mlsearch/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,12 @@ class Config(object):
GITHUB_URL = os.environ.get('GITHUB_URL') or "in:readme+in:description"

# API Source
VALID_API_SOURCE = ['paperwithcode', 'github', 'coursera']
VALID_API_SOURCE = ['paperwithcode', 'github', 'coursera', 'youtube']

# Youtube configuration
# Each setting is read from the environment variable of the same name and
# falls back to the value after `or` when that variable is unset or empty.
# Service name and API version handed to googleapiclient.discovery.build().
YOUTUBE_SERVICE_NAME = os.environ.get('YOUTUBE_SERVICE_NAME') or "youtube"
YOUTUBE_API_VERSION = os.environ.get('YOUTUBE_API_VERSION') or "v3"
# Developer key for the API client; None when the variable is unset or empty.
YOUTUBE_DEVELOPER_KEY = os.environ.get('YOUTUBE_DEVELOPER_KEY') or None
# Sent as the order, safeSearch and part arguments of the search().list() request.
YOUTUBE_ORDER = os.environ.get('YOUTUBE_ORDER') or "relevance"
YOUTUBE_SAFESEARCH = os.environ.get('YOUTUBE_SAFESEARCH') or "strict"
YOUTUBE_PART = os.environ.get('YOUTUBE_PART') or "snippet"
6 changes: 4 additions & 2 deletions src/main/python/mlsearch/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ def parse_parameters(event):
:return: dict(
'query', 'init_idx',
'count', 'source',
'cookies', 'timestamp')
'cookies', 'timestamp',
'y_next_page_token')
"""
try:
param = dict()
Expand All @@ -50,11 +51,12 @@ def parse_parameters(event):
param['source'] = event['source']
param['cookies'] = event['cookies']
param['timestamp'] = event['timestamp']
param['y_next_page_token'] = event['y_next_page_token']

if param['init_idx'] >= 0 and param['count'] > 0:
return param
else:
return dict()

except:
return dict()
return dict()
14 changes: 13 additions & 1 deletion src/main/python/mlsearch/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ def __init__(self, kwargs):
'partners_v1', 'instructors_v1',

# Source Flag
'source'
'source',

# Youtube
'video_id',
'channel_id', 'channel_title',
'live_broadcast_content', 'published_datetime',
'thumbnails',
]

for param in kwargs:
Expand Down Expand Up @@ -54,6 +60,12 @@ def __init__(self, kwargs):
self.instructors_v1 = kwargs.get('instructors_v1', None)
self.source = kwargs.get('source', None)
self.pwc_url = kwargs.get('pwc_url', None)
self.video_id = kwargs.get('video_id', None)
self.channel_id = kwargs.get('channel_id', None)
self.channel_title = kwargs.get('channel_title', None)
self.live_broadcast_content = kwargs.get('live_broadcast_content', None)
self.published_datetime = kwargs.get('published_datetime', None)
self.thumbnails = kwargs.get('thumbnails', dict())

def to_JSON(self):
"""Transform the Protocol object to JSON object."""
Expand Down
38 changes: 27 additions & 11 deletions src/main/scripts/mlsearch
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ ap.add_argument('-c', '--count', required=True, help="Total number of results to
ap.add_argument('-s', '--source', required=True, help="Source API to be looking for.")
ap.add_argument('-ck', '--cookies', required=True, help="Cookies of current user.")
ap.add_argument('-tm', '--timestamp', required=True, help="Timestamp of requesting API.")
ap.add_argument('-u', '--pwc_user', required=False, help="Paper with code repository user name.")
ap.add_argument('-p', '--pwc_password', required=False, help="Paper with code repository password.")
ap.add_argument('-t', '--github_acc_token', required=False, help="Github access token.")
ap.add_argument('-pu', '--pwc_user', required=False, help="Paper with code repository user name.")
ap.add_argument('-pp', '--pwc_password', required=False, help="Paper with code repository password.")
ap.add_argument('-gt', '--github_acc_token', required=False, help="Github access token.")
ap.add_argument('-yk', '--youtube_dev_key', required=False, help="Youtube developer key.")
ap.add_argument('-yntp', '--y_next_page_token', required=False, help="Next page token for Youtube API.")
args = vars(ap.parse_args())

def main(event):
Expand All @@ -35,30 +37,41 @@ def main(event):
param_names = [
'query', 'init_idx',
'count', 'source',
'cookies', 'timestamp']
'cookies', 'timestamp',
'y_next_page_token']
response_msg = hp.response('success', 200)

if hp.is_valid_parameters(event, param_names):
params = hp.parse_parameters(event)
if params.values():
api_request = APIRequest(params['source'], params['query'], params['init_idx'], params['count'])
api_request = APIRequest(
params['source'],
params['query'],
params['init_idx'],
params['count'],
params['y_next_page_token'])
if 'pwc_user'in event and 'pwc_password' in event:
api_request.pwc_auth_info = (event['pwc_user'], event['pwc_password'])
if 'github_acc_token' in event:
api_request.github_acc_token = event['github_acc_token']
if 'youtube_developer_key' in event:
api_request.youtube_developer_key = event['youtube_developer_key']
data = api_request.fetch_data()
response_msg = hp.response(
message=data.get('content',''),
status_code=data.get('response_code'),
headers=headers,
optional_attributes={'has_next_page': data.get('has_next_page', False)})
optional_attributes={
'has_next_page': data.get('has_next_page', False),
'y_next_page_token': data.get('y_next_page_token', None)})

return response_msg

response_msg = hp.response('Invalid parameters.', 400)
return response_msg

except (ValueError, TypeError):
response_msg = hp.response('Invalid parameters.', 400)
except (ValueError, TypeError) as ex:
response_msg = hp.response(str(ex), 400)
return response_msg

except Exception as ex:
response_msg = hp.response(str(ex), 500)
Expand All @@ -71,7 +84,8 @@ if __name__ == "__main__":
'count': args['count'],
'source': args['source'],
'cookies': args['cookies'],
'timestamp': args['timestamp']
'timestamp': args['timestamp'],
'y_next_page_token': args['y_next_page_token']
}

if args['pwc_user']:
Expand All @@ -80,7 +94,9 @@ if __name__ == "__main__":
event['pwc_password'] = args['pwc_password']
if args['github_acc_token']:
event['github_acc_token'] = args['github_acc_token']
if args['youtube_dev_key']:
event['youtube_developer_key'] = args['youtube_dev_key']

result = main(event)
pp = pprint.PrettyPrinter(indent=2)
pp.pprint(result)
pp.pprint(result)

0 comments on commit 1a0ba11

Please sign in to comment.