diff --git a/.gitconfig b/.gitconfig
new file mode 100644
index 0000000..7b84997
--- /dev/null
+++ b/.gitconfig
@@ -0,0 +1,9 @@
+build
+*.pyc
+target
+*.ipynb
+.ipynb_checkpoints
+.DS_Store
+sync-config.json
+data
+.vscode
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c8d98ed
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+/src/main/python/mlsearch/__pycache__
+*/unittest/python/__pycache__
+/target
+__pycache__
+.coverage
+.DS_Store
+.vscode
\ No newline at end of file
diff --git a/README.md b/README.md
index 8e303c2..da9ec19 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,61 @@
-# search-api-requester
-API requester for recommendation system
+# MLSearch Libraries
+
+The mlsearch libraries are a collection of wrappers over other repositories' APIs that fetch the data required by the MLSearch Engine.
+
+The package can be installed with `python setup.py install`.
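+
+The project itself is built with PyBuilder (see `build.py`); a sketch of a typical build from the project root:
+
+```
+pip install pybuilder
+pyb
+```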
+
+
+## Standalone usage
+
+`mlsearch -q query -i start_index -c number_of_results -s source -ck cookies -tm timestamp`
+
+For example:
+`mlsearch -q "cnn" -i 0 -c 3 -s "github" -ck "my_cookie" -tm "2020-01-01T00:00:00"`
+
+Available Parameters:
+```
+ -h, --help show this help message and exit
+
+Required Parameters:
+
+ -q QUERY, --query QUERY
+ Keyword for searching.
+ -i INIT_IDX, --init_idx INIT_IDX
+ Initial index for pagination.
+ -c COUNT, --count COUNT
+ Total number of results to be fetched.
+ -s SOURCE, --source SOURCE
+                        Source API to search.
+ -ck COOKIES, --cookies COOKIES
+ Cookies of current user.
+ -tm TIMESTAMP, --timestamp TIMESTAMP
+ Timestamp of requesting API.
+Optional Parameters:
+
+ -pu PWC_USER, --pwc_user PWC_USER
+ Paper with code repository user name.
+ -pp PWC_PASSWORD, --pwc_password PWC_PASSWORD
+ Paper with code repository password.
+ -gt GITHUB_ACC_TOKEN, --github_acc_token GITHUB_ACC_TOKEN
+ Github access token.
+  -yk Y_DEV_KEY, --y_dev_key Y_DEV_KEY
+                        Youtube developer key.
+  -yntp Y_NEXT_PAGE_TOKEN, --y_next_page_token Y_NEXT_PAGE_TOKEN
+                        Next page token for Youtube API.
+ -yo Y_QUERY_ORDER, --y_query_order Y_QUERY_ORDER
+ Youtube Query Order.
+```
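+
+For example, querying YouTube with optional parameters (the developer key below is a placeholder):
+
+```
+mlsearch -q "cnn" -i 0 -c 3 -s "youtube" -ck "my_cookie" -tm "2020-01-01T00:00:00" -yk "YOUR_YOUTUBE_DEV_KEY"
+```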
+
+
+## Using as an API
+
+
+```python
+from mlsearch.api_requester import APIRequest
+
+api_request = APIRequest(source, query,
+ init_idx, count)
+api_request.pwc_auth_info = ('user_name', 'password')
+api_request.github_acc_token = 'token'
+api_request.youtube_developer_key = 'your_key'
+```
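+
+Calling `fetch_data()` then returns a plain dict. A minimal sketch of reading the response (key names taken from `mlsearch/api_requester.py`; the printed fields are just examples):
+
+```python
+data = api_request.fetch_data()
+
+print(data["response_code"])   # e.g. 200 on success
+print(data["has_next_page"])   # True if more results can be paged
+
+if data["response_code"] == 200:
+    for item in data["content"]:
+        # Each item is a Protocol record serialized to a dict.
+        print(item.get("title"), item.get("repository_url"))
+```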
diff --git a/build.py b/build.py
new file mode 100644
index 0000000..dd34c76
--- /dev/null
+++ b/build.py
@@ -0,0 +1,21 @@
+from pybuilder.core import use_plugin, init
+
+use_plugin("python.core")
+use_plugin("python.unittest")
+use_plugin("python.install_dependencies")
+use_plugin("python.flake8")
+use_plugin("python.coverage")
+use_plugin("python.distutils")
+
+
+name = "mlsearch"
+default_task = ["install_dependencies", "publish"]
+
+
+@init
+def set_properties(project):
+ project.set_property("coverage_break_build", False)
+ project.build_depends_on("mock")
+ project.build_depends_on("requests")
+ project.build_depends_on("pygithub")
+ project.build_depends_on("google-api-python-client")
diff --git a/docs/.gitkeep b/docs/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c118007
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+PyGithub==1.43.8
+pybuilder
+requests
+google-api-python-client
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..b25925b
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+from setuptools import setup
+from setuptools.command.install import install as _install
+
+class install(_install):
+ def pre_install_script(self):
+ pass
+
+ def post_install_script(self):
+ pass
+
+ def run(self):
+ self.pre_install_script()
+
+ _install.run(self)
+
+ self.post_install_script()
+
+if __name__ == '__main__':
+ setup(
+ name = 'mlsearch',
+ version = '1.0.dev0',
+ description = '',
+ long_description = '',
+ author = '',
+ author_email = '',
+ license = '',
+ url = '',
+ scripts = ['scripts/mlsearch'],
+ packages = ['mlsearch'],
+ namespace_packages = [],
+ py_modules = [],
+ classifiers = [
+ 'Development Status :: 3 - Alpha',
+ 'Programming Language :: Python'
+ ],
+ entry_points = {},
+ data_files = [],
+ package_data = {},
+ install_requires = [],
+ dependency_links = [],
+ zip_safe = True,
+ cmdclass = {'install': install},
+ keywords = '',
+ python_requires = '',
+ obsoletes = [],
+ )
diff --git a/src/main/python/mlsearch/__init__.py b/src/main/python/mlsearch/__init__.py
new file mode 100644
index 0000000..669354b
--- /dev/null
+++ b/src/main/python/mlsearch/__init__.py
@@ -0,0 +1,2 @@
+def greet(filelike):
+ filelike.write("Hello world!\n")
\ No newline at end of file
diff --git a/src/main/python/mlsearch/api_requester.py b/src/main/python/mlsearch/api_requester.py
new file mode 100644
index 0000000..8d1dc9c
--- /dev/null
+++ b/src/main/python/mlsearch/api_requester.py
@@ -0,0 +1,417 @@
+from mlsearch.config import Config
+from mlsearch.protocol import Protocol
+from github import Github
+from requests.auth import HTTPBasicAuth
+from github.GithubException import BadCredentialsException
+from github.GithubException import RateLimitExceededException
+from googleapiclient.errors import HttpError
+import googleapiclient.discovery
+import json
+import requests
+import html
+import random
+import collections
+import math
+
+# import scholarly
+
+ErrorType = collections.namedtuple("ErrorType", "reason status")
+
+
+class APIRequest:
+    """Handle requests to the supported API sources."""
+
+ def __init__(self, source, query, init_idx, count, y_next_page_token=None):
+ """
+ Initialization for the class.
+
+ :param source: The API request destination.
+ :param query: The query for searching.
+ :param init_idx: The initial pagination index.
+ :param count: The number of records to be fetched.
+ :param y_next_page_token: The current page token for youtube API.
+ """
+
+ self.params = {
+ "query": query,
+ "init_idx": init_idx,
+ "count": count,
+ "source": source,
+ "y_next_page_token": y_next_page_token,
+ }
+ self.params_model = {"query": str, "init_idx": int, "count": int}
+ # Load the configuration file
+ self._config = Config
+ # Validate Params
+ self._validate_params()
+ # Response data
+ self.data = {
+ "response_code": 201,
+ "content": None,
+ "has_next_page": False,
+ "y_next_page_token": None,
+ }
+
+ @property
+ def youtube_query_order(self):
+ return self._config.YOUTUBE_ORDER
+
+ @youtube_query_order.setter
+ def youtube_query_order(self, youtube_order):
+ if youtube_order:
+ self._config.YOUTUBE_ORDER = youtube_order
+
+ @property
+ def github_acc_token(self):
+ return self._config.GITHUB_ACC_TOKEN
+
+ @github_acc_token.setter
+ def github_acc_token(self, access_token):
+ if access_token:
+ self._config.GITHUB_ACC_TOKEN = access_token
+
+ @property
+ def youtube_developer_key(self):
+ return self._config.YOUTUBE_DEVELOPER_KEY
+
+ @youtube_developer_key.setter
+ def youtube_developer_key(self, developer_key):
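+        # Accept a list of keys, a comma-separated string, or a single key.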
+ if isinstance(developer_key, list):
+ self._config.YOUTUBE_DEVELOPER_KEY = developer_key
+ elif isinstance(developer_key, str) and "," in developer_key:
+ self._config.YOUTUBE_DEVELOPER_KEY = developer_key.strip().split(
+ ","
+ )
+ elif developer_key and isinstance(developer_key, str):
+ self._config.YOUTUBE_DEVELOPER_KEY.append(developer_key)
+
+ @property
+ def pwc_auth_info(self):
+ return (self._config.PWC_USER_NAME, self._config.PWC_PASSWORD)
+
+ @pwc_auth_info.setter
+ def pwc_auth_info(self, auth_info: "tuple(user_name, password)"):
+ assert isinstance(
+ auth_info, tuple
+ ), f"Invalid type for auth_info. Expected tuple but got {type(auth_info)}."
+ if len(auth_info) == 2:
+ assert isinstance(
+ auth_info[0], str
+ ), f"Invalid type for user_name. Expected str but got {type(auth_info[0])}."
+ assert isinstance(
+ auth_info[1], str
+ ), f"Invalid type for password. Expected str but got {type(auth_info[1])}."
+ self._config.PWC_USER_NAME = auth_info[0]
+ self._config.PWC_PASSWORD = auth_info[1]
+ else:
+ raise AttributeError(
+ f"Expected tuple with length 2 but got {len(auth_info)}."
+ )
+
+ def _validate_params(self):
+ """Validate user input data."""
+
+ for item, typ in self.params_model.items():
+ if item in self.params.keys() and not typ == type(
+ self.params[item]
+ ):
+ raise TypeError(
+ f"Invalid type for {item}. {typ} is expected but "
+ f"{type(self.params[item])} is given."
+ )
+ if self.params["source"] not in self._config.VALID_API_SOURCE:
+ raise ValueError(
+ f"Invalid value for {self.params['source']}. "
+ f"Expected values are {self._config.VALID_API_SOURCE}"
+ )
+
+ def _is_valid_pagination(self, max_count=0):
+ """Validate pagination."""
+        # If there is no content, or init_idx is beyond the actual content.
+ if max_count == 0 or self.params["init_idx"] > max_count:
+ return False
+
+ # Update pagination flag.
+ self.data["has_next_page"] = (
+ self.params["init_idx"] + self.params["count"] < max_count
+ )
+
+ return True
+
+ def _unescape(self, text):
+        """Unescape HTML entities in text."""
+ if text and isinstance(text, str):
+ return html.unescape(text)
+ return text
+
+    def _fetch_github(self) -> None:
+        """Fetch GitHub repositories and store them in self.data."""
+ item_per_page = self._config.GITHUB_PER_PAGE
+ github = Github(self._config.GITHUB_ACC_TOKEN, per_page=item_per_page)
+
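+        # Map the requested slice [init_idx, init_idx + count) onto GitHub's
+        # fixed-size result pages: skip the pages before the slice and fetch
+        # every page that overlaps it.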
+ skip_page = math.floor(self.params["init_idx"] / item_per_page)
+ total_page = math.ceil(
+ (self.params["init_idx"] + self.params["count"]) / item_per_page
+ )
+ query = "+".join([self.params["query"], self._config.GITHUB_URL])
+ responses = github.search_repositories(query, "stars", "desc")
+ results = []
+
+ if not self._is_valid_pagination(responses.totalCount):
+ return
+
+ paginated_responses = list()
+ for i in range(skip_page + 1, total_page + 1):
+ paginated_responses.extend(responses.get_page(i))
+
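+        # Trim the fetched pages down to the exact requested window by
+        # dropping the extra items on the first and last fetched page.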
+ first_slot_items = item_per_page - (
+ self.params["init_idx"] % item_per_page
+ )
+ end_slot_items = item_per_page - (
+ (total_page * item_per_page)
+ - (self.params["count"] + self.params["init_idx"])
+ )
+
+ start_idx = item_per_page - first_slot_items
+ end_idx = (len(paginated_responses) - item_per_page) + end_slot_items
+
+ for response in paginated_responses[start_idx:end_idx]:
+ data = {
+ "repository_url": self._unescape(
+ response.clone_url.replace(".git", "")
+ ),
+ "title": self._unescape(response.name),
+ "description": self._unescape(response.description),
+ "private": self._unescape(response.private),
+ "fork": self._unescape(response.fork),
+ "updated_at": self._unescape(
+ response.updated_at.strftime("%Y%m%dT%H:%M:%S")
+ ),
+ "stargazers_count": self._unescape(response.stargazers_count),
+ "watchers_count": self._unescape(response.watchers_count),
+ "language": self._unescape(response.language),
+ "forks_count": self._unescape(response.forks_count),
+ "source": self.params.get("source", ""),
+ }
+ results.append(Protocol(data))
+
+ self.data["response_code"] = 200
+ self.data["content"] = [proto.to_JSON() for proto in results]
+
+    def _fetch_paperwithcode(self) -> None:
+        """Fetch Papers with Code results and store them in self.data."""
+
+ results = []
+ url = f"{self._config.PWC_URL}{self.params['query']}"
+ query_result = requests.get(
+ url,
+ auth=HTTPBasicAuth(
+ self._config.PWC_USER_NAME, self._config.PWC_PASSWORD
+ ),
+ )
+
+ if query_result.status_code == 200:
+ content = json.loads(query_result.content)
+ max_content = len(content)
+ if not self._is_valid_pagination(max_content):
+ return
+
+ content = content[
+ self.params["init_idx"] : min(
+ self.params["init_idx"] + self.params["count"], max_content
+ )
+ ]
+
+ for item in content:
+ data = {
+ "title": self._unescape(item.get("paper_title", None)),
+ "description": self._unescape(
+ item.get("paper_abstract", None)
+ ),
+ "paper_url": self._unescape(item.get("paper_url", None)),
+ "num_of_implementations": self._unescape(
+ item.get("number_of_implementations", None)
+ ),
+ "tasks": self._unescape(item.get("tasks", None)),
+ "paper_conference": self._unescape(
+ item.get("paper_conference", None)
+ ),
+ "repository_url": self._unescape(
+ item.get("repository_url", None)
+ ),
+ "repository_name": self._unescape(
+ item.get("repository_name", None)
+ ),
+ "repository_framework": self._unescape(
+ item.get("repository_framework", None)
+ ),
+ "repository_stars": self._unescape(
+ item.get("repository_stars", None)
+ ),
+ "paper_published": self._unescape(
+ item.get("paper_published", None)
+ ),
+ "pwc_url": self._unescape(item.get("pwc_url", "")),
+ "source": self.params.get("source", ""),
+ }
+ results.append(Protocol(data))
+
+ self.data["content"] = [proto.to_JSON() for proto in results]
+ else:
+ print(str(query_result.status_code), query_result.content)
+ self.data["response_code"] = query_result.status_code
+ self.data["content"] = (
+ "There is an error in fetching data from PWC server."
+ f" {json.loads(query_result.content).get('error')}"
+ )
+
+    def _fetch_youtube(self, y_next_page_token=None) -> None:
+        """Fetch YouTube videos and store them in self.data."""
+ results = []
+ input_query = str(self.params["query"]).lower().strip()
+ user_query = input_query
+
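+        # Keep the search within the machine learning domain: if the query
+        # does not already mention it, append Config.YOUTUBE_QUERY_FILTER
+        # (" machine learning -news") to the query.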
+        if self._config.YOUTUBE_FIX_KEYWORD.strip() not in user_query:
+ user_query = input_query + self._config.YOUTUBE_QUERY_FILTER
+
+        if not self._config.YOUTUBE_DEVELOPER_KEY:
+            auth_error = ErrorType(
+                reason="Empty YouTube Developer Key.", status="400"
+            )
+            raise HttpError(
+                auth_error, str.encode("YouTube Developer Key Required.")
+            )
+
+        # Spread requests across the configured developer keys.
+        sampled_dev_key = random.choice(self._config.YOUTUBE_DEVELOPER_KEY)
+
+ youtube = googleapiclient.discovery.build(
+ self._config.YOUTUBE_SERVICE_NAME,
+ self._config.YOUTUBE_API_VERSION,
+ developerKey=sampled_dev_key,
+ )
+
+ request = youtube.search().list(
+ part=self._config.YOUTUBE_PART,
+ maxResults=self.params["count"],
+ order=self._config.YOUTUBE_ORDER,
+ q=user_query,
+ safeSearch=self._config.YOUTUBE_SAFESEARCH,
+ # Disabled the next page token due to limitation of api access.
+ # pageToken=y_next_page_token,
+ )
+ response = request.execute()
+
+        if response.get("items"):
+            for item in response["items"]:
+                snippet = item.get("snippet", {})
+                video_id = item.get("id", {}).get("videoId", None)
+
+                # Skip entries without a video id (e.g. channel results).
+                if not video_id:
+                    continue
+
+                data = {
+                    "video_id": self._unescape(video_id),
+                    "title": self._unescape(snippet.get("title", None)),
+                    "description": self._unescape(
+                        snippet.get("description", None)
+                    ),
+                    "channel_id": self._unescape(
+                        snippet.get("channelId", None)
+                    ),
+                    "channel_title": self._unescape(
+                        snippet.get("channelTitle", None)
+                    ),
+                    "live_broadcast_content": self._unescape(
+                        snippet.get("liveBroadcastContent", None)
+                    ),
+                    "published_datetime": self._unescape(
+                        snippet.get("publishedAt", None)
+                    ),
+                    "thumbnails": self._unescape(
+                        snippet.get("thumbnails", None)
+                    ),
+                    "source": self.params.get("source", ""),
+                }
+                results.append(Protocol(data))
+ # self.data["y_next_page_token"] = response.get("nextPageToken", None)
+ self.data["content"] = [proto.to_JSON() for proto in results]
+ # self.data["has_next_page"] = (
+ # response.get("pageInfo", dict({"totalResults": 0})).get(
+ # "totalResults", 0
+ # )
+ # > 0
+ # )
+ self.data["has_next_page"] = False
+ self.data["y_query_order"] = self._config.YOUTUBE_ORDER
+ self.data["response_code"] = 200
+
+    def fetch_data(self) -> dict:
+        """Fetch data from the designated API source and return self.data."""
+
+ try:
+ if self.params.get("source", "") == "paperwithcode":
+ self._fetch_paperwithcode()
+
+ if self.params.get("source", "") == "github":
+ try:
+ self._fetch_github()
+ except BadCredentialsException:
+ self.data["response_code"] = 400
+                    self.data["content"] = "Invalid GitHub access token."
+ except RateLimitExceededException:
+ self.data["response_code"] = 503
+                    self.data["content"] = "GitHub API rate limit reached."
+
+ if self.params.get("source", "") == "youtube":
+ if (
+                    self._config.YOUTUBE_ORDER
+                    not in self._config.VALID_YOUTUBE_ORDER
+ ):
+ self.data["response_code"] = 400
+ self.data["content"] = "Invalid Youtube Query Order."
+ return self.data
+ try:
+ self._fetch_youtube(
+ self.params.get("y_next_page_token", None)
+ )
+ except HttpError as ex:
+ print(str(ex))
+ self.data["response_code"] = 400
+                    self.data["content"] = (
+                        "There seems to be an authentication error with the "
+                        "YouTube server."
+                    )
+
+ # TODO: Implement the function for Coursera. However, this function
+ # may be handled by the backend server.
+ if self.params.get("source", "") == "coursera":
+ pass
+
+ except Exception as ex:
+ print(str(ex))
+            self.data["content"] = "Oops... Something has gone wrong on the server."
+ self.data["response_code"] = 500
+
+ return self.data
diff --git a/src/main/python/mlsearch/config.py b/src/main/python/mlsearch/config.py
new file mode 100644
index 0000000..48e9b66
--- /dev/null
+++ b/src/main/python/mlsearch/config.py
@@ -0,0 +1,46 @@
+import os
+
+
+class Config(object):
+ """Class for API Request configuration."""
+
+    # Papers with Code configuration
+ PWC_USER_NAME = os.environ.get("PWC_USER_NAME") or ""
+ PWC_PASSWORD = os.environ.get("PWC_PASSWORD") or ""
+ PWC_URL = (
+ os.environ.get("PWC_URL")
+ or "https://paperswithcode.com/api/v0/search/?q="
+ )
+
+ # Github configuration
+ GITHUB_ACC_TOKEN = os.environ.get("GITHUB_ACC_TOKEN") or None
+ GITHUB_URL = os.environ.get("GITHUB_URL") or "in:readme+in:description"
+    GITHUB_PER_PAGE = int(os.environ.get("ITEM_PER_PAGE") or 10)
+    # API sources
+ VALID_API_SOURCE = ["paperwithcode", "github", "coursera", "youtube"]
+
+ # Youtube configuration
+ YOUTUBE_SERVICE_NAME = os.environ.get("YOUTUBE_SERVICE_NAME") or "youtube"
+ YOUTUBE_API_VERSION = os.environ.get("YOUTUBE_API_VERSION") or "v3"
+    # Parse YouTube developer keys: a single key or a comma-separated list.
+ YOUTUBE_DEVELOPER_KEY = list()
+ developer_key = os.environ.get("YOUTUBE_DEVELOPER_KEY")
+ if isinstance(developer_key, list):
+ YOUTUBE_DEVELOPER_KEY = developer_key
+ elif isinstance(developer_key, str) and "," in developer_key:
+ YOUTUBE_DEVELOPER_KEY = developer_key.strip().split(",")
+ elif developer_key and isinstance(developer_key, str):
+ YOUTUBE_DEVELOPER_KEY.append(developer_key)
+ YOUTUBE_ORDER = os.environ.get("YOUTUBE_ORDER") or "relevance"
+ YOUTUBE_SAFESEARCH = os.environ.get("YOUTUBE_SAFESEARCH") or "strict"
+ YOUTUBE_PART = os.environ.get("YOUTUBE_PART") or "snippet"
+ YOUTUBE_FIX_KEYWORD = "machine learning"
+ YOUTUBE_QUERY_FILTER = " " + YOUTUBE_FIX_KEYWORD + " -news"
+ VALID_YOUTUBE_ORDER = [
+ "date",
+ "rating",
+ "relevance",
+ "title",
+ # "videoCount", # This is for channel only
+ "viewCount",
+ ]
diff --git a/src/main/python/mlsearch/helper.py b/src/main/python/mlsearch/helper.py
new file mode 100644
index 0000000..111cf5f
--- /dev/null
+++ b/src/main/python/mlsearch/helper.py
@@ -0,0 +1,62 @@
+import json
+
+def is_valid_parameters(event, param_names):
+ """
+ Check whether the item in param_names exist in event dictionary.
+
+ :param event: Lambda event object.
+ :param param_names: The list of the param names to be checked.
+
+    :return: True if every item exists, else False.
+ """
+ for param in param_names:
+        if param not in event:
+ return False
+ return True
+
+def response(message, status_code, headers=dict(), optional_attributes=dict()):
+ """
+ Response message for the request.
+
+    :param message: The response message.
+    :param status_code: The response status code.
+    :param headers: The headers of the response.
+    :param optional_attributes: Extra key/value pairs used by the backend to
+        communicate with the front end.
+
+    :return: dict('statusCode', 'body', 'headers')
+    """
+ return {
+ 'statusCode': status_code,
+ 'body': json.dumps({'content': message, 'optional_attributes': optional_attributes}),
+ 'headers': headers
+ }
+
+def parse_parameters(event):
+ """
+ Parse the parameters from event dictionary.
+
+ :param event: The event dictionary.
+ :return: dict(
+ 'query', 'init_idx',
+ 'count', 'source',
+ 'cookies', 'timestamp',
+ 'y_next_page_token')
+ """
+ try:
+ param = dict()
+ param['query'] = event['query']
+ param['init_idx'] = int(event['init_idx'])
+ param['count'] = int(event['count'])
+ param['source'] = event['source']
+ param['cookies'] = event['cookies']
+ param['timestamp'] = event['timestamp']
+ param['y_next_page_token'] = event['y_next_page_token']
+
+ if param['init_idx'] >= 0 and param['count'] > 0:
+ return param
+ else:
+ return dict()
+
+    except (KeyError, TypeError, ValueError):
+ return dict()
diff --git a/src/main/python/mlsearch/protocol.py b/src/main/python/mlsearch/protocol.py
new file mode 100644
index 0000000..826cc46
--- /dev/null
+++ b/src/main/python/mlsearch/protocol.py
@@ -0,0 +1,76 @@
+class Protocol:
+    """The protocol for standard communication across different API sources."""
+
+ def __init__(self, kwargs):
+ param_list = [
+
+ # title -> paper_title, full_name, name
+ # description -> paper_abstract, description
+
+ # Paper with code
+ 'title',
+ 'paper_published', 'paper_url',
+ 'num_of_implementations', 'tasks',
+ 'paper_conference', 'repository_url',
+ 'repository_name', 'repository_framework',
+ 'repository_stars', 'pwc_url',
+
+ # Github
+ 'description', 'private',
+ 'fork', 'updated_at',
+ 'stargazers_count', 'watchers_count',
+ 'language', 'forks_count',
+
+ # Coursera
+ 'partners_v1', 'instructors_v1',
+
+ # Source Flag
+ 'source',
+
+ # Youtube
+ 'video_id',
+ 'channel_id', 'channel_title',
+ 'live_broadcast_content', 'published_datetime',
+ 'thumbnails',
+ ]
+
+ for param in kwargs:
+ if param not in param_list:
+ raise AttributeError('{} is not a valid parameter.'.format(param))
+
+ self.title = kwargs.get('title', None)
+ self.paper_published = kwargs.get('paper_published', None)
+ self.paper_url = kwargs.get('paper_url', None)
+ self.num_of_implementations = kwargs.get('num_of_implementations', None)
+ self.tasks = kwargs.get('tasks', None)
+ self.paper_conference = kwargs.get('paper_conference', None)
+ self.repository_url = kwargs.get('repository_url', None)
+ self.repository_name = kwargs.get('repository_name', None)
+ self.repository_framework = kwargs.get('repository_framework', None)
+ self.repository_stars = kwargs.get('repository_stars', None)
+ self.description = kwargs.get('description', None)
+ self.private = kwargs.get('private', None)
+ self.fork = kwargs.get('fork', None)
+ self.updated_at = kwargs.get('updated_at', None)
+ self.stargazers_count = kwargs.get('stargazers_count', None)
+ self.watchers_count = kwargs.get('watchers_count', None)
+ self.language = kwargs.get('language', None)
+ self.forks_count = kwargs.get('forks_count', None)
+ self.partners_v1 = kwargs.get('partners_v1', None)
+ self.instructors_v1 = kwargs.get('instructors_v1', None)
+ self.source = kwargs.get('source', None)
+ self.pwc_url = kwargs.get('pwc_url', None)
+ self.video_id = kwargs.get('video_id', None)
+ self.channel_id = kwargs.get('channel_id', None)
+ self.channel_title = kwargs.get('channel_title', None)
+ self.live_broadcast_content = kwargs.get('live_broadcast_content', None)
+ self.published_datetime = kwargs.get('published_datetime', None)
+ self.thumbnails = kwargs.get('thumbnails', dict())
+
+ def to_JSON(self):
+        """Return the Protocol's attributes as a JSON-serializable dict."""
+
+ return self.__dict__
+
+ def __repr__(self):
+ return str(self.__dict__)
\ No newline at end of file
diff --git a/src/main/scripts/mlsearch b/src/main/scripts/mlsearch
new file mode 100644
index 0000000..7ae2448
--- /dev/null
+++ b/src/main/scripts/mlsearch
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+import argparse
+import sys
+import pprint
+import os
+import json
+
+# For debugging purposes: allow running from the source tree without installing.
+if "mlsearch" not in sys.modules:
+ sys.path.append(os.path.join(os.getcwd(), "src/main/python"))
+
+from mlsearch.api_requester import APIRequest
+from mlsearch import helper as hp
+
+ap = argparse.ArgumentParser()
+ap.add_argument("-q", "--query", required=True, help="Keyword for searching.")
+ap.add_argument("-i", "--init_idx", required=True, help="Initial index for pagination.")
+ap.add_argument(
+ "-c", "--count", required=True, help="Total number of results to be fetched."
+)
+ap.add_argument("-s", "--source", required=True, help="Source API to search.")
+ap.add_argument("-ck", "--cookies", required=True, help="Cookies of current user.")
+ap.add_argument(
+ "-tm", "--timestamp", required=True, help="Timestamp of requesting API."
+)
+ap.add_argument(
+ "-pu", "--pwc_user", required=False, help="Paper with code repository user name."
+)
+ap.add_argument(
+ "-pp", "--pwc_password", required=False, help="Paper with code repository password."
+)
+ap.add_argument(
+ "-gt", "--github_acc_token", required=False, help="Github access token."
+)
+ap.add_argument(
+ "-yk", "--y_dev_key", required=False, help="Youtube developer key."
+)
+ap.add_argument(
+ "-yntp",
+ "--y_next_page_token",
+ required=False,
+ help="Next page token for Youtube API.",
+)
+ap.add_argument(
+ "-yo", "--y_query_order", required=False, help="Youtube Query Order."
+)
+args = vars(ap.parse_args())
+
+
+def main(event):
+ headers = {
+ "Access-Control-Allow-Origin": "*",
+ "X-Requested-With": "*",
+ "Access-Control-Allow-Headers": "Content-Type,X-Amz-Date,Authorization,X-Api-Key,x-requested-with",
+ "Access-Control-Allow-Methods": "OPTIONS,POST,GET",
+ }
+ try:
+ param_names = [
+ "query",
+ "init_idx",
+ "count",
+ "source",
+ "cookies",
+ "timestamp",
+ "y_next_page_token",
+ ]
+ response_msg = hp.response("success", 200)
+ if hp.is_valid_parameters(event, param_names):
+ params = hp.parse_parameters(event)
+ if params.values():
+ api_request = APIRequest(
+ params["source"],
+ params["query"],
+ params["init_idx"],
+ params["count"],
+ params["y_next_page_token"],
+ )
+ if "pwc_user" in event and "pwc_password" in event:
+ api_request.pwc_auth_info = (
+ event["pwc_user"],
+ event["pwc_password"],
+ )
+ if "github_acc_token" in event:
+ api_request.github_acc_token = event["github_acc_token"]
+ if "y_dev_key" in event:
+ api_request.youtube_developer_key = event["y_dev_key"]
+ if "y_query_order" in event:
+ api_request.youtube_query_order = event["y_query_order"]
+ data = api_request.fetch_data()
+ response_msg = hp.response(
+ message=data.get("content", ""),
+ status_code=data.get("response_code"),
+ headers=headers,
+ optional_attributes={
+ "has_next_page": data.get("has_next_page", False),
+ "y_next_page_token": data.get("y_next_page_token", None),
+ "y_query_order": data.get("y_query_order", None)
+ },
+ )
+
+ return response_msg
+
+ response_msg = hp.response("Invalid parameters.", 400)
+ return response_msg
+
+ except (ValueError, TypeError) as ex:
+ response_msg = hp.response(str(ex), 400)
+ return response_msg
+
+ except Exception as ex:
+ response_msg = hp.response(str(ex), 500)
+ return response_msg
+
+
+if __name__ == "__main__":
+ event = {
+ "query": args["query"],
+ "init_idx": args["init_idx"],
+ "count": args["count"],
+ "source": args["source"],
+ "cookies": args["cookies"],
+ "timestamp": args["timestamp"],
+ "y_next_page_token": args["y_next_page_token"],
+ }
+
+ if args["pwc_user"]:
+ event["pwc_user"] = args["pwc_user"]
+ if args["pwc_password"]:
+ event["pwc_password"] = args["pwc_password"]
+ if args["github_acc_token"]:
+ event["github_acc_token"] = args["github_acc_token"]
+ if args["y_dev_key"]:
+ event["y_dev_key"] = args["y_dev_key"]
+ if args["y_query_order"]:
+ event["y_query_order"] = args["y_query_order"]
+ result = main(event)
+ pp = pprint.PrettyPrinter(indent=2)
+ pp.pprint(result)
diff --git a/src/unittest/python/myproject_tests.py b/src/unittest/python/myproject_tests.py
new file mode 100644
index 0000000..8aa0c2f
--- /dev/null
+++ b/src/unittest/python/myproject_tests.py
@@ -0,0 +1,13 @@
+from unittest import TestCase
+from mock import Mock
+from mlsearch import greet
+from mlsearch.api_requester import APIRequest
+
+
+class Test(TestCase):
+ def test_should_write_hello_world(self):
+ mock_stdout = Mock()
+
+ greet(mock_stdout)
+
+ mock_stdout.write.assert_called_with("Hello world!\n")
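+
+    def test_api_request_initialises_with_valid_params(self):
+        # A minimal constructor smoke test (added as a sketch): APIRequest
+        # only validates its parameters on construction, so no network access
+        # or credentials are needed here.
+        api_request = APIRequest("github", "cnn", 0, 3)
+
+        self.assertEqual(api_request.params["query"], "cnn")
+        self.assertEqual(api_request.data["response_code"], 201)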