diff --git a/.gitconfig b/.gitconfig
new file mode 100644
index 0000000..7b84997
--- /dev/null
+++ b/.gitconfig
@@ -0,0 +1,9 @@
+build
+*.pyc
+target
+*.ipynb
+.ipynb_checkpoints
+.DS_Store
+sync-config.json
+data
+.vscode
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c8d98ed
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+/src/main/python/mlsearch/__pycache__
+*/unittest/python/__pycache__
+/target
+__pycache__
+.coverage
+.DS_Store
+.vscode
\ No newline at end of file
diff --git a/README.md b/README.md
index 8e303c2..da9ec19 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,61 @@
-# search-api-requester
-API requester for recommendation system
+# MLSearch Libraries
+
+The mlsearch libraries are a collection of libraries that act as a wrapper over other repositories for fetching the data required by the MLSearch Engine.
+
+The package can be installed with `python setup.py install`.
+
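+A typical local install is sketched below (assuming Python 3 and `pip`; adjust to your environment):
+```
+pip install -r requirements.txt
+python setup.py install
+```
+The repository also ships a PyBuilder `build.py`, so `pyb` can be used instead to install the dependencies, run the unit tests, and build the distribution.
+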
+## Standalone usage
+
+`mlsearch -q query -i start_index -c number_of_results -s source`
+
+For example:
+`mlsearch -q "cnn" -i 0 -c 3 -s "github"`
+
+Available Parameters:
+```
+  -h, --help            show this help message and exit
+
+Required Parameters:
+
+  -q QUERY, --query QUERY
+                        Keyword for searching.
+  -i INIT_IDX, --init_idx INIT_IDX
+                        Initial index for pagination.
+  -c COUNT, --count COUNT
+                        Total number of results to be fetched.
+  -s SOURCE, --source SOURCE
+                        Source API to be looking for.
+  -ck COOKIES, --cookies COOKIES
+                        Cookies of current user.
+  -tm TIMESTAMP, --timestamp TIMESTAMP
+                        Timestamp of requesting API.
+
+Optional Parameters:
+
+  -pu PWC_USER, --pwc_user PWC_USER
+                        Paper with code repository user name.
+  -pp PWC_PASSWORD, --pwc_password PWC_PASSWORD
+                        Paper with code repository password.
+  -gt GITHUB_ACC_TOKEN, --github_acc_token GITHUB_ACC_TOKEN
+                        Github access token.
+  -yk Y_DEV_KEY, --y_dev_key Y_DEV_KEY
+                        Youtube developer key.
+  -yntp Y_NEXT_PAGE_TOKEN, --y_next_page_token Y_NEXT_PAGE_TOKEN
+                        Next page token for Youtube API.
+  -yo Y_QUERY_ORDER, --y_query_order Y_QUERY_ORDER
+                        Youtube Query Order.
+```
+
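+Note that `-ck` (cookies) and `-tm` (timestamp) are also required by the CLI. A complete invocation for the YouTube source is sketched below (the cookie, timestamp, and developer-key values are placeholders, not real credentials):
+```
+mlsearch -q "cnn" -i 0 -c 3 -s "youtube" -ck "session-cookie" -tm "1591000000" -yk "YOUR_YOUTUBE_DEV_KEY" -yo "relevance"
+```
+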
+## Using as an API
+
+ +```python +from mlsearch.api_requester import APIRequest + +api_request = APIRequest(source, query, + init_idx, count) +api_request.pwc_auth_info = ('user_name', 'password') +api_request.github_acc_token = 'token' +api_request.youtube_developer_key = 'your_key' +``` diff --git a/build.py b/build.py new file mode 100644 index 0000000..dd34c76 --- /dev/null +++ b/build.py @@ -0,0 +1,21 @@ +from pybuilder.core import use_plugin, init + +use_plugin("python.core") +use_plugin("python.unittest") +use_plugin("python.install_dependencies") +use_plugin("python.flake8") +use_plugin("python.coverage") +use_plugin("python.distutils") + + +name = "mlsearch" +default_task = ["install_dependencies", "publish"] + + +@init +def set_properties(project): + project.set_property("coverage_break_build", False) + project.build_depends_on("mock") + project.build_depends_on("requests") + project.build_depends_on("pygithub") + project.build_depends_on("google-api-python-client") diff --git a/docs/.gitkeep b/docs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c118007 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +PyGithub==1.43.8 +pybuilder +requests +google-api-python-client \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..b25925b --- /dev/null +++ b/setup.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +from setuptools import setup +from setuptools.command.install import install as _install + +class install(_install): + def pre_install_script(self): + pass + + def post_install_script(self): + pass + + def run(self): + self.pre_install_script() + + _install.run(self) + + self.post_install_script() + +if __name__ == '__main__': + setup( + name = 'mlsearch', + version = '1.0.dev0', + description = '', + long_description = '', + author = '', + author_email = '', + license = '', + url = '', + scripts = ['scripts/mlsearch'], + packages = ['mlsearch'], + namespace_packages = [], + py_modules = [], + classifiers = [ + 'Development Status :: 3 - Alpha', + 'Programming Language :: Python' + ], + entry_points = {}, + data_files = [], + package_data = {}, + install_requires = [], + dependency_links = [], + zip_safe = True, + cmdclass = {'install': install}, + keywords = '', + python_requires = '', + obsoletes = [], + ) diff --git a/src/main/python/mlsearch/__init__.py b/src/main/python/mlsearch/__init__.py new file mode 100644 index 0000000..669354b --- /dev/null +++ b/src/main/python/mlsearch/__init__.py @@ -0,0 +1,2 @@ +def greet(filelike): + filelike.write("Hello world!\n") \ No newline at end of file diff --git a/src/main/python/mlsearch/api_requester.py b/src/main/python/mlsearch/api_requester.py new file mode 100644 index 0000000..8d1dc9c --- /dev/null +++ b/src/main/python/mlsearch/api_requester.py @@ -0,0 +1,417 @@ +from mlsearch.config import Config +from mlsearch.protocol import Protocol +from github import Github +from requests.auth import HTTPBasicAuth +from github.GithubException import BadCredentialsException +from github.GithubException import RateLimitExceededException +from googleapiclient.errors import HttpError +import googleapiclient.discovery +import json +import requests +import html +import random +import collections +import math + +# import scholarly + +ErrorType = collections.namedtuple("ErrorType", "reason status") + + +class APIRequest: + """For handling the different Valid API requests.""" + + def __init__(self, source, query, init_idx, count, 
y_next_page_token=None): + """ + Initialization for the class. + + :param source: The API request destination. + :param query: The query for searching. + :param init_idx: The initial pagination index. + :param count: The number of records to be fetched. + :param y_next_page_token: The current page token for youtube API. + """ + + self.params = { + "query": query, + "init_idx": init_idx, + "count": count, + "source": source, + "y_next_page_token": y_next_page_token, + } + self.params_model = {"query": str, "init_idx": int, "count": int} + # Load the configuration file + self._config = Config + # Validate Params + self._validate_params() + # Response data + self.data = { + "response_code": 201, + "content": None, + "has_next_page": False, + "y_next_page_token": None, + } + + @property + def youtube_query_order(self): + return self._config.YOUTUBE_ORDER + + @youtube_query_order.setter + def youtube_query_order(self, youtube_order): + if youtube_order: + self._config.YOUTUBE_ORDER = youtube_order + + @property + def github_acc_token(self): + return self._config.GITHUB_ACC_TOKEN + + @github_acc_token.setter + def github_acc_token(self, access_token): + if access_token: + self._config.GITHUB_ACC_TOKEN = access_token + + @property + def youtube_developer_key(self): + return self._config.YOUTUBE_DEVELOPER_KEY + + @youtube_developer_key.setter + def youtube_developer_key(self, developer_key): + if isinstance(developer_key, list): + self._config.YOUTUBE_DEVELOPER_KEY = developer_key + elif isinstance(developer_key, str) and "," in developer_key: + self._config.YOUTUBE_DEVELOPER_KEY = developer_key.strip().split( + "," + ) + elif developer_key and isinstance(developer_key, str): + self._config.YOUTUBE_DEVELOPER_KEY.append(developer_key) + + @property + def pwc_auth_info(self): + return (self._config.PWC_USER_NAME, self._config.PWC_PASSWORD) + + @pwc_auth_info.setter + def pwc_auth_info(self, auth_info: "tuple(user_name, password)"): + assert isinstance( + auth_info, tuple + ), f"Invalid type for auth_info. Expected tuple but got {type(auth_info)}." + if len(auth_info) == 2: + assert isinstance( + auth_info[0], str + ), f"Invalid type for user_name. Expected str but got {type(auth_info[0])}." + assert isinstance( + auth_info[1], str + ), f"Invalid type for password. Expected str but got {type(auth_info[1])}." + self._config.PWC_USER_NAME = auth_info[0] + self._config.PWC_PASSWORD = auth_info[1] + else: + raise AttributeError( + f"Expected tuple with length 2 but got {len(auth_info)}." + ) + + def _validate_params(self): + """Validate user input data.""" + + for item, typ in self.params_model.items(): + if item in self.params.keys() and not typ == type( + self.params[item] + ): + raise TypeError( + f"Invalid type for {item}. {typ} is expected but " + f"{type(self.params[item])} is given." + ) + if self.params["source"] not in self._config.VALID_API_SOURCE: + raise ValueError( + f"Invalid value for {self.params['source']}. " + f"Expected values are {self._config.VALID_API_SOURCE}" + ) + + def _is_valid_pagination(self, max_count=0): + """Validate pagination.""" + # If init_idx is greater than acutal content + if max_count == 0 or self.params["init_idx"] > max_count: + return False + + # Update pagination flag. 
+ self.data["has_next_page"] = ( + self.params["init_idx"] + self.params["count"] < max_count + ) + + return True + + def _unescape(self, text): + """Unescape Html Script.""" + if text and isinstance(text, str): + return html.unescape(text) + return text + + def _fetch_github(self) -> [Protocol]: + """Fetch Github Repository""" + item_per_page = self._config.GITHUB_PER_PAGE + github = Github(self._config.GITHUB_ACC_TOKEN, per_page=item_per_page) + + skip_page = math.floor(self.params["init_idx"] / item_per_page) + total_page = math.ceil( + (self.params["init_idx"] + self.params["count"]) / item_per_page + ) + query = "+".join([self.params["query"], self._config.GITHUB_URL]) + responses = github.search_repositories(query, "stars", "desc") + results = [] + + if not self._is_valid_pagination(responses.totalCount): + return + + paginated_responses = list() + for i in range(skip_page + 1, total_page + 1): + paginated_responses.extend(responses.get_page(i)) + + first_slot_items = item_per_page - ( + self.params["init_idx"] % item_per_page + ) + end_slot_items = item_per_page - ( + (total_page * item_per_page) + - (self.params["count"] + self.params["init_idx"]) + ) + + start_idx = item_per_page - first_slot_items + end_idx = (len(paginated_responses) - item_per_page) + end_slot_items + + for response in paginated_responses[start_idx:end_idx]: + data = { + "repository_url": self._unescape( + response.clone_url.replace(".git", "") + ), + "title": self._unescape(response.name), + "description": self._unescape(response.description), + "private": self._unescape(response.private), + "fork": self._unescape(response.fork), + "updated_at": self._unescape( + response.updated_at.strftime("%Y%m%dT%H:%M:%S") + ), + "stargazers_count": self._unescape(response.stargazers_count), + "watchers_count": self._unescape(response.watchers_count), + "language": self._unescape(response.language), + "forks_count": self._unescape(response.forks_count), + "source": self.params.get("source", ""), + } + results.append(Protocol(data)) + + self.data["response_code"] = 200 + self.data["content"] = [proto.to_JSON() for proto in results] + + def _fetch_paperwithcode(self) -> [Protocol]: + """Fetch Paper with Code Repository""" + + results = [] + url = f"{self._config.PWC_URL}{self.params['query']}" + query_result = requests.get( + url, + auth=HTTPBasicAuth( + self._config.PWC_USER_NAME, self._config.PWC_PASSWORD + ), + ) + + if query_result.status_code == 200: + content = json.loads(query_result.content) + max_content = len(content) + if not self._is_valid_pagination(max_content): + return + + content = content[ + self.params["init_idx"] : min( + self.params["init_idx"] + self.params["count"], max_content + ) + ] + + for item in content: + data = { + "title": self._unescape(item.get("paper_title", None)), + "description": self._unescape( + item.get("paper_abstract", None) + ), + "paper_url": self._unescape(item.get("paper_url", None)), + "num_of_implementations": self._unescape( + item.get("number_of_implementations", None) + ), + "tasks": self._unescape(item.get("tasks", None)), + "paper_conference": self._unescape( + item.get("paper_conference", None) + ), + "repository_url": self._unescape( + item.get("repository_url", None) + ), + "repository_name": self._unescape( + item.get("repository_name", None) + ), + "repository_framework": self._unescape( + item.get("repository_framework", None) + ), + "repository_stars": self._unescape( + item.get("repository_stars", None) + ), + "paper_published": self._unescape( + 
item.get("paper_published", None) + ), + "pwc_url": self._unescape(item.get("pwc_url", "")), + "source": self.params.get("source", ""), + } + results.append(Protocol(data)) + + self.data["content"] = [proto.to_JSON() for proto in results] + else: + print(str(query_result.status_code), query_result.content) + self.data["response_code"] = query_result.status_code + self.data["content"] = ( + "There is an error in fetching data from PWC server." + f" {json.loads(query_result.content).get('error')}" + ) + + def _fetch_youtube(self, y_next_page_token=None) -> [Protocol]: + """Fetch the Youtube Repository""" + results = [] + input_query = str(self.params["query"]).lower().strip() + user_query = input_query + + if not self._config.YOUTUBE_FIX_KEYWORD.strip() in user_query: + user_query = input_query + self._config.YOUTUBE_QUERY_FILTER + + sampled_dev_key = None + if not len(self._config.YOUTUBE_DEVELOPER_KEY) > 0: + auth_error = ErrorType( + reason="Empty YouTube Developer Key.", status="400" + ) + raise HttpError( + auth_error, str.encode("YouTube Developer Key Required.") + ) + + sampled_dev_key = random.choice(self._config.YOUTUBE_DEVELOPER_KEY) + + sampled_dev_key = random.choice(self._config.YOUTUBE_DEVELOPER_KEY) + + youtube = googleapiclient.discovery.build( + self._config.YOUTUBE_SERVICE_NAME, + self._config.YOUTUBE_API_VERSION, + developerKey=sampled_dev_key, + ) + + request = youtube.search().list( + part=self._config.YOUTUBE_PART, + maxResults=self.params["count"], + order=self._config.YOUTUBE_ORDER, + q=user_query, + safeSearch=self._config.YOUTUBE_SAFESEARCH, + # Disabled the next page token due to limitation of api access. + # pageToken=y_next_page_token, + ) + response = request.execute() + + if "items" in response and len(response["items"]) > 0: + for item in response["items"]: + # Skip if the video id is null + if not item.get("id", dict({"videoId": None})).get( + "videoId", None + ): + continue + + data = { + "video_id": self._unescape( + item.get("id", dict({"videoId": None})).get( + "videoId", None + ) + ), + "title": self._unescape( + item.get("snippet", dict({"title": None})).get( + "title", None + ) + ), + "description": self._unescape( + item.get("snippet", dict({"description": None})).get( + "description", None + ) + ), + "channel_id": self._unescape( + item.get("snippet", dict({"channelId": None})).get( + "channelId", None + ) + ), + "channel_title": self._unescape( + item.get("snippet", dict({"channelTitle": None})).get( + "channelTitle", None + ) + ), + "live_broadcast_content": self._unescape( + item.get( + "snippet", dict({"liveBroadcastContent": None}) + ).get("liveBroadcastContent", None) + ), + "published_datetime": self._unescape( + item.get("snippet", dict({"publishedAt": None})).get( + "publishedAt", None + ) + ), + "thumbnails": self._unescape( + item.get("snippet", dict({"thumbnails": None})).get( + "thumbnails", None + ) + ), + "source": self.params.get("source", ""), + } + results.append(Protocol(data)) + # self.data["y_next_page_token"] = response.get("nextPageToken", None) + self.data["content"] = [proto.to_JSON() for proto in results] + # self.data["has_next_page"] = ( + # response.get("pageInfo", dict({"totalResults": 0})).get( + # "totalResults", 0 + # ) + # > 0 + # ) + self.data["has_next_page"] = False + self.data["y_query_order"] = self._config.YOUTUBE_ORDER + self.data["response_code"] = 200 + + def fetch_data(self) -> json: + """Fetch the data from designated API source.""" + + try: + if self.params.get("source", "") == "paperwithcode": + 
self._fetch_paperwithcode() + + if self.params.get("source", "") == "github": + try: + self._fetch_github() + except BadCredentialsException: + self.data["response_code"] = 400 + self.data["content"] = "Invalid Github developer key." + except RateLimitExceededException: + self.data["response_code"] = 503 + self.data["content"] = "Access rate limitation reached." + + if self.params.get("source", "") == "youtube": + if ( + not self._config.YOUTUBE_ORDER + in self._config.VALID_YOUTUBE_ORDER + ): + self.data["response_code"] = 400 + self.data["content"] = "Invalid Youtube Query Order." + return self.data + try: + self._fetch_youtube( + self.params.get("y_next_page_token", None) + ) + except HttpError as ex: + print(str(ex)) + self.data["response_code"] = 400 + self.data[ + "content" + ] = "Seems there is an authentication error with Youtube server." + + # TODO: Implement the function for Coursera. However, this function + # may be handled by the backend server. + if self.params.get("source", "") == "coursera": + pass + + except Exception as ex: + print(str(ex)) + self.data["content"] = "Oops... Something has gone wrong in server." + self.data["response_code"] = 500 + + return self.data diff --git a/src/main/python/mlsearch/config.py b/src/main/python/mlsearch/config.py new file mode 100644 index 0000000..48e9b66 --- /dev/null +++ b/src/main/python/mlsearch/config.py @@ -0,0 +1,46 @@ +import os + + +class Config(object): + """Class for API Request configuration.""" + + # Paper with code configuration + PWC_USER_NAME = os.environ.get("PWC_USER_NAME") or "" + PWC_PASSWORD = os.environ.get("PWC_PASSWORD") or "" + PWC_URL = ( + os.environ.get("PWC_URL") + or "https://paperswithcode.com/api/v0/search/?q=" + ) + + # Github configuration + GITHUB_ACC_TOKEN = os.environ.get("GITHUB_ACC_TOKEN") or None + GITHUB_URL = os.environ.get("GITHUB_URL") or "in:readme+in:description" + GITHUB_PER_PAGE = os.environ.get("ITEM_PER_PAGE") or 10 + # AIP Source + VALID_API_SOURCE = ["paperwithcode", "github", "coursera", "youtube"] + + # Youtube configuration + YOUTUBE_SERVICE_NAME = os.environ.get("YOUTUBE_SERVICE_NAME") or "youtube" + YOUTUBE_API_VERSION = os.environ.get("YOUTUBE_API_VERSION") or "v3" + # Parsing Youtube Keys + YOUTUBE_DEVELOPER_KEY = list() + developer_key = os.environ.get("YOUTUBE_DEVELOPER_KEY") + if isinstance(developer_key, list): + YOUTUBE_DEVELOPER_KEY = developer_key + elif isinstance(developer_key, str) and "," in developer_key: + YOUTUBE_DEVELOPER_KEY = developer_key.strip().split(",") + elif developer_key and isinstance(developer_key, str): + YOUTUBE_DEVELOPER_KEY.append(developer_key) + YOUTUBE_ORDER = os.environ.get("YOUTUBE_ORDER") or "relevance" + YOUTUBE_SAFESEARCH = os.environ.get("YOUTUBE_SAFESEARCH") or "strict" + YOUTUBE_PART = os.environ.get("YOUTUBE_PART") or "snippet" + YOUTUBE_FIX_KEYWORD = "machine learning" + YOUTUBE_QUERY_FILTER = " " + YOUTUBE_FIX_KEYWORD + " -news" + VALID_YOUTUBE_ORDER = [ + "date", + "rating", + "relevance", + "title", + # "videoCount", # This is for channel only + "viewCount", + ] diff --git a/src/main/python/mlsearch/helper.py b/src/main/python/mlsearch/helper.py new file mode 100644 index 0000000..111cf5f --- /dev/null +++ b/src/main/python/mlsearch/helper.py @@ -0,0 +1,62 @@ +import json + +def is_valid_parameters(event, param_names): + """ + Check whether the item in param_names exist in event dictionary. + + :param event: Lambda event object. + :param param_names: The list of the param names to be checked. 
+ + :retrun: True if exist else False + """ + for param in param_names: + if not param in event: + return False + return True + +def response(message, status_code, headers=dict(), optional_attributes=dict()): + """ + Response message for the request. + + :param message: The response message. + :param status_code: The response status. + :headers: The header of the response. + :optional_attributes: The dict key value used by backend to communicate + with front end. + + :return: The dic('statusCode', 'body', 'optional_attributes') + """ + return { + 'statusCode': status_code, + 'body': json.dumps({'content': message, 'optional_attributes': optional_attributes}), + 'headers': headers + } + +def parse_parameters(event): + """ + Parse the parameters from event dictionary. + + :param event: The event dictionary. + :return: dict( + 'query', 'init_idx', + 'count', 'source', + 'cookies', 'timestamp', + 'y_next_page_token') + """ + try: + param = dict() + param['query'] = event['query'] + param['init_idx'] = int(event['init_idx']) + param['count'] = int(event['count']) + param['source'] = event['source'] + param['cookies'] = event['cookies'] + param['timestamp'] = event['timestamp'] + param['y_next_page_token'] = event['y_next_page_token'] + + if param['init_idx'] >= 0 and param['count'] > 0: + return param + else: + return dict() + + except: + return dict() diff --git a/src/main/python/mlsearch/protocol.py b/src/main/python/mlsearch/protocol.py new file mode 100644 index 0000000..826cc46 --- /dev/null +++ b/src/main/python/mlsearch/protocol.py @@ -0,0 +1,76 @@ +class Protocol(): + """The Protocol for standard communication accross different api sources.""" + + def __init__(self, kwargs): + param_list = [ + + # title -> paper_title, full_name, name + # description -> paper_abstract, description + + # Paper with code + 'title', + 'paper_published', 'paper_url', + 'num_of_implementations', 'tasks', + 'paper_conference', 'repository_url', + 'repository_name', 'repository_framework', + 'repository_stars', 'pwc_url', + + # Github + 'description', 'private', + 'fork', 'updated_at', + 'stargazers_count', 'watchers_count', + 'language', 'forks_count', + + # Coursera + 'partners_v1', 'instructors_v1', + + # Source Flag + 'source', + + # Youtube + 'video_id', + 'channel_id', 'channel_title', + 'live_broadcast_content', 'published_datetime', + 'thumbnails', + ] + + for param in kwargs: + if param not in param_list: + raise AttributeError('{} is not a valid parameter.'.format(param)) + + self.title = kwargs.get('title', None) + self.paper_published = kwargs.get('paper_published', None) + self.paper_url = kwargs.get('paper_url', None) + self.num_of_implementations = kwargs.get('num_of_implementations', None) + self.tasks = kwargs.get('tasks', None) + self.paper_conference = kwargs.get('paper_conference', None) + self.repository_url = kwargs.get('repository_url', None) + self.repository_name = kwargs.get('repository_name', None) + self.repository_framework = kwargs.get('repository_framework', None) + self.repository_stars = kwargs.get('repository_stars', None) + self.description = kwargs.get('description', None) + self.private = kwargs.get('private', None) + self.fork = kwargs.get('fork', None) + self.updated_at = kwargs.get('updated_at', None) + self.stargazers_count = kwargs.get('stargazers_count', None) + self.watchers_count = kwargs.get('watchers_count', None) + self.language = kwargs.get('language', None) + self.forks_count = kwargs.get('forks_count', None) + self.partners_v1 = kwargs.get('partners_v1', None) 
+ self.instructors_v1 = kwargs.get('instructors_v1', None) + self.source = kwargs.get('source', None) + self.pwc_url = kwargs.get('pwc_url', None) + self.video_id = kwargs.get('video_id', None) + self.channel_id = kwargs.get('channel_id', None) + self.channel_title = kwargs.get('channel_title', None) + self.live_broadcast_content = kwargs.get('live_broadcast_content', None) + self.published_datetime = kwargs.get('published_datetime', None) + self.thumbnails = kwargs.get('thumbnails', dict()) + + def to_JSON(self): + """Transform the Protocol object to JSON object.""" + + return self.__dict__ + + def __repr__(self): + return str(self.__dict__) \ No newline at end of file diff --git a/src/main/scripts/mlsearch b/src/main/scripts/mlsearch new file mode 100644 index 0000000..7ae2448 --- /dev/null +++ b/src/main/scripts/mlsearch @@ -0,0 +1,138 @@ +#!/usr/bin/env python +import argparse +import sys +import pprint +import os +import json + +# For debugging purpose +if "mlsearch" not in sys.modules: + sys.path.append(os.path.join(os.getcwd(), "src/main/python")) + +from mlsearch.api_requester import APIRequest +from mlsearch import helper as hp + +ap = argparse.ArgumentParser() +ap.add_argument("-q", "--query", required=True, help="Keyword for searching.") +ap.add_argument("-i", "--init_idx", required=True, help="Initial index for pagination.") +ap.add_argument( + "-c", "--count", required=True, help="Total number of results to be fetched." +) +ap.add_argument("-s", "--source", required=True, help="Source API to be looking for.") +ap.add_argument("-ck", "--cookies", required=True, help="Cookies of current user.") +ap.add_argument( + "-tm", "--timestamp", required=True, help="Timestamp of requesting API." +) +ap.add_argument( + "-pu", "--pwc_user", required=False, help="Paper with code repository user name." +) +ap.add_argument( + "-pp", "--pwc_password", required=False, help="Paper with code repository password." +) +ap.add_argument( + "-gt", "--github_acc_token", required=False, help="Github access token." +) +ap.add_argument( + "-yk", "--y_dev_key", required=False, help="Youtube developer key." +) +ap.add_argument( + "-yntp", + "--y_next_page_token", + required=False, + help="Next page token for Youtube API.", +) +ap.add_argument( + "-yo", "--y_query_order", required=False, help="Youtube Query Order." 
+) +args = vars(ap.parse_args()) + + +def main(event): + headers = { + "Access-Control-Allow-Origin": "*", + "X-Requested-With": "*", + "Access-Control-Allow-Headers": "Content-Type,X-Amz-Date,Authorization,X-Api-Key,x-requested-with", + "Access-Control-Allow-Methods": "OPTIONS,POST,GET", + } + try: + param_names = [ + "query", + "init_idx", + "count", + "source", + "cookies", + "timestamp", + "y_next_page_token", + ] + response_msg = hp.response("success", 200) + if hp.is_valid_parameters(event, param_names): + params = hp.parse_parameters(event) + if params.values(): + api_request = APIRequest( + params["source"], + params["query"], + params["init_idx"], + params["count"], + params["y_next_page_token"], + ) + if "pwc_user" in event and "pwc_password" in event: + api_request.pwc_auth_info = ( + event["pwc_user"], + event["pwc_password"], + ) + if "github_acc_token" in event: + api_request.github_acc_token = event["github_acc_token"] + if "y_dev_key" in event: + api_request.youtube_developer_key = event["y_dev_key"] + if "y_query_order" in event: + api_request.youtube_query_order = event["y_query_order"] + data = api_request.fetch_data() + response_msg = hp.response( + message=data.get("content", ""), + status_code=data.get("response_code"), + headers=headers, + optional_attributes={ + "has_next_page": data.get("has_next_page", False), + "y_next_page_token": data.get("y_next_page_token", None), + "y_query_order": data.get("y_query_order", None) + }, + ) + + return response_msg + + response_msg = hp.response("Invalid parameters.", 400) + return response_msg + + except (ValueError, TypeError) as ex: + response_msg = hp.response(str(ex), 400) + return response_msg + + except Exception as ex: + response_msg = hp.response(str(ex), 500) + return response_msg + + +if __name__ == "__main__": + event = { + "query": args["query"], + "init_idx": args["init_idx"], + "count": args["count"], + "source": args["source"], + "cookies": args["cookies"], + "timestamp": args["timestamp"], + "y_next_page_token": args["y_next_page_token"], + } + + if args["pwc_user"]: + event["pwc_user"] = args["pwc_user"] + if args["pwc_password"]: + event["pwc_password"] = args["pwc_password"] + if args["github_acc_token"]: + event["github_acc_token"] = args["github_acc_token"] + if args["y_dev_key"]: + event["y_dev_key"] = args["y_dev_key"] + if args["y_query_order"]: + event["y_query_order"] = args["y_query_order"] + result = main(event) + pp = pprint.PrettyPrinter(indent=2) + pp.pprint(result) diff --git a/src/unittest/python/myproject_tests.py b/src/unittest/python/myproject_tests.py new file mode 100644 index 0000000..8aa0c2f --- /dev/null +++ b/src/unittest/python/myproject_tests.py @@ -0,0 +1,13 @@ +from unittest import TestCase +from mock import Mock +from mlsearch import greet +from mlsearch.api_requester import APIRequest + + +class Test(TestCase): + def test_should_write_hello_world(self): + mock_stdout = Mock() + + greet(mock_stdout) + + mock_stdout.write.assert_called_with("Hello world!\n") \ No newline at end of file