diff --git a/Label_Microservice/tests/test_worker.py b/Label_Microservice/tests/test_worker.py
deleted file mode 100644
index c95ee84aa5..0000000000
--- a/Label_Microservice/tests/test_worker.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import unittest
-from unittest.mock import Mock
-import os
-import label_microservice
-from label_microservice.worker import Worker
-import code_intelligence
-
-class TestWorker(unittest.TestCase):
-
-    def test_get_issue_embedding(self):
-        """Testing get_issue_embedding function while status code is not 200"""
-        # init setting
-        os.environ['GH_ISSUE_API_KEY'] = ''
-        os.environ['APP_URL'] = ''
-        label_microservice.worker.github_init = Mock()
-        Worker.create_subscription_if_not_exists = Mock()
-
-        label_microservice.worker.get_issue_text = Mock()
-        label_microservice.worker.get_issue_text.return_value = {
-            'title': 'test_title',
-            'body': 'test_body'
-        }
-        # let status code to be 404
-        label_microservice.worker.requests.post = Mock()
-        label_microservice.worker.requests.post.return_value.status_code = 404
-        issue_embedding = Worker().get_issue_embedding('repo_owner', 'repo_name', 'issue_num')
-        # issue_embedding should be None
-        assert not issue_embedding
-
-    def teat_predict_issue_probability_not_retrieve_embedding(self):
-        """Testing predict_issue_probability function while not retrieving embedding"""
-        # init setting
-        os.environ['GH_ISSUE_API_KEY'] = ''
-        os.environ['APP_URL'] = ''
-        label_microservice.worker.github_init = Mock()
-        Worker.create_subscription_if_not_exists = Mock()
-
-        # let worker to not retrive embedding of an issue
-        Worker.get_issue_embedding = Mock()
-        Worker.get_issue_embedding.return_value = None
-        label_probabilities, issue_embedding = Worker().predict_issue_probability('repo_owner', 'repo_name', 'issue_num')
-        # issue_embedding is None, no predict probabilities
-        assert label_probabilities == [] and not issue_embedding
-
-    def test_predict_labels(self):
-        """Testing predict_labels function"""
-        # init setting
-        os.environ['GH_ISSUE_API_KEY'] = ''
-        os.environ['APP_URL'] = ''
-        label_microservice.worker.github_init = Mock()
-        Worker.create_subscription_if_not_exists = Mock()
-
-        Worker.predict_issue_probability = Mock()
-        Worker.predict_issue_probability.return_value = ([0.7, 0.6, 0.5], None)
-        Worker.load_label_columns = Mock()
-        Worker.load_label_columns.return_value = {
-            'labels': ['label1', 'label2', 'label3'],
-            'probability_thresholds': {0: 0.6, 1: 0.7, 2: 0.5}
-        }
-        predictions, _ = Worker().predict_labels('repo_owner', 'repo_name', 'issue_num')
-        # label1 and label3 satisfy thresholds
-        assert predictions['labels'] == ['label1', 'label3'] and\
-               predictions['probabilities'] == [0.7, 0.5]
-
-    def test_filter_specified_labels(self):
-        """Testing filter_specified_labels function when yaml specifies labels"""
-        # init setting
-        os.environ['GH_ISSUE_API_KEY'] = ''
-        os.environ['APP_URL'] = ''
-        label_microservice.worker.github_init = Mock()
-        Worker.create_subscription_if_not_exists = Mock()
-
-        label_microservice.worker.get_yaml = Mock()
-        label_microservice.worker.get_yaml.return_value = {'predicted-labels': {'label1': None, 'label2': None}}
-        predictions = {'labels': ['label1', 'label3'], 'probabilities': [0.7, 0.8]}
-        label_names, label_probabilities = Worker().filter_specified_labels('repo_owner', 'repo_name', predictions)
-        # only label1 in the predicted label list
-        assert label_names == ['label1'] and label_probabilities == [0.7]
-
-    def test_filter_specified_labels_yaml_not_specified(self):
-        """Testing filter_specified_labels function when yaml not specifies labels"""
-        # init setting
-        os.environ['GH_ISSUE_API_KEY'] = ''
-        os.environ['APP_URL'] = ''
-        label_microservice.worker.github_init = Mock()
-        Worker.create_subscription_if_not_exists = Mock()
-
-        label_microservice.worker.get_yaml = Mock()
-        label_microservice.worker.get_yaml.return_value = None
-        predictions = {'labels': ['label1', 'label3'], 'probabilities': [0.7, 0.8]}
-        label_names, label_probabilities = Worker().filter_specified_labels('repo_owner', 'repo_name', predictions)
-        # not specified in the yaml, predict all satisfying thresholds
-        assert label_names == predictions['labels'] and label_probabilities == predictions['probabilities']
diff --git a/py/code_intelligence/github_app.py b/py/code_intelligence/github_app.py
index 7d4485d991..d7e20545e6 100644
--- a/py/code_intelligence/github_app.py
+++ b/py/code_intelligence/github_app.py
@@ -1,3 +1,6 @@
+import logging
+import os
+
 from collections import namedtuple, Counter
 from github3 import GitHub
 from pathlib import Path
@@ -25,6 +28,13 @@ def __init__(self, pem_path, app_id):
         if not self.path.is_file():
             raise ValueError(f'argument: `pem_path` must be a valid filename. {pem_path} was not found.')
 
+    @staticmethod
+    def create_from_env():
+        """Create a new instance based on environment variables."""
+        app_id = os.getenv('GITHUB_APP_ID')
+        key_file_path = os.getenv("GITHUB_APP_PEM_KEY")
+        return GitHubApp(pem_path=key_file_path, app_id=app_id)
+
     def get_app(self):
         with open(self.path, 'rb') as key_file:
             client = GitHub()
@@ -34,11 +44,13 @@ def get_app(self):
 
     def get_installation(self, installation_id):
         "login as app installation without requesting previously gathered data."
+        logging.info("Logging in as GitHub App")
         with open(self.path, 'rb') as key_file:
             client = GitHub()
             client.login_as_app_installation(private_key_pem=key_file.read(),
                                              app_id=self.app_id,
                                              installation_id=installation_id)
+        logging.info("Successfully logged in as GitHub App")
         return client
 
     def get_test_installation_id(self):
@@ -85,7 +97,9 @@ def get_installation_id(self, owner, repo):
         response = requests.get(url=url, headers=headers)
 
         if response.status_code != 200:
-            raise Exception(f'Status code : {response.status_code}, {response.json()}')
+            raise Exception(f"There was a problem requesting URL={url} "
+                            f"Status code : {response.status_code}, "
+                            f"Response: {response.json()}")
 
         return response.json()['id']
 
     def get_installation_access_token(self, installation_id):
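
Reviewer note: a minimal usage sketch for the new GitHubApp.create_from_env factory (not part of the diff; the app id and key path below are placeholders):

    import os

    from code_intelligence.github_app import GitHubApp

    # Placeholder values; in a real deployment these would come from a
    # mounted K8s secret and the pod's environment.
    os.environ["GITHUB_APP_ID"] = "12345"
    os.environ["GITHUB_APP_PEM_KEY"] = "/var/secrets/github/app.pem"

    ghapp = GitHubApp.create_from_env()
    installation_id = ghapp.get_installation_id(owner="kubeflow", repo="examples")
    client = ghapp.get_installation(installation_id)
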
diff --git a/py/code_intelligence/github_util.py b/py/code_intelligence/github_util.py
index bbc40b2d7d..7511c214ca 100644
--- a/py/code_intelligence/github_util.py
+++ b/py/code_intelligence/github_util.py
@@ -1,44 +1,26 @@
 import os
 import logging
-from code_intelligence.github_app import GitHubApp
+from code_intelligence import github_app
 import yaml
 
-
-def init():
-    "Load all necessary artifacts to make predictions."
-    #save keyfile
-    pem_string = os.getenv('PRIVATE_KEY')
-    if not pem_string:
-        raise ValueError('Environment variable PRIVATE_KEY was not supplied.')
-
-    with open('private-key.pem', 'wb') as f:
-        f.write(str.encode(pem_string))
-
-# TODO(jlewi): init is taking the PRIVATE_KEY from an environment variable
-# and then writing it to a file. It would probably be better to follow
-# the pattern of GOOGLE_APPLICATION_CREDENTIALS; i.e. mount the K8s secret
-# to a volume and then use an environment variable to specify the path of
-# the key file.
-def get_app():
-    "grab a fresh instance of the app handle."
-    app_id = os.getenv('APP_ID')
-    key_file_path = 'private-key.pem'
-    ghapp = GitHubApp(pem_path=key_file_path, app_id=app_id)
-    return ghapp
-
 def get_issue_handle(installation_id, username, repository, number):
     "get an issue object."
-    ghapp = get_app()
+    ghapp = github_app.GitHubApp.create_from_env()
     install = ghapp.get_installation(installation_id)
     return install.issue(username, repository, number)
 
-def get_yaml(owner, repo):
+def get_yaml(owner, repo, ghapp=None):
     """
     Looks for the yaml file in a /.github directory.
 
     yaml file must be named issue_label_bot.yaml
     """
-    ghapp = get_app()
+
+    if not ghapp:
+        # TODO(jlewi): Should we deprecate this code path and always pass
+        # in the github app?
+        ghapp = github_app.GitHubApp.create_from_env()
+
     try:
         # get the app installation handle
         inst_id = ghapp.get_installation_id(owner=owner, repo=repo)
diff --git a/py/code_intelligence/util.py b/py/code_intelligence/util.py
index 842edb73da..baa9e5f714 100644
--- a/py/code_intelligence/util.py
+++ b/py/code_intelligence/util.py
@@ -1,5 +1,8 @@
 import logging
 import json
+import re
+
+ISSUE_RE = re.compile("([^/]*)/([^#]*)#([0-9]*)")
 
 # TODO(jlewi): Might be better to just write it
 # as a json list
@@ -8,4 +11,18 @@ def write_items_to_json(output_file, results):
         for i in results:
             json.dump(i, hf)
             hf.write("\n")
-    logging.info("Wrote %s items to %s", len(results), output_file)
\ No newline at end of file
+    logging.info("Wrote %s items to %s", len(results), output_file)
+
+def parse_issue_spec(issue):
+    """Parse an issue in the form {owner}/{repo}#{number}
+
+    Args:
+      issue: An issue in the form {owner}/{repo}#{number}
+
+    Returns:
+      owner, repo, number
+    """
+    m = ISSUE_RE.match(issue)
+    if not m:
+        return None, None, None
+    return m.group(1), m.group(2), int(m.group(3))
diff --git a/py/code_intelligence/util_test.py b/py/code_intelligence/util_test.py
new file mode 100644
index 0000000000..743655a4af
--- /dev/null
+++ b/py/code_intelligence/util_test.py
@@ -0,0 +1,35 @@
+import logging
+import pytest
+
+from code_intelligence import util
+
+def test_parse_issue_spec():
+    """A unittest for parsing issues."""
+
+    test_cases = [
+        {
+            "issue": "kubeflow/tfjob#153",
+            "expected": ("kubeflow", "tfjob", 153)
+        },
+        {
+            "issue": "kubeflow/tfjob/tfjob",
+            "expected": (None, None, None)
+        }
+    ]
+
+    for c in test_cases:
+        owner, repo, number = util.parse_issue_spec(c["issue"])
+        assert owner == c["expected"][0]
+        assert repo == c["expected"][1]
+        assert number == c["expected"][2]
+
+if __name__ == "__main__":
+    logging.basicConfig(
+        level=logging.INFO,
+        format=('%(levelname)s|%(asctime)s'
+                '|%(pathname)s|%(lineno)d| %(message)s'),
+        datefmt='%Y-%m-%dT%H:%M:%S',
+    )
+    logging.getLogger().setLevel(logging.INFO)
+
+    pytest.main()
diff --git a/py/label_microservice/cli.py b/py/label_microservice/cli.py
new file mode 100644
index 0000000000..fcd8c4804d
--- /dev/null
+++ b/py/label_microservice/cli.py
@@ -0,0 +1,47 @@
+"""A cli for interacting with the models.
+
+The CLI can be used to publish issues to pubsub so that inference is
+performed on them by the backends.
+"""
+import logging
+import fire
+from code_intelligence import util
+from google.cloud import pubsub
+
+DEFAULT_TOPIC = "projects/issue-label-bot-dev/topics/TEST_event_queue"
+
+class Cli:
+
+    @staticmethod
+    def label_issue(issue, pubsub_topic=DEFAULT_TOPIC):
+        """Label a specific issue.
+
+        Args:
+          issue: The issue in the form {owner}/{repo}#{number}
+          pubsub_topic: (Optional) the pubsub topic to publish to. This should
+            be in the form projects/{project}/topics/{topic_name}
+        """
+        publisher = pubsub.PublisherClient()
+        repo_owner, repo_name, issue_num = util.parse_issue_spec(issue)
+
+        if not repo_owner:
+            raise ValueError(f"issue={issue} didn't match regex "
+                             f"{util.ISSUE_RE.pattern}")
+
+        # all attributes being published to pubsub must be sent as text strings
+        publisher.publish(pubsub_topic,
+                          b'New issue.',
+                          # TODO(jlewi): Does the backend depend on the client
+                          # providing the installation id?
+                          installation_id="",
+                          repo_owner=repo_owner,
+                          repo_name=repo_name,
+                          issue_num=str(issue_num))
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO,
+                        format=('%(levelname)s|%(asctime)s'
+                                '|%(message)s|%(pathname)s|%(lineno)d|'),
+                        datefmt='%Y-%m-%dT%H:%M:%S',
+                        )
+
+    fire.Fire(Cli)
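
Reviewer note: with python-fire, label_issue becomes a subcommand; a hypothetical invocation (assumes the py directory is on PYTHONPATH and GCP pubsub credentials are configured; the issue and topic are placeholders):

    python -m label_microservice.cli label_issue \
        --issue=kubeflow/examples#123 \
        --pubsub_topic=projects/issue-label-bot-dev/topics/TEST_event_queue
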
diff --git a/py/label_microservice/universal_kind_label_model.py b/py/label_microservice/universal_kind_label_model.py
index ba60da05a1..cdaa5c3978 100644
--- a/py/label_microservice/universal_kind_label_model.py
+++ b/py/label_microservice/universal_kind_label_model.py
@@ -1,4 +1,4 @@
-
+from collections import defaultdict
 import tensorflow as tf
 from tensorflow.keras import models as keras_models
 from tensorflow.keras import utils as keras_utils
@@ -45,6 +45,13 @@ def __init__(self, class_names=['bug', 'feature', 'question']):
 
         self.class_names = class_names
 
+        # Set the prediction threshold for everything except the label
+        # question, which has a different threshold.
+        # These values were copied from the original code.
+        # https://github.com/machine-learning-apps/Issue-Label-Bot/blob/536e8bf4928b03d522dd021c0464587747e90a87/flask_app/app.py#L43
+        self._prediction_threshold = defaultdict(lambda: .52)
+        self._prediction_threshold["question"] = .60
+
     def predict_issue_labels(self, title:str, body:str):
         """
-        Get probabilities for the each class.
+        Get probabilities for each class.
@@ -76,4 +83,11 @@ def predict_issue_labels(self, title:str, body:str):
         with self._graph.as_default():
             probs = self.model.predict(x=[vec_body, vec_title]).tolist()[0]
 
-        return {k:v for k,v in zip(self.class_names, probs)}
+        results = {}
+
+        for label, p in zip(self.class_names, probs):
+            if p < self._prediction_threshold[label]:
+                continue
+            results[label] = p
+
+        return results
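
Reviewer note: a self-contained sketch of the new thresholding behavior (made-up probabilities; it mirrors, but does not import, the model code):

    from collections import defaultdict

    threshold = defaultdict(lambda: .52)  # same default threshold as the model
    threshold["question"] = .60           # stricter threshold for "question"

    class_names = ['bug', 'feature', 'question']
    probs = [0.70, 0.30, 0.55]            # made-up model outputs

    results = {label: p for label, p in zip(class_names, probs)
               if p >= threshold[label]}
    # results == {'bug': 0.7}: 'question' at 0.55 falls below its 0.60
    # threshold, so it is no longer returned.
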
diff --git a/py/label_microservice/worker.py b/py/label_microservice/worker.py
index 0b374ea38d..13974157b2 100644
--- a/py/label_microservice/worker.py
+++ b/py/label_microservice/worker.py
@@ -1,20 +1,16 @@
 import os
 import fire
 import requests
+import traceback
 import yaml
-import numpy as np
-from passlib.apps import custom_app_context as pwd_context
 from google.cloud import pubsub
 import logging
 from label_microservice.repo_config import RepoConfig
-from label_microservice.mlp import MLPWrapper
-from code_intelligence.github_util import init as github_init
-from code_intelligence.github_util import get_issue_handle
-from code_intelligence.github_util import get_yaml
-from code_intelligence.embeddings import get_issue_text
-from code_intelligence.gcs_util import download_file_from_gcs
+from code_intelligence import github_app
+from code_intelligence import github_util
 from code_intelligence.pubsub_util import check_subscription_name_exists
 from code_intelligence.pubsub_util import create_subscription_if_not_exists
+from label_microservice import issue_label_predictor
 
 class Worker:
     """
@@ -29,8 +25,7 @@ class Worker:
     def __init__(self,
                  project_id='issue-label-bot-dev',
                  topic_name='event_queue',
-                 subscription_name='subscription_for_event_queue',
-                 embedding_api_endpoint='https://embeddings.gh-issue-labeler.com/text'):
+                 subscription_name='subscription_for_event_queue'):
         """
         Initialize the parameters and GitHub app.
         Args:
@@ -39,34 +34,36 @@
             subscription_name: pubsub subscription name, str
-            embedding_api_endpoint: endpoint of embedding api microservice, str
         """
-        # TODO(chunhsiang): change the embedding microservice to be an internal DNS of k8s service.
-        # see: https://v1-12.docs.kubernetes.io/docs/concepts/services-networking/dns-pod-service/#services
         self.project_id = project_id
         self.topic_name = topic_name
         self.subscription_name = subscription_name
-        self.embedding_api_endpoint = embedding_api_endpoint
-        self.embedding_api_key = os.environ['GH_ISSUE_API_KEY']
+        self.app_url = os.environ['APP_URL']
 
-        # init GitHub app
-        github_init()
         # init pubsub subscription
         self.create_subscription_if_not_exists()
 
-    def load_yaml(self, repo_owner, repo_name):
-        """
-        Load config from the YAML of the specific repo_owner/repo_name.
-        Args:
-            repo_owner: str
-            repo_name: str
-        """
-        # TODO(chunhsiang): for now all the paths including gcs and local sides
-        # are set using repo_owner/repo_name (see repo_config.py), meaning the
-        # paths returned from `RepoConfig(...)` are related to the specific
-        # repo_owner/repo_name.
-        # Will update them after finish the config map.
-        self.config = RepoConfig(repo_owner=repo_owner, repo_name=repo_name)
-
+        self._predictor = None
+
+    @classmethod
+    def subscribe_from_env(cls):
+        """Build the worker from environment variables and subscribe."""
+        required_env = ["PROJECT", "ISSUE_EVENT_TOPIC",
+                        "ISSUE_EVENT_SUBSCRIPTION"]
+        missing = []
+        for e in required_env:
+            if not os.environ.get(e):
+                missing.append(e)
+
+        if missing:
+            raise ValueError(f"Missing required environment variables "
+                             f"{','.join(missing)}")
+
+        worker = Worker(project_id=os.getenv("PROJECT"),
+                        topic_name=os.getenv("ISSUE_EVENT_TOPIC"),
+                        subscription_name=os.getenv("ISSUE_EVENT_SUBSCRIPTION"))
+        worker.subscribe()
+
+        return worker
 
     def check_subscription_name_exists(self):
         """
         Check if the subscription name exists in the project.
@@ -117,36 +114,59 @@ def callback(message):
                 }
             }
             """
+            if self._predictor is None:
+                # We load the models here and not in __init__ because we
+                # need to create the TensorFlow models inside the thread used
+                # by pubsub for the callbacks. If we load them in __init__
+                # they get created in a different thread and TF will return
+                # errors when trying to use the models in a different thread.
+                logging.info("Creating predictor")
+                self._predictor = issue_label_predictor.IssueLabelPredictor()
+
+            # The code that publishes the message is:
+            # https://github.com/machine-learning-apps/Issue-Label-Bot/blob/26d8fb65be3b39de244c4be9e32b2838111dac10/flask_app/forward_utils.py#L57
+            # The front end does have access to the title and body
+            # but it's not being sent right now.
+            logging.info(f"Received message {message}")
             installation_id = message.attributes['installation_id']
             repo_owner = message.attributes['repo_owner']
             repo_name = message.attributes['repo_name']
             issue_num = message.attributes['issue_num']
-            logging.info(f'Receive issue #{issue_num} from {repo_owner}/{repo_name}')
+            data = {
+                "repo_owner": repo_owner,
+                "repo_name": repo_name,
+                "issue_num": issue_num,
+            }
 
             try:
-                # predict labels
-                self.load_yaml(repo_owner, repo_name)
-                self.download_model_from_gcs()
-                predictions, issue_embedding = self.predict_labels(repo_owner, repo_name, issue_num)
+                predictions = self._predictor.predict(data)
                 self.add_labels_to_issue(installation_id, repo_owner, repo_name,
                                          issue_num, predictions)
 
                 # log the prediction, which will be used to track the performance
+                # TODO(https://github.com/kubeflow/code-intelligence/issues/79)
+                # Ensure we capture the information needed to measure performance
+                # in stackdriver
                 log_dict = {
                     'repo_owner': repo_owner,
                     'repo_name': repo_name,
                     'issue_num': int(issue_num),
-                    'labels': predictions['labels']
+                    'predictions': predictions,
                 }
                 logging.info(log_dict)
+            # TODO(jlewi): We should catch a more narrow exception.
+            # On exception if we don't ack the message then we risk problems
+            # caused by poison pills repeatedly crashing our workers
+            # and preventing progress.
             except Exception as e:
                 # hard to find out which errors should be handled differently (e.g., retrying for multiple times)
                 # and how to handle the error that the same message causes for multiple times
                 # so use generic exception to ignore all errors for now
-                logging.error(f'Addressing issue #{issue_num} from {repo_owner}/{repo_name} causes an error')
-                logging.error(f'Error type: {type(e)}')
-                logging.error(e)
+                logging.error(f"Exception occurred while handling issue "
+                              f"{repo_owner}/{repo_name}#{issue_num}.\n"
+                              f"Exception: {e}\n"
+                              f"{traceback.format_exc()}")
\n" + f"Exception: {e}\n" + f"{traceback.format_exc()}") # acknowledge the message, or pubsub will repeatedly attempt to deliver it message.ack() @@ -156,162 +176,66 @@ def callback(message): future = subscriber.subscribe(subscription_path, callback=callback, flow_control=flow_control) + + # Calling future.result will block forever. future.cancel can be called + # to interrupt it. + # TODO(jlewi): It might be better to return the future. This would + # allow the caller to potentially cancel the process try: + logging.info("Wait forever or until pubsub future is cancelled") logging.info(future.result()) except KeyboardInterrupt: logging.info(future.cancel()) - def get_issue_embedding(self, repo_owner, repo_name, issue_num): + # TODO(jlewi): We should refactor this to make it easier to unittest and + # add an appropriate unittest. + @staticmethod + def apply_repo_config(repo_config, repo_owner, repo_name, predictions, + ghapp): """ - Get the embedding of the issue by calling GitHub Issue - Embeddings API endpoint. - Args: - repo_owner: repo owner - repo_name: repo name - issue_num: issue index + Only select those labels which are specified by yaml file to be predicted. + If there is no setting in the yaml file, return all predicted items. - Return - ------ - numpy.ndarray - shape: (1600,) - """ + Also apply any aliases listed in the config file. - issue_text = get_issue_text(owner=repo_owner, - repo=repo_name, - num=issue_num, - idx=None) - data = {'title': issue_text['title'], - 'body': issue_text['body']} - - # sending post request and saving response as response object - r = requests.post(url=self.embedding_api_endpoint, - headers={'Token': pwd_context.hash(self.embedding_api_key)}, - json=data) - if r.status_code != 200: - logging.warning(f'Status code is {r.status_code} not 200: ' - 'can not retrieve the embedding') - return None - - embeddings = np.frombuffer(r.content, dtype=' probability """ - Load label info from local path. + # Make a copy of the predictions so we don't modify the original. + filtered = {} + filtered.update(predictions) - Return - ------ - dict - {'labels': list, 'probability_thresholds': {label_index: threshold}} - """ - with open(self.config.labels_local_path, 'r') as f: - label_columns = yaml.safe_load(f) - return label_columns + if not repo_config: + logging.info("No repo specific config found for " + f"{repo_owner}/{repo_name}") + return filtered - def predict_issue_probability(self, repo_owner, repo_name, issue_num): - """ - Predict probabilities of labels for an issue. - Args: - repo_owner: repo owner - repo_name: repo name - issue_num: issue index + # Alias any labels. 
+        if "label-alias" in repo_config:
+            logging.info(f"Applying label aliases for "
+                         f"{repo_owner}/{repo_name}")
+            for old, new in repo_config["label-alias"].items():
+                if old in filtered:
+                    filtered[new] = filtered[old]
+                    del filtered[old]
 
-        Return
-        ------
-        numpy.ndarray
-            shape: (label_count,)
-        numpy.ndarray
-            shape: (1600,)
-        """
-        issue_embedding = self.get_issue_embedding(repo_owner=repo_owner,
-                                                   repo_name=repo_name,
-                                                   issue_num=issue_num)
-
-        # if not retrieve the embedding, ignore to predict it
-        if issue_embedding is None:
-            return [], None
-
-        mlp_wrapper = MLPWrapper(clf=None,
-                                 model_file=self.config.model_local_path,
-                                 load_from_model=True)
-        # change embedding from 1d to 2d for prediction and extract the result
-        label_probabilities = mlp_wrapper.predict_probabilities([issue_embedding])[0]
-        return label_probabilities, issue_embedding
-
-    def predict_labels(self, repo_owner, repo_name, issue_num):
-        """
-        Predict labels for given issue.
-        Args:
-            repo_owner: repo owner
-            repo_name: repo name
-            issue_num: issue index
-        Return
-        ------
-        dict
-            {'labels': list, 'probabilities': list}
-        numpy.ndarray
-            shape: (1600,)
-        """
-        logging.info(f'Predicting labels for the issue #{issue_num} from {repo_owner}/{repo_name}')
-        # get probabilities of labels for an issue
-        label_probabilities, issue_embedding = self.predict_issue_probability(repo_owner, repo_name, issue_num)
-
-        # get label info from local file
-        label_columns = self.load_label_columns()
-        label_names = label_columns['labels']
-        label_thresholds = label_columns['probability_thresholds']
-
-        # check thresholds to get labels that need to be predicted
-        predictions = {'labels': [], 'probabilities': []}
-        for i in range(len(label_probabilities)):
-            # if the threshold of any label is None, just ignore it
-            # because the label does not meet both of precision & recall thresholds
-            if label_thresholds[i] and label_probabilities[i] >= label_thresholds[i]:
-                predictions['labels'].append(label_names[i])
-                predictions['probabilities'].append(label_probabilities[i])
-        return predictions, issue_embedding
-
-    def filter_specified_labels(self, repo_owner, repo_name, predictions):
-        """
-        Only select those labels which are specified by yaml file to be predicted.
-        If there is no setting in the yaml file, return all predicted items.
-        Args:
-            repo_owner: repo owner, str
-            repo_name: repo name, str
-            prediction: predicted result from `predict_labels()` function
-                        dict {'labels': list, 'probabilities': list}
-        """
-        label_names = []
-        label_probabilities = []
-        # handle the yaml file
-        yaml = get_yaml(owner=repo_owner, repo=repo_name)
         # user may set the labels they want to predict
-        if yaml and 'predicted-labels' in yaml:
-            for name, proba in zip(predictions['labels'], predictions['probabilities']):
-                if name in yaml['predicted-labels']:
-                    label_names.append(name)
-                    label_probabilities.append(proba)
+        if "predicted-labels" in repo_config:
+            allowed = set(repo_config["predicted-labels"])
+            current_labels = list(filtered.keys())
+            for k in current_labels:
+                if k not in allowed:
+                    del filtered[k]
         else:
-            logging.warning(f'YAML file does not contain `predicted-labels`, '
-                            'bot will predict all labels with enough confidence')
-            # if user do not set `predicted-labels`,
-            # predict all labels with enough confidence
-            label_names = predictions['labels']
-            label_probabilities = predictions['probabilities']
-        return label_names, label_probabilities
+            logging.info(f'{repo_owner}/{repo_name} config file does not contain `predicted-labels`, '
+                         f'bot will predict all labels with enough confidence')
+
+        return filtered
 
     def add_labels_to_issue(self, installation_id, repo_owner, repo_name,
                             issue_num, predictions):
@@ -322,30 +246,50 @@ def add_labels_to_issue(self, installation_id, repo_owner, repo_name,
             repo_owner: repo owner
             repo_name: repo name
             issue_num: issue index
-            prediction: predicted result from `predict_labels()` function
-                        dict {'labels': list, 'probabilities': list}
+            predictions: dict str -> float; dictionary of labels and their
+                predicted probability
         """
-        # take an action if the prediction is confident enough
-        if predictions['labels']:
-            label_names, label_probabilities = self.filter_specified_labels(repo_owner,
-                                                                            repo_name,
-                                                                            predictions)
-        else:
-            label_names = []
-
-        # get the isssue handle
-        issue = get_issue_handle(installation_id, repo_owner, repo_name, issue_num)
+        # TODO(jlewi): Should we cache the GitHub App? What about token
+        # expiration?
+        ghapp = github_app.GitHubApp.create_from_env()
+        # handle the yaml file
+        repo_config = github_util.get_yaml(owner=repo_owner, repo=repo_name,
+                                           ghapp=ghapp)
+
+        predictions = self.apply_repo_config(repo_config, repo_owner, repo_name,
+                                             predictions, ghapp)
+
+        if not installation_id:
+            logging.info("No GitHub App installation id provided; fetching it")
+            installation_id = ghapp.get_installation_id(repo_owner, repo_name)
+
+        install = ghapp.get_installation(installation_id)
+        issue = install.issue(repo_owner, repo_name, issue_num)
+
+        label_names = list(predictions.keys())
 
         if label_names:
             # create message
-            message = """Issue-Label Bot is automatically applying the labels `{labels}` to this issue, with the confidence of {confidence}.
-            Please mark this comment with :thumbsup: or :thumbsdown: to give our bot feedback!
-            Links: [app homepage](https://github.com/marketplace/issue-label-bot), [dashboard]({app_url}data/{repo_owner}/{repo_name}) and [code](https://github.com/hamelsmu/MLapp) for this bot.
-            """.format(labels="`, `".join(label_names),
-                       confidence=", ".join(["{:.2f}".format(p) for p in label_probabilities]),
-                       app_url=self.app_url,
-                       repo_owner=repo_owner,
-                       repo_name=repo_name)
+            # Create a markdown table with probabilities.
+ rows = ["| Label | Probability |", + "| ------------- | ------------- |"] + + for l, p in predictions.items(): + rows.append("| {} | {:.2f} |".format(l, p)) + + lines = ["Issue-Label Bot is automatically applying the labels:", + ""] + lines.extend(rows) + lines.append("") + lines.append("Please mark this comment with :thumbsup: or :thumbsdown: " + "to give our bot feedback! ") + lines.append("Links: [app homepage](https://github.com/marketplace/issue-label-bot), " + "[dashboard]({app_url}data/{repo_owner}/{repo_name}) and " + "[code](https://github.com/hamelsmu/MLapp) for this bot.".format( + app_url=self.app_url, + repo_owner=repo_owner, + repo_name=repo_name)) + message = "\n".join(lines) # label the issue using the GitHub api issue.add_labels(*label_names) logging.info(f'Add `{"`, `".join(label_names)}` to the issue # {issue_num}')