Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎉 Source Github: PullRequestCommentReactions - re-implemented using GraphQL #14795

Merged
merged 21 commits into from
Aug 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
2988f08
PullRequestCommentReactions - re-implemented using GraphQL
grubberr Jul 18, 2022
0d3d0a1
Merge branch 'master' into grubberr/14408-source-github
grubberr Jul 21, 2022
9eb7bf5
Merge branch 'master' into grubberr/14408-source-github
grubberr Jul 22, 2022
4cf8695
stream pull_request_comment_reactions fix abnormal_state
grubberr Jul 22, 2022
540e686
Merge branch 'master' into grubberr/14408-source-github
grubberr Jul 22, 2022
38a4c64
test_stream_pull_request_comment_reactions_read added
grubberr Jul 23, 2022
b12ed19
Merge branch 'master' into grubberr/14408-source-github
grubberr Jul 23, 2022
8170d15
pass Objects as param for CursorStorage
grubberr Jul 23, 2022
0531081
objects -> typenames
grubberr Jul 23, 2022
a5fc7ad
bump version to 0.2.43
grubberr Jul 24, 2022
9e28c10
Merge branch 'master' into grubberr/14408-source-github
grubberr Jul 25, 2022
75419d0
pull_request_comment_reactions.json updated
grubberr Jul 25, 2022
8f66781
Merge branch 'master' into grubberr/14408-source-github
grubberr Jul 27, 2022
fa61592
extract function _get_operation
grubberr Jul 27, 2022
270ea1a
Merge branch 'master' into grubberr/14408-source-github
grubberr Aug 1, 2022
5e965b0
Merge branch 'master' into grubberr/14408-source-github
grubberr Aug 2, 2022
b1ed578
type hints added
grubberr Aug 2, 2022
c1bdc39
add docs for get_query_*
grubberr Aug 2, 2022
05ec7e5
more docs
grubberr Aug 2, 2022
7024178
more docs
grubberr Aug 2, 2022
89fbf1b
auto-bump connector version [ci skip]
octavia-squidington-iii Aug 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@
- name: GitHub
sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
dockerRepository: airbyte/source-github
dockerImageTag: 0.2.43
dockerImageTag: 0.2.44
documentationUrl: https://docs.airbyte.io/integrations/sources/github
icon: github.svg
sourceType: api
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2595,7 +2595,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-github:0.2.43"
- dockerImage: "airbyte/source-github:0.2.44"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/github"
connectionSpecification:
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-github/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.43
LABEL io.airbyte.version=0.2.44
LABEL io.airbyte.name=airbyte/source-github
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,7 @@
},
"pull_request_comment_reactions": {
"airbytehq/integration-test": {
"699253726": {
"created_at": "2121-12-31T23:59:59Z"
}
"created_at": "2121-12-31T23:59:59Z"
}
},
"pull_request_stats": {
Expand Down
196 changes: 180 additions & 16 deletions airbyte-integrations/connectors/source-github/source_github/graphql.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,30 @@
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

import heapq
import itertools
from typing import Optional

import sgqlc.operation
from sgqlc.operation import Selector

from . import github_schema

_schema = github_schema
_schema_root = _schema.github_schema


def select_user_fields(user):
    """
    Apply the shared user field selection to the given user selector.

    The same selection is used wherever a user object is requested, so every
    query exposes users under the same set of keys.
    """
    # Field name -> value handed to `__fields__`. String values differ from the
    # field name for most entries (presumably sgqlc uses them as aliases -
    # TODO confirm against the sgqlc docs); `login` is simply switched on.
    user_fields = {
        "id": "node_id",
        "database_id": "id",
        "login": True,
        "avatar_url": "avatar_url",
        "url": "html_url",
        "is_site_admin": "site_admin",
    }
    user.__fields__(**user_fields)


def get_query_pull_requests(owner, name, first, after, direction):
kwargs = {"first": first, "order_by": {"field": "UPDATED_AT", "direction": direction}}
if after:
Expand Down Expand Up @@ -41,14 +56,7 @@ def get_query_pull_requests(owner, name, first, after, direction):
reviews.total_count()
reviews.nodes.comments.__fields__(total_count=True)
user = pull_requests.nodes.merged_by(__alias__="merged_by").__as__(_schema_root.User)
user.__fields__(
id="node_id",
database_id="id",
login=True,
avatar_url="avatar_url",
url="html_url",
is_site_admin="site_admin",
)
select_user_fields(user)
pull_requests.page_info.__fields__(has_next_page=True, end_cursor=True)
return str(op)

Expand Down Expand Up @@ -87,12 +95,168 @@ def get_query_reviews(owner, name, first, after, number=None):
)
reviews.nodes.commit.oid()
user = reviews.nodes.author(__alias__="user").__as__(_schema_root.User)
user.__fields__(
id="node_id",
database_id="id",
login=True,
avatar_url="avatar_url",
url="html_url",
is_site_admin="site_admin",
)
select_user_fields(user)
return str(op)


class QueryReactions:

# AVERAGE_REVIEWS - the optimal number of reviews to request inside every pull request in one query.
# If we request too many (up to 100 are allowed) every query spends too many rate-limit points, see
# https://docs.github.com/en/graphql/overview/resource-limitations#calculating-a-rate-limit-score-before-running-the-call
# If we request too few we need additional sub-queries to fetch the rest of the reviews inside a specific pull request.
# AVERAGE_COMMENTS and AVERAGE_REACTIONS are the same kind of page size for comments inside a review
# and for reactions inside a comment.
AVERAGE_REVIEWS = 5
AVERAGE_COMMENTS = 2
AVERAGE_REACTIONS = 2
Comment on lines +108 to +110
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unclear what these numbers mean and where they come from... code comment would be good.


def get_query_root_repository(self, owner: str, name: str, first: int, after: Optional[str] = None):
    """
    Build the GraphQL query that reaches reactions starting from the repository:
    query {
      repository {
        pull_requests(first: page_size) {
          reviews(first: AVERAGE_REVIEWS) {
            comments(first: AVERAGE_COMMENTS) {
              reactions(first: AVERAGE_REACTIONS) {
              }
            }
          }
        }
      }
    }

    `first` is the page size for pull requests; `after` is only sent when it is truthy.
    The nested levels use the AVERAGE_* page sizes of this class.
    Returns the operation converted to a string.
    """
    operation = self._get_operation()
    repo = operation.repository(owner=owner, name=name)
    repo.name()
    repo.owner.login()

    # Pagination arguments for the pull request connection.
    page_args = {"first": first, "after": after} if after else {"first": first}
    prs = repo.pull_requests(**page_args)
    prs.page_info.__fields__(has_next_page=True, end_cursor=True)
    prs.total_count()
    prs.nodes.id(__alias__="node_id")

    # Descend one level at a time: reviews -> comments -> reactions.
    pr_reviews = self._select_reviews(prs.nodes, first=self.AVERAGE_REVIEWS)
    review_comments = self._select_comments(pr_reviews.nodes, first=self.AVERAGE_COMMENTS)
    self._select_reactions(review_comments.nodes, first=self.AVERAGE_REACTIONS)
    return str(operation)

def get_query_root_pull_request(self, node_id: str, first: int, after: str):
    """
    Build the GraphQL query that reaches reactions starting from a single pull request:
    query {
      pull_request {
        reviews(first: AVERAGE_REVIEWS) {
          comments(first: AVERAGE_COMMENTS) {
            reactions(first: AVERAGE_REACTIONS) {
            }
          }
        }
      }
    }

    The pull request is looked up by `node_id`; `first` and `after` page through its reviews,
    while comments and reactions use the AVERAGE_* page sizes of this class.
    Returns the operation converted to a string.
    """
    operation = self._get_operation()
    pr = operation.node(id=node_id).__as__(_schema_root.PullRequest)
    pr.id(__alias__="node_id")
    pr.repository.name()
    pr.repository.owner.login()

    # Descend one level at a time: reviews -> comments -> reactions.
    pr_reviews = self._select_reviews(pr, first, after)
    review_comments = self._select_comments(pr_reviews.nodes, first=self.AVERAGE_COMMENTS)
    self._select_reactions(review_comments.nodes, first=self.AVERAGE_REACTIONS)
    return str(operation)

def get_query_root_review(self, node_id: str, first: int, after: str):
    """
    Build the GraphQL query that reaches reactions starting from a single review:
    query {
      review {
        comments(first: AVERAGE_COMMENTS) {
          reactions(first: AVERAGE_REACTIONS) {
          }
        }
      }
    }

    The review is looked up by `node_id`; `first` and `after` page through its comments,
    while reactions use the AVERAGE_REACTIONS page size.
    Returns the operation converted to a string.
    """
    operation = self._get_operation()
    pr_review = operation.node(id=node_id).__as__(_schema_root.PullRequestReview)
    pr_review.id(__alias__="node_id")
    pr_review.repository.name()
    pr_review.repository.owner.login()

    # Descend one level at a time: comments -> reactions.
    review_comments = self._select_comments(pr_review, first, after)
    self._select_reactions(review_comments.nodes, first=self.AVERAGE_REACTIONS)
    return str(operation)

def get_query_root_comment(self, node_id: str, first: int, after: str):
    """
    Build the GraphQL query that reaches reactions starting from a single review comment:
    query {
      comment {
        reactions(first: AVERAGE_REACTIONS) {
        }
      }
    }

    The comment is looked up by `node_id`; `first` and `after` page through its reactions.
    Returns the operation converted to a string.
    """
    operation = self._get_operation()
    review_comment = operation.node(id=node_id).__as__(_schema_root.PullRequestReviewComment)
    review_comment.id(__alias__="node_id")
    review_comment.database_id(__alias__="id")
    review_comment.repository.name()
    review_comment.repository.owner.login()
    self._select_reactions(review_comment, first, after)
    return str(operation)

def _select_reactions(self, comment: Selector, first: int, after: Optional[str] = None):
    """
    Select a page of reactions (page info, total count, reaction fields and user) on `comment`.

    `after` is only sent when it is truthy. Returns the reactions selection.
    """
    page_args = {"first": first}
    if after:
        page_args.update(after=after)

    selection = comment.reactions(**page_args)
    selection.page_info.__fields__(has_next_page=True, end_cursor=True)
    selection.total_count()
    selection.nodes.__fields__(id="node_id", database_id="id", content=True, created_at="created_at")
    reaction_user = selection.nodes.user()
    select_user_fields(reaction_user)
    return selection

def _select_comments(self, review: Selector, first: int, after: Optional[str] = None):
    """
    Select a page of comments (page info, total count and both ids of every node) on `review`.

    `after` is only sent when it is truthy. Returns the comments selection.
    """
    page_args = {"first": first}
    if after:
        page_args.update(after=after)

    selection = review.comments(**page_args)
    selection.page_info.__fields__(has_next_page=True, end_cursor=True)
    selection.total_count()
    selection.nodes.id(__alias__="node_id")
    selection.nodes.database_id(__alias__="id")
    return selection

def _select_reviews(self, pull_request: Selector, first: int, after: Optional[str] = None):
    """
    Select a page of reviews (page info, total count and both ids of every node) on `pull_request`.

    `after` is only sent when it is truthy. Returns the reviews selection.
    """
    page_args = {"first": first}
    if after:
        page_args.update(after=after)

    selection = pull_request.reviews(**page_args)
    selection.page_info.__fields__(has_next_page=True, end_cursor=True)
    selection.total_count()
    selection.nodes.id(__alias__="node_id")
    selection.nodes.database_id(__alias__="id")
    return selection

def _get_operation(self):
    """
    Create a new, empty sgqlc operation bound to the query type of the schema.
    """
    query_type = _schema_root.query_type
    return sgqlc.operation.Operation(query_type)


class CursorStorage:
    """
    Priority queue of pagination cursors, keyed by type name.

    Type names listed later in `typenames` get a smaller priority number, so
    their cursors are handed out first. Cursors of the same type name come
    back in the order they were added.
    """

    def __init__(self, typenames):
        """
        :param typenames: ordered collection of type names; the last one has
            the highest priority when cursors are retrieved.
        """
        # reversed(): the last type name gets priority 0 and is popped first.
        self.typename_to_prio = {o: prio for prio, o in enumerate(reversed(typenames))}
        # Monotonic tie-breaker: keeps insertion order within one priority and
        # guarantees heapq never has to compare the payload tuples.
        self.count = itertools.count()
        self.storage = []

    # NOTE(review): the annotations assume cursors and parent ids are strings
    # and total_count is an integer - confirm against the calling stream code.
    def add_cursor(self, typename: str, cursor: str, total_count: int, parent_id: Optional[str] = None) -> None:
        """
        Remember a cursor for later retrieval by `get_cursor`.

        :raises KeyError: if `typename` was not passed to the constructor.
        """
        try:
            priority = self.typename_to_prio[typename]
        except KeyError:
            known = list(self.typename_to_prio)
            raise KeyError(f"Unknown typename {typename!r}, expected one of {known}") from None
        heapq.heappush(self.storage, (priority, next(self.count), (typename, cursor, total_count, parent_id)))

    def get_cursor(self) -> Optional[dict]:
        """
        Pop the stored cursor with the highest priority.

        :return: dict with keys "typename", "cursor", "total_count" and
            "parent_id", or None when nothing is stored.
        """
        if not self.storage:
            return None
        _, _, (typename, cursor, total_count, parent_id) = heapq.heappop(self.storage)
        return {"typename": typename, "cursor": cursor, "total_count": total_count, "parent_id": parent_id}
Original file line number Diff line number Diff line change
@@ -1,4 +1,28 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$ref": "reaction.json"
"type": "object",
"properties": {
"id": {
"type": ["null", "integer"]
},
"node_id": {
"type": ["null", "string"]
},
"content": {
"type": ["null", "string"]
},
"created_at": {
"type": "string",
"format": "date-time"
},
"user": {
"$ref": "user_graphql.json"
},
"repository": {
"type": "string"
},
"comment_id": {
"type": "integer"
}
}
}
Loading