Skip to content

Commit

Permalink
Delete PR docs using GraphQL (#286)
Browse files Browse the repository at this point in the history
  • Loading branch information
mishig25 committed Aug 25, 2022
1 parent 03c62da commit d8dc51f
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 42 deletions.
26 changes: 1 addition & 25 deletions .github/workflows/delete_doc_comment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,33 +37,9 @@ jobs:
echo "package_name=${{ inputs.package_name }}" >> $GITHUB_ENV
fi
- name: Setup environment
run: |
rm -rf doc-build-dev
git clone --depth 1 https://HuggingFaceDocBuilderDev:${{ env.write_token }}@github.com/huggingface/doc-build-dev
- name: Setup git
run: |
git config --global user.name "Hugging Face Doc Builder"
git config --global user.email docs@huggingface.co
- name: Push to repositories
run: |
cd doc-build-dev
git pull
rm -rf ${{ env.package_name }}/pr_${{ inputs.pr_number }}
ls ${{ env.package_name }}
git status
if [[ `git status --porcelain` ]]; then
git add .
git commit -m "Closed PR ${{ inputs.pr_number }} in ${{ env.package_name }}"
git push origin main ||
(echo "Failed on the first try, rebasing and pushing again" && git pull --rebase && git push origin main) ||
(echo "Failed on the second try, rebasing and pushing again" && git pull --rebase && git push origin main)
else
echo "Branch was already deleted, nothing to do."
fi
doc-builder push ${{ env.package_name }} --doc_build_repo_id "huggingface/doc-build-dev" --token ${{ env.write_token }} --commit_msg "Closed PR ${{ inputs.pr_number }} in ${{ env.package_name }}" --n_retries 5 --doc_version_folder "pr_${{ inputs.pr_number }}" --is_remove
shell: bash

- name: Find doc comment
Expand Down
98 changes: 81 additions & 17 deletions src/doc_builder/commands/push.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ def create_additions(library_name: str) -> List[Dict]:
return additions


def create_deletions(repo_id: str, library_name: str, token: str) -> List[Dict]:
def create_deletions(
repo_id: str, library_name: str, token: str, doc_version_folder: Optional[str] = None, is_delete_all: bool = False
) -> List[Dict]:
"""
Given `repo_id/library_name` path, returns [FileDeletion!]!: [{path: "some_path"}, ...]
see more here: https://docs.github.com/en/graphql/reference/input-objects#filechanges
Expand All @@ -76,9 +78,12 @@ def create_deletions(repo_id: str, library_name: str, token: str) -> List[Dict]:
url = node["url"]

# 2. find url for `doc-build-dev/{library_name}/{doc_version}` ex: doc-build-dev/accelerate/pr_365
root_folder = Path(library_name)
doc_version_folder = next(filter(lambda x: not x.is_file(), root_folder.glob("*")), None).relative_to(root_folder)
doc_version_folder = str(doc_version_folder)
if doc_version_folder is None:
root_folder = Path(library_name)
doc_version_folder = next(filter(lambda x: not x.is_file(), root_folder.glob("*")), None).relative_to(
root_folder
)
doc_version_folder = str(doc_version_folder)
res = requests.get(url, headers={"Authorization": f"bearer {token}"})
if res.status_code != 200:
raise Exception(f"create_deletions failed (GET tree root/{repo_id}): {res.message}")
Expand All @@ -96,17 +101,24 @@ def create_deletions(repo_id: str, library_name: str, token: str) -> List[Dict]:
json = res.json()
tree = json["tree"]

# 4. list paths in currently built doc folder
built_docs_path = Path(f"{library_name}/{doc_version_folder}").absolute()
built_docs_files = [x for x in built_docs_path.glob("**/*") if x.is_file()]
built_docs_files_relative = set([str(f.relative_to(built_docs_path)) for f in built_docs_files])

# 5. deletions = set difference between step 3 & 4
deletions = [
{"path": f"{library_name}/{doc_version_folder}/{node['path']}"}
for node in tree
if node["type"] == "blob" and node["path"] not in built_docs_files_relative
]
if is_delete_all:
# 4. deletions for all files found in current git tree
deletions = [
{"path": f"{library_name}/{doc_version_folder}/{node['path']}"} for node in tree if node["type"] == "blob"
]
else:
# only delete files that were not part of current doc build
# 4. list paths in currently built doc folder
built_docs_path = Path(f"{library_name}/{doc_version_folder}").absolute()
built_docs_files = [x for x in built_docs_path.glob("**/*") if x.is_file()]
built_docs_files_relative = set([str(f.relative_to(built_docs_path)) for f in built_docs_files])

# 5. deletions = set difference between step 3 & 4
deletions = [
{"path": f"{library_name}/{doc_version_folder}/{node['path']}"}
for node in tree
if node["type"] == "blob" and node["path"] not in built_docs_files_relative
]

return deletions

Expand Down Expand Up @@ -188,16 +200,25 @@ def create_commit(

def push_command(args):
    """
    Entry point of `doc-builder push $args`.

    Commits file additions and/or deletions using Github GraphQL rather than `git`.
    After validating `args.n_retries`, the work is handed to `push_command_remove`
    when `args.is_remove` is set and to `push_command_add` otherwise.

    Raises:
        ValueError: if `args.n_retries` is smaller than 1.
    """
    if args.n_retries < 1:
        raise ValueError(f"CLI arg `n_retries` MUST be positive & non-zero; supplied value was {args.n_retries}")

    # Only the selected handler name is looked up, exactly like the if/else form.
    handler = push_command_remove if args.is_remove else push_command_add
    handler(args)


def push_command_add(args):
"""
Commit file changes (additions & deletions) using Github GraphQL rather than `git`.
Used in: build_main_documentation.yml & build_pr_documentation.yml
"""
max_n_retries = args.n_retries + 1
number_of_retries = args.n_retries
n_seconds_sleep = 5

# file deletions
deletions = create_deletions(args.doc_build_repo_id, args.library_name, args.token)
# file additions
Expand Down Expand Up @@ -237,6 +258,38 @@ def push_command(args):
logging.debug(f"commit_additions took {time_end-time_start:.4f} seconds or {(time_end-time_start)/60.0:.2f} mins")


def push_command_remove(args):
    """
    Commit file deletions only using Github GraphQL rather than `git`.
    Used in: delete_doc_comment.yml

    The list of deletions is computed once by `create_deletions` (called with
    `is_delete_all=True`, i.e. one deletion entry per blob it finds for
    `args.library_name` / `args.doc_version`), then committed with an empty list
    of additions. The commit is attempted at most `args.n_retries` times, waiting
    a few seconds between attempts.

    Reads from `args`: `n_retries`, `doc_version`, `doc_build_repo_id`,
    `library_name`, `token`, `commit_msg`.

    Raises:
        RuntimeError: if the last allowed attempt fails; the underlying exception
            is chained as `__cause__`.
    """
    n_seconds_sleep = 5

    # file deletions
    deletions = create_deletions(
        args.doc_build_repo_id,
        args.library_name,
        args.token,
        doc_version_folder=args.doc_version,
        is_delete_all=True,
    )

    # `attempt` is 1-based so the log line reports the try that actually failed
    # (the previous countdown arithmetic reported the first failure as try #2).
    for attempt in range(1, args.n_retries + 1):
        try:
            # Create Github GraphQL client
            transport = RequestsHTTPTransport(
                url="https://api.github.com/graphql", headers={"Authorization": f"bearer {args.token}"}, verify=True
            )
            with Client(transport=transport, fetch_schema_from_transport=True, execute_timeout=None) as gql_client:
                # commit file deletions
                create_commit(gql_client, args.doc_build_repo_id, [], deletions, args.token, args.commit_msg)
            return
        except Exception as e:
            # Broad catch is deliberate: any failure of the commit is retried, and
            # the final one is re-raised below rather than swallowed.
            print(f"createCommitOnBranch error occurred: {e}")
            if attempt == args.n_retries:
                raise RuntimeError("create_commit deletions failed") from e
            print(f"Failed on try #{attempt}, pushing again in {n_seconds_sleep} seconds")
            sleep(n_seconds_sleep)


def push_command_parser(subparsers=None):
if subparsers is not None:
parser = subparsers.add_parser("push")
Expand All @@ -261,6 +314,17 @@ def push_command_parser(subparsers=None):
default="Github GraphQL createcommitonbranch commit",
)
parser.add_argument("--n_retries", type=int, help="Number of push retries in the event of conflict", default=1)
parser.add_argument(
"--doc_version",
type=str,
default=None,
help="Version of the generated documentation.",
)
parser.add_argument(
"--is_remove",
action="store_true",
help="Whether or not to remove entire folder ('--doc_version_folder') from git tree",
)

if subparsers is not None:
parser.set_defaults(func=push_command)
Expand Down

0 comments on commit d8dc51f

Please sign in to comment.