-
Notifications
You must be signed in to change notification settings - Fork 53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add config change #2604
feat: add config change #2604
Changes from 31 commits
0a97d55
233a105
86d9111
663bc06
f4bc0f1
501918b
e1fc03d
9598a1f
9f8a95d
59e02bb
4f58df7
8bed423
88da8d8
5fda1d7
14d419c
69715be
ae5acb3
c032538
fc6fa0b
3bed10a
50c1fa6
0819266
4848d95
1094041
d37a2b4
93d5c10
df5f310
3cbf80e
f140c2b
ac8f0a9
9dd21a5
8e61a2b
067d06d
7d7ac09
37364d6
3e7bed8
c5d5540
f9bfed6
4e18fff
a784d06
cf58988
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
# Copyright 2024 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
import os | ||
import shutil | ||
from enum import Enum | ||
from typing import Optional | ||
from git import Commit, Repo | ||
from library_generation.model.generation_config import GenerationConfig | ||
from library_generation.model.library_config import LibraryConfig | ||
from library_generation.utils.utilities import sh_util | ||
from library_generation.utils.proto_path_utils import find_versioned_proto_path | ||
|
||
|
||
class ChangeType(Enum):
    """
    Categories of change used as keys of ConfigChange.change_to_libraries.

    The values 4 and 7 are intentionally unused: they belong to the removal
    change types that are commented out below.
    """

    GOOGLEAPIS_COMMIT = 1
    REPO_LEVEL_CHANGE = 2
    LIBRARIES_ADDITION = 3
    # As of Mar. 2024, we decide not to produce this type of change because we
    # still need to manually remove the library.
    # LIBRARIES_REMOVAL = 4
    LIBRARY_LEVEL_CHANGE = 5
    GAPIC_ADDITION = 6
    # As of Mar. 2024, we decide not to produce this type of change because we
    # still need to manually remove the library.
    # GAPIC_REMOVAL = 7
|
||
|
||
class HashLibrary:
    """
    Data class to group a LibraryConfig object and its hash value together.
    """

    def __init__(self, hash_value: int, library: LibraryConfig):
        """
        :param hash_value: the hash value associated with ``library``.
        :param library: the LibraryConfig object the hash value belongs to.
        """
        self.hash_value = hash_value
        self.library = library
|
||
|
||
class LibraryChange:
    """
    Data class describing a single changed parameter.
    """

    def __init__(self, changed_param: str, latest_value: str, library_name: str = ""):
        """
        :param changed_param: name of the parameter that changed.
        :param latest_value: the latest value of the changed parameter.
        :param library_name: name of the library the change applies to;
            defaults to an empty string.
        """
        self.changed_param = changed_param
        self.latest_value = latest_value
        self.library_name = library_name
|
||
|
||
class QualifiedCommit:
    """
    Data class to group a git Commit object with the names of the libraries
    it affects.
    """

    def __init__(self, commit: Commit, libraries: set[str]):
        """
        :param commit: the git Commit object.
        :param libraries: names of the libraries associated with the commit.
        """
        self.commit = commit
        self.libraries = libraries
|
||
|
||
class ConfigChange:
    """
    Holds the changes between a baseline and a latest GenerationConfig,
    grouped by ChangeType, and answers which libraries are affected.
    """

    # Sentinel returned by get_changed_libraries() when every library in
    # latest_config is considered changed.
    ALL_LIBRARIES_CHANGED = None

    def __init__(
        self,
        change_to_libraries: dict[ChangeType, list[LibraryChange]],
        baseline_config: GenerationConfig,
        latest_config: GenerationConfig,
    ):
        """
        :param change_to_libraries: a mapping from change type to the
            library changes of that type.
        :param baseline_config: the configuration to compare against.
        :param latest_config: the configuration containing the latest values.
        """
        self.change_to_libraries = change_to_libraries
        self.baseline_config = baseline_config
        self.latest_config = latest_config

    def get_changed_libraries(self) -> Optional[list[str]]:
        """
        Returns a unique, sorted list of library names of changed libraries.
        None if there is a repository level change, which means all libraries
        in the latest_config will be generated.

        If a GOOGLEAPIS_COMMIT change is present, the library names are
        derived from the qualified commits, which requires cloning the
        googleapis repository (see get_qualified_commits).

        :return: library names of changed libraries.
        """
        if ChangeType.REPO_LEVEL_CHANGE in self.change_to_libraries:
            return ConfigChange.ALL_LIBRARIES_CHANGED
        library_names = set()
        for change_type, library_changes in self.change_to_libraries.items():
            if change_type == ChangeType.GOOGLEAPIS_COMMIT:
                library_names.update(self.__get_library_names_from_qualified_commits())
            else:
                library_names.update(
                    library_change.library_name for library_change in library_changes
                )
        return sorted(library_names)

    def get_qualified_commits(
        self,
        repo_url: str = "https://github.com/googleapis/googleapis.git",
    ) -> list[QualifiedCommit]:
        """
        Returns qualified commits from configuration change.

        A qualified commit is a commit that changes at least one file (excluding
        BUILD.bazel) within a versioned proto path in the given proto_paths.

        The history is walked from latest_config.googleapis_commitish back
        through first parents only, and stops when the commit whose sha equals
        baseline_config.googleapis_commitish is reached (that commit itself is
        not inspected) or when a commit without parents is reached.

        NOTE(review): excluding BUILD.bazel may need to be revisited, because
        changes to settings such as transport/numeric_enums/include_samples
        would be missed; per the review discussion this is kept as-is and
        tracked as a follow-up task.

        :param repo_url: the repository contains the commit history.
        :return: QualifiedCommit objects.
        """
        tmp_dir = sh_util("get_output_folder")
        # NOTE(review): cloning into this temp folder was flagged as a possible
        # obstacle to running in parallel; kept as-is per the review discussion.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        os.mkdir(tmp_dir)
        try:
            # we only need commit history, thus a blobless partial clone
            # (--filter=blob:none) is enough.
            repo = Repo.clone_from(url=repo_url, to_path=tmp_dir, filter=["blob:none"])
            commit = repo.commit(self.latest_config.googleapis_commitish)
            proto_paths = self.latest_config.get_proto_path_to_library_name()
            qualified_commits = []
            while str(commit.hexsha) != self.baseline_config.googleapis_commitish:
                qualified_commit = ConfigChange.__create_qualified_commit(
                    proto_paths=proto_paths, commit=commit
                )
                if qualified_commit is not None:
                    qualified_commits.append(qualified_commit)
                commit_parents = commit.parents
                if not commit_parents:
                    break
                commit = commit_parents[0]
            return qualified_commits
        finally:
            # Always remove the clone, even if cloning or walking the history
            # raised, so a failed run does not leave the folder behind.
            shutil.rmtree(tmp_dir, ignore_errors=True)

    def __get_library_names_from_qualified_commits(self) -> list[str]:
        """
        Returns a list of unique, sorted library names collected from all
        qualified commits.

        :return: library names in ascending order, without duplicates.
        """
        library_names = set()
        for qualified_commit in self.get_qualified_commits():
            library_names.update(qualified_commit.libraries)
        return sorted(library_names)

    @staticmethod
    def __create_qualified_commit(
        proto_paths: dict[str, str], commit: Commit
    ) -> Optional[QualifiedCommit]:
        """
        Returns a qualified commit from the given Commit object; otherwise None.

        :param proto_paths: a mapping from versioned proto_path to library_name
        :param commit: a git Commit object.
        :return: a QualifiedCommit if the commit changes at least one file
            (excluding BUILD.bazel) under a versioned proto path found in
            proto_paths; otherwise None.
        """
        libraries = set()
        for file in commit.stats.files:
            if file.endswith("BUILD.bazel"):
                continue
            versioned_proto_path = find_versioned_proto_path(file)
            if versioned_proto_path in proto_paths:
                # Even though a commit usually only changes one
                # library, we don't want to miss generating a
                # library because the commit may change multiple
                # libraries.
                libraries.add(proto_paths[versioned_proto_path])
        if not libraries:
            return None
        return QualifiedCommit(commit=commit, libraries=libraries)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Return a list of unique, sorted library names