Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add config change #2604

Merged
merged 41 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
0a97d55
feat: add a generation config change class
JoeWang1127 Mar 26, 2024
233a105
refact config comparator
JoeWang1127 Mar 26, 2024
86d9111
fix unit tests
JoeWang1127 Mar 26, 2024
663bc06
add proto_path_utils.py
JoeWang1127 Mar 26, 2024
f4bc0f1
refactor unit tests
JoeWang1127 Mar 26, 2024
501918b
refactor config comparator
JoeWang1127 Mar 26, 2024
e1fc03d
fix unit tests
JoeWang1127 Mar 26, 2024
9598a1f
format code
JoeWang1127 Mar 26, 2024
9f8a95d
refactor utilities
JoeWang1127 Mar 26, 2024
59e02bb
refactor proto_path_utils
JoeWang1127 Mar 26, 2024
4f58df7
add unit tests
JoeWang1127 Mar 26, 2024
8bed423
add get_qualified_commits
JoeWang1127 Mar 26, 2024
88da8d8
fix generate_pr_description.py
JoeWang1127 Mar 26, 2024
5fda1d7
add unit tests
JoeWang1127 Mar 26, 2024
14d419c
add tests
JoeWang1127 Mar 26, 2024
69715be
change test name
JoeWang1127 Mar 26, 2024
ae5acb3
Merge branch 'main' into feat/add-config-change
JoeWang1127 Mar 27, 2024
c032538
use meaningful variable for commit sha
JoeWang1127 Mar 28, 2024
fc6fa0b
use variable rather than none
JoeWang1127 Mar 28, 2024
3bed10a
change helper method name
JoeWang1127 Mar 28, 2024
50c1fa6
add method comment
JoeWang1127 Mar 28, 2024
0819266
return unique and sored library names
JoeWang1127 Mar 28, 2024
4848d95
add comment
JoeWang1127 Mar 28, 2024
1094041
restore owlbot
JoeWang1127 Mar 28, 2024
d37a2b4
format code
JoeWang1127 Mar 28, 2024
93d5c10
change param name
JoeWang1127 Mar 28, 2024
df5f310
use util to get output dir
JoeWang1127 Mar 28, 2024
3cbf80e
Merge branch 'main' into feat/add-config-change
JoeWang1127 Mar 28, 2024
f140c2b
refactor get_proto_path_to_library_name to generation_config
JoeWang1127 Apr 1, 2024
ac8f0a9
move utilities.py to utils/utilities.py
JoeWang1127 Apr 1, 2024
9dd21a5
refactor
JoeWang1127 Apr 2, 2024
8e61a2b
move utilities.sh to utils/utilities.sh
JoeWang1127 Apr 2, 2024
067d06d
refactor
JoeWang1127 Apr 2, 2024
7d7ac09
refactor
JoeWang1127 Apr 2, 2024
37364d6
refactor
JoeWang1127 Apr 2, 2024
3e7bed8
refactor
JoeWang1127 Apr 2, 2024
c5d5540
use file path
JoeWang1127 Apr 2, 2024
f9bfed6
format
JoeWang1127 Apr 2, 2024
4e18fff
restore
JoeWang1127 Apr 2, 2024
a784d06
change package
JoeWang1127 Apr 2, 2024
cf58988
explicitly declare shell scripts
JoeWang1127 Apr 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion library_generation/generate_composed_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import os
from pathlib import Path
from typing import List
import library_generation.utilities as util
import library_generation.utils.utilities as util
from library_generation.model.generation_config import GenerationConfig
from library_generation.model.gapic_config import GapicConfig
from library_generation.model.gapic_inputs import GapicInputs
Expand Down
6 changes: 2 additions & 4 deletions library_generation/generate_pr_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@
import click
from git import Commit, Repo
from library_generation.model.generation_config import from_yaml
from library_generation.utilities import find_versioned_proto_path
from library_generation.utils.proto_path_utils import find_versioned_proto_path
from library_generation.utils.commit_message_formatter import format_commit_message
from library_generation.utilities import get_file_paths
from library_generation.utils.commit_message_formatter import wrap_nested_commit
from library_generation.utils.commit_message_formatter import wrap_override_commit


Expand Down Expand Up @@ -87,7 +85,7 @@ def generate_pr_descriptions(
baseline_commit: str,
) -> str:
config = from_yaml(generation_config_yaml)
paths = get_file_paths(config)
paths = config.get_proto_path_to_library_name()
return __get_commit_messages(
repo_url=repo_url,
latest_commit=config.googleapis_commitish,
Expand Down
2 changes: 1 addition & 1 deletion library_generation/generate_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import library_generation.utilities as util
import library_generation.utils.utilities as util
import click
import os
from library_generation.generate_composed_library import generate_composed_library
Expand Down
158 changes: 158 additions & 0 deletions library_generation/model/config_change.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
from enum import Enum
from typing import Optional
from git import Commit, Repo
from library_generation.model.generation_config import GenerationConfig
from library_generation.model.library_config import LibraryConfig
from library_generation.utils.utilities import sh_util
from library_generation.utils.proto_path_utils import find_versioned_proto_path


class ChangeType(Enum):
GOOGLEAPIS_COMMIT = 1
REPO_LEVEL_CHANGE = 2
LIBRARIES_ADDITION = 3
# As of Mar. 2024, we decide not to produce this type of change because we
# still need to manually remove the libray.
# LIBRARIES_REMOVAL = 4
LIBRARY_LEVEL_CHANGE = 5
GAPIC_ADDITION = 6
# As of Mar. 2024, we decide not to produce this type of change because we
# still need to manually remove the libray.
# GAPIC_REMOVAL = 7


class HashLibrary:
"""
Data class to group a LibraryConfig object and its hash value together.
"""

def __init__(self, hash_value: int, library: LibraryConfig):
self.hash_value = hash_value
self.library = library


class LibraryChange:
def __init__(self, changed_param: str, latest_value: str, library_name: str = ""):
self.changed_param = changed_param
self.latest_value = latest_value
self.library_name = library_name


class QualifiedCommit:
def __init__(self, commit: Commit, libraries: set[str]):
self.commit = commit
self.libraries = libraries


class ConfigChange:
ALL_LIBRARIES_CHANGED = None

def __init__(
self,
change_to_libraries: dict[ChangeType, list[LibraryChange]],
baseline_config: GenerationConfig,
latest_config: GenerationConfig,
):
self.change_to_libraries = change_to_libraries
self.baseline_config = baseline_config
self.latest_config = latest_config

def get_changed_libraries(self) -> Optional[list[str]]:
"""
Returns a unique, sorted list of library name of changed libraries.
None if there is a repository level change, which means all libraries
in the latest_config will be generated.
:return: library names of change libraries.
"""
if ChangeType.REPO_LEVEL_CHANGE in self.change_to_libraries:
return ConfigChange.ALL_LIBRARIES_CHANGED
library_names = set()
for change_type, library_changes in self.change_to_libraries.items():
if change_type == ChangeType.GOOGLEAPIS_COMMIT:
library_names.update(self.__get_library_names_from_qualified_commits())
else:
library_names.update(
[library_change.library_name for library_change in library_changes]
)
return sorted(list(library_names))
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Return a list of unique, sorted library names


def get_qualified_commits(
self,
repo_url: str = "https://github.com/googleapis/googleapis.git",
) -> list[QualifiedCommit]:
"""
Returns qualified commits from configuration change.

A qualified commit is a commit that changes at least one file (excluding
BUILD.bazel) within a versioned proto path in the given proto_paths.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know we made the decision to exclude BUILD.bazel previously, but we may need to revisit it for scenarios that the changes include transport/numeric_enums/include_samples. It means we may have to parse BUILD.bazel in this step, which would definitely make it more complicated. It was fine because we regenerate the whole repo anyway, but if we start excluding libraries from regeneration, then we need to be more careful.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we exclude BUILD.bazel because we don't want changes in other languages appear in commit messages, e.g., [ruby] xxx.

How about I change the regex in filtering commit message to exclude irrelevant message, rather than excluding from generation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no enforcement of the commit messages, so I don't think we should use the content of it to determine if we should include it in our release notes or generation. For example, this commit touched Go and Node but there is no mention of languages in the title.

If we have Bazel integration, we might be able to pass these parameters to hermetic build scripts. But before it happens, I think we may have to parse the BUILD.bazel file to see if there is any changes to transport/numeric_enums/include_samples in Java targets. Of course this can be a separate a PR and slightly lower priority as they don't change often.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we may have to parse the BUILD.bazel file to see if there is any changes to transport/numeric_enums/include_samples in Java targets. Of course this can be a separate a PR and slightly lower priority as they don't change often.

I agree. I'll add a task as the next step and keep this part as-is.

:param repo_url: the repository contains the commit history.
:return: QualifiedCommit objects.
"""
tmp_dir = sh_util("get_output_folder")
shutil.rmtree(tmp_dir, ignore_errors=True)
os.mkdir(tmp_dir)
# we only need commit history, thus shadow clone is enough.
repo = Repo.clone_from(url=repo_url, to_path=tmp_dir, filter=["blob:none"])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this was done in previous PRs. Based on our recent discussions, is it possible to get the commit history remotely? Without cloning the repo? Creating new temp folders could have other implications like preventing us from running images in parallel.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did some digging but I can't find one without git clone.

I don't think we need to focus on parallel execution as it's not the main goal of this PR. Also, we can achieve parallelism if we can randomize the temp folder so that no thread will clone the repo into the same folder.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's a focus either but something we should keep in mind. Creating temp folders and writing files has other implications, e.g. it's usually slower than making a request, it complicates the scripts that we have to clean the temp folders and files up etc.
That being said, it's OK if there is no easy way at this moment. We can refactor how we pull the proto definitions in general in the future. e.g. I see that we are currently downloading proto definitions for every library, where we could download the whole googleapis repo as the first step of generate_repo.py. This actually brings another question that if googleapis_commitish is overridden on library level, our release notes might be generated incorrectly.
In short, this PR is good to go as it is, but we need to put the above tasks into consideration and track them somewhere.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that we are currently downloading proto definitions for every library,

The googleapis repo is downloaded only once unless there are googleapis commit in library level.

Since this use case is rarely used, how about removing it?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on discussion, we keep this piece of code as-is.

commit = repo.commit(self.latest_config.googleapis_commitish)
proto_paths = self.latest_config.get_proto_path_to_library_name()
qualified_commits = []
while str(commit.hexsha) != self.baseline_config.googleapis_commitish:
qualified_commit = ConfigChange.__create_qualified_commit(
proto_paths=proto_paths, commit=commit
)
if qualified_commit is not None:
qualified_commits.append(qualified_commit)
commit_parents = commit.parents
if len(commit_parents) == 0:
break
commit = commit_parents[0]
shutil.rmtree(tmp_dir, ignore_errors=True)
return qualified_commits

def __get_library_names_from_qualified_commits(self) -> list[str]:
qualified_commits = self.get_qualified_commits()
library_names = []
for qualified_commit in qualified_commits:
library_names.extend(qualified_commit.libraries)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand correctly, library_names could end up with repeated library names. Is this handled in any way?
If not, we can remove duplicates with something like return list(set(library_names))

Copy link
Collaborator Author

@JoeWang1127 JoeWang1127 Mar 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch.

I updated the method to return a list of unique library names in ascending order and added a unit test to verify it.

return library_names

@staticmethod
def __create_qualified_commit(
proto_paths: dict[str, str], commit: Commit
) -> Optional[QualifiedCommit]:
"""
Returns a qualified commit from the given Commit object; otherwise None.

:param proto_paths: a mapping from versioned proto_path to library_name
:param commit: a GitHub commit object.
:return: qualified commits.
"""
libraries = set()
for file in commit.stats.files.keys():
if file.endswith("BUILD.bazel"):
continue
versioned_proto_path = find_versioned_proto_path(file)
if versioned_proto_path in proto_paths:
# Even though a commit usually only changes one
# library, we don't want to miss generating a
# library because the commit may change multiple
# libraries.
libraries.add(proto_paths[versioned_proto_path])
if len(libraries) == 0:
return None
return QualifiedCommit(commit=commit, libraries=libraries)
12 changes: 12 additions & 0 deletions library_generation/model/generation_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,18 @@ def __init__(
# monorepos have more than one library defined in the config yaml
self.is_monorepo = len(libraries) > 1

def get_proto_path_to_library_name(self) -> dict[str, str]:
"""
Get versioned proto_path to library_name mapping from configuration.

:return: versioned proto_path to library_name mapping
"""
paths = {}
for library in self.libraries:
for gapic_config in library.gapic_configs:
paths[gapic_config.proto_path] = library.get_library_name()
return paths


def from_yaml(path_to_yaml: str) -> GenerationConfig:
"""
Expand Down
2 changes: 1 addition & 1 deletion library_generation/test/compare_poms.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
The only comparison points are: element path (e.g. project/dependencies) and element text
There is a special case for `dependency`, where the maven coordinates are prepared as well
"""
from library_generation.utilities import eprint
from library_generation.utils.utilities import eprint
import xml.etree.ElementTree as et
from collections import Counter
import sys
Expand Down
2 changes: 1 addition & 1 deletion library_generation/test/integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from library_generation.generate_repo import generate_from_yaml
from library_generation.model.generation_config import from_yaml, GenerationConfig
from library_generation.test.compare_poms import compare_xml
from library_generation.utilities import (
from library_generation.utils.utilities import (
sh_util as shell_call,
run_process_and_print_output,
)
Expand Down
Loading
Loading