diff --git a/.github/workflows/python-client.yml b/.github/workflows/python-client.yml index fb2dd17bc3..46fcd2f368 100644 --- a/.github/workflows/python-client.yml +++ b/.github/workflows/python-client.yml @@ -50,9 +50,6 @@ jobs: distribution: 'temurin' java-version: '21' - - name: Run regeneratePythonClient - run: ./gradlew regeneratePythonClient - - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: @@ -76,3 +73,7 @@ jobs: - name: Integration Tests run: | make client-integration-test + + - name: Run Polaris Client help maual + run: | + ./polaris --help diff --git a/.github/workflows/regtest.yml b/.github/workflows/regtest.yml index 00e79d4eaf..fc98bae408 100644 --- a/.github/workflows/regtest.yml +++ b/.github/workflows/regtest.yml @@ -51,9 +51,6 @@ jobs: - name: Fix permissions run: mkdir -p regtests/output && chmod 777 regtests/output && chmod 777 regtests/t_*/ref/* - - name: Run regeneratePythonClient - run: ./gradlew regeneratePythonClient - - name: Image build run: | ./gradlew \ diff --git a/.github/workflows/spark_client_regtests.yml b/.github/workflows/spark_client_regtests.yml index 754db264f6..850637e5ee 100644 --- a/.github/workflows/spark_client_regtests.yml +++ b/.github/workflows/spark_client_regtests.yml @@ -51,9 +51,6 @@ jobs: - name: Fix permissions run: mkdir -p regtests/output && chmod 777 regtests/output && chmod 777 regtests/t_*/ref/* - - name: Run regeneratePythonClient - run: ./gradlew regeneratePythonClient - - name: Project build without testing env: # publishToMavenLocal causes a GH API requests, use the token for those requests diff --git a/Makefile b/Makefile index 225b91249d..0423c39085 100644 --- a/Makefile +++ b/Makefile @@ -134,7 +134,7 @@ client-lint: client-setup-env ## Run linting checks for Polaris client .PHONY: client-regenerate client-regenerate: client-setup-env ## Regenerate the client code @echo "--- Regenerating client code ---" - @client/templates/regenerate.sh + @$(ACTIVATE_AND_CD) && python3 generate_clients.py @echo "--- Client code regeneration complete ---" .PHONY: client-unit-test diff --git a/build.gradle.kts b/build.gradle.kts index 4d52ad6dac..2d8e0bb7b7 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -142,18 +142,6 @@ tasks.named("rat").configure { excludes.add("**/*.png") } -tasks.register("regeneratePythonClient") { - description = "Regenerates the python client" - - workingDir = project.projectDir - commandLine("bash", "client/templates/regenerate.sh") - - dependsOn(":polaris-api-iceberg-service:processResources") - dependsOn(":polaris-api-management-service:processResources") - dependsOn(":polaris-api-catalog-service:processResources") - dependsOn(":polaris-api-management-model:processResources") -} - // Pass environment variables: // ORG_GRADLE_PROJECT_apacheUsername // ORG_GRADLE_PROJECT_apachePassword diff --git a/client/python/.openapi-generator-ignore b/client/python/.openapi-generator-ignore index 8fb602d90d..89de956192 100644 --- a/client/python/.openapi-generator-ignore +++ b/client/python/.openapi-generator-ignore @@ -52,5 +52,4 @@ git_push.sh setup.cfg tox.ini README.md - - +pyproject.toml diff --git a/client/python/generate_clients.py b/client/python/generate_clients.py new file mode 100644 index 0000000000..da9e8fb92f --- /dev/null +++ b/client/python/generate_clients.py @@ -0,0 +1,267 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys +import os.path +import subprocess +from pathlib import Path +import fnmatch + +# Paths +CLIENT_DIR = Path(__file__).parent +PROJECT_ROOT = CLIENT_DIR.parent.parent +HEADER_DIR = CLIENT_DIR.parent / "templates" +SPEC_DIR = os.path.join(PROJECT_ROOT, "spec") +POLARIS_MANAGEMENT_SPEC = os.path.join(SPEC_DIR, "polaris-management-service.yml") +ICEBERG_CATALOG_SPEC = os.path.join(SPEC_DIR, "iceberg-rest-catalog-open-api.yaml") +POLARIS_CATALOG_SPEC = os.path.join(SPEC_DIR, "polaris-catalog-service.yaml") +OPEN_API_GENERATOR_IGNORE = os.path.join(CLIENT_DIR, ".openapi-generator-ignore") + +# Open API Generator Configs +PACKAGE_NAME_POLARIS_MANAGEMENT = ( + "--additional-properties=packageName=polaris.management" +) +PACKAGE_NAME_POLARIS_CATALOG = "--additional-properties=packageName=polaris.catalog" +PYTHON_VERSION = "--additional-properties=pythonVersion=3.9" + +# Cleanup +KEEP_TEST_FILES = [ + Path("test/test_cli_parsing.py"), +] +EXCLUDE_PATHS = [ + Path(".gitignore"), + Path(".openapi-generator/"), + Path(".openapi-generator-ignore"), + Path(".pytest_cache/"), + Path("test/test_cli_parsing.py"), + Path("cli/"), + Path("polaris/__pycache__/"), + Path("polaris/catalog/__pycache__/"), + Path("polaris/catalog/models/__pycache__/"), + Path("polaris/catalog/api/__pycache__/"), + Path("polaris/management/__pycache__/"), + Path("polaris/management/models/__pycache__/"), + Path("polaris/management/api/__pycache__/"), + Path("integration_tests/"), + Path(".github/workflows/python.yml"), + Path(".gitlab-ci.yml"), + Path("pyproject.toml"), + Path("requirements.txt"), + Path("test-requirements.txt"), + Path("setup.py"), + Path(".DS_Store"), + Path("Makefile"), + Path("poetry.lock"), + Path("docker-compose.yml"), + Path(".pre-commit-config.yaml"), + Path("README.md"), + Path("generate_clients.py"), + Path(".venv"), +] +EXCLUDE_EXTENSIONS = [ + "json", + "iml", + "keep", + "gitignore", +] + + +def clean_old_tests() -> None: + print("Deleting old tests...") + test_dir = CLIENT_DIR / "test" + if not test_dir.exists(): + print(f"Test directory {test_dir} does not exist, skipping test cleanup.") + return + + for item in test_dir.rglob("*"): + if item.is_file(): + # Check if the file should be kept relative to CLIENT_DIR + relative_path = item.relative_to(CLIENT_DIR) + if relative_path not in KEEP_TEST_FILES: + try: + os.remove(item) + print(f"{relative_path}: removed") + except OSError as e: + print(f"Error removing {relative_path}: {e}") + else: + print(f"{relative_path}: skipped") + + init_py_to_delete = CLIENT_DIR / "test" / "__init__.py" + if init_py_to_delete.exists(): + try: + os.remove(init_py_to_delete) + print(f"{init_py_to_delete.relative_to(CLIENT_DIR)}: removed") + except OSError as e: + print(f"Error removing {init_py_to_delete.relative_to(CLIENT_DIR)}: {e}") + print("Old test deletion complete.") + + +def generate_polaris_management_client() -> None: + subprocess.check_call( + [ + "openapi-generator-cli", + "generate", + "-i", + POLARIS_MANAGEMENT_SPEC, + "-g", + "python", + "-o", + CLIENT_DIR, + PACKAGE_NAME_POLARIS_MANAGEMENT, + "--additional-properties=apiNamePrefix=polaris", + PYTHON_VERSION, + "--additional-properties=generateSourceCodeOnly=true", + "--skip-validate-spec", + "--ignore-file-override", + OPEN_API_GENERATOR_IGNORE, + ], + stdout=subprocess.DEVNULL, + ) + + +def generate_polaris_catalog_client() -> None: + subprocess.check_call( + [ + "openapi-generator-cli", + "generate", + "-i", + POLARIS_CATALOG_SPEC, + "-g", + "python", + "-o", + CLIENT_DIR, + PACKAGE_NAME_POLARIS_CATALOG, + "--additional-properties=apiNameSuffix=", + PYTHON_VERSION, + "--additional-properties=generateSourceCodeOnly=true", + "--skip-validate-spec", + "--ignore-file-override", + OPEN_API_GENERATOR_IGNORE, + ], + stdout=subprocess.DEVNULL, + ) + + +def generate_iceberg_catalog_client() -> None: + subprocess.check_call( + [ + "openapi-generator-cli", + "generate", + "-i", + ICEBERG_CATALOG_SPEC, + "-g", + "python", + "-o", + CLIENT_DIR, + PACKAGE_NAME_POLARIS_CATALOG, + "--additional-properties=apiNameSuffix=", + "--additional-properties=apiNamePrefix=Iceberg", + PYTHON_VERSION, + "--additional-properties=generateSourceCodeOnly=true", + "--skip-validate-spec", + "--ignore-file-override", + OPEN_API_GENERATOR_IGNORE, + ], + stdout=subprocess.DEVNULL, + ) + + +def _prepend_header_to_file(file_path: Path, header_file_path: Path) -> None: + try: + with open(header_file_path, "r") as hf: + header_content = hf.read() + with open(file_path, "r+") as f: + original_content = f.read() + f.seek(0) + f.write(header_content + original_content) + except IOError as e: + print(f"Error prepending header to {file_path}: {e}") + + +def prepend_licenses() -> None: + print("Re-applying license headers...") + for file_path in CLIENT_DIR.rglob("*"): + if file_path.is_file(): + relative_file_path = file_path.relative_to(CLIENT_DIR) + file_extension = "" + # If it's a "dotfile" like .keep + if ( + relative_file_path.name.startswith(".") + and "." not in relative_file_path.name[1:] + ): + # e.g., for '.keep', this is 'keep' + file_extension = relative_file_path.name.lstrip(".") + else: + # For standard files like generate_clients.py + file_extension = file_path.suffix.lstrip(".") + + # Check if extension is excluded + if file_extension in EXCLUDE_EXTENSIONS: + print(f"{relative_file_path}: skipped (extension excluded)") + continue + + is_excluded = False + # Combine EXCLUDE_PATHS and KEEP_TEST_FILES for comprehensive exclusion check + # Convert Path objects in EXCLUDE_PATHS to strings for fnmatch compatibility + # Ensure patterns ending with '/' are handled for directory matching + all_exclude_patterns = [ + str(p) + ("/" if p.is_dir() else "") for p in EXCLUDE_PATHS + ] + [str(p) for p in KEEP_TEST_FILES] + + for exclude_pattern_str in all_exclude_patterns: + # Handle direct file match or if the file is within an excluded directory + if fnmatch.fnmatch(str(relative_file_path), exclude_pattern_str) or ( + exclude_pattern_str.endswith("/") + and str(relative_file_path).startswith(exclude_pattern_str) + ): + is_excluded = True + break + + if is_excluded: + print(f"{relative_file_path}: skipped (path excluded)") + continue + + header_file_path = HEADER_DIR / f"header-{file_extension}.txt" + + if header_file_path.is_file(): + _prepend_header_to_file(file_path, header_file_path) + print(f"{relative_file_path}: updated") + else: + print(f"No header compatible with file {relative_file_path}") + sys.exit(2) + print("License fix complete.") + + +def build() -> None: + clean_old_tests() + generate_polaris_management_client() + generate_polaris_catalog_client() + generate_iceberg_catalog_client() + prepend_licenses() + + +if __name__ == "__main__": + build() diff --git a/client/python/pyproject.toml b/client/python/pyproject.toml index e087a5a37f..99533ef134 100644 --- a/client/python/pyproject.toml +++ b/client/python/pyproject.toml @@ -54,7 +54,7 @@ include = [ "polaris/**" ] -[tool.poetry.group.test.dependencies] +[tool.poetry.group.dev.dependencies] pytest = ">= 7.2.1" pytest-cov = ">= 2.8.1" tox = ">= 3.9.0" @@ -63,7 +63,12 @@ types-python-dateutil = ">= 2.8.19.14" mypy = ">=1.17, <=1.17.1" pyiceberg = "==0.9.1" pre-commit = "==4.3.0" +openapi-generator-cli = "==7.11.0.post0" [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] build-backend = "poetry.core.masonry.api" + +[tool.poetry.build] +generate-setup-file = false +script = "generate_clients.py" diff --git a/client/templates/regenerate.sh b/client/templates/regenerate.sh deleted file mode 100755 index 1040234f91..0000000000 --- a/client/templates/regenerate.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -set -e - -[[ -z ${DOCKER} ]] && DOCKER="$(which podman > /dev/null && echo podman || echo docker)" - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -pushd "$SCRIPT_DIR/../python" > /dev/null - -############################# -# Delete old Tests # -############################# - -# List of test files to keep (from .../client/python) -KEEP_TESTS=( - "test/test_cli_parsing.py" -) - -find "test" -type f | while read -r file; do - - # Check if file should be excluded - keep=false - for test_path in "${KEEP_TESTS[@]}"; do - if [[ "$file" == "$test_path"* ]]; then - keep=true - fi - done - - if [ "$keep" = true ]; then - echo "${file}: skipped" - continue - else - echo "${file}: removed" - rm "${file}" - fi -done - -############################# -# Regenerate Python clients # -############################# - -echo "Regenerating python from the spec" - -# TODO skip-validate-spec is needed because the upstream Iceberg spec seems invalid. e.g.: -# [main] ERROR o.o.codegen.DefaultCodegen - Required var sort-order-id not in properties - -OPEN_API_CLI_VERSION="v7.11.0" - -${DOCKER} run --rm \ - -v "${SCRIPT_DIR}/../..:/local" \ - openapitools/openapi-generator-cli:${OPEN_API_CLI_VERSION} generate \ - -i /local/spec/polaris-management-service.yml \ - -g python \ - -o /local/client/python \ - --additional-properties=packageName=polaris.management \ - --additional-properties=apiNamePrefix=polaris \ - --additional-properties=pythonVersion=3.9 \ - --ignore-file-override /local/client/python/.openapi-generator-ignore > /dev/null - -${DOCKER} run --rm \ - -v "${SCRIPT_DIR}/../..:/local" \ - openapitools/openapi-generator-cli:${OPEN_API_CLI_VERSION} generate \ - -i /local/spec/polaris-catalog-service.yaml \ - -g python \ - -o /local/client/python \ - --additional-properties=packageName=polaris.catalog \ - --additional-properties=apiNameSuffix="" \ - --skip-validate-spec \ - --additional-properties=pythonVersion=3.9 \ - --ignore-file-override /local/client/python/.openapi-generator-ignore > /dev/null - -${DOCKER} run --rm \ - -v "${SCRIPT_DIR}/../..:/local" \ - openapitools/openapi-generator-cli:${OPEN_API_CLI_VERSION} generate \ - -i /local/spec/iceberg-rest-catalog-open-api.yaml \ - -g python \ - -o /local/client/python \ - --additional-properties=packageName=polaris.catalog \ - --additional-properties=apiNameSuffix="" \ - --additional-properties=apiNamePrefix=Iceberg \ - --additional-properties=pythonVersion=3.9 \ - --skip-validate-spec \ - --ignore-file-override /local/client/python/.openapi-generator-ignore > /dev/null - -############################# -# Prepend Licenses # -############################# - -echo "Re-applying license headers" - -prepend_header() { - local file="$1" - local header_file="$2" - tmpfile=$(mktemp) - cat "$header_file" "$file" > "$tmpfile" - mv "$tmpfile" "$file" -} - -# List of paths to exclude (from .../client/python) -EXCLUDE_PATHS=( - "./.gitignore" - "./.openapi-generator/" - "./.openapi-generator-ignore" - "./.pytest_cache" - "./test/test_cli_parsing.py" - "./cli/" - "./polaris/__pycache__" - "./polaris/catalog/__pycache__/" - "./polaris/catalog/models/__pycache__/" - "./polaris/catalog/api/__pycache__/" - "./polaris/management/__pycache__/" - "./polaris/management/models/__pycache__/" - "./polaris/management/api/__pycache__/" - "./integration_tests/" - "./.github/workflows/python.yml" - "./.gitlab-ci.yml" - "./pyproject.toml" - "./requirements.txt" - "./test-requirements.txt" - "./setup.py" - "./.DS_Store" - "./Makefile" - "./poetry.lock" - "./docker-compose.yml" - "./.pre-commit-config.yaml" - "./README.md" -) - -EXCLUDE_EXTENSIONS=( - "json" - "iml" - "keep" - "gitignore" -) - -# Process all files in the target directory -find . -type f | while read -r file; do - if [ -f "$file" ]; then - # Extract the extension - ext="${file##*.}" - - # Check if we need to exclude this extension - extension_excluded=false - for exclude_ext in "${EXCLUDE_EXTENSIONS[@]}"; do - if [ "$ext" = "$exclude_ext" ]; then - extension_excluded=true - fi - done - - # Skip this file if its extension is excluded - if [ "$extension_excluded" = true ]; then - echo "${file}: skipped" - continue - else - # Check if file should be excluded - exclude=false - for exclude_path in "${EXCLUDE_PATHS[@]}"; do - if [[ "$file" == "$exclude_path"* ]]; then - exclude=true - fi - done - for exclude_path in "${KEEP_TESTS[@]}"; do - if [[ "$file" == "$exclude_path"* ]]; then - exclude=true - fi - done - if [ "$exclude" = false ]; then - # Construct the header file path - header_file="${SCRIPT_DIR}/header-${ext}.txt" - - # Only process if the license file exists - if [ -f "$header_file" ]; then - echo "${file}: updated" - prepend_header "$file" "$header_file" - else - echo "No header compatible with file ${file}" - exit 2 - fi - fi - fi - fi -done - -echo "License fix complete" - - -DELETE_PATHS=( - "test/__init__.py" -) - -for path in "${DELETE_PATHS[@]}"; do - rm -f "$path" -done - -echo "Deletion complete" - -echo "Regeneration complete" - -popd > /dev/null diff --git a/getting-started/spark/launch-docker.sh b/getting-started/spark/launch-docker.sh index 8bbb6aa1bd..31459de022 100644 --- a/getting-started/spark/launch-docker.sh +++ b/getting-started/spark/launch-docker.sh @@ -18,5 +18,5 @@ # under the License. # -./gradlew regeneratePythonClient +make client-regenerate docker-compose -f getting-started/spark/docker-compose.yml up diff --git a/polaris b/polaris index baffabbb22..aec3b2b25a 100755 --- a/polaris +++ b/polaris @@ -30,11 +30,6 @@ for arg in "$@"; do fi done -# Check if the python client needs regeneration -if [ "$repair" == true ] || [ ! -f "${dir}/client/python/polaris/catalog/__init__.py" ]; then - "${dir}/gradlew" regeneratePythonClient -fi - if [ ! -d "${dir}"/polaris-venv ] || [ "$repair" == true ]; then rm -f "${dir}"/poetry.lock rm -f "${dir}/client/python/poetry.lock" diff --git a/regtests/Dockerfile b/regtests/Dockerfile index 983f92f877..b75b5e2a70 100644 --- a/regtests/Dockerfile +++ b/regtests/Dockerfile @@ -41,6 +41,8 @@ COPY --chown=spark ./regtests/setup.sh /home/spark/polaris/regtests/setup.sh COPY --chown=spark ./regtests/pyspark-setup.sh /home/spark/polaris/regtests/pyspark-setup.sh COPY --chown=spark ./client/python /home/spark/polaris/client/python COPY --chown=spark ./polaris /home/spark/polaris/polaris +COPY --chown=spark ./client/templates /home/spark/polaris/client/templates +COPY --chown=spark ./spec /home/spark/polaris/spec COPY --chown=spark ./regtests/requirements.txt /tmp/ RUN python3 -m venv /home/spark/polaris/polaris-venv && \ @@ -48,7 +50,7 @@ RUN python3 -m venv /home/spark/polaris/polaris-venv && \ pip install -r /tmp/requirements.txt && \ cd /home/spark/polaris/client/python && \ poetry install && \ - deactivate \ + deactivate && \ /home/spark/polaris/regtests/setup.sh COPY --chown=spark ./regtests /home/spark/polaris/regtests diff --git a/regtests/run.sh b/regtests/run.sh index d1472a23c1..b87a0b578a 100755 --- a/regtests/run.sh +++ b/regtests/run.sh @@ -45,7 +45,6 @@ function logred() { REGTEST_HOME=$(dirname $(realpath $0)) -cd "${SCRIPT_DIR}/.." && ./gradlew regeneratePythonClient cd ${REGTEST_HOME} ./setup.sh diff --git a/spec/README.md b/spec/README.md index e168dbeda5..8d2c0d0f03 100644 --- a/spec/README.md +++ b/spec/README.md @@ -51,7 +51,10 @@ Note: the license header will be removed after the bundle generation, please man The file `iceberg-rest-catalog-open-api.yaml` is copied from the upstream Iceberg REST catalog spec. -However, when copying it, you may need to make some nonfunctional changes in order to ensure that the Python types -generated by the gradle task `regeneratePythonClient` still allow all tests to pass. For more context, see -[PR #1347](https://github.com/apache/polaris/pull/1347). +However, when copying it, you may need to make some nonfunctional changes to ensure that the generated Python types +still allow all tests to pass. You can regenerate the Python client by running: +``` +make client-regenerate +``` +For more context, see [PR #2192](https://github.com/apache/polaris/pull/2192).