diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py
deleted file mode 100644
index ce7201c36dcc7..0000000000000
--- a/sdks/python/gen_protos.py
+++ /dev/null
@@ -1,129 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""Generates Python proto modules and grpc stubs for Beam protos."""
-from __future__ import absolute_import
-from __future__ import print_function
-
-import glob
-import logging
-import os
-import shutil
-import subprocess
-import sys
-import warnings
-
-import pkg_resources
-
-BEAM_PROTO_PATHS = [
-  os.path.join('..', '..', 'model', 'pipeline', 'src', 'main', 'proto'),
-  os.path.join('..', '..', 'model', 'job-management', 'src', 'main', 'proto'),
-  os.path.join('..', '..', 'model', 'fn-execution', 'src', 'main', 'proto'),
-]
-
-PYTHON_OUTPUT_PATH = os.path.join('apache_beam', 'portability', 'api')
-
-MODEL_RESOURCES = [
-    os.path.normpath('../../model/fn-execution/src/main/resources'\
-        + '/org/apache/beam/model/fnexecution/v1/standard_coders.yaml'),
-]
-
-
-def generate_proto_files(force=False, log=None):
-
-  try:
-    import grpc_tools  # pylint: disable=unused-import
-  except ImportError:
-    warnings.warn('Installing grpcio-tools is recommended for development.')
-
-  if log is None:
-    log = logging.getLogger(__name__)
-
-  py_sdk_root = os.path.dirname(os.path.abspath(__file__))
-  proto_dirs = [os.path.join(py_sdk_root, path) for path in BEAM_PROTO_PATHS]
-  proto_files = sum(
-      [glob.glob(os.path.join(d, '*.proto')) for d in proto_dirs], [])
-  out_dir = os.path.join(py_sdk_root, PYTHON_OUTPUT_PATH)
-  out_files = [path for path in glob.glob(os.path.join(out_dir, '*_pb2.py'))]
-
-  if out_files and not proto_files and not force:
-    # We have out_files but no protos; assume they're up to date.
-    # This is actually the common case (e.g. installation from an sdist).
-    log.info('No proto files; using existing generated files.')
-    return
-
-  elif not out_files and not proto_files:
-    if not os.path.exists(py_sdk_root):
-      raise RuntimeError(
-          'Not in apache git tree; unable to find proto definitions.')
-    else:
-      raise RuntimeError(
-          'No proto files found in %s.' % proto_dirs)
-
-  if force:
-    regenerate = 'forced'
-  elif not out_files:
-    regenerate = 'no output files'
-  elif len(out_files) < len(proto_files):
-    regenerate = 'not enough output files'
-  elif (
-      min(os.path.getmtime(path) for path in out_files)
-      <= max(os.path.getmtime(path)
-             for path in proto_files + [os.path.realpath(__file__)])):
-    regenerate = 'output files are out-of-date'
-  elif len(out_files) > len(proto_files):
-    regenerate = 'output files without corresponding .proto files'
-    # too many output files: probably due to switching between git branches.
-    # remove them so they don't trigger constant regeneration.
-    for out_file in out_files:
-      os.remove(out_file)
-  else:
-    regenerate = None
-
-  if regenerate:
-
-    from grpc_tools import protoc
-
-    log.info('Regenerating Python proto definitions (%s).' % regenerate)
-    builtin_protos = pkg_resources.resource_filename('grpc_tools', '_proto')
-    args = (
-        [sys.executable] +  # expecting to be called from command line
-        ['--proto_path=%s' % builtin_protos] +
-        ['--proto_path=%s' % d for d in proto_dirs] +
-        ['--python_out=%s' % out_dir] +
-        # TODO(robertwb): Remove the prefix once it's the default.
-        ['--grpc_python_out=grpc_2_0:%s' % out_dir] +
-        proto_files)
-    ret_code = protoc.main(args)
-    if ret_code:
-      raise RuntimeError(
-          'Protoc returned non-zero status (see logs for details): '
-          '%s' % ret_code)
-
-    # copy resource files
-    for path in MODEL_RESOURCES:
-      shutil.copy2(os.path.join(py_sdk_root, path), out_dir)
-
-    ret_code = subprocess.call(
-        ["futurize", "--both-stages", "--write", "--no-diff", out_dir])
-    if ret_code:
-      raise RuntimeError(
-          'Error applying futurize to generated protobuf python files.')
-
-
-if __name__ == '__main__':
-  generate_proto_files(force=True)
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 5321c6cc5a978..fb9d4e0b0b3dd 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -20,8 +20,11 @@
 from __future__ import absolute_import
 from __future__ import print_function
 
+import glob
 import os
 import platform
+import shutil
+import subprocess
 import sys
 import warnings
 from distutils import log
@@ -32,6 +35,7 @@
 import setuptools
 from pkg_resources import DistributionNotFound
 from pkg_resources import get_distribution
+from pkg_resources import resource_filename
 from setuptools.command.build_py import build_py
 from setuptools.command.sdist import sdist
 
@@ -170,24 +174,133 @@ def get_version():
     'timeloop>=1.0.2,<2',
 ]
 
-# We must generate protos after setup_requires are installed.
+BEAM_PROTO_PATHS = [
+  os.path.join('..', '..', 'model', 'pipeline', 'src', 'main', 'proto'),
+  os.path.join('..', '..', 'model', 'job-management', 'src', 'main', 'proto'),
+  os.path.join('..', '..', 'model', 'fn-execution', 'src', 'main', 'proto'),
+]
+
+PYTHON_PROTO_OUTPUT_PATH = os.path.join('apache_beam', 'portability', 'api')
+
+MODEL_RESOURCES = [
+    os.path.normpath('../../model/fn-execution/src/main/resources'\
+        + '/org/apache/beam/model/fnexecution/v1/standard_coders.yaml'),
+]
+
+
+def generate_proto_files(force=False):
+  """Generates Python proto modules and grpc stubs for Beam protos.
+
+  Existing generated files are reused when they look up to date, or when no
+  .proto files are available (e.g. installation from an sdist), unless force
+  is set.
+
+  Args:
+    force: if True, regenerate even if the outputs look up to date.
+
+  Raises:
+    RuntimeError: if there are no .proto files and existing generated files
+      cannot be reused, or if protoc or futurize fails.
+  """
+  py_sdk_root = os.path.dirname(os.path.abspath(__file__))
+  proto_dirs = [os.path.join(py_sdk_root, path) for path in BEAM_PROTO_PATHS]
+  proto_files = [
+      proto_file
+      for proto_dir in proto_dirs
+      for proto_file in glob.glob(os.path.join(proto_dir, '*.proto'))]
+  out_dir = os.path.join(py_sdk_root, PYTHON_PROTO_OUTPUT_PATH)
+  out_files = glob.glob(os.path.join(out_dir, '*_pb2.py'))
+
+  if not proto_files:
+    if out_files and not force:
+      # We have out_files but no protos; assume they're up to date.
+      # This is actually the common case (e.g. installation from an sdist).
+      log.info('No proto files; using existing generated files.')
+      return
+    # Nothing to compile, even when forced: fail with a clear message rather
+    # than invoking protoc without any input files.
+    if not any(os.path.isdir(proto_dir) for proto_dir in proto_dirs):
+      raise RuntimeError(
+          'Not in apache git tree; unable to find proto definitions.')
+    raise RuntimeError('No proto files found in %s.' % proto_dirs)
+
+  if force:
+    regenerate = 'forced'
+  elif not out_files:
+    regenerate = 'no output files'
+  elif len(out_files) < len(proto_files):
+    regenerate = 'not enough output files'
+  elif (
+      min(os.path.getmtime(path) for path in out_files)
+      <= max(os.path.getmtime(path)
+             for path in proto_files + [os.path.realpath(__file__)])):
+    regenerate = 'output files are out-of-date'
+  elif len(out_files) > len(proto_files):
+    regenerate = 'output files without corresponding .proto files'
+    # too many output files: probably due to switching between git branches.
+    # remove them so they don't trigger constant regeneration.
+    for out_file in out_files:
+      os.remove(out_file)
+  else:
+    regenerate = None
+
+  if not regenerate:
+    return
+
+  # Imported lazily: grpcio-tools is only needed when actually regenerating.
+  from grpc_tools import protoc
+
+  log.info('Regenerating Python proto definitions (%s).' % regenerate)
+  builtin_protos = resource_filename('grpc_tools', '_proto')
+  args = (
+      [sys.executable] +  # expecting to be called from command line
+      ['--proto_path=%s' % builtin_protos] +
+      ['--proto_path=%s' % d for d in proto_dirs] +
+      ['--python_out=%s' % out_dir] +
+      # TODO(robertwb): Remove the prefix once it's the default.
+      ['--grpc_python_out=grpc_2_0:%s' % out_dir] +
+      proto_files)
+  ret_code = protoc.main(args)
+  if ret_code:
+    raise RuntimeError(
+        'Protoc returned non-zero status (see logs for details): '
+        '%s' % ret_code)
+
+  # copy resource files
+  for path in MODEL_RESOURCES:
+    shutil.copy2(os.path.join(py_sdk_root, path), out_dir)
+
+  try:
+    ret_code = subprocess.call(
+        ["futurize", "--both-stages", "--write", "--no-diff", out_dir])
+  except OSError as err:
+    # The futurize executable could not be started (e.g. not installed).
+    raise RuntimeError(
+        'Unable to run futurize on generated protobuf python files: %s' % err)
+  if ret_code:
+    raise RuntimeError(
+        'Error applying futurize to generated protobuf python files.')
+
+
+# We must generate protos after requirements from pyproject.toml are installed.
 def generate_protos_first(original_cmd):
+  """Returns original_cmd wrapped to generate protos before it runs.
+
+  If grpcio-tools cannot be imported, a warning is issued and original_cmd
+  is returned unchanged.
+  """
   try:
-    # See https://issues.apache.org/jira/browse/BEAM-2366
-    # pylint: disable=wrong-import-position
-    import gen_protos
-
-    class cmd(original_cmd, object):
-      def run(self):
-        gen_protos.generate_proto_files(log=log)
-        super(cmd, self).run()
-    return cmd
-  except ImportError as err:
-    warnings.warn(
-        "Could not import gen_protos, skipping proto generation: %s (pwd=%s)" %
-        (err, os.getcwd()))
+    import grpc_tools  # pylint: disable=unused-import
+  except ImportError:
+    warnings.warn('Installing grpcio-tools is recommended for development.')
     return original_cmd
 
+  class cmd(original_cmd, object):
+    def run(self):
+      generate_proto_files()
+      super(cmd, self).run()
+  return cmd
+
 
 python_requires = '>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*'
 