diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..2c7d17083 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +version: 2 +updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 000000000..4f6360b71 --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,67 @@ +--- +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: CodeQL +on: + push: + branches: [master] + pull_request: + # The branches below must be a subset of the branches above + branches: [master] + schedule: + - cron: 19 10 * * 6 +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + strategy: + fail-fast: false + matrix: + language: [python] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] + # Learn more: + # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + # queries: ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + # ℹī¸ Command-line programs to run using the OS shell. + # 📚 https://git.io/JvXDl + + # ✏ī¸ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 000000000..c5bd66218 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,206 @@ +name: CI/CD + +on: + push: + branches: ["master"] + pull_request: + branches: ["master"] + release: + types: [created] + branches: + - 'master' + workflow_dispatch: + +env: + FORCE_COLOR: "1" # Make tools pretty. 
+ PIP_DISABLE_PIP_VERSION_CHECK: "1" + PIP_NO_PYTHON_VERSION_WARNING: "1" + PYTHON_LATEST: "3.12" + KAFKA_LATEST: "2.6.0" + + # For re-actors/checkout-python-sdist + sdist-artifact: python-package-distributions + +jobs: + + build-sdist: + name: đŸ“Ļ Build the source distribution + runs-on: ubuntu-latest + steps: + - name: Checkout project + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_LATEST }} + cache: pip + - run: python -m pip install build + name: Install core libraries for build and install + - name: Build artifacts + run: python -m build + - name: Upload built artifacts for testing + uses: actions/upload-artifact@v4 + with: + name: ${{ env.sdist-artifact }} + # NOTE: Exact expected file names are specified here + # NOTE: as a safety measure — if anything weird ends + # NOTE: up being in this dir or not all dists will be + # NOTE: produced, this will fail the workflow. + path: dist/${{ env.sdist-name }} + retention-days: 15 + + test-python: + name: Tests on ${{ matrix.python-version }} + needs: + - build-sdist + runs-on: ubuntu-latest + continue-on-error: ${{ matrix.experimental }} + strategy: + fail-fast: false + matrix: + python-version: + - "3.8" + - "3.9" + - "3.10" + - "3.11" + - "3.12" + - "pypy3.9" + experimental: [ false ] + steps: + - name: Checkout the source code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Setup java + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: 11 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: | + requirements-dev.txt + - name: Check Java installation + run: source travis_java_install.sh + - name: Pull Kafka releases + run: ./build_integration.sh + env: + PLATFORM: ${{ matrix.platform }} + KAFKA_VERSION: ${{ env.KAFKA_LATEST }} + # TODO: Cache releases to expedite testing + - name: Install dependencies + run: | + sudo apt install -y libsnappy-dev libzstd-dev + python -m pip install --upgrade pip + python -m pip install tox tox-gh-actions + pip install . 
+ pip install -r requirements-dev.txt + - name: Test with tox + run: tox + env: + PLATFORM: ${{ matrix.platform }} + KAFKA_VERSION: ${{ env.KAFKA_LATEST }} + + test-kafka: + name: Tests for Kafka ${{ matrix.kafka-version }} (Python ${{ matrix.python-version }}) + needs: + - build-sdist + runs-on: ubuntu-latest + timeout-minutes: 15 + strategy: + fail-fast: false + matrix: + kafka-version: + - "0.9.0.1" + - "0.10.2.2" + - "0.11.0.2" + - "0.11.0.3" + - "1.1.1" + - "2.4.0" + - "2.5.0" + - "2.6.0" + python-version: ['3.12'] + experimental: [false] + include: + - kafka-version: '0.8.2.2' + experimental: true + python-version: "3.12" + - kafka-version: '0.8.2.2' + experimental: false + python-version: "3.10" + env: + PYTHON_LATEST: ${{ matrix.python-version }} + continue-on-error: ${{ matrix.experimental }} + steps: + - name: Checkout the source code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Setup java + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: 8 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: | + requirements-dev.txt + - name: Pull Kafka releases + run: ./build_integration.sh + env: + # This is fast enough as long as you pull only one release at a time, + # no need to worry about caching + PLATFORM: ${{ matrix.platform }} + KAFKA_VERSION: ${{ matrix.kafka-version }} + - name: Install dependencies + run: | + sudo apt install -y libsnappy-dev libzstd-dev + python -m pip install --upgrade pip + python -m pip install tox tox-gh-actions + pip install . + pip install -r requirements-dev.txt + - name: Test with tox + run: tox + env: + PLATFORM: ${{ matrix.platform }} + KAFKA_VERSION: ${{ matrix.kafka-version }} + + check: # This job does nothing and is only used for the branch protection + name: ✅ Ensure the required checks passing + if: always() + needs: + - build-sdist + - test-python + - test-kafka + runs-on: ubuntu-latest + steps: + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} + publish: + name: đŸ“Ļ Publish to PyPI + runs-on: ubuntu-latest + needs: [build-sdist] + permissions: + id-token: write + environment: pypi + if: github.event_name == 'release' && github.event.action == 'created' + steps: + - name: Download the artifacts + uses: actions/download-artifact@v4 + with: + name: ${{ env.sdist-artifact }} + path: dist/${{ env.sdist-name }} + - name: Publish package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.travis.yml b/.travis.yml index b98aa16b1..21e51f5ed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ python: - 2.7 - 3.4 - 3.7 + - 3.8 - pypy2.7-6.0 env: @@ -16,11 +17,13 @@ env: - KAFKA_VERSION=1.1.1 - KAFKA_VERSION=2.4.0 - KAFKA_VERSION=2.5.0 + - KAFKA_VERSION=2.6.0 addons: apt: packages: - libsnappy-dev + - libzstd-dev - openjdk-8-jdk cache: diff --git a/CHANGES.md b/CHANGES.md index 08e3eccdd..ccec6b5c3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,45 @@ +# 2.0.2 (Sep 29, 2020) + +Consumer +* KIP-54: Implement sticky partition assignment strategy (aynroot / PR #2057) +* Fix consumer deadlock when heartbeat thread request timeout (huangcuiyang / PR #2064) + +Compatibility +* Python 3.8 support (Photonios / PR #2088) + +Cleanups +* Bump dev requirements (jeffwidman / PR #2129) +* Fix crc32c deprecation warning (crc32c==2.1) (jeffwidman / PR #2128) +* Lint 
cleanup (jeffwidman / PR #2126) +* Fix initialization order in KafkaClient (pecalleja / PR #2119) +* Allow installing crc32c via extras (mishas / PR #2069) +* Remove unused imports (jameslamb / PR #2046) + +Admin Client +* Merge _find_coordinator_id methods (jeffwidman / PR #2127) +* Feature: delete consumergroups (swenzel / PR #2040) +* Allow configurable timeouts in admin client check version (sunnyakaxd / PR #2107) +* Enhancement for Kafka Admin Client's "Describe Consumer Group" (Apurva007 / PR #2035) + +Protocol +* Add support for zstd compression (gabriel-tincu / PR #2021) +* Add protocol support for brokers 1.1.0 - 2.5.0 (gabriel-tincu / PR #2038) +* Add ProduceRequest/ProduceResponse v6/v7/v8 (gabriel-tincu / PR #2020) +* Fix parsing NULL header values (kvfi / PR #2024) + +Tests +* Add 2.5.0 to automated CI tests (gabriel-tincu / PR #2038) +* Add 2.1.1 to build_integration (gabriel-tincu / PR #2019) + +Documentation / Logging / Errors +* Disable logging during producer object gc (gioele / PR #2043) +* Update example.py; use threading instead of multiprocessing (Mostafa-Elmenbawy / PR #2081) +* Fix typo in exception message (haracejacob / PR #2096) +* Add kafka.structs docstrings (Mostafa-Elmenbawy / PR #2080) +* Fix broken compatibility page link (anuragrana / PR #2045) +* Rename README to README.md (qhzxc0015 / PR #2055) +* Fix docs by adding SASL mention (jeffwidman / #1990) + # 2.0.1 (Feb 19, 2020) Admin Client @@ -371,7 +413,7 @@ Some of the major changes include: * SASL authentication is working (we think) * Removed several circular references to improve gc on close() -Thanks to all contributors -- the state of the kafka-python community is strong! +Thanks to all contributors -- the state of the kafka-python-ng community is strong! Detailed changelog are listed below: diff --git a/Makefile b/Makefile index b4dcbffc9..9d7d89f4d 100644 --- a/Makefile +++ b/Makefile @@ -20,20 +20,20 @@ test37: build-integration test27: build-integration KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) tox -e py27 -- $(FLAGS) -# Test using py.test directly if you want to use local python. Useful for other +# Test using pytest directly if you want to use local python. Useful for other # platforms that require manual installation for C libraries, ie. Windows. test-local: build-integration - KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) py.test \ + KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) pytest \ --pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF $(FLAGS) kafka test cov-local: build-integration - KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) py.test \ + KAFKA_VERSION=$(KAFKA_VERSION) SCALA_VERSION=$(SCALA_VERSION) pytest \ --pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka \ --cov-config=.covrc --cov-report html $(FLAGS) kafka test @echo "open file://`pwd`/htmlcov/index.html" # Check the readme for syntax errors, which can lead to invalid formatting on -# PyPi homepage (https://pypi.python.org/pypi/kafka-python) +# PyPi homepage (https://pypi.python.org/pypi/kafka-python-ng) check-readme: python setup.py check -rms diff --git a/README.rst b/README.rst index bae567ba6..b7acfc8a2 100644 --- a/README.rst +++ b/README.rst @@ -1,22 +1,28 @@ Kafka Python client ------------------------ -.. image:: https://img.shields.io/badge/kafka-2.4%2C%202.3%2C%202.2%2C%202.1%2C%202.0%2C%201.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg - :target: https://kafka-python.readthedocs.io/compatibility.html -.. 
image:: https://img.shields.io/pypi/pyversions/kafka-python.svg - :target: https://pypi.python.org/pypi/kafka-python -.. image:: https://coveralls.io/repos/dpkp/kafka-python/badge.svg?branch=master&service=github - :target: https://coveralls.io/github/dpkp/kafka-python?branch=master -.. image:: https://travis-ci.org/dpkp/kafka-python.svg?branch=master - :target: https://travis-ci.org/dpkp/kafka-python +.. image:: https://img.shields.io/badge/kafka-2.6%2C%202.5%2C%202.4%2C%202.3%2C%202.2%2C%202.1%2C%202.0%2C%201.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg + :target: https://kafka-python-ng.readthedocs.io/en/master/compatibility.html +.. image:: https://img.shields.io/pypi/pyversions/kafka-python-ng.svg + :target: https://pypi.python.org/pypi/kafka-python-ng +.. image:: https://coveralls.io/repos/wbarnha/kafka-python-ng/badge.svg?branch=master&service=github + :target: https://coveralls.io/github/wbarnha/kafka-python-ng?branch=master .. image:: https://img.shields.io/badge/license-Apache%202-blue.svg - :target: https://github.com/dpkp/kafka-python/blob/master/LICENSE + :target: https://github.com/wbarnha/kafka-python-ng/blob/master/LICENSE +.. image:: https://img.shields.io/pypi/dw/kafka-python-ng.svg + :target: https://pypistats.org/packages/kafka-python-ng +.. image:: https://img.shields.io/pypi/v/kafka-python.svg + :target: https://pypi.org/project/kafka-python-ng +.. image:: https://img.shields.io/pypi/implementation/kafka-python-ng + :target: https://github.com/wbarnha/kafka-python-ng/blob/master/setup.py + + Python client for the Apache Kafka distributed stream processing system. -kafka-python is designed to function much like the official java client, with a +kafka-python-ng is designed to function much like the official java client, with a sprinkling of pythonic interfaces (e.g., consumer iterators). -kafka-python is best used with newer brokers (0.9+), but is backwards-compatible with +kafka-python-ng is best used with newer brokers (0.9+), but is backwards-compatible with older versions (to 0.8.0). Some features will only be enabled on newer brokers. For example, fully coordinated consumer groups -- i.e., dynamic partition assignment to multiple consumers in the same group -- requires use of 0.9+ kafka @@ -26,13 +32,20 @@ check code (perhaps using zookeeper or consul). For older brokers, you can achieve something similar by manually assigning different partitions to each consumer instance with config management tools like chef, ansible, etc. This approach will work fine, though it does not support rebalancing on failures. -See + +See https://kafka-python.readthedocs.io/en/master/compatibility.html + for more details. Please note that the master branch may contain unreleased features. For release documentation, please see readthedocs and/or python's inline help. ->>> pip install kafka-python + +.. code-block:: bash + + $ pip install kafka-python-ng + + KafkaConsumer ************* @@ -41,88 +54,124 @@ KafkaConsumer is a high-level message consumer, intended to operate as similarly as possible to the official java client. Full support for coordinated consumer groups requires use of kafka brokers that support the Group APIs: kafka v0.9+. -See + +See https://kafka-python.readthedocs.io/en/master/apidoc/KafkaConsumer.html + for API and configuration details. 
The consumer iterator returns ConsumerRecords, which are simple namedtuples that expose basic message attributes: topic, partition, offset, key, and value: ->>> from kafka import KafkaConsumer ->>> consumer = KafkaConsumer('my_favorite_topic') ->>> for msg in consumer: -... print (msg) - ->>> # join a consumer group for dynamic partition assignment and offset commits ->>> from kafka import KafkaConsumer ->>> consumer = KafkaConsumer('my_favorite_topic', group_id='my_favorite_group') ->>> for msg in consumer: -... print (msg) - ->>> # manually assign the partition list for the consumer ->>> from kafka import TopicPartition ->>> consumer = KafkaConsumer(bootstrap_servers='localhost:1234') ->>> consumer.assign([TopicPartition('foobar', 2)]) ->>> msg = next(consumer) - ->>> # Deserialize msgpack-encoded values ->>> consumer = KafkaConsumer(value_deserializer=msgpack.loads) ->>> consumer.subscribe(['msgpackfoo']) ->>> for msg in consumer: -... assert isinstance(msg.value, dict) - ->>> # Access record headers. The returned value is a list of tuples ->>> # with str, bytes for key and value ->>> for msg in consumer: -... print (msg.headers) - ->>> # Get consumer metrics ->>> metrics = consumer.metrics() +.. code-block:: python + + from kafka import KafkaConsumer + consumer = KafkaConsumer('my_favorite_topic') + for msg in consumer: + print (msg) + +.. code-block:: python + + # join a consumer group for dynamic partition assignment and offset commits + from kafka import KafkaConsumer + consumer = KafkaConsumer('my_favorite_topic', group_id='my_favorite_group') + for msg in consumer: + print (msg) + +.. code-block:: python + + # manually assign the partition list for the consumer + from kafka import TopicPartition + consumer = KafkaConsumer(bootstrap_servers='localhost:1234') + consumer.assign([TopicPartition('foobar', 2)]) + msg = next(consumer) + +.. code-block:: python + + # Deserialize msgpack-encoded values + consumer = KafkaConsumer(value_deserializer=msgpack.loads) + consumer.subscribe(['msgpackfoo']) + for msg in consumer: + assert isinstance(msg.value, dict) + +.. code-block:: python + + # Access record headers. The returned value is a list of tuples + # with str, bytes for key and value + for msg in consumer: + print (msg.headers) + +.. code-block:: python + + # Get consumer metrics + metrics = consumer.metrics() + KafkaProducer ************* KafkaProducer is a high-level, asynchronous message producer. The class is intended to operate as similarly as possible to the official java client. -See + +See https://kafka-python.readthedocs.io/en/master/apidoc/KafkaProducer.html + for more details. ->>> from kafka import KafkaProducer ->>> producer = KafkaProducer(bootstrap_servers='localhost:1234') ->>> for _ in range(100): -... producer.send('foobar', b'some_message_bytes') +.. code-block:: python + + from kafka import KafkaProducer + producer = KafkaProducer(bootstrap_servers='localhost:1234') + for _ in range(100): + producer.send('foobar', b'some_message_bytes') + +.. code-block:: python + + # Block until a single message is sent (or timeout) + future = producer.send('foobar', b'another_message') + result = future.get(timeout=60) + +.. code-block:: python + + # Block until all pending messages are at least put on the network + # NOTE: This does not guarantee delivery or success! It is really + # only useful if you configure internal batching using linger_ms + producer.flush() + +.. 
code-block:: python + + # Use a key for hashed-partitioning + producer.send('foobar', key=b'foo', value=b'bar') + +.. code-block:: python ->>> # Block until a single message is sent (or timeout) ->>> future = producer.send('foobar', b'another_message') ->>> result = future.get(timeout=60) + # Serialize json messages + import json + producer = KafkaProducer(value_serializer=lambda v: json.dumps(v).encode('utf-8')) + producer.send('fizzbuzz', {'foo': 'bar'}) ->>> # Block until all pending messages are at least put on the network ->>> # NOTE: This does not guarantee delivery or success! It is really ->>> # only useful if you configure internal batching using linger_ms ->>> producer.flush() +.. code-block:: python ->>> # Use a key for hashed-partitioning ->>> producer.send('foobar', key=b'foo', value=b'bar') + # Serialize string keys + producer = KafkaProducer(key_serializer=str.encode) + producer.send('flipflap', key='ping', value=b'1234') ->>> # Serialize json messages ->>> import json ->>> producer = KafkaProducer(value_serializer=lambda v: json.dumps(v).encode('utf-8')) ->>> producer.send('fizzbuzz', {'foo': 'bar'}) +.. code-block:: python ->>> # Serialize string keys ->>> producer = KafkaProducer(key_serializer=str.encode) ->>> producer.send('flipflap', key='ping', value=b'1234') + # Compress messages + producer = KafkaProducer(compression_type='gzip') + for i in range(1000): + producer.send('foobar', b'msg %d' % i) ->>> # Compress messages ->>> producer = KafkaProducer(compression_type='gzip') ->>> for i in range(1000): -... producer.send('foobar', b'msg %d' % i) +.. code-block:: python ->>> # Include record headers. The format is list of tuples with string key ->>> # and bytes value. ->>> producer.send('foobar', value=b'c29tZSB2YWx1ZQ==', headers=[('content-encoding', b'base64')]) + # Include record headers. The format is list of tuples with string key + # and bytes value. + producer.send('foobar', value=b'c29tZSB2YWx1ZQ==', headers=[('content-encoding', b'base64')]) + +.. code-block:: python + + # Get producer performance metrics + metrics = producer.metrics() ->>> # Get producer performance metrics ->>> metrics = producer.metrics() Thread safety ************* @@ -133,29 +182,40 @@ KafkaConsumer which cannot. While it is possible to use the KafkaConsumer in a thread-local manner, multiprocessing is recommended. + Compression *********** -kafka-python supports gzip compression/decompression natively. To produce or consume lz4 -compressed messages, you should install python-lz4 (pip install lz4). -To enable snappy compression/decompression install python-snappy (also requires snappy library). -See -for more information. +kafka-python-ng supports the following compression formats: + +- gzip +- LZ4 +- Snappy +- Zstandard (zstd) + +gzip is supported natively, the others require installing additional libraries. + +See https://kafka-python.readthedocs.io/en/master/install.html for more information. + + Optimized CRC32 Validation ************************** -Kafka uses CRC32 checksums to validate messages. kafka-python includes a pure +Kafka uses CRC32 checksums to validate messages. kafka-python-ng includes a pure python implementation for compatibility. To improve performance for high-throughput applications, kafka-python will use `crc32c` for optimized native code if installed. -See https://pypi.org/project/crc32c/ +See https://kafka-python.readthedocs.io/en/master/install.html for installation instructions. + +See https://pypi.org/project/crc32c/ for details on the underlying crc32c lib. 
+ Protocol ******** -A secondary goal of kafka-python is to provide an easy-to-use protocol layer +A secondary goal of kafka-python-ng is to provide an easy-to-use protocol layer for interacting with kafka brokers via the python repl. This is useful for testing, probing, and general experimentation. The protocol support is leveraged to enable a KafkaClient.check_version() method that probes a kafka broker and attempts to identify which version it is running -(0.8.0 to 2.4+). +(0.8.0 to 2.6+). diff --git a/docs/Makefile b/docs/Makefile index b27cf7742..31e74e1aa 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -85,17 +85,17 @@ qthelp: @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/kafka-python.qhcp" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/kafka-python-ng.qhcp" @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/kafka-python.qhc" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/kafka-python-ng.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/kafka-python" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/kafka-python" + @echo "# mkdir -p $$HOME/.local/share/devhelp/kafka-python-ng" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/kafka-python-ng" @echo "# devhelp" epub: diff --git a/docs/apidoc/modules.rst b/docs/apidoc/modules.rst index 066fc6523..29be3486f 100644 --- a/docs/apidoc/modules.rst +++ b/docs/apidoc/modules.rst @@ -1,4 +1,4 @@ -kafka-python API +kafka-python-ng API **************** .. 
toctree:: diff --git a/docs/changelog.rst b/docs/changelog.rst index bcaaa2785..9d3cb6512 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -2,6 +2,57 @@ Changelog ========= +2.0.2 (Sep 29, 2020) +#################### + +Consumer +-------- +* KIP-54: Implement sticky partition assignment strategy (aynroot / PR #2057) +* Fix consumer deadlock when heartbeat thread request timeout (huangcuiyang / PR #2064) + +Compatibility +------------- +* Python 3.8 support (Photonios / PR #2088) + +Cleanups +-------- +* Bump dev requirements (jeffwidman / PR #2129) +* Fix crc32c deprecation warning (crc32c==2.1) (jeffwidman / PR #2128) +* Lint cleanup (jeffwidman / PR #2126) +* Fix initialization order in KafkaClient (pecalleja / PR #2119) +* Allow installing crc32c via extras (mishas / PR #2069) +* Remove unused imports (jameslamb / PR #2046) + +Admin Client +------------ +* Merge _find_coordinator_id methods (jeffwidman / PR #2127) +* Feature: delete consumergroups (swenzel / PR #2040) +* Allow configurable timeouts in admin client check version (sunnyakaxd / PR #2107) +* Enhancement for Kafka Admin Client's "Describe Consumer Group" (Apurva007 / PR #2035) + +Protocol +-------- +* Add support for zstd compression (gabriel-tincu / PR #2021) +* Add protocol support for brokers 1.1.0 - 2.5.0 (gabriel-tincu / PR #2038) +* Add ProduceRequest/ProduceResponse v6/v7/v8 (gabriel-tincu / PR #2020) +* Fix parsing NULL header values (kvfi / PR #2024) + +Tests +----- +* Add 2.5.0 to automated CI tests (gabriel-tincu / PR #2038) +* Add 2.1.1 to build_integration (gabriel-tincu / PR #2019) + +Documentation / Logging / Errors +-------------------------------- +* Disable logging during producer object gc (gioele / PR #2043) +* Update example.py; use threading instead of multiprocessing (Mostafa-Elmenbawy / PR #2081) +* Fix typo in exception message (haracejacob / PR #2096) +* Add kafka.structs docstrings (Mostafa-Elmenbawy / PR #2080) +* Fix broken compatibility page link (anuragrana / PR #2045) +* Rename README to README.md (qhzxc0015 / PR #2055) +* Fix docs by adding SASL mention (jeffwidman / #1990) + + 2.0.1 (Feb 19, 2020) #################### @@ -433,7 +484,7 @@ Some of the major changes include: * SASL authentication is working (we think) * Removed several circular references to improve gc on close() -Thanks to all contributors -- the state of the kafka-python community is strong! +Thanks to all contributors -- the state of the kafka-python-ng community is strong! Detailed changelog are listed below: diff --git a/docs/compatibility.rst b/docs/compatibility.rst index 93be6fd6e..e8e1342c3 100644 --- a/docs/compatibility.rst +++ b/docs/compatibility.rst @@ -1,21 +1,21 @@ Compatibility ------------- -.. image:: https://img.shields.io/badge/kafka-2.4%2C%202.3%2C%202.2%2C%202.1%2C%202.0%2C%201.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg - :target: https://kafka-python.readthedocs.io/compatibility.html -.. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg - :target: https://pypi.python.org/pypi/kafka-python +.. image:: https://img.shields.io/badge/kafka-2.6%2C%202.5%2C%202.4%2C%202.3%2C%202.2%2C%202.1%2C%202.0%2C%201.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg + :target: https://kafka-python-ng.readthedocs.io/compatibility.html +.. image:: https://img.shields.io/pypi/pyversions/kafka-python-ng.svg + :target: https://pypi.python.org/pypi/kafka-python-ng -kafka-python is compatible with (and tested against) broker versions 2.4 -through 0.8.0 . 
kafka-python is not compatible with the 0.8.2-beta release. +kafka-python-ng is compatible with (and tested against) broker versions 2.6 +through 0.8.0 . kafka-python-ng is not compatible with the 0.8.2-beta release. -Because the kafka server protocol is backwards compatible, kafka-python is +Because the kafka server protocol is backwards compatible, kafka-python-ng is expected to work with newer broker releases as well. -Although kafka-python is tested and expected to work on recent broker versions, +Although kafka-python-ng is tested and expected to work on recent broker versions, not all features are supported. Specifically, authentication codecs, and transactional producer/consumer support are not fully implemented. PRs welcome! -kafka-python is tested on python 2.7, 3.4, 3.7, and pypy2.7. +kafka-python-ng is tested on python 2.7, 3.4, 3.7, 3.8 and pypy2.7. -Builds and tests via Travis-CI. See https://travis-ci.org/dpkp/kafka-python +Builds and tests via Travis-CI. See https://travis-ci.org/wbarnha/kafka-python-ng diff --git a/docs/conf.py b/docs/conf.py index efa8d0807..e5b013b0d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# kafka-python documentation build configuration file, created by +# kafka-python-ng documentation build configuration file, created by # sphinx-quickstart on Sun Jan 4 12:21:50 2015. # # This file is execfile()d with the current directory set to its @@ -47,7 +47,7 @@ master_doc = 'index' # General information about the project. -project = u'kafka-python' +project = u'kafka-python-ng' copyright = u'2016 -- Dana Powers, David Arthur, and Contributors' # The version info for the project you're documenting, acts as replacement for @@ -201,7 +201,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - ('index', 'kafka-python.tex', u'kafka-python Documentation', + ('index', 'kafka-python-ng.tex', u'kafka-python-ng Documentation', u'Dana Powers', 'manual'), ] @@ -231,7 +231,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - ('index', 'kafka-python', u'kafka-python Documentation', + ('index', 'kafka-python-ng', u'kafka-python-ng Documentation', [u'Dana Powers'], 1) ] @@ -245,7 +245,7 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'kafka-python', u'kafka-python Documentation', + ('index', 'kafka-python-ng', u'kafka-python-ng Documentation', u'Dana Powers', 'kafka-python', 'One line description of project.', 'Miscellaneous'), ] diff --git a/docs/index.rst b/docs/index.rst index fa6f93c50..779ad997b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,16 +1,16 @@ -kafka-python +kafka-python-ng ############ -.. image:: https://img.shields.io/badge/kafka-2.4%2C%202.3%2C%202.2%2C%202.1%2C%202.0%2C%201.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg - :target: https://kafka-python.readthedocs.io/compatibility.html +.. image:: https://img.shields.io/badge/kafka-2.6%2C%202.5%2C%202.4%2C%202.3%2C%202.2%2C%202.1%2C%202.0%2C%201.1%2C%201.0%2C%200.11%2C%200.10%2C%200.9%2C%200.8-brightgreen.svg + :target: https://kafka-python.readthedocs.io/en/master/compatibility.html .. image:: https://img.shields.io/pypi/pyversions/kafka-python.svg - :target: https://pypi.python.org/pypi/kafka-python -.. 
image:: https://coveralls.io/repos/dpkp/kafka-python/badge.svg?branch=master&service=github - :target: https://coveralls.io/github/dpkp/kafka-python?branch=master -.. image:: https://travis-ci.org/dpkp/kafka-python.svg?branch=master - :target: https://travis-ci.org/dpkp/kafka-python + :target: https://pypi.python.org/pypi/kafka-python-ng +.. image:: https://coveralls.io/repos/wbarnha/kafka-python-ng/badge.svg?branch=master&service=github + :target: https://coveralls.io/github/wbarnha/kafka-python-ng?branch=master +.. image:: https://travis-ci.org/wbarnha/kafka-python-ng.svg?branch=master + :target: https://travis-ci.org/wbarnha/kafka-python-ng .. image:: https://img.shields.io/badge/license-Apache%202-blue.svg - :target: https://github.com/dpkp/kafka-python/blob/master/LICENSE + :target: https://github.com/wbarnha/kafka-python-ng/blob/master/LICENSE Python client for the Apache Kafka distributed stream processing system. kafka-python is designed to function much like the official java client, with a @@ -31,7 +31,12 @@ failures. See `Compatibility `_ for more details. Please note that the master branch may contain unreleased features. For release documentation, please see readthedocs and/or python's inline help. ->>> pip install kafka-python + +.. code:: bash + + pip install kafka-python-ng + + KafkaConsumer ************* @@ -46,28 +51,36 @@ See `KafkaConsumer `_ for API and configuration detai The consumer iterator returns ConsumerRecords, which are simple namedtuples that expose basic message attributes: topic, partition, offset, key, and value: ->>> from kafka import KafkaConsumer ->>> consumer = KafkaConsumer('my_favorite_topic') ->>> for msg in consumer: -... print (msg) +.. code:: python + + from kafka import KafkaConsumer + consumer = KafkaConsumer('my_favorite_topic') + for msg in consumer: + print (msg) ->>> # join a consumer group for dynamic partition assignment and offset commits ->>> from kafka import KafkaConsumer ->>> consumer = KafkaConsumer('my_favorite_topic', group_id='my_favorite_group') ->>> for msg in consumer: -... print (msg) +.. code:: python ->>> # manually assign the partition list for the consumer ->>> from kafka import TopicPartition ->>> consumer = KafkaConsumer(bootstrap_servers='localhost:1234') ->>> consumer.assign([TopicPartition('foobar', 2)]) ->>> msg = next(consumer) + # join a consumer group for dynamic partition assignment and offset commits + from kafka import KafkaConsumer + consumer = KafkaConsumer('my_favorite_topic', group_id='my_favorite_group') + for msg in consumer: + print (msg) ->>> # Deserialize msgpack-encoded values ->>> consumer = KafkaConsumer(value_deserializer=msgpack.loads) ->>> consumer.subscribe(['msgpackfoo']) ->>> for msg in consumer: -... assert isinstance(msg.value, dict) +.. code:: python + + # manually assign the partition list for the consumer + from kafka import TopicPartition + consumer = KafkaConsumer(bootstrap_servers='localhost:1234') + consumer.assign([TopicPartition('foobar', 2)]) + msg = next(consumer) + +.. code:: python + + # Deserialize msgpack-encoded values + consumer = KafkaConsumer(value_deserializer=msgpack.loads) + consumer.subscribe(['msgpackfoo']) + for msg in consumer: + assert isinstance(msg.value, dict) KafkaProducer @@ -77,36 +90,50 @@ KafkaProducer The class is intended to operate as similarly as possible to the official java client. See `KafkaProducer `_ for more details. 
->>> from kafka import KafkaProducer ->>> producer = KafkaProducer(bootstrap_servers='localhost:1234') ->>> for _ in range(100): -... producer.send('foobar', b'some_message_bytes') +.. code:: python + + from kafka import KafkaProducer + producer = KafkaProducer(bootstrap_servers='localhost:1234') + for _ in range(100): + producer.send('foobar', b'some_message_bytes') + +.. code:: python ->>> # Block until a single message is sent (or timeout) ->>> future = producer.send('foobar', b'another_message') ->>> result = future.get(timeout=60) + # Block until a single message is sent (or timeout) + future = producer.send('foobar', b'another_message') + result = future.get(timeout=60) ->>> # Block until all pending messages are at least put on the network ->>> # NOTE: This does not guarantee delivery or success! It is really ->>> # only useful if you configure internal batching using linger_ms ->>> producer.flush() +.. code:: python ->>> # Use a key for hashed-partitioning ->>> producer.send('foobar', key=b'foo', value=b'bar') + # Block until all pending messages are at least put on the network + # NOTE: This does not guarantee delivery or success! It is really + # only useful if you configure internal batching using linger_ms + producer.flush() ->>> # Serialize json messages ->>> import json ->>> producer = KafkaProducer(value_serializer=lambda v: json.dumps(v).encode('utf-8')) ->>> producer.send('fizzbuzz', {'foo': 'bar'}) +.. code:: python ->>> # Serialize string keys ->>> producer = KafkaProducer(key_serializer=str.encode) ->>> producer.send('flipflap', key='ping', value=b'1234') + # Use a key for hashed-partitioning + producer.send('foobar', key=b'foo', value=b'bar') ->>> # Compress messages ->>> producer = KafkaProducer(compression_type='gzip') ->>> for i in range(1000): -... producer.send('foobar', b'msg %d' % i) +.. code:: python + + # Serialize json messages + import json + producer = KafkaProducer(value_serializer=lambda v: json.dumps(v).encode('utf-8')) + producer.send('fizzbuzz', {'foo': 'bar'}) + +.. code:: python + + # Serialize string keys + producer = KafkaProducer(key_serializer=str.encode) + producer.send('flipflap', key='ping', value=b'1234') + +.. code:: python + + # Compress messages + producer = KafkaProducer(compression_type='gzip') + for i in range(1000): + producer.send('foobar', b'msg %d' % i) Thread safety @@ -122,10 +149,25 @@ multiprocessing is recommended. Compression *********** -kafka-python supports gzip compression/decompression natively. To produce or -consume lz4 compressed messages, you should install python-lz4 (pip install lz4). -To enable snappy, install python-snappy (also requires snappy library). -See `Installation `_ for more information. +kafka-python supports the following compression formats: + + - gzip + - LZ4 + - Snappy + - Zstandard (zstd) + +gzip is supported natively, the others require installing additional libraries. +See `Install `_ for more information. + + +Optimized CRC32 Validation +************************** + +Kafka uses CRC32 checksums to validate messages. kafka-python includes a pure +python implementation for compatibility. To improve performance for high-throughput +applications, kafka-python will use `crc32c` for optimized native code if installed. +See `Install `_ for installation instructions and +https://pypi.org/project/crc32c/ for details on the underlying crc32c lib. Protocol @@ -136,7 +178,7 @@ for interacting with kafka brokers via the python repl. This is useful for testing, probing, and general experimentation. 
The protocol support is leveraged to enable a :meth:`~kafka.KafkaClient.check_version()` method that probes a kafka broker and -attempts to identify which version it is running (0.8.0 to 2.4+). +attempts to identify which version it is running (0.8.0 to 2.6+). .. toctree:: diff --git a/docs/install.rst b/docs/install.rst index 200ca17e1..6ed917cd4 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -9,9 +9,9 @@ Pip: .. code:: bash - pip install kafka-python + pip install kafka-python-ng -Releases are also listed at https://github.com/dpkp/kafka-python/releases +Releases are also listed at https://github.com/wbarnha/kafka-python-ng/releases Bleeding-Edge @@ -19,24 +19,37 @@ Bleeding-Edge .. code:: bash - git clone https://github.com/dpkp/kafka-python - pip install ./kafka-python + git clone https://github.com/wbarnha/kafka-python-ng + pip install ./kafka-python-ng -Optional LZ4 install +Optional crc32c install +*********************** +Highly recommended if you are using Kafka 11+ brokers. For those `kafka-python-ng` +uses a new message protocol version, that requires calculation of `crc32c`, +which differs from the `zlib.crc32` hash implementation. By default `kafka-python-ng` +calculates it in pure python, which is quite slow. To speed it up we optionally +support https://pypi.python.org/pypi/crc32c package if it's installed. + +.. code:: bash + + pip install 'kafka-python-ng[crc32c]' + + +Optional ZSTD install ******************** -To enable LZ4 compression/decompression, install python-lz4: +To enable ZSTD compression/decompression, install python-zstandard: ->>> pip install lz4 +>>> pip install 'kafka-python-ng[zstd]' -Optional crc32c install +Optional LZ4 install ******************** -To enable optimized CRC32 checksum validation, install crc32c: +To enable LZ4 compression/decompression, install python-lz4: ->>> pip install crc32c +>>> pip install 'kafka-python-ng[lz4]' Optional Snappy install @@ -77,17 +90,4 @@ Install the `python-snappy` module .. code:: bash - pip install python-snappy - - -Optional crc32c install -*********************** -Highly recommended if you are using Kafka 11+ brokers. For those `kafka-python` -uses a new message protocol version, that requires calculation of `crc32c`, -which differs from `zlib.crc32` hash implementation. By default `kafka-python` -calculates it in pure python, which is quite slow. To speed it up we optionally -support https://pypi.python.org/pypi/crc32c package if it's installed. - -.. code:: bash - - pip install crc32c + pip install 'kafka-python-ng[snappy]' diff --git a/docs/license.rst b/docs/license.rst index e9d5c9adb..016a916ba 100644 --- a/docs/license.rst +++ b/docs/license.rst @@ -2,9 +2,9 @@ License ------- .. image:: https://img.shields.io/badge/license-Apache%202-blue.svg - :target: https://github.com/dpkp/kafka-python/blob/master/LICENSE + :target: https://github.com/wbarnha/kafka-python-ng/blob/master/LICENSE -Apache License, v2.0. See `LICENSE `_. +Apache License, v2.0. See `LICENSE `_. Copyright 2016, Dana Powers, David Arthur, and Contributors -(See `AUTHORS `_). +(See `AUTHORS `_). diff --git a/docs/support.rst b/docs/support.rst index 63d4a86a2..25014b3fd 100644 --- a/docs/support.rst +++ b/docs/support.rst @@ -1,7 +1,7 @@ Support ------- -For support, see github issues at https://github.com/dpkp/kafka-python +For support, see github issues at https://github.com/wbarnha/kafka-python-ng Limited IRC chat at #kafka-python on freenode (general chat is #apache-kafka). 
diff --git a/docs/tests.rst b/docs/tests.rst index 561179ca5..763c2e54d 100644 --- a/docs/tests.rst +++ b/docs/tests.rst @@ -1,17 +1,17 @@ Tests ===== -.. image:: https://coveralls.io/repos/dpkp/kafka-python/badge.svg?branch=master&service=github - :target: https://coveralls.io/github/dpkp/kafka-python?branch=master -.. image:: https://travis-ci.org/dpkp/kafka-python.svg?branch=master - :target: https://travis-ci.org/dpkp/kafka-python +.. image:: https://coveralls.io/repos/wbarnha/kafka-python-ng/badge.svg?branch=master&service=github + :target: https://coveralls.io/github/wbarnha/kafka-python-ng?branch=master +.. image:: https://travis-ci.org/wbarnha/kafka-python-ng.svg?branch=master + :target: https://travis-ci.org/wbarnha/kafka-python-ng Test environments are managed via tox. The test suite is run via pytest. Linting is run via pylint, but is generally skipped on pypy due to pylint compatibility / performance issues. -For test coverage details, see https://coveralls.io/github/dpkp/kafka-python +For test coverage details, see https://coveralls.io/github/wbarnha/kafka-python-ng The test suite includes unit tests that mock network interfaces, as well as integration tests that setup and teardown kafka broker (and zookeeper) diff --git a/docs/usage.rst b/docs/usage.rst index 1cf1aa414..047bbad77 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -8,6 +8,8 @@ KafkaConsumer .. code:: python from kafka import KafkaConsumer + import json + import msgpack # To consume latest messages and auto-commit offsets consumer = KafkaConsumer('my-topic', @@ -57,6 +59,8 @@ KafkaProducer from kafka import KafkaProducer from kafka.errors import KafkaError + import msgpack + import json producer = KafkaProducer(bootstrap_servers=['broker1:1234']) @@ -108,3 +112,52 @@ KafkaProducer # configure multiple retries producer = KafkaProducer(retries=5) + + +ClusterMetadata +============= +.. code:: python + + from kafka.cluster import ClusterMetadata + + clusterMetadata = ClusterMetadata(bootstrap_servers=['broker1:1234']) + + # get all brokers metadata + print(clusterMetadata.brokers()) + + # get specific broker metadata + print(clusterMetadata.broker_metadata('bootstrap-0')) + + # get all partitions of a topic + print(clusterMetadata.partitions_for_topic("topic")) + + # list topics + print(clusterMetadata.topics()) + + +KafkaAdminClient +============= +.. 
code:: python + from kafka import KafkaAdminClient + from kafka.admin import NewTopic + + admin = KafkaAdminClient(bootstrap_servers=['broker1:1234']) + + # create a new topic + topics_list = [] + topics_list.append(NewTopic(name="testtopic", num_partitions=1, replication_factor=1)) + admin.create_topics(topics_list,timeout_ms=None, validate_only=False) + + # delete a topic + admin.delete_topics(['testtopic']) + + # list consumer groups + print(admin.list_consumer_groups()) + + # get consumer group details + print(admin.describe_consumer_groups('cft-plt-qa.connect')) + + # get consumer group offset + print(admin.list_consumer_group_offsets('cft-plt-qa.connect')) + + diff --git a/example.py b/example.py index dac97b751..9907450f6 100755 --- a/example.py +++ b/example.py @@ -1,15 +1,15 @@ #!/usr/bin/env python -import threading, logging, time -import multiprocessing +import threading, time -from kafka import KafkaConsumer, KafkaProducer +from kafka import KafkaAdminClient, KafkaConsumer, KafkaProducer +from kafka.admin import NewTopic class Producer(threading.Thread): def __init__(self): threading.Thread.__init__(self) self.stop_event = threading.Event() - + def stop(self): self.stop_event.set() @@ -23,14 +23,15 @@ def run(self): producer.close() -class Consumer(multiprocessing.Process): + +class Consumer(threading.Thread): def __init__(self): - multiprocessing.Process.__init__(self) - self.stop_event = multiprocessing.Event() - + threading.Thread.__init__(self) + self.stop_event = threading.Event() + def stop(self): self.stop_event.set() - + def run(self): consumer = KafkaConsumer(bootstrap_servers='localhost:9092', auto_offset_reset='earliest', @@ -44,29 +45,38 @@ def run(self): break consumer.close() - - + + def main(): + # Create 'my-topic' Kafka topic + try: + admin = KafkaAdminClient(bootstrap_servers='localhost:9092') + + topic = NewTopic(name='my-topic', + num_partitions=1, + replication_factor=1) + admin.create_topics([topic]) + except Exception: + pass + tasks = [ Producer(), Consumer() ] + # Start threads of a publisher/producer and a subscriber/consumer to 'my-topic' Kafka topic for t in tasks: t.start() time.sleep(10) - + + # Stop threads for task in tasks: task.stop() for task in tasks: task.join() - - + + if __name__ == "__main__": - logging.basicConfig( - format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s', - level=logging.INFO - ) main() diff --git a/kafka/__init__.py b/kafka/__init__.py index d5e30affa..685593ce3 100644 --- a/kafka/__init__.py +++ b/kafka/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - __title__ = 'kafka' from kafka.version import __version__ __author__ = 'Dana Powers' diff --git a/kafka/admin/__init__.py b/kafka/admin/__init__.py index c240fc6d0..c67fb9e6a 100644 --- a/kafka/admin/__init__.py +++ b/kafka/admin/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from kafka.admin.config_resource import ConfigResource, ConfigResourceType from kafka.admin.client import KafkaAdminClient from kafka.admin.acl_resource import (ACL, ACLFilter, ResourcePattern, ResourcePatternFilter, ACLOperation, diff --git a/kafka/admin/acl_resource.py b/kafka/admin/acl_resource.py index fd997a10a..fbc84be60 100644 --- a/kafka/admin/acl_resource.py +++ b/kafka/admin/acl_resource.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import from kafka.errors import IllegalArgumentError # enum in stdlib as of py3.4 @@ -69,7 +68,7 @@ class ACLResourcePatternType(IntEnum): PREFIXED = 4 -class ACLFilter(object): 
+class ACLFilter: """Represents a filter to use with describing and deleting ACLs The difference between this class and the ACL class is mainly that @@ -161,7 +160,7 @@ def __init__( permission_type, resource_pattern ): - super(ACL, self).__init__(principal, host, operation, permission_type, resource_pattern) + super().__init__(principal, host, operation, permission_type, resource_pattern) self.validate() def validate(self): @@ -173,7 +172,7 @@ def validate(self): raise IllegalArgumentError("resource_pattern must be a ResourcePattern object") -class ResourcePatternFilter(object): +class ResourcePatternFilter: def __init__( self, resource_type, @@ -232,7 +231,7 @@ def __init__( resource_name, pattern_type=ACLResourcePatternType.LITERAL ): - super(ResourcePattern, self).__init__(resource_type, resource_name, pattern_type) + super().__init__(resource_type, resource_name, pattern_type) self.validate() def validate(self): @@ -240,5 +239,5 @@ def validate(self): raise IllegalArgumentError("resource_type cannot be ANY") if self.pattern_type in [ACLResourcePatternType.ANY, ACLResourcePatternType.MATCH]: raise IllegalArgumentError( - "pattern_type cannot be {} on a concrete ResourcePattern".format(self.pattern_type.name) + f"pattern_type cannot be {self.pattern_type.name} on a concrete ResourcePattern" ) diff --git a/kafka/admin/client.py b/kafka/admin/client.py index d0fa84560..204c47b7c 100644 --- a/kafka/admin/client.py +++ b/kafka/admin/client.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from collections import defaultdict import copy import logging @@ -8,7 +6,10 @@ from . import ConfigResourceType from kafka.vendor import six +from kafka.admin.acl_resource import ACLOperation, ACLPermissionType, ACLFilter, ACL, ResourcePattern, ResourceType, \ + ACLResourcePatternType from kafka.client_async import KafkaClient, selectors +from kafka.coordinator.protocol import ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment, ConsumerProtocol import kafka.errors as Errors from kafka.errors import ( IncompatibleBrokerVersion, KafkaConfigurationError, NotControllerError, @@ -16,19 +17,20 @@ from kafka.metrics import MetricConfig, Metrics from kafka.protocol.admin import ( CreateTopicsRequest, DeleteTopicsRequest, DescribeConfigsRequest, AlterConfigsRequest, CreatePartitionsRequest, - ListGroupsRequest, DescribeGroupsRequest, DescribeAclsRequest, CreateAclsRequest, DeleteAclsRequest) + ListGroupsRequest, DescribeGroupsRequest, DescribeAclsRequest, CreateAclsRequest, DeleteAclsRequest, + DeleteGroupsRequest +) from kafka.protocol.commit import GroupCoordinatorRequest, OffsetFetchRequest from kafka.protocol.metadata import MetadataRequest -from kafka.structs import TopicPartition, OffsetAndMetadata -from kafka.admin.acl_resource import ACLOperation, ACLPermissionType, ACLFilter, ACL, ResourcePattern, ResourceType, \ - ACLResourcePatternType +from kafka.protocol.types import Array +from kafka.structs import TopicPartition, OffsetAndMetadata, MemberInformation, GroupInformation from kafka.version import __version__ log = logging.getLogger(__name__) -class KafkaAdminClient(object): +class KafkaAdminClient: """A class for administering the Kafka cluster. Warning: @@ -142,6 +144,7 @@ class KafkaAdminClient(object): sasl mechanism handshake. Default: one of bootstrap servers sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider instance. (See kafka.oauth.abstract). 
Default: None + kafka_client (callable): Custom class / callable for creating KafkaClient instances """ DEFAULT_CONFIG = { @@ -182,13 +185,14 @@ class KafkaAdminClient(object): 'metric_reporters': [], 'metrics_num_samples': 2, 'metrics_sample_window_ms': 30000, + 'kafka_client': KafkaClient, } def __init__(self, **configs): log.debug("Starting KafkaAdminClient with configuration: %s", configs) extra_configs = set(configs).difference(self.DEFAULT_CONFIG) if extra_configs: - raise KafkaConfigurationError("Unrecognized configs: {}".format(extra_configs)) + raise KafkaConfigurationError(f"Unrecognized configs: {extra_configs}") self.config = copy.copy(self.DEFAULT_CONFIG) self.config.update(configs) @@ -201,10 +205,12 @@ def __init__(self, **configs): reporters = [reporter() for reporter in self.config['metric_reporters']] self._metrics = Metrics(metric_config, reporters) - self._client = KafkaClient(metrics=self._metrics, - metric_group_prefix='admin', - **self.config) - self._client.check_version() + self._client = self.config['kafka_client']( + metrics=self._metrics, + metric_group_prefix='admin', + **self.config + ) + self._client.check_version(timeout=(self.config['api_version_auto_timeout_ms'] / 1000)) # Get auto-discovered version from client if necessary if self.config['api_version'] is None: @@ -271,7 +277,7 @@ def _refresh_controller_id(self): response = future.value controller_id = response.controller_id # verify the controller is new enough to support our requests - controller_version = self._client.check_version(controller_id) + controller_version = self._client.check_version(controller_id, timeout=(self.config['api_version_auto_timeout_ms'] / 1000)) if controller_version < (0, 10, 0): raise IncompatibleBrokerVersion( "The controller appears to be running Kafka {}. KafkaAdminClient requires brokers >= 0.10.0.0." @@ -324,30 +330,37 @@ def _find_coordinator_id_process_response(self, response): .format(response.API_VERSION)) return response.coordinator_id - def _find_coordinator_id(self, group_id): - """Find the broker node_id of the coordinator of the given group. + def _find_coordinator_ids(self, group_ids): + """Find the broker node_ids of the coordinators of the given groups. - Sends a FindCoordinatorRequest message to the cluster. Will block until - the FindCoordinatorResponse is received. Any errors are immediately - raised. + Sends a FindCoordinatorRequest message to the cluster for each group_id. + Will block until the FindCoordinatorResponse is received for all groups. + Any errors are immediately raised. - :param group_id: The consumer group ID. This is typically the group + :param group_ids: A list of consumer group IDs. This is typically the group name as a string. - :return: The node_id of the broker that is the coordinator. + :return: A dict of {group_id: node_id} where node_id is the id of the + broker that is the coordinator for the corresponding group. """ - # Note: Java may change how this is implemented in KAFKA-6791. 
- future = self._find_coordinator_id_send_request(group_id) - self._wait_for_futures([future]) - response = future.value - return self._find_coordinator_id_process_response(response) - - def _send_request_to_node(self, node_id, request): + groups_futures = { + group_id: self._find_coordinator_id_send_request(group_id) + for group_id in group_ids + } + self._wait_for_futures(groups_futures.values()) + groups_coordinators = { + group_id: self._find_coordinator_id_process_response(future.value) + for group_id, future in groups_futures.items() + } + return groups_coordinators + + def _send_request_to_node(self, node_id, request, wakeup=True): """Send a Kafka protocol message to a specific broker. Returns a future that may be polled for status and results. :param node_id: The broker id to which to send the message. :param request: The message to send. + :param wakeup: Optional flag to disable thread-wakeup. :return: A future object that may be polled for status and results. :exception: The exception if the message could not be sent. """ @@ -355,7 +368,7 @@ def _send_request_to_node(self, node_id, request): # poll until the connection to broker is ready, otherwise send() # will fail with NodeNotReadyError self._client.poll() - return self._client.send(node_id, request) + return self._client.send(node_id, request, wakeup) def _send_request_to_controller(self, request): """Send a Kafka protocol message to the cluster controller. @@ -682,7 +695,6 @@ def create_acls(self, acls): self._wait_for_futures([future]) response = future.value - return self._convert_create_acls_response_to_acls(acls, response) @staticmethod @@ -860,7 +872,7 @@ def describe_configs(self, config_resources, include_synonyms=False): )) else: raise NotImplementedError( - "Support for DescribeConfigs v{} has not yet been added to KafkaAdminClient.".format(version)) + f"Support for DescribeConfigs v{version} has not yet been added to KafkaAdminClient.") self._wait_for_futures(futures) return [f.value for f in futures] @@ -1000,22 +1012,47 @@ def _describe_consumer_groups_process_response(self, response): """Process a DescribeGroupsResponse into a group description.""" if response.API_VERSION <= 3: assert len(response.groups) == 1 - # TODO need to implement converting the response tuple into - # a more accessible interface like a namedtuple and then stop - # hardcoding tuple indices here. 
Several Java examples, - # including KafkaAdminClient.java - group_description = response.groups[0] - error_code = group_description[0] + for response_field, response_name in zip(response.SCHEMA.fields, response.SCHEMA.names): + if isinstance(response_field, Array): + described_groups_field_schema = response_field.array_of + described_group = response.__dict__[response_name][0] + described_group_information_list = [] + protocol_type_is_consumer = False + for (described_group_information, group_information_name, group_information_field) in zip(described_group, described_groups_field_schema.names, described_groups_field_schema.fields): + if group_information_name == 'protocol_type': + protocol_type = described_group_information + protocol_type_is_consumer = (protocol_type == ConsumerProtocol.PROTOCOL_TYPE or not protocol_type) + if isinstance(group_information_field, Array): + member_information_list = [] + member_schema = group_information_field.array_of + for members in described_group_information: + member_information = [] + for (member, member_field, member_name) in zip(members, member_schema.fields, member_schema.names): + if protocol_type_is_consumer: + if member_name == 'member_metadata' and member: + member_information.append(ConsumerProtocolMemberMetadata.decode(member)) + elif member_name == 'member_assignment' and member: + member_information.append(ConsumerProtocolMemberAssignment.decode(member)) + else: + member_information.append(member) + member_info_tuple = MemberInformation._make(member_information) + member_information_list.append(member_info_tuple) + described_group_information_list.append(member_information_list) + else: + described_group_information_list.append(described_group_information) + # Version 3 of the DescribeGroups API introduced the "authorized_operations" field. + # This will cause the namedtuple to fail. + # Therefore, appending a placeholder of None in it. + if response.API_VERSION <=2: + described_group_information_list.append(None) + group_description = GroupInformation._make(described_group_information_list) + error_code = group_description.error_code error_type = Errors.for_code(error_code) # Java has the note: KAFKA-6789, we can retry based on the error code if error_type is not Errors.NoError: raise error_type( "DescribeGroupsResponse failed with response '{}'." .format(response)) - # TODO Java checks the group protocol type, and if consumer - # (ConsumerProtocol.PROTOCOL_TYPE) or empty string, it decodes - # the members' partition assignments... that hasn't yet been - # implemented here so just return the raw struct results else: raise NotImplementedError( "Support for DescribeGroupsResponse_v{} has not yet been added to KafkaAdminClient." @@ -1044,18 +1081,19 @@ def describe_consumer_groups(self, group_ids, group_coordinator_id=None, include partition assignments. 
""" group_descriptions = [] - futures = [] - for group_id in group_ids: - if group_coordinator_id is not None: - this_groups_coordinator_id = group_coordinator_id - else: - this_groups_coordinator_id = self._find_coordinator_id(group_id) - f = self._describe_consumer_groups_send_request( + + if group_coordinator_id is not None: + groups_coordinators = {group_id: group_coordinator_id for group_id in group_ids} + else: + groups_coordinators = self._find_coordinator_ids(group_ids) + + futures = [ + self._describe_consumer_groups_send_request( group_id, - this_groups_coordinator_id, + coordinator_id, include_authorized_operations) - futures.append(f) - + for group_id, coordinator_id in groups_coordinators.items() + ] self._wait_for_futures(futures) for future in futures: @@ -1157,7 +1195,7 @@ def _list_consumer_group_offsets_send_request(self, group_id, topics_partitions_dict = defaultdict(set) for topic, partition in partitions: topics_partitions_dict[topic].add(partition) - topics_partitions = list(six.iteritems(topics_partitions_dict)) + topics_partitions = list(topics_partitions_dict.items()) request = OffsetFetchRequest[version](group_id, topics_partitions) else: raise NotImplementedError( @@ -1170,7 +1208,7 @@ def _list_consumer_group_offsets_process_response(self, response): :param response: an OffsetFetchResponse. :return: A dictionary composed of TopicPartition keys and - OffsetAndMetada values. + OffsetAndMetadata values. """ if response.API_VERSION <= 3: @@ -1184,7 +1222,7 @@ def _list_consumer_group_offsets_process_response(self, response): .format(response)) # transform response into a dictionary with TopicPartition keys and - # OffsetAndMetada values--this is what the Java AdminClient returns + # OffsetAndMetadata values--this is what the Java AdminClient returns offsets = {} for topic, partitions in response.topics: for partition, offset, metadata, error_code in partitions: @@ -1227,15 +1265,76 @@ def list_consumer_group_offsets(self, group_id, group_coordinator_id=None, explicitly specified. """ if group_coordinator_id is None: - group_coordinator_id = self._find_coordinator_id(group_id) + group_coordinator_id = self._find_coordinator_ids([group_id])[group_id] future = self._list_consumer_group_offsets_send_request( group_id, group_coordinator_id, partitions) self._wait_for_futures([future]) response = future.value return self._list_consumer_group_offsets_process_response(response) - # delete groups protocol not yet implemented - # Note: send the request to the group's coordinator. + def delete_consumer_groups(self, group_ids, group_coordinator_id=None): + """Delete Consumer Group Offsets for given consumer groups. + + Note: + This does not verify that the group ids actually exist and + group_coordinator_id is the correct coordinator for all these groups. + + The result needs checking for potential errors. + + :param group_ids: The consumer group ids of the groups which are to be deleted. + :param group_coordinator_id: The node_id of the broker which is the coordinator for + all the groups. Use only if all groups are coordinated by the same broker. + If set to None, will query the cluster to find the coordinator for every single group. + Explicitly specifying this can be useful to prevent + that extra network round trips if you already know the group + coordinator. Default: None. 
+ :return: A list of tuples (group_id, KafkaError) + """ + if group_coordinator_id is not None: + futures = [self._delete_consumer_groups_send_request(group_ids, group_coordinator_id)] + else: + coordinators_groups = defaultdict(list) + for group_id, coordinator_id in self._find_coordinator_ids(group_ids).items(): + coordinators_groups[coordinator_id].append(group_id) + futures = [ + self._delete_consumer_groups_send_request(group_ids, coordinator_id) + for coordinator_id, group_ids in coordinators_groups.items() + ] + + self._wait_for_futures(futures) + + results = [] + for f in futures: + results.extend(self._convert_delete_groups_response(f.value)) + return results + + def _convert_delete_groups_response(self, response): + if response.API_VERSION <= 1: + results = [] + for group_id, error_code in response.results: + results.append((group_id, Errors.for_code(error_code))) + return results + else: + raise NotImplementedError( + "Support for DeleteGroupsResponse_v{} has not yet been added to KafkaAdminClient." + .format(response.API_VERSION)) + + def _delete_consumer_groups_send_request(self, group_ids, group_coordinator_id): + """Send a DeleteGroups request to a broker. + + :param group_ids: The consumer group ids of the groups which are to be deleted. + :param group_coordinator_id: The node_id of the broker which is the coordinator for + all the groups. + :return: A message future + """ + version = self._matching_api_version(DeleteGroupsRequest) + if version <= 1: + request = DeleteGroupsRequest[version](group_ids) + else: + raise NotImplementedError( + "Support for DeleteGroupsRequest_v{} has not yet been added to KafkaAdminClient." + .format(version)) + return self._send_request_to_node(group_coordinator_id, request) def _wait_for_futures(self, futures): while not all(future.succeeded() for future in futures): diff --git a/kafka/admin/config_resource.py b/kafka/admin/config_resource.py index e3294c9c4..0ae3f528e 100644 --- a/kafka/admin/config_resource.py +++ b/kafka/admin/config_resource.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - # enum in stdlib as of py3.4 try: from enum import IntEnum # pylint: disable=import-error @@ -15,7 +13,7 @@ class ConfigResourceType(IntEnum): TOPIC = 2 -class ConfigResource(object): +class ConfigResource: """A class for specifying config resources. Arguments: resource_type (ConfigResourceType): the type of kafka resource diff --git a/kafka/admin/new_partitions.py b/kafka/admin/new_partitions.py index 429b2e190..613fb861e 100644 --- a/kafka/admin/new_partitions.py +++ b/kafka/admin/new_partitions.py @@ -1,7 +1,4 @@ -from __future__ import absolute_import - - -class NewPartitions(object): +class NewPartitions: """A class for new partition creation on existing topics. Note that the length of new_assignments, if specified, must be the difference between the new total number of partitions and the existing number of partitions. 
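# Illustrative sketch (not part of the patch) for the delete_consumer_groups()
# call added above. It assumes a reachable broker at localhost:9092 and an
# existing, inactive consumer group named "my-group"; both names are
# placeholders. As documented above, the result must be checked per group.
from kafka.admin import KafkaAdminClient
from kafka.errors import NoError

admin = KafkaAdminClient(bootstrap_servers="localhost:9092")
for group_id, error in admin.delete_consumer_groups(["my-group"]):
    # each entry pairs the group id with the KafkaError class decoded from
    # the broker's error_code; NoError means the deletion succeeded
    if error is not NoError:
        print("could not delete %s: %s" % (group_id, error))
admin.close()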
Arguments: diff --git a/kafka/admin/new_topic.py b/kafka/admin/new_topic.py index 645ac383a..a50c3a374 100644 --- a/kafka/admin/new_topic.py +++ b/kafka/admin/new_topic.py @@ -1,9 +1,7 @@ -from __future__ import absolute_import - from kafka.errors import IllegalArgumentError -class NewTopic(object): +class NewTopic: """ A class for new topic creation Arguments: name (string): name of the topic diff --git a/kafka/client_async.py b/kafka/client_async.py index caa88cf5e..0b546c314 100644 --- a/kafka/client_async.py +++ b/kafka/client_async.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division - import collections import copy import logging @@ -32,14 +30,10 @@ from kafka.vendor import socketpair from kafka.version import __version__ -if six.PY2: - ConnectionError = None - - log = logging.getLogger('kafka.client') -class KafkaClient(object): +class KafkaClient: """ A network client for asynchronous request/response network I/O. @@ -154,6 +148,8 @@ class KafkaClient(object): sasl mechanism handshake. Default: one of bootstrap servers sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider instance. (See kafka.oauth.abstract). Default: None + raise_upon_socket_err_during_wakeup (bool): If set to True, raise an exception + upon socket error during wakeup(). Default: False """ DEFAULT_CONFIG = { @@ -192,7 +188,8 @@ class KafkaClient(object): 'sasl_plain_password': None, 'sasl_kerberos_service_name': 'kafka', 'sasl_kerberos_domain_name': None, - 'sasl_oauth_token_provider': None + 'sasl_oauth_token_provider': None, + 'raise_upon_socket_err_during_wakeup': False } def __init__(self, **configs): @@ -201,10 +198,15 @@ def __init__(self, **configs): if key in configs: self.config[key] = configs[key] + # these properties need to be set on top of the initialization pipeline + # because they are used when __del__ method is called + self._closed = False + self._wake_r, self._wake_w = socket.socketpair() + self._selector = self.config['selector']() + self.cluster = ClusterMetadata(**self.config) self._topics = set() # empty set will fetch all topic metadata self._metadata_refresh_in_progress = False - self._selector = self.config['selector']() self._conns = Dict() # object to support weakrefs self._api_versions = None self._connecting = set() @@ -212,7 +214,6 @@ def __init__(self, **configs): self._refresh_on_disconnects = True self._last_bootstrap = 0 self._bootstrap_fails = 0 - self._wake_r, self._wake_w = socket.socketpair() self._wake_r.setblocking(False) self._wake_w.settimeout(self.config['wakeup_timeout_ms'] / 1000.0) self._wake_lock = threading.Lock() @@ -226,7 +227,6 @@ def __init__(self, **configs): self._selector.register(self._wake_r, selectors.EVENT_READ) self._idle_expiry_manager = IdleConnectionManager(self.config['connections_max_idle_ms']) - self._closed = False self._sensors = None if self.config['metrics']: self._sensors = KafkaClientMetrics(self.config['metrics'], @@ -240,6 +240,8 @@ def __init__(self, **configs): check_timeout = self.config['api_version_auto_timeout_ms'] / 1000 self.config['api_version'] = self.check_version(timeout=check_timeout) + self._raise_upon_socket_err_during_wakeup = self.config['raise_upon_socket_err_during_wakeup'] + def _can_bootstrap(self): effective_failures = self._bootstrap_fails // self._num_bootstrap_hosts backoff_factor = 2 ** effective_failures @@ -366,7 +368,7 @@ def _maybe_connect(self, node_id): if conn is None: broker = self.cluster.broker_metadata(node_id) - assert broker, 'Broker id %s not in current 
metadata' % (node_id,) + assert broker, 'Broker id {} not in current metadata'.format(node_id) log.debug("Initiating connection to node %s at %s:%s", node_id, broker.host, broker.port) @@ -634,6 +636,9 @@ def _poll(self, timeout): self._sensors.select_time.record((end_select - start_select) * 1000000000) for key, events in ready: + if key.fileobj.fileno() < 0: + self._selector.unregister(key.fileobj) + if key.fileobj is self._wake_r: self._clear_wake_fd() continue @@ -675,7 +680,7 @@ def _poll(self, timeout): unexpected_data = key.fileobj.recv(1) if unexpected_data: # anything other than a 0-byte read means protocol issues log.warning('Protocol out of sync on %r, closing', conn) - except socket.error: + except OSError: pass conn.close(Errors.KafkaConnectionError('Socket EVENT_READ without in-flight-requests')) continue @@ -690,7 +695,7 @@ def _poll(self, timeout): if conn not in processed and conn.connected() and conn._sock.pending(): self._pending_completion.extend(conn.recv()) - for conn in six.itervalues(self._conns): + for conn in self._conns.values(): if conn.requests_timed_out(): log.warning('%s timed out after %s ms. Closing connection.', conn, conn.config['request_timeout_ms']) @@ -930,15 +935,17 @@ def wakeup(self): except socket.timeout: log.warning('Timeout to send to wakeup socket!') raise Errors.KafkaTimeoutError() - except socket.error: + except OSError as e: log.warning('Unable to send to wakeup socket!') + if self._raise_upon_socket_err_during_wakeup: + raise e def _clear_wake_fd(self): # reading from wake socket should only happen in a single thread while True: try: self._wake_r.recv(1024) - except socket.error: + except OSError: break def _maybe_close_oldest_connection(self): @@ -968,7 +975,7 @@ def bootstrap_connected(self): OrderedDict = dict -class IdleConnectionManager(object): +class IdleConnectionManager: def __init__(self, connections_max_idle_ms): if connections_max_idle_ms > 0: self.connections_max_idle = connections_max_idle_ms / 1000 @@ -1030,7 +1037,7 @@ def poll_expired_connection(self): return None -class KafkaClientMetrics(object): +class KafkaClientMetrics: def __init__(self, metrics, metric_group_prefix, conns): self.metrics = metrics self.metric_group_name = metric_group_prefix + '-metrics' diff --git a/kafka/cluster.py b/kafka/cluster.py index 438baf29d..db0e77818 100644 --- a/kafka/cluster.py +++ b/kafka/cluster.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import collections import copy import logging @@ -16,7 +14,7 @@ log = logging.getLogger(__name__) -class ClusterMetadata(object): +class ClusterMetadata: """ A class to manage kafka cluster metadata. 
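# Illustrative sketch (not part of the patch) of the new
# raise_upon_socket_err_during_wakeup client flag, assuming a reachable broker
# at localhost:9092. By default a socket error in wakeup() is only logged as a
# warning; with the flag set, the OSError is re-raised to the caller.
from kafka.client_async import KafkaClient

client = KafkaClient(bootstrap_servers="localhost:9092",
                     raise_upon_socket_err_during_wakeup=True)
try:
    client.wakeup()
except OSError:
    # only reached if the internal wakeup socketpair is broken
    pass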
@@ -128,9 +126,9 @@ def available_partitions_for_topic(self, topic): """ if topic not in self._partitions: return None - return set([partition for partition, metadata - in six.iteritems(self._partitions[topic]) - if metadata.leader != -1]) + return {partition for partition, metadata + in self._partitions[topic].items() + if metadata.leader != -1} def leader_for_partition(self, partition): """Return node_id of leader, -1 unavailable, None if unknown.""" @@ -361,7 +359,7 @@ def add_group_coordinator(self, group, response): # Use a coordinator-specific node id so that group requests # get a dedicated connection - node_id = 'coordinator-{}'.format(response.coordinator_id) + node_id = f'coordinator-{response.coordinator_id}' coordinator = BrokerMetadata( node_id, response.host, diff --git a/kafka/codec.py b/kafka/codec.py index aa9fc8291..2bdd72185 100644 --- a/kafka/codec.py +++ b/kafka/codec.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import gzip import io import platform @@ -10,12 +8,18 @@ _XERIAL_V1_HEADER = (-126, b'S', b'N', b'A', b'P', b'P', b'Y', 0, 1, 1) _XERIAL_V1_FORMAT = 'bccccccBii' +ZSTD_MAX_OUTPUT_SIZE = 1024 * 1024 try: import snappy except ImportError: snappy = None +try: + import zstandard as zstd +except ImportError: + zstd = None + try: import lz4.frame as lz4 @@ -58,6 +62,10 @@ def has_snappy(): return snappy is not None +def has_zstd(): + return zstd is not None + + def has_lz4(): if lz4 is not None: return True @@ -139,10 +147,6 @@ def snappy_encode(payload, xerial_compatible=True, xerial_blocksize=32*1024): # buffer... likely a python-snappy bug, so just use a slice copy chunker = lambda payload, i, size: payload[i:size+i] - elif six.PY2: - # Sliced buffer avoids additional copies - # pylint: disable-msg=undefined-variable - chunker = lambda payload, i, size: buffer(payload, i, size) else: # snappy.compress does not like raw memoryviews, so we have to convert # tobytes, which is a copy... oh well. it's the thought that counts. @@ -177,7 +181,7 @@ def _detect_xerial_stream(payload): The version is the version of this format as written by xerial, in the wild this is currently 1 as such we only support v1. - Compat is there to claim the miniumum supported version that + Compat is there to claim the minimum supported version that can read a xerial block stream, presently in the wild this is 1. 
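# Illustrative round-trip sketch (not part of the patch) for the zstd support
# added in kafka/codec.py: has_zstd() above, zstd_encode()/zstd_decode() just
# below. It assumes the optional `zstandard` package is installed.
from kafka.codec import has_zstd, zstd_encode, zstd_decode

if has_zstd():
    payload = b"kafka-python " * 1000
    # compress and decompress through the new helpers; the output must match
    assert zstd_decode(zstd_encode(payload)) == payload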
""" @@ -299,3 +303,18 @@ def lz4_decode_old_kafka(payload): payload[header_size:] ]) return lz4_decode(munged_payload) + + +def zstd_encode(payload): + if not zstd: + raise NotImplementedError("Zstd codec is not available") + return zstd.ZstdCompressor().compress(payload) + + +def zstd_decode(payload): + if not zstd: + raise NotImplementedError("Zstd codec is not available") + try: + return zstd.ZstdDecompressor().decompress(payload) + except zstd.ZstdError: + return zstd.ZstdDecompressor().decompress(payload, max_output_size=ZSTD_MAX_OUTPUT_SIZE) diff --git a/kafka/conn.py b/kafka/conn.py index 5c7287568..1bac266e6 100644 --- a/kafka/conn.py +++ b/kafka/conn.py @@ -1,8 +1,5 @@ -from __future__ import absolute_import, division - import copy import errno -import io import logging from random import shuffle, uniform @@ -14,33 +11,29 @@ from kafka.vendor import selectors34 as selectors import socket -import struct import threading import time from kafka.vendor import six +from kafka import sasl import kafka.errors as Errors from kafka.future import Future from kafka.metrics.stats import Avg, Count, Max, Rate -from kafka.oauth.abstract import AbstractTokenProvider -from kafka.protocol.admin import SaslHandShakeRequest, DescribeAclsRequest_v2 +from kafka.protocol.admin import ( + DescribeAclsRequest_v2, + DescribeClientQuotasRequest, + SaslHandShakeRequest, +) from kafka.protocol.commit import OffsetFetchRequest from kafka.protocol.offset import OffsetRequest from kafka.protocol.produce import ProduceRequest from kafka.protocol.metadata import MetadataRequest from kafka.protocol.fetch import FetchRequest from kafka.protocol.parser import KafkaProtocol -from kafka.protocol.types import Int32, Int8 -from kafka.scram import ScramClient from kafka.version import __version__ -if six.PY2: - ConnectionError = socket.error - TimeoutError = socket.error - BlockingIOError = Exception - log = logging.getLogger(__name__) DEFAULT_KAFKA_PORT = 9092 @@ -78,11 +71,17 @@ class SSLWantWriteError(Exception): try: import gssapi from gssapi.raw.misc import GSSError -except ImportError: +except (ImportError, OSError): #no gssapi available, will disable gssapi mechanism gssapi = None GSSError = None +# needed for AWS_MSK_IAM authentication: +try: + from botocore.session import Session as BotoSession +except ImportError: + # no botocore available, will disable AWS_MSK_IAM mechanism + BotoSession = None AFI_NAMES = { socket.AF_UNSPEC: "unspecified", @@ -91,7 +90,7 @@ class SSLWantWriteError(Exception): } -class ConnectionStates(object): +class ConnectionStates: DISCONNECTING = '' DISCONNECTED = '' CONNECTING = '' @@ -100,7 +99,7 @@ class ConnectionStates(object): AUTHENTICATING = '' -class BrokerConnection(object): +class BrokerConnection: """Initialize a Kafka broker connection Keyword Arguments: @@ -227,7 +226,6 @@ class BrokerConnection(object): 'sasl_oauth_token_provider': None } SECURITY_PROTOCOLS = ('PLAINTEXT', 'SSL', 'SASL_PLAINTEXT', 'SASL_SSL') - SASL_MECHANISMS = ('PLAIN', 'GSSAPI', 'OAUTHBEARER', "SCRAM-SHA-256", "SCRAM-SHA-512") def __init__(self, host, port, afi, **configs): self.host = host @@ -256,26 +254,19 @@ def __init__(self, host, port, afi, **configs): assert self.config['security_protocol'] in self.SECURITY_PROTOCOLS, ( 'security_protocol must be in ' + ', '.join(self.SECURITY_PROTOCOLS)) + if self.config['security_protocol'] in ('SSL', 'SASL_SSL'): assert ssl_available, "Python wasn't built with SSL support" + if self.config['sasl_mechanism'] == 'AWS_MSK_IAM': + assert BotoSession is not None, 
'AWS_MSK_IAM requires the "botocore" package' + assert self.config['security_protocol'] == 'SASL_SSL', 'AWS_MSK_IAM requires SASL_SSL' + if self.config['security_protocol'] in ('SASL_PLAINTEXT', 'SASL_SSL'): - assert self.config['sasl_mechanism'] in self.SASL_MECHANISMS, ( - 'sasl_mechanism must be in ' + ', '.join(self.SASL_MECHANISMS)) - if self.config['sasl_mechanism'] in ('PLAIN', 'SCRAM-SHA-256', 'SCRAM-SHA-512'): - assert self.config['sasl_plain_username'] is not None, ( - 'sasl_plain_username required for PLAIN or SCRAM sasl' - ) - assert self.config['sasl_plain_password'] is not None, ( - 'sasl_plain_password required for PLAIN or SCRAM sasl' - ) - if self.config['sasl_mechanism'] == 'GSSAPI': - assert gssapi is not None, 'GSSAPI lib not available' - assert self.config['sasl_kerberos_service_name'] is not None, 'sasl_kerberos_service_name required for GSSAPI sasl' - if self.config['sasl_mechanism'] == 'OAUTHBEARER': - token_provider = self.config['sasl_oauth_token_provider'] - assert token_provider is not None, 'sasl_oauth_token_provider required for OAUTHBEARER sasl' - assert callable(getattr(token_provider, "token", None)), 'sasl_oauth_token_provider must implement method #token()' + assert self.config['sasl_mechanism'] in sasl.MECHANISMS, ( + 'sasl_mechanism must be one of {}'.format(', '.join(sasl.MECHANISMS.keys())) + ) + sasl.MECHANISMS[self.config['sasl_mechanism']].validate_config(self) # This is not a general lock / this class is not generally thread-safe yet # However, to avoid pushing responsibility for maintaining # per-connection locks to the upstream client, we will use this lock to @@ -386,7 +377,7 @@ def connect(self): ret = None try: ret = self._sock.connect_ex(self._sock_addr) - except socket.error as err: + except OSError as err: ret = err.errno # Connection succeeded @@ -418,7 +409,7 @@ def connect(self): log.error('Connect attempt to %s returned error %s.' ' Disconnecting.', self, ret) errstr = errno.errorcode.get(ret, 'UNKNOWN') - self.close(Errors.KafkaConnectionError('{} {}'.format(ret, errstr))) + self.close(Errors.KafkaConnectionError(f'{ret} {errstr}')) return self.state # Needs retry @@ -496,7 +487,7 @@ def _wrap_ssl(self): try: self._sock = self._ssl_context.wrap_socket( self._sock, - server_hostname=self.host, + server_hostname=self.host.rstrip("."), do_handshake_on_connect=False) except ssl.SSLError as e: log.exception('%s: Failed to wrap socket in SSLContext!', self) @@ -510,7 +501,7 @@ def _try_handshake(self): # old ssl in python2.6 will swallow all SSLErrors here... except (SSLWantReadError, SSLWantWriteError): pass - except (SSLZeroReturnError, ConnectionError, TimeoutError, SSLEOFError): + except (SSLZeroReturnError, ConnectionError, TimeoutError, SSLEOFError, ssl.SSLError, OSError) as e: log.warning('SSL connection closed by server during handshake.') self.close(Errors.KafkaConnectionError('SSL connection closed by server during handshake')) # Other SSLErrors will be raised to user @@ -553,19 +544,9 @@ def _handle_sasl_handshake_response(self, future, response): Errors.UnsupportedSaslMechanismError( 'Kafka broker does not support %s sasl mechanism. 
Enabled mechanisms are: %s' % (self.config['sasl_mechanism'], response.enabled_mechanisms))) - elif self.config['sasl_mechanism'] == 'PLAIN': - return self._try_authenticate_plain(future) - elif self.config['sasl_mechanism'] == 'GSSAPI': - return self._try_authenticate_gssapi(future) - elif self.config['sasl_mechanism'] == 'OAUTHBEARER': - return self._try_authenticate_oauth(future) - elif self.config['sasl_mechanism'].startswith("SCRAM-SHA-"): - return self._try_authenticate_scram(future) - else: - return future.failure( - Errors.UnsupportedSaslMechanismError( - 'kafka-python does not support SASL mechanism %s' % - self.config['sasl_mechanism'])) + + try_authenticate = sasl.MECHANISMS[self.config['sasl_mechanism']].try_authenticate + return try_authenticate(self, future) def _send_bytes(self, data): """Send some data via non-blocking IO @@ -588,8 +569,7 @@ def _send_bytes(self, data): break raise except BlockingIOError: - if six.PY3: - break + break raise return total_sent @@ -619,225 +599,6 @@ def _recv_bytes_blocking(self, n): finally: self._sock.settimeout(0.0) - def _try_authenticate_plain(self, future): - if self.config['security_protocol'] == 'SASL_PLAINTEXT': - log.warning('%s: Sending username and password in the clear', self) - - data = b'' - # Send PLAIN credentials per RFC-4616 - msg = bytes('\0'.join([self.config['sasl_plain_username'], - self.config['sasl_plain_username'], - self.config['sasl_plain_password']]).encode('utf-8')) - size = Int32.encode(len(msg)) - - err = None - close = False - with self._lock: - if not self._can_send_recv(): - err = Errors.NodeNotReadyError(str(self)) - close = False - else: - try: - self._send_bytes_blocking(size + msg) - - # The server will send a zero sized message (that is Int32(0)) on success. - # The connection is closed on failure - data = self._recv_bytes_blocking(4) - - except (ConnectionError, TimeoutError) as e: - log.exception("%s: Error receiving reply from server", self) - err = Errors.KafkaConnectionError("%s: %s" % (self, e)) - close = True - - if err is not None: - if close: - self.close(error=err) - return future.failure(err) - - if data != b'\x00\x00\x00\x00': - error = Errors.AuthenticationFailedError('Unrecognized response during authentication') - return future.failure(error) - - log.info('%s: Authenticated as %s via PLAIN', self, self.config['sasl_plain_username']) - return future.success(True) - - def _try_authenticate_scram(self, future): - if self.config['security_protocol'] == 'SASL_PLAINTEXT': - log.warning('%s: Exchanging credentials in the clear', self) - - scram_client = ScramClient( - self.config['sasl_plain_username'], self.config['sasl_plain_password'], self.config['sasl_mechanism'] - ) - - err = None - close = False - with self._lock: - if not self._can_send_recv(): - err = Errors.NodeNotReadyError(str(self)) - close = False - else: - try: - client_first = scram_client.first_message().encode('utf-8') - size = Int32.encode(len(client_first)) - self._send_bytes_blocking(size + client_first) - - (data_len,) = struct.unpack('>i', self._recv_bytes_blocking(4)) - server_first = self._recv_bytes_blocking(data_len).decode('utf-8') - scram_client.process_server_first_message(server_first) - - client_final = scram_client.final_message().encode('utf-8') - size = Int32.encode(len(client_final)) - self._send_bytes_blocking(size + client_final) - - (data_len,) = struct.unpack('>i', self._recv_bytes_blocking(4)) - server_final = self._recv_bytes_blocking(data_len).decode('utf-8') - 
scram_client.process_server_final_message(server_final) - - except (ConnectionError, TimeoutError) as e: - log.exception("%s: Error receiving reply from server", self) - err = Errors.KafkaConnectionError("%s: %s" % (self, e)) - close = True - - if err is not None: - if close: - self.close(error=err) - return future.failure(err) - - log.info( - '%s: Authenticated as %s via %s', self, self.config['sasl_plain_username'], self.config['sasl_mechanism'] - ) - return future.success(True) - - def _try_authenticate_gssapi(self, future): - kerberos_damin_name = self.config['sasl_kerberos_domain_name'] or self.host - auth_id = self.config['sasl_kerberos_service_name'] + '@' + kerberos_damin_name - gssapi_name = gssapi.Name( - auth_id, - name_type=gssapi.NameType.hostbased_service - ).canonicalize(gssapi.MechType.kerberos) - log.debug('%s: GSSAPI name: %s', self, gssapi_name) - - err = None - close = False - with self._lock: - if not self._can_send_recv(): - err = Errors.NodeNotReadyError(str(self)) - close = False - else: - # Establish security context and negotiate protection level - # For reference RFC 2222, section 7.2.1 - try: - # Exchange tokens until authentication either succeeds or fails - client_ctx = gssapi.SecurityContext(name=gssapi_name, usage='initiate') - received_token = None - while not client_ctx.complete: - # calculate an output token from kafka token (or None if first iteration) - output_token = client_ctx.step(received_token) - - # pass output token to kafka, or send empty response if the security - # context is complete (output token is None in that case) - if output_token is None: - self._send_bytes_blocking(Int32.encode(0)) - else: - msg = output_token - size = Int32.encode(len(msg)) - self._send_bytes_blocking(size + msg) - - # The server will send a token back. Processing of this token either - # establishes a security context, or it needs further token exchange. - # The gssapi will be able to identify the needed next step. - # The connection is closed on failure. - header = self._recv_bytes_blocking(4) - (token_size,) = struct.unpack('>i', header) - received_token = self._recv_bytes_blocking(token_size) - - # Process the security layer negotiation token, sent by the server - # once the security context is established. - - # unwraps message containing supported protection levels and msg size - msg = client_ctx.unwrap(received_token).message - # Kafka currently doesn't support integrity or confidentiality security layers, so we - # simply set QoP to 'auth' only (first octet). 
We reuse the max message size proposed - # by the server - msg = Int8.encode(SASL_QOP_AUTH & Int8.decode(io.BytesIO(msg[0:1]))) + msg[1:] - # add authorization identity to the response, GSS-wrap and send it - msg = client_ctx.wrap(msg + auth_id.encode(), False).message - size = Int32.encode(len(msg)) - self._send_bytes_blocking(size + msg) - - except (ConnectionError, TimeoutError) as e: - log.exception("%s: Error receiving reply from server", self) - err = Errors.KafkaConnectionError("%s: %s" % (self, e)) - close = True - except Exception as e: - err = e - close = True - - if err is not None: - if close: - self.close(error=err) - return future.failure(err) - - log.info('%s: Authenticated as %s via GSSAPI', self, gssapi_name) - return future.success(True) - - def _try_authenticate_oauth(self, future): - data = b'' - - msg = bytes(self._build_oauth_client_request().encode("utf-8")) - size = Int32.encode(len(msg)) - - err = None - close = False - with self._lock: - if not self._can_send_recv(): - err = Errors.NodeNotReadyError(str(self)) - close = False - else: - try: - # Send SASL OAuthBearer request with OAuth token - self._send_bytes_blocking(size + msg) - - # The server will send a zero sized message (that is Int32(0)) on success. - # The connection is closed on failure - data = self._recv_bytes_blocking(4) - - except (ConnectionError, TimeoutError) as e: - log.exception("%s: Error receiving reply from server", self) - err = Errors.KafkaConnectionError("%s: %s" % (self, e)) - close = True - - if err is not None: - if close: - self.close(error=err) - return future.failure(err) - - if data != b'\x00\x00\x00\x00': - error = Errors.AuthenticationFailedError('Unrecognized response during authentication') - return future.failure(error) - - log.info('%s: Authenticated via OAuth', self) - return future.success(True) - - def _build_oauth_client_request(self): - token_provider = self.config['sasl_oauth_token_provider'] - return "n,,\x01auth=Bearer {}{}\x01\x01".format(token_provider.token(), self._token_extensions()) - - def _token_extensions(self): - """ - Return a string representation of the OPTIONAL key-value pairs that can be sent with an OAUTHBEARER - initial request. - """ - token_provider = self.config['sasl_oauth_token_provider'] - - # Only run if the #extensions() method is implemented by the clients Token Provider class - # Builds up a string separated by \x01 via a dict of key value pairs - if callable(getattr(token_provider, "extensions", None)) and len(token_provider.extensions()) > 0: - msg = "\x01".join(["{}={}".format(k, v) for k, v in token_provider.extensions().items()]) - return "\x01" + msg - else: - return "" - def blacked_out(self): """ Return true if we are disconnected from the given node and can't @@ -916,7 +677,7 @@ def close(self, error=None): with self._lock: if self.state is ConnectionStates.DISCONNECTED: return - log.info('%s: Closing connection. %s', self, error or '') + log.log(logging.ERROR if error else logging.INFO, '%s: Closing connection. 
%s', self, error or '') self._update_reconnect_backoff() self._sasl_auth_future = None self._protocol = KafkaProtocol( @@ -1003,7 +764,7 @@ def send_pending_requests(self): except (ConnectionError, TimeoutError) as e: log.exception("Error sending request data to %s", self) - error = Errors.KafkaConnectionError("%s: %s" % (self, e)) + error = Errors.KafkaConnectionError("{}: {}".format(self, e)) self.close(error=error) return False @@ -1036,7 +797,7 @@ def send_pending_requests_v2(self): except (ConnectionError, TimeoutError, Exception) as e: log.exception("Error sending request data to %s", self) - error = Errors.KafkaConnectionError("%s: %s" % (self, e)) + error = Errors.KafkaConnectionError("{}: {}".format(self, e)) self.close(error=error) return False @@ -1109,8 +870,7 @@ def _recv(self): err = Errors.KafkaConnectionError(e) break except BlockingIOError: - if six.PY3: - break + break # For PY2 this is a catchall and should be re-raised raise @@ -1145,10 +905,10 @@ def requests_timed_out(self): def _handle_api_version_response(self, response): error_type = Errors.for_code(response.error_code) assert error_type is Errors.NoError, "API version check failed" - self._api_versions = dict([ - (api_key, (min_version, max_version)) + self._api_versions = { + api_key: (min_version, max_version) for api_key, min_version, max_version in response.api_versions - ]) + } return self._api_versions def get_api_versions(self): @@ -1169,6 +929,7 @@ def _infer_broker_version_from_api_versions(self, api_versions): # in reverse order. As soon as we find one that works, return it test_cases = [ # format (, ) + ((2, 6, 0), DescribeClientQuotasRequest[0]), ((2, 5, 0), DescribeAclsRequest_v2), ((2, 4, 0), ProduceRequest[8]), ((2, 3, 0), FetchRequest[11]), @@ -1285,9 +1046,6 @@ def reset_override_configs(): elif (isinstance(f.exception, Errors.CorrelationIdError) and version == (0, 10)): pass - elif six.PY2: - assert isinstance(f.exception.args[0], socket.error) - assert f.exception.args[0].errno in (32, 54, 104) else: assert isinstance(f.exception.args[0], ConnectionError) log.info("Broker is not v%s -- it did not recognize %s", @@ -1305,7 +1063,7 @@ def __str__(self): AFI_NAMES[self._sock_afi], self._sock_addr) -class BrokerConnectionMetrics(object): +class BrokerConnectionMetrics: def __init__(self, metrics, metric_group_prefix, node_id): self.metrics = metrics @@ -1360,7 +1118,7 @@ def __init__(self, metrics, metric_group_prefix, node_id): # if one sensor of the metrics has been registered for the connection, # then all other sensors should have been registered; and vice versa - node_str = 'node-{0}'.format(node_id) + node_str = f'node-{node_id}' node_sensor = metrics.get_sensor(node_str + '.bytes-sent') if not node_sensor: metric_group_name = metric_group_prefix + '-node-metrics.' + node_str @@ -1427,7 +1185,7 @@ def _address_family(address): try: socket.inet_pton(af, address) return af - except (ValueError, AttributeError, socket.error): + except (ValueError, AttributeError, OSError): continue return socket.AF_UNSPEC @@ -1471,7 +1229,7 @@ def get_ip_port_afi(host_and_port_str): log.warning('socket.inet_pton not available on this platform.' ' consider `pip install win_inet_pton`') pass - except (ValueError, socket.error): + except (ValueError, OSError): # it's a host:port pair pass host, port = host_and_port_str.rsplit(':', 1) @@ -1487,7 +1245,7 @@ def collect_hosts(hosts, randomize=True): randomize the returned list. 
""" - if isinstance(hosts, six.string_types): + if isinstance(hosts, str): hosts = hosts.strip().split(',') result = [] diff --git a/kafka/consumer/__init__.py b/kafka/consumer/__init__.py index e09bcc1b8..5341d5648 100644 --- a/kafka/consumer/__init__.py +++ b/kafka/consumer/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from kafka.consumer.group import KafkaConsumer __all__ = [ diff --git a/kafka/consumer/fetcher.py b/kafka/consumer/fetcher.py index 7fb2466b0..ec43c0e66 100644 --- a/kafka/consumer/fetcher.py +++ b/kafka/consumer/fetcher.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import collections import copy import logging @@ -45,7 +43,7 @@ class RecordTooLargeError(Errors.KafkaError): pass -class Fetcher(six.Iterator): +class Fetcher: DEFAULT_CONFIG = { 'key_deserializer': None, 'value_deserializer': None, @@ -120,7 +118,7 @@ def send_fetches(self): List of Futures: each future resolves to a FetchResponse """ futures = [] - for node_id, request in six.iteritems(self._create_fetch_requests()): + for node_id, request in self._create_fetch_requests().items(): if self._client.ready(node_id): log.debug("Sending FetchRequest to node %s", node_id) future = self._client.send(node_id, request, wakeup=False) @@ -209,7 +207,7 @@ def end_offsets(self, partitions, timeout_ms): partitions, OffsetResetStrategy.LATEST, timeout_ms) def beginning_or_end_offset(self, partitions, timestamp, timeout_ms): - timestamps = dict([(tp, timestamp) for tp in partitions]) + timestamps = {tp: timestamp for tp in partitions} offsets = self._retrieve_offsets(timestamps, timeout_ms) for tp in timestamps: offsets[tp] = offsets[tp][0] @@ -244,7 +242,7 @@ def _reset_offset(self, partition): if self._subscriptions.is_assigned(partition): self._subscriptions.seek(partition, offset) else: - log.debug("Could not find offset for partition %s since it is probably deleted" % (partition,)) + log.debug(f"Could not find offset for partition {partition} since it is probably deleted") def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): """Fetch offset for each partition passed in ``timestamps`` map. @@ -293,10 +291,10 @@ def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): # Issue #1780 # Recheck partition existence after after a successful metadata refresh if refresh_future.succeeded() and isinstance(future.exception, Errors.StaleMetadata): - log.debug("Stale metadata was raised, and we now have an updated metadata. Rechecking partition existance") + log.debug("Stale metadata was raised, and we now have an updated metadata. Rechecking partition existence") unknown_partition = future.exception.args[0] # TopicPartition from StaleMetadata if self._client.cluster.leader_for_partition(unknown_partition) is None: - log.debug("Removed partition %s from offsets retrieval" % (unknown_partition, )) + log.debug(f"Removed partition {unknown_partition} from offsets retrieval") timestamps.pop(unknown_partition) else: time.sleep(self.config['retry_backoff_ms'] / 1000.0) @@ -305,7 +303,7 @@ def _retrieve_offsets(self, timestamps, timeout_ms=float("inf")): remaining_ms = timeout_ms - elapsed_ms raise Errors.KafkaTimeoutError( - "Failed to get offsets by timestamps in %s ms" % (timeout_ms,)) + f"Failed to get offsets by timestamps in {timeout_ms} ms") def fetched_records(self, max_records=None, update_offsets=True): """Returns previously fetched records and updates consumed offsets. 
@@ -522,7 +520,7 @@ def _send_offset_requests(self, timestamps): Future: resolves to a mapping of retrieved offsets """ timestamps_by_node = collections.defaultdict(dict) - for partition, timestamp in six.iteritems(timestamps): + for partition, timestamp in timestamps.items(): node_id = self._client.cluster.leader_for_partition(partition) if node_id is None: self._client.add_topic(partition.topic) @@ -554,7 +552,7 @@ def on_fail(err): if not list_offsets_future.is_done: list_offsets_future.failure(err) - for node_id, timestamps in six.iteritems(timestamps_by_node): + for node_id, timestamps in timestamps_by_node.items(): _f = self._send_offset_request(node_id, timestamps) _f.add_callback(on_success) _f.add_errback(on_fail) @@ -562,7 +560,7 @@ def on_fail(err): def _send_offset_request(self, node_id, timestamps): by_topic = collections.defaultdict(list) - for tp, timestamp in six.iteritems(timestamps): + for tp, timestamp in timestamps.items(): if self.config['api_version'] >= (0, 10, 1): data = (tp.partition, timestamp) else: @@ -570,9 +568,9 @@ def _send_offset_request(self, node_id, timestamps): by_topic[tp.topic].append(data) if self.config['api_version'] >= (0, 10, 1): - request = OffsetRequest[1](-1, list(six.iteritems(by_topic))) + request = OffsetRequest[1](-1, list(by_topic.items())) else: - request = OffsetRequest[0](-1, list(six.iteritems(by_topic))) + request = OffsetRequest[0](-1, list(by_topic.items())) # Client returns a future that only fails on network issues # so create a separate future and attach a callback to update it @@ -713,7 +711,7 @@ def _create_fetch_requests(self): else: version = 0 requests = {} - for node_id, partition_data in six.iteritems(fetchable): + for node_id, partition_data in fetchable.items(): if version < 3: requests[node_id] = FetchRequest[version]( -1, # replica_id @@ -755,9 +753,9 @@ def _handle_fetch_response(self, request, send_time, response): partition, offset = partition_data[:2] fetch_offsets[TopicPartition(topic, partition)] = offset - partitions = set([TopicPartition(topic, partition_data[0]) + partitions = {TopicPartition(topic, partition_data[0]) for topic, partitions in response.topics - for partition_data in partitions]) + for partition_data in partitions} metric_aggregator = FetchResponseMetricAggregator(self._sensors, partitions) # randomized ordering should improve balance for short-lived consumers @@ -817,8 +815,9 @@ def _parse_fetched_data(self, completed_fetch): position) unpacked = list(self._unpack_message_set(tp, records)) parsed_records = self.PartitionRecords(fetch_offset, tp, unpacked) - last_offset = unpacked[-1].offset - self._sensors.records_fetch_lag.record(highwater - last_offset) + if unpacked: + last_offset = unpacked[-1].offset + self._sensors.records_fetch_lag.record(highwater - last_offset) num_bytes = records.valid_bytes() records_count = len(unpacked) elif records.size_in_bytes() > 0: @@ -865,7 +864,7 @@ def _parse_fetched_data(self, completed_fetch): return parsed_records - class PartitionRecords(object): + class PartitionRecords: def __init__(self, fetch_offset, tp, messages): self.fetch_offset = fetch_offset self.topic_partition = tp @@ -909,7 +908,7 @@ def take(self, n=None): return res -class FetchResponseMetricAggregator(object): +class FetchResponseMetricAggregator: """ Since we parse the message data for each partition from each fetch response lazily, fetch-level metrics need to be aggregated as the messages @@ -938,10 +937,10 @@ def record(self, partition, num_bytes, num_records): 
self.sensors.records_fetched.record(self.total_records) -class FetchManagerMetrics(object): +class FetchManagerMetrics: def __init__(self, metrics, prefix): self.metrics = metrics - self.group_name = '%s-fetch-manager-metrics' % (prefix,) + self.group_name = f'{prefix}-fetch-manager-metrics' self.bytes_fetched = metrics.sensor('bytes-fetched') self.bytes_fetched.add(metrics.metric_name('fetch-size-avg', self.group_name, @@ -985,15 +984,15 @@ def record_topic_fetch_metrics(self, topic, num_bytes, num_records): bytes_fetched = self.metrics.sensor(name) bytes_fetched.add(self.metrics.metric_name('fetch-size-avg', self.group_name, - 'The average number of bytes fetched per request for topic %s' % (topic,), + f'The average number of bytes fetched per request for topic {topic}', metric_tags), Avg()) bytes_fetched.add(self.metrics.metric_name('fetch-size-max', self.group_name, - 'The maximum number of bytes fetched per request for topic %s' % (topic,), + f'The maximum number of bytes fetched per request for topic {topic}', metric_tags), Max()) bytes_fetched.add(self.metrics.metric_name('bytes-consumed-rate', self.group_name, - 'The average number of bytes consumed per second for topic %s' % (topic,), + f'The average number of bytes consumed per second for topic {topic}', metric_tags), Rate()) bytes_fetched.record(num_bytes) @@ -1006,10 +1005,10 @@ def record_topic_fetch_metrics(self, topic, num_bytes, num_records): records_fetched = self.metrics.sensor(name) records_fetched.add(self.metrics.metric_name('records-per-request-avg', self.group_name, - 'The average number of records in each request for topic %s' % (topic,), + f'The average number of records in each request for topic {topic}', metric_tags), Avg()) records_fetched.add(self.metrics.metric_name('records-consumed-rate', self.group_name, - 'The average number of records consumed per second for topic %s' % (topic,), + f'The average number of records consumed per second for topic {topic}', metric_tags), Rate()) records_fetched.record(num_records) diff --git a/kafka/consumer/group.py b/kafka/consumer/group.py index 26408c3a5..53800a1cc 100644 --- a/kafka/consumer/group.py +++ b/kafka/consumer/group.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division - import copy import logging import socket @@ -23,7 +21,7 @@ log = logging.getLogger(__name__) -class KafkaConsumer(six.Iterator): +class KafkaConsumer: """Consume records from a Kafka cluster. The consumer will transparently handle the failure of servers in the Kafka @@ -244,6 +242,7 @@ class KafkaConsumer(six.Iterator): sasl mechanism handshake. Default: one of bootstrap servers sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider instance. (See kafka.oauth.abstract). 
Default: None + kafka_client (callable): Custom class / callable for creating KafkaClient instances Note: Configuration parameters are described in more detail at @@ -306,6 +305,7 @@ class KafkaConsumer(six.Iterator): 'sasl_kerberos_domain_name': None, 'sasl_oauth_token_provider': None, 'legacy_iterator': False, # enable to revert to < 1.4.7 iterator + 'kafka_client': KafkaClient, } DEFAULT_SESSION_TIMEOUT_MS_0_9 = 30000 @@ -313,7 +313,7 @@ def __init__(self, *topics, **configs): # Only check for extra config keys in top-level class extra_configs = set(configs).difference(self.DEFAULT_CONFIG) if extra_configs: - raise KafkaConfigurationError("Unrecognized configs: %s" % (extra_configs,)) + raise KafkaConfigurationError(f"Unrecognized configs: {extra_configs}") self.config = copy.copy(self.DEFAULT_CONFIG) self.config.update(configs) @@ -353,7 +353,7 @@ def __init__(self, *topics, **configs): log.warning('use api_version=%s [tuple] -- "%s" as str is deprecated', str(self.config['api_version']), str_version) - self._client = KafkaClient(metrics=self._metrics, **self.config) + self._client = self.config['kafka_client'](metrics=self._metrics, **self.config) # Get auto-discovered version from client if necessary if self.config['api_version'] is None: @@ -651,7 +651,7 @@ def poll(self, timeout_ms=0, max_records=None, update_offsets=True): # Poll for new data until the timeout expires start = time.time() remaining = timeout_ms - while True: + while not self._closed: records = self._poll_once(remaining, max_records, update_offsets=update_offsets) if records: return records @@ -660,7 +660,9 @@ def poll(self, timeout_ms=0, max_records=None, update_offsets=True): remaining = timeout_ms - elapsed_ms if remaining <= 0: - return {} + break + + return {} def _poll_once(self, timeout_ms, max_records, update_offsets=True): """Do one round of polling. In addition to checking for new data, this does @@ -964,7 +966,7 @@ def metrics(self, raw=False): return self._metrics.metrics.copy() metrics = {} - for k, v in six.iteritems(self._metrics.metrics.copy()): + for k, v in self._metrics.metrics.copy().items(): if k.group not in metrics: metrics[k.group] = {} if k.name not in metrics[k.group]: @@ -1009,7 +1011,7 @@ def offsets_for_times(self, timestamps): raise UnsupportedVersionError( "offsets_for_times API not supported for cluster version {}" .format(self.config['api_version'])) - for tp, ts in six.iteritems(timestamps): + for tp, ts in timestamps.items(): timestamps[tp] = int(ts) if ts < 0: raise ValueError( @@ -1114,7 +1116,7 @@ def _update_fetch_positions(self, partitions): def _message_generator_v2(self): timeout_ms = 1000 * (self._consumer_timeout - time.time()) record_map = self.poll(timeout_ms=timeout_ms, update_offsets=False) - for tp, records in six.iteritems(record_map): + for tp, records in record_map.items(): # Generators are stateful, and it is possible that the tp / records # here may become stale during iteration -- i.e., we seek to a # different offset, pause consumption, or lose assignment. diff --git a/kafka/consumer/subscription_state.py b/kafka/consumer/subscription_state.py index 08842d133..31102b8bc 100644 --- a/kafka/consumer/subscription_state.py +++ b/kafka/consumer/subscription_state.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import abc import logging import re @@ -13,7 +11,7 @@ log = logging.getLogger(__name__) -class SubscriptionState(object): +class SubscriptionState: """ A class for tracking the topics, partitions, and offsets for the consumer. 
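# Illustrative sketch (not part of the patch) of the new `kafka_client`
# consumer option documented above: a custom KafkaClient subclass is injected
# in place of the default client. The subclass name, topic, and broker address
# are placeholder assumptions.
from kafka import KafkaConsumer
from kafka.client_async import KafkaClient

class InstrumentedKafkaClient(KafkaClient):
    def send(self, node_id, request, wakeup=True):
        # trivial hook: count outgoing requests before delegating to the base client
        self.sent = getattr(self, "sent", 0) + 1
        return super().send(node_id, request, wakeup=wakeup)

consumer = KafkaConsumer("my-topic",
                         bootstrap_servers="localhost:9092",
                         kafka_client=InstrumentedKafkaClient)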
A partition is "assigned" either directly with assign_from_user() (manual @@ -130,16 +128,16 @@ def _ensure_valid_topic_name(self, topic): # https://github.com/apache/kafka/blob/39eb31feaeebfb184d98cc5d94da9148c2319d81/clients/src/main/java/org/apache/kafka/common/internals/Topic.java if topic is None: raise TypeError('All topics must not be None') - if not isinstance(topic, six.string_types): + if not isinstance(topic, str): raise TypeError('All topics must be strings') if len(topic) == 0: raise ValueError('All topics must be non-empty strings') if topic == '.' or topic == '..': raise ValueError('Topic name cannot be "." or ".."') if len(topic) > self._MAX_NAME_LENGTH: - raise ValueError('Topic name is illegal, it can\'t be longer than {0} characters, topic: "{1}"'.format(self._MAX_NAME_LENGTH, topic)) + raise ValueError(f'Topic name is illegal, it can\'t be longer than {self._MAX_NAME_LENGTH} characters, topic: "{topic}"') if not self._TOPIC_LEGAL_CHARS.match(topic): - raise ValueError('Topic name "{0}" is illegal, it contains a character other than ASCII alphanumerics, ".", "_" and "-"'.format(topic)) + raise ValueError(f'Topic name "{topic}" is illegal, it contains a character other than ASCII alphanumerics, ".", "_" and "-"') def change_subscription(self, topics): """Change the topic subscription. @@ -157,7 +155,7 @@ def change_subscription(self, topics): if self._user_assignment: raise IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE) - if isinstance(topics, six.string_types): + if isinstance(topics, str): topics = [topics] if self.subscription == set(topics): @@ -247,7 +245,7 @@ def assign_from_subscribed(self, assignments): for tp in assignments: if tp.topic not in self.subscription: - raise ValueError("Assigned partition %s for non-subscribed topic." 
% (tp,)) + raise ValueError(f"Assigned partition {tp} for non-subscribed topic.") # after rebalancing, we always reinitialize the assignment state self.assignment.clear() @@ -299,13 +297,13 @@ def assigned_partitions(self): def paused_partitions(self): """Return current set of paused TopicPartitions.""" - return set(partition for partition in self.assignment - if self.is_paused(partition)) + return {partition for partition in self.assignment + if self.is_paused(partition)} def fetchable_partitions(self): """Return set of TopicPartitions that should be Fetched.""" fetchable = set() - for partition, state in six.iteritems(self.assignment): + for partition, state in self.assignment.items(): if state.is_fetchable(): fetchable.add(partition) return fetchable @@ -317,7 +315,7 @@ def partitions_auto_assigned(self): def all_consumed_offsets(self): """Returns consumed offsets as {TopicPartition: OffsetAndMetadata}""" all_consumed = {} - for partition, state in six.iteritems(self.assignment): + for partition, state in self.assignment.items(): if state.has_valid_position: all_consumed[partition] = OffsetAndMetadata(state.position, '') return all_consumed @@ -348,7 +346,7 @@ def has_all_fetch_positions(self): def missing_fetch_positions(self): missing = set() - for partition, state in six.iteritems(self.assignment): + for partition, state in self.assignment.items(): if not state.has_valid_position: missing.add(partition) return missing @@ -372,7 +370,7 @@ def _add_assigned_partition(self, partition): self.assignment[partition] = TopicPartitionState() -class TopicPartitionState(object): +class TopicPartitionState: def __init__(self): self.committed = None # last committed OffsetAndMetadata self.has_valid_position = False # whether we have valid position @@ -420,7 +418,7 @@ def is_fetchable(self): return not self.paused and self.has_valid_position -class ConsumerRebalanceListener(object): +class ConsumerRebalanceListener: """ A callback interface that the user can implement to trigger custom actions when the set of partitions assigned to the consumer changes. diff --git a/kafka/coordinator/assignors/sticky/__init__.py b/kafka/coordinator/assignors/sticky/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kafka/coordinator/assignors/sticky/partition_movements.py b/kafka/coordinator/assignors/sticky/partition_movements.py new file mode 100644 index 000000000..8851e4cda --- /dev/null +++ b/kafka/coordinator/assignors/sticky/partition_movements.py @@ -0,0 +1,149 @@ +import logging +from collections import defaultdict, namedtuple +from copy import deepcopy + +from kafka.vendor import six + +log = logging.getLogger(__name__) + + +ConsumerPair = namedtuple("ConsumerPair", ["src_member_id", "dst_member_id"]) +""" +Represents a pair of Kafka consumer ids involved in a partition reassignment. +Each ConsumerPair corresponds to a particular partition or topic, indicates that the particular partition or some +partition of the particular topic was moved from the source consumer to the destination consumer +during the rebalance. This class helps in determining whether a partition reassignment results in cycles among +the generated graph of consumer pairs. +""" + + +def is_sublist(source, target): + """Checks if one list is a sublist of another. + + Arguments: + source: the list in which to search for the occurrence of target. 
+ target: the list to search for as a sublist of source + + Returns: + true if target is in source; false otherwise + """ + for index in (i for i, e in enumerate(source) if e == target[0]): + if tuple(source[index: index + len(target)]) == target: + return True + return False + + +class PartitionMovements: + """ + This class maintains some data structures to simplify lookup of partition movements among consumers. + At each point of time during a partition rebalance it keeps track of partition movements + corresponding to each topic, and also possible movement (in form a ConsumerPair object) for each partition. + """ + + def __init__(self): + self.partition_movements_by_topic = defaultdict( + lambda: defaultdict(set) + ) + self.partition_movements = {} + + def move_partition(self, partition, old_consumer, new_consumer): + pair = ConsumerPair(src_member_id=old_consumer, dst_member_id=new_consumer) + if partition in self.partition_movements: + # this partition has previously moved + existing_pair = self._remove_movement_record_of_partition(partition) + assert existing_pair.dst_member_id == old_consumer + if existing_pair.src_member_id != new_consumer: + # the partition is not moving back to its previous consumer + self._add_partition_movement_record( + partition, ConsumerPair(src_member_id=existing_pair.src_member_id, dst_member_id=new_consumer) + ) + else: + self._add_partition_movement_record(partition, pair) + + def get_partition_to_be_moved(self, partition, old_consumer, new_consumer): + if partition.topic not in self.partition_movements_by_topic: + return partition + if partition in self.partition_movements: + # this partition has previously moved + assert old_consumer == self.partition_movements[partition].dst_member_id + old_consumer = self.partition_movements[partition].src_member_id + reverse_pair = ConsumerPair(src_member_id=new_consumer, dst_member_id=old_consumer) + if reverse_pair not in self.partition_movements_by_topic[partition.topic]: + return partition + + return next(iter(self.partition_movements_by_topic[partition.topic][reverse_pair])) + + def are_sticky(self): + for topic, movements in six.iteritems(self.partition_movements_by_topic): + movement_pairs = set(movements.keys()) + if self._has_cycles(movement_pairs): + log.error( + "Stickiness is violated for topic {}\n" + "Partition movements for this topic occurred among the following consumer pairs:\n" + "{}".format(topic, movement_pairs) + ) + return False + return True + + def _remove_movement_record_of_partition(self, partition): + pair = self.partition_movements[partition] + del self.partition_movements[partition] + + self.partition_movements_by_topic[partition.topic][pair].remove(partition) + if not self.partition_movements_by_topic[partition.topic][pair]: + del self.partition_movements_by_topic[partition.topic][pair] + if not self.partition_movements_by_topic[partition.topic]: + del self.partition_movements_by_topic[partition.topic] + + return pair + + def _add_partition_movement_record(self, partition, pair): + self.partition_movements[partition] = pair + self.partition_movements_by_topic[partition.topic][pair].add(partition) + + def _has_cycles(self, consumer_pairs): + cycles = set() + for pair in consumer_pairs: + reduced_pairs = deepcopy(consumer_pairs) + reduced_pairs.remove(pair) + path = [pair.src_member_id] + if self._is_linked(pair.dst_member_id, pair.src_member_id, reduced_pairs, path) and not self._is_subcycle( + path, cycles + ): + cycles.add(tuple(path)) + log.error("A cycle of length {} was found: 
{}".format(len(path) - 1, path)) + + # for now we want to make sure there is no partition movements of the same topic between a pair of consumers. + # the odds of finding a cycle among more than two consumers seem to be very low (according to various randomized + # tests with the given sticky algorithm) that it should not worth the added complexity of handling those cases. + for cycle in cycles: + if len(cycle) == 3: # indicates a cycle of length 2 + return True + return False + + @staticmethod + def _is_subcycle(cycle, cycles): + super_cycle = deepcopy(cycle) + super_cycle = super_cycle[:-1] + super_cycle.extend(cycle) + for found_cycle in cycles: + if len(found_cycle) == len(cycle) and is_sublist(super_cycle, found_cycle): + return True + return False + + def _is_linked(self, src, dst, pairs, current_path): + if src == dst: + return False + if not pairs: + return False + if ConsumerPair(src, dst) in pairs: + current_path.append(src) + current_path.append(dst) + return True + for pair in pairs: + if pair.src_member_id == src: + reduced_set = deepcopy(pairs) + reduced_set.remove(pair) + current_path.append(pair.src_member_id) + return self._is_linked(pair.dst_member_id, dst, reduced_set, current_path) + return False diff --git a/kafka/coordinator/assignors/sticky/sorted_set.py b/kafka/coordinator/assignors/sticky/sorted_set.py new file mode 100644 index 000000000..6a454a42d --- /dev/null +++ b/kafka/coordinator/assignors/sticky/sorted_set.py @@ -0,0 +1,63 @@ +class SortedSet: + def __init__(self, iterable=None, key=None): + self._key = key if key is not None else lambda x: x + self._set = set(iterable) if iterable is not None else set() + + self._cached_last = None + self._cached_first = None + + def first(self): + if self._cached_first is not None: + return self._cached_first + + first = None + for element in self._set: + if first is None or self._key(first) > self._key(element): + first = element + self._cached_first = first + return first + + def last(self): + if self._cached_last is not None: + return self._cached_last + + last = None + for element in self._set: + if last is None or self._key(last) < self._key(element): + last = element + self._cached_last = last + return last + + def pop_last(self): + value = self.last() + self._set.remove(value) + self._cached_last = None + return value + + def add(self, value): + if self._cached_last is not None and self._key(value) > self._key(self._cached_last): + self._cached_last = value + if self._cached_first is not None and self._key(value) < self._key(self._cached_first): + self._cached_first = value + + return self._set.add(value) + + def remove(self, value): + if self._cached_last is not None and self._cached_last == value: + self._cached_last = None + if self._cached_first is not None and self._cached_first == value: + self._cached_first = None + + return self._set.remove(value) + + def __contains__(self, value): + return value in self._set + + def __iter__(self): + return iter(sorted(self._set, key=self._key)) + + def _bool(self): + return len(self._set) != 0 + + __nonzero__ = _bool + __bool__ = _bool diff --git a/kafka/coordinator/assignors/sticky/sticky_assignor.py b/kafka/coordinator/assignors/sticky/sticky_assignor.py new file mode 100644 index 000000000..dce714f1a --- /dev/null +++ b/kafka/coordinator/assignors/sticky/sticky_assignor.py @@ -0,0 +1,685 @@ +import logging +from collections import defaultdict, namedtuple +from copy import deepcopy + +from kafka.cluster import ClusterMetadata +from kafka.coordinator.assignors.abstract 
import AbstractPartitionAssignor +from kafka.coordinator.assignors.sticky.partition_movements import PartitionMovements +from kafka.coordinator.assignors.sticky.sorted_set import SortedSet +from kafka.coordinator.protocol import ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment +from kafka.coordinator.protocol import Schema +from kafka.protocol.struct import Struct +from kafka.protocol.types import String, Array, Int32 +from kafka.structs import TopicPartition +from kafka.vendor import six + +log = logging.getLogger(__name__) + +ConsumerGenerationPair = namedtuple("ConsumerGenerationPair", ["consumer", "generation"]) + + +def has_identical_list_elements(list_): + """Checks if all lists in the collection have the same members + + Arguments: + list_: collection of lists + + Returns: + true if all lists in the collection have the same members; false otherwise + """ + if not list_: + return True + for i in range(1, len(list_)): + if list_[i] != list_[i - 1]: + return False + return True + + +def subscriptions_comparator_key(element): + return len(element[1]), element[0] + + +def partitions_comparator_key(element): + return len(element[1]), element[0].topic, element[0].partition + + +def remove_if_present(collection, element): + try: + collection.remove(element) + except (ValueError, KeyError): + pass + + +StickyAssignorMemberMetadataV1 = namedtuple("StickyAssignorMemberMetadataV1", + ["subscription", "partitions", "generation"]) + + +class StickyAssignorUserDataV1(Struct): + """ + Used for preserving consumer's previously assigned partitions + list and sending it as user data to the leader during a rebalance + """ + + SCHEMA = Schema( + ("previous_assignment", Array(("topic", String("utf-8")), ("partitions", Array(Int32)))), ("generation", Int32) + ) + + +class StickyAssignmentExecutor: + def __init__(self, cluster, members): + self.members = members + # a mapping between consumers and their assigned partitions that is updated during assignment procedure + self.current_assignment = defaultdict(list) + # an assignment from a previous generation + self.previous_assignment = {} + # a mapping between partitions and their assigned consumers + self.current_partition_consumer = {} + # a flag indicating that there were no previous assignments performed ever + self.is_fresh_assignment = False + # a mapping of all topic partitions to all consumers that can be assigned to them + self.partition_to_all_potential_consumers = {} + # a mapping of all consumers to all potential topic partitions that can be assigned to them + self.consumer_to_all_potential_partitions = {} + # an ascending sorted set of consumers based on how many topic partitions are already assigned to them + self.sorted_current_subscriptions = SortedSet() + # an ascending sorted list of topic partitions based on how many consumers can potentially use them + self.sorted_partitions = [] + # all partitions that need to be assigned + self.unassigned_partitions = [] + # a flag indicating that a certain partition cannot remain assigned to its current consumer because the consumer + # is no longer subscribed to its topic + self.revocation_required = False + + self.partition_movements = PartitionMovements() + self._initialize(cluster) + + def perform_initial_assignment(self): + self._populate_sorted_partitions() + self._populate_partitions_to_reassign() + + def balance(self): + self._initialize_current_subscriptions() + initializing = len(self.current_assignment[self._get_consumer_with_most_subscriptions()]) == 0 + + # assign all 
unassigned partitions + for partition in self.unassigned_partitions: + # skip if there is no potential consumer for the partition + if not self.partition_to_all_potential_consumers[partition]: + continue + self._assign_partition(partition) + + # narrow down the reassignment scope to only those partitions that can actually be reassigned + fixed_partitions = set() + for partition in six.iterkeys(self.partition_to_all_potential_consumers): + if not self._can_partition_participate_in_reassignment(partition): + fixed_partitions.add(partition) + for fixed_partition in fixed_partitions: + remove_if_present(self.sorted_partitions, fixed_partition) + remove_if_present(self.unassigned_partitions, fixed_partition) + + # narrow down the reassignment scope to only those consumers that are subject to reassignment + fixed_assignments = {} + for consumer in six.iterkeys(self.consumer_to_all_potential_partitions): + if not self._can_consumer_participate_in_reassignment(consumer): + self._remove_consumer_from_current_subscriptions_and_maintain_order(consumer) + fixed_assignments[consumer] = self.current_assignment[consumer] + del self.current_assignment[consumer] + + # create a deep copy of the current assignment so we can revert to it + # if we do not get a more balanced assignment later + prebalance_assignment = deepcopy(self.current_assignment) + prebalance_partition_consumers = deepcopy(self.current_partition_consumer) + + # if we don't already need to revoke something due to subscription changes, + # first try to balance by only moving newly added partitions + if not self.revocation_required: + self._perform_reassignments(self.unassigned_partitions) + reassignment_performed = self._perform_reassignments(self.sorted_partitions) + + # if we are not preserving existing assignments and we have made changes to the current assignment + # make sure we are getting a more balanced assignment; otherwise, revert to previous assignment + if ( + not initializing + and reassignment_performed + and self._get_balance_score(self.current_assignment) >= self._get_balance_score(prebalance_assignment) + ): + self.current_assignment = prebalance_assignment + self.current_partition_consumer.clear() + self.current_partition_consumer.update(prebalance_partition_consumers) + + # add the fixed assignments (those that could not change) back + for consumer, partitions in six.iteritems(fixed_assignments): + self.current_assignment[consumer] = partitions + self._add_consumer_to_current_subscriptions_and_maintain_order(consumer) + + def get_final_assignment(self, member_id): + assignment = defaultdict(list) + for topic_partition in self.current_assignment[member_id]: + assignment[topic_partition.topic].append(topic_partition.partition) + assignment = {k: sorted(v) for k, v in six.iteritems(assignment)} + return six.viewitems(assignment) + + def _initialize(self, cluster): + self._init_current_assignments(self.members) + + for topic in cluster.topics(): + partitions = cluster.partitions_for_topic(topic) + if partitions is None: + log.warning("No partition metadata for topic %s", topic) + continue + for p in partitions: + partition = TopicPartition(topic=topic, partition=p) + self.partition_to_all_potential_consumers[partition] = [] + for consumer_id, member_metadata in six.iteritems(self.members): + self.consumer_to_all_potential_partitions[consumer_id] = [] + for topic in member_metadata.subscription: + if cluster.partitions_for_topic(topic) is None: + log.warning("No partition metadata for topic {}".format(topic)) + continue + for p 
in cluster.partitions_for_topic(topic): + partition = TopicPartition(topic=topic, partition=p) + self.consumer_to_all_potential_partitions[consumer_id].append(partition) + self.partition_to_all_potential_consumers[partition].append(consumer_id) + if consumer_id not in self.current_assignment: + self.current_assignment[consumer_id] = [] + + def _init_current_assignments(self, members): + # we need to process subscriptions' user data with each consumer's reported generation in mind + # higher generations overwrite lower generations in case of a conflict + # note that a conflict could exists only if user data is for different generations + + # for each partition we create a map of its consumers by generation + sorted_partition_consumers_by_generation = {} + for consumer, member_metadata in six.iteritems(members): + for partitions in member_metadata.partitions: + if partitions in sorted_partition_consumers_by_generation: + consumers = sorted_partition_consumers_by_generation[partitions] + if member_metadata.generation and member_metadata.generation in consumers: + # same partition is assigned to two consumers during the same rebalance. + # log a warning and skip this record + log.warning( + "Partition {} is assigned to multiple consumers " + "following sticky assignment generation {}.".format(partitions, member_metadata.generation) + ) + else: + consumers[member_metadata.generation] = consumer + else: + sorted_consumers = {member_metadata.generation: consumer} + sorted_partition_consumers_by_generation[partitions] = sorted_consumers + + # previous_assignment holds the prior ConsumerGenerationPair (before current) of each partition + # current and previous consumers are the last two consumers of each partition in the above sorted map + for partitions, consumers in six.iteritems(sorted_partition_consumers_by_generation): + generations = sorted(consumers.keys(), reverse=True) + self.current_assignment[consumers[generations[0]]].append(partitions) + # now update previous assignment if any + if len(generations) > 1: + self.previous_assignment[partitions] = ConsumerGenerationPair( + consumer=consumers[generations[1]], generation=generations[1] + ) + + self.is_fresh_assignment = len(self.current_assignment) == 0 + + for consumer_id, partitions in six.iteritems(self.current_assignment): + for partition in partitions: + self.current_partition_consumer[partition] = consumer_id + + def _are_subscriptions_identical(self): + """ + Returns: + true, if both potential consumers of partitions and potential partitions that consumers can + consume are the same + """ + if not has_identical_list_elements(list(six.itervalues(self.partition_to_all_potential_consumers))): + return False + return has_identical_list_elements(list(six.itervalues(self.consumer_to_all_potential_partitions))) + + def _populate_sorted_partitions(self): + # set of topic partitions with their respective potential consumers + all_partitions = set((tp, tuple(consumers)) + for tp, consumers in six.iteritems(self.partition_to_all_potential_consumers)) + partitions_sorted_by_num_of_potential_consumers = sorted(all_partitions, key=partitions_comparator_key) + + self.sorted_partitions = [] + if not self.is_fresh_assignment and self._are_subscriptions_identical(): + # if this is a reassignment and the subscriptions are identical (all consumers can consumer from all topics) + # then we just need to simply list partitions in a round robin fashion (from consumers with + # most assigned partitions to those with least) + assignments = 
deepcopy(self.current_assignment) + for consumer_id, partitions in six.iteritems(assignments): + to_remove = [] + for partition in partitions: + if partition not in self.partition_to_all_potential_consumers: + to_remove.append(partition) + for partition in to_remove: + partitions.remove(partition) + + sorted_consumers = SortedSet( + iterable=[(consumer, tuple(partitions)) for consumer, partitions in six.iteritems(assignments)], + key=subscriptions_comparator_key, + ) + # at this point, sorted_consumers contains an ascending-sorted list of consumers based on + # how many valid partitions are currently assigned to them + while sorted_consumers: + # take the consumer with the most partitions + consumer, _ = sorted_consumers.pop_last() + # currently assigned partitions to this consumer + remaining_partitions = assignments[consumer] + # from partitions that had a different consumer before, + # keep only those that are assigned to this consumer now + previous_partitions = set(six.iterkeys(self.previous_assignment)).intersection(set(remaining_partitions)) + if previous_partitions: + # if there is a partition of this consumer that was assigned to another consumer before + # mark it as good options for reassignment + partition = previous_partitions.pop() + remaining_partitions.remove(partition) + self.sorted_partitions.append(partition) + sorted_consumers.add((consumer, tuple(assignments[consumer]))) + elif remaining_partitions: + # otherwise, mark any other one of the current partitions as a reassignment candidate + self.sorted_partitions.append(remaining_partitions.pop()) + sorted_consumers.add((consumer, tuple(assignments[consumer]))) + + while partitions_sorted_by_num_of_potential_consumers: + partition = partitions_sorted_by_num_of_potential_consumers.pop(0)[0] + if partition not in self.sorted_partitions: + self.sorted_partitions.append(partition) + else: + while partitions_sorted_by_num_of_potential_consumers: + self.sorted_partitions.append(partitions_sorted_by_num_of_potential_consumers.pop(0)[0]) + + def _populate_partitions_to_reassign(self): + self.unassigned_partitions = deepcopy(self.sorted_partitions) + + assignments_to_remove = [] + for consumer_id, partitions in six.iteritems(self.current_assignment): + if consumer_id not in self.members: + # if a consumer that existed before (and had some partition assignments) is now removed, + # remove it from current_assignment + for partition in partitions: + del self.current_partition_consumer[partition] + assignments_to_remove.append(consumer_id) + else: + # otherwise (the consumer still exists) + partitions_to_remove = [] + for partition in partitions: + if partition not in self.partition_to_all_potential_consumers: + # if this topic partition of this consumer no longer exists + # remove it from current_assignment of the consumer + partitions_to_remove.append(partition) + elif partition.topic not in self.members[consumer_id].subscription: + # if this partition cannot remain assigned to its current consumer because the consumer + # is no longer subscribed to its topic remove it from current_assignment of the consumer + partitions_to_remove.append(partition) + self.revocation_required = True + else: + # otherwise, remove the topic partition from those that need to be assigned only if + # its current consumer is still subscribed to its topic (because it is already assigned + # and we would want to preserve that assignment as much as possible) + self.unassigned_partitions.remove(partition) + for partition in partitions_to_remove: + 
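+                        # drop the partition both from this consumer's assignment and from the partition-to-consumer lookup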
self.current_assignment[consumer_id].remove(partition) + del self.current_partition_consumer[partition] + for consumer_id in assignments_to_remove: + del self.current_assignment[consumer_id] + + def _initialize_current_subscriptions(self): + self.sorted_current_subscriptions = SortedSet( + iterable=[(consumer, tuple(partitions)) for consumer, partitions in six.iteritems(self.current_assignment)], + key=subscriptions_comparator_key, + ) + + def _get_consumer_with_least_subscriptions(self): + return self.sorted_current_subscriptions.first()[0] + + def _get_consumer_with_most_subscriptions(self): + return self.sorted_current_subscriptions.last()[0] + + def _remove_consumer_from_current_subscriptions_and_maintain_order(self, consumer): + self.sorted_current_subscriptions.remove((consumer, tuple(self.current_assignment[consumer]))) + + def _add_consumer_to_current_subscriptions_and_maintain_order(self, consumer): + self.sorted_current_subscriptions.add((consumer, tuple(self.current_assignment[consumer]))) + + def _is_balanced(self): + """Determines if the current assignment is a balanced one""" + if ( + len(self.current_assignment[self._get_consumer_with_least_subscriptions()]) + >= len(self.current_assignment[self._get_consumer_with_most_subscriptions()]) - 1 + ): + # if minimum and maximum numbers of partitions assigned to consumers differ by at most one return true + return True + + # create a mapping from partitions to the consumer assigned to them + all_assigned_partitions = {} + for consumer_id, consumer_partitions in six.iteritems(self.current_assignment): + for partition in consumer_partitions: + if partition in all_assigned_partitions: + log.error("{} is assigned to more than one consumer.".format(partition)) + all_assigned_partitions[partition] = consumer_id + + # for each consumer that does not have all the topic partitions it can get + # make sure none of the topic partitions it could but did not get cannot be moved to it + # (because that would break the balance) + for consumer, _ in self.sorted_current_subscriptions: + consumer_partition_count = len(self.current_assignment[consumer]) + # skip if this consumer already has all the topic partitions it can get + if consumer_partition_count == len(self.consumer_to_all_potential_partitions[consumer]): + continue + + # otherwise make sure it cannot get any more + for partition in self.consumer_to_all_potential_partitions[consumer]: + if partition not in self.current_assignment[consumer]: + other_consumer = all_assigned_partitions[partition] + other_consumer_partition_count = len(self.current_assignment[other_consumer]) + if consumer_partition_count < other_consumer_partition_count: + return False + return True + + def _assign_partition(self, partition): + for consumer, _ in self.sorted_current_subscriptions: + if partition in self.consumer_to_all_potential_partitions[consumer]: + self._remove_consumer_from_current_subscriptions_and_maintain_order(consumer) + self.current_assignment[consumer].append(partition) + self.current_partition_consumer[partition] = consumer + self._add_consumer_to_current_subscriptions_and_maintain_order(consumer) + break + + def _can_partition_participate_in_reassignment(self, partition): + return len(self.partition_to_all_potential_consumers[partition]) >= 2 + + def _can_consumer_participate_in_reassignment(self, consumer): + current_partitions = self.current_assignment[consumer] + current_assignment_size = len(current_partitions) + max_assignment_size = len(self.consumer_to_all_potential_partitions[consumer]) + 
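+            # sanity check: a consumer should never be assigned more partitions than it could potentially consume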
if current_assignment_size > max_assignment_size: + log.error("The consumer {} is assigned more partitions than the maximum possible.".format(consumer)) + if current_assignment_size < max_assignment_size: + # if a consumer is not assigned all its potential partitions it is subject to reassignment + return True + for partition in current_partitions: + # if any of the partitions assigned to a consumer is subject to reassignment the consumer itself + # is subject to reassignment + if self._can_partition_participate_in_reassignment(partition): + return True + return False + + def _perform_reassignments(self, reassignable_partitions): + reassignment_performed = False + + # repeat reassignment until no partition can be moved to improve the balance + while True: + modified = False + # reassign all reassignable partitions until the full list is processed or a balance is achieved + # (starting from the partition with least potential consumers and if needed) + for partition in reassignable_partitions: + if self._is_balanced(): + break + # the partition must have at least two potential consumers + if len(self.partition_to_all_potential_consumers[partition]) <= 1: + log.error("Expected more than one potential consumer for partition {}".format(partition)) + # the partition must have a current consumer + consumer = self.current_partition_consumer.get(partition) + if consumer is None: + log.error("Expected partition {} to be assigned to a consumer".format(partition)) + + if ( + partition in self.previous_assignment + and len(self.current_assignment[consumer]) + > len(self.current_assignment[self.previous_assignment[partition].consumer]) + 1 + ): + self._reassign_partition_to_consumer( + partition, self.previous_assignment[partition].consumer, + ) + reassignment_performed = True + modified = True + continue + + # check if a better-suited consumer exist for the partition; if so, reassign it + for other_consumer in self.partition_to_all_potential_consumers[partition]: + if len(self.current_assignment[consumer]) > len(self.current_assignment[other_consumer]) + 1: + self._reassign_partition(partition) + reassignment_performed = True + modified = True + break + + if not modified: + break + return reassignment_performed + + def _reassign_partition(self, partition): + new_consumer = None + for another_consumer, _ in self.sorted_current_subscriptions: + if partition in self.consumer_to_all_potential_partitions[another_consumer]: + new_consumer = another_consumer + break + assert new_consumer is not None + self._reassign_partition_to_consumer(partition, new_consumer) + + def _reassign_partition_to_consumer(self, partition, new_consumer): + consumer = self.current_partition_consumer[partition] + # find the correct partition movement considering the stickiness requirement + partition_to_be_moved = self.partition_movements.get_partition_to_be_moved(partition, consumer, new_consumer) + self._move_partition(partition_to_be_moved, new_consumer) + + def _move_partition(self, partition, new_consumer): + old_consumer = self.current_partition_consumer[partition] + self._remove_consumer_from_current_subscriptions_and_maintain_order(old_consumer) + self._remove_consumer_from_current_subscriptions_and_maintain_order(new_consumer) + + self.partition_movements.move_partition(partition, old_consumer, new_consumer) + + self.current_assignment[old_consumer].remove(partition) + self.current_assignment[new_consumer].append(partition) + self.current_partition_consumer[partition] = new_consumer + + 
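+        # re-insert both consumers so the sorted set reflects their updated assignment sizes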
self._add_consumer_to_current_subscriptions_and_maintain_order(new_consumer) + self._add_consumer_to_current_subscriptions_and_maintain_order(old_consumer) + + @staticmethod + def _get_balance_score(assignment): + """Calculates a balance score of a give assignment + as the sum of assigned partitions size difference of all consumer pairs. + A perfectly balanced assignment (with all consumers getting the same number of partitions) + has a balance score of 0. Lower balance score indicates a more balanced assignment. + + Arguments: + assignment (dict): {consumer: list of assigned topic partitions} + + Returns: + the balance score of the assignment + """ + score = 0 + consumer_to_assignment = {} + for consumer_id, partitions in six.iteritems(assignment): + consumer_to_assignment[consumer_id] = len(partitions) + + consumers_to_explore = set(consumer_to_assignment.keys()) + for consumer_id in consumer_to_assignment.keys(): + if consumer_id in consumers_to_explore: + consumers_to_explore.remove(consumer_id) + for other_consumer_id in consumers_to_explore: + score += abs(consumer_to_assignment[consumer_id] - consumer_to_assignment[other_consumer_id]) + return score + + +class StickyPartitionAssignor(AbstractPartitionAssignor): + """ + https://cwiki.apache.org/confluence/display/KAFKA/KIP-54+-+Sticky+Partition+Assignment+Strategy + + The sticky assignor serves two purposes. First, it guarantees an assignment that is as balanced as possible, meaning either: + - the numbers of topic partitions assigned to consumers differ by at most one; or + - each consumer that has 2+ fewer topic partitions than some other consumer cannot get any of those topic partitions transferred to it. + + Second, it preserved as many existing assignment as possible when a reassignment occurs. + This helps in saving some of the overhead processing when topic partitions move from one consumer to another. + + Starting fresh it would work by distributing the partitions over consumers as evenly as possible. + Even though this may sound similar to how round robin assignor works, the second example below shows that it is not. + During a reassignment it would perform the reassignment in such a way that in the new assignment + - topic partitions are still distributed as evenly as possible, and + - topic partitions stay with their previously assigned consumers as much as possible. + + The first goal above takes precedence over the second one. + + Example 1. + Suppose there are three consumers C0, C1, C2, + four topics t0, t1, t2, t3, and each topic has 2 partitions, + resulting in partitions t0p0, t0p1, t1p0, t1p1, t2p0, t2p1, t3p0, t3p1. + Each consumer is subscribed to all three topics. + + The assignment with both sticky and round robin assignors will be: + - C0: [t0p0, t1p1, t3p0] + - C1: [t0p1, t2p0, t3p1] + - C2: [t1p0, t2p1] + + Now, let's assume C1 is removed and a reassignment is about to happen. The round robin assignor would produce: + - C0: [t0p0, t1p0, t2p0, t3p0] + - C2: [t0p1, t1p1, t2p1, t3p1] + + while the sticky assignor would result in: + - C0 [t0p0, t1p1, t3p0, t2p0] + - C2 [t1p0, t2p1, t0p1, t3p1] + preserving all the previous assignments (unlike the round robin assignor). + + + Example 2. + There are three consumers C0, C1, C2, + and three topics t0, t1, t2, with 1, 2, and 3 partitions respectively. + Therefore, the partitions are t0p0, t1p0, t1p1, t2p0, t2p1, t2p2. + C0 is subscribed to t0; + C1 is subscribed to t0, t1; + and C2 is subscribed to t0, t1, t2. 
+ + The round robin assignor would come up with the following assignment: + - C0 [t0p0] + - C1 [t1p0] + - C2 [t1p1, t2p0, t2p1, t2p2] + + which is not as balanced as the assignment suggested by sticky assignor: + - C0 [t0p0] + - C1 [t1p0, t1p1] + - C2 [t2p0, t2p1, t2p2] + + Now, if consumer C0 is removed, these two assignors would produce the following assignments. + Round Robin (preserves 3 partition assignments): + - C1 [t0p0, t1p1] + - C2 [t1p0, t2p0, t2p1, t2p2] + + Sticky (preserves 5 partition assignments): + - C1 [t1p0, t1p1, t0p0] + - C2 [t2p0, t2p1, t2p2] + """ + + DEFAULT_GENERATION_ID = -1 + + name = "sticky" + version = 0 + + member_assignment = None + generation = DEFAULT_GENERATION_ID + + _latest_partition_movements = None + + @classmethod + def assign(cls, cluster, members): + """Performs group assignment given cluster metadata and member subscriptions + + Arguments: + cluster (ClusterMetadata): cluster metadata + members (dict of {member_id: MemberMetadata}): decoded metadata for each member in the group. + + Returns: + dict: {member_id: MemberAssignment} + """ + members_metadata = {} + for consumer, member_metadata in six.iteritems(members): + members_metadata[consumer] = cls.parse_member_metadata(member_metadata) + + executor = StickyAssignmentExecutor(cluster, members_metadata) + executor.perform_initial_assignment() + executor.balance() + + cls._latest_partition_movements = executor.partition_movements + + assignment = {} + for member_id in members: + assignment[member_id] = ConsumerProtocolMemberAssignment( + cls.version, sorted(executor.get_final_assignment(member_id)), b'' + ) + return assignment + + @classmethod + def parse_member_metadata(cls, metadata): + """ + Parses member metadata into a python object. + This implementation only serializes and deserializes the StickyAssignorMemberMetadataV1 user data, + since no StickyAssignor written in Python was deployed ever in the wild with version V0, meaning that + there is no need to support backward compatibility with V0. + + Arguments: + metadata (MemberMetadata): decoded metadata for a member of the group. 
+ + Returns: + parsed metadata (StickyAssignorMemberMetadataV1) + """ + user_data = metadata.user_data + if not user_data: + return StickyAssignorMemberMetadataV1( + partitions=[], generation=cls.DEFAULT_GENERATION_ID, subscription=metadata.subscription + ) + + try: + decoded_user_data = StickyAssignorUserDataV1.decode(user_data) + except Exception as e: + # ignore the consumer's previous assignment if it cannot be parsed + log.error("Could not parse member data", e) # pylint: disable=logging-too-many-args + return StickyAssignorMemberMetadataV1( + partitions=[], generation=cls.DEFAULT_GENERATION_ID, subscription=metadata.subscription + ) + + member_partitions = [] + for topic, partitions in decoded_user_data.previous_assignment: # pylint: disable=no-member + member_partitions.extend([TopicPartition(topic, partition) for partition in partitions]) + return StickyAssignorMemberMetadataV1( + # pylint: disable=no-member + partitions=member_partitions, generation=decoded_user_data.generation, subscription=metadata.subscription + ) + + @classmethod + def metadata(cls, topics): + return cls._metadata(topics, cls.member_assignment, cls.generation) + + @classmethod + def _metadata(cls, topics, member_assignment_partitions, generation=-1): + if member_assignment_partitions is None: + log.debug("No member assignment available") + user_data = b'' + else: + log.debug("Member assignment is available, generating the metadata: generation {}".format(cls.generation)) + partitions_by_topic = defaultdict(list) + for topic_partition in member_assignment_partitions: + partitions_by_topic[topic_partition.topic].append(topic_partition.partition) + data = StickyAssignorUserDataV1(six.viewitems(partitions_by_topic), generation) + user_data = data.encode() + return ConsumerProtocolMemberMetadata(cls.version, list(topics), user_data) + + @classmethod + def on_assignment(cls, assignment): + """Callback that runs on each assignment. Updates assignor's state. + + Arguments: + assignment: MemberAssignment + """ + log.debug("On assignment: assignment={}".format(assignment)) + cls.member_assignment = assignment.partitions() + + @classmethod + def on_generation_assignment(cls, generation): + """Callback that runs on each assignment. Updates assignor's generation id. + + Arguments: + generation: generation id + """ + log.debug("On generation assignment: generation={}".format(generation)) + cls.generation = generation diff --git a/kafka/coordinator/base.py b/kafka/coordinator/base.py index cd110ce06..d8f8ed9b0 100644 --- a/kafka/coordinator/base.py +++ b/kafka/coordinator/base.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division - import abc import copy import logging @@ -21,13 +19,13 @@ log = logging.getLogger('kafka.coordinator') -class MemberState(object): +class MemberState: UNJOINED = '' # the client is not part of a group REBALANCING = '' # the client has begun rebalancing STABLE = '' # the client has joined and is sending heartbeats -class Generation(object): +class Generation: def __init__(self, generation_id, member_id, protocol): self.generation_id = generation_id self.member_id = member_id @@ -43,7 +41,7 @@ class UnjoinedGroupException(Errors.KafkaError): retriable = True -class BaseCoordinator(object): +class BaseCoordinator: """ BaseCoordinator implements group management for a single group member by interacting with a designated Kafka broker (the coordinator). 
Group @@ -242,7 +240,7 @@ def ensure_coordinator_ready(self): """Block until the coordinator for this group is known (and we have an active connection -- java client uses unsent queue). """ - with self._lock: + with self._client._lock, self._lock: while self.coordinator_unknown(): # Prior to 0.8.2 there was no group coordinator @@ -345,7 +343,7 @@ def _handle_join_failure(self, _): def ensure_active_group(self): """Ensure that the group is active (i.e. joined and synced)""" - with self._lock: + with self._client._lock, self._lock: if self._heartbeat_thread is None: self._start_heartbeat_thread() @@ -597,7 +595,7 @@ def _on_join_leader(self, response): self._generation.member_id, [(member_id, assignment if isinstance(assignment, bytes) else assignment.encode()) - for member_id, assignment in six.iteritems(group_assignment)]) + for member_id, assignment in group_assignment.items()]) log.debug("Sending leader SyncGroup for group %s to coordinator %s: %s", self.group_id, self.coordinator_id, request) @@ -763,7 +761,7 @@ def close(self): def maybe_leave_group(self): """Leave the current group and reset local generation/memberId.""" - with self._lock: + with self._client._lock, self._lock: if (not self.coordinator_unknown() and self.state is not MemberState.UNJOINED and self._generation is not Generation.NO_GENERATION): @@ -850,7 +848,7 @@ def _handle_heartbeat_response(self, future, send_time, response): future.failure(error) -class GroupCoordinatorMetrics(object): +class GroupCoordinatorMetrics: def __init__(self, heartbeat, metrics, prefix, tags=None): self.heartbeat = heartbeat self.metrics = metrics @@ -903,7 +901,7 @@ def __init__(self, heartbeat, metrics, prefix, tags=None): class HeartbeatThread(threading.Thread): def __init__(self, coordinator): - super(HeartbeatThread, self).__init__() + super().__init__() self.name = coordinator.group_id + '-heartbeat' self.coordinator = coordinator self.enabled = False @@ -946,6 +944,15 @@ def run(self): log.debug('Heartbeat thread closed') def _run_once(self): + with self.coordinator._client._lock, self.coordinator._lock: + if self.enabled and self.coordinator.state is MemberState.STABLE: + # TODO: When consumer.wakeup() is implemented, we need to + # disable here to prevent propagating an exception to this + # heartbeat thread + # must get client._lock, or maybe deadlock at heartbeat + # failure callback in consumer poll + self.coordinator._client.poll(timeout_ms=0) + with self.coordinator._lock: if not self.enabled: log.debug('Heartbeat disabled. 
Waiting') @@ -961,11 +968,6 @@ def _run_once(self): self.disable() return - # TODO: When consumer.wakeup() is implemented, we need to - # disable here to prevent propagating an exception to this - # heartbeat thread - self.coordinator._client.poll(timeout_ms=0) - if self.coordinator.coordinator_unknown(): future = self.coordinator.lookup_coordinator() if not future.is_done or future.failed(): diff --git a/kafka/coordinator/consumer.py b/kafka/coordinator/consumer.py index fda80aa67..1e415fa7a 100644 --- a/kafka/coordinator/consumer.py +++ b/kafka/coordinator/consumer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division - import collections import copy import functools @@ -11,6 +9,7 @@ from kafka.coordinator.base import BaseCoordinator, Generation from kafka.coordinator.assignors.range import RangePartitionAssignor from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor +from kafka.coordinator.assignors.sticky.sticky_assignor import StickyPartitionAssignor from kafka.coordinator.protocol import ConsumerProtocol import kafka.errors as Errors from kafka.future import Future @@ -31,7 +30,7 @@ class ConsumerCoordinator(BaseCoordinator): 'enable_auto_commit': True, 'auto_commit_interval_ms': 5000, 'default_offset_commit_callback': None, - 'assignors': (RangePartitionAssignor, RoundRobinPartitionAssignor), + 'assignors': (RangePartitionAssignor, RoundRobinPartitionAssignor, StickyPartitionAssignor), 'session_timeout_ms': 10000, 'heartbeat_interval_ms': 3000, 'max_poll_interval_ms': 300000, @@ -77,7 +76,7 @@ def __init__(self, client, subscription, metrics, **configs): True the only way to receive records from an internal topic is subscribing to it. Requires 0.10+. Default: True """ - super(ConsumerCoordinator, self).__init__(client, metrics, **configs) + super().__init__(client, metrics, **configs) self.config = copy.copy(self.DEFAULT_CONFIG) for key in self.config: @@ -128,7 +127,7 @@ def __init__(self, client, subscription, metrics, **configs): def __del__(self): if hasattr(self, '_cluster') and self._cluster: self._cluster.remove_listener(WeakMethod(self._handle_metadata_update)) - super(ConsumerCoordinator, self).__del__() + super().__del__() def protocol_type(self): return ConsumerProtocol.PROTOCOL_TYPE @@ -217,7 +216,7 @@ def _on_join_complete(self, generation, member_id, protocol, self._assignment_snapshot = None assignor = self._lookup_assignor(protocol) - assert assignor, 'Coordinator selected invalid assignment protocol: %s' % (protocol,) + assert assignor, f'Coordinator selected invalid assignment protocol: {protocol}' assignment = ConsumerProtocol.ASSIGNMENT.decode(member_assignment_bytes) @@ -234,6 +233,8 @@ def _on_join_complete(self, generation, member_id, protocol, # give the assignor a chance to update internal state # based on the received assignment assignor.on_assignment(assignment) + if assignor.name == 'sticky': + assignor.on_generation_assignment(generation) # reschedule the auto commit starting from now self.next_auto_commit_deadline = time.time() + self.auto_commit_interval @@ -302,7 +303,7 @@ def time_to_next_poll(self): def _perform_assignment(self, leader_id, assignment_strategy, members): assignor = self._lookup_assignor(assignment_strategy) - assert assignor, 'Invalid assignment protocol: %s' % (assignment_strategy,) + assert assignor, f'Invalid assignment protocol: {assignment_strategy}' member_metadata = {} all_subscribed_topics = set() for member_id, metadata_bytes in members: @@ -333,7 +334,7 @@ def _perform_assignment(self, 
leader_id, assignment_strategy, members): log.debug("Finished assignment for group %s: %s", self.group_id, assignments) group_assignment = {} - for member_id, assignment in six.iteritems(assignments): + for member_id, assignment in assignments.items(): group_assignment[member_id] = assignment return group_assignment @@ -378,13 +379,13 @@ def need_rejoin(self): and self._joined_subscription != self._subscription.subscription): return True - return super(ConsumerCoordinator, self).need_rejoin() + return super().need_rejoin() def refresh_committed_offsets_if_needed(self): """Fetch committed offsets for assigned partitions.""" if self._subscription.needs_fetch_committed_offsets: offsets = self.fetch_committed_offsets(self._subscription.assigned_partitions()) - for partition, offset in six.iteritems(offsets): + for partition, offset in offsets.items(): # verify assignment is still active if self._subscription.is_assigned(partition): self._subscription.assignment[partition].committed = offset @@ -430,7 +431,7 @@ def close(self, autocommit=True): if autocommit: self._maybe_auto_commit_offsets_sync() finally: - super(ConsumerCoordinator, self).close() + super().close() def _invoke_completed_offset_commit_callbacks(self): while self.completed_offset_commits: @@ -565,7 +566,7 @@ def _send_offset_commit_request(self, offsets): # create the offset commit request offset_data = collections.defaultdict(dict) - for tp, offset in six.iteritems(offsets): + for tp, offset in offsets.items(): offset_data[tp.topic][tp.partition] = offset if self._subscription.partitions_auto_assigned(): @@ -590,8 +591,8 @@ def _send_offset_commit_request(self, offsets): partition, offset.offset, offset.metadata - ) for partition, offset in six.iteritems(partitions)] - ) for topic, partitions in six.iteritems(offset_data)] + ) for partition, offset in partitions.items()] + ) for topic, partitions in offset_data.items()] ) elif self.config['api_version'] >= (0, 8, 2): request = OffsetCommitRequest[1]( @@ -602,8 +603,8 @@ def _send_offset_commit_request(self, offsets): offset.offset, -1, offset.metadata - ) for partition, offset in six.iteritems(partitions)] - ) for topic, partitions in six.iteritems(offset_data)] + ) for partition, offset in partitions.items()] + ) for topic, partitions in offset_data.items()] ) elif self.config['api_version'] >= (0, 8, 1): request = OffsetCommitRequest[0]( @@ -613,8 +614,8 @@ def _send_offset_commit_request(self, offsets): partition, offset.offset, offset.metadata - ) for partition, offset in six.iteritems(partitions)] - ) for topic, partitions in six.iteritems(offset_data)] + ) for partition, offset in partitions.items()] + ) for topic, partitions in offset_data.items()] ) log.debug("Sending offset-commit request with %s for group %s to %s", @@ -806,10 +807,10 @@ def _maybe_auto_commit_offsets_async(self): self._commit_offsets_async_on_complete) -class ConsumerCoordinatorMetrics(object): +class ConsumerCoordinatorMetrics: def __init__(self, metrics, metric_group_prefix, subscription): self.metrics = metrics - self.metric_group_name = '%s-coordinator-metrics' % (metric_group_prefix,) + self.metric_group_name = f'{metric_group_prefix}-coordinator-metrics' self.commit_latency = metrics.sensor('commit-latency') self.commit_latency.add(metrics.metric_name( diff --git a/kafka/coordinator/heartbeat.py b/kafka/coordinator/heartbeat.py index 2f5930b63..b12159cdd 100644 --- a/kafka/coordinator/heartbeat.py +++ b/kafka/coordinator/heartbeat.py @@ -1,10 +1,8 @@ -from __future__ import absolute_import, 
division - import copy import time -class Heartbeat(object): +class Heartbeat: DEFAULT_CONFIG = { 'group_id': None, 'heartbeat_interval_ms': 3000, diff --git a/kafka/coordinator/protocol.py b/kafka/coordinator/protocol.py index 56a390159..97efc1c84 100644 --- a/kafka/coordinator/protocol.py +++ b/kafka/coordinator/protocol.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from kafka.protocol.struct import Struct from kafka.protocol.types import Array, Bytes, Int16, Int32, Schema, String from kafka.structs import TopicPartition @@ -26,7 +24,7 @@ def partitions(self): for partition in partitions] -class ConsumerProtocol(object): +class ConsumerProtocol: PROTOCOL_TYPE = 'consumer' ASSIGNMENT_STRATEGIES = ('range', 'roundrobin') METADATA = ConsumerProtocolMemberMetadata diff --git a/kafka/errors.py b/kafka/errors.py index 2c1df82de..cb3ff285f 100644 --- a/kafka/errors.py +++ b/kafka/errors.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import inspect import sys @@ -12,8 +10,8 @@ class KafkaError(RuntimeError): def __str__(self): if not self.args: return self.__class__.__name__ - return '{0}: {1}'.format(self.__class__.__name__, - super(KafkaError, self).__str__()) + return '{}: {}'.format(self.__class__.__name__, + super().__str__()) class IllegalStateError(KafkaError): @@ -68,7 +66,7 @@ class IncompatibleBrokerVersion(KafkaError): class CommitFailedError(KafkaError): def __init__(self, *args, **kwargs): - super(CommitFailedError, self).__init__( + super().__init__( """Commit cannot be completed since the group has already rebalanced and assigned the partitions to another member. This means that the time between subsequent calls to poll() @@ -96,9 +94,9 @@ class BrokerResponseError(KafkaError): def __str__(self): """Add errno to standard KafkaError str""" - return '[Error {0}] {1}'.format( + return '[Error {}] {}'.format( self.errno, - super(BrokerResponseError, self).__str__()) + super().__str__()) class NoError(BrokerResponseError): @@ -449,6 +447,18 @@ class SecurityDisabledError(BrokerResponseError): description = 'Security features are disabled.' +class NonEmptyGroupError(BrokerResponseError): + errno = 68 + message = 'NON_EMPTY_GROUP' + description = 'The group is not empty.' + + +class GroupIdNotFoundError(BrokerResponseError): + errno = 69 + message = 'GROUP_ID_NOT_FOUND' + description = 'The group id does not exist.' 
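Illustrative sketch, not part of this patch: given the kafka_errors mapping and the for_code helper that appear later in this errors.py hunk, the two broker error codes added above should resolve by errno like any other BrokerResponseError subclass (the errno values are the ones defined above).

import kafka.errors as Errors

# 68 and 69 are the errnos introduced by this patch
err_cls = Errors.for_code(69)
assert err_cls is Errors.GroupIdNotFoundError
assert issubclass(err_cls, Errors.BrokerResponseError)
# str() combines errno and class name, e.g. "[Error 69] GroupIdNotFoundError"
print(err_cls())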
+ + class KafkaUnavailableError(KafkaError): pass @@ -459,7 +469,7 @@ class KafkaTimeoutError(KafkaError): class FailedPayloadsError(KafkaError): def __init__(self, payload, *args): - super(FailedPayloadsError, self).__init__(*args) + super().__init__(*args) self.payload = payload @@ -486,7 +496,7 @@ class QuotaViolationError(KafkaError): class AsyncProducerQueueFull(KafkaError): def __init__(self, failed_msgs, *args): - super(AsyncProducerQueueFull, self).__init__(*args) + super().__init__(*args) self.failed_msgs = failed_msgs @@ -496,7 +506,7 @@ def _iter_broker_errors(): yield obj -kafka_errors = dict([(x.errno, x) for x in _iter_broker_errors()]) +kafka_errors = {x.errno: x for x in _iter_broker_errors()} def for_code(error_code): diff --git a/kafka/future.py b/kafka/future.py index d0f3c6658..e9f534611 100644 --- a/kafka/future.py +++ b/kafka/future.py @@ -1,12 +1,10 @@ -from __future__ import absolute_import - import functools import logging log = logging.getLogger(__name__) -class Future(object): +class Future: error_on_callbacks = False # and errbacks def __init__(self): diff --git a/kafka/metrics/__init__.py b/kafka/metrics/__init__.py index 2a62d6334..22427e967 100644 --- a/kafka/metrics/__init__.py +++ b/kafka/metrics/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from kafka.metrics.compound_stat import NamedMeasurable from kafka.metrics.dict_reporter import DictReporter from kafka.metrics.kafka_metric import KafkaMetric diff --git a/kafka/metrics/compound_stat.py b/kafka/metrics/compound_stat.py index ac92480dc..260714256 100644 --- a/kafka/metrics/compound_stat.py +++ b/kafka/metrics/compound_stat.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import abc from kafka.metrics.stat import AbstractStat @@ -20,7 +18,7 @@ def stats(self): raise NotImplementedError -class NamedMeasurable(object): +class NamedMeasurable: def __init__(self, metric_name, measurable_stat): self._name = metric_name self._stat = measurable_stat diff --git a/kafka/metrics/dict_reporter.py b/kafka/metrics/dict_reporter.py index 0b98fe1e4..bc8088ca9 100644 --- a/kafka/metrics/dict_reporter.py +++ b/kafka/metrics/dict_reporter.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import logging import threading @@ -29,10 +27,10 @@ def snapshot(self): } } """ - return dict((category, dict((name, metric.value()) - for name, metric in list(metrics.items()))) + return {category: {name: metric.value() + for name, metric in list(metrics.items())} for category, metrics in - list(self._store.items())) + list(self._store.items())} def init(self, metrics): for metric in metrics: @@ -71,7 +69,7 @@ def get_category(self, metric): prefix = None, group = 'bar', tags = None returns: 'bar' """ - tags = ','.join('%s=%s' % (k, v) for k, v in + tags = ','.join(f'{k}={v}' for k, v in sorted(metric.metric_name.tags.items())) return '.'.join(x for x in [self._prefix, metric.metric_name.group, tags] if x) diff --git a/kafka/metrics/kafka_metric.py b/kafka/metrics/kafka_metric.py index 9fb8d89f1..40d74952a 100644 --- a/kafka/metrics/kafka_metric.py +++ b/kafka/metrics/kafka_metric.py @@ -1,9 +1,7 @@ -from __future__ import absolute_import - import time -class KafkaMetric(object): +class KafkaMetric: # NOTE java constructor takes a lock instance def __init__(self, metric_name, measurable, config): if not metric_name: diff --git a/kafka/metrics/measurable.py b/kafka/metrics/measurable.py index b06d4d789..fd5be1205 100644 --- a/kafka/metrics/measurable.py +++ b/kafka/metrics/measurable.py @@ 
-1,9 +1,7 @@ -from __future__ import absolute_import - import abc -class AbstractMeasurable(object): +class AbstractMeasurable: """A measurable quantity that can be registered as a metric""" @abc.abstractmethod def measure(self, config, now): diff --git a/kafka/metrics/measurable_stat.py b/kafka/metrics/measurable_stat.py index 4487adf6e..dba887d2b 100644 --- a/kafka/metrics/measurable_stat.py +++ b/kafka/metrics/measurable_stat.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import abc from kafka.metrics.measurable import AbstractMeasurable diff --git a/kafka/metrics/metric_config.py b/kafka/metrics/metric_config.py index 2e55abfcb..39a5b4168 100644 --- a/kafka/metrics/metric_config.py +++ b/kafka/metrics/metric_config.py @@ -1,9 +1,7 @@ -from __future__ import absolute_import - import sys -class MetricConfig(object): +class MetricConfig: """Configuration values for metrics""" def __init__(self, quota=None, samples=2, event_window=sys.maxsize, time_window_ms=30 * 1000, tags=None): diff --git a/kafka/metrics/metric_name.py b/kafka/metrics/metric_name.py index b5acd1662..5739c64f0 100644 --- a/kafka/metrics/metric_name.py +++ b/kafka/metrics/metric_name.py @@ -1,9 +1,7 @@ -from __future__ import absolute_import - import copy -class MetricName(object): +class MetricName: """ This class encapsulates a metric's name, logical group and its related attributes (tags). @@ -102,5 +100,5 @@ def __ne__(self, other): return not self.__eq__(other) def __str__(self): - return 'MetricName(name=%s, group=%s, description=%s, tags=%s)' % ( + return 'MetricName(name={}, group={}, description={}, tags={})'.format( self.name, self.group, self.description, self.tags) diff --git a/kafka/metrics/metrics.py b/kafka/metrics/metrics.py index 2c53488ff..67d609d08 100644 --- a/kafka/metrics/metrics.py +++ b/kafka/metrics/metrics.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import logging import sys import time @@ -11,7 +9,7 @@ logger = logging.getLogger(__name__) -class Metrics(object): +class Metrics: """ A registry of sensors and metrics. @@ -230,7 +228,7 @@ def register_metric(self, metric): for reporter in self._reporters: reporter.metric_change(metric) - class ExpireSensorTask(object): + class ExpireSensorTask: """ This iterates over every Sensor and triggers a remove_sensor if it has expired. Package private for testing diff --git a/kafka/metrics/metrics_reporter.py b/kafka/metrics/metrics_reporter.py index d8bd12b3b..3f0fe189b 100644 --- a/kafka/metrics/metrics_reporter.py +++ b/kafka/metrics/metrics_reporter.py @@ -1,9 +1,7 @@ -from __future__ import absolute_import - import abc -class AbstractMetricsReporter(object): +class AbstractMetricsReporter: """ An abstract class to allow things to listen as new metrics are created so they can be reported. 
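A trivial check, not from the patch, of why the string-formatting hunks in these metrics modules are behavior-preserving: old-style % interpolation and str.format render these values identically, so output such as MetricName.__str__ is unchanged (the sample values below are made up).

name, group, description, tags = 'commit-latency-avg', 'producer-metrics', None, None
old_style = 'MetricName(name=%s, group=%s, description=%s, tags=%s)' % (name, group, description, tags)
new_style = 'MetricName(name={}, group={}, description={}, tags={})'.format(name, group, description, tags)
assert old_style == new_style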
diff --git a/kafka/metrics/quota.py b/kafka/metrics/quota.py index 4d1b0d6cb..5ec5d13d1 100644 --- a/kafka/metrics/quota.py +++ b/kafka/metrics/quota.py @@ -1,7 +1,4 @@ -from __future__ import absolute_import - - -class Quota(object): +class Quota: """An upper or lower bound for metrics""" def __init__(self, bound, is_upper): self._bound = bound diff --git a/kafka/metrics/stat.py b/kafka/metrics/stat.py index 9fd2f01ec..daf01935d 100644 --- a/kafka/metrics/stat.py +++ b/kafka/metrics/stat.py @@ -1,9 +1,7 @@ -from __future__ import absolute_import - import abc -class AbstractStat(object): +class AbstractStat: """ An AbstractStat is a quantity such as average, max, etc that is computed off the stream of updates to a sensor diff --git a/kafka/oauth/__init__.py b/kafka/oauth/__init__.py index 8c8349564..f4d780892 100644 --- a/kafka/oauth/__init__.py +++ b/kafka/oauth/__init__.py @@ -1,3 +1 @@ -from __future__ import absolute_import - from kafka.oauth.abstract import AbstractTokenProvider diff --git a/kafka/oauth/abstract.py b/kafka/oauth/abstract.py index 8d89ff51d..fa5f4eb99 100644 --- a/kafka/oauth/abstract.py +++ b/kafka/oauth/abstract.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import abc # This statement is compatible with both Python 2.7 & 3+ diff --git a/kafka/partitioner/__init__.py b/kafka/partitioner/__init__.py index 21a3bbb66..eed1dca69 100644 --- a/kafka/partitioner/__init__.py +++ b/kafka/partitioner/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from kafka.partitioner.default import DefaultPartitioner, murmur2 diff --git a/kafka/partitioner/default.py b/kafka/partitioner/default.py index d0914c682..13fef6b76 100644 --- a/kafka/partitioner/default.py +++ b/kafka/partitioner/default.py @@ -1,11 +1,9 @@ -from __future__ import absolute_import - import random from kafka.vendor import six -class DefaultPartitioner(object): +class DefaultPartitioner: """Default partitioner. Hashes key to partition using murmur2 hashing (from java client) diff --git a/kafka/producer/__init__.py b/kafka/producer/__init__.py index 576c772a0..869dbb3dc 100644 --- a/kafka/producer/__init__.py +++ b/kafka/producer/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from kafka.producer.kafka import KafkaProducer __all__ = [ diff --git a/kafka/producer/buffer.py b/kafka/producer/buffer.py index 100801700..0e5b57b93 100644 --- a/kafka/producer/buffer.py +++ b/kafka/producer/buffer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division - import collections import io import threading @@ -10,7 +8,7 @@ import kafka.errors as Errors -class SimpleBufferPool(object): +class SimpleBufferPool: """A simple pool of BytesIO objects with a weak memory ceiling.""" def __init__(self, memory, poolable_size, metrics=None, metric_group_prefix='producer-metrics'): """Create a new buffer pool. 
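The SimpleBufferPool docstring above is terse, so here is a deliberately simplified sketch of the idea of pooling BytesIO buffers; the class name, methods, and limit below are invented for illustration and are not kafka-python's implementation.

import io
import threading

class TinyBytesIOPool:
    """Simplified illustration of reusing BytesIO buffers up to a fixed pool size."""

    def __init__(self, max_buffers):
        self._lock = threading.Lock()
        self._free = []
        self._max_buffers = max_buffers

    def allocate(self):
        # hand out a pooled buffer if one is free, otherwise create a new one
        with self._lock:
            return self._free.pop() if self._free else io.BytesIO()

    def release(self, buf):
        # reset the buffer and return it to the pool if there is room
        buf.seek(0)
        buf.truncate()
        with self._lock:
            if len(self._free) < self._max_buffers:
                self._free.append(buf)

pool = TinyBytesIOPool(max_buffers=8)
buf = pool.allocate()
buf.write(b'record batch bytes')
pool.release(buf)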
diff --git a/kafka/producer/future.py b/kafka/producer/future.py index 07fa4adb4..72a4d3985 100644 --- a/kafka/producer/future.py +++ b/kafka/producer/future.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import collections import threading @@ -9,17 +7,17 @@ class FutureProduceResult(Future): def __init__(self, topic_partition): - super(FutureProduceResult, self).__init__() + super().__init__() self.topic_partition = topic_partition self._latch = threading.Event() def success(self, value): - ret = super(FutureProduceResult, self).success(value) + ret = super().success(value) self._latch.set() return ret def failure(self, error): - ret = super(FutureProduceResult, self).failure(error) + ret = super().failure(error) self._latch.set() return ret @@ -30,7 +28,7 @@ def wait(self, timeout=None): class FutureRecordMetadata(Future): def __init__(self, produce_future, relative_offset, timestamp_ms, checksum, serialized_key_size, serialized_value_size, serialized_header_size): - super(FutureRecordMetadata, self).__init__() + super().__init__() self._produce_future = produce_future # packing args as a tuple is a minor speed optimization self.args = (relative_offset, timestamp_ms, checksum, serialized_key_size, serialized_value_size, serialized_header_size) @@ -59,7 +57,7 @@ def _produce_success(self, offset_and_timestamp): def get(self, timeout=None): if not self.is_done and not self._produce_future.wait(timeout): raise Errors.KafkaTimeoutError( - "Timeout after waiting for %s secs." % (timeout,)) + f"Timeout after waiting for {timeout} secs.") assert self.is_done if self.failed(): raise self.exception # pylint: disable-msg=raising-bad-type diff --git a/kafka/producer/kafka.py b/kafka/producer/kafka.py index 768db161c..30c53c235 100644 --- a/kafka/producer/kafka.py +++ b/kafka/producer/kafka.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import atexit import copy import logging @@ -12,7 +10,7 @@ import kafka.errors as Errors from kafka.client_async import KafkaClient, selectors -from kafka.codec import has_gzip, has_snappy, has_lz4 +from kafka.codec import has_gzip, has_snappy, has_lz4, has_zstd from kafka.metrics import MetricConfig, Metrics from kafka.partitioner.default import DefaultPartitioner from kafka.producer.future import FutureRecordMetadata, FutureProduceResult @@ -28,7 +26,7 @@ PRODUCER_CLIENT_ID_SEQUENCE = AtomicInteger() -class KafkaProducer(object): +class KafkaProducer: """A Kafka client that publishes records to the Kafka cluster. The producer is thread safe and sharing a single producer instance across @@ -119,7 +117,7 @@ class KafkaProducer(object): available guarantee. If unset, defaults to acks=1. compression_type (str): The compression type for all data generated by - the producer. Valid values are 'gzip', 'snappy', 'lz4', or None. + the producer. Valid values are 'gzip', 'snappy', 'lz4', 'zstd' or None. Compression is of full batches of data, so the efficacy of batching will also impact the compression ratio (more batching means better compression). Default: None. @@ -233,7 +231,7 @@ class KafkaProducer(object): should verify that the certificate matches the brokers hostname. default: true. ssl_cafile (str): optional filename of ca file to use in certificate - veriication. default: none. + verification. default: none. ssl_certfile (str): optional filename of file in pem format containing the client certificate, as well as any ca certificates needed to establish the certificate's authenticity. default: none. 
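A usage sketch, not part of the patch, of the zstd option documented above; 'localhost:9092' and 'my-topic' are placeholders, and zstd needs brokers >= 2.1.0 (enforced by the version check later in this file) plus a zstd library detectable by kafka.codec.has_zstd.

from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers='localhost:9092',   # placeholder broker address
    compression_type='zstd',              # codec wired in by this patch
)
producer.send('my-topic', b'payload')
producer.flush()
producer.close()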
@@ -280,10 +278,11 @@ class KafkaProducer(object): sasl mechanism handshake. Default: one of bootstrap servers sasl_oauth_token_provider (AbstractTokenProvider): OAuthBearer token provider instance. (See kafka.oauth.abstract). Default: None + kafka_client (callable): Custom class / callable for creating KafkaClient instances Note: Configuration parameters are described in more detail at - https://kafka.apache.org/0100/configuration.html#producerconfigs + https://kafka.apache.org/0100/documentation/#producerconfigs """ DEFAULT_CONFIG = { 'bootstrap_servers': 'localhost', @@ -332,13 +331,15 @@ class KafkaProducer(object): 'sasl_plain_password': None, 'sasl_kerberos_service_name': 'kafka', 'sasl_kerberos_domain_name': None, - 'sasl_oauth_token_provider': None + 'sasl_oauth_token_provider': None, + 'kafka_client': KafkaClient, } _COMPRESSORS = { 'gzip': (has_gzip, LegacyRecordBatchBuilder.CODEC_GZIP), 'snappy': (has_snappy, LegacyRecordBatchBuilder.CODEC_SNAPPY), 'lz4': (has_lz4, LegacyRecordBatchBuilder.CODEC_LZ4), + 'zstd': (has_zstd, DefaultRecordBatchBuilder.CODEC_ZSTD), None: (lambda: True, LegacyRecordBatchBuilder.CODEC_NONE), } @@ -350,7 +351,7 @@ def __init__(self, **configs): self.config[key] = configs.pop(key) # Only check for extra config keys in top-level class - assert not configs, 'Unrecognized configs: %s' % (configs,) + assert not configs, f'Unrecognized configs: {configs}' if self.config['client_id'] is None: self.config['client_id'] = 'kafka-python-producer-%s' % \ @@ -377,9 +378,10 @@ def __init__(self, **configs): reporters = [reporter() for reporter in self.config['metric_reporters']] self._metrics = Metrics(metric_config, reporters) - client = KafkaClient(metrics=self._metrics, metric_group_prefix='producer', - wakeup_timeout_ms=self.config['max_block_ms'], - **self.config) + client = self.config['kafka_client']( + metrics=self._metrics, metric_group_prefix='producer', + wakeup_timeout_ms=self.config['max_block_ms'], + **self.config) # Get auto-discovered version from client if necessary if self.config['api_version'] is None: @@ -388,13 +390,16 @@ def __init__(self, **configs): if self.config['compression_type'] == 'lz4': assert self.config['api_version'] >= (0, 8, 2), 'LZ4 Requires >= Kafka 0.8.2 Brokers' + if self.config['compression_type'] == 'zstd': + assert self.config['api_version'] >= (2, 1, 0), 'Zstd Requires >= Kafka 2.1.0 Brokers' + # Check compression_type for library support ct = self.config['compression_type'] if ct not in self._COMPRESSORS: - raise ValueError("Not supported codec: {}".format(ct)) + raise ValueError(f"Not supported codec: {ct}") else: checker, compression_attrs = self._COMPRESSORS[ct] - assert checker(), "Libraries for {} compression codec not found".format(ct) + assert checker(), f"Libraries for {ct} compression codec not found" self.config['compression_attrs'] = compression_attrs message_version = self._max_usable_produce_magic() @@ -445,6 +450,14 @@ def _unregister_cleanup(self): self._cleanup = None def __del__(self): + # Disable logger during destruction to avoid touching dangling references + class NullLogger: + def __getattr__(self, name): + return lambda *args: None + + global log + log = NullLogger() + self.close() def close(self, timeout=None): @@ -690,7 +703,7 @@ def _wait_on_metadata(self, topic, max_wait): raise future.exception if not metadata_event.is_set(): raise Errors.KafkaTimeoutError( - "Failed to update metadata after %.1f secs." 
% (max_wait,)) + f"Failed to update metadata after {max_wait:.1f} secs.") elif topic in self._metadata.unauthorized_topics: raise Errors.TopicAuthorizationFailedError(topic) else: @@ -730,7 +743,7 @@ def metrics(self, raw=False): return self._metrics.metrics.copy() metrics = {} - for k, v in six.iteritems(self._metrics.metrics.copy()): + for k, v in self._metrics.metrics.copy().items(): if k.group not in metrics: metrics[k.group] = {} if k.name not in metrics[k.group]: diff --git a/kafka/producer/record_accumulator.py b/kafka/producer/record_accumulator.py index a2aa0e8ec..fc4bfeb30 100644 --- a/kafka/producer/record_accumulator.py +++ b/kafka/producer/record_accumulator.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import collections import copy import logging @@ -16,7 +14,7 @@ log = logging.getLogger(__name__) -class AtomicInteger(object): +class AtomicInteger: def __init__(self, val=0): self._lock = threading.Lock() self._val = val @@ -35,7 +33,7 @@ def get(self): return self._val -class ProducerBatch(object): +class ProducerBatch: def __init__(self, tp, records, buffer): self.max_record_size = 0 now = time.time() @@ -110,7 +108,7 @@ def maybe_expire(self, request_timeout_ms, retry_backoff_ms, linger_ms, is_full) if error: self.records.close() self.done(-1, None, Errors.KafkaTimeoutError( - "Batch for %s containing %s record(s) expired: %s" % ( + "Batch for {} containing {} record(s) expired: {}".format( self.topic_partition, self.records.next_offset(), error))) return True return False @@ -129,7 +127,7 @@ def __str__(self): self.topic_partition, self.records.next_offset()) -class RecordAccumulator(object): +class RecordAccumulator: """ This class maintains a dequeue per TopicPartition that accumulates messages into MessageSets to be sent to the server. 
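metrics() keeps its nested {group: {metric_name: value}} shape; only the iteration idiom changes. A small sketch of reading it; the specific group and metric names are assumptions and may vary by version:

from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='broker:9092')                 # placeholder address
stats = producer.metrics()                                                # {group: {metric_name: value}}
print(stats.get('producer-metrics', {}).get('record-send-rate'))          # assumed metric name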
@@ -570,7 +568,7 @@ def close(self): self._closed = True -class IncompleteProducerBatches(object): +class IncompleteProducerBatches: """A threadsafe helper class to hold ProducerBatches that haven't been ack'd yet""" def __init__(self): diff --git a/kafka/producer/sender.py b/kafka/producer/sender.py index 35688d3f1..132b68d47 100644 --- a/kafka/producer/sender.py +++ b/kafka/producer/sender.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division - import collections import copy import logging @@ -35,7 +33,7 @@ class Sender(threading.Thread): } def __init__(self, client, metadata, accumulator, metrics, **configs): - super(Sender, self).__init__() + super().__init__() self.config = copy.copy(self.DEFAULT_CONFIG) for key in self.config: if key in configs: @@ -118,7 +116,7 @@ def run_once(self): if self.config['guarantee_message_order']: # Mute all the partitions drained - for batch_list in six.itervalues(batches_by_node): + for batch_list in batches_by_node.values(): for batch in batch_list: self._accumulator.muted.add(batch.topic_partition) @@ -142,7 +140,7 @@ def run_once(self): log.debug("Created %d produce requests: %s", len(requests), requests) # trace poll_timeout_ms = 0 - for node_id, request in six.iteritems(requests): + for node_id, request in requests.items(): batches = batches_by_node[node_id] log.debug('Sending Produce Request: %r', request) (self._client.send(node_id, request, wakeup=False) @@ -190,8 +188,8 @@ def _handle_produce_response(self, node_id, send_time, batches, response): # if we have a response, parse it log.debug('Parsing produce response: %r', response) if response: - batches_by_partition = dict([(batch.topic_partition, batch) - for batch in batches]) + batches_by_partition = {batch.topic_partition: batch + for batch in batches} for topic, partitions in response.topics: for partition_info in partitions: @@ -281,7 +279,7 @@ def _create_produce_requests(self, collated): dict: {node_id: ProduceRequest} (version depends on api_version) """ requests = {} - for node_id, batches in six.iteritems(collated): + for node_id, batches in collated.items(): requests[node_id] = self._produce_request( node_id, self.config['acks'], self.config['request_timeout_ms'], batches) @@ -324,7 +322,7 @@ def _produce_request(self, node_id, acks, timeout, batches): timeout=timeout, topics=[(topic, list(partition_info.items())) for topic, partition_info - in six.iteritems(produce_records_by_partition)], + in produce_records_by_partition.items()], **kwargs ) @@ -336,7 +334,7 @@ def bootstrap_connected(self): return self._client.bootstrap_connected() -class SenderMetrics(object): +class SenderMetrics: def __init__(self, metrics, client, metadata): self.metrics = metrics @@ -434,7 +432,7 @@ def add_metric(self, metric_name, measurable, group_name='producer-metrics', def maybe_register_topic_metrics(self, topic): def sensor_name(name): - return 'topic.{0}.{1}'.format(topic, name) + return f'topic.{topic}.{name}' # if one sensor of the metrics has been registered for the topic, # then all other sensors should have been registered; and vice versa diff --git a/kafka/protocol/__init__.py b/kafka/protocol/__init__.py index 26dcc78c5..ff9c68306 100644 --- a/kafka/protocol/__init__.py +++ b/kafka/protocol/__init__.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import - - API_KEYS = { 0: 'Produce', 1: 'Fetch', @@ -43,4 +40,7 @@ 40: 'ExpireDelegationToken', 41: 'DescribeDelegationToken', 42: 'DeleteGroups', + 45: 'AlterPartitionReassignments', + 46: 'ListPartitionReassignments', + 48: 
'DescribeClientQuotas', } diff --git a/kafka/protocol/abstract.py b/kafka/protocol/abstract.py index 2de65c4bb..10eed5649 100644 --- a/kafka/protocol/abstract.py +++ b/kafka/protocol/abstract.py @@ -1,9 +1,7 @@ -from __future__ import absolute_import - import abc -class AbstractType(object): +class AbstractType: __metaclass__ = abc.ABCMeta @abc.abstractmethod diff --git a/kafka/protocol/admin.py b/kafka/protocol/admin.py index af88ea473..6109d90f9 100644 --- a/kafka/protocol/admin.py +++ b/kafka/protocol/admin.py @@ -1,7 +1,5 @@ -from __future__ import absolute_import - from kafka.protocol.api import Request, Response -from kafka.protocol.types import Array, Boolean, Bytes, Int8, Int16, Int32, Int64, Schema, String +from kafka.protocol.types import Array, Boolean, Bytes, Int8, Int16, Int32, Int64, Schema, String, Float64, CompactString, CompactArray, TaggedFields class ApiVersionResponse_v0(Response): @@ -719,7 +717,7 @@ class DescribeConfigsResponse_v1(Response): ('config_names', String('utf-8')), ('config_value', String('utf-8')), ('read_only', Boolean), - ('is_default', Boolean), + ('config_source', Int8), ('is_sensitive', Boolean), ('config_synonyms', Array( ('config_name', String('utf-8')), @@ -882,3 +880,173 @@ class CreatePartitionsRequest_v1(Request): CreatePartitionsResponse_v0, CreatePartitionsResponse_v1, ] + +class DeleteGroupsResponse_v0(Response): + API_KEY = 42 + API_VERSION = 0 + SCHEMA = Schema( + ("throttle_time_ms", Int32), + ("results", Array( + ("group_id", String("utf-8")), + ("error_code", Int16))) + ) + + +class DeleteGroupsResponse_v1(Response): + API_KEY = 42 + API_VERSION = 1 + SCHEMA = DeleteGroupsResponse_v0.SCHEMA + + +class DeleteGroupsRequest_v0(Request): + API_KEY = 42 + API_VERSION = 0 + RESPONSE_TYPE = DeleteGroupsResponse_v0 + SCHEMA = Schema( + ("groups_names", Array(String("utf-8"))) + ) + + +class DeleteGroupsRequest_v1(Request): + API_KEY = 42 + API_VERSION = 1 + RESPONSE_TYPE = DeleteGroupsResponse_v1 + SCHEMA = DeleteGroupsRequest_v0.SCHEMA + + +DeleteGroupsRequest = [ + DeleteGroupsRequest_v0, DeleteGroupsRequest_v1 +] + +DeleteGroupsResponse = [ + DeleteGroupsResponse_v0, DeleteGroupsResponse_v1 +] + + +class DescribeClientQuotasResponse_v0(Response): + API_KEY = 48 + API_VERSION = 0 + SCHEMA = Schema( + ('throttle_time_ms', Int32), + ('error_code', Int16), + ('error_message', String('utf-8')), + ('entries', Array( + ('entity', Array( + ('entity_type', String('utf-8')), + ('entity_name', String('utf-8')))), + ('values', Array( + ('name', String('utf-8')), + ('value', Float64))))), + ) + + +class DescribeClientQuotasRequest_v0(Request): + API_KEY = 48 + API_VERSION = 0 + RESPONSE_TYPE = DescribeClientQuotasResponse_v0 + SCHEMA = Schema( + ('components', Array( + ('entity_type', String('utf-8')), + ('match_type', Int8), + ('match', String('utf-8')), + )), + ('strict', Boolean) + ) + + +DescribeClientQuotasRequest = [ + DescribeClientQuotasRequest_v0, +] + +DescribeClientQuotasResponse = [ + DescribeClientQuotasResponse_v0, +] + + +class AlterPartitionReassignmentsResponse_v0(Response): + API_KEY = 45 + API_VERSION = 0 + SCHEMA = Schema( + ("throttle_time_ms", Int32), + ("error_code", Int16), + ("error_message", CompactString("utf-8")), + ("responses", CompactArray( + ("name", CompactString("utf-8")), + ("partitions", CompactArray( + ("partition_index", Int32), + ("error_code", Int16), + ("error_message", CompactString("utf-8")), + ("tags", TaggedFields) + )), + ("tags", TaggedFields) + )), + ("tags", TaggedFields) + ) + + +class 
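The new DeleteGroups messages follow the usual Struct conventions, so they can be built by keyword from the schema field names and encoded directly. A hedged construction sketch with a placeholder group name:

from kafka.protocol.admin import DeleteGroupsRequest

request = DeleteGroupsRequest[1](groups_names=['my-consumer-group'])  # v1 shares the v0 schema
wire_bytes = request.encode()   # size prefix and header are added by the client machinery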
AlterPartitionReassignmentsRequest_v0(Request): + FLEXIBLE_VERSION = True + API_KEY = 45 + API_VERSION = 0 + RESPONSE_TYPE = AlterPartitionReassignmentsResponse_v0 + SCHEMA = Schema( + ("timeout_ms", Int32), + ("topics", CompactArray( + ("name", CompactString("utf-8")), + ("partitions", CompactArray( + ("partition_index", Int32), + ("replicas", CompactArray(Int32)), + ("tags", TaggedFields) + )), + ("tags", TaggedFields) + )), + ("tags", TaggedFields) + ) + + +AlterPartitionReassignmentsRequest = [AlterPartitionReassignmentsRequest_v0] + +AlterPartitionReassignmentsResponse = [AlterPartitionReassignmentsResponse_v0] + + +class ListPartitionReassignmentsResponse_v0(Response): + API_KEY = 46 + API_VERSION = 0 + SCHEMA = Schema( + ("throttle_time_ms", Int32), + ("error_code", Int16), + ("error_message", CompactString("utf-8")), + ("topics", CompactArray( + ("name", CompactString("utf-8")), + ("partitions", CompactArray( + ("partition_index", Int32), + ("replicas", CompactArray(Int32)), + ("adding_replicas", CompactArray(Int32)), + ("removing_replicas", CompactArray(Int32)), + ("tags", TaggedFields) + )), + ("tags", TaggedFields) + )), + ("tags", TaggedFields) + ) + + +class ListPartitionReassignmentsRequest_v0(Request): + FLEXIBLE_VERSION = True + API_KEY = 46 + API_VERSION = 0 + RESPONSE_TYPE = ListPartitionReassignmentsResponse_v0 + SCHEMA = Schema( + ("timeout_ms", Int32), + ("topics", CompactArray( + ("name", CompactString("utf-8")), + ("partition_index", CompactArray(Int32)), + ("tags", TaggedFields) + )), + ("tags", TaggedFields) + ) + + +ListPartitionReassignmentsRequest = [ListPartitionReassignmentsRequest_v0] + +ListPartitionReassignmentsResponse = [ListPartitionReassignmentsResponse_v0] diff --git a/kafka/protocol/api.py b/kafka/protocol/api.py index 64276fc17..24cf61a62 100644 --- a/kafka/protocol/api.py +++ b/kafka/protocol/api.py @@ -1,9 +1,7 @@ -from __future__ import absolute_import - import abc from kafka.protocol.struct import Struct -from kafka.protocol.types import Int16, Int32, String, Schema, Array +from kafka.protocol.types import Int16, Int32, String, Schema, Array, TaggedFields class RequestHeader(Struct): @@ -15,14 +13,45 @@ class RequestHeader(Struct): ) def __init__(self, request, correlation_id=0, client_id='kafka-python'): - super(RequestHeader, self).__init__( + super().__init__( request.API_KEY, request.API_VERSION, correlation_id, client_id ) +class RequestHeaderV2(Struct): + # Flexible response / request headers end in field buffer + SCHEMA = Schema( + ('api_key', Int16), + ('api_version', Int16), + ('correlation_id', Int32), + ('client_id', String('utf-8')), + ('tags', TaggedFields), + ) + + def __init__(self, request, correlation_id=0, client_id='kafka-python', tags=None): + super().__init__( + request.API_KEY, request.API_VERSION, correlation_id, client_id, tags or {} + ) + + +class ResponseHeader(Struct): + SCHEMA = Schema( + ('correlation_id', Int32), + ) + + +class ResponseHeaderV2(Struct): + SCHEMA = Schema( + ('correlation_id', Int32), + ('tags', TaggedFields), + ) + + class Request(Struct): __metaclass__ = abc.ABCMeta + FLEXIBLE_VERSION = False + @abc.abstractproperty def API_KEY(self): """Integer identifier for api request""" @@ -50,6 +79,16 @@ def expect_response(self): def to_object(self): return _to_object(self.SCHEMA, self) + def build_request_header(self, correlation_id, client_id): + if self.FLEXIBLE_VERSION: + return RequestHeaderV2(self, correlation_id=correlation_id, client_id=client_id) + return RequestHeader(self, 
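The two reassignment APIs are the library's first flexible-version messages, which is why their schemas use CompactArray/CompactString plus trailing TaggedFields and why FLEXIBLE_VERSION selects the v2 request header defined below. A rough sketch of building one, assuming the Struct keyword constructor and empty tag buffers:

from kafka.protocol.admin import ListPartitionReassignmentsRequest

request = ListPartitionReassignmentsRequest[0](
    timeout_ms=30000,
    topics=[('my-topic', [0, 1], {})],   # (name, partition indexes, tagged fields)
    tags={},
)
header = request.build_request_header(correlation_id=1, client_id='example-client')
payload = header.encode() + request.encode()   # the Int32 size prefix is added on send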
correlation_id=correlation_id, client_id=client_id) + + def parse_response_header(self, read_buffer): + if self.FLEXIBLE_VERSION: + return ResponseHeaderV2.decode(read_buffer) + return ResponseHeader.decode(read_buffer) + class Response(Struct): __metaclass__ = abc.ABCMeta diff --git a/kafka/protocol/commit.py b/kafka/protocol/commit.py index 31fc23707..0fb2b646f 100644 --- a/kafka/protocol/commit.py +++ b/kafka/protocol/commit.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from kafka.protocol.api import Request, Response from kafka.protocol.types import Array, Int8, Int16, Int32, Int64, Schema, String diff --git a/kafka/protocol/fetch.py b/kafka/protocol/fetch.py index f367848ce..3b742b357 100644 --- a/kafka/protocol/fetch.py +++ b/kafka/protocol/fetch.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from kafka.protocol.api import Request, Response from kafka.protocol.types import Array, Int8, Int16, Int32, Int64, Schema, String, Bytes diff --git a/kafka/protocol/frame.py b/kafka/protocol/frame.py index 7b4a32bcf..10ebf7c9b 100644 --- a/kafka/protocol/frame.py +++ b/kafka/protocol/frame.py @@ -1,6 +1,6 @@ class KafkaBytes(bytearray): def __init__(self, size): - super(KafkaBytes, self).__init__(size) + super().__init__(size) self._idx = 0 def read(self, nbytes=None): diff --git a/kafka/protocol/group.py b/kafka/protocol/group.py index bcb96553b..68efdc8f9 100644 --- a/kafka/protocol/group.py +++ b/kafka/protocol/group.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from kafka.protocol.api import Request, Response from kafka.protocol.struct import Struct from kafka.protocol.types import Array, Bytes, Int16, Int32, Schema, String diff --git a/kafka/protocol/message.py b/kafka/protocol/message.py index 31527bf63..b07d4eb0e 100644 --- a/kafka/protocol/message.py +++ b/kafka/protocol/message.py @@ -1,10 +1,8 @@ -from __future__ import absolute_import - import io import time -from kafka.codec import (has_gzip, has_snappy, has_lz4, - gzip_decode, snappy_decode, +from kafka.codec import (has_gzip, has_snappy, has_lz4, has_zstd, + gzip_decode, snappy_decode, zstd_decode, lz4_decode, lz4_decode_old_kafka) from kafka.protocol.frame import KafkaBytes from kafka.protocol.struct import Struct @@ -35,6 +33,7 @@ class Message(Struct): CODEC_GZIP = 0x01 CODEC_SNAPPY = 0x02 CODEC_LZ4 = 0x03 + CODEC_ZSTD = 0x04 TIMESTAMP_TYPE_MASK = 0x08 HEADER_SIZE = 22 # crc(4), magic(1), attributes(1), timestamp(8), key+value size(4*2) @@ -77,7 +76,7 @@ def _encode_self(self, recalc_crc=True): elif version == 0: fields = (self.crc, self.magic, self.attributes, self.key, self.value) else: - raise ValueError('Unrecognized message version: %s' % (version,)) + raise ValueError(f'Unrecognized message version: {version}') message = Message.SCHEMAS[version].encode(fields) if not recalc_crc: return message @@ -93,7 +92,7 @@ def decode(cls, data): data = io.BytesIO(data) # Partial decode required to determine message version base_fields = cls.SCHEMAS[0].fields[0:3] - crc, magic, attributes = [field.decode(data) for field in base_fields] + crc, magic, attributes = (field.decode(data) for field in base_fields) remaining = cls.SCHEMAS[magic].fields[3:] fields = [field.decode(data) for field in remaining] if magic == 1: @@ -119,7 +118,7 @@ def is_compressed(self): def decompress(self): codec = self.attributes & self.CODEC_MASK - assert codec in (self.CODEC_GZIP, self.CODEC_SNAPPY, self.CODEC_LZ4) + assert codec in (self.CODEC_GZIP, self.CODEC_SNAPPY, self.CODEC_LZ4, self.CODEC_ZSTD) if codec == 
self.CODEC_GZIP: assert has_gzip(), 'Gzip decompression unsupported' raw_bytes = gzip_decode(self.value) @@ -132,6 +131,9 @@ def decompress(self): raw_bytes = lz4_decode_old_kafka(self.value) else: raw_bytes = lz4_decode(self.value) + elif codec == self.CODEC_ZSTD: + assert has_zstd(), "ZSTD decompression unsupported" + raw_bytes = zstd_decode(self.value) else: raise Exception('This should be impossible') @@ -143,7 +145,7 @@ def __hash__(self): class PartialMessage(bytes): def __repr__(self): - return 'PartialMessage(%s)' % (self,) + return f'PartialMessage({self})' class MessageSet(AbstractType): diff --git a/kafka/protocol/metadata.py b/kafka/protocol/metadata.py index 414e5b84a..041444100 100644 --- a/kafka/protocol/metadata.py +++ b/kafka/protocol/metadata.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from kafka.protocol.api import Request, Response from kafka.protocol.types import Array, Boolean, Int16, Int32, Schema, String diff --git a/kafka/protocol/offset.py b/kafka/protocol/offset.py index 1ed382b0d..34b00a40b 100644 --- a/kafka/protocol/offset.py +++ b/kafka/protocol/offset.py @@ -1,12 +1,10 @@ -from __future__ import absolute_import - from kafka.protocol.api import Request, Response from kafka.protocol.types import Array, Int8, Int16, Int32, Int64, Schema, String UNKNOWN_OFFSET = -1 -class OffsetResetStrategy(object): +class OffsetResetStrategy: LATEST = -1 EARLIEST = -2 NONE = 0 diff --git a/kafka/protocol/parser.py b/kafka/protocol/parser.py index cfee0466d..a667105ad 100644 --- a/kafka/protocol/parser.py +++ b/kafka/protocol/parser.py @@ -1,19 +1,16 @@ -from __future__ import absolute_import - import collections import logging import kafka.errors as Errors -from kafka.protocol.api import RequestHeader from kafka.protocol.commit import GroupCoordinatorResponse from kafka.protocol.frame import KafkaBytes -from kafka.protocol.types import Int32 +from kafka.protocol.types import Int32, TaggedFields from kafka.version import __version__ log = logging.getLogger(__name__) -class KafkaProtocol(object): +class KafkaProtocol: """Manage the kafka network protocol Use an instance of KafkaProtocol to manage bytes send/recv'd @@ -59,9 +56,8 @@ def send_request(self, request, correlation_id=None): log.debug('Sending request %s', request) if correlation_id is None: correlation_id = self._next_correlation_id() - header = RequestHeader(request, - correlation_id=correlation_id, - client_id=self._client_id) + + header = request.build_request_header(correlation_id=correlation_id, client_id=self._client_id) message = b''.join([header.encode(), request.encode()]) size = Int32.encode(len(message)) data = size + message @@ -135,17 +131,12 @@ def receive_bytes(self, data): return responses def _process_response(self, read_buffer): - recv_correlation_id = Int32.decode(read_buffer) - log.debug('Received correlation id: %d', recv_correlation_id) - if not self.in_flight_requests: - raise Errors.CorrelationIdError( - 'No in-flight-request found for server response' - ' with correlation ID %d' - % (recv_correlation_id,)) - + raise Errors.CorrelationIdError('No in-flight-request found for server response') (correlation_id, request) = self.in_flight_requests.popleft() - + response_header = request.parse_response_header(read_buffer) + recv_correlation_id = response_header.correlation_id + log.debug('Received correlation id: %d', recv_correlation_id) # 0.8.2 quirk if (recv_correlation_id == 0 and correlation_id != 0 and diff --git a/kafka/protocol/pickle.py b/kafka/protocol/pickle.py index 
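Message.decompress() can now hand zstd-compressed payloads to kafka.codec. A quick round-trip sketch, assuming the optional zstandard package is installed (has_zstd() returns False otherwise):

from kafka.codec import has_zstd, zstd_encode, zstd_decode

if has_zstd():
    compressed = zstd_encode(b'x' * 1024)
    assert zstd_decode(compressed) == b'x' * 1024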
d6e5fa74f..cd73b3add 100644 --- a/kafka/protocol/pickle.py +++ b/kafka/protocol/pickle.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - try: import copyreg # pylint: disable=import-error except ImportError: diff --git a/kafka/protocol/produce.py b/kafka/protocol/produce.py index 9b3f6bf55..b62430c22 100644 --- a/kafka/protocol/produce.py +++ b/kafka/protocol/produce.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from kafka.protocol.api import Request, Response from kafka.protocol.types import Int16, Int32, Int64, String, Array, Schema, Bytes diff --git a/kafka/protocol/struct.py b/kafka/protocol/struct.py index e9da6e6c1..eb08ac8ef 100644 --- a/kafka/protocol/struct.py +++ b/kafka/protocol/struct.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from io import BytesIO from kafka.protocol.abstract import AbstractType @@ -57,7 +55,7 @@ def get_item(self, name): def __repr__(self): key_vals = [] for name, field in zip(self.SCHEMA.names, self.SCHEMA.fields): - key_vals.append('%s=%s' % (name, field.repr(self.__dict__[name]))) + key_vals.append(f'{name}={field.repr(self.__dict__[name])}') return self.__class__.__name__ + '(' + ', '.join(key_vals) + ')' def __hash__(self): diff --git a/kafka/protocol/types.py b/kafka/protocol/types.py index d508b2605..0118af11b 100644 --- a/kafka/protocol/types.py +++ b/kafka/protocol/types.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import struct from struct import error @@ -77,6 +75,19 @@ def decode(cls, data): return _unpack(cls._unpack, data.read(8)) +class Float64(AbstractType): + _pack = struct.Struct('>d').pack + _unpack = struct.Struct('>d').unpack + + @classmethod + def encode(cls, value): + return _pack(cls._pack, value) + + @classmethod + def decode(cls, data): + return _unpack(cls._unpack, data.read(8)) + + class String(AbstractType): def __init__(self, encoding='utf-8'): self.encoding = encoding @@ -162,7 +173,7 @@ def repr(self, value): field_val = getattr(value, self.names[i]) except AttributeError: field_val = value[i] - key_vals.append('%s=%s' % (self.names[i], self.fields[i].repr(field_val))) + key_vals.append(f'{self.names[i]}={self.fields[i].repr(field_val)}') return '(' + ', '.join(key_vals) + ')' except Exception: return repr(value) @@ -181,9 +192,10 @@ def __init__(self, *array_of): def encode(self, items): if items is None: return Int32.encode(-1) + encoded_items = [self.array_of.encode(item) for item in items] return b''.join( - [Int32.encode(len(items))] + - [self.array_of.encode(item) for item in items] + [Int32.encode(len(encoded_items))] + + encoded_items ) def decode(self, data): @@ -196,3 +208,156 @@ def repr(self, list_of_items): if list_of_items is None: return 'NULL' return '[' + ', '.join([self.array_of.repr(item) for item in list_of_items]) + ']' + + +class UnsignedVarInt32(AbstractType): + @classmethod + def decode(cls, data): + value, i = 0, 0 + while True: + b, = struct.unpack('B', data.read(1)) + if not (b & 0x80): + break + value |= (b & 0x7f) << i + i += 7 + if i > 28: + raise ValueError(f'Invalid value {value}') + value |= b << i + return value + + @classmethod + def encode(cls, value): + value &= 0xffffffff + ret = b'' + while (value & 0xffffff80) != 0: + b = (value & 0x7f) | 0x80 + ret += struct.pack('B', b) + value >>= 7 + ret += struct.pack('B', value) + return ret + + +class VarInt32(AbstractType): + @classmethod + def decode(cls, data): + value = UnsignedVarInt32.decode(data) + return (value >> 1) ^ -(value & 1) + + @classmethod + def encode(cls, value): + # 
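Float64 mirrors the existing fixed-width integer types and is what DescribeClientQuotas uses for quota values. A round-trip sketch against the class as defined above:

from io import BytesIO

from kafka.protocol.types import Float64

encoded = Float64.encode(0.25)                  # 8-byte big-endian IEEE 754 double
assert Float64.decode(BytesIO(encoded)) == 0.25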
bring it in line with the java binary repr + value &= 0xffffffff + return UnsignedVarInt32.encode((value << 1) ^ (value >> 31)) + + +class VarInt64(AbstractType): + @classmethod + def decode(cls, data): + value, i = 0, 0 + while True: + b = data.read(1) + if not (b & 0x80): + break + value |= (b & 0x7f) << i + i += 7 + if i > 63: + raise ValueError(f'Invalid value {value}') + value |= b << i + return (value >> 1) ^ -(value & 1) + + @classmethod + def encode(cls, value): + # bring it in line with the java binary repr + value &= 0xffffffffffffffff + v = (value << 1) ^ (value >> 63) + ret = b'' + while (v & 0xffffffffffffff80) != 0: + b = (value & 0x7f) | 0x80 + ret += struct.pack('B', b) + v >>= 7 + ret += struct.pack('B', v) + return ret + + +class CompactString(String): + def decode(self, data): + length = UnsignedVarInt32.decode(data) - 1 + if length < 0: + return None + value = data.read(length) + if len(value) != length: + raise ValueError('Buffer underrun decoding string') + return value.decode(self.encoding) + + def encode(self, value): + if value is None: + return UnsignedVarInt32.encode(0) + value = str(value).encode(self.encoding) + return UnsignedVarInt32.encode(len(value) + 1) + value + + +class TaggedFields(AbstractType): + @classmethod + def decode(cls, data): + num_fields = UnsignedVarInt32.decode(data) + ret = {} + if not num_fields: + return ret + prev_tag = -1 + for i in range(num_fields): + tag = UnsignedVarInt32.decode(data) + if tag <= prev_tag: + raise ValueError(f'Invalid or out-of-order tag {tag}') + prev_tag = tag + size = UnsignedVarInt32.decode(data) + val = data.read(size) + ret[tag] = val + return ret + + @classmethod + def encode(cls, value): + ret = UnsignedVarInt32.encode(len(value)) + for k, v in value.items(): + # do we allow for other data types ?? It could get complicated really fast + assert isinstance(v, bytes), f'Value {v} is not a byte array' + assert isinstance(k, int) and k > 0, f'Key {k} is not a positive integer' + ret += UnsignedVarInt32.encode(k) + ret += v + return ret + + +class CompactBytes(AbstractType): + @classmethod + def decode(cls, data): + length = UnsignedVarInt32.decode(data) - 1 + if length < 0: + return None + value = data.read(length) + if len(value) != length: + raise ValueError('Buffer underrun decoding Bytes') + return value + + @classmethod + def encode(cls, value): + if value is None: + return UnsignedVarInt32.encode(0) + else: + return UnsignedVarInt32.encode(len(value) + 1) + value + + +class CompactArray(Array): + + def encode(self, items): + if items is None: + return UnsignedVarInt32.encode(0) + return b''.join( + [UnsignedVarInt32.encode(len(items) + 1)] + + [self.array_of.encode(item) for item in items] + ) + + def decode(self, data): + length = UnsignedVarInt32.decode(data) - 1 + if length == -1: + return None + return [self.array_of.decode(data) for _ in range(length)] + diff --git a/kafka/record/_crc32c.py b/kafka/record/_crc32c.py index ecff48f5e..6642b5bbe 100644 --- a/kafka/record/_crc32c.py +++ b/kafka/record/_crc32c.py @@ -105,7 +105,7 @@ def crc_update(crc, data): Returns: 32-bit updated CRC-32C as long. 
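The varint and compact types are the building blocks of the flexible-version wire format: lengths are unsigned varints offset by one, and tagged fields are a varint count followed by tag/size/value triples. A few round trips against the classes above:

from io import BytesIO

from kafka.protocol.types import (
    CompactArray, CompactString, Int32, TaggedFields, UnsignedVarInt32,
)

assert UnsignedVarInt32.decode(BytesIO(UnsignedVarInt32.encode(300))) == 300

compact_str = CompactString('utf-8')
assert compact_str.decode(BytesIO(compact_str.encode('hello'))) == 'hello'

compact_ints = CompactArray(Int32)
assert compact_ints.decode(BytesIO(compact_ints.encode([1, 2, 3]))) == [1, 2, 3]

assert TaggedFields.decode(BytesIO(TaggedFields.encode({1: b'\x01'}))) == {1: b'\x01'}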
""" - if type(data) != array.array or data.itemsize != 1: + if not isinstance(data, array.array) or data.itemsize != 1: buf = array.array("B", data) else: buf = data @@ -139,7 +139,5 @@ def crc(data): if __name__ == "__main__": import sys - # TODO remove the pylint disable once pylint fixes - # https://github.com/PyCQA/pylint/issues/2571 - data = sys.stdin.read() # pylint: disable=assignment-from-no-return + data = sys.stdin.buffer.read() # pylint: disable=assignment-from-no-return print(hex(crc(data))) diff --git a/kafka/record/abc.py b/kafka/record/abc.py index d5c172aaa..f45176051 100644 --- a/kafka/record/abc.py +++ b/kafka/record/abc.py @@ -1,8 +1,7 @@ -from __future__ import absolute_import import abc -class ABCRecord(object): +class ABCRecord: __metaclass__ = abc.ABCMeta __slots__ = () @@ -44,7 +43,7 @@ def headers(self): """ -class ABCRecordBatchBuilder(object): +class ABCRecordBatchBuilder: __metaclass__ = abc.ABCMeta __slots__ = () @@ -84,8 +83,8 @@ def build(self): """ -class ABCRecordBatch(object): - """ For v2 incapsulates a RecordBatch, for v0/v1 a single (maybe +class ABCRecordBatch: + """ For v2 encapsulates a RecordBatch, for v0/v1 a single (maybe compressed) message. """ __metaclass__ = abc.ABCMeta @@ -98,7 +97,7 @@ def __iter__(self): """ -class ABCRecords(object): +class ABCRecords: __metaclass__ = abc.ABCMeta __slots__ = () diff --git a/kafka/record/default_records.py b/kafka/record/default_records.py index 07368bba9..5045f31ee 100644 --- a/kafka/record/default_records.py +++ b/kafka/record/default_records.py @@ -62,13 +62,13 @@ ) from kafka.errors import CorruptRecordException, UnsupportedCodecError from kafka.codec import ( - gzip_encode, snappy_encode, lz4_encode, - gzip_decode, snappy_decode, lz4_decode + gzip_encode, snappy_encode, lz4_encode, zstd_encode, + gzip_decode, snappy_decode, lz4_decode, zstd_decode ) import kafka.codec as codecs -class DefaultRecordBase(object): +class DefaultRecordBase: __slots__ = () @@ -97,6 +97,7 @@ class DefaultRecordBase(object): CODEC_GZIP = 0x01 CODEC_SNAPPY = 0x02 CODEC_LZ4 = 0x03 + CODEC_ZSTD = 0x04 TIMESTAMP_TYPE_MASK = 0x08 TRANSACTIONAL_MASK = 0x10 CONTROL_MASK = 0x20 @@ -111,9 +112,11 @@ def _assert_has_codec(self, compression_type): checker, name = codecs.has_snappy, "snappy" elif compression_type == self.CODEC_LZ4: checker, name = codecs.has_lz4, "lz4" + elif compression_type == self.CODEC_ZSTD: + checker, name = codecs.has_zstd, "zstd" if not checker(): raise UnsupportedCodecError( - "Libraries for {} compression codec not found".format(name)) + f"Libraries for {name} compression codec not found") class DefaultRecordBatch(DefaultRecordBase, ABCRecordBatch): @@ -185,6 +188,8 @@ def _maybe_uncompress(self): uncompressed = snappy_decode(data.tobytes()) if compression_type == self.CODEC_LZ4: uncompressed = lz4_decode(data.tobytes()) + if compression_type == self.CODEC_ZSTD: + uncompressed = zstd_decode(data.tobytes()) self._buffer = bytearray(uncompressed) self._pos = 0 self._decompressed = True @@ -242,7 +247,7 @@ def _read_msg( h_key_len, pos = decode_varint(buffer, pos) if h_key_len < 0: raise CorruptRecordException( - "Invalid negative header key size {}".format(h_key_len)) + f"Invalid negative header key size {h_key_len}") h_key = buffer[pos: pos + h_key_len].decode("utf-8") pos += h_key_len @@ -282,7 +287,7 @@ def __next__(self): msg = self._read_msg() except (ValueError, IndexError) as err: raise CorruptRecordException( - "Found invalid record structure: {!r}".format(err)) + f"Found invalid record structure: 
{err!r}") else: self._next_record_index += 1 return msg @@ -416,10 +421,10 @@ def append(self, offset, timestamp, key, value, headers, raise TypeError(timestamp) if not (key is None or get_type(key) in byte_like): raise TypeError( - "Not supported type for key: {}".format(type(key))) + f"Not supported type for key: {type(key)}") if not (value is None or get_type(value) in byte_like): raise TypeError( - "Not supported type for value: {}".format(type(value))) + f"Not supported type for value: {type(value)}") # We will always add the first message, so those will be set if self._first_timestamp is None: @@ -517,6 +522,8 @@ def _maybe_compress(self): compressed = snappy_encode(data) elif self._compression_type == self.CODEC_LZ4: compressed = lz4_encode(data) + elif self._compression_type == self.CODEC_ZSTD: + compressed = zstd_encode(data) compressed_size = len(compressed) if len(data) <= compressed_size: # We did not get any benefit from compression, lets send @@ -591,7 +598,7 @@ def estimate_size_in_bytes(cls, key, value, headers): ) -class DefaultRecordMetadata(object): +class DefaultRecordMetadata: __slots__ = ("_size", "_timestamp", "_offset") diff --git a/kafka/record/legacy_records.py b/kafka/record/legacy_records.py index e2ee5490c..9ab8873ca 100644 --- a/kafka/record/legacy_records.py +++ b/kafka/record/legacy_records.py @@ -55,7 +55,7 @@ from kafka.errors import CorruptRecordException, UnsupportedCodecError -class LegacyRecordBase(object): +class LegacyRecordBase: __slots__ = () @@ -124,7 +124,7 @@ def _assert_has_codec(self, compression_type): checker, name = codecs.has_lz4, "lz4" if not checker(): raise UnsupportedCodecError( - "Libraries for {} compression codec not found".format(name)) + f"Libraries for {name} compression codec not found") class LegacyRecordBatch(ABCRecordBatch, LegacyRecordBase): @@ -263,7 +263,7 @@ def __iter__(self): # When magic value is greater than 0, the timestamp # of a compressed message depends on the - # typestamp type of the wrapper message: + # timestamp type of the wrapper message: if timestamp_type == self.LOG_APPEND_TIME: timestamp = self._timestamp @@ -367,11 +367,11 @@ def append(self, offset, timestamp, key, value, headers=None): if not (key is None or isinstance(key, (bytes, bytearray, memoryview))): raise TypeError( - "Not supported type for key: {}".format(type(key))) + f"Not supported type for key: {type(key)}") if not (value is None or isinstance(value, (bytes, bytearray, memoryview))): raise TypeError( - "Not supported type for value: {}".format(type(value))) + f"Not supported type for value: {type(value)}") # Check if we have room for another message pos = len(self._buffer) @@ -514,7 +514,7 @@ def estimate_size_in_bytes(cls, magic, compression_type, key, value): return cls.LOG_OVERHEAD + cls.record_size(magic, key, value) -class LegacyRecordMetadata(object): +class LegacyRecordMetadata: __slots__ = ("_crc", "_size", "_timestamp", "_offset") diff --git a/kafka/record/memory_records.py b/kafka/record/memory_records.py index a6c4b51f7..7a604887c 100644 --- a/kafka/record/memory_records.py +++ b/kafka/record/memory_records.py @@ -18,7 +18,6 @@ # # So we can iterate over batches just by knowing offsets of Length. Magic is # used to construct the correct class for Batch itself. 
-from __future__ import division import struct @@ -110,14 +109,14 @@ def next_batch(self, _min_slice=MIN_SLICE, return DefaultRecordBatch(next_slice) -class MemoryRecordsBuilder(object): +class MemoryRecordsBuilder: __slots__ = ("_builder", "_batch_size", "_buffer", "_next_offset", "_closed", "_bytes_written") def __init__(self, magic, compression_type, batch_size): assert magic in [0, 1, 2], "Not supported magic" - assert compression_type in [0, 1, 2, 3], "Not valid compression type" + assert compression_type in [0, 1, 2, 3, 4], "Not valid compression type" if magic >= 2: self._builder = DefaultRecordBatchBuilder( magic=magic, compression_type=compression_type, diff --git a/kafka/record/util.py b/kafka/record/util.py index 2f8286d0d..3b712005d 100644 --- a/kafka/record/util.py +++ b/kafka/record/util.py @@ -2,7 +2,7 @@ from kafka.record._crc32c import crc as crc32c_py try: - from crc32c import crc32 as crc32c_c + from crc32c import crc32c as crc32c_c except ImportError: crc32c_c = None diff --git a/kafka/sasl/__init__.py b/kafka/sasl/__init__.py new file mode 100644 index 000000000..337c90949 --- /dev/null +++ b/kafka/sasl/__init__.py @@ -0,0 +1,54 @@ +import logging + +from kafka.sasl import gssapi, oauthbearer, plain, scram, msk + +log = logging.getLogger(__name__) + +MECHANISMS = { + 'GSSAPI': gssapi, + 'OAUTHBEARER': oauthbearer, + 'PLAIN': plain, + 'SCRAM-SHA-256': scram, + 'SCRAM-SHA-512': scram, + 'AWS_MSK_IAM': msk, +} + + +def register_mechanism(key, module): + """ + Registers a custom SASL mechanism that can be used via sasl_mechanism={key}. + + Example: + import kakfa.sasl + from kafka import KafkaProducer + from mymodule import custom_sasl + kafka.sasl.register_mechanism('CUSTOM_SASL', custom_sasl) + + producer = KafkaProducer(sasl_mechanism='CUSTOM_SASL') + + Arguments: + key (str): The name of the mechanism returned by the broker and used + in the sasl_mechanism config value. + module (module): A module that implements the following methods... + + def validate_config(conn: BrokerConnection): -> None: + # Raises an AssertionError for missing or invalid conifg values. + + def try_authenticate(conn: BrokerConncetion, future: -> Future): + # Executes authentication routine and returns a resolved Future. + + Raises: + AssertionError: The registered module does not define a required method. 
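MemoryRecordsBuilder now accepts compression_type 4 (zstd) alongside 0-3. A hedged sketch of building a small v2 batch, assuming the append()/buffer() signatures used elsewhere in the library and the zstandard package being installed:

from kafka.record.memory_records import MemoryRecordsBuilder

builder = MemoryRecordsBuilder(magic=2, compression_type=4, batch_size=16384)  # 4 == zstd
builder.append(timestamp=None, key=None, value=b'hello', headers=[])
builder.close()            # compresses the batch only if that actually saves bytes
payload = builder.buffer()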
+ """ + assert callable(getattr(module, 'validate_config', None)), ( + 'Custom SASL mechanism {} must implement method #validate_config()' + .format(key) + ) + assert callable(getattr(module, 'try_authenticate', None)), ( + 'Custom SASL mechanism {} must implement method #try_authenticate()' + .format(key) + ) + if key in MECHANISMS: + log.warning(f'Overriding existing SASL mechanism {key}') + + MECHANISMS[key] = module diff --git a/kafka/sasl/gssapi.py b/kafka/sasl/gssapi.py new file mode 100644 index 000000000..3daf7e148 --- /dev/null +++ b/kafka/sasl/gssapi.py @@ -0,0 +1,100 @@ +import io +import logging +import struct + +import kafka.errors as Errors +from kafka.protocol.types import Int8, Int32 + +try: + import gssapi + from gssapi.raw.misc import GSSError +except ImportError: + gssapi = None + GSSError = None + +log = logging.getLogger(__name__) + +SASL_QOP_AUTH = 1 + + +def validate_config(conn): + assert gssapi is not None, ( + 'gssapi library required when sasl_mechanism=GSSAPI' + ) + assert conn.config['sasl_kerberos_service_name'] is not None, ( + 'sasl_kerberos_service_name required when sasl_mechanism=GSSAPI' + ) + + +def try_authenticate(conn, future): + kerberos_damin_name = conn.config['sasl_kerberos_domain_name'] or conn.host + auth_id = conn.config['sasl_kerberos_service_name'] + '@' + kerberos_damin_name + gssapi_name = gssapi.Name( + auth_id, + name_type=gssapi.NameType.hostbased_service + ).canonicalize(gssapi.MechType.kerberos) + log.debug('%s: GSSAPI name: %s', conn, gssapi_name) + + err = None + close = False + with conn._lock: + if not conn._can_send_recv(): + err = Errors.NodeNotReadyError(str(conn)) + close = False + else: + # Establish security context and negotiate protection level + # For reference RFC 2222, section 7.2.1 + try: + # Exchange tokens until authentication either succeeds or fails + client_ctx = gssapi.SecurityContext(name=gssapi_name, usage='initiate') + received_token = None + while not client_ctx.complete: + # calculate an output token from kafka token (or None if first iteration) + output_token = client_ctx.step(received_token) + + # pass output token to kafka, or send empty response if the security + # context is complete (output token is None in that case) + if output_token is None: + conn._send_bytes_blocking(Int32.encode(0)) + else: + msg = output_token + size = Int32.encode(len(msg)) + conn._send_bytes_blocking(size + msg) + + # The server will send a token back. Processing of this token either + # establishes a security context, or it needs further token exchange. + # The gssapi will be able to identify the needed next step. + # The connection is closed on failure. + header = conn._recv_bytes_blocking(4) + (token_size,) = struct.unpack('>i', header) + received_token = conn._recv_bytes_blocking(token_size) + + # Process the security layer negotiation token, sent by the server + # once the security context is established. + + # unwraps message containing supported protection levels and msg size + msg = client_ctx.unwrap(received_token).message + # Kafka currently doesn't support integrity or confidentiality + # security layers, so we simply set QoP to 'auth' only (first octet). 
+ # We reuse the max message size proposed by the server + msg = Int8.encode(SASL_QOP_AUTH & Int8.decode(io.BytesIO(msg[0:1]))) + msg[1:] + # add authorization identity to the response, GSS-wrap and send it + msg = client_ctx.wrap(msg + auth_id.encode(), False).message + size = Int32.encode(len(msg)) + conn._send_bytes_blocking(size + msg) + + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving reply from server", conn) + err = Errors.KafkaConnectionError(f"{conn}: {e}") + close = True + except Exception as e: + err = e + close = True + + if err is not None: + if close: + conn.close(error=err) + return future.failure(err) + + log.info('%s: Authenticated as %s via GSSAPI', conn, gssapi_name) + return future.success(True) diff --git a/kafka/sasl/msk.py b/kafka/sasl/msk.py new file mode 100644 index 000000000..83a203270 --- /dev/null +++ b/kafka/sasl/msk.py @@ -0,0 +1,231 @@ +import datetime +import hashlib +import hmac +import json +import string +import struct +import logging + + +from kafka.vendor.six.moves import urllib +from kafka.protocol.types import Int32 +import kafka.errors as Errors + +from botocore.session import Session as BotoSession # importing it in advance is not an option apparently... + + +def try_authenticate(self, future): + + session = BotoSession() + credentials = session.get_credentials().get_frozen_credentials() + client = AwsMskIamClient( + host=self.host, + access_key=credentials.access_key, + secret_key=credentials.secret_key, + region=session.get_config_variable('region'), + token=credentials.token, + ) + + msg = client.first_message() + size = Int32.encode(len(msg)) + + err = None + close = False + with self._lock: + if not self._can_send_recv(): + err = Errors.NodeNotReadyError(str(self)) + close = False + else: + try: + self._send_bytes_blocking(size + msg) + data = self._recv_bytes_blocking(4) + data = self._recv_bytes_blocking(struct.unpack('4B', data)[-1]) + except (ConnectionError, TimeoutError) as e: + logging.exception("%s: Error receiving reply from server", self) + err = Errors.KafkaConnectionError(f"{self}: {e}") + close = True + + if err is not None: + if close: + self.close(error=err) + return future.failure(err) + + logging.info('%s: Authenticated via AWS_MSK_IAM %s', self, data.decode('utf-8')) + return future.success(True) + + +class AwsMskIamClient: + UNRESERVED_CHARS = string.ascii_letters + string.digits + '-._~' + + def __init__(self, host, access_key, secret_key, region, token=None): + """ + Arguments: + host (str): The hostname of the broker. + access_key (str): An AWS_ACCESS_KEY_ID. + secret_key (str): An AWS_SECRET_ACCESS_KEY. + region (str): An AWS_REGION. + token (Optional[str]): An AWS_SESSION_TOKEN if using temporary + credentials. 
+ """ + self.algorithm = 'AWS4-HMAC-SHA256' + self.expires = '900' + self.hashfunc = hashlib.sha256 + self.headers = [ + ('host', host) + ] + self.version = '2020_10_22' + + self.service = 'kafka-cluster' + self.action = f'{self.service}:Connect' + + now = datetime.datetime.utcnow() + self.datestamp = now.strftime('%Y%m%d') + self.timestamp = now.strftime('%Y%m%dT%H%M%SZ') + + self.host = host + self.access_key = access_key + self.secret_key = secret_key + self.region = region + self.token = token + + @property + def _credential(self): + return '{0.access_key}/{0._scope}'.format(self) + + @property + def _scope(self): + return '{0.datestamp}/{0.region}/{0.service}/aws4_request'.format(self) + + @property + def _signed_headers(self): + """ + Returns (str): + An alphabetically sorted, semicolon-delimited list of lowercase + request header names. + """ + return ';'.join(sorted(k.lower() for k, _ in self.headers)) + + @property + def _canonical_headers(self): + """ + Returns (str): + A newline-delited list of header names and values. + Header names are lowercased. + """ + return '\n'.join(map(':'.join, self.headers)) + '\n' + + @property + def _canonical_request(self): + """ + Returns (str): + An AWS Signature Version 4 canonical request in the format: + \n + \n + \n + \n + \n + + """ + # The hashed_payload is always an empty string for MSK. + hashed_payload = self.hashfunc(b'').hexdigest() + return '\n'.join(( + 'GET', + '/', + self._canonical_querystring, + self._canonical_headers, + self._signed_headers, + hashed_payload, + )) + + @property + def _canonical_querystring(self): + """ + Returns (str): + A '&'-separated list of URI-encoded key/value pairs. + """ + params = [] + params.append(('Action', self.action)) + params.append(('X-Amz-Algorithm', self.algorithm)) + params.append(('X-Amz-Credential', self._credential)) + params.append(('X-Amz-Date', self.timestamp)) + params.append(('X-Amz-Expires', self.expires)) + if self.token: + params.append(('X-Amz-Security-Token', self.token)) + params.append(('X-Amz-SignedHeaders', self._signed_headers)) + + return '&'.join(self._uriencode(k) + '=' + self._uriencode(v) for k, v in params) + + @property + def _signing_key(self): + """ + Returns (bytes): + An AWS Signature V4 signing key generated from the secret_key, date, + region, service, and request type. + """ + key = self._hmac(('AWS4' + self.secret_key).encode('utf-8'), self.datestamp) + key = self._hmac(key, self.region) + key = self._hmac(key, self.service) + key = self._hmac(key, 'aws4_request') + return key + + @property + def _signing_str(self): + """ + Returns (str): + A string used to sign the AWS Signature V4 payload in the format: + \n + \n + \n + + """ + canonical_request_hash = self.hashfunc(self._canonical_request.encode('utf-8')).hexdigest() + return '\n'.join((self.algorithm, self.timestamp, self._scope, canonical_request_hash)) + + def _uriencode(self, msg): + """ + Arguments: + msg (str): A string to URI-encode. + + Returns (str): + The URI-encoded version of the provided msg, following the encoding + rules specified: https://github.com/aws/aws-msk-iam-auth#uriencode + """ + return urllib.parse.quote(msg, safe=self.UNRESERVED_CHARS) + + def _hmac(self, key, msg): + """ + Arguments: + key (bytes): A key to use for the HMAC digest. + msg (str): A value to include in the HMAC digest. + Returns (bytes): + An HMAC digest of the given key and msg. 
+ """ + return hmac.new(key, msg.encode('utf-8'), digestmod=self.hashfunc).digest() + + def first_message(self): + """ + Returns (bytes): + An encoded JSON authentication payload that can be sent to the + broker. + """ + signature = hmac.new( + self._signing_key, + self._signing_str.encode('utf-8'), + digestmod=self.hashfunc, + ).hexdigest() + msg = { + 'version': self.version, + 'host': self.host, + 'user-agent': 'kafka-python', + 'action': self.action, + 'x-amz-algorithm': self.algorithm, + 'x-amz-credential': self._credential, + 'x-amz-date': self.timestamp, + 'x-amz-signedheaders': self._signed_headers, + 'x-amz-expires': self.expires, + 'x-amz-signature': signature, + } + if self.token: + msg['x-amz-security-token'] = self.token + + return json.dumps(msg, separators=(',', ':')).encode('utf-8') diff --git a/kafka/sasl/oauthbearer.py b/kafka/sasl/oauthbearer.py new file mode 100644 index 000000000..5d2488baf --- /dev/null +++ b/kafka/sasl/oauthbearer.py @@ -0,0 +1,80 @@ +import logging + +import kafka.errors as Errors +from kafka.protocol.types import Int32 + +log = logging.getLogger(__name__) + + +def validate_config(conn): + token_provider = conn.config.get('sasl_oauth_token_provider') + assert token_provider is not None, ( + 'sasl_oauth_token_provider required when sasl_mechanism=OAUTHBEARER' + ) + assert callable(getattr(token_provider, 'token', None)), ( + 'sasl_oauth_token_provider must implement method #token()' + ) + + +def try_authenticate(conn, future): + data = b'' + + msg = bytes(_build_oauth_client_request(conn).encode("utf-8")) + size = Int32.encode(len(msg)) + + err = None + close = False + with conn._lock: + if not conn._can_send_recv(): + err = Errors.NodeNotReadyError(str(conn)) + close = False + else: + try: + # Send SASL OAuthBearer request with OAuth token + conn._send_bytes_blocking(size + msg) + + # The server will send a zero sized message (that is Int32(0)) on success. + # The connection is closed on failure + data = conn._recv_bytes_blocking(4) + + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving reply from server", conn) + err = Errors.KafkaConnectionError(f"{conn}: {e}") + close = True + + if err is not None: + if close: + conn.close(error=err) + return future.failure(err) + + if data != b'\x00\x00\x00\x00': + error = Errors.AuthenticationFailedError('Unrecognized response during authentication') + return future.failure(error) + + log.info('%s: Authenticated via OAuth', conn) + return future.success(True) + + +def _build_oauth_client_request(conn): + token_provider = conn.config['sasl_oauth_token_provider'] + return "n,,\x01auth=Bearer {}{}\x01\x01".format( + token_provider.token(), + _token_extensions(conn), + ) + + +def _token_extensions(conn): + """ + Return a string representation of the OPTIONAL key-value pairs that can be + sent with an OAUTHBEARER initial request. 
+ """ + token_provider = conn.config['sasl_oauth_token_provider'] + + # Only run if the #extensions() method is implemented by the clients Token Provider class + # Builds up a string separated by \x01 via a dict of key value pairs + if (callable(getattr(token_provider, "extensions", None)) + and len(token_provider.extensions()) > 0): + msg = "\x01".join([f"{k}={v}" for k, v in token_provider.extensions().items()]) + return "\x01" + msg + else: + return "" diff --git a/kafka/sasl/plain.py b/kafka/sasl/plain.py new file mode 100644 index 000000000..625a43f08 --- /dev/null +++ b/kafka/sasl/plain.py @@ -0,0 +1,58 @@ +import logging + +import kafka.errors as Errors +from kafka.protocol.types import Int32 + +log = logging.getLogger(__name__) + + +def validate_config(conn): + assert conn.config['sasl_plain_username'] is not None, ( + 'sasl_plain_username required when sasl_mechanism=PLAIN' + ) + assert conn.config['sasl_plain_password'] is not None, ( + 'sasl_plain_password required when sasl_mechanism=PLAIN' + ) + + +def try_authenticate(conn, future): + if conn.config['security_protocol'] == 'SASL_PLAINTEXT': + log.warning('%s: Sending username and password in the clear', conn) + + data = b'' + # Send PLAIN credentials per RFC-4616 + msg = bytes('\0'.join([conn.config['sasl_plain_username'], + conn.config['sasl_plain_username'], + conn.config['sasl_plain_password']]).encode('utf-8')) + size = Int32.encode(len(msg)) + + err = None + close = False + with conn._lock: + if not conn._can_send_recv(): + err = Errors.NodeNotReadyError(str(conn)) + close = False + else: + try: + conn._send_bytes_blocking(size + msg) + + # The server will send a zero sized message (that is Int32(0)) on success. + # The connection is closed on failure + data = conn._recv_bytes_blocking(4) + + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving reply from server", conn) + err = Errors.KafkaConnectionError(f"{conn}: {e}") + close = True + + if err is not None: + if close: + conn.close(error=err) + return future.failure(err) + + if data != b'\x00\x00\x00\x00': + error = Errors.AuthenticationFailedError('Unrecognized response during authentication') + return future.failure(error) + + log.info('%s: Authenticated as %s via PLAIN', conn, conn.config['sasl_plain_username']) + return future.success(True) diff --git a/kafka/sasl/scram.py b/kafka/sasl/scram.py new file mode 100644 index 000000000..4f3e60126 --- /dev/null +++ b/kafka/sasl/scram.py @@ -0,0 +1,68 @@ +import logging +import struct + +import kafka.errors as Errors +from kafka.protocol.types import Int32 +from kafka.scram import ScramClient + +log = logging.getLogger() + + +def validate_config(conn): + assert conn.config['sasl_plain_username'] is not None, ( + 'sasl_plain_username required when sasl_mechanism=SCRAM-*' + ) + assert conn.config['sasl_plain_password'] is not None, ( + 'sasl_plain_password required when sasl_mechanism=SCRAM-*' + ) + + +def try_authenticate(conn, future): + if conn.config['security_protocol'] == 'SASL_PLAINTEXT': + log.warning('%s: Exchanging credentials in the clear', conn) + + scram_client = ScramClient( + conn.config['sasl_plain_username'], + conn.config['sasl_plain_password'], + conn.config['sasl_mechanism'], + ) + + err = None + close = False + with conn._lock: + if not conn._can_send_recv(): + err = Errors.NodeNotReadyError(str(conn)) + close = False + else: + try: + client_first = scram_client.first_message().encode('utf-8') + size = Int32.encode(len(client_first)) + conn._send_bytes_blocking(size + 
client_first) + + (data_len,) = struct.unpack('>i', conn._recv_bytes_blocking(4)) + server_first = conn._recv_bytes_blocking(data_len).decode('utf-8') + scram_client.process_server_first_message(server_first) + + client_final = scram_client.final_message().encode('utf-8') + size = Int32.encode(len(client_final)) + conn._send_bytes_blocking(size + client_final) + + (data_len,) = struct.unpack('>i', conn._recv_bytes_blocking(4)) + server_final = conn._recv_bytes_blocking(data_len).decode('utf-8') + scram_client.process_server_final_message(server_final) + + except (ConnectionError, TimeoutError) as e: + log.exception("%s: Error receiving reply from server", conn) + err = Errors.KafkaConnectionError(f"{conn}: {e}") + close = True + + if err is not None: + if close: + conn.close(error=err) + return future.failure(err) + + log.info( + '%s: Authenticated as %s via %s', + conn, conn.config['sasl_plain_username'], conn.config['sasl_mechanism'] + ) + return future.success(True) diff --git a/kafka/scram.py b/kafka/scram.py index 7f003750c..05a7667d8 100644 --- a/kafka/scram.py +++ b/kafka/scram.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import base64 import hashlib import hmac @@ -8,12 +6,8 @@ from kafka.vendor import six -if six.PY2: - def xor_bytes(left, right): - return bytearray(ord(lb) ^ ord(rb) for lb, rb in zip(left, right)) -else: - def xor_bytes(left, right): - return bytes(lb ^ rb for lb, rb in zip(left, right)) +def xor_bytes(left, right): + return bytes(lb ^ rb for lb, rb in zip(left, right)) class ScramClient: @@ -38,7 +32,7 @@ def __init__(self, user, password, mechanism): self.server_signature = None def first_message(self): - client_first_bare = 'n={},r={}'.format(self.user, self.nonce) + client_first_bare = f'n={self.user},r={self.nonce}' self.auth_message += client_first_bare return 'n,,' + client_first_bare diff --git a/kafka/serializer/__init__.py b/kafka/serializer/__init__.py index 90cd93ab2..168277519 100644 --- a/kafka/serializer/__init__.py +++ b/kafka/serializer/__init__.py @@ -1,3 +1 @@ -from __future__ import absolute_import - from kafka.serializer.abstract import Serializer, Deserializer diff --git a/kafka/serializer/abstract.py b/kafka/serializer/abstract.py index 18ad8d69c..529662b07 100644 --- a/kafka/serializer/abstract.py +++ b/kafka/serializer/abstract.py @@ -1,9 +1,7 @@ -from __future__ import absolute_import - import abc -class Serializer(object): +class Serializer: __meta__ = abc.ABCMeta def __init__(self, **config): @@ -17,7 +15,7 @@ def close(self): pass -class Deserializer(object): +class Deserializer: __meta__ = abc.ABCMeta def __init__(self, **config): diff --git a/kafka/structs.py b/kafka/structs.py index 9ab4f8bfa..4f11259aa 100644 --- a/kafka/structs.py +++ b/kafka/structs.py @@ -1,27 +1,86 @@ -from __future__ import absolute_import +""" Other useful structs """ from collections import namedtuple -# Other useful structs +"""A topic and partition tuple + +Keyword Arguments: + topic (str): A topic name + partition (int): A partition id +""" TopicPartition = namedtuple("TopicPartition", ["topic", "partition"]) + +"""A Kafka broker metadata used by admin tools. + +Keyword Arguments: + nodeID (int): The Kafka broker id. + host (str): The Kafka broker hostname. + port (int): The Kafka broker port. + rack (str): The rack of the broker, which is used to in rack aware + partition assignment for fault tolerance. + Examples: `RACK1`, `us-east-1d`. 
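From the application side, the SCRAM exchange above is driven entirely by configuration. A typical consumer setup with a placeholder address and credentials might look like:

from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'my-topic',
    bootstrap_servers='broker:9093',   # placeholder address
    security_protocol='SASL_SSL',
    sasl_mechanism='SCRAM-SHA-512',    # or 'SCRAM-SHA-256'
    sasl_plain_username='alice',       # placeholder credentials
    sasl_plain_password='secret',
)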
Default: None +""" BrokerMetadata = namedtuple("BrokerMetadata", ["nodeId", "host", "port", "rack"]) + +"""A topic partition metadata describing the state in the MetadataResponse. + +Keyword Arguments: + topic (str): The topic name of the partition this metadata relates to. + partition (int): The id of the partition this metadata relates to. + leader (int): The id of the broker that is the leader for the partition. + replicas (List[int]): The ids of all brokers that contain replicas of the + partition. + isr (List[int]): The ids of all brokers that contain in-sync replicas of + the partition. + error (KafkaError): A KafkaError object associated with the request for + this partition metadata. +""" PartitionMetadata = namedtuple("PartitionMetadata", ["topic", "partition", "leader", "replicas", "isr", "error"]) + +"""The Kafka offset commit API + +The Kafka offset commit API allows users to provide additional metadata +(in the form of a string) when an offset is committed. This can be useful +(for example) to store information about which node made the commit, +what time the commit was made, etc. + +Keyword Arguments: + offset (int): The offset to be committed + metadata (str): Non-null metadata +""" OffsetAndMetadata = namedtuple("OffsetAndMetadata", # TODO add leaderEpoch: OffsetAndMetadata(offset, leaderEpoch, metadata) ["offset", "metadata"]) + +"""An offset and timestamp tuple + +Keyword Arguments: + offset (int): An offset + timestamp (int): The timestamp associated to the offset +""" OffsetAndTimestamp = namedtuple("OffsetAndTimestamp", ["offset", "timestamp"]) +MemberInformation = namedtuple("MemberInformation", + ["member_id", "client_id", "client_host", "member_metadata", "member_assignment"]) + +GroupInformation = namedtuple("GroupInformation", + ["error_code", "group", "state", "protocol_type", "protocol", "members", "authorized_operations"]) + +"""Define retry policy for async producer -# Define retry policy for async producer -# Limit value: int >= 0, 0 means no retries +Keyword Arguments: + Limit (int): Number of retries. limit >= 0, 0 means no retries + backoff_ms (int): Milliseconds to backoff. + retry_on_timeouts: +""" RetryOptions = namedtuple("RetryOptions", ["limit", "backoff_ms", "retry_on_timeouts"]) diff --git a/kafka/util.py b/kafka/util.py index e31d99305..474a5e54d 100644 --- a/kafka/util.py +++ b/kafka/util.py @@ -1,28 +1,23 @@ -from __future__ import absolute_import - import binascii import weakref from kafka.vendor import six -if six.PY3: - MAX_INT = 2 ** 31 - TO_SIGNED = 2 ** 32 +MAX_INT = 2 ** 31 +TO_SIGNED = 2 ** 32 - def crc32(data): - crc = binascii.crc32(data) - # py2 and py3 behave a little differently - # CRC is encoded as a signed int in kafka protocol - # so we'll convert the py3 unsigned result to signed - if crc >= MAX_INT: - crc -= TO_SIGNED - return crc -else: - from binascii import crc32 +def crc32(data): + crc = binascii.crc32(data) + # py2 and py3 behave a little differently + # CRC is encoded as a signed int in kafka protocol + # so we'll convert the py3 unsigned result to signed + if crc >= MAX_INT: + crc -= TO_SIGNED + return crc -class WeakMethod(object): +class WeakMethod: """ Callable that weakly references a method and the object it is bound to. It is based on https://stackoverflow.com/a/24287465. 
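The newly documented structs are plain namedtuples used throughout the public API; for example, manual offset commits are keyed by TopicPartition and valued by OffsetAndMetadata. A brief sketch, assuming a reachable placeholder broker:

from kafka import KafkaConsumer
from kafka.structs import OffsetAndMetadata, TopicPartition

consumer = KafkaConsumer(bootstrap_servers='broker:9092', group_id='example-group')
tp = TopicPartition('my-topic', 0)
consumer.commit({tp: OffsetAndMetadata(42, 'committed by worker-1')})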
diff --git a/kafka/vendor/enum34.py b/kafka/vendor/enum34.py index 5f64bd2d8..363be19b1 100644 --- a/kafka/vendor/enum34.py +++ b/kafka/vendor/enum34.py @@ -39,7 +39,7 @@ def any(iterable): # In Python 3 unicode no longer exists (it's just str) unicode = str -class _RouteClassAttributeToGetattr(object): +class _RouteClassAttributeToGetattr: """Route attribute access on a class to __getattr__. This is a descriptor, used to define attributes that act differently when @@ -103,7 +103,7 @@ class _EnumDict(dict): """ def __init__(self): - super(_EnumDict, self).__init__() + super().__init__() self._member_names = [] def __setitem__(self, key, value): @@ -139,7 +139,7 @@ def __setitem__(self, key, value): # enum overwriting a descriptor? raise TypeError('Key already defined as: %r' % self[key]) self._member_names.append(key) - super(_EnumDict, self).__setitem__(key, value) + super().__setitem__(key, value) # Dummy value for Enum as EnumMeta explicity checks for it, but of course until @@ -170,7 +170,7 @@ def __new__(metacls, cls, bases, classdict): first_enum) # save enum items into separate mapping so they don't get baked into # the new class - members = dict((k, classdict[k]) for k in classdict._member_names) + members = {k: classdict[k] for k in classdict._member_names} for name in classdict._member_names: del classdict[name] @@ -192,16 +192,16 @@ def __new__(metacls, cls, bases, classdict): _order_ += aliases # check for illegal enum names (any others?) - invalid_names = set(members) & set(['mro']) + invalid_names = set(members) & {'mro'} if invalid_names: - raise ValueError('Invalid enum member name(s): %s' % ( - ', '.join(invalid_names), )) + raise ValueError('Invalid enum member name(s): {}'.format( + ', '.join(invalid_names))) # save attributes from super classes so we know if we can take # the shortcut of storing members in the class dict - base_attributes = set([a for b in bases for a in b.__dict__]) + base_attributes = {a for b in bases for a in b.__dict__} # create our new Enum type - enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict) + enum_class = super().__new__(metacls, cls, bases, classdict) enum_class._member_names_ = [] # names in random order if OrderedDict is not None: enum_class._member_map_ = OrderedDict() @@ -361,7 +361,7 @@ def __delattr__(cls, attr): if attr in cls._member_map_: raise AttributeError( "%s: cannot delete Enum member." % cls.__name__) - super(EnumMeta, cls).__delattr__(attr) + super().__delattr__(attr) def __dir__(self): return (['__class__', '__doc__', '__members__', '__module__'] + @@ -421,7 +421,7 @@ def __setattr__(cls, name, value): member_map = cls.__dict__.get('_member_map_', {}) if name in member_map: raise AttributeError('Cannot reassign members.') - super(EnumMeta, cls).__setattr__(name, value) + super().__setattr__(name, value) def _create_(cls, class_name, names=None, module=None, type=None, start=1): """Convenience method to create a new Enum class. 
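The enum34 hunks here, like the selectors34 and six changes that follow, are mechanical Python 3 modernizations: zero-argument super(), set and dict comprehensions instead of set()/dict() over generator expressions, and f-strings or str.format() instead of %-formatting. A small before/after sketch of those idioms (hypothetical classes, not code from the patch):

class Base:
    def __init__(self):
        self.items = {'a': 1, 'b': 2}

class Old(Base):
    def __init__(self):
        # Python 2-compatible spellings removed by this patch.
        super(Old, self).__init__()
        self.doubled = dict((k, v * 2) for k, v in self.items.items())
        self.names = set([k for k in self.items])
        self.label = "%s: %r" % (type(self).__name__, self.names)

class New(Base):
    def __init__(self):
        # Python 3-only spellings used after the conversion.
        super().__init__()
        self.doubled = {k: v * 2 for k, v in self.items.items()}
        self.names = {k for k in self.items}
        self.label = f"{type(self).__name__}: {self.names!r}"

assert Old().doubled == New().doubled and Old().names == New().names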
@@ -663,18 +663,18 @@ def __new__(cls, value): for member in cls._member_map_.values(): if member.value == value: return member - raise ValueError("%s is not a valid %s" % (value, cls.__name__)) + raise ValueError(f"{value} is not a valid {cls.__name__}") temp_enum_dict['__new__'] = __new__ del __new__ def __repr__(self): - return "<%s.%s: %r>" % ( + return "<{}.{}: {!r}>".format( self.__class__.__name__, self._name_, self._value_) temp_enum_dict['__repr__'] = __repr__ del __repr__ def __str__(self): - return "%s.%s" % (self.__class__.__name__, self._name_) + return f"{self.__class__.__name__}.{self._name_}" temp_enum_dict['__str__'] = __str__ del __str__ @@ -719,29 +719,29 @@ def __cmp__(self, other): return 0 return -1 return NotImplemented - raise TypeError("unorderable types: %s() and %s()" % (self.__class__.__name__, other.__class__.__name__)) + raise TypeError(f"unorderable types: {self.__class__.__name__}() and {other.__class__.__name__}()") temp_enum_dict['__cmp__'] = __cmp__ del __cmp__ else: def __le__(self, other): - raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__)) + raise TypeError(f"unorderable types: {self.__class__.__name__}() <= {other.__class__.__name__}()") temp_enum_dict['__le__'] = __le__ del __le__ def __lt__(self, other): - raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__)) + raise TypeError(f"unorderable types: {self.__class__.__name__}() < {other.__class__.__name__}()") temp_enum_dict['__lt__'] = __lt__ del __lt__ def __ge__(self, other): - raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__)) + raise TypeError(f"unorderable types: {self.__class__.__name__}() >= {other.__class__.__name__}()") temp_enum_dict['__ge__'] = __ge__ del __ge__ def __gt__(self, other): - raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__)) + raise TypeError(f"unorderable types: {self.__class__.__name__}() > {other.__class__.__name__}()") temp_enum_dict['__gt__'] = __gt__ del __gt__ @@ -804,7 +804,7 @@ def _convert(cls, name, module, filter, source=None): source = vars(source) else: source = module_globals - members = dict((name, value) for name, value in source.items() if filter(name)) + members = {name: value for name, value in source.items() if filter(name)} cls = cls(name, members, module=module) cls.__reduce_ex__ = _reduce_ex_by_name module_globals.update(cls.__members__) @@ -833,7 +833,7 @@ def unique(enumeration): duplicates.append((name, member.name)) if duplicates: duplicate_names = ', '.join( - ["%s -> %s" % (alias, name) for (alias, name) in duplicates] + [f"{alias} -> {name}" for (alias, name) in duplicates] ) raise ValueError('duplicate names found in %r: %s' % (enumeration, duplicate_names) diff --git a/kafka/vendor/selectors34.py b/kafka/vendor/selectors34.py index ebf5d515e..496ad1cd4 100644 --- a/kafka/vendor/selectors34.py +++ b/kafka/vendor/selectors34.py @@ -12,10 +12,13 @@ The following code adapted from trollius.selectors. 
""" -from __future__ import absolute_import from abc import ABCMeta, abstractmethod -from collections import namedtuple, Mapping +from collections import namedtuple +try: + from collections.abc import Mapping +except ImportError: + from collections.abc import Mapping from errno import EINTR import math import select @@ -35,7 +38,7 @@ def _wrap_error(exc, mapping, key): traceback = exc.__traceback__ else: traceback = sys.exc_info()[2] - six.reraise(new_err_cls, new_err, traceback) + raise new_err.with_traceback(traceback) # generic events, that must be mapped to implementation-specific ones @@ -55,16 +58,16 @@ def _fileobj_to_fd(fileobj): Raises: ValueError if the object is invalid """ - if isinstance(fileobj, six.integer_types): + if isinstance(fileobj, int): fd = fileobj else: try: fd = int(fileobj.fileno()) except (AttributeError, TypeError, ValueError): raise ValueError("Invalid file object: " - "{0!r}".format(fileobj)) + "{!r}".format(fileobj)) if fd < 0: - raise ValueError("Invalid file descriptor: {0}".format(fd)) + raise ValueError(f"Invalid file descriptor: {fd}") return fd @@ -87,15 +90,14 @@ def __getitem__(self, fileobj): fd = self._selector._fileobj_lookup(fileobj) return self._selector._fd_to_key[fd] except KeyError: - raise KeyError("{0!r} is not registered".format(fileobj)) + raise KeyError(f"{fileobj!r} is not registered") def __iter__(self): return iter(self._selector._fd_to_key) # Using six.add_metaclass() decorator instead of six.with_metaclass() because # the latter leaks temporary_class to garbage with gc disabled -@six.add_metaclass(ABCMeta) -class BaseSelector(object): +class BaseSelector(metaclass=ABCMeta): """Selector abstract base class. A selector supports registering file objects to be monitored for specific @@ -207,7 +209,7 @@ def get_key(self, fileobj): try: return mapping[fileobj] except KeyError: - raise KeyError("{0!r} is not registered".format(fileobj)) + raise KeyError(f"{fileobj!r} is not registered") @abstractmethod def get_map(self): @@ -251,12 +253,12 @@ def _fileobj_lookup(self, fileobj): def register(self, fileobj, events, data=None): if (not events) or (events & ~(EVENT_READ | EVENT_WRITE)): - raise ValueError("Invalid events: {0!r}".format(events)) + raise ValueError(f"Invalid events: {events!r}") key = SelectorKey(fileobj, self._fileobj_lookup(fileobj), events, data) if key.fd in self._fd_to_key: - raise KeyError("{0!r} (FD {1}) is already registered" + raise KeyError("{!r} (FD {}) is already registered" .format(fileobj, key.fd)) self._fd_to_key[key.fd] = key @@ -266,7 +268,7 @@ def unregister(self, fileobj): try: key = self._fd_to_key.pop(self._fileobj_lookup(fileobj)) except KeyError: - raise KeyError("{0!r} is not registered".format(fileobj)) + raise KeyError(f"{fileobj!r} is not registered") return key def modify(self, fileobj, events, data=None): @@ -274,7 +276,7 @@ def modify(self, fileobj, events, data=None): try: key = self._fd_to_key[self._fileobj_lookup(fileobj)] except KeyError: - raise KeyError("{0!r} is not registered".format(fileobj)) + raise KeyError(f"{fileobj!r} is not registered") if events != key.events: self.unregister(fileobj) key = self.register(fileobj, events, data) @@ -310,12 +312,12 @@ class SelectSelector(_BaseSelectorImpl): """Select-based selector.""" def __init__(self): - super(SelectSelector, self).__init__() + super().__init__() self._readers = set() self._writers = set() def register(self, fileobj, events, data=None): - key = super(SelectSelector, self).register(fileobj, events, data) + key = 
super().register(fileobj, events, data) if events & EVENT_READ: self._readers.add(key.fd) if events & EVENT_WRITE: @@ -323,7 +325,7 @@ def register(self, fileobj, events, data=None): return key def unregister(self, fileobj): - key = super(SelectSelector, self).unregister(fileobj) + key = super().unregister(fileobj) self._readers.discard(key.fd) self._writers.discard(key.fd) return key @@ -340,7 +342,7 @@ def select(self, timeout=None): ready = [] try: r, w, _ = self._select(self._readers, self._writers, [], timeout) - except select.error as exc: + except OSError as exc: if exc.args[0] == EINTR: return ready else: @@ -366,11 +368,11 @@ class PollSelector(_BaseSelectorImpl): """Poll-based selector.""" def __init__(self): - super(PollSelector, self).__init__() + super().__init__() self._poll = select.poll() def register(self, fileobj, events, data=None): - key = super(PollSelector, self).register(fileobj, events, data) + key = super().register(fileobj, events, data) poll_events = 0 if events & EVENT_READ: poll_events |= select.POLLIN @@ -380,7 +382,7 @@ def register(self, fileobj, events, data=None): return key def unregister(self, fileobj): - key = super(PollSelector, self).unregister(fileobj) + key = super().unregister(fileobj) self._poll.unregister(key.fd) return key @@ -396,7 +398,7 @@ def select(self, timeout=None): ready = [] try: fd_event_list = self._poll.poll(timeout) - except select.error as exc: + except OSError as exc: if exc.args[0] == EINTR: return ready else: @@ -420,14 +422,14 @@ class EpollSelector(_BaseSelectorImpl): """Epoll-based selector.""" def __init__(self): - super(EpollSelector, self).__init__() + super().__init__() self._epoll = select.epoll() def fileno(self): return self._epoll.fileno() def register(self, fileobj, events, data=None): - key = super(EpollSelector, self).register(fileobj, events, data) + key = super().register(fileobj, events, data) epoll_events = 0 if events & EVENT_READ: epoll_events |= select.EPOLLIN @@ -437,10 +439,10 @@ def register(self, fileobj, events, data=None): return key def unregister(self, fileobj): - key = super(EpollSelector, self).unregister(fileobj) + key = super().unregister(fileobj) try: self._epoll.unregister(key.fd) - except IOError: + except OSError: # This can happen if the FD was closed since it # was registered. 
pass @@ -464,7 +466,7 @@ def select(self, timeout=None): ready = [] try: fd_event_list = self._epoll.poll(timeout, max_ev) - except IOError as exc: + except OSError as exc: if exc.errno == EINTR: return ready else: @@ -483,7 +485,7 @@ def select(self, timeout=None): def close(self): self._epoll.close() - super(EpollSelector, self).close() + super().close() if hasattr(select, 'devpoll'): @@ -492,14 +494,14 @@ class DevpollSelector(_BaseSelectorImpl): """Solaris /dev/poll selector.""" def __init__(self): - super(DevpollSelector, self).__init__() + super().__init__() self._devpoll = select.devpoll() def fileno(self): return self._devpoll.fileno() def register(self, fileobj, events, data=None): - key = super(DevpollSelector, self).register(fileobj, events, data) + key = super().register(fileobj, events, data) poll_events = 0 if events & EVENT_READ: poll_events |= select.POLLIN @@ -509,7 +511,7 @@ def register(self, fileobj, events, data=None): return key def unregister(self, fileobj): - key = super(DevpollSelector, self).unregister(fileobj) + key = super().unregister(fileobj) self._devpoll.unregister(key.fd) return key @@ -544,7 +546,7 @@ def select(self, timeout=None): def close(self): self._devpoll.close() - super(DevpollSelector, self).close() + super().close() if hasattr(select, 'kqueue'): @@ -553,14 +555,14 @@ class KqueueSelector(_BaseSelectorImpl): """Kqueue-based selector.""" def __init__(self): - super(KqueueSelector, self).__init__() + super().__init__() self._kqueue = select.kqueue() def fileno(self): return self._kqueue.fileno() def register(self, fileobj, events, data=None): - key = super(KqueueSelector, self).register(fileobj, events, data) + key = super().register(fileobj, events, data) if events & EVENT_READ: kev = select.kevent(key.fd, select.KQ_FILTER_READ, select.KQ_EV_ADD) @@ -572,7 +574,7 @@ def register(self, fileobj, events, data=None): return key def unregister(self, fileobj): - key = super(KqueueSelector, self).unregister(fileobj) + key = super().unregister(fileobj) if key.events & EVENT_READ: kev = select.kevent(key.fd, select.KQ_FILTER_READ, select.KQ_EV_DELETE) @@ -619,7 +621,7 @@ def select(self, timeout=None): def close(self): self._kqueue.close() - super(KqueueSelector, self).close() + super().close() # Choose the best implementation, roughly: diff --git a/kafka/vendor/six.py b/kafka/vendor/six.py index 3621a0ab4..e7057ee30 100644 --- a/kafka/vendor/six.py +++ b/kafka/vendor/six.py @@ -1,6 +1,6 @@ # pylint: skip-file -# Copyright (c) 2010-2017 Benjamin Peterson +# Copyright (c) 2010-2020 Benjamin Peterson # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -22,7 +22,6 @@ """Utilities for writing code that runs on Python 2 and 3""" -from __future__ import absolute_import import functools import itertools @@ -31,7 +30,7 @@ import types __author__ = "Benjamin Peterson " -__version__ = "1.11.0" +__version__ = "1.16.0" # Useful for very coarse version differentiation. @@ -59,7 +58,7 @@ MAXSIZE = int((1 << 31) - 1) else: # It's possible to have sizeof(long) != sizeof(Py_ssize_t). 
- class X(object): + class X: def __len__(self): return 1 << 31 @@ -77,6 +76,11 @@ def __len__(self): # https://github.com/dpkp/kafka-python/pull/979#discussion_r100403389 # del X +if PY34: + from importlib.util import spec_from_loader +else: + spec_from_loader = None + def _add_doc(func, doc): """Add documentation to a function.""" @@ -89,7 +93,7 @@ def _import_module(name): return sys.modules[name] -class _LazyDescr(object): +class _LazyDescr: def __init__(self, name): self.name = name @@ -109,7 +113,7 @@ def __get__(self, obj, tp): class MovedModule(_LazyDescr): def __init__(self, name, old, new=None): - super(MovedModule, self).__init__(name) + super().__init__(name) if PY3: if new is None: new = name @@ -130,7 +134,7 @@ def __getattr__(self, attr): class _LazyModule(types.ModuleType): def __init__(self, name): - super(_LazyModule, self).__init__(name) + super().__init__(name) self.__doc__ = self.__class__.__doc__ def __dir__(self): @@ -145,7 +149,7 @@ def __dir__(self): class MovedAttribute(_LazyDescr): def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): - super(MovedAttribute, self).__init__(name) + super().__init__(name) if PY3: if new_mod is None: new_mod = name @@ -167,7 +171,7 @@ def _resolve(self): return getattr(module, self.attr) -class _SixMetaPathImporter(object): +class _SixMetaPathImporter: """ A meta path importer to import six.moves and its submodules. @@ -192,6 +196,11 @@ def find_module(self, fullname, path=None): return self return None + def find_spec(self, fullname, path, target=None): + if fullname in self.known_modules: + return spec_from_loader(fullname, self) + return None + def __get_module(self, fullname): try: return self.known_modules[fullname] @@ -229,6 +238,12 @@ def get_code(self, fullname): return None get_source = get_code # same as get_code + def create_module(self, spec): + return self.load_module(spec.name) + + def exec_module(self, module): + pass + _importer = _SixMetaPathImporter(__name__) @@ -253,7 +268,7 @@ class _MovedItems(_LazyModule): MovedAttribute("reduce", "__builtin__", "functools"), MovedAttribute("shlex_quote", "pipes", "shlex", "quote"), MovedAttribute("StringIO", "StringIO", "io"), - MovedAttribute("UserDict", "UserDict", "collections"), + MovedAttribute("UserDict", "UserDict", "collections", "IterableUserDict", "UserDict"), MovedAttribute("UserList", "UserList", "collections"), MovedAttribute("UserString", "UserString", "collections"), MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), @@ -261,9 +276,11 @@ class _MovedItems(_LazyModule): MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), MovedModule("builtins", "__builtin__"), MovedModule("configparser", "ConfigParser"), + MovedModule("collections_abc", "collections", "collections.abc" if sys.version_info >= (3, 3) else "collections"), MovedModule("copyreg", "copy_reg"), MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), - MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"), + MovedModule("dbm_ndbm", "dbm", "dbm.ndbm"), + MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread" if sys.version_info < (3, 9) else "_thread"), MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), MovedModule("http_cookies", "Cookie", "http.cookies"), MovedModule("html_entities", "htmlentitydefs", "html.entities"), @@ -508,7 +525,7 @@ def remove_move(name): try: del moves.__dict__[name] except KeyError: - raise AttributeError("no such move, %r" % (name,)) + raise AttributeError(f"no such move, {name!r}") if PY3: 
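The find_spec, create_module, and exec_module methods added to _SixMetaPathImporter above bring the vendored six importer in line with the PEP 451 finder/loader protocol, which supersedes the deprecated find_module/load_module hooks. A generic, self-contained sketch of that protocol (an illustrative stub with a made-up module name, not six's actual importer):

import sys
from importlib.util import spec_from_loader

class StubFinder:
    # Acts as both meta-path finder and loader, like _SixMetaPathImporter.
    def __init__(self, known):
        self.known = known  # module name -> dict of attributes to expose

    def find_spec(self, fullname, path=None, target=None):
        if fullname in self.known:
            return spec_from_loader(fullname, self)
        return None

    def create_module(self, spec):
        return None  # fall back to the default module creation

    def exec_module(self, module):
        for name, value in self.known[module.__name__].items():
            setattr(module, name, value)

sys.meta_path.insert(0, StubFinder({"demo_mod": {"answer": 42}}))
import demo_mod
assert demo_mod.answer == 42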
@@ -564,7 +581,7 @@ def create_bound_method(func, obj): def create_unbound_method(func, cls): return types.MethodType(func, None, cls) - class Iterator(object): + class Iterator: def next(self): return type(self).__next__(self) @@ -643,13 +660,16 @@ def u(s): import io StringIO = io.StringIO BytesIO = io.BytesIO + del io _assertCountEqual = "assertCountEqual" if sys.version_info[1] <= 1: _assertRaisesRegex = "assertRaisesRegexp" _assertRegex = "assertRegexpMatches" + _assertNotRegex = "assertNotRegexpMatches" else: _assertRaisesRegex = "assertRaisesRegex" _assertRegex = "assertRegex" + _assertNotRegex = "assertNotRegex" else: def b(s): return s @@ -671,6 +691,7 @@ def indexbytes(buf, i): _assertCountEqual = "assertItemsEqual" _assertRaisesRegex = "assertRaisesRegexp" _assertRegex = "assertRegexpMatches" + _assertNotRegex = "assertNotRegexpMatches" _add_doc(b, """Byte literal""") _add_doc(u, """Text literal""") @@ -687,6 +708,10 @@ def assertRegex(self, *args, **kwargs): return getattr(self, _assertRegex)(*args, **kwargs) +def assertNotRegex(self, *args, **kwargs): + return getattr(self, _assertNotRegex)(*args, **kwargs) + + if PY3: exec_ = getattr(moves.builtins, "exec") @@ -722,16 +747,7 @@ def exec_(_code_, _globs_=None, _locs_=None): """) -if sys.version_info[:2] == (3, 2): - exec_("""def raise_from(value, from_value): - try: - if from_value is None: - raise value - raise value from from_value - finally: - value = None -""") -elif sys.version_info[:2] > (3, 2): +if sys.version_info[:2] > (3,): exec_("""def raise_from(value, from_value): try: raise value from from_value @@ -811,13 +827,33 @@ def print_(*args, **kwargs): _add_doc(reraise, """Reraise an exception.""") if sys.version_info[0:2] < (3, 4): + # This does exactly the same what the :func:`py3:functools.update_wrapper` + # function does on Python versions after 3.2. It sets the ``__wrapped__`` + # attribute on ``wrapper`` object and it doesn't raise an error if any of + # the attributes mentioned in ``assigned`` and ``updated`` are missing on + # ``wrapped`` object. + def _update_wrapper(wrapper, wrapped, + assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + for attr in assigned: + try: + value = getattr(wrapped, attr) + except AttributeError: + continue + else: + setattr(wrapper, attr, value) + for attr in updated: + getattr(wrapper, attr).update(getattr(wrapped, attr, {})) + wrapper.__wrapped__ = wrapped + return wrapper + _update_wrapper.__doc__ = functools.update_wrapper.__doc__ + def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, updated=functools.WRAPPER_UPDATES): - def wrapper(f): - f = functools.wraps(wrapped, assigned, updated)(f) - f.__wrapped__ = wrapped - return f - return wrapper + return functools.partial(_update_wrapper, wrapped=wrapped, + assigned=assigned, updated=updated) + wraps.__doc__ = functools.wraps.__doc__ + else: wraps = functools.wraps @@ -830,7 +866,15 @@ def with_metaclass(meta, *bases): class metaclass(type): def __new__(cls, name, this_bases, d): - return meta(name, bases, d) + if sys.version_info[:2] >= (3, 7): + # This version introduced PEP 560 that requires a bit + # of extra care (we mimic what is done by __build_class__). 
+ resolved_bases = types.resolve_bases(bases) + if resolved_bases is not bases: + d['__orig_bases__'] = bases + else: + resolved_bases = bases + return meta(name, resolved_bases, d) @classmethod def __prepare__(cls, name, this_bases): @@ -850,13 +894,75 @@ def wrapper(cls): orig_vars.pop(slots_var) orig_vars.pop('__dict__', None) orig_vars.pop('__weakref__', None) + if hasattr(cls, '__qualname__'): + orig_vars['__qualname__'] = cls.__qualname__ return metaclass(cls.__name__, cls.__bases__, orig_vars) return wrapper +def ensure_binary(s, encoding='utf-8', errors='strict'): + """Coerce **s** to six.binary_type. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> encoded to `bytes` + - `bytes` -> `bytes` + """ + if isinstance(s, binary_type): + return s + if isinstance(s, text_type): + return s.encode(encoding, errors) + raise TypeError("not expecting type '%s'" % type(s)) + + +def ensure_str(s, encoding='utf-8', errors='strict'): + """Coerce *s* to `str`. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + # Optimization: Fast return for the common case. + if type(s) is str: + return s + if PY2 and isinstance(s, text_type): + return s.encode(encoding, errors) + elif PY3 and isinstance(s, binary_type): + return s.decode(encoding, errors) + elif not isinstance(s, (text_type, binary_type)): + raise TypeError("not expecting type '%s'" % type(s)) + return s + + +def ensure_text(s, encoding='utf-8', errors='strict'): + """Coerce *s* to six.text_type. + + For Python 2: + - `unicode` -> `unicode` + - `str` -> `unicode` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + if isinstance(s, binary_type): + return s.decode(encoding, errors) + elif isinstance(s, text_type): + return s + else: + raise TypeError("not expecting type '%s'" % type(s)) + + def python_2_unicode_compatible(klass): """ - A decorator that defines __unicode__ and __str__ methods under Python 2. + A class decorator that defines __unicode__ and __str__ methods under Python 2. Under Python 3 it does nothing. 
To support Python 2 and 3 with a single code base, define a __str__ method diff --git a/kafka/vendor/socketpair.py b/kafka/vendor/socketpair.py index b55e629ee..8099f8aea 100644 --- a/kafka/vendor/socketpair.py +++ b/kafka/vendor/socketpair.py @@ -1,6 +1,5 @@ # pylint: skip-file # vendored from https://github.com/mhils/backports.socketpair -from __future__ import absolute_import import sys import socket @@ -35,17 +34,10 @@ def socketpair(family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0): csock = socket.socket(family, type, proto) try: csock.setblocking(False) - if sys.version_info >= (3, 0): - try: - csock.connect((addr, port)) - except (BlockingIOError, InterruptedError): - pass - else: - try: - csock.connect((addr, port)) - except socket.error as e: - if e.errno != errno.WSAEWOULDBLOCK: - raise + try: + csock.connect((addr, port)) + except (BlockingIOError, InterruptedError): + pass csock.setblocking(True) ssock, _ = lsock.accept() except Exception: diff --git a/kafka/version.py b/kafka/version.py index 6533622a9..ee5ea98ce 100644 --- a/kafka/version.py +++ b/kafka/version.py @@ -1 +1,6 @@ -__version__ = '2.0.2-dev' +import sys + +from importlib.metadata import version + + +__version__ = version("kafka-python-ng") diff --git a/requirements-dev.txt b/requirements-dev.txt index d2830905b..3f6e5542c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,17 +1,18 @@ -flake8==3.4.1 -pytest==3.10.0 -pytest-cov==2.6.0 -docker-py==1.10.6 -coveralls==1.5.1 -Sphinx==1.6.4 -lz4==2.1.2 -xxhash==1.3.0 -python-snappy==0.5.3 -tox==3.5.3 -mock==3.0.5 -pylint==1.9.3 -pytest-pylint==0.12.3 -pytest-mock==1.10.0 -sphinx-rtd-theme==0.2.4 -crc32c==1.7 -py==1.8.0 +coveralls +crc32c +docker-py +flake8 +lz4 +mock +py +pylint +pytest +pytest-cov +pytest-mock +pytest-pylint +python-snappy +Sphinx +sphinx-rtd-theme +tox +xxhash +botocore \ No newline at end of file diff --git a/servers/2.6.0/resources/kafka.properties b/servers/2.6.0/resources/kafka.properties new file mode 100644 index 000000000..5775cfdc4 --- /dev/null +++ b/servers/2.6.0/resources/kafka.properties @@ -0,0 +1,147 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. 
+broker.id={broker_id} + +############################# Socket Server Settings ############################# + +listeners={transport}://{host}:{port} +security.inter.broker.protocol={transport} + +{sasl_config} + +ssl.keystore.location={ssl_dir}/kafka.server.keystore.jks +ssl.keystore.password=foobar +ssl.key.password=foobar +ssl.truststore.location={ssl_dir}/kafka.server.truststore.jks +ssl.truststore.password=foobar + +authorizer.class.name=kafka.security.auth.SimpleAclAuthorizer +allow.everyone.if.no.acl.found=true + +# The port the socket server listens on +#port=9092 + +# Hostname the broker will bind to. If not set, the server will bind to all interfaces +#host.name=localhost + +# Hostname the broker will advertise to producers and consumers. If not set, it uses the +# value for "host.name" if configured. Otherwise, it will use the value returned from +# java.net.InetAddress.getCanonicalHostName(). +#advertised.host.name= + +# The port to publish to ZooKeeper for clients to use. If this is not set, +# it will publish the same port that the broker binds to. +#advertised.port= + +# The number of threads handling network requests +num.network.threads=3 + +# The number of threads doing disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma seperated list of directories under which to store log files +log.dirs={tmp_dir}/data + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions={partitions} +default.replication.factor={replicas} + +## Short Replica Lag -- Drops failed brokers out of ISR +replica.lag.time.max.ms=1000 +replica.socket.timeout.ms=1000 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. 
Deletion always happens +# from the end of the log. + +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false + +# tune down offset topics to reduce setup time in tests +offsets.commit.timeout.ms=500 +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=1 + +# Allow shorter session timeouts for tests +group.min.session.timeout.ms=1000 + + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect={zk_host}:{zk_port}/{zk_chroot} + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=30000 +# We want to expire kafka broker sessions quickly when brokers die b/c we restart them quickly +zookeeper.session.timeout.ms=500 diff --git a/servers/2.6.0/resources/kafka_server_jaas.conf b/servers/2.6.0/resources/kafka_server_jaas.conf new file mode 100644 index 000000000..af4306c86 --- /dev/null +++ b/servers/2.6.0/resources/kafka_server_jaas.conf @@ -0,0 +1,4 @@ +KafkaServer {{ + {jaas_config} +}}; +Client {{}}; diff --git a/servers/2.6.0/resources/log4j.properties b/servers/2.6.0/resources/log4j.properties new file mode 100644 index 000000000..b0b76aa79 --- /dev/null +++ b/servers/2.6.0/resources/log4j.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +log4j.rootLogger=INFO, stdout, logfile + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + +log4j.appender.logfile=org.apache.log4j.FileAppender +log4j.appender.logfile.File=${kafka.logs.dir}/server.log +log4j.appender.logfile.layout=org.apache.log4j.PatternLayout +log4j.appender.logfile.layout.ConversionPattern=[%d] %p %m (%c)%n diff --git a/servers/2.6.0/resources/zookeeper.properties b/servers/2.6.0/resources/zookeeper.properties new file mode 100644 index 000000000..e3fd09742 --- /dev/null +++ b/servers/2.6.0/resources/zookeeper.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# the directory where the snapshot is stored. +dataDir={tmp_dir} +# the port at which the clients will connect +clientPort={port} +clientPortAddress={host} +# disable the per-ip limit on the number of connections since this is a non-production config +maxClientCnxns=0 diff --git a/setup.cfg b/setup.cfg index 5c6311daf..76daa0897 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,4 @@ universal=1 [metadata] -license_file = LICENSE +license_files = LICENSE diff --git a/setup.py b/setup.py index 8bc484c9a..dd4e5de90 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ # Pull version from source without importing # since we can't import something we haven't built yet :) -exec(open('kafka/version.py').read()) + class Tox(Command): @@ -31,31 +31,42 @@ def run(cls): README = f.read() setup( - name="kafka-python", - version=__version__, - + name="kafka-python-ng", + python_requires=">=3.8", + use_scm_version=True, + setup_requires=["setuptools_scm"], tests_require=test_require, + extras_require={ + "crc32c": ["crc32c"], + "lz4": ["lz4"], + "snappy": ["python-snappy"], + "zstd": ["zstandard"], + }, cmdclass={"test": Tox}, packages=find_packages(exclude=['test']), author="Dana Powers", author_email="dana.powers@gmail.com", - url="https://github.com/dpkp/kafka-python", + maintainer="William Barnhart", + maintainer_email="williambbarnhart@gmail.com", + url="https://github.com/wbarnha/kafka-python-ng", license="Apache License 2.0", description="Pure Python client for Apache Kafka", long_description=README, - keywords="apache kafka", + keywords=[ + "apache kafka", + "kafka", + ], classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: 
Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries :: Python Modules", ] diff --git a/test/fixtures.py b/test/fixtures.py index 26fb5e89d..d9c072b86 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -25,7 +25,7 @@ def get_open_port(): sock = socket.socket() - sock.bind(("", 0)) + sock.bind(("127.0.0.1", 0)) port = sock.getsockname()[1] sock.close() return port diff --git a/test/record/test_default_records.py b/test/record/test_default_records.py index c3a7b02c8..a3f69da6d 100644 --- a/test/record/test_default_records.py +++ b/test/record/test_default_records.py @@ -1,7 +1,5 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals import pytest -from mock import patch +from unittest.mock import patch import kafka.codec from kafka.record.default_records import ( DefaultRecordBatch, DefaultRecordBatchBuilder @@ -197,7 +195,7 @@ def test_unavailable_codec(magic, compression_type, name, checker_name): magic=2, compression_type=compression_type, is_transactional=0, producer_id=-1, producer_epoch=-1, base_sequence=-1, batch_size=1024) - error_msg = "Libraries for {} compression codec not found".format(name) + error_msg = f"Libraries for {name} compression codec not found" with pytest.raises(UnsupportedCodecError, match=error_msg): builder.append(0, timestamp=None, key=None, value=b"M", headers=[]) builder.build() diff --git a/test/record/test_legacy_records.py b/test/record/test_legacy_records.py index 43970f7c9..f16c29809 100644 --- a/test/record/test_legacy_records.py +++ b/test/record/test_legacy_records.py @@ -1,6 +1,5 @@ -from __future__ import unicode_literals import pytest -from mock import patch +from unittest.mock import patch from kafka.record.legacy_records import ( LegacyRecordBatch, LegacyRecordBatchBuilder ) @@ -186,7 +185,7 @@ def test_unavailable_codec(magic, compression_type, name, checker_name): # Check that builder raises error builder = LegacyRecordBatchBuilder( magic=magic, compression_type=compression_type, batch_size=1024) - error_msg = "Libraries for {} compression codec not found".format(name) + error_msg = f"Libraries for {name} compression codec not found" with pytest.raises(UnsupportedCodecError, match=error_msg): builder.append(0, timestamp=None, key=None, value=b"M") builder.build() diff --git a/test/record/test_records.py b/test/record/test_records.py index 9f72234ae..adde3ba06 100644 --- a/test/record/test_records.py +++ b/test/record/test_records.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals import pytest from kafka.record import MemoryRecords, MemoryRecordsBuilder from kafka.errors import CorruptRecordException diff --git a/test/test_admin_integration.py b/test/test_admin_integration.py index 37b140573..283023049 100644 --- a/test/test_admin_integration.py +++ b/test/test_admin_integration.py @@ -1,10 +1,15 @@ +import platform + import pytest -from test.testutil import env_kafka_version +from logging import info +from test.testutil import env_kafka_version, random_string +from threading import Event, Thread +from time import time, sleep -from kafka.errors import NoError from kafka.admin import ( ACLFilter, ACLOperation, ACLPermissionType, ResourcePattern, 
ResourceType, ACL, ConfigResource, ConfigResourceType) +from kafka.errors import (NoError, GroupCoordinatorNotAvailableError, NonEmptyGroupError, GroupIdNotFoundError) @pytest.mark.skipif(env_kafka_version() < (0, 11), reason="ACL features require broker >=0.11") @@ -138,3 +143,177 @@ def test_describe_configs_invalid_broker_id_raises(kafka_admin_client): with pytest.raises(ValueError): configs = kafka_admin_client.describe_configs([ConfigResource(ConfigResourceType.BROKER, broker_id)]) + + +@pytest.mark.skipif(env_kafka_version() < (0, 11), reason='Describe consumer group requires broker >=0.11') +def test_describe_consumer_group_does_not_exist(kafka_admin_client): + """Tests that the describe consumer group call fails if the group coordinator is not available + """ + with pytest.raises(GroupCoordinatorNotAvailableError): + group_description = kafka_admin_client.describe_consumer_groups(['test']) + + +@pytest.mark.skipif( + platform.python_implementation() == "PyPy", reason="Works on PyPy if run locally, but not in CI/CD pipeline." +) +@pytest.mark.skipif(env_kafka_version() < (0, 11), reason='Describe consumer group requires broker >=0.11') +def test_describe_consumer_group_exists(kafka_admin_client, kafka_consumer_factory, topic): + """Tests that the describe consumer group call returns valid consumer group information + This test takes inspiration from the test 'test_group' in test_consumer_group.py. + """ + consumers = {} + stop = {} + threads = {} + random_group_id = 'test-group-' + random_string(6) + group_id_list = [random_group_id, random_group_id + '_2'] + generations = {group_id_list[0]: set(), group_id_list[1]: set()} + def consumer_thread(i, group_id): + assert i not in consumers + assert i not in stop + stop[i] = Event() + consumers[i] = kafka_consumer_factory(group_id=group_id) + while not stop[i].is_set(): + consumers[i].poll(20) + consumers[i].close() + consumers[i] = None + stop[i] = None + + num_consumers = 3 + for i in range(num_consumers): + group_id = group_id_list[i % 2] + t = Thread(target=consumer_thread, args=(i, group_id,)) + t.start() + threads[i] = t + + try: + timeout = time() + 35 + while True: + for c in range(num_consumers): + + # Verify all consumers have been created + if c not in consumers: + break + + # Verify all consumers have an assignment + elif not consumers[c].assignment(): + break + + # If all consumers exist and have an assignment + else: + + info('All consumers have assignment... 
checking for stable group') + # Verify all consumers are in the same generation + # then log state and break while loop + + for consumer in consumers.values(): + generations[consumer.config['group_id']].add(consumer._coordinator._generation.generation_id) + + is_same_generation = any([len(consumer_generation) == 1 for consumer_generation in generations.values()]) + + # New generation assignment is not complete until + # coordinator.rejoining = False + rejoining = any([consumer._coordinator.rejoining + for consumer in list(consumers.values())]) + + if not rejoining and is_same_generation: + break + else: + sleep(1) + assert time() < timeout, "timeout waiting for assignments" + + info('Group stabilized; verifying assignment') + output = kafka_admin_client.describe_consumer_groups(group_id_list) + assert len(output) == 2 + consumer_groups = set() + for consumer_group in output: + assert(consumer_group.group in group_id_list) + if consumer_group.group == group_id_list[0]: + assert(len(consumer_group.members) == 2) + else: + assert(len(consumer_group.members) == 1) + for member in consumer_group.members: + assert(member.member_metadata.subscription[0] == topic) + assert(member.member_assignment.assignment[0][0] == topic) + consumer_groups.add(consumer_group.group) + assert(sorted(list(consumer_groups)) == group_id_list) + finally: + info('Shutting down %s consumers', num_consumers) + for c in range(num_consumers): + info('Stopping consumer %s', c) + stop[c].set() + threads[c].join() + threads[c] = None + + +@pytest.mark.skipif(env_kafka_version() < (1, 1), reason="Delete consumer groups requires broker >=1.1") +def test_delete_consumergroups(kafka_admin_client, kafka_consumer_factory, send_messages): + random_group_id = 'test-group-' + random_string(6) + group1 = random_group_id + "_1" + group2 = random_group_id + "_2" + group3 = random_group_id + "_3" + + send_messages(range(0, 100), partition=0) + consumer1 = kafka_consumer_factory(group_id=group1) + next(consumer1) + consumer1.close() + + consumer2 = kafka_consumer_factory(group_id=group2) + next(consumer2) + consumer2.close() + + consumer3 = kafka_consumer_factory(group_id=group3) + next(consumer3) + consumer3.close() + + consumergroups = {group_id for group_id, _ in kafka_admin_client.list_consumer_groups()} + assert group1 in consumergroups + assert group2 in consumergroups + assert group3 in consumergroups + + delete_results = { + group_id: error + for group_id, error in kafka_admin_client.delete_consumer_groups([group1, group2]) + } + assert delete_results[group1] == NoError + assert delete_results[group2] == NoError + assert group3 not in delete_results + + consumergroups = {group_id for group_id, _ in kafka_admin_client.list_consumer_groups()} + assert group1 not in consumergroups + assert group2 not in consumergroups + assert group3 in consumergroups + + +@pytest.mark.skipif(env_kafka_version() < (1, 1), reason="Delete consumer groups requires broker >=1.1") +def test_delete_consumergroups_with_errors(kafka_admin_client, kafka_consumer_factory, send_messages): + random_group_id = 'test-group-' + random_string(6) + group1 = random_group_id + "_1" + group2 = random_group_id + "_2" + group3 = random_group_id + "_3" + + send_messages(range(0, 100), partition=0) + consumer1 = kafka_consumer_factory(group_id=group1) + next(consumer1) + consumer1.close() + + consumer2 = kafka_consumer_factory(group_id=group2) + next(consumer2) + + consumergroups = {group_id for group_id, _ in kafka_admin_client.list_consumer_groups()} + assert group1 in 
consumergroups + assert group2 in consumergroups + assert group3 not in consumergroups + + delete_results = { + group_id: error + for group_id, error in kafka_admin_client.delete_consumer_groups([group1, group2, group3]) + } + + assert delete_results[group1] == NoError + assert delete_results[group2] == NonEmptyGroupError + assert delete_results[group3] == GroupIdNotFoundError + + consumergroups = {group_id for group_id, _ in kafka_admin_client.list_consumer_groups()} + assert group1 not in consumergroups + assert group2 in consumergroups + assert group3 not in consumergroups diff --git a/test/test_assignors.py b/test/test_assignors.py index 0821caf83..858ef426d 100644 --- a/test/test_assignors.py +++ b/test/test_assignors.py @@ -1,28 +1,45 @@ # pylint: skip-file from __future__ import absolute_import +from collections import defaultdict +from random import randint, sample + import pytest +from kafka.structs import TopicPartition from kafka.coordinator.assignors.range import RangePartitionAssignor from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor -from kafka.coordinator.protocol import ConsumerProtocolMemberAssignment +from kafka.coordinator.assignors.sticky.sticky_assignor import StickyPartitionAssignor, StickyAssignorUserDataV1 +from kafka.coordinator.protocol import ConsumerProtocolMemberAssignment, ConsumerProtocolMemberMetadata +from kafka.vendor import six + +@pytest.fixture(autouse=True) +def reset_sticky_assignor(): + yield + StickyPartitionAssignor.member_assignment = None + StickyPartitionAssignor.generation = -1 -@pytest.fixture -def cluster(mocker): + +def create_cluster(mocker, topics, topics_partitions=None, topic_partitions_lambda=None): cluster = mocker.MagicMock() - cluster.partitions_for_topic.return_value = set([0, 1, 2]) + cluster.topics.return_value = topics + if topics_partitions is not None: + cluster.partitions_for_topic.return_value = topics_partitions + if topic_partitions_lambda is not None: + cluster.partitions_for_topic.side_effect = topic_partitions_lambda return cluster -def test_assignor_roundrobin(cluster): +def test_assignor_roundrobin(mocker): assignor = RoundRobinPartitionAssignor member_metadata = { - 'C0': assignor.metadata(set(['t0', 't1'])), - 'C1': assignor.metadata(set(['t0', 't1'])), + 'C0': assignor.metadata({'t0', 't1'}), + 'C1': assignor.metadata({'t0', 't1'}), } + cluster = create_cluster(mocker, {'t0', 't1'}, topics_partitions={0, 1, 2}) ret = assignor.assign(cluster, member_metadata) expected = { 'C0': ConsumerProtocolMemberAssignment( @@ -36,14 +53,15 @@ def test_assignor_roundrobin(cluster): assert ret[member].encode() == expected[member].encode() -def test_assignor_range(cluster): +def test_assignor_range(mocker): assignor = RangePartitionAssignor member_metadata = { - 'C0': assignor.metadata(set(['t0', 't1'])), - 'C1': assignor.metadata(set(['t0', 't1'])), + 'C0': assignor.metadata({'t0', 't1'}), + 'C1': assignor.metadata({'t0', 't1'}), } + cluster = create_cluster(mocker, {'t0', 't1'}, topics_partitions={0, 1, 2}) ret = assignor.assign(cluster, member_metadata) expected = { 'C0': ConsumerProtocolMemberAssignment( @@ -55,3 +73,799 @@ def test_assignor_range(cluster): assert set(ret) == set(expected) for member in ret: assert ret[member].encode() == expected[member].encode() + + +def test_sticky_assignor1(mocker): + """ + Given: there are three consumers C0, C1, C2, + four topics t0, t1, t2, t3, and each topic has 2 partitions, + resulting in partitions t0p0, t0p1, t1p0, t1p1, t2p0, t2p1, t3p0, t3p1. 
+ Each consumer is subscribed to all three topics. + Then: perform fresh assignment + Expected: the assignment is + - C0: [t0p0, t1p1, t3p0] + - C1: [t0p1, t2p0, t3p1] + - C2: [t1p0, t2p1] + Then: remove C1 consumer and perform the reassignment + Expected: the new assignment is + - C0 [t0p0, t1p1, t2p0, t3p0] + - C2 [t0p1, t1p0, t2p1, t3p1] + """ + cluster = create_cluster(mocker, topics={'t0', 't1', 't2', 't3'}, topics_partitions={0, 1}) + + subscriptions = { + 'C0': {'t0', 't1', 't2', 't3'}, + 'C1': {'t0', 't1', 't2', 't3'}, + 'C2': {'t0', 't1', 't2', 't3'}, + } + member_metadata = make_member_metadata(subscriptions) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C0': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t0', [0]), ('t1', [1]), ('t3', [0])], b''), + 'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t0', [1]), ('t2', [0]), ('t3', [1])], b''), + 'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [0]), ('t2', [1])], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + del subscriptions['C1'] + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, sticky_assignment[member].partitions()) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C0': ConsumerProtocolMemberAssignment( + StickyPartitionAssignor.version, [('t0', [0]), ('t1', [1]), ('t2', [0]), ('t3', [0])], b'' + ), + 'C2': ConsumerProtocolMemberAssignment( + StickyPartitionAssignor.version, [('t0', [1]), ('t1', [0]), ('t2', [1]), ('t3', [1])], b'' + ), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def test_sticky_assignor2(mocker): + """ + Given: there are three consumers C0, C1, C2, + and three topics t0, t1, t2, with 1, 2, and 3 partitions respectively. + Therefore, the partitions are t0p0, t1p0, t1p1, t2p0, t2p1, t2p2. + C0 is subscribed to t0; + C1 is subscribed to t0, t1; + and C2 is subscribed to t0, t1, t2. 
+ Then: perform the assignment + Expected: the assignment is + - C0 [t0p0] + - C1 [t1p0, t1p1] + - C2 [t2p0, t2p1, t2p2] + Then: remove C0 and perform the assignment + Expected: the assignment is + - C1 [t0p0, t1p0, t1p1] + - C2 [t2p0, t2p1, t2p2] + """ + + partitions = {'t0': {0}, 't1': {0, 1}, 't2': {0, 1, 2}} + cluster = create_cluster(mocker, topics={'t0', 't1', 't2'}, topic_partitions_lambda=lambda t: partitions[t]) + + subscriptions = { + 'C0': {'t0'}, + 'C1': {'t0', 't1'}, + 'C2': {'t0', 't1', 't2'}, + } + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, []) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C0': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t0', [0])], b''), + 'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [0, 1])], b''), + 'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t2', [0, 1, 2])], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + del subscriptions['C0'] + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, sticky_assignment[member].partitions()) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t0', [0]), ('t1', [0, 1])], b''), + 'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t2', [0, 1, 2])], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def test_sticky_one_consumer_no_topic(mocker): + cluster = create_cluster(mocker, topics={}, topics_partitions={}) + + subscriptions = { + 'C': set(), + } + member_metadata = make_member_metadata(subscriptions) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def test_sticky_one_consumer_nonexisting_topic(mocker): + cluster = create_cluster(mocker, topics={}, topics_partitions={}) + + subscriptions = { + 'C': {'t'}, + } + member_metadata = make_member_metadata(subscriptions) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def test_sticky_one_consumer_one_topic(mocker): + cluster = create_cluster(mocker, topics={'t'}, topics_partitions={0, 1, 2}) + + subscriptions = { + 'C': {'t'}, + } + member_metadata = make_member_metadata(subscriptions) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t', [0, 1, 2])], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def test_sticky_should_only_assign_partitions_from_subscribed_topics(mocker): + cluster = create_cluster(mocker, topics={'t', 'other-t'}, topics_partitions={0, 1, 2}) + + subscriptions = { + 'C': {'t'}, + } + member_metadata = make_member_metadata(subscriptions) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 
'C': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t', [0, 1, 2])], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def test_sticky_one_consumer_multiple_topics(mocker): + cluster = create_cluster(mocker, topics={'t1', 't2'}, topics_partitions={0, 1, 2}) + + subscriptions = { + 'C': {'t1', 't2'}, + } + member_metadata = make_member_metadata(subscriptions) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [0, 1, 2]), ('t2', [0, 1, 2])], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def test_sticky_two_consumers_one_topic_one_partition(mocker): + cluster = create_cluster(mocker, topics={'t'}, topics_partitions={0}) + + subscriptions = { + 'C1': {'t'}, + 'C2': {'t'}, + } + member_metadata = make_member_metadata(subscriptions) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t', [0])], b''), + 'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def test_sticky_two_consumers_one_topic_two_partitions(mocker): + cluster = create_cluster(mocker, topics={'t'}, topics_partitions={0, 1}) + + subscriptions = { + 'C1': {'t'}, + 'C2': {'t'}, + } + member_metadata = make_member_metadata(subscriptions) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t', [0])], b''), + 'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t', [1])], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def test_sticky_multiple_consumers_mixed_topic_subscriptions(mocker): + partitions = {'t1': {0, 1, 2}, 't2': {0, 1}} + cluster = create_cluster(mocker, topics={'t1', 't2'}, topic_partitions_lambda=lambda t: partitions[t]) + + subscriptions = { + 'C1': {'t1'}, + 'C2': {'t1', 't2'}, + 'C3': {'t1'}, + } + member_metadata = make_member_metadata(subscriptions) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [0, 2])], b''), + 'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t2', [0, 1])], b''), + 'C3': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [1])], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def test_sticky_add_remove_consumer_one_topic(mocker): + cluster = create_cluster(mocker, topics={'t'}, topics_partitions={0, 1, 2}) + + subscriptions = { + 'C1': {'t'}, + } + member_metadata = make_member_metadata(subscriptions) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t', [0, 1, 2])], b''), + } + assert_assignment(assignment, expected_assignment) + + subscriptions = { + 'C1': {'t'}, + 'C2': {'t'}, + } + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata( + topics, assignment[member].partitions() if member in assignment else [] + ) + + assignment = 
StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + + subscriptions = { + 'C2': {'t'}, + } + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, assignment[member].partitions()) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + assert len(assignment['C2'].assignment[0][1]) == 3 + + +def test_sticky_add_remove_topic_two_consumers(mocker): + cluster = create_cluster(mocker, topics={'t1', 't2'}, topics_partitions={0, 1, 2}) + + subscriptions = { + 'C1': {'t1'}, + 'C2': {'t1'}, + } + member_metadata = make_member_metadata(subscriptions) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [0, 2])], b''), + 'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [1])], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + subscriptions = { + 'C1': {'t1', 't2'}, + 'C2': {'t1', 't2'}, + } + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, sticky_assignment[member].partitions()) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [0, 2]), ('t2', [1])], b''), + 'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [1]), ('t2', [0, 2])], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + subscriptions = { + 'C1': {'t2'}, + 'C2': {'t2'}, + } + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, sticky_assignment[member].partitions()) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t2', [1])], b''), + 'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t2', [0, 2])], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def test_sticky_reassignment_after_one_consumer_leaves(mocker): + partitions = dict([('t{}'.format(i), set(range(i))) for i in range(1, 20)]) + cluster = create_cluster( + mocker, topics=set(['t{}'.format(i) for i in range(1, 20)]), topic_partitions_lambda=lambda t: partitions[t] + ) + + subscriptions = {} + for i in range(1, 20): + topics = set() + for j in range(1, i + 1): + topics.add('t{}'.format(j)) + subscriptions['C{}'.format(i)] = topics + + member_metadata = make_member_metadata(subscriptions) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + + del subscriptions['C10'] + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, assignment[member].partitions()) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + assert StickyPartitionAssignor._latest_partition_movements.are_sticky() + + +def test_sticky_reassignment_after_one_consumer_added(mocker): + cluster = 
create_cluster(mocker, topics={'t'}, topics_partitions=set(range(20))) + + subscriptions = defaultdict(set) + for i in range(1, 10): + subscriptions['C{}'.format(i)] = {'t'} + + member_metadata = make_member_metadata(subscriptions) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + + subscriptions['C10'] = {'t'} + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata( + topics, assignment[member].partitions() if member in assignment else [] + ) + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + assert StickyPartitionAssignor._latest_partition_movements.are_sticky() + + +def test_sticky_same_subscriptions(mocker): + partitions = dict([('t{}'.format(i), set(range(i))) for i in range(1, 15)]) + cluster = create_cluster( + mocker, topics=set(['t{}'.format(i) for i in range(1, 15)]), topic_partitions_lambda=lambda t: partitions[t] + ) + + subscriptions = defaultdict(set) + for i in range(1, 9): + for j in range(1, len(six.viewkeys(partitions)) + 1): + subscriptions['C{}'.format(i)].add('t{}'.format(j)) + + member_metadata = make_member_metadata(subscriptions) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + + del subscriptions['C5'] + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, assignment[member].partitions()) + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + assert StickyPartitionAssignor._latest_partition_movements.are_sticky() + + +def test_sticky_large_assignment_with_multiple_consumers_leaving(mocker): + n_topics = 40 + n_consumers = 200 + + all_topics = set(['t{}'.format(i) for i in range(1, n_topics + 1)]) + partitions = dict([(t, set(range(1, randint(0, 10) + 1))) for t in all_topics]) + cluster = create_cluster(mocker, topics=all_topics, topic_partitions_lambda=lambda t: partitions[t]) + + subscriptions = defaultdict(set) + for i in range(1, n_consumers + 1): + for j in range(0, randint(1, 20)): + subscriptions['C{}'.format(i)].add('t{}'.format(randint(1, n_topics))) + + member_metadata = make_member_metadata(subscriptions) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, assignment[member].partitions()) + + for i in range(50): + member = 'C{}'.format(randint(1, n_consumers)) + if member in subscriptions: + del subscriptions[member] + del member_metadata[member] + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + assert StickyPartitionAssignor._latest_partition_movements.are_sticky() + + +def test_new_subscription(mocker): + cluster = create_cluster(mocker, topics={'t1', 't2', 't3', 't4'}, topics_partitions={0}) + + subscriptions = defaultdict(set) + for i in range(3): + for j in range(i, 3 * i - 2 + 1): + subscriptions['C{}'.format(i)].add('t{}'.format(j)) + + member_metadata = make_member_metadata(subscriptions) + + assignment = StickyPartitionAssignor.assign(cluster, 
member_metadata) + verify_validity_and_balance(subscriptions, assignment) + + subscriptions['C0'].add('t1') + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, []) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + assert StickyPartitionAssignor._latest_partition_movements.are_sticky() + + +def test_move_existing_assignments(mocker): + cluster = create_cluster(mocker, topics={'t1', 't2', 't3', 't4', 't5', 't6'}, topics_partitions={0}) + + subscriptions = { + 'C1': {'t1', 't2'}, + 'C2': {'t1', 't2', 't3', 't4'}, + 'C3': {'t2', 't3', 't4', 't5', 't6'}, + } + member_assignments = { + 'C1': [TopicPartition('t1', 0)], + 'C2': [TopicPartition('t2', 0), TopicPartition('t3', 0)], + 'C3': [TopicPartition('t4', 0), TopicPartition('t5', 0), TopicPartition('t6', 0)], + } + + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, member_assignments[member]) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + + +def test_stickiness(mocker): + cluster = create_cluster(mocker, topics={'t'}, topics_partitions={0, 1, 2}) + subscriptions = { + 'C1': {'t'}, + 'C2': {'t'}, + 'C3': {'t'}, + 'C4': {'t'}, + } + member_metadata = make_member_metadata(subscriptions) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + partitions_assigned = {} + for consumer, consumer_assignment in six.iteritems(assignment): + assert ( + len(consumer_assignment.partitions()) <= 1 + ), 'Consumer {} is assigned more topic partitions than expected.'.format(consumer) + if len(consumer_assignment.partitions()) == 1: + partitions_assigned[consumer] = consumer_assignment.partitions()[0] + + # removing the potential group leader + del subscriptions['C1'] + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, assignment[member].partitions()) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + assert StickyPartitionAssignor._latest_partition_movements.are_sticky() + + for consumer, consumer_assignment in six.iteritems(assignment): + assert ( + len(consumer_assignment.partitions()) <= 1 + ), 'Consumer {} is assigned more topic partitions than expected.'.format(consumer) + assert ( + consumer not in partitions_assigned or partitions_assigned[consumer] in consumer_assignment.partitions() + ), 'Stickiness was not honored for consumer {}'.format(consumer) + + +def test_assignment_updated_for_deleted_topic(mocker): + def topic_partitions(topic): + if topic == 't1': + return {0} + if topic == 't3': + return set(range(100)) + + cluster = create_cluster(mocker, topics={'t1', 't3'}, topic_partitions_lambda=topic_partitions) + + subscriptions = { + 'C': {'t1', 't2', 't3'}, + } + member_metadata = make_member_metadata(subscriptions) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [0]), ('t3', list(range(100)))], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def 
test_no_exceptions_when_only_subscribed_topic_is_deleted(mocker): + cluster = create_cluster(mocker, topics={'t'}, topics_partitions={0, 1, 2}) + + subscriptions = { + 'C': {'t'}, + } + member_metadata = make_member_metadata(subscriptions) + + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t', [0, 1, 2])], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + subscriptions = { + 'C': {}, + } + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, sticky_assignment[member].partitions()) + + cluster = create_cluster(mocker, topics={}, topics_partitions={}) + sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + expected_assignment = { + 'C': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [], b''), + } + assert_assignment(sticky_assignment, expected_assignment) + + +def test_conflicting_previous_assignments(mocker): + cluster = create_cluster(mocker, topics={'t'}, topics_partitions={0, 1}) + + subscriptions = { + 'C1': {'t'}, + 'C2': {'t'}, + } + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + # assume both C1 and C2 have partition 1 assigned to them in generation 1 + member_metadata[member] = StickyPartitionAssignor._metadata(topics, [TopicPartition('t', 0), TopicPartition('t', 0)], 1) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + + +@pytest.mark.parametrize( + 'execution_number,n_topics,n_consumers', [(i, randint(10, 20), randint(20, 40)) for i in range(100)] +) +def test_reassignment_with_random_subscriptions_and_changes(mocker, execution_number, n_topics, n_consumers): + all_topics = sorted(['t{}'.format(i) for i in range(1, n_topics + 1)]) + partitions = dict([(t, set(range(1, i + 1))) for i, t in enumerate(all_topics)]) + cluster = create_cluster(mocker, topics=all_topics, topic_partitions_lambda=lambda t: partitions[t]) + + subscriptions = defaultdict(set) + for i in range(n_consumers): + topics_sample = sample(all_topics, randint(1, len(all_topics) - 1)) + subscriptions['C{}'.format(i)].update(topics_sample) + + member_metadata = make_member_metadata(subscriptions) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + + subscriptions = defaultdict(set) + for i in range(n_consumers): + topics_sample = sample(all_topics, randint(1, len(all_topics) - 1)) + subscriptions['C{}'.format(i)].update(topics_sample) + + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, assignment[member].partitions()) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance(subscriptions, assignment) + assert StickyPartitionAssignor._latest_partition_movements.are_sticky() + + +def test_assignment_with_multiple_generations1(mocker): + cluster = create_cluster(mocker, topics={'t'}, topics_partitions={0, 1, 2, 3, 4, 5}) + + member_metadata = { + 'C1': StickyPartitionAssignor._metadata({'t'}, []), + 'C2': StickyPartitionAssignor._metadata({'t'}, []), + 'C3': StickyPartitionAssignor._metadata({'t'}, []), + } + + assignment1 = StickyPartitionAssignor.assign(cluster, member_metadata) + 
verify_validity_and_balance({'C1': {'t'}, 'C2': {'t'}, 'C3': {'t'}}, assignment1) + assert len(assignment1['C1'].assignment[0][1]) == 2 + assert len(assignment1['C2'].assignment[0][1]) == 2 + assert len(assignment1['C3'].assignment[0][1]) == 2 + + member_metadata = { + 'C1': StickyPartitionAssignor._metadata({'t'}, assignment1['C1'].partitions()), + 'C2': StickyPartitionAssignor._metadata({'t'}, assignment1['C2'].partitions()), + } + + assignment2 = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance({'C1': {'t'}, 'C2': {'t'}}, assignment2) + assert len(assignment2['C1'].assignment[0][1]) == 3 + assert len(assignment2['C2'].assignment[0][1]) == 3 + assert all([partition in assignment2['C1'].assignment[0][1] for partition in assignment1['C1'].assignment[0][1]]) + assert all([partition in assignment2['C2'].assignment[0][1] for partition in assignment1['C2'].assignment[0][1]]) + assert StickyPartitionAssignor._latest_partition_movements.are_sticky() + + member_metadata = { + 'C2': StickyPartitionAssignor._metadata({'t'}, assignment2['C2'].partitions(), 2), + 'C3': StickyPartitionAssignor._metadata({'t'}, assignment1['C3'].partitions(), 1), + } + + assignment3 = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance({'C2': {'t'}, 'C3': {'t'}}, assignment3) + assert len(assignment3['C2'].assignment[0][1]) == 3 + assert len(assignment3['C3'].assignment[0][1]) == 3 + assert StickyPartitionAssignor._latest_partition_movements.are_sticky() + + +def test_assignment_with_multiple_generations2(mocker): + cluster = create_cluster(mocker, topics={'t'}, topics_partitions={0, 1, 2, 3, 4, 5}) + + member_metadata = { + 'C1': StickyPartitionAssignor._metadata({'t'}, []), + 'C2': StickyPartitionAssignor._metadata({'t'}, []), + 'C3': StickyPartitionAssignor._metadata({'t'}, []), + } + + assignment1 = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance({'C1': {'t'}, 'C2': {'t'}, 'C3': {'t'}}, assignment1) + assert len(assignment1['C1'].assignment[0][1]) == 2 + assert len(assignment1['C2'].assignment[0][1]) == 2 + assert len(assignment1['C3'].assignment[0][1]) == 2 + + member_metadata = { + 'C2': StickyPartitionAssignor._metadata({'t'}, assignment1['C2'].partitions(), 1), + } + + assignment2 = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance({'C2': {'t'}}, assignment2) + assert len(assignment2['C2'].assignment[0][1]) == 6 + assert all([partition in assignment2['C2'].assignment[0][1] for partition in assignment1['C2'].assignment[0][1]]) + assert StickyPartitionAssignor._latest_partition_movements.are_sticky() + + member_metadata = { + 'C1': StickyPartitionAssignor._metadata({'t'}, assignment1['C1'].partitions(), 1), + 'C2': StickyPartitionAssignor._metadata({'t'}, assignment2['C2'].partitions(), 2), + 'C3': StickyPartitionAssignor._metadata({'t'}, assignment1['C3'].partitions(), 1), + } + + assignment3 = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance({'C1': {'t'}, 'C2': {'t'}, 'C3': {'t'}}, assignment3) + assert StickyPartitionAssignor._latest_partition_movements.are_sticky() + assert set(assignment3['C1'].assignment[0][1]) == set(assignment1['C1'].assignment[0][1]) + assert set(assignment3['C2'].assignment[0][1]) == set(assignment1['C2'].assignment[0][1]) + assert set(assignment3['C3'].assignment[0][1]) == set(assignment1['C3'].assignment[0][1]) + + +@pytest.mark.parametrize('execution_number', range(50)) +def 
test_assignment_with_conflicting_previous_generations(mocker, execution_number): + cluster = create_cluster(mocker, topics={'t'}, topics_partitions={0, 1, 2, 3, 4, 5}) + + member_assignments = { + 'C1': [TopicPartition('t', p) for p in {0, 1, 4}], + 'C2': [TopicPartition('t', p) for p in {0, 2, 3}], + 'C3': [TopicPartition('t', p) for p in {3, 4, 5}], + } + member_generations = { + 'C1': 1, + 'C2': 1, + 'C3': 2, + } + member_metadata = {} + for member in six.iterkeys(member_assignments): + member_metadata[member] = StickyPartitionAssignor._metadata({'t'}, member_assignments[member], member_generations[member]) + + assignment = StickyPartitionAssignor.assign(cluster, member_metadata) + verify_validity_and_balance({'C1': {'t'}, 'C2': {'t'}, 'C3': {'t'}}, assignment) + assert StickyPartitionAssignor._latest_partition_movements.are_sticky() + + +def make_member_metadata(subscriptions): + member_metadata = {} + for member, topics in six.iteritems(subscriptions): + member_metadata[member] = StickyPartitionAssignor._metadata(topics, []) + return member_metadata + + +def assert_assignment(result_assignment, expected_assignment): + assert result_assignment == expected_assignment + assert set(result_assignment) == set(expected_assignment) + for member in result_assignment: + assert result_assignment[member].encode() == expected_assignment[member].encode() + + +def verify_validity_and_balance(subscriptions, assignment): + """ + Verifies that the given assignment is valid with respect to the given subscriptions + Validity requirements: + - each consumer is subscribed to topics of all partitions assigned to it, and + - each partition is assigned to no more than one consumer + Balance requirements: + - the assignment is fully balanced (the numbers of topic partitions assigned to consumers differ by at most one), or + - there is no topic partition that can be moved from one consumer to another with 2+ fewer topic partitions + + :param subscriptions topic subscriptions of each consumer + :param assignment: given assignment for balance check + """ + assert six.viewkeys(subscriptions) == six.viewkeys(assignment) + + consumers = sorted(six.viewkeys(assignment)) + for i in range(len(consumers)): + consumer = consumers[i] + partitions = assignment[consumer].partitions() + for partition in partitions: + assert partition.topic in subscriptions[consumer], ( + 'Error: Partition {} is assigned to consumer {}, ' + 'but it is not subscribed to topic {}\n' + 'Subscriptions: {}\n' + 'Assignments: {}'.format(partition, consumers[i], partition.topic, subscriptions, assignment) + ) + if i == len(consumers) - 1: + continue + + for j in range(i + 1, len(consumers)): + other_consumer = consumers[j] + other_partitions = assignment[other_consumer].partitions() + partitions_intersection = set(partitions).intersection(set(other_partitions)) + assert partitions_intersection == set(), ( + 'Error: Consumers {} and {} have common partitions ' + 'assigned to them: {}\n' + 'Subscriptions: {}\n' + 'Assignments: {}'.format(consumer, other_consumer, partitions_intersection, subscriptions, assignment) + ) + + if abs(len(partitions) - len(other_partitions)) <= 1: + continue + + assignments_by_topic = group_partitions_by_topic(partitions) + other_assignments_by_topic = group_partitions_by_topic(other_partitions) + if len(partitions) > len(other_partitions): + for topic in six.iterkeys(assignments_by_topic): + assert topic not in other_assignments_by_topic, ( + 'Error: Some partitions can be moved from {} ({} partitions) ' + 'to {} ({} 
partitions) ' + 'to achieve a better balance\n' + 'Subscriptions: {}\n' + 'Assignments: {}'.format(consumer, len(partitions), other_consumer, len(other_partitions), subscriptions, assignment) + ) + if len(other_partitions) > len(partitions): + for topic in six.iterkeys(other_assignments_by_topic): + assert topic not in assignments_by_topic, ( + 'Error: Some partitions can be moved from {} ({} partitions) ' + 'to {} ({} partitions) ' + 'to achieve a better balance\n' + 'Subscriptions: {}\n' + 'Assignments: {}'.format(other_consumer, len(other_partitions), consumer, len(partitions), subscriptions, assignment) + ) + + +def group_partitions_by_topic(partitions): + result = defaultdict(set) + for p in partitions: + result[p.topic].add(p.partition) + return result diff --git a/test/test_client_async.py b/test/test_client_async.py index 74da66a36..66b227aa9 100644 --- a/test/test_client_async.py +++ b/test/test_client_async.py @@ -220,12 +220,12 @@ def test_send(cli, conn): request = ProduceRequest[0](0, 0, []) assert request.expect_response() is False ret = cli.send(0, request) - assert conn.send.called_with(request) + conn.send.assert_called_with(request, blocking=False) assert isinstance(ret, Future) request = MetadataRequest[0]([]) cli.send(0, request) - assert conn.send.called_with(request) + conn.send.assert_called_with(request, blocking=False) def test_poll(mocker): diff --git a/test/test_codec.py b/test/test_codec.py index 9eff888fe..e05707451 100644 --- a/test/test_codec.py +++ b/test/test_codec.py @@ -7,11 +7,12 @@ from kafka.vendor.six.moves import range from kafka.codec import ( - has_snappy, has_lz4, + has_snappy, has_lz4, has_zstd, gzip_encode, gzip_decode, snappy_encode, snappy_decode, lz4_encode, lz4_decode, lz4_encode_old_kafka, lz4_decode_old_kafka, + zstd_encode, zstd_decode, ) from test.testutil import random_string @@ -113,3 +114,11 @@ def test_lz4_incremental(): b2 = lz4_decode(lz4_encode(b1)) assert len(b1) == len(b2) assert b1 == b2 + + +@pytest.mark.skipif(not has_zstd(), reason="Zstd not available") +def test_zstd(): + for _ in range(1000): + b1 = random_string(100).encode('utf-8') + b2 = zstd_decode(zstd_encode(b1)) + assert b1 == b2 diff --git a/test/test_consumer_group.py b/test/test_consumer_group.py index 58dc7ebf9..4904ffeea 100644 --- a/test/test_consumer_group.py +++ b/test/test_consumer_group.py @@ -1,5 +1,6 @@ import collections import logging +import platform import threading import time @@ -40,6 +41,9 @@ def test_consumer_topics(kafka_broker, topic): consumer.close() +@pytest.mark.skipif( + platform.python_implementation() == "PyPy", reason="Works on PyPy if run locally, but not in CI/CD pipeline." 
+) @pytest.mark.skipif(env_kafka_version() < (0, 9), reason='Unsupported Kafka Version') def test_group(kafka_broker, topic): num_partitions = 4 diff --git a/test/test_consumer_integration.py b/test/test_consumer_integration.py index 90b7ed203..cfe36b500 100644 --- a/test/test_consumer_integration.py +++ b/test/test_consumer_integration.py @@ -99,6 +99,7 @@ def test_kafka_consumer__blocking(kafka_consumer_factory, topic, send_messages): assert t.interval >= (TIMEOUT_MS / 1000.0) +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") @pytest.mark.skipif(env_kafka_version() < (0, 8, 1), reason="Requires KAFKA_VERSION >= 0.8.1") def test_kafka_consumer__offset_commit_resume(kafka_consumer_factory, send_messages): GROUP_ID = random_string(10) @@ -143,6 +144,7 @@ def test_kafka_consumer__offset_commit_resume(kafka_consumer_factory, send_messa assert_message_count(output_msgs1 + output_msgs2, 200) +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") @pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") def test_kafka_consumer_max_bytes_simple(kafka_consumer_factory, topic, send_messages): send_messages(range(100, 200), partition=0) @@ -162,6 +164,7 @@ def test_kafka_consumer_max_bytes_simple(kafka_consumer_factory, topic, send_mes assert seen_partitions == {TopicPartition(topic, 0), TopicPartition(topic, 1)} +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") @pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") def test_kafka_consumer_max_bytes_one_msg(kafka_consumer_factory, send_messages): # We send to only 1 partition so we don't have parallel requests to 2 @@ -188,6 +191,7 @@ def test_kafka_consumer_max_bytes_one_msg(kafka_consumer_factory, send_messages) assert_message_count(fetched_msgs, 10) +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") @pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") def test_kafka_consumer_offsets_for_time(topic, kafka_consumer, kafka_producer): late_time = int(time.time()) * 1000 @@ -237,6 +241,7 @@ def test_kafka_consumer_offsets_for_time(topic, kafka_consumer, kafka_producer): assert offsets == {tp: late_msg.offset + 1} +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") @pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") def test_kafka_consumer_offsets_search_many_partitions(kafka_consumer, kafka_producer, topic): tp0 = TopicPartition(topic, 0) @@ -275,6 +280,7 @@ def test_kafka_consumer_offsets_search_many_partitions(kafka_consumer, kafka_pro } +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") @pytest.mark.skipif(env_kafka_version() >= (0, 10, 1), reason="Requires KAFKA_VERSION < 0.10.1") def test_kafka_consumer_offsets_for_time_old(kafka_consumer, topic): consumer = kafka_consumer @@ -284,6 +290,7 @@ def test_kafka_consumer_offsets_for_time_old(kafka_consumer, topic): consumer.offsets_for_times({tp: int(time.time())}) +@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") @pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason="Requires KAFKA_VERSION >= 0.10.1") def test_kafka_consumer_offsets_for_times_errors(kafka_consumer_factory, topic): consumer = kafka_consumer_factory(fetch_max_wait_ms=200, diff --git a/test/test_coordinator.py b/test/test_coordinator.py index ea8f84bb6..a35cdd1a0 100644 --- 
a/test/test_coordinator.py +++ b/test/test_coordinator.py @@ -9,6 +9,7 @@ SubscriptionState, ConsumerRebalanceListener) from kafka.coordinator.assignors.range import RangePartitionAssignor from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor +from kafka.coordinator.assignors.sticky.sticky_assignor import StickyPartitionAssignor from kafka.coordinator.base import Generation, MemberState, HeartbeatThread from kafka.coordinator.consumer import ConsumerCoordinator from kafka.coordinator.protocol import ( @@ -77,6 +78,10 @@ def test_group_protocols(coordinator): RoundRobinPartitionAssignor.version, ['foobar'], b'')), + ('sticky', ConsumerProtocolMemberMetadata( + StickyPartitionAssignor.version, + ['foobar'], + b'')), ] @@ -95,7 +100,7 @@ def test_pattern_subscription(coordinator, api_version): [(0, 'fizz', []), (0, 'foo1', [(0, 0, 0, [], [])]), (0, 'foo2', [(0, 0, 1, [], [])])])) - assert coordinator._subscription.subscription == set(['foo1', 'foo2']) + assert coordinator._subscription.subscription == {'foo1', 'foo2'} # 0.9 consumers should trigger dynamic partition assignment if api_version >= (0, 9): @@ -103,14 +108,14 @@ def test_pattern_subscription(coordinator, api_version): # earlier consumers get all partitions assigned locally else: - assert set(coordinator._subscription.assignment.keys()) == set([ - TopicPartition('foo1', 0), - TopicPartition('foo2', 0)]) + assert set(coordinator._subscription.assignment.keys()) == {TopicPartition('foo1', 0), + TopicPartition('foo2', 0)} def test_lookup_assignor(coordinator): assert coordinator._lookup_assignor('roundrobin') is RoundRobinPartitionAssignor assert coordinator._lookup_assignor('range') is RangePartitionAssignor + assert coordinator._lookup_assignor('sticky') is StickyPartitionAssignor assert coordinator._lookup_assignor('foobar') is None @@ -121,10 +126,25 @@ def test_join_complete(mocker, coordinator): mocker.spy(assignor, 'on_assignment') assert assignor.on_assignment.call_count == 0 assignment = ConsumerProtocolMemberAssignment(0, [('foobar', [0, 1])], b'') - coordinator._on_join_complete( - 0, 'member-foo', 'roundrobin', assignment.encode()) + coordinator._on_join_complete(0, 'member-foo', 'roundrobin', assignment.encode()) + assert assignor.on_assignment.call_count == 1 + assignor.on_assignment.assert_called_with(assignment) + + +def test_join_complete_with_sticky_assignor(mocker, coordinator): + coordinator._subscription.subscribe(topics=['foobar']) + assignor = StickyPartitionAssignor() + coordinator.config['assignors'] = (assignor,) + mocker.spy(assignor, 'on_assignment') + mocker.spy(assignor, 'on_generation_assignment') + assert assignor.on_assignment.call_count == 0 + assert assignor.on_generation_assignment.call_count == 0 + assignment = ConsumerProtocolMemberAssignment(0, [('foobar', [0, 1])], b'') + coordinator._on_join_complete(0, 'member-foo', 'sticky', assignment.encode()) assert assignor.on_assignment.call_count == 1 + assert assignor.on_generation_assignment.call_count == 1 assignor.on_assignment.assert_called_with(assignment) + assignor.on_generation_assignment.assert_called_with(0) def test_subscription_listener(mocker, coordinator): @@ -141,9 +161,7 @@ def test_subscription_listener(mocker, coordinator): coordinator._on_join_complete( 0, 'member-foo', 'roundrobin', assignment.encode()) assert listener.on_partitions_assigned.call_count == 1 - listener.on_partitions_assigned.assert_called_with(set([ - TopicPartition('foobar', 0), - TopicPartition('foobar', 1)])) + 
listener.on_partitions_assigned.assert_called_with({TopicPartition('foobar', 0), TopicPartition('foobar', 1)}) def test_subscription_listener_failure(mocker, coordinator): diff --git a/test/test_msk.py b/test/test_msk.py new file mode 100644 index 000000000..7fca53b3d --- /dev/null +++ b/test/test_msk.py @@ -0,0 +1,70 @@ +import datetime +import json + + +try: + from unittest import mock +except ImportError: + import mock + +from kafka.sasl.msk import AwsMskIamClient + + +def client_factory(token=None): + + now = datetime.datetime.utcfromtimestamp(1629321911) + with mock.patch('kafka.sasl.msk.datetime') as mock_dt: + mock_dt.datetime.utcnow = mock.Mock(return_value=now) + return AwsMskIamClient( + host='localhost', + access_key='XXXXXXXXXXXXXXXXXXXX', + secret_key='XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX', + region='us-east-1', + token=token, + ) + + +def test_aws_msk_iam_client_permanent_credentials(): + client = client_factory(token=None) + msg = client.first_message() + assert msg + assert isinstance(msg, bytes) + actual = json.loads(msg) + + expected = { + 'version': '2020_10_22', + 'host': 'localhost', + 'user-agent': 'kafka-python', + 'action': 'kafka-cluster:Connect', + 'x-amz-algorithm': 'AWS4-HMAC-SHA256', + 'x-amz-credential': 'XXXXXXXXXXXXXXXXXXXX/20210818/us-east-1/kafka-cluster/aws4_request', + 'x-amz-date': '20210818T212511Z', + 'x-amz-signedheaders': 'host', + 'x-amz-expires': '900', + 'x-amz-signature': '0fa42ae3d5693777942a7a4028b564f0b372bafa2f71c1a19ad60680e6cb994b', + } + assert actual == expected + + +def test_aws_msk_iam_client_temporary_credentials(): + client = client_factory(token='XXXXX') + msg = client.first_message() + assert msg + assert isinstance(msg, bytes) + actual = json.loads(msg) + + expected = { + 'version': '2020_10_22', + 'host': 'localhost', + 'user-agent': 'kafka-python', + 'action': 'kafka-cluster:Connect', + 'x-amz-algorithm': 'AWS4-HMAC-SHA256', + 'x-amz-credential': 'XXXXXXXXXXXXXXXXXXXX/20210818/us-east-1/kafka-cluster/aws4_request', + 'x-amz-date': '20210818T212511Z', + 'x-amz-signedheaders': 'host', + 'x-amz-expires': '900', + 'x-amz-signature': 'b0619c50b7ecb4a7f6f92bd5f733770df5710e97b25146f97015c0b1db783b05', + 'x-amz-security-token': 'XXXXX', + } + assert actual == expected + diff --git a/test/test_partition_movements.py b/test/test_partition_movements.py new file mode 100644 index 000000000..bc990bf3d --- /dev/null +++ b/test/test_partition_movements.py @@ -0,0 +1,23 @@ +from kafka.structs import TopicPartition + +from kafka.coordinator.assignors.sticky.partition_movements import PartitionMovements + + +def test_empty_movements_are_sticky(): + partition_movements = PartitionMovements() + assert partition_movements.are_sticky() + + +def test_sticky_movements(): + partition_movements = PartitionMovements() + partition_movements.move_partition(TopicPartition('t', 1), 'C1', 'C2') + partition_movements.move_partition(TopicPartition('t', 1), 'C2', 'C3') + partition_movements.move_partition(TopicPartition('t', 1), 'C3', 'C1') + assert partition_movements.are_sticky() + + +def test_should_detect_non_sticky_assignment(): + partition_movements = PartitionMovements() + partition_movements.move_partition(TopicPartition('t', 1), 'C1', 'C2') + partition_movements.move_partition(TopicPartition('t', 2), 'C2', 'C1') + assert not partition_movements.are_sticky() diff --git a/test/test_partitioner.py b/test/test_partitioner.py index 853fbf69e..09fa0412a 100644 --- a/test/test_partitioner.py +++ b/test/test_partitioner.py @@ -2,6 +2,7 @@ import pytest + 
from kafka.partitioner import DefaultPartitioner, murmur2 diff --git a/test/test_producer.py b/test/test_producer.py index 9605adf58..15c244113 100644 --- a/test/test_producer.py +++ b/test/test_producer.py @@ -1,5 +1,6 @@ import gc import platform +import sys import time import threading @@ -10,6 +11,7 @@ from test.testutil import env_kafka_version, random_string +@pytest.mark.skipif(env_kafka_version() <= (0, 8, 2) and sys.version_info > (3, 11), reason="Kafka 0.8.2 and earlier not supported by 3.12") def test_buffer_pool(): pool = SimpleBufferPool(1000, 1000) @@ -21,18 +23,18 @@ def test_buffer_pool(): buf2 = pool.allocate(1000, 1000) assert buf2.read() == b'' - @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") -@pytest.mark.parametrize("compression", [None, 'gzip', 'snappy', 'lz4']) +@pytest.mark.skipif(env_kafka_version() <= (0, 8, 2) and sys.version_info > (3, 11), reason="Kafka 0.8.2 and earlier not supported by 3.12") +@pytest.mark.parametrize("compression", [None, 'gzip', 'snappy', 'lz4', 'zstd']) def test_end_to_end(kafka_broker, compression): - if compression == 'lz4': - # LZ4 requires 0.8.2 if env_kafka_version() < (0, 8, 2): - return - # python-lz4 crashes on older versions of pypy + pytest.skip('LZ4 requires 0.8.2') elif platform.python_implementation() == 'PyPy': - return + pytest.skip('python-lz4 crashes on older versions of pypy') + + if compression == 'zstd' and env_kafka_version() < (2, 1, 0): + pytest.skip('zstd requires kafka 2.1.0 or newer') connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)]) producer = KafkaProducer(bootstrap_servers=connect_str, @@ -70,6 +72,7 @@ def test_end_to_end(kafka_broker, compression): @pytest.mark.skipif(platform.python_implementation() != 'CPython', reason='Test relies on CPython-specific gc policies') +@pytest.mark.skipif(env_kafka_version() <= (0, 8, 2) and sys.version_info > (3, 11), reason="Kafka 0.8.2 and earlier not supported by 3.12") def test_kafka_producer_gc_cleanup(): gc.collect() threads = threading.active_count() @@ -81,8 +84,11 @@ def test_kafka_producer_gc_cleanup(): @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set") -@pytest.mark.parametrize("compression", [None, 'gzip', 'snappy', 'lz4']) +@pytest.mark.skipif(env_kafka_version() <= (0, 8, 2) and sys.version_info > (3, 11), reason="Kafka 0.8.2 and earlier not supported by 3.12") +@pytest.mark.parametrize("compression", [None, 'gzip', 'snappy', 'lz4', 'zstd']) def test_kafka_producer_proper_record_metadata(kafka_broker, compression): + if compression == 'zstd' and env_kafka_version() < (2, 1, 0): + pytest.skip('zstd requires 2.1.0 or more') connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)]) producer = KafkaProducer(bootstrap_servers=connect_str, retries=5, @@ -124,10 +130,8 @@ def test_kafka_producer_proper_record_metadata(kafka_broker, compression): if headers: assert record.serialized_header_size == 22 - # generated timestamp case is skipped for broker 0.9 and below if magic == 0: - return - + pytest.skip('generated timestamp case is skipped for broker 0.9 and below') send_time = time.time() * 1000 future = producer.send( topic, diff --git a/test/test_protocol.py b/test/test_protocol.py index e295174d4..6a77e19d6 100644 --- a/test/test_protocol.py +++ b/test/test_protocol.py @@ -9,7 +9,7 @@ from kafka.protocol.fetch import FetchRequest, FetchResponse from kafka.protocol.message import Message, MessageSet, PartialMessage from kafka.protocol.metadata import MetadataRequest -from 
kafka.protocol.types import Int16, Int32, Int64, String +from kafka.protocol.types import Int16, Int32, Int64, String, UnsignedVarInt32, CompactString, CompactArray, CompactBytes def test_create_message(): @@ -282,3 +282,55 @@ def test_struct_unrecognized_kwargs(): def test_struct_missing_kwargs(): fr = FetchRequest[0](max_wait_time=100) assert fr.min_bytes is None + + +def test_unsigned_varint_serde(): + pairs = { + 0: [0], + -1: [0xff, 0xff, 0xff, 0xff, 0x0f], + 1: [1], + 63: [0x3f], + -64: [0xc0, 0xff, 0xff, 0xff, 0x0f], + 64: [0x40], + 8191: [0xff, 0x3f], + -8192: [0x80, 0xc0, 0xff, 0xff, 0x0f], + 8192: [0x80, 0x40], + -8193: [0xff, 0xbf, 0xff, 0xff, 0x0f], + 1048575: [0xff, 0xff, 0x3f], + + } + for value, expected_encoded in pairs.items(): + value &= 0xffffffff + encoded = UnsignedVarInt32.encode(value) + assert encoded == b''.join(struct.pack('>B', x) for x in expected_encoded) + assert value == UnsignedVarInt32.decode(io.BytesIO(encoded)) + + +def test_compact_data_structs(): + cs = CompactString() + encoded = cs.encode(None) + assert encoded == struct.pack('B', 0) + decoded = cs.decode(io.BytesIO(encoded)) + assert decoded is None + assert b'\x01' == cs.encode('') + assert '' == cs.decode(io.BytesIO(b'\x01')) + encoded = cs.encode("foobarbaz") + assert cs.decode(io.BytesIO(encoded)) == "foobarbaz" + + arr = CompactArray(CompactString()) + assert arr.encode(None) == b'\x00' + assert arr.decode(io.BytesIO(b'\x00')) is None + enc = arr.encode([]) + assert enc == b'\x01' + assert [] == arr.decode(io.BytesIO(enc)) + encoded = arr.encode(["foo", "bar", "baz", "quux"]) + assert arr.decode(io.BytesIO(encoded)) == ["foo", "bar", "baz", "quux"] + + enc = CompactBytes.encode(None) + assert enc == b'\x00' + assert CompactBytes.decode(io.BytesIO(b'\x00')) is None + enc = CompactBytes.encode(b'') + assert enc == b'\x01' + assert CompactBytes.decode(io.BytesIO(b'\x01')) is b'' + enc = CompactBytes.encode(b'foo') + assert CompactBytes.decode(io.BytesIO(enc)) == b'foo' diff --git a/tox.ini b/tox.ini index 06403d6ed..3d8bfbbc4 100644 --- a/tox.ini +++ b/tox.ini @@ -1,37 +1,45 @@ [tox] -envlist = py{26,27,34,35,36,37,py}, docs +envlist = py{38,39,310,311,312,py}, docs [pytest] testpaths = kafka test addopts = --durations=10 log_format = %(created)f %(filename)-23s %(threadName)s %(message)s +[gh-actions] +python = + 3.8: py38 + 3.9: py39 + 3.10: py310 + 3.11: py311 + 3.12: py312 + pypy-3.9: pypy + [testenv] deps = - pytest<4.0 + pytest pytest-cov - py{27,34,35,36,37,py}: pylint - py{27,34,35,36,37,py}: pytest-pylint + pylint + pytest-pylint pytest-mock mock python-snappy + zstandard lz4 xxhash crc32c + botocore commands = - py.test {posargs:--pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka --cov-config=.covrc} + pytest {posargs:--pylint --pylint-rcfile=pylint.rc --pylint-error-types=EF --cov=kafka --cov-config=.covrc} setenv = CRC32C_SW_MODE = auto PROJECT_ROOT = {toxinidir} passenv = KAFKA_VERSION -[testenv:py26] -# pylint doesn't support python2.6 -commands = py.test {posargs:--cov=kafka --cov-config=.covrc} [testenv:pypy] # pylint is super slow on pypy... -commands = py.test {posargs:--cov=kafka --cov-config=.covrc} +commands = pytest {posargs:--cov=kafka --cov-config=.covrc} [testenv:docs] deps = diff --git a/travis_java_install.sh b/travis_java_install.sh old mode 100644 new mode 100755
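The tests above exercise StickyPartitionAssignor.assign() directly against a mocked cluster. As a point of reference only, and not part of the patch itself, the sketch below shows how an application might opt in to the sticky assignor on an ordinary consumer. The topic name, broker address, and group id are placeholder assumptions; partition_assignment_strategy is simply a sequence of assignor classes listed in order of preference.

from kafka import KafkaConsumer
from kafka.coordinator.assignors.sticky.sticky_assignor import StickyPartitionAssignor

# Placeholder connection details, adjust for a real deployment.
consumer = KafkaConsumer(
    'example-topic',
    bootstrap_servers='localhost:9092',
    group_id='example-group',
    # Assignors are listed in order of preference. The sticky assignor tries to
    # keep each member's partitions stable across rebalances, which is the
    # behaviour the tests check via verify_validity_and_balance() and are_sticky().
    partition_assignment_strategy=[StickyPartitionAssignor],
)

for message in consumer:
    print(message.topic, message.partition, message.offset)

Passing the assignor class (rather than an instance) mirrors how the coordinator tests configure assignors, and all consumers in a group should list the same strategies so the group leader can pick a protocol every member supports.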