From c2e8033ca4ec28f3ef1a9107ee6d163925960c57 Mon Sep 17 00:00:00 2001 From: zhuo Date: Wed, 9 Sep 2020 11:46:34 -0700 Subject: [PATCH] Removed all usages of "six" in tfx_bsl. Also, omit the MRO search type passed to super() in cases of single-inheritance. PiperOrigin-RevId: 330766477 --- RELEASE.md | 1 + setup.py | 1 - tfx_bsl/arrow/array_util_test.py | 8 ------- tfx_bsl/arrow/path.py | 24 ++++++------------- tfx_bsl/beam/run_inference.py | 20 +++++++--------- tfx_bsl/beam/run_inference_test.py | 10 ++++---- tfx_bsl/coders/csv_decoder.py | 10 ++------ tfx_bsl/coders/tf_graph_record_decoder.py | 8 +++---- .../coders/tf_graph_record_decoder_test.py | 4 ++-- tfx_bsl/test_util/run_all_tests.py | 19 +++++---------- tfx_bsl/tfxio/csv_tfxio.py | 6 ++--- tfx_bsl/tfxio/csv_tfxio_test.py | 2 +- tfx_bsl/tfxio/dataset_options.py | 7 +++--- tfx_bsl/tfxio/raw_tf_record.py | 6 ++--- tfx_bsl/tfxio/raw_tf_record_test.py | 2 +- tfx_bsl/tfxio/record_based_tfxio.py | 4 +--- tfx_bsl/tfxio/record_to_tensor_tfxio.py | 6 ++--- tfx_bsl/tfxio/record_to_tensor_tfxio_test.py | 4 ++-- tfx_bsl/tfxio/tensor_adapter.py | 16 ++++++------- tfx_bsl/tfxio/tensor_adapter_test.py | 3 +-- .../tfxio/tensor_representation_util_test.py | 3 +-- tfx_bsl/tfxio/tensor_to_arrow.py | 8 +++---- tfx_bsl/tfxio/test_util.py | 2 +- tfx_bsl/tfxio/tf_example_record.py | 8 +++---- tfx_bsl/tfxio/tf_example_record_test.py | 2 +- tfx_bsl/tfxio/tf_sequence_example_record.py | 8 +++---- .../tfxio/tf_sequence_example_record_test.py | 2 +- tfx_bsl/tfxio/tfxio.py | 4 +--- tfx_bsl/tfxio/tfxio_test.py | 2 +- 29 files changed, 74 insertions(+), 126 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 9ca28d3a..f7a0d630 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -15,6 +15,7 @@ * Depends on `absl-py>=0.9,<0.11`. * Depends on `pandas>=1.0,<2`. * Depends on `protobuf>=3.9.2,<4`. +* Stopped depending on `six`. ## Breaking changes diff --git a/setup.py b/setup.py index 777be3c1..a9c1659c 100644 --- a/setup.py +++ b/setup.py @@ -149,7 +149,6 @@ def has_ext_modules(self): 'pandas>=1.0,<2', 'protobuf>=3.9.2,<4', 'pyarrow>=0.17,<0.18', - 'six>=1.12,<2', 'tensorflow>=1.15.2,!=2.0.*,!=2.1.*,!=2.2.*,<3', 'tensorflow-metadata>=0.23,<0.24', 'tensorflow-serving-api>=1.15,!=2.0.*,!=2.1.*,!=2.2.*,<3', diff --git a/tfx_bsl/arrow/array_util_test.py b/tfx_bsl/arrow/array_util_test.py index 564f9433..779c1f9a 100644 --- a/tfx_bsl/arrow/array_util_test.py +++ b/tfx_bsl/arrow/array_util_test.py @@ -17,7 +17,6 @@ import numpy as np import pyarrow as pa -import six from tfx_bsl.arrow import array_util @@ -235,13 +234,6 @@ def test_match(self): values=pa.array([], type=pa.int64()), expected=pa.array([None, None, None, None, None], type=pa.list_(pa.int64()))), - dict( - testcase_name="long_num_parent", - num_parents=(long(1) if six.PY2 else 1), - parent_indices=pa.array([0], type=pa.int64()), - values=pa.array([1]), - expected=pa.array([[1]]) - ), dict( testcase_name="leading nones", num_parents=3, diff --git a/tfx_bsl/arrow/path.py b/tfx_bsl/arrow/path.py index e38014be..ef1052fd 100644 --- a/tfx_bsl/arrow/path.py +++ b/tfx_bsl/arrow/path.py @@ -15,27 +15,19 @@ from typing import Iterable, Text, Tuple, Union -import six - from tensorflow_metadata.proto.v0 import path_pb2 -# Text on py3, bytes on py2. -Step = Union[bytes, Text] - - -@six.python_2_unicode_compatible class ColumnPath(object): """ColumnPath addresses a column potentially nested under a StructArray.""" __slot__ = ["_steps"] - def __init__(self, steps: Union[Iterable[Step], Step]): + def __init__(self, steps: Union[Iterable[Text], Text]): """If a single Step is specified, constructs a Path of that step.""" - if isinstance(steps, (bytes, six.text_type)): + if isinstance(steps, Text): steps = (steps,) - self._steps = tuple( - s if isinstance(s, six.text_type) else s.decode("utf-8") for s in steps) + self._steps = tuple(steps) def to_proto(self) -> path_pb2.Path: """Creates a tensorflow_metadata path proto this ColumnPath.""" @@ -53,7 +45,7 @@ def from_proto(path_proto: path_pb2.Path): """ return ColumnPath(path_proto.step) - def steps(self) -> Tuple[Step, ...]: + def steps(self) -> Tuple[Text, ...]: """Returns the tuple of steps that represents this ColumnPath.""" return self._steps @@ -70,7 +62,7 @@ def parent(self) -> "ColumnPath": raise ValueError("Root does not have parent.") return ColumnPath(self._steps[:-1]) - def child(self, child_step: Step) -> "ColumnPath": + def child(self, child_step: Text) -> "ColumnPath": """Creates a new ColumnPath with a new child. example: ColumnPath(["this", "is", "my", "path"]).child("new_step") will @@ -82,9 +74,7 @@ def child(self, child_step: Step) -> "ColumnPath": Returns: A ColumnPath with the new child_step """ - if isinstance(child_step, six.text_type): - return ColumnPath(self._steps + (child_step,)) - return ColumnPath(self._steps + (child_step.decode("utf-8"),)) + return ColumnPath(self._steps + (child_step,)) def prefix(self, ending_index: int) -> "ColumnPath": """Creates a new ColumnPath, taking the prefix until the ending_index. @@ -114,7 +104,7 @@ def suffix(self, starting_index: int) -> "ColumnPath": """ return ColumnPath(self._steps[starting_index:]) - def initial_step(self) -> Step: + def initial_step(self) -> Text: """Returns the first step of this path. Raises: diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index f5804baf..30450ee3 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -34,7 +34,6 @@ from googleapiclient import discovery from googleapiclient import http import numpy as np -import six import tensorflow as tf from tfx_bsl.beam import shared from tfx_bsl.public.proto import model_spec_pb2 @@ -207,8 +206,7 @@ def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-n raise NotImplementedError -@six.add_metaclass(abc.ABCMeta) -class _BaseDoFn(beam.DoFn): +class _BaseDoFn(beam.DoFn, metaclass=abc.ABCMeta): """Base DoFn that performs bulk inference.""" class _MetricsCollector(object): @@ -267,7 +265,7 @@ def update(self, elements: List[Union[tf.train.Example, sum(element.ByteSize() for element in elements)) def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): - super(_BaseDoFn, self).__init__() + super().__init__() self._clock = None self._metrics_collector = self._MetricsCollector(inference_spec_type) @@ -346,7 +344,7 @@ class _RemotePredictDoFn(_BaseDoFn): def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, pipeline_options: PipelineOptions): - super(_RemotePredictDoFn, self).__init__(inference_spec_type) + super().__init__(inference_spec_type) self._ai_platform_prediction_model_spec = ( inference_spec_type.ai_platform_prediction_model_spec) self._api_client = None @@ -373,7 +371,7 @@ def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, version_name) def setup(self): - super(_RemotePredictDoFn, self).setup() + super().setup() # TODO(b/151468119): Add tfx_bsl_version and tfx_bsl_py_version to # user agent once custom header is supported in googleapiclient. self._api_client = discovery.build('ml', 'v1') @@ -505,7 +503,7 @@ def __init__( inference_spec_type: model_spec_pb2.InferenceSpecType, shared_model_handle: shared.Shared, ): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) + super().__init__(inference_spec_type) self._inference_spec_type = inference_spec_type self._shared_model_handle = shared_model_handle self._model_path = inference_spec_type.saved_model_spec.model_path @@ -524,7 +522,7 @@ def setup(self): to b/139207285. """ - super(_BaseBatchSavedModelDoFn, self).setup() + super().setup() self._tags = _get_tags(self._inference_spec_type) self._io_tensor_spec = self._pre_process() @@ -636,7 +634,7 @@ def setup(self): 'BulkInferrerClassifyDoFn requires signature method ' 'name %s, got: %s' % tf.saved_model.CLASSIFY_METHOD_NAME, signature_def.method_name) - super(_BatchClassifyDoFn, self).setup() + super().setup() def _check_elements( self, elements: List[Union[tf.train.Example, @@ -661,7 +659,7 @@ class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on regression model.""" def setup(self): - super(_BatchRegressDoFn, self).setup() + super().setup() def _check_elements( self, elements: List[Union[tf.train.Example, @@ -690,7 +688,7 @@ def setup(self): 'BulkInferrerPredictDoFn requires signature method ' 'name %s, got: %s' % tf.saved_model.PREDICT_METHOD_NAME, signature_def.method_name) - super(_BatchPredictDoFn, self).setup() + super().setup() def _check_elements( self, elements: List[Union[tf.train.Example, diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index eaa216e1..20a3ef7f 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -14,6 +14,7 @@ """Tests for tfx_bsl.run_inference.""" import base64 +from http import client as http_client import json import os try: @@ -27,7 +28,6 @@ from apache_beam.testing.util import equal_to from googleapiclient import discovery from googleapiclient import http -from six.moves import http_client import tensorflow as tf from tfx_bsl.beam import run_inference from tfx_bsl.public.proto import model_spec_pb2 @@ -40,7 +40,7 @@ class RunInferenceFixture(tf.test.TestCase): def setUp(self): - super(RunInferenceFixture, self).setUp() + super().setUp() self._predict_examples = [ text_format.Parse( """ @@ -70,7 +70,7 @@ def _prepare_predict_examples(self, example_path): class RunOfflineInferenceTest(RunInferenceFixture): def setUp(self): - super(RunOfflineInferenceTest, self).setUp() + super().setUp() self._predict_examples = [ text_format.Parse( """ @@ -361,7 +361,7 @@ def testKerasModelPredict(self): class TestKerasModel(tf.keras.Model): def __init__(self, inference_model): - super(TestKerasModel, self).__init__(name='test_keras_model') + super().__init__(name='test_keras_model') self.inference_model = inference_model @tf.function(input_signature=[ @@ -449,7 +449,7 @@ def testTelemetry(self): class RunRemoteInferenceTest(RunInferenceFixture): def setUp(self): - super(RunRemoteInferenceTest, self).setUp() + super().setUp() self.example_path = self._get_output_data_dir('example') self._prepare_predict_examples(self.example_path) # This is from https://ml.googleapis.com/$discovery/rest?version=v1. diff --git a/tfx_bsl/coders/csv_decoder.py b/tfx_bsl/coders/csv_decoder.py index a8f0cfa6..2f3d6c55 100644 --- a/tfx_bsl/coders/csv_decoder.py +++ b/tfx_bsl/coders/csv_decoder.py @@ -22,7 +22,6 @@ import apache_beam as beam import numpy as np import pyarrow as pa -import six import tensorflow as tf from tfx_bsl.coders import batch_util @@ -263,7 +262,7 @@ def merge_accumulators( # Merge the types inferred in each partition using the type hierarchy. # Specifically, whenever we observe a type higher in the type hierarchy # we update the type. - for feature_name, feature_type in six.iteritems(shard_types): + for feature_name, feature_type in shard_types.items(): if feature_name not in result or feature_type > result[feature_name]: result[feature_name] = feature_type return result @@ -448,12 +447,7 @@ def __init__(self, delimiter: Union[Text, bytes]): self._delimiter = delimiter self._line_iterator = _MutableRepeat() self._reader = csv.reader(self._line_iterator, delimiter=delimiter) - # Python 2 csv reader accepts bytes while Python 3 csv reader accepts - # unicode. - if six.PY2: - self._to_reader_input = tf.compat.as_bytes - else: - self._to_reader_input = tf.compat.as_text + self._to_reader_input = tf.compat.as_text def ReadLine(self, csv_line: CSVLine) -> List[CSVCell]: """Reads out bytes for PY2 and Unicode for PY3.""" diff --git a/tfx_bsl/coders/tf_graph_record_decoder.py b/tfx_bsl/coders/tf_graph_record_decoder.py index 6ad253fe..741173c9 100644 --- a/tfx_bsl/coders/tf_graph_record_decoder.py +++ b/tfx_bsl/coders/tf_graph_record_decoder.py @@ -16,7 +16,6 @@ import abc from typing import Dict, List, Optional, Text, Union -import six import tensorflow as tf from tensorflow.python.framework import composite_tensor # pylint: disable=g-direct-tensorflow-import @@ -25,8 +24,7 @@ TensorAlike = Union[tf.Tensor, composite_tensor.CompositeTensor] -@six.add_metaclass(abc.ABCMeta) -class TFGraphRecordDecoder(tf.Module): +class TFGraphRecordDecoder(tf.Module, metaclass=abc.ABCMeta): """Base class for decoders that turns a list of bytes to (composite) tensors. Sub-classes must implemented `_decode_record_internal()` (see its docstring @@ -45,7 +43,7 @@ def __init__(self, name: Text): name: Must be a valid TF scope name. May be used to create TF namescopes. see https://www.tensorflow.org/api_docs/python/tf/Graph#name_scope. """ - super(TFGraphRecordDecoder, self).__init__(name=name) + super().__init__(name=name) @tf.function(input_signature=[tf.TensorSpec(shape=(None,), dtype=tf.string)]) def decode_record(self, records: List[bytes]) -> Dict[Text, TensorAlike]: @@ -131,7 +129,7 @@ class LoadedDecoder(TFGraphRecordDecoder): """ def __init__(self, loaded_module): - super(LoadedDecoder, self).__init__(name="LoadedDecoder") + super().__init__(name="LoadedDecoder") self._loaded_module = loaded_module if tf.executing_eagerly(): record_index_tensor_name = ( diff --git a/tfx_bsl/coders/tf_graph_record_decoder_test.py b/tfx_bsl/coders/tf_graph_record_decoder_test.py index bd09732c..5acd602c 100644 --- a/tfx_bsl/coders/tf_graph_record_decoder_test.py +++ b/tfx_bsl/coders/tf_graph_record_decoder_test.py @@ -26,7 +26,7 @@ class _DecoderForTesting(tf_graph_record_decoder.TFGraphRecordDecoder): def __init__(self): - super(_DecoderForTesting, self).__init__("DecoderForTesting") + super().__init__("DecoderForTesting") def _decode_record_internal(self, record): indices = tf.transpose(tf.stack([ @@ -63,7 +63,7 @@ def record_index_tensor_name(self): class TfGraphRecordDecoderTest(tf.test.TestCase): def setUp(self): - super(TfGraphRecordDecoderTest, self).setUp() + super().setUp() self._tmp_dir = tempfile.mkdtemp(dir=FLAGS.test_tmpdir) def test_save_load_decode(self): diff --git a/tfx_bsl/test_util/run_all_tests.py b/tfx_bsl/test_util/run_all_tests.py index fe5fcb09..22c03c21 100644 --- a/tfx_bsl/test_util/run_all_tests.py +++ b/tfx_bsl/test_util/run_all_tests.py @@ -32,7 +32,6 @@ from absl import app from absl import flags from absl import logging -import six flags.DEFINE_list( @@ -118,18 +117,12 @@ def PrintLogs(self) -> None: (self.stdout, "STDOUT"), (self.stderr, "STDERR")): f.flush() f.seek(0) - if six.PY2: - sys.stdout.write("BEGIN %s of test %s\n" % (stream_name, self)) - sys.stdout.write(f.read()) - sys.stdout.write("END %s of test %s\n" % (stream_name, self)) - sys.stdout.flush() - else: - # Since we collected binary data, we have to write binary data. - encoded = (stream_name.encode(), str(self).encode()) - sys.stdout.buffer.write(b"BEGIN %s of test %s\n" % encoded) - sys.stdout.buffer.write(f.read()) - sys.stdout.buffer.write(b"END %s of test %s\n" % encoded) - sys.stdout.buffer.flush() + # Since we collected binary data, we have to write binary data. + encoded = (stream_name.encode(), str(self).encode()) + sys.stdout.buffer.write(b"BEGIN %s of test %s\n" % encoded) + sys.stdout.buffer.write(f.read()) + sys.stdout.buffer.write(b"END %s of test %s\n" % encoded) + sys.stdout.buffer.flush() def _DiscoverTests(root_dirs: List[Text], diff --git a/tfx_bsl/tfxio/csv_tfxio.py b/tfx_bsl/tfxio/csv_tfxio.py index c90ab459..1a008223 100644 --- a/tfx_bsl/tfxio/csv_tfxio.py +++ b/tfx_bsl/tfxio/csv_tfxio.py @@ -19,7 +19,6 @@ import apache_beam as beam import pyarrow as pa -import six from tfx_bsl.coders import csv_decoder from tfx_bsl.tfxio import record_based_tfxio from tfx_bsl.tfxio import tensor_adapter @@ -29,7 +28,6 @@ from tensorflow_metadata.proto.v0 import schema_pb2 -@six.add_metaclass(abc.ABCMeta) class _CsvTFXIOBase(record_based_tfxio.RecordBasedTFXIO): """Base class for TFXIO implementations for CSV.""" @@ -43,7 +41,7 @@ def __init__(self, schema: Optional[schema_pb2.Schema] = None, raw_record_column_name: Optional[Text] = None, telemetry_descriptors: Optional[List[Text]] = None): - super(_CsvTFXIOBase, self).__init__( + super().__init__( telemetry_descriptors=telemetry_descriptors, raw_record_column_name=raw_record_column_name, logical_format="csv", @@ -219,7 +217,7 @@ def __init__(self, skipped from each file. Must be 0 or higher. Large number of skipped lines might impact performance. """ - super(CsvTFXIO, self).__init__( + super().__init__( column_names=column_names, delimiter=delimiter, skip_blank_lines=skip_blank_lines, diff --git a/tfx_bsl/tfxio/csv_tfxio_test.py b/tfx_bsl/tfxio/csv_tfxio_test.py index 6054af2f..ef9766cf 100644 --- a/tfx_bsl/tfxio/csv_tfxio_test.py +++ b/tfx_bsl/tfxio/csv_tfxio_test.py @@ -180,7 +180,7 @@ class CsvRecordTest(parameterized.TestCase): @classmethod def setUpClass(cls): - super(CsvRecordTest, cls).setUpClass() + super().setUpClass() cls._example_file = os.path.join(FLAGS.test_tmpdir, "csvtexttest", "input.csv") tf.io.gfile.makedirs(os.path.dirname(cls._example_file)) diff --git a/tfx_bsl/tfxio/dataset_options.py b/tfx_bsl/tfxio/dataset_options.py index 8ebe4599..31c09506 100644 --- a/tfx_bsl/tfxio/dataset_options.py +++ b/tfx_bsl/tfxio/dataset_options.py @@ -58,7 +58,6 @@ def __new__(cls, the tuple is the label tensor and the dict (the first term) will not contain the label feature. """ - return super(TensorFlowDatasetOptions, - cls).__new__(cls, batch_size, drop_final_batch, num_epochs, - shuffle, shuffle_buffer_size, shuffle_seed, - label_key) + return super().__new__(cls, batch_size, drop_final_batch, num_epochs, + shuffle, shuffle_buffer_size, shuffle_seed, + label_key) diff --git a/tfx_bsl/tfxio/raw_tf_record.py b/tfx_bsl/tfxio/raw_tf_record.py index b214ebd5..24ff0413 100644 --- a/tfx_bsl/tfxio/raw_tf_record.py +++ b/tfx_bsl/tfxio/raw_tf_record.py @@ -43,7 +43,7 @@ def __init__(self, raw_record_column_name: Text, telemetry_descriptors: List[Text], physical_format: Text): assert raw_record_column_name is not None - super(_RawRecordTFXIO, self).__init__( + super().__init__( raw_record_column_name=raw_record_column_name, telemetry_descriptors=telemetry_descriptors, logical_format="bytes", @@ -130,7 +130,7 @@ def __init__(self, be identifiers of the component itself and not individual instances of source use. """ - super(RawBeamRecordTFXIO, self).__init__( + super().__init__( telemetry_descriptors=telemetry_descriptors, physical_format=physical_format, raw_record_column_name=raw_record_column_name) @@ -159,7 +159,7 @@ def __init__(self, file_pattern: Union[Text, List[Text]], be identifiers of the component itself and not individual instances of source use. """ - super(RawTfRecordTFXIO, self).__init__( + super().__init__( telemetry_descriptors=telemetry_descriptors, physical_format="tfrecords_gzip", raw_record_column_name=raw_record_column_name) diff --git a/tfx_bsl/tfxio/raw_tf_record_test.py b/tfx_bsl/tfxio/raw_tf_record_test.py index 7d95d891..c1aabfb1 100644 --- a/tfx_bsl/tfxio/raw_tf_record_test.py +++ b/tfx_bsl/tfxio/raw_tf_record_test.py @@ -44,7 +44,7 @@ class RawTfRecordTest(absltest.TestCase): @classmethod def setUpClass(cls): - super(RawTfRecordTest, cls).setUpClass() + super().setUpClass() cls._raw_record_file = os.path.join( FLAGS.test_tmpdir, "rawtfrecordtest", "input.recordio.gz") tf.io.gfile.makedirs(os.path.dirname(cls._raw_record_file)) diff --git a/tfx_bsl/tfxio/record_based_tfxio.py b/tfx_bsl/tfxio/record_based_tfxio.py index 36a4b442..36f1427c 100644 --- a/tfx_bsl/tfxio/record_based_tfxio.py +++ b/tfx_bsl/tfxio/record_based_tfxio.py @@ -22,13 +22,11 @@ import apache_beam as beam import numpy as np import pyarrow as pa -import six import tensorflow as tf from tfx_bsl.tfxio import telemetry from tfx_bsl.tfxio import tfxio -@six.add_metaclass(abc.ABCMeta) class RecordBasedTFXIO(tfxio.TFXIO): """Base class for all TFXIO implementations for record-based on-disk formats. @@ -62,7 +60,7 @@ def __init__(self, telemetry_descriptors: Optional[List[Text]], logical_format: Text, physical_format: Text, raw_record_column_name: Optional[Text] = None): - super(RecordBasedTFXIO, self).__init__() + super().__init__() if not self.SupportAttachingRawRecords(): assert raw_record_column_name is None, ( "{} did not support attaching raw records, but requested.".format( diff --git a/tfx_bsl/tfxio/record_to_tensor_tfxio.py b/tfx_bsl/tfxio/record_to_tensor_tfxio.py index 7dc58320..40c2d5cb 100644 --- a/tfx_bsl/tfxio/record_to_tensor_tfxio.py +++ b/tfx_bsl/tfxio/record_to_tensor_tfxio.py @@ -37,7 +37,7 @@ def __init__(self, physical_format: Text, raw_record_column_name: Optional[Text]): - super(_RecordToTensorTFXIO, self).__init__( + super().__init__( telemetry_descriptors, logical_format="tensor", physical_format=physical_format, @@ -142,7 +142,7 @@ def __init__(self, will contain a column of the given name that contains serialized records. """ - super(TFRecordToTensorTFXIO, self).__init__( + super().__init__( saved_decoder_path, telemetry_descriptors, physical_format="tfrecords_gzip", @@ -241,7 +241,7 @@ def __init__(self, saved_decoder_path: Text, raw_record_column_name: Optional[Text], produce_large_raw_record_column: bool, record_index_column_name: Optional[Text]): - super(_RecordsToRecordBatch, self).__init__() + super().__init__() self._saved_decoder_path = saved_decoder_path self._raw_record_column_name = raw_record_column_name self._produce_large_raw_record_column = produce_large_raw_record_column diff --git a/tfx_bsl/tfxio/record_to_tensor_tfxio_test.py b/tfx_bsl/tfxio/record_to_tensor_tfxio_test.py index 31afcea5..8ef386bd 100644 --- a/tfx_bsl/tfxio/record_to_tensor_tfxio_test.py +++ b/tfx_bsl/tfxio/record_to_tensor_tfxio_test.py @@ -37,7 +37,7 @@ class _DecoderForTesting(tf_graph_record_decoder.TFGraphRecordDecoder): def __init__(self): - super(_DecoderForTesting, self).__init__("DecoderForTesting") + super().__init__("DecoderForTesting") def _decode_record_internal(self, record): indices = tf.transpose( @@ -114,7 +114,7 @@ def _write_decoder(decoder=_DecoderForTesting()): class RecordToTensorTfxioTest(tf.test.TestCase, parameterized.TestCase): def setUp(self): - super(RecordToTensorTfxioTest, self).setUp() + super().setUp() self._input_path = _write_input() def _assert_sparse_tensor_equal(self, lhs, rhs): diff --git a/tfx_bsl/tfxio/tensor_adapter.py b/tfx_bsl/tfxio/tensor_adapter.py index 093f5f4a..29fd87f1 100644 --- a/tfx_bsl/tfxio/tensor_adapter.py +++ b/tfx_bsl/tfxio/tensor_adapter.py @@ -19,7 +19,6 @@ import numpy as np import pyarrow as pa -import six import tensorflow as tf from tfx_bsl.arrow import array_util from tfx_bsl.arrow import path @@ -161,8 +160,7 @@ def ToBatchTensors( return result -@six.add_metaclass(abc.ABCMeta) -class _TypeHandler(object): +class _TypeHandler(abc.ABC): """Base class of all type handlers. A TypeHandler converts one or more columns in a RecordBatch to a TF Tensor @@ -228,7 +226,7 @@ class _BaseDenseTensorHandler(_TypeHandler): def __init__(self, arrow_schema: pa.Schema, tensor_representation: schema_pb2.TensorRepresentation): - super(_BaseDenseTensorHandler, self).__init__(arrow_schema, + super().__init__(arrow_schema, tensor_representation) dense_rep = tensor_representation.dense_tensor column_name = dense_rep.column_name @@ -306,7 +304,7 @@ class _DefaultFillingDenseTensorHandler(_BaseDenseTensorHandler): def __init__(self, arrow_schema: pa.Schema, tensor_representation: schema_pb2.TensorRepresentation): - super(_DefaultFillingDenseTensorHandler, self).__init__( + super().__init__( arrow_schema, tensor_representation) _, value_type = _GetNestDepthAndValueType( arrow_schema, @@ -337,7 +335,7 @@ class _VarLenSparseTensorHandler(_TypeHandler): def __init__(self, arrow_schema: pa.Schema, tensor_representation: schema_pb2.TensorRepresentation): - super(_VarLenSparseTensorHandler, self).__init__( + super().__init__( arrow_schema, tensor_representation) column_name = tensor_representation.varlen_sparse_tensor.column_name self._column_index = arrow_schema.get_field_index(column_name) @@ -390,7 +388,7 @@ class _SparseTensorHandler(_TypeHandler): def __init__(self, arrow_schema: pa.Schema, tensor_representation: schema_pb2.TensorRepresentation): - super(_SparseTensorHandler, self).__init__( + super().__init__( arrow_schema, tensor_representation) sparse_representation = tensor_representation.sparse_tensor self._index_column_indices = tuple( @@ -480,7 +478,7 @@ class _RaggedTensorHandler(_TypeHandler): def __init__(self, arrow_schema: pa.Schema, tensor_representation: schema_pb2.TensorRepresentation): - super(_RaggedTensorHandler, self).__init__(arrow_schema, + super().__init__(arrow_schema, tensor_representation) ragged_representation = tensor_representation.ragged_tensor self._path = path.ColumnPath.from_proto(ragged_representation.feature_path) @@ -604,7 +602,7 @@ def _BuildTypeHandlers( arrow_schema: pa.Schema) -> List[Tuple[Text, _TypeHandler]]: """Builds type handlers according to TensorRepresentations.""" result = [] - for tensor_name, rep in six.iteritems(tensor_representations): + for tensor_name, rep in tensor_representations.items(): potential_handlers = _TYPE_HANDLER_MAP.get(rep.WhichOneof("kind")) if not potential_handlers: raise ValueError("Unable to handle tensor {} with rep {}".format( diff --git a/tfx_bsl/tfxio/tensor_adapter_test.py b/tfx_bsl/tfxio/tensor_adapter_test.py index 2ed4fcb9..b8d78d9e 100644 --- a/tfx_bsl/tfxio/tensor_adapter_test.py +++ b/tfx_bsl/tfxio/tensor_adapter_test.py @@ -17,7 +17,6 @@ import numpy as np import pyarrow as pa -import six import tensorflow as tf from tfx_bsl.tfxio import tensor_adapter @@ -1091,7 +1090,7 @@ def testMultipleColumns(self): [b"kk", b"kk"]]), tensors["bytes_default_filled_dense"]) if tf.executing_eagerly(): - for name, spec in six.iteritems(type_specs): + for name, spec in type_specs.items(): self.assertTrue( spec.is_compatible_with(tensors[name]), "{} is not compatible with spec {}".format(tensors[name], spec)) diff --git a/tfx_bsl/tfxio/tensor_representation_util_test.py b/tfx_bsl/tfxio/tensor_representation_util_test.py index 0a2b3b7b..0652a8df 100644 --- a/tfx_bsl/tfxio/tensor_representation_util_test.py +++ b/tfx_bsl/tfxio/tensor_representation_util_test.py @@ -16,7 +16,6 @@ import sys import numpy as np -import six import tensorflow as tf from tfx_bsl.arrow import path @@ -731,7 +730,7 @@ def testInferTensorRepresentationsFromSchema( schema.generate_legacy_feature_spec = generate_legacy_feature_spec expected_protos = { k: text_format.Parse(pbtxt, schema_pb2.TensorRepresentation()) - for k, pbtxt in six.iteritems(expected) + for k, pbtxt in expected.items() } self.assertEqual( expected_protos, diff --git a/tfx_bsl/tfxio/tensor_to_arrow.py b/tfx_bsl/tfxio/tensor_to_arrow.py index 64503156..052ba33e 100644 --- a/tfx_bsl/tfxio/tensor_to_arrow.py +++ b/tfx_bsl/tfxio/tensor_to_arrow.py @@ -19,7 +19,6 @@ from absl import logging import numpy as np import pyarrow as pa -import six import tensorflow as tf # CompositeTensor is not public yet. @@ -99,8 +98,7 @@ def convert(self, tensors: Dict[Text, TensorAlike]) -> pa.RecordBatch: return pa.record_batch(arrays, schema=self._arrow_schema) -@six.add_metaclass(abc.ABCMeta) -class _TypeHandler(object): +class _TypeHandler(abc.ABC): """Interface of a type handler that converts a tensor to arrow arrays. Note that a handler may convert a Tensor to multiple pa.Arrays. See @@ -158,7 +156,7 @@ class _VarLenSparseTensorHandler(_TypeHandler): __slots__ = ["_values_arrow_type"] def __init__(self, tensor_name: Text, type_spec: tf.TypeSpec): - super(_VarLenSparseTensorHandler, self).__init__(tensor_name, type_spec) + super().__init__(tensor_name, type_spec) self._values_arrow_type = _tf_dtype_to_arrow_type(type_spec.dtype) def _convert_internal(self, tensor: TensorAlike) -> List[pa.Array]: @@ -193,7 +191,7 @@ class _RaggedTensorHandler(_TypeHandler): __slots__ = ["_values_arrow_type", "_row_partition_dtype"] def __init__(self, tensor_name: Text, type_spec: tf.TypeSpec): - super(_RaggedTensorHandler, self).__init__(tensor_name, type_spec) + super().__init__(tensor_name, type_spec) # TODO(b/159717195): clean up protected-access # pylint: disable=protected-access diff --git a/tfx_bsl/tfxio/test_util.py b/tfx_bsl/tfxio/test_util.py index d2e3b7d3..3c406899 100644 --- a/tfx_bsl/tfxio/test_util.py +++ b/tfx_bsl/tfxio/test_util.py @@ -26,7 +26,7 @@ class InMemoryTFExampleRecord(tf_example_record.TFExampleBeamRecord): def __init__(self, schema: Optional[schema_pb2.Schema] = None, raw_record_column_name: Optional[Text] = None): - super(InMemoryTFExampleRecord, self).__init__( + super().__init__( physical_format="inmem", telemetry_descriptors=["test", "component"], schema=schema, diff --git a/tfx_bsl/tfxio/tf_example_record.py b/tfx_bsl/tfxio/tf_example_record.py index c949f224..6ae934cb 100644 --- a/tfx_bsl/tfxio/tf_example_record.py +++ b/tfx_bsl/tfxio/tf_example_record.py @@ -19,7 +19,6 @@ from absl import logging import apache_beam as beam import pyarrow as pa -import six import tensorflow as tf from tfx_bsl.arrow import path from tfx_bsl.coders import batch_util @@ -33,7 +32,6 @@ from tensorflow_metadata.proto.v0 import schema_pb2 -@six.add_metaclass(abc.ABCMeta) class _TFExampleRecordBase(record_based_tfxio.RecordBasedTFXIO): """Base class for TFXIO implementations for record based tf.Examples.""" @@ -47,7 +45,7 @@ def __init__(self, # arguments, when TFT's compatibility TFXIO starts setting them. if physical_format is None: physical_format = "unknown" - super(_TFExampleRecordBase, self).__init__( + super().__init__( telemetry_descriptors=telemetry_descriptors, raw_record_column_name=raw_record_column_name, logical_format="tf_example", @@ -163,7 +161,7 @@ def __init__(self, will contain a column of the given name that contains serialized records. """ - super(TFExampleBeamRecord, self).__init__( + super().__init__( schema=schema, raw_record_column_name=raw_record_column_name, telemetry_descriptors=telemetry_descriptors, physical_format=physical_format) @@ -209,7 +207,7 @@ def __init__(self, be identifiers of the component itself and not individual instances of source use. """ - super(TFExampleRecord, self).__init__( + super().__init__( schema=schema, raw_record_column_name=raw_record_column_name, telemetry_descriptors=telemetry_descriptors, physical_format="tfrecords_gzip") diff --git a/tfx_bsl/tfxio/tf_example_record_test.py b/tfx_bsl/tfxio/tf_example_record_test.py index 2a31bbdb..ed53248d 100644 --- a/tfx_bsl/tfxio/tf_example_record_test.py +++ b/tfx_bsl/tfxio/tf_example_record_test.py @@ -191,7 +191,7 @@ class TfExampleRecordTest(tf.test.TestCase): @classmethod def setUpClass(cls): - super(TfExampleRecordTest, cls).setUpClass() + super().setUpClass() cls._example_file = os.path.join( FLAGS.test_tmpdir, "tfexamplerecordtest", "input.recordio.gz") tf.io.gfile.makedirs(os.path.dirname(cls._example_file)) diff --git a/tfx_bsl/tfxio/tf_sequence_example_record.py b/tfx_bsl/tfxio/tf_sequence_example_record.py index 62f49643..aaa7e70f 100644 --- a/tfx_bsl/tfxio/tf_sequence_example_record.py +++ b/tfx_bsl/tfxio/tf_sequence_example_record.py @@ -19,7 +19,6 @@ from absl import logging import apache_beam as beam import pyarrow as pa -import six from tfx_bsl.arrow import path from tfx_bsl.coders import batch_util from tfx_bsl.coders import sequence_example_coder @@ -34,7 +33,6 @@ _SEQUENCE_COLUMN_NAME = "##SEQUENCE##" -@six.add_metaclass(abc.ABCMeta) class _TFSequenceExampleRecordBase(record_based_tfxio.RecordBasedTFXIO): """Base class for TFXIO classes for record based tf.SequenceExamples.""" @@ -43,7 +41,7 @@ def __init__(self, raw_record_column_name: Optional[Text], telemetry_descriptors: List[Text], physical_format: Text): - super(_TFSequenceExampleRecordBase, self).__init__( + super().__init__( telemetry_descriptors=telemetry_descriptors, raw_record_column_name=raw_record_column_name, logical_format="tf_sequence_example", @@ -165,7 +163,7 @@ def __init__(self, will contain a column of the given name that contains serialized records. """ - super(TFSequenceExampleBeamRecord, self).__init__( + super().__init__( schema=schema, raw_record_column_name=raw_record_column_name, telemetry_descriptors=telemetry_descriptors, physical_format=physical_format) @@ -213,7 +211,7 @@ def __init__(self, will contain a column of the given name that contains serialized records. """ - super(TFSequenceExampleRecord, self).__init__( + super().__init__( schema=schema, raw_record_column_name=raw_record_column_name, telemetry_descriptors=telemetry_descriptors, physical_format="tfrecords_gzip") diff --git a/tfx_bsl/tfxio/tf_sequence_example_record_test.py b/tfx_bsl/tfxio/tf_sequence_example_record_test.py index 3bad7652..db097ea8 100644 --- a/tfx_bsl/tfxio/tf_sequence_example_record_test.py +++ b/tfx_bsl/tfxio/tf_sequence_example_record_test.py @@ -197,7 +197,7 @@ class TfSequenceExampleRecordTest(parameterized.TestCase): @classmethod def setUpClass(cls): - super(TfSequenceExampleRecordTest, cls).setUpClass() + super().setUpClass() cls._example_file = os.path.join( FLAGS.test_tmpdir, "tfsequenceexamplerecordtest", "input.recordio.gz") tf.io.gfile.makedirs(os.path.dirname(cls._example_file)) diff --git a/tfx_bsl/tfxio/tfxio.py b/tfx_bsl/tfxio/tfxio.py index 248d1e06..a1aadbe5 100644 --- a/tfx_bsl/tfxio/tfxio.py +++ b/tfx_bsl/tfxio/tfxio.py @@ -28,15 +28,13 @@ import apache_beam as beam import pyarrow as pa -import six import tensorflow as tf from tfx_bsl.arrow import pyarrow_capability from tfx_bsl.tfxio import dataset_options from tfx_bsl.tfxio import tensor_adapter -@six.add_metaclass(abc.ABCMeta) -class TFXIO(object): +class TFXIO(object, metaclass=abc.ABCMeta): """Abstract basic class of all TFXIO API implementations.""" @abc.abstractmethod diff --git a/tfx_bsl/tfxio/tfxio_test.py b/tfx_bsl/tfxio/tfxio_test.py index 6f7f043d..b9e3bb58 100644 --- a/tfx_bsl/tfxio/tfxio_test.py +++ b/tfx_bsl/tfxio/tfxio_test.py @@ -25,7 +25,7 @@ class _FakeTFXIO(tfxio.TFXIO): """A fake TFXIO for testing the projection origin tracking.""" def __init__(self, columns): - super(_FakeTFXIO, self).__init__() + super().__init__() self._columns = columns def ArrowSchema(self):