diff --git a/airbyte-cdk/python/airbyte_cdk/sources/file_based/config/file_based_stream_config.py b/airbyte-cdk/python/airbyte_cdk/sources/file_based/config/file_based_stream_config.py index 8b6a881d5e67..6f38ed4abf56 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/file_based/config/file_based_stream_config.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/file_based/config/file_based_stream_config.py @@ -32,6 +32,11 @@ class FileBasedStreamConfig(BaseModel): title="Globs", description='The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look here.', ) + legacy_prefix: Optional[str] = Field( + title="Legacy Prefix", + description="The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.", + airbyte_hidden=True, + ) validation_policy: ValidationPolicy = Field( title="Validation Policy", description="The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema.", diff --git a/airbyte-cdk/python/airbyte_cdk/sources/file_based/file_based_stream_reader.py b/airbyte-cdk/python/airbyte_cdk/sources/file_based/file_based_stream_reader.py index 58270ea2c777..4a7de3bb6992 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/file_based/file_based_stream_reader.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/file_based/file_based_stream_reader.py @@ -60,6 +60,7 @@ def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str def get_matching_files( self, globs: List[str], + prefix: Optional[str], logger: logging.Logger, ) -> Iterable[RemoteFile]: """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py b/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py index 24070246a2d5..fd9ff174637d 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py @@ -200,7 +200,7 @@ def list_files(self) -> List[RemoteFile]: The output of this method is cached so we don't need to list the files more than once. This means we won't pick up changes to the files during a sync. """ - return list(self._stream_reader.get_matching_files(self.config.globs or [], self.logger)) + return list(self._stream_reader.get_matching_files(self.config.globs or [], self.config.legacy_prefix, self.logger)) def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]: loop = asyncio.get_event_loop() diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/in_memory_files_source.py b/airbyte-cdk/python/unit_tests/sources/file_based/in_memory_files_source.py index 6b80d8bfba91..ca25289bbf2c 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/in_memory_files_source.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/in_memory_files_source.py @@ -85,6 +85,7 @@ def config(self, value: AbstractFileBasedSpec) -> None: def get_matching_files( self, globs: List[str], + prefix: Optional[str], logger: logging.Logger, ) -> Iterable[RemoteFile]: yield from self.filter_files_by_globs_and_start_date([ diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/csv_scenarios.py b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/csv_scenarios.py index decec347bc49..a853fab6a6f2 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/csv_scenarios.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/csv_scenarios.py @@ -74,6 +74,12 @@ "type": "array", "items": {"type": "string"}, }, + "legacy_prefix": { + "title": "Legacy Prefix", + "airbyte_hidden": True, + "type": "string", + "description": "The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.", + }, "validation_policy": { "title": "Validation Policy", "description": "The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema.",