From fb714e4f9038cecb5345684eed3f519c90537dc0 Mon Sep 17 00:00:00 2001 From: Dhroov Makwana Date: Mon, 17 Oct 2022 22:43:20 +0530 Subject: [PATCH 1/9] Initial commit for source-gutendex --- .../connectors/source-gutendex/.dockerignore | 6 + .../connectors/source-gutendex/Dockerfile | 38 +++++ .../connectors/source-gutendex/README.md | 79 ++++++++++ .../connectors/source-gutendex/__init__.py | 3 + .../acceptance-test-config.yml | 30 ++++ .../source-gutendex/acceptance-test-docker.sh | 16 ++ .../connectors/source-gutendex/build.gradle | 9 ++ .../integration_tests/__init__.py | 3 + .../integration_tests/acceptance.py | 16 ++ .../integration_tests/catalog.json | 39 +++++ .../integration_tests/configured_catalog.json | 144 ++++++++++++++++++ .../integration_tests/invalid_config.json | 5 + .../integration_tests/sample_config.json | 6 + .../integration_tests/sample_state.json | 1 + .../connectors/source-gutendex/main.py | 13 ++ .../source-gutendex/requirements.txt | 2 + .../connectors/source-gutendex/setup.py | 29 ++++ .../source_gutendex/__init__.py | 8 + .../source_gutendex/gutendex.yaml | 40 +++++ .../source_gutendex/schemas/TODO.md | 16 ++ .../source_gutendex/schemas/results.json | 132 ++++++++++++++++ .../source-gutendex/source_gutendex/source.py | 18 +++ .../source-gutendex/source_gutendex/spec.yaml | 59 +++++++ 23 files changed, 712 insertions(+) create mode 100644 airbyte-integrations/connectors/source-gutendex/.dockerignore create mode 100644 airbyte-integrations/connectors/source-gutendex/Dockerfile create mode 100644 airbyte-integrations/connectors/source-gutendex/README.md create mode 100644 airbyte-integrations/connectors/source-gutendex/__init__.py create mode 100644 airbyte-integrations/connectors/source-gutendex/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-gutendex/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-gutendex/build.gradle create mode 100644 
airbyte-integrations/connectors/source-gutendex/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-gutendex/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-gutendex/integration_tests/catalog.json create mode 100644 airbyte-integrations/connectors/source-gutendex/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-gutendex/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-gutendex/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-gutendex/integration_tests/sample_state.json create mode 100644 airbyte-integrations/connectors/source-gutendex/main.py create mode 100644 airbyte-integrations/connectors/source-gutendex/requirements.txt create mode 100644 airbyte-integrations/connectors/source-gutendex/setup.py create mode 100644 airbyte-integrations/connectors/source-gutendex/source_gutendex/__init__.py create mode 100644 airbyte-integrations/connectors/source-gutendex/source_gutendex/gutendex.yaml create mode 100644 airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/TODO.md create mode 100644 airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/results.json create mode 100644 airbyte-integrations/connectors/source-gutendex/source_gutendex/source.py create mode 100644 airbyte-integrations/connectors/source-gutendex/source_gutendex/spec.yaml diff --git a/airbyte-integrations/connectors/source-gutendex/.dockerignore b/airbyte-integrations/connectors/source-gutendex/.dockerignore new file mode 100644 index 000000000000..55b5f4c1e88c --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/.dockerignore @@ -0,0 +1,6 @@ +* +!Dockerfile +!main.py +!source_gutendex +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-gutendex/Dockerfile 
b/airbyte-integrations/connectors/source-gutendex/Dockerfile new file mode 100644 index 000000000000..9326be96bcde --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_gutendex ./source_gutendex + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-gutendex diff --git a/airbyte-integrations/connectors/source-gutendex/README.md b/airbyte-integrations/connectors/source-gutendex/README.md new file mode 100644 index 000000000000..99d0de8d9bf5 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/README.md @@ -0,0 +1,79 @@ +# Gutendex Source + +This is the repository for the Gutendex configuration based source connector. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/gutendex). + +## Local development + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. 
+ +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-gutendex:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/gutendex) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_gutendex/spec.yaml` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source gutendex test creds` +and place them into `secrets/config.json`. + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-gutendex:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-gutendex:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-gutendex:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-gutendex:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-gutendex:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-gutendex:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing + +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. 
See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. + +To run your integration tests with docker + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-gutendex:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-gutendex:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
diff --git a/airbyte-integrations/connectors/source-gutendex/__init__.py b/airbyte-integrations/connectors/source-gutendex/__init__.py new file mode 100644 index 000000000000..1100c1c58cf5 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-gutendex/acceptance-test-config.yml b/airbyte-integrations/connectors/source-gutendex/acceptance-test-config.yml new file mode 100644 index 000000000000..89c28f7d7106 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/acceptance-test-config.yml @@ -0,0 +1,30 @@ +# See [Source Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-gutendex:dev +tests: + spec: + - spec_path: "source_gutendex/spec.yaml" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + # TODO uncomment this block to specify that the tests should assert the connector outputs the records provided in the input file a file + # expect_records: + # path: "integration_tests/expected_records.txt" + # extra_fields: no + # exact_order: no + # extra_records: yes + # incremental: # TODO if your connector does not implement incremental sync, remove this block + # - config_path: "secrets/config.json" + # configured_catalog_path: "integration_tests/configured_catalog.json" + # future_state_path: "integration_tests/abnormal_state.json" + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" 
diff --git a/airbyte-integrations/connectors/source-gutendex/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-gutendex/acceptance-test-docker.sh new file mode 100644 index 000000000000..c51577d10690 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2-) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-gutendex/build.gradle b/airbyte-integrations/connectors/source-gutendex/build.gradle new file mode 100644 index 000000000000..ac8a19190f5c --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/build.gradle @@ -0,0 +1,9 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_gutendex' +} diff --git a/airbyte-integrations/connectors/source-gutendex/integration_tests/__init__.py b/airbyte-integrations/connectors/source-gutendex/integration_tests/__init__.py new file mode 100644 index 000000000000..1100c1c58cf5 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# diff --git a/airbyte-integrations/connectors/source-gutendex/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-gutendex/integration_tests/acceptance.py new file mode 100644 index 000000000000..1302b2f57e10 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/integration_tests/acceptance.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-gutendex/integration_tests/catalog.json b/airbyte-integrations/connectors/source-gutendex/integration_tests/catalog.json new file mode 100644 index 000000000000..6799946a6851 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/integration_tests/catalog.json @@ -0,0 +1,39 @@ +{ + "streams": [ + { + "name": "TODO fix this file", + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": "column1", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "column1": { + "type": "string" + }, + "column2": { + "type": "number" + } + } + } + }, + { + "name": "table1", + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "column1": { + "type": "string" + }, + "column2": { + "type": "number" + } + } + } + } + ] +} diff --git a/airbyte-integrations/connectors/source-gutendex/integration_tests/configured_catalog.json 
b/airbyte-integrations/connectors/source-gutendex/integration_tests/configured_catalog.json new file mode 100644 index 000000000000..b3edc97396c7 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/integration_tests/configured_catalog.json @@ -0,0 +1,144 @@ +{ + "streams": [ + { + "stream": { + "name": "results", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "count": { + "type": "integer" + }, + "next": { + "type": ["string", "null"] + }, + "previous": { + "type": ["string", "null"] + }, + "results": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "birth_year": { + "type": ["integer", "null"] + }, + "death_year": { + "type": ["integer", "null"] + } + }, + "required": ["name", "birth_year", "death_year"] + } + }, + "translators": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "birth_year": { + "type": ["integer", "null"] + }, + "death_year": { + "type": ["integer", "null"] + } + }, + "required": ["name", "birth_year", "death_year"] + } + }, + "subjects": { + "type": "array", + "items": { + "type": "string" + } + }, + "bookshelves": { + "type": "array", + "items": { + "type": "string" + } + }, + "languages": { + "type": "array", + "items": { + "type": "string" + } + }, + "copyright": { + "type": ["boolean", "null"] + }, + "media_type": { + "type": "string" + }, + "formats": { + "type": "object", + "properties": { + "image/jpeg": { + "type": "string" + }, + "application/x-mobipocket-ebook": { + "type": "string" + }, + "application/rdf+xml": { + "type": "string" + }, + "text/html": { + "type": "string" + }, + "application/epub+zip": { + "type": "string" + }, + "text/plain; charset=us-ascii": { + "type": 
"string" + }, + "application/octet-stream": { + "type": "string" + } + } + }, + "download_count": { + "type": "integer" + } + }, + "required": [ + "id", + "title", + "authors", + "translators", + "subjects", + "bookshelves", + "languages", + "copyright", + "media_type", + "formats", + "download_count" + ] + } + } + }, + "required": ["count", "next", "previous", "results"] + }, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-gutendex/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-gutendex/integration_tests/invalid_config.json new file mode 100644 index 000000000000..25a5d3e24e15 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/integration_tests/invalid_config.json @@ -0,0 +1,5 @@ +{ + "author_year_start": "3000", + "languages": "en,fr", + "topic": "young" +} diff --git a/airbyte-integrations/connectors/source-gutendex/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-gutendex/integration_tests/sample_config.json new file mode 100644 index 000000000000..323e5056d589 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/integration_tests/sample_config.json @@ -0,0 +1,6 @@ +{ + "author_year_start": "1900", + "languages": "en,fr", + "topic": "young", + "sort": "popular" +} diff --git a/airbyte-integrations/connectors/source-gutendex/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-gutendex/integration_tests/sample_state.json new file mode 100644 index 000000000000..0967ef424bce --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/integration_tests/sample_state.json @@ -0,0 +1 @@ +{} diff --git a/airbyte-integrations/connectors/source-gutendex/main.py b/airbyte-integrations/connectors/source-gutendex/main.py new file mode 100644 index 000000000000..cc68e623a4c9 --- /dev/null +++ 
b/airbyte-integrations/connectors/source-gutendex/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_gutendex import SourceGutendex + +if __name__ == "__main__": + source = SourceGutendex() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-gutendex/requirements.txt b/airbyte-integrations/connectors/source-gutendex/requirements.txt new file mode 100644 index 000000000000..0411042aa091 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-gutendex/setup.py b/airbyte-integrations/connectors/source-gutendex/setup.py new file mode 100644 index 000000000000..75f9ae066236 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.1", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_gutendex", + description="Source implementation for Gutendex.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-gutendex/source_gutendex/__init__.py b/airbyte-integrations/connectors/source-gutendex/source_gutendex/__init__.py new file mode 100644 index 000000000000..96976f4f8912 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/source_gutendex/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourceGutendex + +__all__ = ["SourceGutendex"] diff --git a/airbyte-integrations/connectors/source-gutendex/source_gutendex/gutendex.yaml b/airbyte-integrations/connectors/source-gutendex/source_gutendex/gutendex.yaml new file mode 100644 index 000000000000..818ebac4ea7c --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/source_gutendex/gutendex.yaml @@ -0,0 +1,40 @@ +version: "0.1.0" + +definitions: + selector: + extractor: + field_pointer: [] + requester: + url_base: "https://gutendex.com/" + http_method: "GET" + request_options_provider: + request_parameters: + author_year_start: "{{ config['author_year_start'] }}" + author_year_end: "{{ config['author_year_end'] }}" + copyright: "{{ config['copyright'] }}" + languages: "{{ config['languages'] }}" + search: "{{ config['search'] }}" + sort: "{{ config['sort'] }}" + topic: "{{ config['topic'] }}" + retriever: + record_selector: + $ref: "*ref(definitions.selector)" + paginator: + type: NoPagination + requester: + $ref: "*ref(definitions.requester)" + base_stream: + retriever: + $ref: "*ref(definitions.retriever)" + results_stream: + $ref: "*ref(definitions.base_stream)" + $options: + name: "results" + path: "/books" + +streams: + - "*ref(definitions.results_stream)" + +check: + stream_names: + - "results" diff --git a/airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/TODO.md b/airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/TODO.md new file mode 100644 index 000000000000..0e1dfe18bb86 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/TODO.md @@ -0,0 +1,16 @@ +# TODO: Define your stream schemas +Your connector must describe the schema of each stream it can output using [JSONSchema](https://json-schema.org). + +You can describe the schema of your streams using one `.json` file per stream. 
+ +## Static schemas +From the `gutendex.yaml` configuration file, you read the `.json` files in the `schemas/` directory. You can refer to a schema in your configuration file using the `schema_loader` component's `file_path` field. For example: +``` +schema_loader: + type: JsonSchema + file_path: "./source_gutendex/schemas/customers.json" +``` +Every stream specified in the configuration file should have a corresponding `.json` schema file. + +Delete this file once you're done. Or don't. Up to you :) + diff --git a/airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/results.json b/airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/results.json new file mode 100644 index 000000000000..a79491793f67 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/results.json @@ -0,0 +1,132 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "count": { + "type": "integer" + }, + "next": { + "type": ["string", "null"] + }, + "previous": { + "type": ["string", "null"] + }, + "results": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "birth_year": { + "type": ["integer", "null"] + }, + "death_year": { + "type": ["integer", "null"] + } + }, + "required": ["name", "birth_year", "death_year"] + } + }, + "translators": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "birth_year": { + "type": ["integer", "null"] + }, + "death_year": { + "type": ["integer", "null"] + } + }, + "required": ["name", "birth_year", "death_year"] + } + }, + "subjects": { + "type": "array", + "items": { + "type": "string" + } + }, + "bookshelves": { + "type": "array", + "items": { + "type": "string" + } + }, 
+ "languages": { + "type": "array", + "items": { + "type": "string" + } + }, + "copyright": { + "type": ["boolean", "null"] + }, + "media_type": { + "type": "string" + }, + "formats": { + "type": "object", + "properties": { + "image/jpeg": { + "type": "string" + }, + "application/x-mobipocket-ebook": { + "type": "string" + }, + "application/rdf+xml": { + "type": "string" + }, + "text/html": { + "type": "string" + }, + "application/epub+zip": { + "type": "string" + }, + "text/plain; charset=us-ascii": { + "type": "string" + }, + "application/octet-stream": { + "type": "string" + } + } + }, + "download_count": { + "type": "integer" + } + }, + "required": [ + "id", + "title", + "authors", + "translators", + "subjects", + "bookshelves", + "languages", + "copyright", + "media_type", + "formats", + "download_count" + ] + } + } + }, + "required": ["count", "next", "previous", "results"] +} diff --git a/airbyte-integrations/connectors/source-gutendex/source_gutendex/source.py b/airbyte-integrations/connectors/source-gutendex/source_gutendex/source.py new file mode 100644 index 000000000000..f7a5c4092cff --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/source_gutendex/source.py @@ -0,0 +1,18 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + +from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource + +""" +This file provides the necessary constructs to interpret a provided declarative YAML configuration file into +source connector. + +WARNING: Do not modify this file. 
+""" + + +# Declarative Source +class SourceGutendex(YamlDeclarativeSource): + def __init__(self): + super().__init__(**{"path_to_yaml": "gutendex.yaml"}) diff --git a/airbyte-integrations/connectors/source-gutendex/source_gutendex/spec.yaml b/airbyte-integrations/connectors/source-gutendex/source_gutendex/spec.yaml new file mode 100644 index 000000000000..149488cdf961 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/source_gutendex/spec.yaml @@ -0,0 +1,59 @@ +documentationUrl: https://docsurl.com +connectionSpecification: + $schema: http://json-schema.org/draft-07/schema# + title: Gutendex Spec + type: object + additionalProperties: true + properties: + author_year_start: + type: string + description: (Optional) Defines the minimum birth year of the authors. Books by authors born prior to the start year will not be returned. Supports both positive (CE) or negative (BCE) integer values + pattern: ^[-]?[0-9]{1,4}$ + examples: + - 2002 + - 500 + - -500 + - 2020 + author_year_end: + type: string + description: (Optional) Defines the maximum birth year of the authors. Books by authors born after the end year will not be returned. Supports both positive (CE) or negative (BCE) integer values + pattern: ^[-]?[0-9]{1,4}$ + examples: + - 2002 + - 500 + - -500 + - 2020 + copyright: + type: string + description: (Optional) Use this to find books with a certain copyright status - true for books with existing copyrights, false for books in the public domain in the USA, or null for books with no available copyright information. + pattern: ^(true|false|null)$ + examples: + - true + - false + - null + languages: + type: string + description: (Optional) Use this to find books in any of a list of languages. They must be comma-separated, two-character language codes. + examples: + - en + - en,fr,fi + search: + type: string + description: (Optional) Use this to search author names and book titles with given words. They must be separated by a space (i.e. 
%20 in URL-encoded format) and are case-insensitive. + examples: + - dickens%20great%20expect + - dickens + sort: + type: string + description: (Optional) Use this to sort books - ascending for Project Gutenberg ID numbers from lowest to highest, descending for IDs highest to lowest, or popular (the default) for most popular to least popular by number of downloads. + pattern: ^(ascending|descending|popular)$ + examples: + - ascending + - descending + - popular + topic: + type: string + description: (Optional) Use this to search for a case-insensitive key-phrase in books' bookshelves or subjects. + examples: + - children + - fantasy From 731e1bd7f3f5577af19feebea0ba2dd76082d806 Mon Sep 17 00:00:00 2001 From: Dhroov Makwana Date: Mon, 17 Oct 2022 23:35:13 +0530 Subject: [PATCH 2/9] Clean up unnecessary code --- .../acceptance-test-config.yml | 10 -- .../integration_tests/acceptance.py | 2 - .../integration_tests/catalog.json | 39 ----- .../integration_tests/configured_catalog.json | 133 +----------------- 4 files changed, 1 insertion(+), 183 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-gutendex/integration_tests/catalog.json diff --git a/airbyte-integrations/connectors/source-gutendex/acceptance-test-config.yml b/airbyte-integrations/connectors/source-gutendex/acceptance-test-config.yml index 89c28f7d7106..dfaeaba5938a 100644 --- a/airbyte-integrations/connectors/source-gutendex/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-gutendex/acceptance-test-config.yml @@ -15,16 +15,6 @@ tests: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" empty_streams: [] - # TODO uncomment this block to specify that the tests should assert the connector outputs the records provided in the input file a file - # expect_records: - # path: "integration_tests/expected_records.txt" - # extra_fields: no - # exact_order: no - # extra_records: yes - # incremental: # TODO if your connector 
does not implement incremental sync, remove this block - # - config_path: "secrets/config.json" - # configured_catalog_path: "integration_tests/configured_catalog.json" - # future_state_path: "integration_tests/abnormal_state.json" full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-gutendex/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-gutendex/integration_tests/acceptance.py index 1302b2f57e10..950b53b59d41 100644 --- a/airbyte-integrations/connectors/source-gutendex/integration_tests/acceptance.py +++ b/airbyte-integrations/connectors/source-gutendex/integration_tests/acceptance.py @@ -11,6 +11,4 @@ @pytest.fixture(scope="session", autouse=True) def connector_setup(): """This fixture is a placeholder for external resources that acceptance test might require.""" - # TODO: setup test dependencies if needed. otherwise remove the TODO comments yield - # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-gutendex/integration_tests/catalog.json b/airbyte-integrations/connectors/source-gutendex/integration_tests/catalog.json deleted file mode 100644 index 6799946a6851..000000000000 --- a/airbyte-integrations/connectors/source-gutendex/integration_tests/catalog.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "streams": [ - { - "name": "TODO fix this file", - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": "column1", - "json_schema": { - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "column1": { - "type": "string" - }, - "column2": { - "type": "number" - } - } - } - }, - { - "name": "table1", - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": false, - "json_schema": { - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - 
"properties": { - "column1": { - "type": "string" - }, - "column2": { - "type": "number" - } - } - } - } - ] -} diff --git a/airbyte-integrations/connectors/source-gutendex/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-gutendex/integration_tests/configured_catalog.json index b3edc97396c7..116d042b8576 100644 --- a/airbyte-integrations/connectors/source-gutendex/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-gutendex/integration_tests/configured_catalog.json @@ -3,138 +3,7 @@ { "stream": { "name": "results", - "json_schema": { - "$schema": "http://json-schema.org/draft-04/schema#", - "type": "object", - "properties": { - "count": { - "type": "integer" - }, - "next": { - "type": ["string", "null"] - }, - "previous": { - "type": ["string", "null"] - }, - "results": { - "type": "array", - "items": { - "type": "object", - "properties": { - "id": { - "type": "integer" - }, - "title": { - "type": "string" - }, - "authors": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "birth_year": { - "type": ["integer", "null"] - }, - "death_year": { - "type": ["integer", "null"] - } - }, - "required": ["name", "birth_year", "death_year"] - } - }, - "translators": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "birth_year": { - "type": ["integer", "null"] - }, - "death_year": { - "type": ["integer", "null"] - } - }, - "required": ["name", "birth_year", "death_year"] - } - }, - "subjects": { - "type": "array", - "items": { - "type": "string" - } - }, - "bookshelves": { - "type": "array", - "items": { - "type": "string" - } - }, - "languages": { - "type": "array", - "items": { - "type": "string" - } - }, - "copyright": { - "type": ["boolean", "null"] - }, - "media_type": { - "type": "string" - }, - "formats": { - "type": "object", - "properties": { - "image/jpeg": { - "type": "string" - 
}, - "application/x-mobipocket-ebook": { - "type": "string" - }, - "application/rdf+xml": { - "type": "string" - }, - "text/html": { - "type": "string" - }, - "application/epub+zip": { - "type": "string" - }, - "text/plain; charset=us-ascii": { - "type": "string" - }, - "application/octet-stream": { - "type": "string" - } - } - }, - "download_count": { - "type": "integer" - } - }, - "required": [ - "id", - "title", - "authors", - "translators", - "subjects", - "bookshelves", - "languages", - "copyright", - "media_type", - "formats", - "download_count" - ] - } - } - }, - "required": ["count", "next", "previous", "results"] - }, + "json_schema": {}, "supported_sync_modes": ["full_refresh"] }, "sync_mode": "full_refresh", From 53e9733a322fc39b15b82def89c62939c8500dc7 Mon Sep 17 00:00:00 2001 From: marcosmarxm Date: Thu, 20 Oct 2022 15:55:44 -0300 Subject: [PATCH 3/9] solve conflict --- docs/integrations/README.md | 3 +- docs/integrations/sources/gutendex.md | 63 +++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 docs/integrations/sources/gutendex.md diff --git a/docs/integrations/README.md b/docs/integrations/README.md index c8c1d3f21fe6..014e564f494a 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -75,7 +75,8 @@ For more information about the grading system, see [Product Release Stages](http | [Google Search Console](sources/google-search-console.md) | Generally Available | Yes | | [Google Sheets](sources/google-sheets.md) | Generally Available | Yes | | [Google Workspace Admin Reports](sources/google-workspace-admin-reports.md) | Alpha | Yes | -| [Greenhouse](sources/greenhouse.md) | Generally Available | Yes | +| [Greenhouse](sources/greenhouse.md) | Beta | Yes | +| [Gutendex](sources/gutendex.md) | Alpha | No | | [Harness](sources/harness.md) | Alpha | No | | [Harvest](sources/harvest.md) | Generally Available | Yes | | [http-request](sources/http-request.md) | Alpha | No | diff --git 
a/docs/integrations/sources/gutendex.md b/docs/integrations/sources/gutendex.md new file mode 100644 index 000000000000..a1bf90371fe8 --- /dev/null +++ b/docs/integrations/sources/gutendex.md @@ -0,0 +1,63 @@ +# Gutendex + +## Overview + +The Gutendex source can sync data from the [Gutendex API](https://gutendex.com/) + +## Requirements + +Gutendex requires no access token/API key to make requests. + +## Output schema + +Lists of book information in the Project Gutenberg database are queried using the API at /books (e.g. gutendex.com/books). Book data will be returned in the format:- + + { + "count": <number>, + "next": <string or null>, + "previous": <string or null>, + "results": <array of Books> + } + +where `results` is an array of 0-32 book objects, next and previous are URLs to the next and previous pages of results, and count is the total number of books for the query on all pages combined. + +By default, books are ordered by popularity, determined by their numbers of downloads from Project Gutenberg. + +The source is capable of syncing the `books` stream. + +## Setup guide + +## Step 1: Set up the Gutendex connector in Airbyte + +### For Airbyte Cloud: + +1. [Log into your Airbyte Cloud](https://cloud.airbyte.io/workspaces) account. +2. In the left navigation bar, click **Sources**. In the top-right corner, click **+new source**. +3. On the Set up the source page, select **Gutendex** from the Source type dropdown. +4. Click **Set up source**. + +### For Airbyte OSS: + +1. Navigate to the Airbyte Open Source dashboard. +2. Set the name for your source (Gutendex). +3. Click **Set up source**. + +## Supported sync modes + +The Gutendex source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): + +| Feature | Supported? | +| :---------------- | :--------- | +| Full Refresh Sync | Yes | +| Incremental Sync | No | +| Namespaces | No | + +## Performance considerations + +There is no published rate limit.
However, since this data updates infrequently, it is recommended to set the update cadence to 24hr or higher. + +## Changelog + +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :----------- | :------------------- | +| 0.1.0 | 2022-10-17 | | New Source: Gutendex | From 4bccc37d1aab9d65a4500b32fd41744048b2ce6f Mon Sep 17 00:00:00 2001 From: Dhroov Makwana Date: Tue, 18 Oct 2022 19:07:29 +0530 Subject: [PATCH 4/9] Fix schema, Rename results_stream to books_stream --- .../integration_tests/configured_catalog.json | 2 +- .../source_gutendex/gutendex.yaml | 11 +- .../source_gutendex/schemas/books.json | 114 +++++++++++++++ .../source_gutendex/schemas/results.json | 132 ------------------ 4 files changed, 121 insertions(+), 138 deletions(-) create mode 100644 airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/books.json delete mode 100644 airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/results.json diff --git a/airbyte-integrations/connectors/source-gutendex/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-gutendex/integration_tests/configured_catalog.json index 116d042b8576..19be72fdac18 100644 --- a/airbyte-integrations/connectors/source-gutendex/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-gutendex/integration_tests/configured_catalog.json @@ -2,7 +2,7 @@ "streams": [ { "stream": { - "name": "results", + "name": "books", "json_schema": {}, "supported_sync_modes": ["full_refresh"] }, diff --git a/airbyte-integrations/connectors/source-gutendex/source_gutendex/gutendex.yaml b/airbyte-integrations/connectors/source-gutendex/source_gutendex/gutendex.yaml index 818ebac4ea7c..bd6a382743b7 100644 --- a/airbyte-integrations/connectors/source-gutendex/source_gutendex/gutendex.yaml +++ b/airbyte-integrations/connectors/source-gutendex/source_gutendex/gutendex.yaml @@ -3,7 +3,8 @@ version: "0.1.0" definitions: selector: extractor: - 
field_pointer: [] + field_pointer: + - results requester: url_base: "https://gutendex.com/" http_method: "GET" @@ -26,15 +27,15 @@ definitions: base_stream: retriever: $ref: "*ref(definitions.retriever)" - results_stream: + books_stream: $ref: "*ref(definitions.base_stream)" $options: - name: "results" + name: "books" path: "/books" streams: - - "*ref(definitions.results_stream)" + - "*ref(definitions.books_stream)" check: stream_names: - - "results" + - "books" diff --git a/airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/books.json b/airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/books.json new file mode 100644 index 000000000000..715c849065bd --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/books.json @@ -0,0 +1,114 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "birth_year": { + "type": ["integer", "null"] + }, + "death_year": { + "type": ["integer", "null"] + } + }, + "required": ["name", "birth_year", "death_year"] + } + }, + "translators": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "birth_year": { + "type": ["integer", "null"] + }, + "death_year": { + "type": ["integer", "null"] + } + }, + "required": ["name", "birth_year", "death_year"] + } + }, + "subjects": { + "type": "array", + "items": { + "type": "string" + } + }, + "bookshelves": { + "type": "array", + "items": { + "type": "string" + } + }, + "languages": { + "type": "array", + "items": { + "type": "string" + } + }, + "copyright": { + "type": ["boolean", "null"] + }, + "media_type": { + "type": "string" + }, + "formats": { + "type": "object", + "properties": { + "image/jpeg": { + "type": "string" + }, 
+ "application/x-mobipocket-ebook": { + "type": "string" + }, + "application/rdf+xml": { + "type": "string" + }, + "text/html": { + "type": "string" + }, + "application/epub+zip": { + "type": "string" + }, + "text/plain; charset=us-ascii": { + "type": "string" + }, + "application/octet-stream": { + "type": "string" + } + } + }, + "download_count": { + "type": "integer" + } + }, + "required": [ + "id", + "title", + "authors", + "translators", + "subjects", + "bookshelves", + "languages", + "copyright", + "media_type", + "formats", + "download_count" + ] +} diff --git a/airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/results.json b/airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/results.json deleted file mode 100644 index a79491793f67..000000000000 --- a/airbyte-integrations/connectors/source-gutendex/source_gutendex/schemas/results.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "type": "object", - "properties": { - "count": { - "type": "integer" - }, - "next": { - "type": ["string", "null"] - }, - "previous": { - "type": ["string", "null"] - }, - "results": { - "type": "array", - "items": { - "type": "object", - "properties": { - "id": { - "type": "integer" - }, - "title": { - "type": "string" - }, - "authors": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "birth_year": { - "type": ["integer", "null"] - }, - "death_year": { - "type": ["integer", "null"] - } - }, - "required": ["name", "birth_year", "death_year"] - } - }, - "translators": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "birth_year": { - "type": ["integer", "null"] - }, - "death_year": { - "type": ["integer", "null"] - } - }, - "required": ["name", "birth_year", "death_year"] - } - }, - "subjects": { - "type": "array", - "items": { - "type": "string" - } - }, - "bookshelves": { - 
"type": "array", - "items": { - "type": "string" - } - }, - "languages": { - "type": "array", - "items": { - "type": "string" - } - }, - "copyright": { - "type": ["boolean", "null"] - }, - "media_type": { - "type": "string" - }, - "formats": { - "type": "object", - "properties": { - "image/jpeg": { - "type": "string" - }, - "application/x-mobipocket-ebook": { - "type": "string" - }, - "application/rdf+xml": { - "type": "string" - }, - "text/html": { - "type": "string" - }, - "application/epub+zip": { - "type": "string" - }, - "text/plain; charset=us-ascii": { - "type": "string" - }, - "application/octet-stream": { - "type": "string" - } - } - }, - "download_count": { - "type": "integer" - } - }, - "required": [ - "id", - "title", - "authors", - "translators", - "subjects", - "bookshelves", - "languages", - "copyright", - "media_type", - "formats", - "download_count" - ] - } - } - }, - "required": ["count", "next", "previous", "results"] -} From c093ddf3d0d601f78496f6dad3dfdcf28a247730 Mon Sep 17 00:00:00 2001 From: Dhroov Makwana Date: Tue, 18 Oct 2022 19:50:11 +0530 Subject: [PATCH 5/9] Add parameters in gutendex.md, change doc url to official Airbyte url --- .../source-gutendex/source_gutendex/spec.yaml | 2 +- docs/integrations/sources/gutendex.md | 28 +++++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-gutendex/source_gutendex/spec.yaml b/airbyte-integrations/connectors/source-gutendex/source_gutendex/spec.yaml index 149488cdf961..f22b1645b25e 100644 --- a/airbyte-integrations/connectors/source-gutendex/source_gutendex/spec.yaml +++ b/airbyte-integrations/connectors/source-gutendex/source_gutendex/spec.yaml @@ -1,4 +1,4 @@ -documentationUrl: https://docsurl.com +documentationUrl: https://docs.airbyte.com/integrations/sources/gutendex connectionSpecification: $schema: http://json-schema.org/draft-07/schema# title: Gutendex Spec diff --git a/docs/integrations/sources/gutendex.md 
b/docs/integrations/sources/gutendex.md index a1bf90371fe8..ada413dfbce8 100644 --- a/docs/integrations/sources/gutendex.md +++ b/docs/integrations/sources/gutendex.md @@ -7,6 +7,28 @@ The Gutendex source can sync data from the [Gutendex API](https://gutendex.com/) ## Requirements Gutendex requires no access token/API key to make requests. +The following (optional) parameters can be provided to the connector :- +___ +##### `author_year_start` and `author_year_end` +Use these to find books with at least one author alive in a given range of years. They must have positive (CE) or negative (BCE) integer values. + +For example, `/books?author_year_start=1800&author_year_end=1899` gives books with authors alive in the 19th Century. +___ +##### `copyright` +Use this to find books with a certain copyright status: true for books with existing copyrights, false for books in the public domain in the USA, or null for books with no available copyright information. +___ +##### `languages` +Use this to find books in any of a list of languages. They must be comma-separated, two-character language codes. For example, `/books?languages=en` gives books in English, and `/books?languages=fr,fi` gives books in either French or Finnish or both. +___ +##### `search` +Use this to search author names and book titles with given words. They must be separated by a space (i.e. %20 in URL-encoded format) and are case-insensitive. For example, `/books?search=dickens%20great` includes Great Expectations by Charles Dickens. +___ +##### `sort` +Use this to sort books: ascending for Project Gutenberg ID numbers from lowest to highest, descending for IDs highest to lowest, or popular (the default) for most popular to least popular by number of downloads. +___ +##### `topic` +Use this to search for a case-insensitive key-phrase in books' bookshelves or subjects. 
For example, `/books?topic=children` gives books on the "Children's Literature" bookshelf, with the subject "Sick children -- Fiction", and so on. +___ ## Output schema @@ -58,6 +80,6 @@ There is no published rate limit. However, since this data updates infrequently, ## Changelog -| Version | Date | Pull Request | Subject | -| :------ | :--------- | :----------- | :------------------- | -| 0.1.0 | 2022-10-17 | | New Source: Gutendex | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :-------------------------------------------------------- | :----------------------------------------- | +| 0.1.0 | 2022-10-17 | [#18075](https://github.com/airbytehq/airbyte/pull/18075) | 🎉 New Source: Gutendex API [low-code CDK] | From 078a36cc1a84dbaf36fc9847a8498d7a05b5be95 Mon Sep 17 00:00:00 2001 From: Dhroov Makwana Date: Tue, 18 Oct 2022 22:06:03 +0530 Subject: [PATCH 6/9] Add pagination, read each and every record instead of just 32 --- .../source-gutendex/source_gutendex/gutendex.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-gutendex/source_gutendex/gutendex.yaml b/airbyte-integrations/connectors/source-gutendex/source_gutendex/gutendex.yaml index bd6a382743b7..e596131eee4f 100644 --- a/airbyte-integrations/connectors/source-gutendex/source_gutendex/gutendex.yaml +++ b/airbyte-integrations/connectors/source-gutendex/source_gutendex/gutendex.yaml @@ -21,7 +21,17 @@ definitions: record_selector: $ref: "*ref(definitions.selector)" paginator: - type: NoPagination + type: "DefaultPaginator" + url_base: "*ref(definitions.requester.url_base)" + pagination_strategy: + type: "PageIncrement" + page_size: 32 + page_token_option: + inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" requester: $ref: "*ref(definitions.requester)" base_stream: From ddd2e98d5ced7f8b22ecbed8de1d1b5c8d9f5db2 Mon Sep 17 00:00:00 2001 From: 
Dhroov Makwana Date: Wed, 19 Oct 2022 12:20:39 +0530 Subject: [PATCH 7/9] Add bootstrap.md, add badge in builds.md --- airbyte-integrations/builds.md | 1 + .../connectors/source-gutendex/bootstrap.md | 51 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 airbyte-integrations/connectors/source-gutendex/bootstrap.md diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index 7af75b786c27..76c855754cd2 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -48,6 +48,7 @@ | Google Directory API | [![source-google-directory](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-google-directory%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-google-directory) | | Google Workspace Admin | [![source-google-workspace-admin-reports](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-google-workspace-admin-reports%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-google-workspace-admin-reports) | | Greenhouse | [![source-greenhouse](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-greenhouse%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-greenhouse) | +| Gutendex | [![source-gutendex](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-gutendex%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-gutendex) | | HubSpot | [![source-hubspot](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-hubspot%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-hubspot) | | IBM Db2 | 
[![source-db2](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-db2%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-db2) | | Instagram | [![source-instagram](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-instagram%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-instagram) | diff --git a/airbyte-integrations/connectors/source-gutendex/bootstrap.md b/airbyte-integrations/connectors/source-gutendex/bootstrap.md new file mode 100644 index 000000000000..961a8e20b7a9 --- /dev/null +++ b/airbyte-integrations/connectors/source-gutendex/bootstrap.md @@ -0,0 +1,51 @@ +# Gutendex + +## Overview + +Project Gutenberg is a volunteer effort to digitize and archive cultural works, as well as to "encourage the creation and distribution of eBooks." It was founded in 1971 by American writer Michael S. Hart and is the oldest digital library. It has over 60,000 books. + +[Gutendex](https://gutendex.com/) is a JSON web API for Project Gutenberg eBook metadata. The Gutendex Connector is implemented with the [Airbyte Low-Code CDK](https://docs.airbyte.com/connector-development/config-based/low-code-cdk-overview). + +## Output Format + +#### Each Book has the following structure + +```yaml +{ + "id": <number of Project Gutenberg ID>, + "title": <string>, + "authors": <array of Persons>, + "translators": <array of Persons>, + "subjects": <array of strings>, + "bookshelves": <array of strings>, + "languages": <array of strings>, + "copyright": <boolean or null>, + "media_type": <string>, + "formats": <Format>, + "download_count": <number>, +} +``` + +#### Each Person has the following structure + +```yaml +{ + "birth_year": <number or null>, + "death_year": <number or null>, + "name": <string>, +} +``` + +## Core Streams + +Connector supports the `books` stream that provides information and metadata about books matching the query. + +## Rate Limiting + +No published rate limit. + +## Authentication and Permissions + +No authentication. + +See [this](https://docs.airbyte.com/integrations/sources/gutendex) link for the connector docs.
\ No newline at end of file From da2a045db9eab5e21856eb00186d24cbb2607aec Mon Sep 17 00:00:00 2001 From: marcosmarxm Date: Thu, 20 Oct 2022 16:51:17 -0300 Subject: [PATCH 8/9] add source definition to airbyte-config --- .../init/src/main/resources/seed/source_definitions.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 6e73e42baae8..6c07071b668c 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -453,6 +453,13 @@ icon: greenhouse.svg sourceType: api releaseStage: generally_available +- name: Gutendex + sourceDefinitionId: bff9a277-e01d-420d-81ee-80f28a307318 + dockerRepository: airbyte/source-gutendex + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.com/integrations/sources/gutendex + sourceType: api + releaseStage: alpha - name: Harness sourceDefinitionId: 6fe89830-d04d-401b-aad6-6552ffa5c4af dockerRepository: farosai/airbyte-harness-source From f848b44c5eb0822f9f81caca48d19fa97f603f30 Mon Sep 17 00:00:00 2001 From: Octavia Squidington III Date: Thu, 20 Oct 2022 20:13:58 +0000 Subject: [PATCH 9/9] auto-bump connector version --- .../src/main/resources/seed/source_specs.yaml | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 1a08b6b37450..fb5699e5cdbb 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -4575,6 +4575,84 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-gutendex:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.com/integrations/sources/gutendex" + 
connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Gutendex Spec" + type: "object" + additionalProperties: true + properties: + author_year_start: + type: "string" + description: "(Optional) Defines the minimum birth year of the authors.\ + \ Books by authors born prior to the start year will not be returned.\ + \ Supports both positive (CE) or negative (BCE) integer values" + pattern: "^[-]?[0-9]{1,4}$" + examples: + - 2002 + - 500 + - -500 + - 2020 + author_year_end: + type: "string" + description: "(Optional) Defines the maximum birth year of the authors.\ + \ Books by authors born after the end year will not be returned. Supports\ + \ both positive (CE) or negative (BCE) integer values" + pattern: "^[-]?[0-9]{1,4}$" + examples: + - 2002 + - 500 + - -500 + - 2020 + copyright: + type: "string" + description: "(Optional) Use this to find books with a certain copyright\ + \ status - true for books with existing copyrights, false for books in\ + \ the public domain in the USA, or null for books with no available copyright\ + \ information." + pattern: "^(true|false|null)$" + examples: + - true + - false + - null + languages: + type: "string" + description: "(Optional) Use this to find books in any of a list of languages.\ + \ They must be comma-separated, two-character language codes." + examples: + - "en" + - "en,fr,fi" + search: + type: "string" + description: "(Optional) Use this to search author names and book titles\ + \ with given words. They must be separated by a space (i.e. %20 in URL-encoded\ + \ format) and are case-insensitive." + examples: + - "dickens%20great%20expect" + - "dickens" + sort: + type: "string" + description: "(Optional) Use this to sort books - ascending for Project\ + \ Gutenberg ID numbers from lowest to highest, descending for IDs highest\ + \ to lowest, or popular (the default) for most popular to least popular\ + \ by number of downloads." 
+ pattern: "^(ascending|descending|popular)$" + examples: + - "ascending" + - "descending" + - "popular" + topic: + type: "string" + description: "(Optional) Use this to search for a case-insensitive key-phrase\ + \ in books' bookshelves or subjects." + examples: + - "children" + - "fantasy" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] - dockerImage: "farosai/airbyte-harness-source:0.1.23" spec: documentationUrl: "https://docs.faros.ai"