From 2b6cfe57bb7a52a0c066e60e89c3e023cda4af03 Mon Sep 17 00:00:00 2001 From: evantahler Date: Fri, 4 Aug 2023 17:10:26 -0700 Subject: [PATCH 1/2] [Docs] No `Deduped + Hostory`, `Append + Deduped` is the future! --- docs/cloud/core-concepts.md | 62 +- .../getting-started-with-airbyte-cloud.md | 57 +- .../connector-builder-ui/incremental-sync.md | 74 ++- .../record-processing.mdx | 119 +++- docs/integrations/destinations/amazon-sqs.md | 8 +- .../destinations/azure-blob-storage.md | 101 ++-- docs/integrations/destinations/bigquery.md | 2 +- docs/integrations/destinations/cassandra.md | 12 +- docs/integrations/destinations/chargify.md | 32 +- docs/integrations/destinations/clickhouse.md | 12 +- docs/integrations/destinations/csv.md | 29 +- docs/integrations/destinations/cumulio.md | 99 ++- docs/integrations/destinations/databricks.md | 161 ++--- docs/integrations/destinations/doris.md | 2 +- docs/integrations/destinations/duckdb.md | 37 +- docs/integrations/destinations/dynamodb.md | 74 +-- docs/integrations/destinations/exasol.md | 18 +- docs/integrations/destinations/gcs.md | 14 +- docs/integrations/destinations/kafka.md | 12 +- docs/integrations/destinations/keen.md | 12 +- docs/integrations/destinations/kinesis.md | 12 +- docs/integrations/destinations/local-json.md | 33 +- docs/integrations/destinations/meilisearch.md | 24 +- docs/integrations/destinations/mongodb.md | 12 +- docs/integrations/destinations/mqtt.md | 62 +- docs/integrations/destinations/mssql.md | 24 +- docs/integrations/destinations/mysql.md | 14 +- docs/integrations/destinations/oracle.md | 18 +- docs/integrations/destinations/postgres.md | 12 +- docs/integrations/destinations/pubsub.md | 56 +- docs/integrations/destinations/pulsar.md | 12 +- docs/integrations/destinations/r2.md | 187 +++--- docs/integrations/destinations/rabbitmq.md | 41 +- docs/integrations/destinations/redis.md | 58 +- docs/integrations/destinations/redpanda.md | 12 +- docs/integrations/destinations/redshift.md | 4 +- docs/integrations/destinations/rockset.md | 12 +- docs/integrations/destinations/s3-glue.md | 16 +- docs/integrations/destinations/s3.md | 134 ++--- docs/integrations/destinations/scylla.md | 43 +- docs/integrations/destinations/selectdb.md | 24 +- docs/integrations/destinations/snowflake.md | 4 +- docs/integrations/destinations/sqlite.md | 32 +- .../destinations/starburst-galaxy.md | 76 +-- docs/integrations/destinations/streamr.md | 12 +- docs/integrations/destinations/teradata.md | 47 +- docs/integrations/destinations/tidb.md | 14 +- docs/integrations/destinations/typesense.md | 14 +- docs/integrations/destinations/vertica.md | 85 +-- docs/integrations/destinations/weaviate.md | 41 +- docs/integrations/sources/alloydb.md | 123 ++-- docs/integrations/sources/apple-search-ads.md | 26 +- docs/integrations/sources/bing-ads.md | 79 ++- docs/integrations/sources/delighted.md | 28 +- .../sources/facebook-marketing.md | 56 +- docs/integrations/sources/freshdesk.md | 69 ++- docs/integrations/sources/gitlab.md | 54 +- docs/integrations/sources/google-ads.md | 23 +- .../sources/google-analytics-data-api.md | 4 +- .../sources/google-analytics-v4.md | 2 +- .../sources/google-search-console.md | 4 +- docs/integrations/sources/greenhouse.md | 74 +-- docs/integrations/sources/harvest.md | 62 +- docs/integrations/sources/instagram.md | 83 +-- docs/integrations/sources/iterable.md | 99 ++- docs/integrations/sources/jira.md | 115 ++-- docs/integrations/sources/klaviyo.md | 33 +- docs/integrations/sources/mixpanel.md | 31 +- 
docs/integrations/sources/notion.md | 36 +- docs/integrations/sources/onesignal.md | 31 +- docs/integrations/sources/pinterest.md | 42 +- docs/integrations/sources/postgres.md | 13 +- docs/integrations/sources/prestashop.md | 4 +- docs/integrations/sources/quickbooks.md | 35 +- docs/integrations/sources/redshift.md | 4 +- docs/integrations/sources/salesforce.md | 24 +- docs/integrations/sources/salesloft.md | 73 +-- docs/integrations/sources/sentry.md | 52 +- docs/integrations/sources/sftp-bulk.md | 47 +- docs/integrations/sources/sftp.md | 48 +- docs/integrations/sources/square.md | 100 ++-- docs/integrations/sources/strava.md | 113 ++-- docs/integrations/sources/surveycto.md | 20 +- docs/integrations/sources/tempo.md | 23 +- docs/integrations/sources/woocommerce.md | 20 +- docs/integrations/sources/yandex-metrica.md | 17 +- docs/integrations/sources/zendesk-chat.md | 44 +- .../basic-normalization.md | 215 +++---- .../connections/README.md | 45 +- ...story.md => incremental-append-deduped.md} | 72 +-- .../connections/incremental-append.md | 2 +- docusaurus/sidebars.js | 566 +++++++++--------- 92 files changed, 2401 insertions(+), 2282 deletions(-) rename docs/understanding-airbyte/connections/{incremental-deduped-history.md => incremental-append-deduped.md} (58%) diff --git a/docs/cloud/core-concepts.md b/docs/cloud/core-concepts.md index 1439c769e013..9383c6ffd036 100644 --- a/docs/cloud/core-concepts.md +++ b/docs/cloud/core-concepts.md @@ -1,12 +1,12 @@ # Core Concepts -Airbyte enables you to build data pipelines and replicate data from a source to a destination. You can configure how frequently the data is synced, what data is replicated, what format the data is written to in the destination, and if the data is stored in raw tables format or basic normalized (or JSON) format. +Airbyte enables you to build data pipelines and replicate data from a source to a destination. You can configure how frequently the data is synced, what data is replicated, what format the data is written to in the destination, and if the data is stored in raw tables format or basic normalized (or JSON) format. This page describes the concepts you need to know to use Airbyte. -## Source +## Source -A source is an API, file, database, or data warehouse that you want to ingest data from. +A source is an API, file, database, or data warehouse that you want to ingest data from. ## Destination @@ -18,7 +18,7 @@ An Airbyte component which pulls data from a source or pushes data to a destinat ## Connection -A connection is an automated data pipeline that replicates data from a source to a destination. +A connection is an automated data pipeline that replicates data from a source to a destination. Setting up a connection involves configuring the following parameters: @@ -38,7 +38,7 @@ Setting up a connection involves configuring the following parameters: Destination Namespace and stream names - Where should the replicated data be written? + Where should the replicated data be written? @@ -63,28 +63,28 @@ Setting up a connection involves configuring the following parameters: ## Stream -A stream is a group of related records. +A stream is a group of related records. 
Examples of streams: -* A table in a relational database -* A resource or API endpoint for a REST API -* The records from a directory containing many files in a filesystem +- A table in a relational database +- A resource or API endpoint for a REST API +- The records from a directory containing many files in a filesystem ## Field -A field is an attribute of a record in a stream. +A field is an attribute of a record in a stream. -Examples of fields: +Examples of fields: -* A column in the table in a relational database -* A field in an API response +- A column in the table in a relational database +- A field in an API response ## Namespace Namespace is a group of streams in a source or destination. Common use cases for namespaces are enforcing permissions, segregating test and production data, and general data organization. -A schema in a relational database system is an example of a namespace. +A schema in a relational database system is an example of a namespace. In a source, the namespace is the location from where the data is replicated to the destination. @@ -121,32 +121,32 @@ In a destination, the namespace is the location where the replicated data is sto A sync mode governs how Airbyte reads from a source and writes to a destination. Airbyte provides different sync modes to account for various use cases. -* **Full Refresh | Overwrite:** Sync all records from the source and replace data in destination by overwriting it. -* **Full Refresh | Append:** Sync all records from the source and add them to the destination without deleting any data. -* **Incremental Sync | Append:** Sync new records from the source and add them to the destination without deleting any data. -* **Incremental Sync | Deduped History:** Sync new records from the source and add them to the destination. Also provides a de-duplicated view mirroring the state of the stream in the source. +- **Full Refresh | Overwrite:** Sync all records from the source and replace data in destination by overwriting it. +- **Full Refresh | Append:** Sync all records from the source and add them to the destination without deleting any data. +- **Incremental Sync | Append:** Sync new records from the source and add them to the destination without deleting any data. +- **Incremental Sync | Append + Deduped:** Sync new records from the source and add them to the destination. Also provides a de-duplicated view mirroring the state of the stream in the source. ## Normalization Normalization is the process of structuring data from the source into a format appropriate for consumption in the destination. For example, when writing data from a nested, dynamically typed source like a JSON API to a relational destination like Postgres, normalization is the process which un-nests JSON from the source into a relational table format which uses the appropriate column types in the destination. -Note that normalization is only relevant for the following relational database & warehouse destinations: +Note that normalization is only relevant for the following relational database & warehouse destinations: -* BigQuery -* Snowflake -* Redshift -* Postgres -* Oracle -* MySQL -* MSSQL +- BigQuery +- Snowflake +- Redshift +- Postgres +- Oracle +- MySQL +- MSSQL -Other destinations do not support normalization as described in this section, though they may normalize data in a format that makes sense for them. 
For example, the S3 destination connector offers the option of writing JSON files in S3, but also offers the option of writing statically typed files such as Parquet or Avro. +Other destinations do not support normalization as described in this section, though they may normalize data in a format that makes sense for them. For example, the S3 destination connector offers the option of writing JSON files in S3, but also offers the option of writing statically typed files such as Parquet or Avro. After a sync is complete, Airbyte normalizes the data. When setting up a connection, you can choose one of the following normalization options: -* Raw data (no normalization): Airbyte places the JSON blob version of your data in a table called `_airbyte_raw_` -* Basic Normalization: Airbyte converts the raw JSON blob version of your data to the format of your destination. *Note: Not all destinations support normalization.* -* [dbt Cloud integration](https://docs.airbyte.com/cloud/managing-airbyte-cloud/dbt-cloud-integration): Airbyte's dbt Cloud integration allows you to use dbt Cloud for transforming and cleaning your data during the normalization process. +- Raw data (no normalization): Airbyte places the JSON blob version of your data in a table called `_airbyte_raw_` +- Basic Normalization: Airbyte converts the raw JSON blob version of your data to the format of your destination. _Note: Not all destinations support normalization._ +- [dbt Cloud integration](https://docs.airbyte.com/cloud/managing-airbyte-cloud/dbt-cloud-integration): Airbyte's dbt Cloud integration allows you to use dbt Cloud for transforming and cleaning your data during the normalization process. :::note @@ -156,7 +156,7 @@ Normalizing data may cause an increase in your destination's compute cost. This ## Workspace -A workspace is a grouping of sources, destinations, connections, and other configurations. It lets you collaborate with team members and share resources across your team under a shared billing account. +A workspace is a grouping of sources, destinations, connections, and other configurations. It lets you collaborate with team members and share resources across your team under a shared billing account. When you [sign up](http://cloud.airbyte.com/signup) for Airbyte Cloud, we automatically create your first workspace where you are the only user with access. You can set up your sources and destinations to start syncing data and invite other users to join your workspace. diff --git a/docs/cloud/getting-started-with-airbyte-cloud.md b/docs/cloud/getting-started-with-airbyte-cloud.md index 8278e04c4f64..1071020a92c9 100644 --- a/docs/cloud/getting-started-with-airbyte-cloud.md +++ b/docs/cloud/getting-started-with-airbyte-cloud.md @@ -64,26 +64,27 @@ A connection is an automated data pipeline that replicates data from a source to Setting up a connection involves configuring the following parameters: -| Parameter | Description | -|----------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Replication frequency | How often should the data sync? | -| [Data residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-the-data-residency-for-a-connection) | Where should the data be processed? 
| -| Destination Namespace and stream names | Where should the replicated data be written? | -| Catalog selection | Which streams and fields should be replicated from the source to the destination? | -| Sync mode | How should the streams be replicated (read and written)? | -| Optional transformations | How should Airbyte protocol messages (raw JSON blob) data be converted into other data representations? | +| Parameter | Description | +| ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | +| Replication frequency | How often should the data sync? | +| [Data residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-the-data-residency-for-a-connection) | Where should the data be processed? | +| Destination Namespace and stream names | Where should the replicated data be written? | +| Catalog selection | Which streams and fields should be replicated from the source to the destination? | +| Sync mode | How should the streams be replicated (read and written)? | +| Optional transformations | How should Airbyte protocol messages (raw JSON blob) data be converted into other data representations? | For more information, see [Connections and Sync Modes](../understanding-airbyte/connections/README.md) and [Namespaces](../understanding-airbyte/namespaces.md) If you need to use [cron scheduling](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html): -1. In the **Replication Frequency** dropdown, click **Cron**. + +1. In the **Replication Frequency** dropdown, click **Cron**. 2. Enter a cron expression and choose a time zone to create a sync schedule. :::note -* Only one sync per connection can run at a time. -* If cron schedules a sync to run before the last one finishes, the scheduled sync will start after the last sync completes. -* Cloud does not allow schedules that sync more than once per hour. +- Only one sync per connection can run at a time. +- If cron schedules a sync to run before the last one finishes, the scheduled sync will start after the last sync completes. +- Cloud does not allow schedules that sync more than once per hour. ::: @@ -171,12 +172,12 @@ To better understand the destination namespace configurations, see [Destination - Select **Overwrite** to erase the old data and replace it completely - Select **Append** to capture changes to your table **Note:** This creates duplicate records - - Select **Deduped + history** to mirror your source while keeping records unique + - Select **Append + Deduped** to mirror your source while keeping records unique **Note:** Some sync modes may not yet be available for your source or destination 4. **Cursor field**: Used in **Incremental** sync mode to determine which records to sync. Airbyte pre-selects the cursor field for you (example: updated date). If you have multiple cursor fields, select the one you want. - 5. **Primary key**: Used in **Deduped + history** sync mode to determine the unique identifier. + 5. **Primary key**: Used in **Append + Deduped** sync mode to determine the unique identifier. 6. **Destination**: - **Namespace:** The database schema of your destination tables. - **Stream name:** The final table name in destination. @@ -193,24 +194,28 @@ Verify the sync by checking the logs: 3. Check the data at your destination. 
If you added a Destination Stream Prefix while setting up the connection, make sure to search for the stream name with the prefix. ## Allowlist IP addresses + Depending on your [data residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-your-default-data-residency) location, you may need to allowlist the following IP addresses to enable access to Airbyte: ### United States and Airbyte Default + #### GCP region: us-west3 -[comment]: # (IMPORTANT: if changing the list of IP addresses below, you must also update the connector.airbyteCloudIpAddresses LaunchDarkly flag to show the new list so that the correct list is shown in the Airbyte Cloud UI, then reach out to the frontend team and ask them to update the default value in the useAirbyteCloudIps hook!) +[comment]: # "IMPORTANT: if changing the list of IP addresses below, you must also update the connector.airbyteCloudIpAddresses LaunchDarkly flag to show the new list so that the correct list is shown in the Airbyte Cloud UI, then reach out to the frontend team and ask them to update the default value in the useAirbyteCloudIps hook!" -* 34.106.109.131 -* 34.106.196.165 -* 34.106.60.246 -* 34.106.229.69 -* 34.106.127.139 -* 34.106.218.58 -* 34.106.115.240 -* 34.106.225.141 +- 34.106.109.131 +- 34.106.196.165 +- 34.106.60.246 +- 34.106.229.69 +- 34.106.127.139 +- 34.106.218.58 +- 34.106.115.240 +- 34.106.225.141 ### European Union + #### AWS region: eu-west-3 -* 13.37.4.46 -* 13.37.142.60 -* 35.181.124.238 + +- 13.37.4.46 +- 13.37.142.60 +- 35.181.124.238 diff --git a/docs/connector-development/connector-builder-ui/incremental-sync.md b/docs/connector-development/connector-builder-ui/incremental-sync.md index 4b5a2de7fcaf..83252fe9248c 100644 --- a/docs/connector-development/connector-builder-ui/incremental-sync.md +++ b/docs/connector-development/connector-builder-ui/incremental-sync.md @@ -7,24 +7,26 @@ This is especially important if there are a large number of records to sync and/ Incremental syncs are usually implemented using a cursor value (like a timestamp) that delineates which data was pulled and which data is new. A very common cursor value is an `updated_at` timestamp. This cursor means that records whose `updated_at` value is less than or equal than that cursor value have been synced already, and that the next sync should only export records whose `updated_at` value is greater than the cursor value. To use incremental syncs, the API endpoint needs to fullfil the following requirements: -* Records contain a top-level date/time field that defines when this record was last updated (the "cursor field") - * If the record's cursor field is nested, you can use an "Add Field" transformation to copy it to the top-level, and a Remove Field to remove it from the object. This will effectively move the field to the top-level of the record -* It's possible to filter/request records by the cursor field -* The records are sorted in ascending order based on their cursor field -The knowledge of a cursor value also allows the Airbyte system to automatically keep a history of changes to records in the destination. To learn more about how different modes of incremental syncs, check out the [Incremental Sync - Append](/understanding-airbyte/connections/incremental-append/) and [Incremental Sync - Deduped History](/understanding-airbyte/connections/incremental-deduped-history) pages. 
+- Records contain a top-level date/time field that defines when this record was last updated (the "cursor field") + - If the record's cursor field is nested, you can use an "Add Field" transformation to copy it to the top-level, and a Remove Field to remove it from the object. This will effectively move the field to the top-level of the record +- It's possible to filter/request records by the cursor field +- The records are sorted in ascending order based on their cursor field + +The knowledge of a cursor value also allows the Airbyte system to automatically keep a history of changes to records in the destination. To learn more about how different modes of incremental syncs, check out the [Incremental Sync - Append](/understanding-airbyte/connections/incremental-append/) and [Incremental Sync - Append + Deduped](/understanding-airbyte/connections/incremental-apped-deduped) pages. ## Configuration To configure incremental syncs for a stream in the connector builder, you have to specify how the records will represent the **"last changed" / "updated at" timestamp**, the **initial time range** to fetch records for and **how to request records from a certain time range**. In the builder UI, these things are specified like this: -* The "Cursor field" is the property in the record that defines the date and time when the record got changed. It's used to decide which records are synced already and which records are "new" -* The "Datetime format" specifies the format the cursor field is using to specify date and time. Check out the [YAML reference](/connector-development/config-based/understanding-the-yaml-file/reference#/definitions/DatetimeBasedCursor) for a full list of supported formats. -* "API time filtering capabilities" specifies if the API allows filtering by start and end datetime or whether it's a "feed" of data going from newest to oldest records. See the "Incremental sync without time filtering" section below for details. -* The "Start datetime" is the initial start date of the time range to fetch records for. When doing incremental syncs, the second sync will overwrite this date with the last record that got synced so far. -* The "End datetime" is the end date of the time range to fetch records for. In most cases it's set to the current date and time when the sync is started to sync all changes that happened so far. -* The "Inject start/end time into outgoing HTTP request" defines how to request records that got changed in the time range to sync. In most cases the start and end time is added as a query parameter or body parameter + +- The "Cursor field" is the property in the record that defines the date and time when the record got changed. It's used to decide which records are synced already and which records are "new" +- The "Datetime format" specifies the format the cursor field is using to specify date and time. Check out the [YAML reference](/connector-development/config-based/understanding-the-yaml-file/reference#/definitions/DatetimeBasedCursor) for a full list of supported formats. +- "API time filtering capabilities" specifies if the API allows filtering by start and end datetime or whether it's a "feed" of data going from newest to oldest records. See the "Incremental sync without time filtering" section below for details. +- The "Start datetime" is the initial start date of the time range to fetch records for. When doing incremental syncs, the second sync will overwrite this date with the last record that got synced so far. 
+- The "End datetime" is the end date of the time range to fetch records for. In most cases it's set to the current date and time when the sync is started to sync all changes that happened so far. +- The "Inject start/end time into outgoing HTTP request" defines how to request records that got changed in the time range to sync. In most cases the start and end time is added as a query parameter or body parameter ## Example @@ -33,6 +35,7 @@ The [API of The Guardian](https://open-platform.theguardian.com/documentation/se The `/search` endpoint has a `from-date` and a `to-date` query parameter which can be used to only request data for a certain time range. Content records have the following form: + ``` { "id": "world/2022/oct/21/russia-ukraine-war-latest-what-we-know-on-day-240-of-the-invasion", @@ -46,26 +49,30 @@ Content records have the following form: ``` As this fulfills the requirements for incremental syncs, we can configure the "Incremental sync" section in the following way: -* "Cursor field" is set to `webPublicationDate` -* "Datetime format" is set to `%Y-%m-%dT%H:%M:%SZ` -* "Start datetime" is set to "user input" to allow the user of the connector configuring a Source to specify the time to start syncing -* "End datetime" is set to "now" to fetch all articles up to the current date -* "Inject start time into outgoing HTTP request" is set to `request_parameter` with "Field" set to `from-date` -* "Inject end time into outgoing HTTP request" is set to `request_parameter` with "Field" set to `to-date` + +- "Cursor field" is set to `webPublicationDate` +- "Datetime format" is set to `%Y-%m-%dT%H:%M:%SZ` +- "Start datetime" is set to "user input" to allow the user of the connector configuring a Source to specify the time to start syncing +- "End datetime" is set to "now" to fetch all articles up to the current date +- "Inject start time into outgoing HTTP request" is set to `request_parameter` with "Field" set to `from-date` +- "Inject end time into outgoing HTTP request" is set to `request_parameter` with "Field" set to `to-date` This API orders records by default from new to old, which is not optimal for a reliable sync as the last encountered cursor value will be the most recent date even if some older records did not get synced (for example if a sync fails halfway through). It's better to start with the oldest records and work your way up to make sure that all older records are synced already once a certain date is encountered on a record. In this case the API can be configured to behave like this by setting an additional parameter: -* Add a new "Query Parameter" near the top of the page -* Set the key to `order-by` -* Set the value to `oldest` + +- Add a new "Query Parameter" near the top of the page +- Set the key to `order-by` +- Set the value to `oldest` Setting the start date in the "Testing values" to a date in the past like **2023-04-09T00:00:00Z** results in the following request: +
 curl 'https://content.guardianapis.com/search?order-by=oldest&from-date=2023-04-09T00:00:00Z&to-date={`now`}'
 
The last encountered date will be saved as part of the connection - when the next sync is running, it picks up from the last record. Let's assume the last encountered article looked like this: +
 {`{
   "id": "business/live/2023/apr/15/uk-bosses-more-optimistic-energy-prices-fall-ai-spending-boom-economics-business-live",
@@ -77,6 +84,7 @@ The last encountered date will be saved as part of the connection - when the nex
 
Then when a sync is triggered for the same connection the next day, the following request is made: +
 curl 'https://content.guardianapis.com/search?order-by=oldest&from-date=2023-04-15T07:30:58Z&to-date={``}'
 
@@ -99,7 +107,7 @@ The "No filter" option can only be used if the data is sorted from newest to old ## Advanced settings -The description above is sufficient for a lot of APIs. However there are some more subtle configurations which sometimes become relevant. +The description above is sufficient for a lot of APIs. However there are some more subtle configurations which sometimes become relevant. ### Split up interval @@ -108,6 +116,7 @@ When incremental syncs are enabled and "Split up interval" is set, the connector The "cursor granularity" also needs to be set to an ISO 8601 duration - it represents the smallest possible time unit the API supports to filter records by. It's used to ensure the start of a interval does not overlap with the end of the previous one. For example if the "Step" is set to 10 days (`P10D`) and the "Cursor granularity" set to second (`PT1S`) for the Guardian articles stream described above and a longer time range, then the following requests will be performed: +
 curl 'https://content.guardianapis.com/search?order-by=oldest&from-date=2023-01-01T00:00:00Z&to-date=2023-01-10T00:00:00Z'{`\n`}
 curl 'https://content.guardianapis.com/search?order-by=oldest&from-date=2023-01-10T00:00:00Z&to-date=2023-01-20T00:00:00Z'{`\n`}
@@ -118,8 +127,9 @@ curl 'https://content.guardianapis.com/search?order-by=oldest&from-date=2023-
 After an interval is processed, the cursor value of the last record will be saved as part of the connection as the new cutoff date.
 
 If left unset, the connector will not split up the time range at all but will instead just request all records for the entire target time range. This configuration works for all connectors, but there are two reasons to change it:
-* **To protect a connection against intermittent failures** - if the "Step" size is a day, the cutoff date is saved after all records associated with a day are proccessed. If a sync fails halfway through because the API, the Airbyte system, the destination or the network between these components has a failure, then at most one day worth of data needs to be resynced. However, a smaller step size might cause more requests to the API and more load on the system. It depends on the expected amount of data and load characteristics of an API what step size is optimal, but for a lot of applications the default of one month is a good starting point.
-* **The API requires the connector to fetch data in pre-specified chunks** - for example the [Exchange Rates API](https://exchangeratesapi.io/documentation/) makes the date to fetch data for part of the URL path and only allows to fetch data for a single day at a time
+
+- **To protect a connection against intermittent failures** - if the "Step" size is a day, the cutoff date is saved after all records associated with a day are processed. If a sync fails halfway through because the API, the Airbyte system, the destination or the network between these components has a failure, then at most one day's worth of data needs to be resynced. However, a smaller step size might cause more requests to the API and more load on the system. The optimal step size depends on the expected amount of data and the load characteristics of the API, but for a lot of applications the default of one month is a good starting point.
+- **The API requires the connector to fetch data in pre-specified chunks** - for example the [Exchange Rates API](https://exchangeratesapi.io/documentation/) makes the date to fetch data for part of the URL path and only allows fetching data for a single day at a time
 
 ### Lookback window
 
@@ -128,10 +138,12 @@ The "Lookback window" specifies a duration that is subtracted from the last cuto
 Some APIs update records over time but do not allow filtering or searching by modification date, only by creation date. For example the API of The Guardian might change the title of an article after it got published, but the `webPublicationDate` still shows the original date the article got published initially.
 
 In these cases, there are two options:
-* **Do not use incremental sync** and always sync the full set of records to always have a consistent state, losing the advantages of reduced load and [automatic history keeping in the destination](/understanding-airbyte/connections/incremental-deduped-history)
-* **Configure the "Lookback window"** to not only sync exclusively new records, but resync some portion of records before the cutoff date to catch changes that were made to existing records, trading off data consistency and the amount of synced records. In the case of the API of The Guardian, news articles tend to only be updated for a few days after the initial release date, so this strategy should be able to catch most updates without having to resync all articles.
+
+- **Do not use incremental sync** and always sync the full set of records to always have a consistent state, losing the advantages of reduced load and [automatic history keeping in the destination](/understanding-airbyte/connections/incremental-append-deduped)
+- **Configure the "Lookback window"** to not sync exclusively new records, but to also resync some portion of records before the cutoff date to catch changes that were made to existing records, trading off data consistency against the number of synced records. In the case of the API of The Guardian, news articles tend to only be updated for a few days after the initial release date, so this strategy should be able to catch most updates without having to resync all articles.
 
 Reiterating the example from above with a "Lookback window" of 2 days configured, let's assume the last encountered article looked like this:
+
 
 {`{
   "id": "business/live/2023/apr/15/uk-bosses-more-optimistic-energy-prices-fall-ai-spending-boom-economics-business-live",
@@ -143,6 +155,7 @@ Reiterating the example from above with a "Lookback window" of 2 days configured
 
Then when a sync is triggered for the same connection the next day, the following request is made: +
 curl 'https://content.guardianapis.com/search?order-by=oldest&from-date=2023-04-13T07:30:58Z&to-date={``}'
 
@@ -150,12 +163,13 @@ curl 'https://content.guardianapis.com/search?order-by=oldest&from-date=2023- ## Custom parameter injection Using the "Inject start time / end time into outgoing HTTP request" option in the incremental sync form works for most cases, but sometimes the API has special requirements that can't be handled this way: -* The API requires adding a prefix or a suffix to the actual value -* Multiple values need to be put together in a single parameter -* The value needs to be injected into the URL path -* Some conditional logic needs to be applied + +- The API requires adding a prefix or a suffix to the actual value +- Multiple values need to be put together in a single parameter +- The value needs to be injected into the URL path +- Some conditional logic needs to be applied To handle these cases, disable injection in the incremental sync form and use the generic parameter section at the bottom of the stream configuration form to freely configure query parameters, headers and properties of the JSON body, by using jinja expressions and [available variables](/connector-development/config-based/understanding-the-yaml-file/reference/#/variables). You can also use these variables as part of the URL path. For example the [Sendgrid API](https://docs.sendgrid.com/api-reference/e-mail-activity/filter-all-messages) requires setting both start and end time in a `query` parameter. -For this case, you can use the `stream_interval` variable to configure a query parameter with "key" `query` and "value" `last_event_time BETWEEN TIMESTAMP "{{stream_interval.start_time}}" AND TIMESTAMP "{{stream_interval.end_time}}"` to filter down to the right window in time. \ No newline at end of file +For this case, you can use the `stream_interval` variable to configure a query parameter with "key" `query` and "value" `last_event_time BETWEEN TIMESTAMP "{{stream_interval.start_time}}" AND TIMESTAMP "{{stream_interval.end_time}}"` to filter down to the right window in time. diff --git a/docs/connector-development/connector-builder-ui/record-processing.mdx b/docs/connector-development/connector-builder-ui/record-processing.mdx index 34e7c27420ec..c83cb3b4fc3b 100644 --- a/docs/connector-development/connector-builder-ui/record-processing.mdx +++ b/docs/connector-development/connector-builder-ui/record-processing.mdx @@ -1,27 +1,39 @@ -import Diff from './assets/record-processing-schema-diff.png'; +import Diff from "./assets/record-processing-schema-diff.png"; # Record processing Connectors built with the connector builder always make HTTP requests, receive the responses and emit records. Besides making the right requests, it's important to properly hand over the records to the system: -* Extract the records (record selection) -* Do optional post-processing (transformations) -* Provide record meta data to the system to inform downstream processes (primary key and declared schema) + +- Extract the records (record selection) +- Do optional post-processing (transformations) +- Provide record meta data to the system to inform downstream processes (primary key and declared schema) ## Record selection - + When doing HTTP requests, the connector expects the records to be part of the response JSON body. The "Record selector" field of the stream needs to be set to the property of the response object that holds the records. Very often, the response body contains an array of records along with some suplementary information (for example meta data for pagination). 
For example the ["Most popular" NY Times API](https://developer.nytimes.com/docs/most-popular-product/1/overview) returns the following response body: +
-{`{
+  {`{
     "status": "OK",
     "copyright": "Copyright (c) 2023 The New York Times Company.  All Rights Reserved.",
     "num_results": 20,
-    `}{`"results": [`}{`
+    `}
+  {`"results": [`}
+  {`
       {
         "uri": "nyt://article/c15e5227-ed68-54d9-9e5b-acf5a451ec37",
         "url": "https://www.nytimes.com/2023/04/16/us/science-of-reading-literacy-parents.html",
@@ -31,7 +43,9 @@ For example the ["Most popular" NY Times API](https://developer.nytimes.com/docs
         // ...
       },
       // ..
-    `}{`]`}{`,
+    `}
+  {`]`}
+  {`,
     // ...
 }`}
 
@@ -41,11 +55,14 @@ For example the ["Most popular" NY Times API](https://developer.nytimes.com/docs ### Nested objects In some cases the array of actual records is nested multiple levels deep in the response, like for the ["Archive" NY Times API](https://developer.nytimes.com/docs/archive-product/1/overview): +
-{`{
+  {`{
     "copyright": "Copyright (c) 2020 The New York Times Company. All Rights Reserved.",
     "response": {
-      `}{`"docs": [`}{`
+      `}
+  {`"docs": [`}
+  {`
         {
           "abstract": "From the Treaty of Versailles to Prohibition, the events of that year shaped America, and the world, for a century to come. ",
           "web_url": "https://www.nytimes.com/2018/12/31/opinion/1919-america.html",
@@ -53,9 +70,11 @@ In some cases the array of actual records is nested multiple levels deep in the
           // ...
         },
         // ...
-      `}{`]`}{`
+      `}
+  {`]`}
+  {`
     }
-}`}    
+}`}
 
**Setting the record selector to "`response`,`docs`"** selects the nested array. @@ -63,8 +82,10 @@ In some cases the array of actual records is nested multiple levels deep in the ### Root array In some cases, the response body itself is an array of records, like in the [CoinAPI API](https://docs.coinapi.io/market-data/rest-api/quotes): +
-{`[`}{`
+  {`[`}
+  {`
   {
     "symbol_id": "BITSTAMP_SPOT_BTC_USD",
     "time_exchange": "2013-09-28T22:40:50.0000000Z",
@@ -78,7 +99,8 @@ In some cases, the response body itself is an array of records, like in the [Coi
    // ..
   }
   // ...
-`}{`]`}
+`}
+  {`]`}
 
In this case, **the record selector can be omitted** and the whole response becomes the list of records. @@ -86,18 +108,23 @@ In this case, **the record selector can be omitted** and the whole response beco ### Single object Sometimes, there is only one record returned per request from the API. In this case, the record selector can also point to an object instead of an array which will be handled as the only record, like in the case of the [Exchange Rates API](https://exchangeratesapi.io/documentation/#historicalrates): +
-{`{
+  {`{
     "success": true,
     "historical": true,
     "date": "2013-12-24",
     "timestamp": 1387929599,
     "base": "GBP",
-    `}{`"rates": {`}{`
+    `}
+  {`"rates": {`}
+  {`
         "USD": 1.636492,
         "EUR": 1.196476,
         "CAD": 1.739516
-    `}{`}`}{`
+    `}
+  {`}`}
+  {`
 }`}
 
@@ -142,17 +169,27 @@ In this case a record selector with a placeholder `*` selects all children at th ## Transformations It is recommended to not change records during the extraction process the connector is performing, but instead load them into the downstream warehouse unchanged and perform necessary transformations there in order to stay flexible in what data is required. However there are some reasons that require the modifying the fields of records before they are sent to the warehouse: -* Remove personally identifiable information (PII) to ensure compliance with local legislation -* Pseudonymise sensitive fields -* Remove large fields that don't contain interesting information and significantly increase load on the system + +- Remove personally identifiable information (PII) to ensure compliance with local legislation +- Pseudonymise sensitive fields +- Remove large fields that don't contain interesting information and significantly increase load on the system The "transformations" feature can be used for these purposes. ### Removing fields - + To remove a field from a record, add a new transformation in the "Transformations" section of type "remove" and enter the field path. For example in case of the [EmailOctopus API](https://emailoctopus.com/api-documentation/campaigns/get-all), the campaigns records also include the html content of the mailing which takes up a lot of space: + ``` { "data": [ @@ -177,6 +214,7 @@ To remove a field from a record, add a new transformation in the "Transformation ``` Setting the "Path" of the remove-transformation to `content` removes these fields from the records: + ``` { "id": "00000000-0000-0000-0000-000000000000", @@ -212,6 +250,7 @@ Imagine that regardless of which level a properties appears, it should be remove ``` The `*` character can also be used as a placeholder to filter for all fields that start with a certain prefix - the "Path" `s*` will remove all fields from the top level that start with the character s: + ``` { "id": "00000000-0000-0000-0000-000000000000", @@ -222,12 +261,20 @@ The `*` character can also be used as a placeholder to filter for all fields tha } ``` - ### Adding fields - + Adding fields can be used to apply a hashing function to an existing field to pseudonymize it. To do this, add a new transformation in the "Transformations" section of type "add" and enter the field path and the new value. For example in case of the [EmailOctopus API](https://emailoctopus.com/api-documentation/campaigns/get-all), the campaigns records include the name of the sender: + ``` { "data": [ @@ -248,6 +295,7 @@ Adding fields can be used to apply a hashing function to an existing field to ps ``` To apply a hash function to it, set the "Path" to "`from`, `name`" to select the name property nested in the from object and set the value to `{{ record['from']['name'] | hash('md5') }}`. This hashes the name in the record: + ``` { "id": "00000000-0000-0000-0000-000000000000", @@ -273,11 +321,12 @@ Besides bringing the records in the right shape, it's important to communicate s ### Primary key -The "Primary key" field specifies how to uniquely identify a record. This is important for downstream de-duplication of records (e.g. by the [incremental sync - deduped history sync mode](/understanding-airbyte/connections/incremental-deduped-history)). +The "Primary key" field specifies how to uniquely identify a record. This is important for downstream de-duplication of records (e.g. 
by the [incremental sync - Append + Deduped sync mode](/understanding-airbyte/connections/incremental-apped-deduped)). In a lot of cases, like for the EmailOctopus example from above, there is a dedicated id field that can be used for this purpose. It's important that the value of the id field is guaranteed to only occur once for a single record. In some cases there is no such field but a combination of multiple fields is guaranteed to be unique, for example the shipping zone locations of the [Woocommerce API](https://woocommerce.github.io/woocommerce-rest-api-docs/#shipping-zone-locations) do not have an id, but each combination of the `code` and `type` fields is guaranteed to be unique: + ``` [ { @@ -298,14 +347,16 @@ In this case, the "Primary key" can be set to "`code`, `type`" to allow automati Similar to the "Primary key", the "Declared schema" defines how the records will be shaped via a [JSON Schema definition](https://json-schema.org/). It defines which fields and nested fields occur in the records, whether they are always available or sometimes missing and which types they are. This information is used by the Airbyte system for different purposes: -* **Column selection** when configuring a connection - in Airbyte cloud, the declared schema allows the user to pick which columns/fields are passed to the destination to dynamically reduce the amount of synced data -* **Recreating the data structure with right columns** in destination - this allows a warehouse destination to create a SQL table which the columns matching the fields of records -* **Detecting schema changes** - if the schema of a stream changes for an existing connection, this situation can be handled gracefully by Airbyte instead of causing errors in the destination + +- **Column selection** when configuring a connection - in Airbyte cloud, the declared schema allows the user to pick which columns/fields are passed to the destination to dynamically reduce the amount of synced data +- **Recreating the data structure with right columns** in destination - this allows a warehouse destination to create a SQL table which the columns matching the fields of records +- **Detecting schema changes** - if the schema of a stream changes for an existing connection, this situation can be handled gracefully by Airbyte instead of causing errors in the destination When doing test reads, the connector builder analyzes the test records and shows the derived schema in the "Detected schema" tab. By default, new streams are configured to automatically import the detected schema into the declared schema on every test read. This behavior can be toggled off by disabling the `Automatically import declared schema` switch, in which case the declared schema can be manually edited in the UI and it will no longer be automatically updated when triggering test reads. For example the following test records: + ``` [ { @@ -326,6 +377,7 @@ For example the following test records: ``` result in the following schema: + ``` { "$schema": "http://json-schema.org/schema#", @@ -354,9 +406,14 @@ More strict is always better, but the detected schema is a good default to rely If `Automatically import detected schema` is disabled, and the declared schema deviates from the detected schema, the "Detected schema" tab in the testing panel highlights the differences. 
It's important to note that differences are not necessarily a problem that needs to be fixed - in some cases the currently loaded set of records in the testing panel doesn't feature all possible cases so the detected schema is too strict. However, if the declared schema is incompatible with the detected schema based on the test records, it's very likely there will be errors when running syncs. -Detected schema with highlighted differences +Detected schema with highlighted differences In the case of the example above, there are two differences between detected and declared schema. The first difference for the `name` field is not problematic: + ``` "name": { - "type": [ @@ -370,6 +427,7 @@ In the case of the example above, there are two differences between detected and The declared schema allows the `null` value for the name while the detected schema only encountered strings. If it's possible the `name` is set to null, the detected schema is configured correctly. The second difference will likely cause problems: + ``` "subject": { - "type": "number" @@ -378,7 +436,8 @@ The second difference will likely cause problems: ``` The `subject` field was detected as `string`, but is configured to be a `number` in the declared schema. As the API returned string subjects during testing, it's likely this will also happen during syncs which would render the declared schema inaccurate. Depending on the situation this can be fixed in multiple ways: -* If the API changed and subject is always a string now, the declared schema should be updated to reflect this: `"subject": { "type": "string" }` -* If the API is sometimes returning subject as number of string depending on the record, the declared schema should be updated to allow both data types: `"subject": { "type": ["string","number"] }` + +- If the API changed and subject is always a string now, the declared schema should be updated to reflect this: `"subject": { "type": "string" }` +- If the API is sometimes returning subject as number of string depending on the record, the declared schema should be updated to allow both data types: `"subject": { "type": ["string","number"] }` A common situation is that certain record fields do not have any any values for the test read data, so they are set to `null`. In the detected schema, these field are of type `"null"` which is most likely not correct for all cases. In these situations, the declared schema should be manually corrected. diff --git a/docs/integrations/destinations/amazon-sqs.md b/docs/integrations/destinations/amazon-sqs.md index 04143f055ccc..6178d690e0f0 100644 --- a/docs/integrations/destinations/amazon-sqs.md +++ b/docs/integrations/destinations/amazon-sqs.md @@ -18,7 +18,7 @@ Amazon SQS messages can only contain JSON, XML or text, and this connector suppo | :--- | :--- | :--- | | Full Refresh Sync | No | | | Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | | +| Incremental - Append + Deduped | No | | | Namespaces | No | | ## Getting started @@ -33,7 +33,7 @@ Amazon SQS messages can only contain JSON, XML or text, and this connector suppo If the target SQS Queue is not public, you will need the following permissions on the Queue: -* `sqs:SendMessage` +* `sqs:SendMessage` ### Properties @@ -54,9 +54,9 @@ Required properties are 'Queue URL' and 'AWS Region' as noted in **bold** below. * Message Body Key (STRING) * Rather than sending the entire Record as the Message Body, use this property to reference a Key in the Record to use as the message body. 
The value of this property should be the Key name in the input Record. The key must be at the top level of the Record, nested Keys are not supported. * Message Group Id (STRING) - * When using a FIFO queue, this property is **required**. + * When using a FIFO queue, this property is **required**. * See the [AWS SQS documentation](https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/using-messagegroupid-property.html) for more detail. - + ### Setup guide * [Create IAM Keys](https://aws.amazon.com/premiumsupport/knowledge-center/create-access-key/) diff --git a/docs/integrations/destinations/azure-blob-storage.md b/docs/integrations/destinations/azure-blob-storage.md index 640a468eee65..d93f132cfb99 100644 --- a/docs/integrations/destinations/azure-blob-storage.md +++ b/docs/integrations/destinations/azure-blob-storage.md @@ -7,48 +7,49 @@ This destination writes data to Azure Blob Storage. The Airbyte Azure Blob Storage destination allows you to sync data to Azure Blob Storage. Each stream is written to its own blob under the container. ## Prerequisites + - For Airbyte Open Source users using the [Postgres](https://docs.airbyte.com/integrations/sources/postgres) source connector, [upgrade](https://docs.airbyte.com/operator-guides/upgrading-airbyte/) your Airbyte platform to version `v0.40.0-alpha` or newer and upgrade your AzureBlobStorage connector to version `0.1.6` or newer ## Sync Mode -| Feature | Support | Notes | -| :--- | :---: | :--- | -| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured blob. | -| Incremental - Append Sync | ✅ | The append mode would only work for "Append blobs" blobs as per Azure limitations, more details [https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction\#blobs](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction#blobs) | -| Incremental - Deduped History | ❌ | As this connector does not support dbt, we don't support this sync mode on this destination. | +| Feature | Support | Notes | +| :----------------------------- | :-----: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured blob. | +| Incremental - Append Sync | ✅ | The append mode would only work for "Append blobs" blobs as per Azure limitations, more details [https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction\#blobs](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction#blobs) | +| Incremental - Append + Deduped | ❌ | destination. | ## Configuration -| Parameter | Type | Notes | -|:---------------------------------------------|:-------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Parameter | Type | Notes | +| :------------------------------------------- | :-----: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | Endpoint Domain Name | string | This is Azure Blob Storage endpoint domain name. 
Leave default value \(or leave it empty if run container from command line\) to use Microsoft native one. | | Azure blob storage container \(Bucket\) Name | string | A name of the Azure blob storage container. If not exists - will be created automatically. If leave empty, then will be created automatically airbytecontainer+timestamp. | | Azure Blob Storage account name | string | The account's name of the Azure Blob Storage. | | The Azure blob storage account key | string | Azure blob storage account key. Example: `abcdefghijklmnopqrstuvwxyz/0123456789+ABCDEFGHIJKLMNOPQRSTUVWXYZ/0123456789%++sampleKey==`. | | Azure Blob Storage output buffer size | integer | Azure Blob Storage output buffer size, in megabytes. Example: 5 | -| Azure Blob Storage spill size | integer | Azure Blob Storage spill size, in megabytes. Example: 500. After exceeding threshold connector will create new blob with incremented sequence number 'prefix_name'_seq+1 | +| Azure Blob Storage spill size | integer | Azure Blob Storage spill size, in megabytes. Example: 500. After exceeding threshold connector will create new blob with incremented sequence number 'prefix_name'\_seq+1 | | Format | object | Format specific configuration. See below for details. | ⚠️ Please note that under "Full Refresh Sync" mode, data in the configured blob will be wiped out before each sync. We recommend you to provision a dedicated Azure Blob Storage Container resource for this sync to prevent unexpected data deletion from misconfiguration. ⚠️ ## Output Schema -Each stream will be outputted to its dedicated Blob according to the configuration. The complete datastore of each stream includes all the output files under that Blob. You can think of the Blob as equivalent of a Table in the database world. +Each stream will be outputted to its dedicated Blob according to the configuration. The complete datastore of each stream includes all the output files under that Blob. You can think of the Blob as equivalent of a Table in the database world. If stream replication exceeds configured threshold data will continue to be replicated in a new blob file for better read performance -* Under Full Refresh Sync mode, old output files will be purged before new files are created. -* Under Incremental - Append Sync mode, new output files will be added that only contain the new data. +- Under Full Refresh Sync mode, old output files will be purged before new files are created. +- Under Incremental - Append Sync mode, new output files will be added that only contain the new data. ### CSV Like most of the other Airbyte destination connectors, usually the output has three columns: a UUID, an emission timestamp, and the data blob. With the CSV output, it is possible to normalize \(flatten\) the data blob to multiple columns. -| Column | Condition | Description | -| :--- | :--- | :--- | -| `_airbyte_ab_id` | Always exists | A uuid assigned by Airbyte to each processed record. | -| `_airbyte_emitted_at` | Always exists. | A timestamp representing when the event was pulled from the data source. | -| `_airbyte_data` | When no normalization \(flattening\) is needed, all data reside under this column as a json blob. | | -| root level fields | When root level normalization \(flattening\) is selected, the root level fields are expanded. 
| | +| Column | Condition | Description | +| :-------------------- | :------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------- | +| `_airbyte_ab_id` | Always exists | A uuid assigned by Airbyte to each processed record. | +| `_airbyte_emitted_at` | Always exists. | A timestamp representing when the event was pulled from the data source. | +| `_airbyte_data` | When no normalization \(flattening\) is needed, all data reside under this column as a json blob. | | +| root level fields | When root level normalization \(flattening\) is selected, the root level fields are expanded. | | For example, given the following json object from a source: @@ -64,15 +65,15 @@ For example, given the following json object from a source: With no normalization, the output CSV is: -| `_airbyte_ab_id` | `_airbyte_emitted_at` | `_airbyte_data` | -| :--- | :--- | :--- | -| `26d73cde-7eb1-4e1e-b7db-a4c03b4cf206` | 1622135805000 | `{ "user_id": 123, name: { "first": "John", "last": "Doe" } }` | +| `_airbyte_ab_id` | `_airbyte_emitted_at` | `_airbyte_data` | +| :------------------------------------- | :-------------------- | :------------------------------------------------------------- | +| `26d73cde-7eb1-4e1e-b7db-a4c03b4cf206` | 1622135805000 | `{ "user_id": 123, name: { "first": "John", "last": "Doe" } }` | With root level normalization, the output CSV is: -| `_airbyte_ab_id` | `_airbyte_emitted_at` | `user_id` | `name` | -| :--- | :--- | :--- | :--- | -| `26d73cde-7eb1-4e1e-b7db-a4c03b4cf206` | 1622135805000 | 123 | `{ "first": "John", "last": "Doe" }` | +| `_airbyte_ab_id` | `_airbyte_emitted_at` | `user_id` | `name` | +| :------------------------------------- | :-------------------- | :-------- | :----------------------------------- | +| `26d73cde-7eb1-4e1e-b7db-a4c03b4cf206` | 1622135805000 | 123 | `{ "first": "John", "last": "Doe" }` | ### JSON Lines \(JSONL\) @@ -91,20 +92,20 @@ For example, given the following two json objects from a source: ```javascript [ { - "user_id": 123, - "name": { - "first": "John", - "last": "Doe" - } + user_id: 123, + name: { + first: "John", + last: "Doe", + }, }, { - "user_id": 456, - "name": { - "first": "Jane", - "last": "Roe" - } - } -] + user_id: 456, + name: { + first: "Jane", + last: "Roe", + }, + }, +]; ``` They will be like this in the output file: @@ -123,25 +124,25 @@ They will be like this in the output file: ### Setup guide -* Fill up AzureBlobStorage info - * **Endpoint Domain Name** - * Leave default value \(or leave it empty if run container from command line\) to use Microsoft native one or use your own. - * **Azure blob storage container** - * If not exists - will be created automatically. If leave empty, then will be created automatically airbytecontainer+timestamp.. - * **Azure Blob Storage account name** - * See [this](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-create?tabs=azure-portal) on how to create an account. - * **The Azure blob storage account key** - * Corresponding key to the above user. - * **Format** - * Data format that will be use for a migrated data representation in blob. -* Make sure your user has access to Azure from the machine running Airbyte. - * This depends on your networking setup. - * The easiest way to verify if Airbyte is able to connect to your Azure blob storage container is via the check connection tool in the UI. 
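If you prefer to sanity-check the same credentials from a script instead of the UI, the sketch below uses the `azure-storage-blob` Python SDK to create and write an append blob, the same blob type this destination uses. The account name, account key, and container name are placeholders to replace; nothing here is part of the connector itself.

```python
# Minimal connectivity check (sketch) using the azure-storage-blob SDK.
# All values below are placeholders -- substitute your own account details.
from azure.storage.blob import BlobServiceClient

account_name = "<your-storage-account-name>"  # placeholder
account_key = "<your-storage-account-key>"    # placeholder
container_name = "airbytecontainer"           # placeholder; assumes the container already exists

service = BlobServiceClient(
    account_url=f"https://{account_name}.blob.core.windows.net",
    credential=account_key,
)
container = service.get_container_client(container_name)

# The destination writes "Append blobs", so creating one and appending a block
# exercises roughly the same permissions the connector needs.
blob = container.get_blob_client("airbyte_connection_check")
blob.create_append_blob()
blob.append_block(b"connection check\n")
print("Successfully wrote an append blob -- credentials look usable.")
```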
+- Fill up AzureBlobStorage info + - **Endpoint Domain Name** + - Leave default value \(or leave it empty if run container from command line\) to use Microsoft native one or use your own. + - **Azure blob storage container** + - If not exists - will be created automatically. If leave empty, then will be created automatically airbytecontainer+timestamp.. + - **Azure Blob Storage account name** + - See [this](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-create?tabs=azure-portal) on how to create an account. + - **The Azure blob storage account key** + - Corresponding key to the above user. + - **Format** + - Data format that will be use for a migrated data representation in blob. +- Make sure your user has access to Azure from the machine running Airbyte. + - This depends on your networking setup. + - The easiest way to verify if Airbyte is able to connect to your Azure blob storage container is via the check connection tool in the UI. ## CHANGELOG | Version | Date | Pull Request | Subject | -|:--------|:-----------|:-----------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| :------ | :--------- | :--------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------- | | 0.2.0 | 2023-01-18 | [\#15318](https://github.com/airbytehq/airbyte/pull/21467) | Support spilling of objects exceeding configured size threshold | | 0.1.6 | 2022-08-08 | [\#15318](https://github.com/airbytehq/airbyte/pull/15318) | Support per-stream state | | 0.1.5 | 2022-06-16 | [\#13852](https://github.com/airbytehq/airbyte/pull/13852) | Updated stacktrace format for any trace message errors | diff --git a/docs/integrations/destinations/bigquery.md b/docs/integrations/destinations/bigquery.md index afdf228a3261..c3be27a91936 100644 --- a/docs/integrations/destinations/bigquery.md +++ b/docs/integrations/destinations/bigquery.md @@ -73,7 +73,7 @@ The BigQuery destination connector supports the following [sync modes](https://d - Full Refresh Sync - Incremental - Append Sync -- Incremental - Deduped History +- Incremental - Append + Deduped ## Output schema diff --git a/docs/integrations/destinations/cassandra.md b/docs/integrations/destinations/cassandra.md index 8ec03e961189..0b78fc28b2bd 100644 --- a/docs/integrations/destinations/cassandra.md +++ b/docs/integrations/destinations/cassandra.md @@ -19,12 +19,12 @@ contain the following columns. ### Features -| Feature | Support | Notes | -| :---------------------------- | :-----: | :------------------------------------------------------------------------------------------- | -| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured DynamoDB table. | -| Incremental - Append Sync | ✅ | | -| Incremental - Deduped History | ❌ | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | ✅ | Namespace will be used as part of the table name. | +| Feature | Support | Notes | +| :----------------------------- | :-----: | :-------------------------------------------------------------------------------------- | +| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured DynamoDB table. 
| +| Incremental - Append Sync | ✅ | | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ✅ | Namespace will be used as part of the table name. | ### Performance considerations diff --git a/docs/integrations/destinations/chargify.md b/docs/integrations/destinations/chargify.md index 0bab856d6b30..68fb1c2fdaa3 100644 --- a/docs/integrations/destinations/chargify.md +++ b/docs/integrations/destinations/chargify.md @@ -17,12 +17,12 @@ Each replicated stream from Airbyte will output data into a corresponding event #### Features -| Feature | Supported?\(Yes/No\) | Notes | -| :--- | :--- | :--- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | No | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | No | | ## Getting started @@ -32,8 +32,8 @@ To use the Chargify destination, you'll first need to create a [Chargify account Once you have a Chargify account, you can use the following credentials to set up the connector -* A Project ID associated with the site -* A Master API key associated with the site +- A Project ID associated with the site +- A Master API key associated with the site You can reach out to [support@chargify.com](mailto:support@chargify.com) to request your Project ID and Master API key for the Airbyte destination connector. @@ -63,9 +63,9 @@ The `Infer Timestamp` field lets you specify if you want the connector to infer Now, you should have all the parameters needed to configure Chargify destination. -* **Project ID** -* **Master API Key** -* **Infer Timestamp** +- **Project ID** +- **Master API Key** +- **Infer Timestamp** Connect your first source and then head to the Chargify application. You can seamlessly run [custom analysis](https://www.chargify.com/business-intelligence/) on your data and build [multi-attribute, usage-based pricing models](http://chargify.com/events-based-billing/). @@ -73,8 +73,8 @@ If you have any questions or want to get started, [please reach out to a billing ## CHANGELOG -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.2.2 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | -| 0.2.0 | 2021-09-10 | [\#5973](https://github.com/airbytehq/airbyte/pull/5973) | Fix timestamp inference for complex schemas | -| 0.1.0 | 2021-08-18 | [\#5339](https://github.com/airbytehq/airbyte/pull/5339) | Chargify Destination Release! | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :------------------------------------------- | +| 0.2.2 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | +| 0.2.0 | 2021-09-10 | [\#5973](https://github.com/airbytehq/airbyte/pull/5973) | Fix timestamp inference for complex schemas | +| 0.1.0 | 2021-08-18 | [\#5339](https://github.com/airbytehq/airbyte/pull/5339) | Chargify Destination Release! 
| diff --git a/docs/integrations/destinations/clickhouse.md b/docs/integrations/destinations/clickhouse.md index a0612c306d30..75da81407f48 100644 --- a/docs/integrations/destinations/clickhouse.md +++ b/docs/integrations/destinations/clickhouse.md @@ -2,12 +2,12 @@ ## Features -| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :---- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | Yes | | -| Namespaces | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | Yes | | +| Namespaces | Yes | | #### Output Schema diff --git a/docs/integrations/destinations/csv.md b/docs/integrations/destinations/csv.md index 77ffadec8e70..4cc00f440c79 100644 --- a/docs/integrations/destinations/csv.md +++ b/docs/integrations/destinations/csv.md @@ -22,18 +22,18 @@ Please make sure that Docker Desktop has access to `/tmp` (and `/private` on a M Each stream will be output into its own file. Each file will contain 3 columns: -* `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. -* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. -* `_airbyte_data`: a json blob representing with the event data. +- `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. +- `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. +- `_airbyte_data`: a json blob representing with the event data. #### Features -| Feature | Supported | | -| :--- | :--- | :--- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | No | | +| Feature | Supported | | +| :----------------------------- | :-------- | :-- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | No | | #### Performance considerations @@ -49,13 +49,13 @@ The local mount is mounted by Docker onto `LOCAL_ROOT`. This means the `/local` ### Example: -* If `destination_path` is set to `/local/cars/models` -* the local mount is using the `/tmp/airbyte_local` default -* then all data will be written to `/tmp/airbyte_local/cars/models` directory. +- If `destination_path` is set to `/local/cars/models` +- the local mount is using the `/tmp/airbyte_local` default +- then all data will be written to `/tmp/airbyte_local/cars/models` directory. ## Access Replicated Data Files -If your Airbyte instance is running on the same computer that you are navigating with, you can open your browser and enter [file:///tmp/airbyte\_local](file:///tmp/airbyte_local) to look at the replicated data locally. If the first approach fails or if your Airbyte instance is running on a remote server, follow the following steps to access the replicated files: +If your Airbyte instance is running on the same computer that you are navigating with, you can open your browser and enter [file:///tmp/airbyte_local](file:///tmp/airbyte_local) to look at the replicated data locally. If the first approach fails or if your Airbyte instance is running on a remote server, follow the following steps to access the replicated files: 1. 
Access the scheduler container using `docker exec -it airbyte-server bash` 2. Navigate to the default local mount using `cd /tmp/airbyte_local` @@ -74,7 +74,7 @@ Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :------------------------------------------------------------------------------ | | 1.0.0 | 2022-12-20 | [17998](https://github.com/airbytehq/airbyte/pull/17998) | Breaking changes: non backwards compatible. Adds delimiter dropdown. | | 0.2.10 | 2022-06-20 | [13932](https://github.com/airbytehq/airbyte/pull/13932) | Merging published connector changes | | 0.2.9 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add ExitOnOutOfMemoryError to java connectors and bump versions | @@ -95,4 +95,3 @@ Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have | 0.1.2 | 2020-11-18 | [998](https://github.com/airbytehq/airbyte/pull/998) | Adding incremental to the data model | | 0.1.1 | 2020-11-10 | [895](https://github.com/airbytehq/airbyte/pull/895) | bump versions: all destinations and source exchange rate | | 0.1.0 | 2020-10-21 | [676](https://github.com/airbytehq/airbyte/pull/676) | Integrations Reorganization: Connectors | - diff --git a/docs/integrations/destinations/cumulio.md b/docs/integrations/destinations/cumulio.md index 4b73c2facce9..9e554b682bc3 100644 --- a/docs/integrations/destinations/cumulio.md +++ b/docs/integrations/destinations/cumulio.md @@ -1,14 +1,10 @@ - - # Cumul.io - ## General -The Airbyte Cumul.io destination connector allows you to stream data into Cumul.io from [any Airbyte Source](https://airbyte.io/connectors?connector-type=Sources). - -Cumul.io is an **[Embedded analytics SaaS solution](https://cumul.io/product/embedded-analytics)** that enables other SaaS companies to grow with an **engaging customer analytics experience**, seamlessly embedded in their product. Cumul.io's intuitive, low-code interface empowers business users with insight-driven actions in record time **without straining engineering resources from the core product**. +The Airbyte Cumul.io destination connector allows you to stream data into Cumul.io from [any Airbyte Source](https://airbyte.io/connectors?connector-type=Sources). +Cumul.io is an **[Embedded analytics SaaS solution](https://cumul.io/product/embedded-analytics)** that enables other SaaS companies to grow with an **engaging customer analytics experience**, seamlessly embedded in their product. Cumul.io's intuitive, low-code interface empowers business users with insight-driven actions in record time **without straining engineering resources from the core product**. ## Getting started @@ -16,83 +12,86 @@ In order to use the Cumul.io destination, you'll first need to **create a [Cumul After logging in to Cumul.io, you can **generate an API key and token** in your [Profile -> API Tokens](https://app.cumul.io/start/profile/integration). To set up the destination connector in Airbyte, you'll need to provide the following Cumul.io properties: -* "**Cumul.io API Host URL**": the API host URL for the **Cumul.io environment** where your **Cumul.io account resides** (i.e. 
`https://api.cumul.io` for EU multi-tenant users, `https://api.us.cumul.io/` for US multi-tenant users, or a VPC-specific address). This property depends on the environment in which your Cumul.io account was created (e.g. if you have signed up via https://app.us.cumul.io/signup, the API host URL would be `https://api.us.cumul.io/`). -* "**Cumul.io API key**": a Cumul.io API key (see above how to generate an API key-token pair) -* "**Cumul.io API token**": the corresponding Cumul.io API token (see above how to generate an API key-token pair) +- "**Cumul.io API Host URL**": the API host URL for the **Cumul.io environment** where your **Cumul.io account resides** (i.e. `https://api.cumul.io` for EU multi-tenant users, `https://api.us.cumul.io/` for US multi-tenant users, or a VPC-specific address). This property depends on the environment in which your Cumul.io account was created (e.g. if you have signed up via https://app.us.cumul.io/signup, the API host URL would be `https://api.us.cumul.io/`). +- "**Cumul.io API key**": a Cumul.io API key (see above how to generate an API key-token pair) +- "**Cumul.io API token**": the corresponding Cumul.io API token (see above how to generate an API key-token pair) -As soon as you've connected a source and the **first stream synchronization** has **succeeded**, the desired **Dataset(s)** will be **available in Cumul.io to build dashboards on** (Cumul.io's ["Getting started" Academy course](https://academy.cumul.io/course/a0bf5530-edfb-441e-901b-e1fcb95dfac7) might be interesting to get familiar with its platform). +As soon as you've connected a source and the **first stream synchronization** has **succeeded**, the desired **Dataset(s)** will be **available in Cumul.io to build dashboards on** (Cumul.io's ["Getting started" Academy course](https://academy.cumul.io/course/a0bf5530-edfb-441e-901b-e1fcb95dfac7) might be interesting to get familiar with its platform). Depending on the **synchronization mode** set up, the **next synchronizations** will either **replace/append data in/to these datasets**! -*If you have any questions or want to get started with Cumul.io, don't hesitate to reach out via [our contact page](https://cumul.io/contact).* - +_If you have any questions or want to get started with Cumul.io, don't hesitate to reach out via [our contact page](https://cumul.io/contact)._ ## Connector overview ### Sync modes support -| [Sync modes](https://docs.airbyte.com/understanding-airbyte/connections/#sync-modes) | Supported?\(Yes/No\) | Notes | -| :--- | :--- | :--- | -| [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append/) | Yes | / | -| [Full Refresh - Replace](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) | Yes | / | -| [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/) | Yes | / | -| [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) | No | Cumul.io's data warehouse does not support dbt (yet). 
| + +| [Sync modes](https://docs.airbyte.com/understanding-airbyte/connections/#sync-modes) | Supported?\(Yes/No\) | Notes | +| :---------------------------------------------------------------------------------------------------------------------- | :------------------- | :---------------------------------------------------- | +| [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append/) | Yes | / | +| [Full Refresh - Replace](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) | Yes | / | +| [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/) | Yes | / | +| [Incremental - Append + Deduped ](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) | No | Cumul.io's data warehouse does not support dbt (yet). | ### Airbyte Features support -| Feature | Supported?\(Yes/No\) | Notes | -| :--- | :--- | :--- | -| [Namespaces](https://docs.airbyte.com/understanding-airbyte/namespaces/) | Yes | (***Highly recommended***) A **concatenation of the namespace and stream name** will be used as a unique identifier for the related Cumul.io dataset (using [Tags](https://academy.cumul.io/article/mam7lkdt)) and ensures next synchronizations can target the same dataset. Use this property to **ensure identically named destination streams** from different connections **do not coincide**!| -| [Reset data](https://docs.airbyte.com/operator-guides/reset) | Yes | **Existing data** in a dataset is **not deleted** upon resetting a stream in Airbyte, however the next synchronization batch will replace all existing data. This ensures that the dataset is never empty (e.g. upon disabling the synchronization), which would otherwise result in "No data" upon querying it.| + +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------------------------------------------------- | :------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [Namespaces](https://docs.airbyte.com/understanding-airbyte/namespaces/) | Yes | (**_Highly recommended_**) A **concatenation of the namespace and stream name** will be used as a unique identifier for the related Cumul.io dataset (using [Tags](https://academy.cumul.io/article/mam7lkdt)) and ensures next synchronizations can target the same dataset. Use this property to **ensure identically named destination streams** from different connections **do not coincide**! | +| [Reset data](https://docs.airbyte.com/operator-guides/reset) | Yes | **Existing data** in a dataset is **not deleted** upon resetting a stream in Airbyte, however the next synchronization batch will replace all existing data. This ensures that the dataset is never empty (e.g. upon disabling the synchronization), which would otherwise result in "No data" upon querying it. 
| ### Airbyte data types support -| [Airbyte data types](https://docs.airbyte.com/understanding-airbyte/supported-data-types#the-types) | Remarks | -| :--- | :--- | -| Array & Object | To support a limited amount of insights, this connector will **stringify data values with type `Array` or `Object`** ([recommended by Airbyte](https://docs.airbyte.com/understanding-airbyte/supported-data-types/#unsupported-types)) as Cumul.io does not support storing nor querying such data types. For analytical purposes, it's always recommended to **unpack these values in different rows or columns** (depending on the use-case) before pushing the data to Cumul.io!| -| Time with(out) timezone | While these values **will be stored as-is** in Cumul.io, they should be interpreted as `hierarchy`* (i.e. text/string, see [Cumul.io's data types Academy article](https://academy.cumul.io/article/p68253bn)). Alternatively, you could either **provide a (default) date and timezone** for these values, or **unpack them in different columns** (e.g. `hour`, `minute`, `second` columns), before pushing the data to Cumul.io.| -| Timestamp without timezone | Cumul.io **does not support storing dates without timestamps**, these timestamps will be **interpreted as UTC date values**.| -| Number & Integer data types with NaN, Infinity, -Infinity values | While these values **will be stored as-is** in Cumul.io, they will not support numeric aggregations such as sum, avg, etc. (*using such aggregations on these values likely causes unexpected behavior*). Ideally, such values are **converted into meaningful values** (e.g. no value, 0, a specific value, etc.) before pushing the data to Cumul.io. | -| Boolean | Boolean values **will be stringified** ([recommended by Airbyte](https://docs.airbyte.com/understanding-airbyte/supported-data-types/#unsupported-types)) and result in a hierarchy column type (i.e. text/string, see [Cumul.io's data types Academy article](https://academy.cumul.io/article/p68253bn)). You could use Cumul.io's hierarchy translation (see [this Academy article](https://academy.cumul.io/article/dqgn0316)) to assign translations to `true` and `false` that are meaningful to the business user in the column's context. | -| All other data types | Should be supported and correctly interpreted by Cumul.io's Data API service*. | +| [Airbyte data types](https://docs.airbyte.com/understanding-airbyte/supported-data-types#the-types) | Remarks | +| :-------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Array & Object | To support a limited amount of insights, this connector will **stringify data values with type `Array` or `Object`** ([recommended by Airbyte](https://docs.airbyte.com/understanding-airbyte/supported-data-types/#unsupported-types)) as Cumul.io does not support storing nor querying such data types. 
For analytical purposes, it's always recommended to **unpack these values in different rows or columns** (depending on the use-case) before pushing the data to Cumul.io! | +| Time with(out) timezone | While these values **will be stored as-is** in Cumul.io, they should be interpreted as `hierarchy`\* (i.e. text/string, see [Cumul.io's data types Academy article](https://academy.cumul.io/article/p68253bn)). Alternatively, you could either **provide a (default) date and timezone** for these values, or **unpack them in different columns** (e.g. `hour`, `minute`, `second` columns), before pushing the data to Cumul.io. | +| Timestamp without timezone | Cumul.io **does not support storing dates without timestamps**, these timestamps will be **interpreted as UTC date values**. | +| Number & Integer data types with NaN, Infinity, -Infinity values | While these values **will be stored as-is** in Cumul.io, they will not support numeric aggregations such as sum, avg, etc. (_using such aggregations on these values likely causes unexpected behavior_). Ideally, such values are **converted into meaningful values** (e.g. no value, 0, a specific value, etc.) before pushing the data to Cumul.io. | +| Boolean | Boolean values **will be stringified** ([recommended by Airbyte](https://docs.airbyte.com/understanding-airbyte/supported-data-types/#unsupported-types)) and result in a hierarchy column type (i.e. text/string, see [Cumul.io's data types Academy article](https://academy.cumul.io/article/p68253bn)). You could use Cumul.io's hierarchy translation (see [this Academy article](https://academy.cumul.io/article/dqgn0316)) to assign translations to `true` and `false` that are meaningful to the business user in the column's context. | +| All other data types | Should be supported and correctly interpreted by Cumul.io's Data API service\*. | -**Note: It might be that Cumul.io's automatic typing could initially interpret this type of data wrongly due to its format (see `Possible future improvements` below), you could then alter the column type in the Cumul.io UI to try changing it manually.* +\*_Note: It might be that Cumul.io's automatic typing could initially interpret this type of data wrongly due to its format (see `Possible future improvements` below), you could then alter the column type in the Cumul.io UI to try changing it manually._ ### Output schema in Cumul.io + Each replicated stream from Airbyte will output data into a corresponding dataset in Cumul.io. Each dataset will **initially** have an **`Airbyte - ` English name** which can be **further adapted in Cumul.io's UI**, or even [via API](https://developer.cumul.io/#dashboard_update). If the request of pushing a batch of data fails, the connector will gracefully retry pushing the batch up to three times, with a backoff interval of 5 minutes, 10 minutes, and 20 minutes, respectively. The connector will **associate one or more of the following tags to each dataset**: -* `[AIRBYTE - DO NOT DELETE] - `: this tag will be **used to retrieve the dataset ID and its current columns** from Cumul.io, and will be associated with the dataset after the first batch of data is written to a new dataset. -* `[AIRBYTE - DO NOT DELETE] - REPLACE DATA`: this tag will be **associated to a dataset** when it should be "resetted" (i.e. the **existing data should be replaced**, see `Feature` -> `Reset data` above). The first batch of data of the next synchronization will replace all existing data if this tag is present on a dataset. 
-As noted in the tag name, it is important to **never remove such tags from the dataset(s) nor manually set them** on other datasets. Doing so might break existing or new synchronizations! +- `[AIRBYTE - DO NOT DELETE] - `: this tag will be **used to retrieve the dataset ID and its current columns** from Cumul.io, and will be associated with the dataset after the first batch of data is written to a new dataset. +- `[AIRBYTE - DO NOT DELETE] - REPLACE DATA`: this tag will be **associated to a dataset** when it should be "resetted" (i.e. the **existing data should be replaced**, see `Feature` -> `Reset data` above). The first batch of data of the next synchronization will replace all existing data if this tag is present on a dataset. +As noted in the tag name, it is important to **never remove such tags from the dataset(s) nor manually set them** on other datasets. Doing so might break existing or new synchronizations! ## Data recommendations -### Data structure +### Data structure + To ensure the most performant queries, we recommend to **denormalize your data as much as possible beforehand** (this ensures that the least amount of joins are required to achieve your desired insights). Denormalized datasets also ensure that they can be easily consumed by less technical users, who often do not understand relations between tables! Instead of denormalizing your datasets to specific insights, it is recommended to **set up one or more dimensional data models** that support all kinds of slicing and dicing within a dashboard: this ensures a **flexible & scalable setup** which is **easy-to-understand and performant-to-query**! This Cumul.io blog post goes into more detail on why customer-facing analytics requires a simple data model: https://blog.cumul.io/2022/12/07/why-a-dimensional-data-model-for-embedded-analytics/. ### Pushing data -Cumul.io uses an **OLAP database** to **ensure the most performant concurrent "Read" queries** on large amounts of data. OLAP databases, such as Cumul.io's database, are however often less suitable for a lot of "Write" queries with small amounts of data. -To ensure the best performance when writing data, we **recommend synchronizing larger amounts of data less frequently** rather than *smaller amounts of data more frequently*! +Cumul.io uses an **OLAP database** to **ensure the most performant concurrent "Read" queries** on large amounts of data. OLAP databases, such as Cumul.io's database, are however often less suitable for a lot of "Write" queries with small amounts of data. +To ensure the best performance when writing data, we **recommend synchronizing larger amounts of data less frequently** rather than _smaller amounts of data more frequently_! ## Possible future improvements -* In case of many concurrent synchronizations, the following issues might arise at one point (not evaluated yet): - * The combination of all write buffers' data could cause memory overload, in that case it might be interesting to alter the flush rate by changing the `flush_interval` variable in `destination_cumulio/writer.py` (currently set to 10 000, which is the maximum amount of data points that can be sent via Cumul.io's Data API service in a single request, see note [here](https://developer.cumul.io/#data_create)). We do recommend keeping the `flush_interval` value **as high as possible** to ensure the least amount of total overhead on all batches pushed! 
- * Having more than 200 concurrent Airbyte connections flushing the data simultaneously, and using the same Cumul.io API key and token for each connection, might run into [Cumul.io's API Rate limit](https://developer.cumul.io/#core_api_ratelimiting). As this will rarely occur due to Cumul.io's burstable rate limit, we recommend using separate API key and tokens for identical destination connectors in case you would expect such concurrency. Note that synchronizing multiple streams in a single connection will happen sequentially and thus not run into the rate limit. -* The current connector will not take into account the Airbyte source data types, instead Cumul.io's API will automatically detect column types based on a random data sample. If Cumul.io's detected data type is not as desired, it's possible to alter the column's type via Cumul.io's UI to manually change the column type (e.g. if a `VARCHAR` column would only contain numeric values, it could initially be interpreted as a `numeric` column in Cumul.io but can at any point be changed to `hierarchy` if more appropriate). - * As a future improvement, it is possible to: - 1. Create a new dataset - [Create Dataset API Documentation](https://developer.cumul.io/#dataset_create) - 2. Create the appropriate tag (`[AIRBYTE - DO NOT DELETE] - `) and associate it with the newly created dataset (in `destination_cumulio/client.py`, a method `_validate_tag_dataset_id_association(stream_name, dataset_id)` is defined which could be used for this step) - 3. Create each column with the correct Cumul.io type - [Create Column API Documentation](https://developer.cumul.io/#column_create) - 4. Associate each column with the dataset - [Associate Dataset Column API Documentation](https://developer.cumul.io/#column_assoc_dataset) - 5. From there on out, you can replace/append data for this dataset based on the tag (already implemented). +- In case of many concurrent synchronizations, the following issues might arise at one point (not evaluated yet): + - The combination of all write buffers' data could cause memory overload, in that case it might be interesting to alter the flush rate by changing the `flush_interval` variable in `destination_cumulio/writer.py` (currently set to 10 000, which is the maximum amount of data points that can be sent via Cumul.io's Data API service in a single request, see note [here](https://developer.cumul.io/#data_create)). We do recommend keeping the `flush_interval` value **as high as possible** to ensure the least amount of total overhead on all batches pushed! + - Having more than 200 concurrent Airbyte connections flushing the data simultaneously, and using the same Cumul.io API key and token for each connection, might run into [Cumul.io's API Rate limit](https://developer.cumul.io/#core_api_ratelimiting). As this will rarely occur due to Cumul.io's burstable rate limit, we recommend using separate API key and tokens for identical destination connectors in case you would expect such concurrency. Note that synchronizing multiple streams in a single connection will happen sequentially and thus not run into the rate limit. +- The current connector will not take into account the Airbyte source data types, instead Cumul.io's API will automatically detect column types based on a random data sample. If Cumul.io's detected data type is not as desired, it's possible to alter the column's type via Cumul.io's UI to manually change the column type (e.g. 
if a `VARCHAR` column would only contain numeric values, it could initially be interpreted as a `numeric` column in Cumul.io but can at any point be changed to `hierarchy` if more appropriate). + - As a future improvement, it is possible to: + 1. Create a new dataset - [Create Dataset API Documentation](https://developer.cumul.io/#dataset_create) + 2. Create the appropriate tag (`[AIRBYTE - DO NOT DELETE] - `) and associate it with the newly created dataset (in `destination_cumulio/client.py`, a method `_validate_tag_dataset_id_association(stream_name, dataset_id)` is defined which could be used for this step) + 3. Create each column with the correct Cumul.io type - [Create Column API Documentation](https://developer.cumul.io/#column_create) + 4. Associate each column with the dataset - [Associate Dataset Column API Documentation](https://developer.cumul.io/#column_assoc_dataset) + 5. From there on out, you can replace/append data for this dataset based on the tag (already implemented). ## CHANGELOG -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.0 | 2023-02-16 | | Initial release of Cumul.io's Destination connector | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :----------- | :-------------------------------------------------- | +| 0.1.0 | 2023-02-16 | | Initial release of Cumul.io's Destination connector | diff --git a/docs/integrations/destinations/databricks.md b/docs/integrations/destinations/databricks.md index 1b1d89f94eb2..c8a6a0516d49 100644 --- a/docs/integrations/destinations/databricks.md +++ b/docs/integrations/destinations/databricks.md @@ -11,20 +11,25 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing ## Getting started ## Databricks AWS Setup + ### 1. Create a Databricks Workspace + - Follow Databricks guide [Create a workspace using the account console](https://docs.databricks.com/administration-guide/workspace/create-workspace.html#create-a-workspace-using-the-account-console). -> **_IMPORTANT:_** Don't forget to create a [cross-account IAM role](https://docs.databricks.com/administration-guide/cloud-configurations/aws/iam-role.html#create-a-cross-account-iam-role) for workspaces + > **_IMPORTANT:_** Don't forget to create a [cross-account IAM role](https://docs.databricks.com/administration-guide/cloud-configurations/aws/iam-role.html#create-a-cross-account-iam-role) for workspaces > **_TIP:_** Alternatively use Databricks quickstart for new workspace > ![](../../.gitbook/assets/destination/databricks/databricks_workspace_quciksetup.png) ### 2. Create a metastore and attach it to workspace + > **_IMPORTANT:_** The metastore should be in the same region as the workspaces you want to use to access the data. Make sure that this matches the region of the cloud storage bucket you created earlier. #### Setup storage bucket and IAM role in AWS - Follow [Configure a storage bucket and IAM role in AWS](https://docs.databricks.com/data-governance/unity-catalog/get-started.html#configure-a-storage-bucket-and-iam-role-in-aws) to setup AWS bucket with necessary permissions. + +Follow [Configure a storage bucket and IAM role in AWS](https://docs.databricks.com/data-governance/unity-catalog/get-started.html#configure-a-storage-bucket-and-iam-role-in-aws) to setup AWS bucket with necessary permissions. #### Create metastore + - Login into Databricks [account console](https://accounts.cloud.databricks.com/login) with admin permissions. 
- Go to Data tab and hit Create metastore button: @@ -32,14 +37,17 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing - Provide all necessary data and click Create: - ![](../../.gitbook/assets/destination/databricks/databrikcs_metastore_fields.png) + ![](../../.gitbook/assets/destination/databricks/databrikcs_metastore_fields.png) + - `Name` - `Region` The metastore should be in same region as the workspace. - `S3 bucket path` created at [Setup storage bucket and IAM role in AWS](#setup-storage-bucket-and-iam-role-in-aws) step. - `IAM role ARN` created at [Setup storage bucket and IAM role in AWS](#setup-storage-bucket-and-iam-role-in-aws) step. Example: `arn:aws:iam:::role/` + - Select the workspaces in `Assign to workspaces` tab and click Assign. ### 3. Create Databricks SQL Warehouse + > **_TIP:_** If you use Databricks cluster skip this step - Open the workspace tab and click on created workspace console: @@ -55,6 +63,7 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing - After SQL warehouse was created we can it's Connection details to con ### 4. Databricks SQL Warehouse connection details + > **_TIP:_** If you use Databricks cluster skip this step - Open workspace console. @@ -69,6 +78,7 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing > **_IMPORTANT:_** `Server hostname`, `Port`, `HTTP path` are used for Airbyte connection ### 5. Create Databricks Cluster + > **_TIP:_** If you use Databricks SQL Warehouse skip this step - Open the workspace tab and click on created workspace console: @@ -79,11 +89,12 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing ![](../../.gitbook/assets/destination/databricks/databrick_new_cluster.png) - - Switch to Data science & Engineering - - Click New button - - Choose Cluster + - Switch to Data science & Engineering + - Click New button + - Choose Cluster ### 6. Databricks Cluster connection details + > **_TIP:_** If you use Databricks SQL Warehouse skip this step - Open workspace console. @@ -94,9 +105,11 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing - Open Advanced options under Configuration, choose JDBC/ODBC tab: ![](../../.gitbook/assets/destination/databricks/databricks_cluster_connection_details2.png) -> **_IMPORTANT:_** `Server hostname`, `Port`, `HTTP path` are used for Airbyte connection + + > **_IMPORTANT:_** `Server hostname`, `Port`, `HTTP path` are used for Airbyte connection ### 7. Create Databricks Token + - Open workspace console. - Open User Settings, go to Access tokens tab and click Generate new token: @@ -109,6 +122,7 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing > **_TIP:_** `Lifetime` can be set to `0` ### 8. Adding External Locations (Optional) + > **_TIP:_** Skip this step if no external data source is used. - Open workspace console. @@ -123,18 +137,22 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing > **_TIP:_** The new `Storage credential` can be added in the `Storage Credentials` tab or use same as for Metastore. ## Airbyte Setup + ### Databricks fields -- `Agree to the Databricks JDBC Driver Terms & Conditions` - [Databricks JDBC ODBC driver license](https://www.databricks.com/legal/jdbc-odbc-driver-license). -- `Server Hostname` - can be taken from [4. Databricks SQL Warehouse connection details](#4-databricks-sql-warehouse-connection-details) or [6. 
Databricks Cluster connection details](#6-databricks-cluster-connection-details) steps. -- `HTTP Path` - can be taken from [4. Databricks SQL Warehouse connection details](#4-databricks-sql-warehouse-connection-details) or [6. Databricks Cluster connection details](#6-databricks-cluster-connection-details) steps. -- `Port` - can be taken from [4. Databricks SQL Warehouse connection details](#4-databricks-sql-warehouse-connection-details) or [6. Databricks Cluster connection details](#6-databricks-cluster-connection-details) steps. + +- `Agree to the Databricks JDBC Driver Terms & Conditions` - [Databricks JDBC ODBC driver license](https://www.databricks.com/legal/jdbc-odbc-driver-license). +- `Server Hostname` - can be taken from [4. Databricks SQL Warehouse connection details](#4-databricks-sql-warehouse-connection-details) or [6. Databricks Cluster connection details](#6-databricks-cluster-connection-details) steps. +- `HTTP Path` - can be taken from [4. Databricks SQL Warehouse connection details](#4-databricks-sql-warehouse-connection-details) or [6. Databricks Cluster connection details](#6-databricks-cluster-connection-details) steps. +- `Port` - can be taken from [4. Databricks SQL Warehouse connection details](#4-databricks-sql-warehouse-connection-details) or [6. Databricks Cluster connection details](#6-databricks-cluster-connection-details) steps. - `Access Token` - can be taken from [7. Create Databricks Token](#7-create-databricks-token) step. ### Data Source + You could choose a data source type - - Managed tables - - Amazon S3 (External storage) - - Azure Blob Storage (External storage) + +- Managed tables +- Amazon S3 (External storage) +- Azure Blob Storage (External storage) #### Managed tables data source type @@ -143,13 +161,15 @@ Please check Databricks documentation about [What is managed tables](https://doc > **_TIP:_** There is no addition setup should be done for this type. #### Amazon S3 data source type (External storage) + > **_IMPORTANT:_** Make sure the `External Locations` has been added to the workspace. Check [Adding External Locations](#8-adding-external-locations-optional) step. Provide your Amazon S3 data: + - `S3 Bucket Name` - The bucket name - `S3 Bucket Path` - Subdirectory under the above bucket to sync the data into - `S3 Bucket Region` - See [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions) for all region codes. -> **_IMPORTANT:_** The metastore should be in the same region as the workspaces you want to use to access the data. Make sure that this matches the region of the cloud storage bucket you created earlier. + > **_IMPORTANT:_** The metastore should be in the same region as the workspaces you want to use to access the data. Make sure that this matches the region of the cloud storage bucket you created earlier. - `S3 Access Key ID` - Corresponding key to the above key id - `S3 Secret Access Key` - - See [this](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) on how to generate an access key. @@ -157,45 +177,47 @@ Provide your Amazon S3 data: - `S3 Filename pattern` - The pattern allows you to set the file-name format for the S3 staging file(s), next placeholders combinations are currently supported: {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. 
Please, don't use empty space and not supportable placeholders, as they won't be recognized #### Azure Blob Storage data source type (External storage) + > **_IMPORTANT:_** The work in progress. ## Sync Mode -| Feature | Support | Notes | -| :--- | :---: | :--- | -| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Incremental - Append Sync | ✅ | | -| Incremental - Deduped History | ❌ | | -| Namespaces | ✅ | | +| Feature | Support | Notes | +| :----------------------------- | :-----: | :----------------------------------------------------------------------------------- | +| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | +| Incremental - Append Sync | ✅ | | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ✅ | | ## Configuration -| Category | Parameter | Type | Notes | -|:--------------------|:------------------------|:-------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Databricks | Server Hostname | string | Required. Example: `abc-12345678-wxyz.cloud.databricks.com`. See [documentation](https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url). Please note that this is the server for the Databricks Cluster. It is different from the SQL Endpoint Cluster. | -| | HTTP Path | string | Required. Example: `sql/protocolvx/o/1234567489/0000-1111111-abcd90`. See [documentation](https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url). | -| | Port | string | Optional. Default to "443". See [documentation](https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url). | -| | Personal Access Token | string | Required. Example: `dapi0123456789abcdefghij0123456789AB`. See [documentation](https://docs.databricks.com/sql/user/security/personal-access-tokens.html). | -| General | Databricks catalog | string | Optional. The name of the catalog. If not specified otherwise, the "hive_metastore" will be used. | -| | Database schema | string | Optional. The default schema tables are written. If not specified otherwise, the "default" will be used. -| | Schema evolution | boolean | Optional. The connector enables automatic schema evolution in the destination tables. | -| | Purge Staging Data | boolean | The connector creates staging files and tables on S3 or Azure. By default, they will be purged when the data sync is complete. Set it to `false` for debugging purposes. | -| Data Source - S3 | Bucket Name | string | Name of the bucket to sync data into. | -| | Bucket Path | string | Subdirectory under the above bucket to sync the data into. | -| | Region | string | See [documentation](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions) for all region codes. | -| | Access Key ID | string | AWS/Minio credential. | -| | Secret Access Key | string | AWS/Minio credential. 
| -| | S3 Filename pattern | string | The pattern allows you to set the file-name format for the S3 staging file(s), next placeholders combinations are currently supported: {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. Please, don't use empty space and not supportable placeholders, as they won't recognized. | -| Data Source - Azure | Account Name | string | Name of the account to sync data into. | -| | Container Name | string | Container under the above account to sync the data into. | -| | SAS token | string | Shared-access signature token for the above account. | -| | Endpoint domain name | string | Usually blob.core.windows.net. | +| Category | Parameter | Type | Notes | +| :------------------ | :-------------------- | :-----: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Databricks | Server Hostname | string | Required. Example: `abc-12345678-wxyz.cloud.databricks.com`. See [documentation](https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url). Please note that this is the server for the Databricks Cluster. It is different from the SQL Endpoint Cluster. | +| | HTTP Path | string | Required. Example: `sql/protocolvx/o/1234567489/0000-1111111-abcd90`. See [documentation](https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url). | +| | Port | string | Optional. Default to "443". See [documentation](https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url). | +| | Personal Access Token | string | Required. Example: `dapi0123456789abcdefghij0123456789AB`. See [documentation](https://docs.databricks.com/sql/user/security/personal-access-tokens.html). | +| General | Databricks catalog | string | Optional. The name of the catalog. If not specified otherwise, the "hive_metastore" will be used. | +| | Database schema | string | Optional. The default schema tables are written. If not specified otherwise, the "default" will be used. | +| | Schema evolution | boolean | Optional. The connector enables automatic schema evolution in the destination tables. | +| | Purge Staging Data | boolean | The connector creates staging files and tables on S3 or Azure. By default, they will be purged when the data sync is complete. Set it to `false` for debugging purposes. | +| Data Source - S3 | Bucket Name | string | Name of the bucket to sync data into. | +| | Bucket Path | string | Subdirectory under the above bucket to sync the data into. | +| | Region | string | See [documentation](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions) for all region codes. | +| | Access Key ID | string | AWS/Minio credential. | +| | Secret Access Key | string | AWS/Minio credential. | +| | S3 Filename pattern | string | The pattern allows you to set the file-name format for the S3 staging file(s), next placeholders combinations are currently supported: {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. 
Please, don't use empty space and not supportable placeholders, as they won't recognized. | +| Data Source - Azure | Account Name | string | Name of the account to sync data into. | +| | Container Name | string | Container under the above account to sync the data into. | +| | SAS token | string | Shared-access signature token for the above account. | +| | Endpoint domain name | string | Usually blob.core.windows.net. | ⚠️ Please note that under "Full Refresh Sync" mode, data in the configured bucket and path will be wiped out before each sync. We recommend you provision a dedicated S3 or Azure resource for this sync to prevent unexpected data deletion from misconfiguration. ⚠️ ## Staging Files (Delta Format) ### S3 + Data streams are first written as staging delta-table ([Parquet](https://parquet.apache.org/) + [Transaction Log](https://databricks.com/blog/2019/08/21/diving-into-delta-lake-unpacking-the-transaction-log.html)) files on S3, and then loaded into Databricks delta-tables. All the staging files will be deleted after the sync is done. For debugging purposes, here is the full path for a staging file: ```text @@ -215,6 +237,7 @@ s3://testing_bucket/data_output_path/98c450be-5b1c-422d-b8b5-6ca9903727d9/users/ ``` ### Azure + Similarly, streams are first written to a staging location, but the Azure option uses CSV format. A staging table is created from the CSV files. ## Unmanaged Spark SQL Table @@ -241,44 +264,46 @@ In Azure, the full path of each data stream is: ```text abfss://@.dfs.core.windows.net// ``` + Please keep these data directories on S3/Azure. Otherwise, the corresponding tables will have no data in Databricks. ## Output Schema Each table will have the following columns: -| Column | Type | Notes | -| :--- | :---: | :--- | -| `_airbyte_ab_id` | string | UUID. | -| `_airbyte_emitted_at` | timestamp | Data emission timestamp. | -| Data fields from the source stream | various | All fields in the staging files will be expanded in the table. | +| Column | Type | Notes | +| :--------------------------------- | :-------: | :------------------------------------------------------------- | +| `_airbyte_ab_id` | string | UUID. | +| `_airbyte_emitted_at` | timestamp | Data emission timestamp. | +| Data fields from the source stream | various | All fields in the staging files will be expanded in the table. | Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record, and finally the Avro record is outputted to the Parquet format. Because the data stream can come from any data source, the Json to Avro conversion process has arbitrary rules and limitations. Learn more about how source data is converted to Avro and the current limitations [here](https://docs.airbyte.com/understanding-airbyte/json-avro-conversion). ## Related tutorial + Suppose you are interested in learning more about the Databricks connector or details on how the Delta Lake tables are created. You may want to consult the tutorial on [How to Load Data into Delta Lake on Databricks Lakehouse](https://airbyte.com/tutorials/load-data-into-delta-lake-on-databricks-lakehouse). 
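To spot-check what a sync wrote (including the `_airbyte_ab_id` and `_airbyte_emitted_at` columns described under Output Schema), the sketch below queries the destination with the open-source `databricks-sql-connector` Python package. The hostname, HTTP path, and token reuse the example values from the connection-details steps above, and `default.users` stands in for `<schema>.<stream name>`; none of these values come from the connector itself.

```python
# Sketch: inspect a table written by this destination.
# pip install databricks-sql-connector
from databricks import sql

connection = sql.connect(
    server_hostname="abc-12345678-wxyz.cloud.databricks.com",    # Server hostname (step 4 or 6)
    http_path="sql/protocolvx/o/1234567489/0000-1111111-abcd90", # HTTP path (step 4 or 6)
    access_token="dapi0123456789abcdefghij0123456789AB",         # Personal access token (step 7)
)

cursor = connection.cursor()
# "default.users" is a placeholder for <schema>.<stream name>.
cursor.execute("SELECT * FROM default.users LIMIT 5")
for row in cursor.fetchall():
    # Each row carries _airbyte_ab_id, _airbyte_emitted_at, plus the source fields.
    print(row)

cursor.close()
connection.close()
```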
## CHANGELOG -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------| -| 1.1.0 | 2023-06-02 | [\#26942](https://github.com/airbytehq/airbyte/pull/26942) | Support schema evolution | -| 1.0.2 | 2023-04-20 | [\#25366](https://github.com/airbytehq/airbyte/pull/25366) | Fix default catalog to be `hive_metastore` | -| 1.0.1 | 2023-03-30 | [\#24657](https://github.com/airbytehq/airbyte/pull/24657) | Fix support for external tables on S3 | -| 1.0.0 | 2023-03-21 | [\#23965](https://github.com/airbytehq/airbyte/pull/23965) | Added: Managed table storage type, Databricks Catalog field | -| 0.3.1 | 2022-10-15 | [\#18032](https://github.com/airbytehq/airbyte/pull/18032) | Add `SSL=1` to the JDBC URL to ensure SSL connection. | -| 0.3.0 | 2022-10-14 | [\#15329](https://github.com/airbytehq/airbyte/pull/15329) | Add support for Azure storage. | -| | 2022-09-01 | [\#16243](https://github.com/airbytehq/airbyte/pull/16243) | Fix Json to Avro conversion when there is field name clash from combined restrictions (`anyOf`, `oneOf`, `allOf` fields) | -| 0.2.6 | 2022-08-05 | [\#14801](https://github.com/airbytehq/airbyte/pull/14801) | Fix multiply log bindings | -| 0.2.5 | 2022-07-15 | [\#14494](https://github.com/airbytehq/airbyte/pull/14494) | Make S3 output filename configurable. | -| 0.2.4 | 2022-07-14 | [\#14618](https://github.com/airbytehq/airbyte/pull/14618) | Removed additionalProperties: false from JDBC destination connectors | -| 0.2.3 | 2022-06-16 | [\#13852](https://github.com/airbytehq/airbyte/pull/13852) | Updated stacktrace format for any trace message errors | -| 0.2.2 | 2022-06-13 | [\#13722](https://github.com/airbytehq/airbyte/pull/13722) | Rename to "Databricks Lakehouse". | -| 0.2.1 | 2022-06-08 | [\#13630](https://github.com/airbytehq/airbyte/pull/13630) | Rename to "Databricks Delta Lake" and add field orders in the spec. | -| 0.2.0 | 2022-05-15 | [\#12861](https://github.com/airbytehq/airbyte/pull/12861) | Use new public Databricks JDBC driver, and open source the connector. | -| 0.1.5 | 2022-05-04 | [\#12578](https://github.com/airbytehq/airbyte/pull/12578) | In JSON to Avro conversion, log JSON field values that do not follow Avro schema for debugging. | -| 0.1.4 | 2022-02-14 | [\#10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | -| 0.1.3 | 2022-01-06 | [\#7622](https://github.com/airbytehq/airbyte/pull/7622) [\#9153](https://github.com/airbytehq/airbyte/issues/9153) | Upgrade Spark JDBC driver to `2.6.21` to patch Log4j vulnerability; update connector fields title/description. | -| 0.1.2 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | -| 0.1.1 | 2021-10-05 | [\#6792](https://github.com/airbytehq/airbyte/pull/6792) | Require users to accept Databricks JDBC Driver [Terms & Conditions](https://databricks.com/jdbc-odbc-driver-license). | -| 0.1.0 | 2021-09-14 | [\#5998](https://github.com/airbytehq/airbyte/pull/5998) | Initial private release. 
| +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------------------------------------------------------- | +| 1.1.0 | 2023-06-02 | [\#26942](https://github.com/airbytehq/airbyte/pull/26942) | Support schema evolution | +| 1.0.2 | 2023-04-20 | [\#25366](https://github.com/airbytehq/airbyte/pull/25366) | Fix default catalog to be `hive_metastore` | +| 1.0.1 | 2023-03-30 | [\#24657](https://github.com/airbytehq/airbyte/pull/24657) | Fix support for external tables on S3 | +| 1.0.0 | 2023-03-21 | [\#23965](https://github.com/airbytehq/airbyte/pull/23965) | Added: Managed table storage type, Databricks Catalog field | +| 0.3.1 | 2022-10-15 | [\#18032](https://github.com/airbytehq/airbyte/pull/18032) | Add `SSL=1` to the JDBC URL to ensure SSL connection. | +| 0.3.0 | 2022-10-14 | [\#15329](https://github.com/airbytehq/airbyte/pull/15329) | Add support for Azure storage. | +| | 2022-09-01 | [\#16243](https://github.com/airbytehq/airbyte/pull/16243) | Fix Json to Avro conversion when there is field name clash from combined restrictions (`anyOf`, `oneOf`, `allOf` fields) | +| 0.2.6 | 2022-08-05 | [\#14801](https://github.com/airbytehq/airbyte/pull/14801) | Fix multiply log bindings | +| 0.2.5 | 2022-07-15 | [\#14494](https://github.com/airbytehq/airbyte/pull/14494) | Make S3 output filename configurable. | +| 0.2.4 | 2022-07-14 | [\#14618](https://github.com/airbytehq/airbyte/pull/14618) | Removed additionalProperties: false from JDBC destination connectors | +| 0.2.3 | 2022-06-16 | [\#13852](https://github.com/airbytehq/airbyte/pull/13852) | Updated stacktrace format for any trace message errors | +| 0.2.2 | 2022-06-13 | [\#13722](https://github.com/airbytehq/airbyte/pull/13722) | Rename to "Databricks Lakehouse". | +| 0.2.1 | 2022-06-08 | [\#13630](https://github.com/airbytehq/airbyte/pull/13630) | Rename to "Databricks Delta Lake" and add field orders in the spec. | +| 0.2.0 | 2022-05-15 | [\#12861](https://github.com/airbytehq/airbyte/pull/12861) | Use new public Databricks JDBC driver, and open source the connector. | +| 0.1.5 | 2022-05-04 | [\#12578](https://github.com/airbytehq/airbyte/pull/12578) | In JSON to Avro conversion, log JSON field values that do not follow Avro schema for debugging. | +| 0.1.4 | 2022-02-14 | [\#10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | +| 0.1.3 | 2022-01-06 | [\#7622](https://github.com/airbytehq/airbyte/pull/7622) [\#9153](https://github.com/airbytehq/airbyte/issues/9153) | Upgrade Spark JDBC driver to `2.6.21` to patch Log4j vulnerability; update connector fields title/description. | +| 0.1.2 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | +| 0.1.1 | 2021-10-05 | [\#6792](https://github.com/airbytehq/airbyte/pull/6792) | Require users to accept Databricks JDBC Driver [Terms & Conditions](https://databricks.com/jdbc-odbc-driver-license). | +| 0.1.0 | 2021-09-14 | [\#5998](https://github.com/airbytehq/airbyte/pull/5998) | Initial private release. 
| diff --git a/docs/integrations/destinations/doris.md b/docs/integrations/destinations/doris.md index 0f0554e88b9e..91b0e1176f0a 100644 --- a/docs/integrations/destinations/doris.md +++ b/docs/integrations/destinations/doris.md @@ -20,7 +20,7 @@ This section should contain a table with the following format: | :------------------------------------- | :----------------- | :----------------------- | | Full Refresh Sync | Yes | | | Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | it will soon be realized | +| Incremental - Append + Deduped | No | it will soon be realized | | For databases, WAL/Logical replication | Yes | | ### Performance considerations diff --git a/docs/integrations/destinations/duckdb.md b/docs/integrations/destinations/duckdb.md index 345576e31cf1..352975308ec4 100644 --- a/docs/integrations/destinations/duckdb.md +++ b/docs/integrations/destinations/duckdb.md @@ -1,5 +1,4 @@ - -# DuckDB +# DuckDB :::danger @@ -21,18 +20,18 @@ If you set [Normalization](https://docs.airbyte.com/understanding-airbyte/basic- Each table will contain 3 columns: -* `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. -* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. -* `_airbyte_data`: a json blob representing with the event data. +- `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. +- `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. +- `_airbyte_data`: a json blob representing with the event data. #### Features -| Feature | Supported | | -| :--- | :--- | :--- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | | -| Namespaces | No | | +| Feature | Supported | | +| :----------------------------- | :-------- | :-- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | No | | #### Performance consideration @@ -52,16 +51,15 @@ Please make sure that Docker Desktop has access to `/tmp` (and `/private` on a M ::: - ### Example: -* If `destination_path` is set to `/local/destination.duckdb` -* the local mount is using the `/tmp/airbyte_local` default -* then all data will be written to `/tmp/airbyte_local/destination.duckdb`. +- If `destination_path` is set to `/local/destination.duckdb` +- the local mount is using the `/tmp/airbyte_local` default +- then all data will be written to `/tmp/airbyte_local/destination.duckdb`. ## Access Replicated Data Files -If your Airbyte instance is running on the same computer that you are navigating with, you can open your browser and enter [file:///tmp/airbyte\_local](file:///tmp/airbyte_local) to look at the replicated data locally. If the first approach fails or if your Airbyte instance is running on a remote server, follow the following steps to access the replicated files: +If your Airbyte instance is running on the same computer that you are navigating with, you can open your browser and enter [file:///tmp/airbyte_local](file:///tmp/airbyte_local) to look at the replicated data locally. If the first approach fails or if your Airbyte instance is running on a remote server, follow the following steps to access the replicated files: 1. Access the scheduler container using `docker exec -it airbyte-server bash` 2. 
Navigate to the default local mount using `cd /tmp/airbyte_local` @@ -78,7 +76,6 @@ Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.0 | 2022-10-14 | [17494](https://github.com/airbytehq/airbyte/pull/17494) | New DuckDB destination | - +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :--------------------- | +| 0.1.0 | 2022-10-14 | [17494](https://github.com/airbytehq/airbyte/pull/17494) | New DuckDB destination | diff --git a/docs/integrations/destinations/dynamodb.md b/docs/integrations/destinations/dynamodb.md index a0fbccc87b0b..08000797ba53 100644 --- a/docs/integrations/destinations/dynamodb.md +++ b/docs/integrations/destinations/dynamodb.md @@ -5,6 +5,7 @@ This destination writes data to AWS DynamoDB. The Airbyte DynamoDB destination allows you to sync data to AWS DynamoDB. Each stream is written to its own table under the DynamoDB. ## Prerequisites + - For Airbyte Open Source users using the [Postgres](https://docs.airbyte.com/integrations/sources/postgres) source connector, [upgrade](https://docs.airbyte.com/operator-guides/upgrading-airbyte/) your Airbyte platform to version `v0.40.0-alpha` or newer and upgrade your DynamoDB connector to version `0.1.5` or newer ## Sync overview @@ -13,19 +14,19 @@ The Airbyte DynamoDB destination allows you to sync data to AWS DynamoDB. Each s Each stream will be output into its own DynamoDB table. Each table will a collections of `json` objects containing 4 fields: -* `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. -* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. -* `_airbyte_data`: a json blob representing with the extracted data. -* `sync_time`: a timestamp representing when the sync up task be triggered. +- `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. +- `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. +- `_airbyte_data`: a json blob representing with the extracted data. +- `sync_time`: a timestamp representing when the sync up task be triggered. ### Features -| Feature | Support | Notes | -| :--- | :---: | :--- | -| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured DynamoDB table. | -| Incremental - Append Sync | ✅ | | -| Incremental - Deduped History | ❌ | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | ✅ | Namespace will be used as part of the table name. | +| Feature | Support | Notes | +| :----------------------------- | :-----: | :-------------------------------------------------------------------------------------- | +| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured DynamoDB table. | +| Incremental - Append Sync | ✅ | | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ✅ | Namespace will be used as part of the table name. | ### Performance considerations @@ -40,33 +41,32 @@ This connector by default uses 10 capacity units for both Read and Write in Dyna ### Setup guide -* Fill up DynamoDB info - * **DynamoDB Endpoint** - * Leave empty if using AWS DynamoDB, fill in endpoint URL if using customized endpoint. - * **DynamoDB Table Name** - * The name prefix of the DynamoDB table to store the extracted data. 
The table name is \\_\\_\. - * **DynamoDB Region** - * The region of the DynamoDB. - * **Access Key Id** - * See [this](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) on how to generate an access key. - * We recommend creating an Airbyte-specific user. This user will require [read and write permissions](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_examples_dynamodb_specific-table.html) to the DynamoDB table. - * **Secret Access Key** - * Corresponding key to the above key id. -* Make sure your DynamoDB tables are accessible from the machine running Airbyte. - * This depends on your networking setup. - * You can check AWS DynamoDB documentation with a tutorial on how to properly configure your DynamoDB's access [here](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/access-control-overview.html). - * The easiest way to verify if Airbyte is able to connect to your DynamoDB tables is via the check connection tool in the UI. +- Fill up DynamoDB info + - **DynamoDB Endpoint** + - Leave empty if using AWS DynamoDB, fill in endpoint URL if using customized endpoint. + - **DynamoDB Table Name** + - The name prefix of the DynamoDB table to store the extracted data. The table name is \\_\\_\. + - **DynamoDB Region** + - The region of the DynamoDB. + - **Access Key Id** + - See [this](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) on how to generate an access key. + - We recommend creating an Airbyte-specific user. This user will require [read and write permissions](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_examples_dynamodb_specific-table.html) to the DynamoDB table. + - **Secret Access Key** + - Corresponding key to the above key id. +- Make sure your DynamoDB tables are accessible from the machine running Airbyte. + - This depends on your networking setup. + - You can check AWS DynamoDB documentation with a tutorial on how to properly configure your DynamoDB's access [here](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/access-control-overview.html). + - The easiest way to verify if Airbyte is able to connect to your DynamoDB tables is via the check connection tool in the UI. ## CHANGELOG -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.7 | 2022-11-03 | [\#18672](https://github.com/airbytehq/airbyte/pull/18672) | Added strict-encrypt cloud runner | -| 0.1.6 | 2022-11-01 | [\#18672](https://github.com/airbytehq/airbyte/pull/18672) | Enforce to use ssl connection | -| 0.1.5 | 2022-08-05 | [\#15350](https://github.com/airbytehq/airbyte/pull/15350) | Added per-stream handling | -| 0.1.4 | 2022-06-16 | [\#13852](https://github.com/airbytehq/airbyte/pull/13852) | Updated stacktrace format for any trace message errors | -| 0.1.3 | 2022-05-17 | [12820](https://github.com/airbytehq/airbyte/pull/12820) | Improved 'check' operation performance | -| 0.1.2 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | -| 0.1.1 | 2022-12-05 | [\#9314](https://github.com/airbytehq/airbyte/pull/9314) | Rename dynamo_db_table_name to dynamo_db_table_name_prefix. | -| 0.1.0 | 2021-08-20 | [\#5561](https://github.com/airbytehq/airbyte/pull/5561) | Initial release. 
| - +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :--------------------------------------------------------- | :---------------------------------------------------------- | +| 0.1.7 | 2022-11-03 | [\#18672](https://github.com/airbytehq/airbyte/pull/18672) | Added strict-encrypt cloud runner | +| 0.1.6 | 2022-11-01 | [\#18672](https://github.com/airbytehq/airbyte/pull/18672) | Enforce to use ssl connection | +| 0.1.5 | 2022-08-05 | [\#15350](https://github.com/airbytehq/airbyte/pull/15350) | Added per-stream handling | +| 0.1.4 | 2022-06-16 | [\#13852](https://github.com/airbytehq/airbyte/pull/13852) | Updated stacktrace format for any trace message errors | +| 0.1.3 | 2022-05-17 | [12820](https://github.com/airbytehq/airbyte/pull/12820) | Improved 'check' operation performance | +| 0.1.2 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | +| 0.1.1 | 2022-12-05 | [\#9314](https://github.com/airbytehq/airbyte/pull/9314) | Rename dynamo_db_table_name to dynamo_db_table_name_prefix. | +| 0.1.0 | 2021-08-20 | [\#5561](https://github.com/airbytehq/airbyte/pull/5561) | Initial release. | diff --git a/docs/integrations/destinations/exasol.md b/docs/integrations/destinations/exasol.md index 9c030a8a0bc2..179ff93359cb 100644 --- a/docs/integrations/destinations/exasol.md +++ b/docs/integrations/destinations/exasol.md @@ -16,15 +16,15 @@ Each Airbyte Stream becomes an Exasol table and each Airbyte Field becomes an Ex The Exasol destination supports the following features: -| Feature | Supported? (Yes/No) | Notes | -| :---------------------------- | :------------------ | :---- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | | -| Normalization | No | | -| Namespaces | Yes | | -| SSL connection | Yes | TLS | -| SSH Tunnel Support | No | | +| Feature | Supported? (Yes/No) | Notes | +| :----------------------------- | :------------------ | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Normalization | No | | +| Namespaces | Yes | | +| SSL connection | Yes | TLS | +| SSH Tunnel Support | No | | ### Limitations diff --git a/docs/integrations/destinations/gcs.md b/docs/integrations/destinations/gcs.md index b1d49711eec0..9692fc4dda55 100644 --- a/docs/integrations/destinations/gcs.md +++ b/docs/integrations/destinations/gcs.md @@ -10,12 +10,12 @@ The Airbyte GCS destination allows you to sync data to cloud storage buckets. Ea #### Features -| Feature | Support | Notes | -| :---------------------------- | :-----: | :------------------------------------------------------------------------------------------- | -| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Incremental - Append Sync | ✅ | | -| Incremental - Deduped History | ❌ | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | +| Feature | Support | Notes | +| :----------------------------- | :-----: | :----------------------------------------------------------------------------------- | +| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. 
| +| Incremental - Append Sync | ✅ | | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | ## Configuration @@ -237,7 +237,7 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A | Version | Date | Pull Request | Subject | | :------ | :--------- | :--------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------- | -| 0.4.4 | 2023-07-14 | [#28345](https://github.com/airbytehq/airbyte/pull/28345) | Increment patch to trigger a rebuild | +| 0.4.4 | 2023-07-14 | [#28345](https://github.com/airbytehq/airbyte/pull/28345) | Increment patch to trigger a rebuild | | 0.4.3 | 2023-07-05 | [#27936](https://github.com/airbytehq/airbyte/pull/27936) | Internal code update | | 0.4.2 | 2023-06-30 | [#27891](https://github.com/airbytehq/airbyte/pull/27891) | Internal code update | | 0.4.1 | 2023-06-28 | [#27268](https://github.com/airbytehq/airbyte/pull/27268) | Internal code update | diff --git a/docs/integrations/destinations/kafka.md b/docs/integrations/destinations/kafka.md index 1d9ac1d8bde2..6415dc1cf9b4 100644 --- a/docs/integrations/destinations/kafka.md +++ b/docs/integrations/destinations/kafka.md @@ -25,12 +25,12 @@ Each record will contain in its key the uuid assigned by Airbyte, and in the val #### Features -| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :------------------------------------------------------------------------------------------- | -| Full Refresh Sync | No | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | No | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | Yes | | ## Getting started diff --git a/docs/integrations/destinations/keen.md b/docs/integrations/destinations/keen.md index 37c33f75ca80..dc8db2b25877 100644 --- a/docs/integrations/destinations/keen.md +++ b/docs/integrations/destinations/keen.md @@ -21,12 +21,12 @@ Each replicated stream from Airbyte will output data into a corresponding event #### Features -| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :------------------------------------------------------------------------------------------- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. 
| -| Namespaces | No | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | No | | ## Getting started diff --git a/docs/integrations/destinations/kinesis.md b/docs/integrations/destinations/kinesis.md index 21f288f573a9..be39fa228a52 100644 --- a/docs/integrations/destinations/kinesis.md +++ b/docs/integrations/destinations/kinesis.md @@ -17,12 +17,12 @@ This connector maps an incoming data from a namespace and stream to a unique Kin ### Features -| Feature | Support | Notes | -| :---------------------------- | :-----: | :-------------------------------------------------------------------------------- | -| Full Refresh Sync | ❌ | | -| Incremental - Append Sync | ✅ | Incoming messages are streamed/appended to a Kinesis stream as they are received. | -| Incremental - Deduped History | ❌ | | -| Namespaces | ✅ | Namespaces will be used to determine the Kinesis stream name. | +| Feature | Support | Notes | +| :----------------------------- | :-----: | :-------------------------------------------------------------------------------- | +| Full Refresh Sync | ❌ | | +| Incremental - Append Sync | ✅ | Incoming messages are streamed/appended to a Kinesis stream as they are received. | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ✅ | Namespaces will be used to determine the Kinesis stream name. | ### Performance considerations diff --git a/docs/integrations/destinations/local-json.md b/docs/integrations/destinations/local-json.md index 4e8081bd3ec1..11870a8d5177 100644 --- a/docs/integrations/destinations/local-json.md +++ b/docs/integrations/destinations/local-json.md @@ -16,18 +16,18 @@ This destination writes data to a directory on the _local_ filesystem on the hos Each stream will be output into its own file. Each file will a collections of `json` objects containing 3 fields: -* `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. -* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. -* `_airbyte_data`: a json blob representing with the extracted data. +- `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. +- `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. +- `_airbyte_data`: a json blob representing with the extracted data. #### Features -| Feature | Supported | | -| :--- | :--- | :--- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | No | | +| Feature | Supported | | +| :----------------------------- | :-------- | :-- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | No | | #### Performance considerations @@ -47,16 +47,15 @@ Please make sure that Docker Desktop has access to `/tmp` (and `/private` on a M ::: - ### Example: -* If `destination_path` is set to `/local/cars/models` -* the local mount is using the `/tmp/airbyte_local` default -* then all data will be written to `/tmp/airbyte_local/cars/models` directory. 
+- If `destination_path` is set to `/local/cars/models` +- the local mount is using the `/tmp/airbyte_local` default +- then all data will be written to `/tmp/airbyte_local/cars/models` directory. ## Access Replicated Data Files -If your Airbyte instance is running on the same computer that you are navigating with, you can open your browser and enter [file:///tmp/airbyte\_local](file:///tmp/airbyte_local) to look at the replicated data locally. If the first approach fails or if your Airbyte instance is running on a remote server, follow the following steps to access the replicated files: +If your Airbyte instance is running on the same computer that you are navigating with, you can open your browser and enter [file:///tmp/airbyte_local](file:///tmp/airbyte_local) to look at the replicated data locally. If the first approach fails or if your Airbyte instance is running on a remote server, follow the following steps to access the replicated files: 1. Access the scheduler container using `docker exec -it airbyte-server bash` 2. Navigate to the default local mount using `cd /tmp/airbyte_local` @@ -74,6 +73,6 @@ Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.2.11 | 2022-02-14 | [14641](https://github.com/airbytehq/airbyte/pull/14641) | Include lifecycle management | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :--------------------------- | +| 0.2.11 | 2022-02-14 | [14641](https://github.com/airbytehq/airbyte/pull/14641) | Include lifecycle management | diff --git a/docs/integrations/destinations/meilisearch.md b/docs/integrations/destinations/meilisearch.md index a1bb7eeb1181..d7f40201b775 100644 --- a/docs/integrations/destinations/meilisearch.md +++ b/docs/integrations/destinations/meilisearch.md @@ -12,12 +12,12 @@ Each stream will be output into its own index in MeiliSearch. Each table will be #### Features -| Feature | Supported?\(Yes/No\) | Notes | -| :--- | :--- | :--- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | No | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | No | | ## Getting started @@ -31,9 +31,9 @@ The setup only requires two fields. 
First is the `host` which is the address at ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 1.0.0 | 2022-10-26 | [18036](https://github.com/airbytehq/airbyte/pull/18036) | Migrate MeiliSearch to Python CDK | -| 0.2.13 | 2022-06-17 | [13864](https://github.com/airbytehq/airbyte/pull/13864) | Updated stacktrace format for any trace message errors | -| 0.2.12 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | -| 0.2.11 | 2021-12-28 | [9156](https://github.com/airbytehq/airbyte/pull/9156) | Update connector fields title/description | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :----------------------------------------------------- | +| 1.0.0 | 2022-10-26 | [18036](https://github.com/airbytehq/airbyte/pull/18036) | Migrate MeiliSearch to Python CDK | +| 0.2.13 | 2022-06-17 | [13864](https://github.com/airbytehq/airbyte/pull/13864) | Updated stacktrace format for any trace message errors | +| 0.2.12 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | +| 0.2.11 | 2021-12-28 | [9156](https://github.com/airbytehq/airbyte/pull/9156) | Update connector fields title/description | diff --git a/docs/integrations/destinations/mongodb.md b/docs/integrations/destinations/mongodb.md index 3542f3f45fee..51bd94cb8c46 100644 --- a/docs/integrations/destinations/mongodb.md +++ b/docs/integrations/destinations/mongodb.md @@ -2,12 +2,12 @@ ## Features -| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :------------------------------------------------------------------------------------------- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | Yes | | ## Prerequisites diff --git a/docs/integrations/destinations/mqtt.md b/docs/integrations/destinations/mqtt.md index d7f012097dbf..0c33e9ed9348 100644 --- a/docs/integrations/destinations/mqtt.md +++ b/docs/integrations/destinations/mqtt.md @@ -5,6 +5,7 @@ The Airbyte MQTT destination allows you to sync data to any MQTT system compliance with version 3.1.X. Each stream is written to the corresponding MQTT topic. ## Prerequisites + - For Airbyte Open Source users using the [Postgres](https://docs.airbyte.com/integrations/sources/postgres) source connector, [upgrade](https://docs.airbyte.com/operator-guides/upgrading-airbyte/) your Airbyte platform to version `v0.40.0-alpha` or newer and upgrade your MQTT connector to the latest version ### Sync overview @@ -17,19 +18,19 @@ This connector writes data with JSON format (in bytes). Each record will contain in its payload these 4 fields: -* `_airbyte_ab_id`: an uuid assigned by Airbyte to each event that is processed. -* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. -* `_airbyte_data`: a json blob representing with the event data. -* `_airbyte_stream`: the name of each record's stream. 
+- `_airbyte_ab_id`: an uuid assigned by Airbyte to each event that is processed. +- `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. +- `_airbyte_data`: a json blob representing with the event data. +- `_airbyte_stream`: the name of each record's stream. #### Features -| Feature | Supported?\(Yes/No\) | Notes | -| :--- | :--- | :--- | -| Full Refresh Sync | No | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | No | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | Yes | | ## Getting started @@ -37,7 +38,7 @@ Each record will contain in its payload these 4 fields: To use the MQTT destination, you'll need: -* A MQTT broker implementing MQTT protocol version 3.1.X. +- A MQTT broker implementing MQTT protocol version 3.1.X. ### Setup guide @@ -61,30 +62,29 @@ To define the output topics dynamically, you can leverage the `{namespace}` and You should now have all the requirements needed to configure MQTT as a destination in the UI. You can configure the following parameters on the MQTT destination \(though many of these are optional or have default values\): -* **MQTT broker host** -* **MQTT broker port** -* **Use TLS** -* **Username** -* **Password** -* **Topic pattern** -* **Test topic** -* **Client ID** -* **Sync publisher** -* **Connect timeout** -* **Automatic reconnect** -* **Clean session** -* **Message retained** -* **Message QoS** +- **MQTT broker host** +- **MQTT broker port** +- **Use TLS** +- **Username** +- **Password** +- **Topic pattern** +- **Test topic** +- **Client ID** +- **Sync publisher** +- **Connect timeout** +- **Automatic reconnect** +- **Clean session** +- **Message retained** +- **Message QoS** More info about this can be found in the [OASIS MQTT standard site](http://docs.oasis-open.org/mqtt/mqtt/v3.1.1/mqtt-v3.1.1.html). _NOTE_: MQTT version 5 is not supported yet. 
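As a rough sketch of what a downstream consumer sees, the snippet below subscribes to the topics produced by this destination and parses the four payload fields listed above. It assumes the widely used `paho-mqtt` client (1.x-style constructor) and a topic pattern along the lines of `airbyte/{namespace}/{stream}`; the broker host, port, and topic filter are placeholders to adjust for your setup.

```python
# Sketch only: consume records published by the MQTT destination.
# The broker address, port, and the "airbyte/#" topic filter are assumptions.
import json

import paho.mqtt.client as mqtt


def on_message(client, userdata, msg):
    # The payload is a JSON-encoded byte string containing the four fields
    # documented above (_airbyte_ab_id, _airbyte_emitted_at, _airbyte_data,
    # _airbyte_stream).
    record = json.loads(msg.payload)
    print(msg.topic, record["_airbyte_stream"], record["_airbyte_emitted_at"])


client = mqtt.Client()  # paho-mqtt 1.x-style constructor
client.on_message = on_message
client.connect("broker.example.com", 1883)
client.subscribe("airbyte/#")
client.loop_forever()
```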
- ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.3 | 2022-09-02 | [16263](https://github.com/airbytehq/airbyte/pull/16263) | Marked password field in spec as airbyte_secret | -| 0.1.2 | 2022-07-12 | [14648](https://github.com/airbytehq/airbyte/pull/14648) | Include lifecycle management | -| 0.1.1 | 2022-05-24 | [13099](https://github.com/airbytehq/airbyte/pull/13099) | Fixed build's tests | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :---------------------------------------------- | +| 0.1.3 | 2022-09-02 | [16263](https://github.com/airbytehq/airbyte/pull/16263) | Marked password field in spec as airbyte_secret | +| 0.1.2 | 2022-07-12 | [14648](https://github.com/airbytehq/airbyte/pull/14648) | Include lifecycle management | +| 0.1.1 | 2022-05-24 | [13099](https://github.com/airbytehq/airbyte/pull/13099) | Fixed build's tests | diff --git a/docs/integrations/destinations/mssql.md b/docs/integrations/destinations/mssql.md index be7313e5f62c..c48261be1a0b 100644 --- a/docs/integrations/destinations/mssql.md +++ b/docs/integrations/destinations/mssql.md @@ -2,12 +2,12 @@ ## Features -| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :------------------------------------------------------------------------------------------- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | Yes | | +| Namespaces | Yes | | ## Output Schema @@ -26,12 +26,12 @@ Each stream will be output into its own table in SQL Server. Each table will con Airbyte Cloud only supports connecting to your MSSQL instance with TLS encryption. Other than that, you can proceed with the open-source instructions below. 
-| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :---- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | Yes | | -| Namespaces | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | Yes | | +| Namespaces | Yes | | ## Getting Started \(Airbyte Open-Source\) diff --git a/docs/integrations/destinations/mysql.md b/docs/integrations/destinations/mysql.md index 3d7a4968ea94..58826d631142 100644 --- a/docs/integrations/destinations/mysql.md +++ b/docs/integrations/destinations/mysql.md @@ -7,13 +7,13 @@ There are two flavors of connectors for this destination: ## Features -| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :---- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | | -| Namespaces | Yes | | -| SSH Tunnel Connection | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | Yes | | +| Namespaces | Yes | | +| SSH Tunnel Connection | Yes | | #### Output Schema diff --git a/docs/integrations/destinations/oracle.md b/docs/integrations/destinations/oracle.md index e1b6e834974e..2b26a69cbf6c 100644 --- a/docs/integrations/destinations/oracle.md +++ b/docs/integrations/destinations/oracle.md @@ -2,15 +2,15 @@ ## Features -| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :-------------------------------------------------------------------- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | Yes | | -| Namespaces | Yes | | -| Basic Normalization | Yes | Doesn't support for nested json yet | -| SSH Tunnel Connection | Yes | | -| Encryption | Yes | Support Native Network Encryption (NNE) as well as TLS using SSL cert | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :-------------------------------------------------------------------- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | Yes | | +| Namespaces | Yes | | +| Basic Normalization | Yes | Doesn't support for nested json yet | +| SSH Tunnel Connection | Yes | | +| Encryption | Yes | Support Native Network Encryption (NNE) as well as TLS using SSL cert | ## Output Schema diff --git a/docs/integrations/destinations/postgres.md b/docs/integrations/destinations/postgres.md index eb0c20911723..a05718c145e2 100644 --- a/docs/integrations/destinations/postgres.md +++ b/docs/integrations/destinations/postgres.md @@ -139,12 +139,12 @@ characters. 
The Postgres destination connector supports the following[ sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :---- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | Yes | | -| Namespaces | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | Yes | | +| Namespaces | Yes | | ## Schema map diff --git a/docs/integrations/destinations/pubsub.md b/docs/integrations/destinations/pubsub.md index 79f553e59333..a3aef1932a7e 100644 --- a/docs/integrations/destinations/pubsub.md +++ b/docs/integrations/destinations/pubsub.md @@ -11,6 +11,7 @@ description: >- The Airbyte Google PubSub destination allows you to send/stream data into PubSub. Pub/Sub is an asynchronous messaging service provided by Google Cloud Provider. ## Prerequisites + - For Airbyte Open Source users using the [Postgres](https://docs.airbyte.com/integrations/sources/postgres) source connector, [upgrade](https://docs.airbyte.com/operator-guides/upgrading-airbyte/) your Airbyte platform to version `v0.40.0-alpha` or newer and upgrade your PubSub connector to version `0.1.6` or newer ### Sync overview @@ -19,23 +20,23 @@ The Airbyte Google PubSub destination allows you to send/stream data into PubSub Each stream will be output a PubSubMessage with attributes. The message attributes will be -* `_stream`: the name of stream where the data is coming from -* `_namespace`: namespace if available from the stream +- `_stream`: the name of stream where the data is coming from +- `_namespace`: namespace if available from the stream The data will be a serialized JSON, containing the following fields -* `_airbyte_ab_id`: a uuid string assigned by Airbyte to each event that is processed. -* `_airbyte_emitted_at`: a long timestamp\(ms\) representing when the event was pulled from the data source. -* `_airbyte_data`: a json string representing source data. +- `_airbyte_ab_id`: a uuid string assigned by Airbyte to each event that is processed. +- `_airbyte_emitted_at`: a long timestamp\(ms\) representing when the event was pulled from the data source. +- `_airbyte_data`: a json string representing source data. #### Features -| Feature | Supported?\(Yes/No\) | Notes | -| :--- | :--- | :--- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. 
| -| Namespaces | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | Yes | | ## Getting started @@ -43,10 +44,10 @@ The data will be a serialized JSON, containing the following fields To use the PubSub destination, you'll need: -* A Google Cloud Project with PubSub enabled -* A PubSub Topic to which Airbyte can stream/sync your data -* A Google Cloud Service Account with the `Pub/Sub Editor` role in your GCP project -* A Service Account Key to authenticate into your Service Account +- A Google Cloud Project with PubSub enabled +- A PubSub Topic to which Airbyte can stream/sync your data +- A Google Cloud Service Account with the `Pub/Sub Editor` role in your GCP project +- A Service Account Key to authenticate into your Service Account See the setup guide for more information about how to create the required resources. @@ -82,21 +83,20 @@ Follow the [Creating and Managing Service Account Keys](https://cloud.google.com You should now have all the requirements needed to configure PubSub as a destination in the UI. You'll need the following information to configure the PubSub destination: -* **Project ID**: GCP project id -* **Topic ID**: name of pubsub topic under the project -* **Service Account Key**: the contents of your Service Account Key JSON file +- **Project ID**: GCP project id +- **Topic ID**: name of pubsub topic under the project +- **Service Account Key**: the contents of your Service Account Key JSON file Once you've configured PubSub as a destination, delete the Service Account Key from your computer. ## CHANGELOG -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.2.0 | August 16, 2022 | [15705](https://github.com/airbytehq/airbyte/pull/15705) | Add configuration for Batching and Ordering | -| 0.1.5 | 2022-06-17 | [13864](https://github.com/airbytehq/airbyte/pull/13864) | Updated stacktrace format for any trace message errors | -| 0.1.4 | February 21, 2022 | [\#9819](https://github.com/airbytehq/airbyte/pull/9819) | Upgrade version of google-cloud-pubsub | -| 0.1.3 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | (unpublished) Add `-XX:+ExitOnOutOfMemoryError` JVM option | -| 0.1.2 | December 29, 2021 | [\#9183](https://github.com/airbytehq/airbyte/pull/9183) | Update connector fields title/description | -| 0.1.1 | August 13, 2021 | [\#4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | -| 0.1.0 | June 24, 2021 | [\#4339](https://github.com/airbytehq/airbyte/pull/4339) | Initial release | - +| Version | Date | Pull Request | Subject | +| :------ | :---------------- | :------------------------------------------------------- | :--------------------------------------------------------- | +| 0.2.0 | August 16, 2022 | [15705](https://github.com/airbytehq/airbyte/pull/15705) | Add configuration for Batching and Ordering | +| 0.1.5 | 2022-06-17 | [13864](https://github.com/airbytehq/airbyte/pull/13864) | Updated stacktrace format for any trace message errors | +| 0.1.4 | February 21, 2022 | [\#9819](https://github.com/airbytehq/airbyte/pull/9819) | Upgrade version of google-cloud-pubsub | +| 0.1.3 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | (unpublished) Add `-XX:+ExitOnOutOfMemoryError` JVM option | +| 0.1.2 | December 29, 2021 | 
[\#9183](https://github.com/airbytehq/airbyte/pull/9183) | Update connector fields title/description | +| 0.1.1 | August 13, 2021 | [\#4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | +| 0.1.0 | June 24, 2021 | [\#4339](https://github.com/airbytehq/airbyte/pull/4339) | Initial release | diff --git a/docs/integrations/destinations/pulsar.md b/docs/integrations/destinations/pulsar.md index 9a44887da19f..67d0bf40c006 100644 --- a/docs/integrations/destinations/pulsar.md +++ b/docs/integrations/destinations/pulsar.md @@ -25,12 +25,12 @@ Each record will contain in its key the uuid assigned by Airbyte, and in the val #### Features -| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :------------------------------------------------------------------------------------------- | -| Full Refresh Sync | No | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | No | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | Yes | | ## Getting started diff --git a/docs/integrations/destinations/r2.md b/docs/integrations/destinations/r2.md index b18a2475a2c1..6ed9c67deebc 100644 --- a/docs/integrations/destinations/r2.md +++ b/docs/integrations/destinations/r2.md @@ -3,12 +3,14 @@ This page guides you through the process of setting up the R2 destination connector. ## Prerequisites + List of required fields: -* **Account ID** -* **Access Key ID** -* **Secret Access Key** -* **R2 Bucket Name** -* **R2 Bucket Path** + +- **Account ID** +- **Access Key ID** +- **Secret Access Key** +- **R2 Bucket Name** +- **R2 Bucket Path** 1. Allow connections from Airbyte server to your Cloudflare R2 bucket @@ -30,22 +32,21 @@ to create an S3 bucket, or you can create bucket via R2 module of [dashboard](ht 2. In the left navigation bar, click **Destinations**. In the top-right corner, click **+ new destination**. 3. On the destination setup page, select **R2** from the Destination type dropdown and enter a name for this connector. 4. Configure fields: - * **Account Id** - * See [this](https://developers.cloudflare.com/r2/get-started/#4-bind-your-bucket-to-a-worker) to copy your Account ID. - * **Access Key Id** - * See [this](https://developers.cloudflare.com/r2/platform/s3-compatibility/tokens) on how to generate an access key. - * **Secret Access Key** - * Corresponding key to the above key id. - * **R2 Bucket Name** - * See [this](https://developers.cloudflare.com/r2/get-started/#3-create-your-bucket) to create an R2 bucket or you can create bucket via R2 module of [dashboard](https://dash.cloudflare.com). - * **R2 Bucket Path** - * Subdirectory under the above bucket to sync the data into. - * **R2 Path Format** - * Additional string format on how to store data under R2 Bucket Path. Default value is `${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY} - _${EPOCH}_`. - * **R2 Filename pattern** - * The pattern allows you to set the file-name format for the R2 staging file(s), next placeholders combinations are currently supported: - {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. 
Please, don't use empty space and not supportable placeholders, as they won't recognized. + - **Account Id** + - See [this](https://developers.cloudflare.com/r2/get-started/#4-bind-your-bucket-to-a-worker) to copy your Account ID. + - **Access Key Id** + - See [this](https://developers.cloudflare.com/r2/platform/s3-compatibility/tokens) on how to generate an access key. + - **Secret Access Key** + - Corresponding key to the above key id. + - **R2 Bucket Name** + - See [this](https://developers.cloudflare.com/r2/get-started/#3-create-your-bucket) to create an R2 bucket or you can create bucket via R2 module of [dashboard](https://dash.cloudflare.com). + - **R2 Bucket Path** + - Subdirectory under the above bucket to sync the data into. + - **R2 Path Format** - Additional string format on how to store data under R2 Bucket Path. Default value is `${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY} +_${EPOCH}_`. + - **R2 Filename pattern** + - The pattern allows you to set the file-name format for the R2 staging file(s), next placeholders combinations are currently supported: + {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. Please, don't use empty space and not supportable placeholders, as they won't recognized. 5. Click `Set up destination`. **For Airbyte OSS:** @@ -54,25 +55,25 @@ to create an S3 bucket, or you can create bucket via R2 module of [dashboard](ht 2. In the left navigation bar, click **Destinations**. In the top-right corner, click **+ new destination**. 3. On the destination setup page, select **R2** from the Destination type dropdown and enter a name for this connector. 4. Configure fields: - * **Account Id** - * See [this](https://developers.cloudflare.com/r2/get-started/#4-bind-your-bucket-to-a-worker) to copy your Account ID. - * **Access Key Id** - * See [this](https://developers.cloudflare.com/r2/platform/s3-compatibility/tokens) on how to generate an access key. - * **Secret Access Key** - * Corresponding key to the above key id. - * Make sure your R2 bucket is accessible from the machine running Airbyte. - * This depends on your networking setup. - * The easiest way to verify if Airbyte is able to connect to your R2 bucket is via the check connection tool in the UI. - * **R2 Bucket Name** - * See [this](https://developers.cloudflare.com/r2/get-started/#3-create-your-bucket) to create an R2 bucket or you can create bucket via R2 module of [dashboard](https://dash.cloudflare.com). - * **R2 Bucket Path** - * Subdirectory under the above bucket to sync the data into. - * **R2 Path Format** - * Additional string format on how to store data under R2 Bucket Path. Default value is `${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY} - _${EPOCH}_`. - * **R2 Filename pattern** - * The pattern allows you to set the file-name format for the R2 staging file(s), next placeholders combinations are currently supported: - {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. Please, don't use empty space and not supportable placeholders, as they won't recognized. + + - **Account Id** + - See [this](https://developers.cloudflare.com/r2/get-started/#4-bind-your-bucket-to-a-worker) to copy your Account ID. + - **Access Key Id** + - See [this](https://developers.cloudflare.com/r2/platform/s3-compatibility/tokens) on how to generate an access key. + - **Secret Access Key** + - Corresponding key to the above key id. 
+ - Make sure your R2 bucket is accessible from the machine running Airbyte. + - This depends on your networking setup. + - The easiest way to verify if Airbyte is able to connect to your R2 bucket is via the check connection tool in the UI. + - **R2 Bucket Name** + - See [this](https://developers.cloudflare.com/r2/get-started/#3-create-your-bucket) to create an R2 bucket or you can create bucket via R2 module of [dashboard](https://dash.cloudflare.com). + - **R2 Bucket Path** + - Subdirectory under the above bucket to sync the data into. + - **R2 Path Format** - Additional string format on how to store data under R2 Bucket Path. Default value is `${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY} +_${EPOCH}_`. + - **R2 Filename pattern** + - The pattern allows you to set the file-name format for the R2 staging file(s), next placeholders combinations are currently supported: + {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. Please, don't use empty space and not supportable placeholders, as they won't recognized. 5. Click `Set up destination`. @@ -104,6 +105,7 @@ The rationales behind this naming pattern are: 3. The upload time composes of a date part and millis part so that it is both readable and unique. But it is possible to further customize by using the available variables to format the bucket path: + - `${NAMESPACE}`: Namespace where the stream comes from or configured by the connection namespace fields. - `${STREAM_NAME}`: Name of the stream - `${YEAR}`: Year in which the sync was writing the output data in. @@ -117,6 +119,7 @@ But it is possible to further customize by using the available variables to form - `${UUID}`: random uuid string Note: + - Multiple `/` characters in the R2 path are collapsed into a single `/` character. - If the output bucket contains too many files, the part id variable is using a `UUID` instead. It uses sequential ID otherwise. @@ -125,12 +128,12 @@ A data sync may create multiple files as the output files can be partitioned by ## Supported sync modes -| Feature | Support | Notes | -| :--- | :---: | :--- | -| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Incremental - Append Sync | ✅ | | -| Incremental - Deduped History | ❌ | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | +| Feature | Support | Notes | +| :----------------------------- | :-----: | :----------------------------------------------------------------------------------- | +| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | +| Incremental - Append Sync | ✅ | | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | The Airbyte R2 destination allows you to sync data to Cloudflare R2. Each stream is written to its own directory under the bucket. ⚠️ Please note that under "Full Refresh Sync" mode, data in the configured bucket and path will be wiped out before each sync. We recommend you to provision a dedicated R2 resource for this sync to prevent unexpected data deletion from misconfiguration. ⚠️ @@ -139,8 +142,8 @@ The Airbyte R2 destination allows you to sync data to Cloudflare R2. 
Each stream Each stream will be outputted to its dedicated directory according to the configuration. The complete datastore of each stream includes all the output files under that directory. You can think of the directory as equivalent of a Table in the database world. -* Under Full Refresh Sync mode, old output files will be purged before new files are created. -* Under Incremental - Append Sync mode, new output files will be added that only contain the new data. +- Under Full Refresh Sync mode, old output files will be purged before new files are created. +- Under Incremental - Append Sync mode, new output files will be added that only contain the new data. ### Avro @@ -150,28 +153,28 @@ Each stream will be outputted to its dedicated directory according to the config Here is the available compression codecs: -* No compression -* `deflate` - * Compression level - * Range `[0, 9]`. Default to 0. - * Level 0: no compression & fastest. - * Level 9: best compression & slowest. -* `bzip2` -* `xz` - * Compression level - * Range `[0, 9]`. Default to 6. - * Level 0-3 are fast with medium compression. - * Level 4-6 are fairly slow with high compression. - * Level 7-9 are like level 6 but use bigger dictionaries and have higher memory requirements. Unless the uncompressed size of the file exceeds 8 MiB, 16 MiB, or 32 MiB, it is waste of memory to use the presets 7, 8, or 9, respectively. -* `zstandard` - * Compression level - * Range `[-5, 22]`. Default to 3. - * Negative levels are 'fast' modes akin to `lz4` or `snappy`. - * Levels above 9 are generally for archival purposes. - * Levels above 18 use a lot of memory. - * Include checksum - * If set to `true`, a checksum will be included in each data block. -* `snappy` +- No compression +- `deflate` + - Compression level + - Range `[0, 9]`. Default to 0. + - Level 0: no compression & fastest. + - Level 9: best compression & slowest. +- `bzip2` +- `xz` + - Compression level + - Range `[0, 9]`. Default to 6. + - Level 0-3 are fast with medium compression. + - Level 4-6 are fairly slow with high compression. + - Level 7-9 are like level 6 but use bigger dictionaries and have higher memory requirements. Unless the uncompressed size of the file exceeds 8 MiB, 16 MiB, or 32 MiB, it is waste of memory to use the presets 7, 8, or 9, respectively. +- `zstandard` + - Compression level + - Range `[-5, 22]`. Default to 3. + - Negative levels are 'fast' modes akin to `lz4` or `snappy`. + - Levels above 9 are generally for archival purposes. + - Levels above 18 use a lot of memory. + - Include checksum + - If set to `true`, a checksum will be included in each data block. +- `snappy` #### Data schema @@ -181,12 +184,12 @@ Under the hood, an Airbyte data stream in JSON schema is first converted to an A Like most of the other Airbyte destination connectors, usually the output has three columns: a UUID, an emission timestamp, and the data blob. With the CSV output, it is possible to normalize \(flatten\) the data blob to multiple columns. -| Column | Condition | Description | -| :--- | :--- | :--- | -| `_airbyte_ab_id` | Always exists | A uuid assigned by Airbyte to each processed record. | -| `_airbyte_emitted_at` | Always exists. | A timestamp representing when the event was pulled from the data source. | -| `_airbyte_data` | When no normalization \(flattening\) is needed, all data reside under this column as a json blob. | | -| root level fields | When root level normalization \(flattening\) is selected, the root level fields are expanded. 
| | +| Column | Condition | Description | +| :-------------------- | :------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------- | +| `_airbyte_ab_id` | Always exists | A uuid assigned by Airbyte to each processed record. | +| `_airbyte_emitted_at` | Always exists. | A timestamp representing when the event was pulled from the data source. | +| `_airbyte_data` | When no normalization \(flattening\) is needed, all data reside under this column as a json blob. | | +| root level fields | When root level normalization \(flattening\) is selected, the root level fields are expanded. | | For example, given the following json object from a source: @@ -202,15 +205,15 @@ For example, given the following json object from a source: With no normalization, the output CSV is: -| `_airbyte_ab_id` | `_airbyte_emitted_at` | `_airbyte_data` | -| :--- | :--- | :--- | -| `26d73cde-7eb1-4e1e-b7db-a4c03b4cf206` | 1622135805000 | `{ "user_id": 123, name: { "first": "John", "last": "Doe" } }` | +| `_airbyte_ab_id` | `_airbyte_emitted_at` | `_airbyte_data` | +| :------------------------------------- | :-------------------- | :------------------------------------------------------------- | +| `26d73cde-7eb1-4e1e-b7db-a4c03b4cf206` | 1622135805000 | `{ "user_id": 123, name: { "first": "John", "last": "Doe" } }` | With root level normalization, the output CSV is: -| `_airbyte_ab_id` | `_airbyte_emitted_at` | `user_id` | `name` | -| :--- | :--- | :--- | :--- | -| `26d73cde-7eb1-4e1e-b7db-a4c03b4cf206` | 1622135805000 | 123 | `{ "first": "John", "last": "Doe" }` | +| `_airbyte_ab_id` | `_airbyte_emitted_at` | `user_id` | `name` | +| :------------------------------------- | :-------------------- | :-------- | :----------------------------------- | +| `26d73cde-7eb1-4e1e-b7db-a4c03b4cf206` | 1622135805000 | 123 | `{ "first": "John", "last": "Doe" }` | Output files can be compressed. The default option is GZIP compression. If compression is selected, the output filename will have an extra extension (GZIP: `.csv.gz`). @@ -262,14 +265,14 @@ Output files can be compressed. The default option is GZIP compression. If compr The following configuration is available to configure the Parquet output: -| Parameter | Type | Default | Description | -| :--- | :---: | :---: | :--- | -| `compression_codec` | enum | `UNCOMPRESSED` | **Compression algorithm**. Available candidates are: `UNCOMPRESSED`, `SNAPPY`, `GZIP`, `LZO`, `BROTLI`, `LZ4`, and `ZSTD`. | -| `block_size_mb` | integer | 128 \(MB\) | **Block size \(row group size\)** in MB. This is the size of a row group being buffered in memory. It limits the memory usage when writing. Larger values will improve the IO when reading, but consume more memory when writing. | -| `max_padding_size_mb` | integer | 8 \(MB\) | **Max padding size** in MB. This is the maximum size allowed as padding to align row groups. This is also the minimum size of a row group. | -| `page_size_kb` | integer | 1024 \(KB\) | **Page size** in KB. The page size is for compression. A block is composed of pages. A page is the smallest unit that must be read fully to access a single record. If this value is too small, the compression will deteriorate. | -| `dictionary_page_size_kb` | integer | 1024 \(KB\) | **Dictionary Page Size** in KB. There is one dictionary page per column per row group when dictionary encoding is used. The dictionary page size works like the page size but for dictionary. 
| -| `dictionary_encoding` | boolean | `true` | **Dictionary encoding**. This parameter controls whether dictionary encoding is turned on. | +| Parameter | Type | Default | Description | +| :------------------------ | :-----: | :------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `compression_codec` | enum | `UNCOMPRESSED` | **Compression algorithm**. Available candidates are: `UNCOMPRESSED`, `SNAPPY`, `GZIP`, `LZO`, `BROTLI`, `LZ4`, and `ZSTD`. | +| `block_size_mb` | integer | 128 \(MB\) | **Block size \(row group size\)** in MB. This is the size of a row group being buffered in memory. It limits the memory usage when writing. Larger values will improve the IO when reading, but consume more memory when writing. | +| `max_padding_size_mb` | integer | 8 \(MB\) | **Max padding size** in MB. This is the maximum size allowed as padding to align row groups. This is also the minimum size of a row group. | +| `page_size_kb` | integer | 1024 \(KB\) | **Page size** in KB. The page size is for compression. A block is composed of pages. A page is the smallest unit that must be read fully to access a single record. If this value is too small, the compression will deteriorate. | +| `dictionary_page_size_kb` | integer | 1024 \(KB\) | **Dictionary Page Size** in KB. There is one dictionary page per column per row group when dictionary encoding is used. The dictionary page size works like the page size but for dictionary. | +| `dictionary_encoding` | boolean | `true` | **Dictionary encoding**. This parameter controls whether dictionary encoding is turned on. | These parameters are related to the `ParquetOutputFormat`. See the [Java doc](https://www.javadoc.io/doc/org.apache.parquet/parquet-hadoop/1.12.0/org/apache/parquet/hadoop/ParquetOutputFormat.html) for more details. Also see [Parquet documentation](https://parquet.apache.org/docs/file-format/configurations/) for their recommended configurations \(512 - 1024 MB block size, 8 KB page size\). @@ -279,6 +282,6 @@ Under the hood, an Airbyte data stream in JSON schema is first converted to an A ## CHANGELOG -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:-----------------------------------------------------------|:--------| +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :--------------------------------------------------------- | :--------------- | | 0.1.0 | 2022-09-25 | [\#15296](https://github.com/airbytehq/airbyte/pull/15296) | Initial release. | diff --git a/docs/integrations/destinations/rabbitmq.md b/docs/integrations/destinations/rabbitmq.md index d448fd4788d9..b1cb1a730236 100644 --- a/docs/integrations/destinations/rabbitmq.md +++ b/docs/integrations/destinations/rabbitmq.md @@ -10,22 +10,22 @@ The RabbitMQ destination allows you to send/stream data to a RabbitMQ routing ke Each stream will be output a RabbitMQ message with properties. The message properties will be -* `content_type`: set as `application/json` -* `headers`: message headers, which include: - * `stream`: the name of stream where the data is coming from - * `namespace`: namespace if available from the stream - * `emitted_at`: timestamp the `AirbyteRecord` was emitted at. 
+- `content_type`: set as `application/json` +- `headers`: message headers, which include: + - `stream`: the name of stream where the data is coming from + - `namespace`: namespace if available from the stream + - `emitted_at`: timestamp the `AirbyteRecord` was emitted at. The `AirbyteRecord` data will be serialized as JSON and set as the RabbitMQ message body. #### Features -| Feature | Supported?\(Yes/No\) | Notes | -| :--- | :--- | :--- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | Yes | | ## Getting started @@ -33,16 +33,15 @@ The `AirbyteRecord` data will be serialized as JSON and set as the RabbitMQ mess To use the RabbitMQ destination, you'll need: -* A RabbitMQ host and credentials (username/password) to publish messages, if required. -* A RabbitMQ routing key. -* RabbitMQ exchange is optional. If specified, a binding between exchange and routing key is required. -* RabbitMQ port is optional (it defaults to 5672). -* RabbitMQ virtual host is also optional. +- A RabbitMQ host and credentials (username/password) to publish messages, if required. +- A RabbitMQ routing key. +- RabbitMQ exchange is optional. If specified, a binding between exchange and routing key is required. +- RabbitMQ port is optional (it defaults to 5672). +- RabbitMQ virtual host is also optional. ## CHANGELOG -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.1 | 2022-09-09 | [16528](https://github.com/airbytehq/airbyte/pull/16528) | Marked password field in spec as airbyte_secret | -| 0.1.0 | October 29, 2021 | [\#7560](https://github.com/airbytehq/airbyte/pull/7560) | Initial release | - +| Version | Date | Pull Request | Subject | +| :------ | :--------------- | :------------------------------------------------------- | :---------------------------------------------- | +| 0.1.1 | 2022-09-09 | [16528](https://github.com/airbytehq/airbyte/pull/16528) | Marked password field in spec as airbyte_secret | +| 0.1.0 | October 29, 2021 | [\#7560](https://github.com/airbytehq/airbyte/pull/7560) | Initial release | diff --git a/docs/integrations/destinations/redis.md b/docs/integrations/destinations/redis.md index e46a449a8f54..fd07573388e4 100644 --- a/docs/integrations/destinations/redis.md +++ b/docs/integrations/destinations/redis.md @@ -12,46 +12,44 @@ For the **_hash_** implementation as a Redis data type the keys and the hashes a **_key_**: namespace:stream:id - -**_hash_**: -* `_airbyte_ab_id`: Sequential id for a given key generated by using the INCR Redis command. -* `_airbyte_emitted_at`: a timestamp representing when the event was received from the data source. -* `_airbyte_data`: a json text/object representing the data that was received from the data source. +**_hash_**: +- `_airbyte_ab_id`: Sequential id for a given key generated by using the INCR Redis command. +- `_airbyte_emitted_at`: a timestamp representing when the event was received from the data source. +- `_airbyte_data`: a json text/object representing the data that was received from the data source. 
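As a rough illustration of the hash layout described above, the following Python sketch (using the `redis` client library) writes and reads back a single record in the same shape. The key names and sample values are assumptions for illustration, not the connector's actual implementation.

```python
import json
import time

import redis  # redis-py

r = redis.Redis(host="localhost", port=6379)  # adjust to your Redis config

# Key layout described above: namespace:stream:id, with the id taken from INCR.
record_id = r.incr("airbyte:users")  # hypothetical counter key
key = f"airbyte:users:{record_id}"   # namespace=airbyte, stream=users

r.hset(key, mapping={
    "_airbyte_ab_id": record_id,
    "_airbyte_emitted_at": int(time.time() * 1000),
    "_airbyte_data": json.dumps({"user_id": 123, "name": "John Doe"}),
})

print(r.hgetall(key))
```

Running `HGETALL` on any synced key is also a convenient way to spot-check what the connector wrote.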
### Features -| Feature | Support| Notes | -|:------------------------------| :-----:|:-------------------------------------------------------------------------------| -| Full Refresh Sync | ✅ | Existing keys in the Redis cache are deleted and replaced with the new keys. | -| Incremental - Append Sync | ✅ | New keys are inserted in the same keyspace without touching the existing keys. | -| Incremental - Deduped History | ❌ | | -| Namespaces | ✅ | Namespaces will be used to determine the correct Redis key. | -| SSH Tunnel Connection | ✅ | | -| SSL connection | ✅ | | - +| Feature | Support | Notes | +| :----------------------------- | :-----: | :----------------------------------------------------------------------------- | +| Full Refresh Sync | ✅ | Existing keys in the Redis cache are deleted and replaced with the new keys. | +| Incremental - Append Sync | ✅ | New keys are inserted in the same keyspace without touching the existing keys. | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ✅ | Namespaces will be used to determine the correct Redis key. | +| SSH Tunnel Connection | ✅ | | +| SSL connection | ✅ | | ### Performance considerations -As long as you have the necessary memory capacity for your cache, Redis should be able to handle even millions of records without any issues since the data is stored in-memory with the option to +As long as you have the necessary memory capacity for your cache, Redis should be able to handle even millions of records without any issues since the data is stored in-memory with the option to save snapshots periodically on disk. ## Getting started ### Requirements -* The connector is fully compatible with redis 2.8.x, 3.x.x and above -* Configuration - * **_host_**: Hostname or address of the Redis server where to connect. - * **_port_**: Port of the Redis server where to connect. - * **_username_**: Username for authenticating with the Redis server. - * **_password_**: Password for authenticating with the Redis server. - * **_cache_type_**: Redis cache/data type to use when storing the incoming messages. i.e hash,set,list,stream,etc. -* SSL toggle the switch to connect using SSL -* For SSL Modes, select: - - **disable** to disable encrypted communication between Airbyte and the source - - **verify-full** to always require encryption and verify the identity of the source +- The connector is fully compatible with redis 2.8.x, 3.x.x and above +- Configuration + - **_host_**: Hostname or address of the Redis server where to connect. + - **_port_**: Port of the Redis server where to connect. + - **_username_**: Username for authenticating with the Redis server. + - **_password_**: Password for authenticating with the Redis server. + - **_cache_type_**: Redis cache/data type to use when storing the incoming messages. i.e hash,set,list,stream,etc. +- SSL toggle the switch to connect using SSL +- For SSL Modes, select: + - **disable** to disable encrypted communication between Airbyte and the source + - **verify-full** to always require encryption and verify the identity of the source ### Setup guide @@ -59,7 +57,7 @@ save snapshots periodically on disk. 
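Setup reduces to supplying the connection parameters listed in the requirements above. As a quick sanity check outside of Airbyte, you can ping the server with the same host, port, credentials, and SSL setting you intend to configure. This is only an illustrative sketch with placeholder values, not an official verification step; Airbyte's own check-connection tool remains the authoritative test.

```python
import redis

# Fill in the same values you plan to use in the destination config.
client = redis.Redis(
    host="my-redis.example.com",  # placeholder host
    port=6379,
    username="airbyte",           # omit if your server has no ACL user
    password="secret",            # placeholder
    ssl=True,                     # mirrors the SSL toggle in the connector
)

print(client.ping())  # True means the host, credentials, and SSL settings are usable
```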
## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:-----------------------------------------------------------|:-----------------| -| 0.1.4 | 2022-10-25 | [\#18358](https://github.com/airbytehq/airbyte/pull/18358) | TLS support | -| 0.1.3 | 2022-10-18 | [\#17951](https://github.com/airbytehq/airbyte/pull/17951) | Add SSH support | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :--------------------------------------------------------- | :-------------- | +| 0.1.4 | 2022-10-25 | [\#18358](https://github.com/airbytehq/airbyte/pull/18358) | TLS support | +| 0.1.3 | 2022-10-18 | [\#17951](https://github.com/airbytehq/airbyte/pull/17951) | Add SSH support | diff --git a/docs/integrations/destinations/redpanda.md b/docs/integrations/destinations/redpanda.md index e1f4496a2e6e..410b17d453e8 100644 --- a/docs/integrations/destinations/redpanda.md +++ b/docs/integrations/destinations/redpanda.md @@ -27,12 +27,12 @@ Each record will contain in its key the uuid assigned by Airbyte, and in the val This section should contain a table with the following format: -| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :------------------------------------------------------------------------------------------- | -| Full Refresh Sync | No | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | No | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | Yes | | ### Performance considerations diff --git a/docs/integrations/destinations/redshift.md b/docs/integrations/destinations/redshift.md index f6da251d20ca..9f3390662759 100644 --- a/docs/integrations/destinations/redshift.md +++ b/docs/integrations/destinations/redshift.md @@ -95,7 +95,7 @@ The Redshift destination connector supports the following [sync modes](https://d - Full Refresh - Incremental - Append Sync -- Incremental - Deduped History +- Incremental - Append + Deduped ## Performance considerations @@ -155,7 +155,7 @@ Each stream will be output into its own raw table in Redshift. 
Each table will c ## Changelog | Version | Date | Pull Request | Subject | -| :------ | :--------- | :--------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| :------ | :--------- | :--------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --- | | 0.6.2 | 2023-07-24 | [\#28618](https://github.com/airbytehq/airbyte/pull/28618) | Add hooks in preparation for destinations v2 implementation | | 0.6.1 | 2023-07-14 | [\#28345](https://github.com/airbytehq/airbyte/pull/28345) | Increment patch to trigger a rebuild | | 0.6.0 | 2023-06-27 | [\#27993](https://github.com/airbytehq/airbyte/pull/27993) | destination-redshift will fail syncs if records or properties are too large, rather than silently skipping records and succeeding | diff --git a/docs/integrations/destinations/rockset.md b/docs/integrations/destinations/rockset.md index 4e2597bcf0d3..0ab1709a68b6 100644 --- a/docs/integrations/destinations/rockset.md +++ b/docs/integrations/destinations/rockset.md @@ -6,12 +6,12 @@ ## Features -| Feature | Support | -| :---------------------------- | :-----: | -| Full Refresh Sync | ✅ | -| Incremental - Append Sync | ✅ | -| Incremental - Deduped History | ❌ | -| Namespaces | ❌ | +| Feature | Support | +| :----------------------------- | :-----: | +| Full Refresh Sync | ✅ | +| Incremental - Append Sync | ✅ | +| Incremental - Append + Deduped | ❌ | +| Namespaces | ❌ | ## Troubleshooting diff --git a/docs/integrations/destinations/s3-glue.md b/docs/integrations/destinations/s3-glue.md index 92ab54fb9fe0..d96ad8468b2f 100644 --- a/docs/integrations/destinations/s3-glue.md +++ b/docs/integrations/destinations/s3-glue.md @@ -175,12 +175,12 @@ A data sync may create multiple files as the output files can be partitioned by ## Supported sync modes -| Feature | Support | Notes | -| :---------------------------- | :-----: | :------------------------------------------------------------------------------------------- | -| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Incremental - Append Sync | ✅ | | -| Incremental - Deduped History | ❌ | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | +| Feature | Support | Notes | +| :----------------------------- | :-----: | :----------------------------------------------------------------------------------- | +| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | +| Incremental - Append Sync | ✅ | | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | The Airbyte S3 destination allows you to sync data to AWS S3 or Minio S3. Each stream is written to its own directory under the bucket. ⚠️ Please note that under "Full Refresh Sync" mode, data in the configured bucket and path will be wiped out before each sync. 
We recommend you to provision a dedicated S3 resource for this sync to prevent unexpected data deletion from misconfiguration. ⚠️ @@ -244,12 +244,12 @@ Output files can be compressed. The default option is GZIP compression. If compr ## CHANGELOG | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:----------------------------------------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :-------------------------------------------------------------------------------------- | | 0.1.7 | 2023-05-01 | [25724](https://github.com/airbytehq/airbyte/pull/25724) | Fix decimal type creation syntax to avoid overflow | | 0.1.6 | 2023-04-13 | [25178](https://github.com/airbytehq/airbyte/pull/25178) | Fix decimal precision and scale to allow for a wider range of numeric values | | 0.1.5 | 2023-04-11 | [25048](https://github.com/airbytehq/airbyte/pull/25048) | Fix config schema to support new JSONL flattening configuration interface | | 0.1.4 | 2023-03-10 | [23950](https://github.com/airbytehq/airbyte/pull/23950) | Fix schema syntax error for struct fields and handle missing `items` in array fields | -| 0.1.3 | 2023-02-10 | [22822](https://github.com/airbytehq/airbyte/pull/22822) | Fix data type for _ab_emitted_at column in table definition | +| 0.1.3 | 2023-02-10 | [22822](https://github.com/airbytehq/airbyte/pull/22822) | Fix data type for \_ab_emitted_at column in table definition | | 0.1.2 | 2023-02-01 | [22220](https://github.com/airbytehq/airbyte/pull/22220) | Fix race condition in test, table metadata, add Airbyte sync fields to table definition | | 0.1.1 | 2022-12-13 | [19907](https://github.com/airbytehq/airbyte/pull/19907) | Fix parsing empty object in schema | | 0.1.0 | 2022-11-17 | [18695](https://github.com/airbytehq/airbyte/pull/18695) | Initial Commit | diff --git a/docs/integrations/destinations/s3.md b/docs/integrations/destinations/s3.md index 8d2af620c3cb..d59e51caf063 100644 --- a/docs/integrations/destinations/s3.md +++ b/docs/integrations/destinations/s3.md @@ -171,12 +171,12 @@ A data sync may create multiple files as the output files can be partitioned by ## Supported sync modes -| Feature | Support | Notes | -| :---------------------------- | :-----: | :------------------------------------------------------------------------------------------- | -| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Incremental - Append Sync | ✅ | | -| Incremental - Deduped History | ❌ | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | +| Feature | Support | Notes | +| :----------------------------- | :-----: | :----------------------------------------------------------------------------------- | +| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | +| Incremental - Append Sync | ✅ | | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | The Airbyte S3 destination allows you to sync data to AWS S3 or Minio S3. Each stream is written to its own directory under the bucket. 
⚠️ Please note that under "Full Refresh Sync" mode, data in the configured bucket and path will be wiped out before each sync. We recommend you to provision a dedicated S3 resource for this sync to prevent unexpected data deletion from misconfiguration. ⚠️ @@ -343,64 +343,64 @@ In order for everything to work correctly, it is also necessary that the user wh ## CHANGELOG -| Version | Date | Pull Request | Subject | -|:--------| :--------- |:--------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------| -| 0.5.1 | 2023-06-26 | [#27786](https://github.com/airbytehq/airbyte/pull/27786) | Fix build | -| 0.5.0 | 2023-06-26 | [#27725](https://github.com/airbytehq/airbyte/pull/27725) | License Update: Elv2 | -| 0.4.2 | 2023-06-21 | [#27555](https://github.com/airbytehq/airbyte/pull/27555) | Reduce image size | -| 0.4.1 | 2023-05-18 | [#26284](https://github.com/airbytehq/airbyte/pull/26284) | Fix: reenable LZO compression for Parquet output | -| 0.4.0 | 2023-04-28 | [#25570](https://github.com/airbytehq/airbyte/pull/25570) | Fix: all integer schemas should be converted to Avro longs | -| 0.3.25 | 2023-04-27 | [#25346](https://github.com/airbytehq/airbyte/pull/25346) | Internal code cleanup | -| 0.3.23 | 2023-03-30 | [#24736](https://github.com/airbytehq/airbyte/pull/24736) | Improve behavior when throttled by AWS API | -| 0.3.22 | 2023-03-17 | [#23788](https://github.com/airbytehq/airbyte/pull/23788) | S3-Parquet: added handler to process null values in arrays | -| 0.3.21 | 2023-03-10 | [#23466](https://github.com/airbytehq/airbyte/pull/23466) | Changed S3 Avro type from Int to Long | -| 0.3.20 | 2023-02-23 | [#21355](https://github.com/airbytehq/airbyte/pull/21355) | Add root level flattening option to JSONL output. | -| 0.3.19 | 2023-01-18 | [#21087](https://github.com/airbytehq/airbyte/pull/21087) | Wrap Authentication Errors as Config Exceptions | -| 0.3.18 | 2022-12-15 | [\#20088](https://github.com/airbytehq/airbyte/pull/20088) | New data type support v0/v1 | -| 0.3.17 | 2022-10-15 | [\#18031](https://github.com/airbytehq/airbyte/pull/18031) | Fix integration tests to use bucket path | -| 0.3.16 | 2022-10-03 | [\#17340](https://github.com/airbytehq/airbyte/pull/17340) | Enforced encrypted only traffic to S3 buckets and check logic | -| 0.3.15 | 2022-09-01 | [\#16243](https://github.com/airbytehq/airbyte/pull/16243) | Fix Json to Avro conversion when there is field name clash from combined restrictions (`anyOf`, `oneOf`, `allOf` fields). | -| 0.3.14 | 2022-08-24 | [\#15207](https://github.com/airbytehq/airbyte/pull/15207) | Fix S3 bucket path to be used for check. | -| 0.3.13 | 2022-08-09 | [\#15394](https://github.com/airbytehq/airbyte/pull/15394) | Added LZO compression support to Parquet format | -| 0.3.12 | 2022-08-05 | [\#14801](https://github.com/airbytehq/airbyte/pull/14801) | Fix multiple log bindings | -| 0.3.11 | 2022-07-15 | [\#14494](https://github.com/airbytehq/airbyte/pull/14494) | Make S3 output filename configurable. 
| -| 0.3.10 | 2022-06-30 | [\#14332](https://github.com/airbytehq/airbyte/pull/14332) | Change INSTANCE_PROFILE to use `AWSDefaultProfileCredential`, which supports more authentications on AWS | -| 0.3.9 | 2022-06-24 | [\#14114](https://github.com/airbytehq/airbyte/pull/14114) | Remove "additionalProperties": false from specs for connectors with staging | -| 0.3.8 | 2022-06-17 | [\#13753](https://github.com/airbytehq/airbyte/pull/13753) | Deprecate and remove PART_SIZE_MB fields from connectors based on StreamTransferManager | -| 0.3.7 | 2022-06-14 | [\#13483](https://github.com/airbytehq/airbyte/pull/13483) | Added support for int, long, float data types to Avro/Parquet formats. | -| 0.3.6 | 2022-05-19 | [\#13043](https://github.com/airbytehq/airbyte/pull/13043) | Destination S3: Remove configurable part size. | -| 0.3.5 | 2022-05-12 | [\#12797](https://github.com/airbytehq/airbyte/pull/12797) | Update spec to replace markdown. | -| 0.3.4 | 2022-05-04 | [\#12578](https://github.com/airbytehq/airbyte/pull/12578) | In JSON to Avro conversion, log JSON field values that do not follow Avro schema for debugging. | -| 0.3.3 | 2022-04-20 | [\#12167](https://github.com/airbytehq/airbyte/pull/12167) | Add gzip compression option for CSV and JSONL formats. | -| 0.3.2 | 2022-04-22 | [\#11795](https://github.com/airbytehq/airbyte/pull/11795) | Fix the connection check to verify the provided bucket path. | -| 0.3.1 | 2022-04-05 | [\#11728](https://github.com/airbytehq/airbyte/pull/11728) | Properly clean-up bucket when running OVERWRITE sync mode | -| 0.3.0 | 2022-04-04 | [\#11666](https://github.com/airbytehq/airbyte/pull/11666) | 0.2.12 actually has breaking changes since files are compressed by default, this PR also fixes the naming to be more compatible with older versions. | -| 0.2.13 | 2022-03-29 | [\#11496](https://github.com/airbytehq/airbyte/pull/11496) | Fix S3 bucket path to be included with S3 bucket format | -| 0.2.12 | 2022-03-28 | [\#11294](https://github.com/airbytehq/airbyte/pull/11294) | Change to serialized buffering strategy to reduce memory consumption | -| 0.2.11 | 2022-03-23 | [\#11173](https://github.com/airbytehq/airbyte/pull/11173) | Added support for AWS Glue crawler | -| 0.2.10 | 2022-03-07 | [\#10856](https://github.com/airbytehq/airbyte/pull/10856) | `check` method now tests for listObjects permissions on the target bucket | -| 0.2.7 | 2022-02-14 | [\#10318](https://github.com/airbytehq/airbyte/pull/10318) | Prevented double slashes in S3 destination path | -| 0.2.6 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | -| 0.2.5 | 2022-01-13 | [\#9399](https://github.com/airbytehq/airbyte/pull/9399) | Use instance profile authentication if credentials are not provided | -| 0.2.4 | 2022-01-12 | [\#9415](https://github.com/airbytehq/airbyte/pull/9415) | BigQuery Destination : Fix GCS processing of Facebook data | -| 0.2.3 | 2022-01-11 | [\#9367](https://github.com/airbytehq/airbyte/pull/9367) | Avro & Parquet: support array field with unknown item type; default any improperly typed field to string. | -| 0.2.2 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | -| 0.2.1 | 2021-12-20 | [\#8974](https://github.com/airbytehq/airbyte/pull/8974) | Release a new version to ensure there is no excessive logging. 
| -| 0.2.0 | 2021-12-15 | [\#8607](https://github.com/airbytehq/airbyte/pull/8607) | Change the output filename for CSV files - it's now `bucketPath/namespace/streamName/timestamp_epochMillis_randomUuid.csv` | -| 0.1.16 | 2021-12-10 | [\#8562](https://github.com/airbytehq/airbyte/pull/8562) | Swap dependencies with destination-jdbc. | -| 0.1.15 | 2021-12-03 | [\#8501](https://github.com/airbytehq/airbyte/pull/8501) | Remove excessive logging for Avro and Parquet invalid date strings. | -| 0.1.14 | 2021-11-09 | [\#7732](https://github.com/airbytehq/airbyte/pull/7732) | Support timestamp in Avro and Parquet | -| 0.1.13 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | -| 0.1.12 | 2021-09-13 | [\#5720](https://github.com/airbytehq/airbyte/issues/5720) | Added configurable block size for stream. Each stream is limited to 10,000 by S3 | -| 0.1.11 | 2021-09-10 | [\#5729](https://github.com/airbytehq/airbyte/pull/5729) | For field names that start with a digit, a `_` will be appended at the beginning for the`Parquet` and `Avro` formats. | -| 0.1.10 | 2021-08-17 | [\#4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | -| 0.1.9 | 2021-07-12 | [\#4666](https://github.com/airbytehq/airbyte/pull/4666) | Fix MinIO output for Parquet format. | -| 0.1.8 | 2021-07-07 | [\#4613](https://github.com/airbytehq/airbyte/pull/4613) | Patched schema converter to support combined restrictions. | -| 0.1.7 | 2021-06-23 | [\#4227](https://github.com/airbytehq/airbyte/pull/4227) | Added Avro and JSONL output. | -| 0.1.6 | 2021-06-16 | [\#4130](https://github.com/airbytehq/airbyte/pull/4130) | Patched the check to verify prefix access instead of full-bucket access. | -| 0.1.5 | 2021-06-14 | [\#3908](https://github.com/airbytehq/airbyte/pull/3908) | Fixed default `max_padding_size_mb` in `spec.json`. | -| 0.1.4 | 2021-06-14 | [\#3908](https://github.com/airbytehq/airbyte/pull/3908) | Added Parquet output. | -| 0.1.3 | 2021-06-13 | [\#4038](https://github.com/airbytehq/airbyte/pull/4038) | Added support for alternative S3. | -| 0.1.2 | 2021-06-10 | [\#4029](https://github.com/airbytehq/airbyte/pull/4029) | Fixed `_airbyte_emitted_at` field to be a UTC instead of local timestamp for consistency. | -| 0.1.1 | 2021-06-09 | [\#3973](https://github.com/airbytehq/airbyte/pull/3973) | Added `AIRBYTE_ENTRYPOINT` in base Docker image for Kubernetes support. | -| 0.1.0 | 2021-06-03 | [\#3672](https://github.com/airbytehq/airbyte/pull/3672) | Initial release with CSV output. 
| +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :--------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- | +| 0.5.1 | 2023-06-26 | [#27786](https://github.com/airbytehq/airbyte/pull/27786) | Fix build | +| 0.5.0 | 2023-06-26 | [#27725](https://github.com/airbytehq/airbyte/pull/27725) | License Update: Elv2 | +| 0.4.2 | 2023-06-21 | [#27555](https://github.com/airbytehq/airbyte/pull/27555) | Reduce image size | +| 0.4.1 | 2023-05-18 | [#26284](https://github.com/airbytehq/airbyte/pull/26284) | Fix: reenable LZO compression for Parquet output | +| 0.4.0 | 2023-04-28 | [#25570](https://github.com/airbytehq/airbyte/pull/25570) | Fix: all integer schemas should be converted to Avro longs | +| 0.3.25 | 2023-04-27 | [#25346](https://github.com/airbytehq/airbyte/pull/25346) | Internal code cleanup | +| 0.3.23 | 2023-03-30 | [#24736](https://github.com/airbytehq/airbyte/pull/24736) | Improve behavior when throttled by AWS API | +| 0.3.22 | 2023-03-17 | [#23788](https://github.com/airbytehq/airbyte/pull/23788) | S3-Parquet: added handler to process null values in arrays | +| 0.3.21 | 2023-03-10 | [#23466](https://github.com/airbytehq/airbyte/pull/23466) | Changed S3 Avro type from Int to Long | +| 0.3.20 | 2023-02-23 | [#21355](https://github.com/airbytehq/airbyte/pull/21355) | Add root level flattening option to JSONL output. | +| 0.3.19 | 2023-01-18 | [#21087](https://github.com/airbytehq/airbyte/pull/21087) | Wrap Authentication Errors as Config Exceptions | +| 0.3.18 | 2022-12-15 | [\#20088](https://github.com/airbytehq/airbyte/pull/20088) | New data type support v0/v1 | +| 0.3.17 | 2022-10-15 | [\#18031](https://github.com/airbytehq/airbyte/pull/18031) | Fix integration tests to use bucket path | +| 0.3.16 | 2022-10-03 | [\#17340](https://github.com/airbytehq/airbyte/pull/17340) | Enforced encrypted only traffic to S3 buckets and check logic | +| 0.3.15 | 2022-09-01 | [\#16243](https://github.com/airbytehq/airbyte/pull/16243) | Fix Json to Avro conversion when there is field name clash from combined restrictions (`anyOf`, `oneOf`, `allOf` fields). | +| 0.3.14 | 2022-08-24 | [\#15207](https://github.com/airbytehq/airbyte/pull/15207) | Fix S3 bucket path to be used for check. | +| 0.3.13 | 2022-08-09 | [\#15394](https://github.com/airbytehq/airbyte/pull/15394) | Added LZO compression support to Parquet format | +| 0.3.12 | 2022-08-05 | [\#14801](https://github.com/airbytehq/airbyte/pull/14801) | Fix multiple log bindings | +| 0.3.11 | 2022-07-15 | [\#14494](https://github.com/airbytehq/airbyte/pull/14494) | Make S3 output filename configurable. | +| 0.3.10 | 2022-06-30 | [\#14332](https://github.com/airbytehq/airbyte/pull/14332) | Change INSTANCE_PROFILE to use `AWSDefaultProfileCredential`, which supports more authentications on AWS | +| 0.3.9 | 2022-06-24 | [\#14114](https://github.com/airbytehq/airbyte/pull/14114) | Remove "additionalProperties": false from specs for connectors with staging | +| 0.3.8 | 2022-06-17 | [\#13753](https://github.com/airbytehq/airbyte/pull/13753) | Deprecate and remove PART_SIZE_MB fields from connectors based on StreamTransferManager | +| 0.3.7 | 2022-06-14 | [\#13483](https://github.com/airbytehq/airbyte/pull/13483) | Added support for int, long, float data types to Avro/Parquet formats. 
| +| 0.3.6 | 2022-05-19 | [\#13043](https://github.com/airbytehq/airbyte/pull/13043) | Destination S3: Remove configurable part size. | +| 0.3.5 | 2022-05-12 | [\#12797](https://github.com/airbytehq/airbyte/pull/12797) | Update spec to replace markdown. | +| 0.3.4 | 2022-05-04 | [\#12578](https://github.com/airbytehq/airbyte/pull/12578) | In JSON to Avro conversion, log JSON field values that do not follow Avro schema for debugging. | +| 0.3.3 | 2022-04-20 | [\#12167](https://github.com/airbytehq/airbyte/pull/12167) | Add gzip compression option for CSV and JSONL formats. | +| 0.3.2 | 2022-04-22 | [\#11795](https://github.com/airbytehq/airbyte/pull/11795) | Fix the connection check to verify the provided bucket path. | +| 0.3.1 | 2022-04-05 | [\#11728](https://github.com/airbytehq/airbyte/pull/11728) | Properly clean-up bucket when running OVERWRITE sync mode | +| 0.3.0 | 2022-04-04 | [\#11666](https://github.com/airbytehq/airbyte/pull/11666) | 0.2.12 actually has breaking changes since files are compressed by default, this PR also fixes the naming to be more compatible with older versions. | +| 0.2.13 | 2022-03-29 | [\#11496](https://github.com/airbytehq/airbyte/pull/11496) | Fix S3 bucket path to be included with S3 bucket format | +| 0.2.12 | 2022-03-28 | [\#11294](https://github.com/airbytehq/airbyte/pull/11294) | Change to serialized buffering strategy to reduce memory consumption | +| 0.2.11 | 2022-03-23 | [\#11173](https://github.com/airbytehq/airbyte/pull/11173) | Added support for AWS Glue crawler | +| 0.2.10 | 2022-03-07 | [\#10856](https://github.com/airbytehq/airbyte/pull/10856) | `check` method now tests for listObjects permissions on the target bucket | +| 0.2.7 | 2022-02-14 | [\#10318](https://github.com/airbytehq/airbyte/pull/10318) | Prevented double slashes in S3 destination path | +| 0.2.6 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | +| 0.2.5 | 2022-01-13 | [\#9399](https://github.com/airbytehq/airbyte/pull/9399) | Use instance profile authentication if credentials are not provided | +| 0.2.4 | 2022-01-12 | [\#9415](https://github.com/airbytehq/airbyte/pull/9415) | BigQuery Destination : Fix GCS processing of Facebook data | +| 0.2.3 | 2022-01-11 | [\#9367](https://github.com/airbytehq/airbyte/pull/9367) | Avro & Parquet: support array field with unknown item type; default any improperly typed field to string. | +| 0.2.2 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | +| 0.2.1 | 2021-12-20 | [\#8974](https://github.com/airbytehq/airbyte/pull/8974) | Release a new version to ensure there is no excessive logging. | +| 0.2.0 | 2021-12-15 | [\#8607](https://github.com/airbytehq/airbyte/pull/8607) | Change the output filename for CSV files - it's now `bucketPath/namespace/streamName/timestamp_epochMillis_randomUuid.csv` | +| 0.1.16 | 2021-12-10 | [\#8562](https://github.com/airbytehq/airbyte/pull/8562) | Swap dependencies with destination-jdbc. | +| 0.1.15 | 2021-12-03 | [\#8501](https://github.com/airbytehq/airbyte/pull/8501) | Remove excessive logging for Avro and Parquet invalid date strings. | +| 0.1.14 | 2021-11-09 | [\#7732](https://github.com/airbytehq/airbyte/pull/7732) | Support timestamp in Avro and Parquet | +| 0.1.13 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. 
| +| 0.1.12 | 2021-09-13 | [\#5720](https://github.com/airbytehq/airbyte/issues/5720) | Added configurable block size for stream. Each stream is limited to 10,000 by S3 | +| 0.1.11 | 2021-09-10 | [\#5729](https://github.com/airbytehq/airbyte/pull/5729) | For field names that start with a digit, a `_` will be appended at the beginning for the`Parquet` and `Avro` formats. | +| 0.1.10 | 2021-08-17 | [\#4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | +| 0.1.9 | 2021-07-12 | [\#4666](https://github.com/airbytehq/airbyte/pull/4666) | Fix MinIO output for Parquet format. | +| 0.1.8 | 2021-07-07 | [\#4613](https://github.com/airbytehq/airbyte/pull/4613) | Patched schema converter to support combined restrictions. | +| 0.1.7 | 2021-06-23 | [\#4227](https://github.com/airbytehq/airbyte/pull/4227) | Added Avro and JSONL output. | +| 0.1.6 | 2021-06-16 | [\#4130](https://github.com/airbytehq/airbyte/pull/4130) | Patched the check to verify prefix access instead of full-bucket access. | +| 0.1.5 | 2021-06-14 | [\#3908](https://github.com/airbytehq/airbyte/pull/3908) | Fixed default `max_padding_size_mb` in `spec.json`. | +| 0.1.4 | 2021-06-14 | [\#3908](https://github.com/airbytehq/airbyte/pull/3908) | Added Parquet output. | +| 0.1.3 | 2021-06-13 | [\#4038](https://github.com/airbytehq/airbyte/pull/4038) | Added support for alternative S3. | +| 0.1.2 | 2021-06-10 | [\#4029](https://github.com/airbytehq/airbyte/pull/4029) | Fixed `_airbyte_emitted_at` field to be a UTC instead of local timestamp for consistency. | +| 0.1.1 | 2021-06-09 | [\#3973](https://github.com/airbytehq/airbyte/pull/3973) | Added `AIRBYTE_ENTRYPOINT` in base Docker image for Kubernetes support. | +| 0.1.0 | 2021-06-03 | [\#3672](https://github.com/airbytehq/airbyte/pull/3672) | Initial release with CSV output. | diff --git a/docs/integrations/destinations/scylla.md b/docs/integrations/destinations/scylla.md index dfd9a621b702..7af55bda5341 100644 --- a/docs/integrations/destinations/scylla.md +++ b/docs/integrations/destinations/scylla.md @@ -1,9 +1,11 @@ # Scylla ## Prerequisites + - For Airbyte Open Source users using the [Postgres](https://docs.airbyte.com/integrations/sources/postgres) source connector, [upgrade](https://docs.airbyte.com/operator-guides/upgrading-airbyte/) your Airbyte platform to version `v0.40.0-alpha` or newer and upgrade your Scylla connector to version `0.1.3` or newer ## Sync overview + ### Output schema The incoming airbyte data is structured in keyspaces and tables and is partitioned and replicated across different nodes @@ -11,18 +13,18 @@ in the cluster. This connector maps an incoming `stream` to a Scylla `table` and Fields in the airbyte message become different columns in the Scylla tables. Each table will contain the following columns. -* `_airbyte_ab_id`: A random uuid generated to be used as a partition key. -* `_airbyte_emitted_at`: a timestamp representing when the event was received from the data source. -* `_airbyte_data`: a json text representing the extracted data. +- `_airbyte_ab_id`: A random uuid generated to be used as a partition key. +- `_airbyte_emitted_at`: a timestamp representing when the event was received from the data source. +- `_airbyte_data`: a json text representing the extracted data. ### Features -| Feature | Support | Notes | -| :--- | :---: | :--- | -| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured DynamoDB table. 
| -| Incremental - Append Sync | ✅ | | -| Incremental - Deduped History | ❌ | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | ✅ | Namespace will be used as part of the table name. | +| Feature | Support | Notes | +| :----------------------------- | :-----: | :-------------------------------------------------------------------------------------- | +| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured DynamoDB table. | +| Incremental - Append Sync | ✅ | | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ✅ | Namespace will be used as part of the table name. | ### Performance considerations @@ -34,18 +36,19 @@ and handle any amount of data from the connector. ### Requirements -* Driver compatibility: NA -* Configuration - * Keyspace [default keyspace to use when writing data] - * Username [authentication username] - * Password [authentication password] - * Address [cluster address] - * Port [default: 9042] - * Replication [optional] [default: 1] +- Driver compatibility: NA +- Configuration + - Keyspace [default keyspace to use when writing data] + - Username [authentication username] + - Password [authentication password] + - Address [cluster address] + - Port [default: 9042] + - Replication [optional] [default: 1] ### Setup guide + ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------| :--- |:----------------------------------------------------------------------------------------------------| -| 0.1.3 | 2022-08-10 | [153999](https://github.com/airbytehq/airbyte/pull/15399) | handling per-stream state | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :-------------------------------------------------------- | :------------------------ | +| 0.1.3 | 2022-08-10 | [153999](https://github.com/airbytehq/airbyte/pull/15399) | handling per-stream state | diff --git a/docs/integrations/destinations/selectdb.md b/docs/integrations/destinations/selectdb.md index 626d7e15ecd2..032fdae475a7 100644 --- a/docs/integrations/destinations/selectdb.md +++ b/docs/integrations/destinations/selectdb.md @@ -12,19 +12,16 @@ Each stream will be output into its own table in SelectDB. Each table will conta - `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. The column type in Doris is `BIGINT`. - `_airbyte_data`: a json blob representing with the event data. The column type in SelectDB is `String`. - ### Features This section should contain a table with the following format: -| Feature | Supported?(Yes/No) | Notes | -| :------------------------------------- | :----------------- | :----------------------- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | it will soon be realized | -| For databases, WAL/Logical replication | Yes | | - - +| Feature | Supported?(Yes/No) | Notes | +| :------------------------------------- | :----------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| For databases, WAL/Logical replication | Yes | | ### Performance considerations @@ -37,7 +34,7 @@ Importing multiple tables will generate multiple transactions, which should be s To use the SelectDB destination, you'll need: -- A SelectDB server and your +- A SelectDB server and your - Make sure your SelectDB http port and mysql query port can be accessed by Airbyte. 
- Make sure your SelectDB host can be accessed by Airbyte. If you use a public network to access SelectDB, please ensure that your Airbyte public IP address is in the IP whitelist of your SelectDB instance.
- Make sure your SelectDB user has read/write permissions on the relevant tables.

@@ -58,7 +55,6 @@ You need to prepare database that will be used to store synced data from Airbyte

## CHANGELOG

-| Version | Date | Pull Request | Subject |
-| :--- | :--- | :--- | :--- |
-| 0.1.0 | 2023-04-03 | [\#20881](https://github.com/airbytehq/airbyte/pull/20881) | Initial release SelectDB Destination |
-
+| Version | Date       | Pull Request                                                | Subject                              |
+| :------ | :--------- | :---------------------------------------------------------- | :----------------------------------- |
+| 0.1.0   | 2023-04-03 | [\#20881](https://github.com/airbytehq/airbyte/pull/20881) | Initial release SelectDB Destination |
diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md
index 7f7b77894559..84a84fb1ff46 100644
--- a/docs/integrations/destinations/snowflake.md
+++ b/docs/integrations/destinations/snowflake.md
@@ -244,7 +244,7 @@ The Snowflake destination supports the following sync modes:
- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-- [Incremental Sync - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)

## Snowflake tutorials

@@ -270,7 +270,7 @@ Otherwise, make sure to grant the role the required permissions in the desired n
## Changelog

| Version | Date | Pull Request | Subject |
-|:----------------|:-----------|:-----------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------|
+| :-------------- | :--------- | :--------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- |
| 1.2.8 | 2023-08-03 | [\#29047](https://github.com/airbytehq/airbyte/pull/29047) | Avoid logging record if the format is invalid |
| 1.2.7 | 2023-08-02 | [\#28976](https://github.com/airbytehq/airbyte/pull/28976) | Fix composite PK handling in v1 mode |
| 1.2.6 | 2023-08-01 | [\#28618](https://github.com/airbytehq/airbyte/pull/28618) | Reduce logging noise |
diff --git a/docs/integrations/destinations/sqlite.md b/docs/integrations/destinations/sqlite.md
index 7c35543f5cc2..eb266b61eee8 100644
--- a/docs/integrations/destinations/sqlite.md
+++ b/docs/integrations/destinations/sqlite.md
@@ -22,18 +22,18 @@ Please make sure that Docker Desktop has access to `/tmp` (and `/private` on a M
Each stream will be output into its own table `_airbyte_raw_{stream_name}`. Each table will contain 3 columns:

-* `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed.
-* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source.
-* `_airbyte_data`: a json blob representing with the event data.
+- `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. +- `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. +- `_airbyte_data`: a json blob representing with the event data. #### Features -| Feature | Supported | | -| :--- | :--- | :--- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | No | | +| Feature | Supported | | +| :----------------------------- | :-------- | :-- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | No | | #### Performance considerations @@ -49,13 +49,13 @@ The local mount is mounted by Docker onto `LOCAL_ROOT`. This means the `/local` ### Example: -* If `destination_path` is set to `/local/sqlite.db` -* the local mount is using the `/tmp/airbyte_local` default -* then all data will be written to `/tmp/airbyte_local/sqlite.db`. +- If `destination_path` is set to `/local/sqlite.db` +- the local mount is using the `/tmp/airbyte_local` default +- then all data will be written to `/tmp/airbyte_local/sqlite.db`. ## Access Replicated Data Files -If your Airbyte instance is running on the same computer that you are navigating with, you can open your browser and enter [file:///tmp/airbyte\_local](file:///tmp/airbyte_local) to look at the replicated data locally. If the first approach fails or if your Airbyte instance is running on a remote server, follow the following steps to access the replicated files: +If your Airbyte instance is running on the same computer that you are navigating with, you can open your browser and enter [file:///tmp/airbyte_local](file:///tmp/airbyte_local) to look at the replicated data locally. If the first approach fails or if your Airbyte instance is running on a remote server, follow the following steps to access the replicated files: 1. Access the scheduler container using `docker exec -it airbyte-server bash` 2. Navigate to the default local mount using `cd /tmp/airbyte_local` @@ -72,6 +72,6 @@ Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.0 | 2022-07-25 | [15018](https://github.com/airbytehq/airbyte/pull/15018) | New SQLite destination | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :--------------------- | +| 0.1.0 | 2022-07-25 | [15018](https://github.com/airbytehq/airbyte/pull/15018) | New SQLite destination | diff --git a/docs/integrations/destinations/starburst-galaxy.md b/docs/integrations/destinations/starburst-galaxy.md index 710cbb2f0883..2fdfc77cd0b5 100644 --- a/docs/integrations/destinations/starburst-galaxy.md +++ b/docs/integrations/destinations/starburst-galaxy.md @@ -2,47 +2,47 @@ ## Overview -The Starburst Galaxy destination syncs data to Starburst Galaxy [great lake catalogs](https://docs.starburst.io/starburst-galaxy/sql/great-lakes.html) +The Starburst Galaxy destination syncs data to Starburst Galaxy [great lake catalogs](https://docs.starburst.io/starburst-galaxy/sql/great-lakes.html) in [Apache Iceberg](https://iceberg.apache.org/) table format. Each stream is written to its own Iceberg table. 
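After a sync completes, each stream can be queried as an ordinary Iceberg table through your Galaxy cluster. The following Python sketch, using the `trino` client, shows roughly what that looks like with the connection values described in the Configuration section below; every literal value here is a placeholder assumption, not output from the connector.

```python
import trino

# Placeholder connection details -- use the values from your Galaxy cluster's
# "Connection info" pane and the catalog/schema configured in the destination.
conn = trino.dbapi.connect(
    host="example-cluster.trino.galaxy.starburst.io",
    port=443,
    http_scheme="https",
    auth=trino.auth.BasicAuthentication("user@example.com", "password"),
    catalog="my_s3_catalog",
    schema="public",
)

cur = conn.cursor()
# A synced stream named "users" becomes a queryable Iceberg table.
cur.execute("SELECT _airbyte_ab_id, _airbyte_emitted_at FROM users LIMIT 5")
print(cur.fetchall())
```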
## Features -| Feature | Supported | Notes | -|:----------------|:---------:|:------------------------------------------------------------------------------------| -| Overwrite Sync | ✅ | **Warning**: this mode deletes all previously synced data in the destination table. | -| Append Sync | ✅ | | -| Deduped History | ❌ | | -| Namespaces | ✅ | | -| SSL | ✅ | SSL is enabled. | +| Feature | Supported | Notes | +| :--------------- | :-------: | :---------------------------------------------------------------------------------- | +| Overwrite Sync | ✅ | **Warning**: this mode deletes all previously synced data in the destination table. | +| Append Sync | ✅ | | +| Append + Deduped | ❌ | | +| Namespaces | ✅ | | +| SSL | ✅ | SSL is enabled. | ## Data storage -Starburst Galaxy supports various [object storages](https://docs.starburst.io/starburst-galaxy/catalogs/index.html#object-storage); +Starburst Galaxy supports various [object storages](https://docs.starburst.io/starburst-galaxy/catalogs/index.html#object-storage); however, only Amazon S3 is supported by this connector. ## Configuration -| Category | Parameter | Type | Notes | -|:---------------------------------|:------------------------------|:-------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Starburst Galaxy | `Hostname` | string | Required. Located in the **Connection info** section of the [view clusters](https://docs.starburst.io/starburst-galaxy/clusters/index.html#manage-clusters) pane in Starburst Galaxy. | -| | `Port` | string | Optional. Located in the **Connection info** section of the [view clusters](https://docs.starburst.io/starburst-galaxy/clusters/index.html#manage-clusters) pane in Starburst Galaxy. Defaults to `443`. | -| | `User` | string | Required. Galaxy user found in the **Connection info** section of the [view clusters](https://docs.starburst.io/starburst-galaxy/clusters/index.html#manage-clusters) pane in Starburst Galaxy. | -| | `Password` | string | Required. Password for the specified Galaxy user. | -| | `Amazon S3 catalog` | string | Required. Name of the [Amazon S3 catalog](https://docs.starburst.io/starburst-galaxy/catalogs/s3.html) created in the Galaxy domain. | -| | `Amazon S3 catalog schema` | string | Optional. The default Starburst Galaxy Amazon S3 catalog schema where tables are written to if the source does not specify a namespace. Each data stream is written to a table in this schema. Defaults to `public`. | -| Staging Object Store - Amazon S3 | `Bucket name` | string | Required. Name of the bucket where the staging data is stored. | -| | `Bucket path` | string | Required. Sets the subdirectory of the specified S3 bucket used for storing staging data. | -| | `Bucket region` | string | Required. Sets the region of the specified S3 bucket. | -| | `Access key` | string | Required. AWS/Minio credential. | -| | `Secret key` | string | Required. AWS/Minio credential. | -| General | `Purge staging Iceberg table` | boolean | Optional. Indicates that staging Iceberg table is purged after a data sync is complete. Enabled by default. Disable it for debugging purposes only. 
| +| Category | Parameter | Type | Notes | +| :------------------------------- | :---------------------------- | :-----: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Starburst Galaxy | `Hostname` | string | Required. Located in the **Connection info** section of the [view clusters](https://docs.starburst.io/starburst-galaxy/clusters/index.html#manage-clusters) pane in Starburst Galaxy. | +| | `Port` | string | Optional. Located in the **Connection info** section of the [view clusters](https://docs.starburst.io/starburst-galaxy/clusters/index.html#manage-clusters) pane in Starburst Galaxy. Defaults to `443`. | +| | `User` | string | Required. Galaxy user found in the **Connection info** section of the [view clusters](https://docs.starburst.io/starburst-galaxy/clusters/index.html#manage-clusters) pane in Starburst Galaxy. | +| | `Password` | string | Required. Password for the specified Galaxy user. | +| | `Amazon S3 catalog` | string | Required. Name of the [Amazon S3 catalog](https://docs.starburst.io/starburst-galaxy/catalogs/s3.html) created in the Galaxy domain. | +| | `Amazon S3 catalog schema` | string | Optional. The default Starburst Galaxy Amazon S3 catalog schema where tables are written to if the source does not specify a namespace. Each data stream is written to a table in this schema. Defaults to `public`. | +| Staging Object Store - Amazon S3 | `Bucket name` | string | Required. Name of the bucket where the staging data is stored. | +| | `Bucket path` | string | Required. Sets the subdirectory of the specified S3 bucket used for storing staging data. | +| | `Bucket region` | string | Required. Sets the region of the specified S3 bucket. | +| | `Access key` | string | Required. AWS/Minio credential. | +| | `Secret key` | string | Required. AWS/Minio credential. | +| General | `Purge staging Iceberg table` | boolean | Optional. Indicates that staging Iceberg table is purged after a data sync is complete. Enabled by default. Disable it for debugging purposes only. | ## Staging files ### S3 -Data streams are written to a temporary Iceberg table, and then loaded into Amazon S3 Starburst Galaxy catalog in the Iceberg table format. -Staging table is deleted after a sync is complete if the `Purge staging Iceberg table` is enabled. +Data streams are written to a temporary Iceberg table, and then loaded into Amazon S3 Starburst Galaxy catalog in the Iceberg table format. +Staging table is deleted after a sync is complete if the `Purge staging Iceberg table` is enabled. The following is an example of a full path for a staging file: ```text @@ -54,9 +54,9 @@ For example: ```text s3://galaxy_bucket/data_output_path/test_schema/_airbyte_tmp_qey_user ↑ ↑ ↑ ↑ - | | | temporary Iceberg table holding data + | | | temporary Iceberg table holding data | | source namespace or provided schema name - | | + | | | bucket path bucket name ``` @@ -69,21 +69,21 @@ Streams are synced in the Starburst Galaxy Amazon S3 catalog with Iceberg table Each table in the output schema has the following columns: -| Column | Type | Description | -|:--------------------------------------------------------------|:---------------------:|:-----------------------------------------------------------------------------------------------------| -| `_airbyte_ab_id` | varchar | UUID. | -| `_airbyte_emitted_at` | timestamp(6) | Data emission timestamp. 
| -| Data fields from the source stream | various | All the fields from the source stream will be populated as an individual column in the target table. | -| `_airbyte_additional_properties` | map(varchar, varchar) | Additional properties. | +| Column | Type | Description | +| :--------------------------------- | :-------------------: | :--------------------------------------------------------------------------------------------------- | +| `_airbyte_ab_id` | varchar | UUID. | +| `_airbyte_emitted_at` | timestamp(6) | Data emission timestamp. | +| Data fields from the source stream | various | All the fields from the source stream will be populated as an individual column in the target table. | +| `_airbyte_additional_properties` | map(varchar, varchar) | Additional properties. | -The Airbyte data stream's JSON schema is converted to an Avro schema. The JSON object is then converted to an Avro record; -the Avro record is written to a staging Iceberg table. As the data stream can be generated from any data source, -the JSON-to-Avro conversion process has arbitrary rules and limitations. +The Airbyte data stream's JSON schema is converted to an Avro schema. The JSON object is then converted to an Avro record; +the Avro record is written to a staging Iceberg table. As the data stream can be generated from any data source, +the JSON-to-Avro conversion process has arbitrary rules and limitations. Learn more about [how source data is converted to Avro](https://docs.airbyte.io/understanding-airbyte/json-avro-conversion). ### Datatype support -Learn more about [Starburst Galaxy Iceberg type mapping](https://docs.starburst.io/latest/connector/iceberg.html#iceberg-to-trino-type-mapping). +Learn more about [Starburst Galaxy Iceberg type mapping](https://docs.starburst.io/latest/connector/iceberg.html#iceberg-to-trino-type-mapping). ## Getting started @@ -97,5 +97,5 @@ Learn more about [Starburst Galaxy Iceberg type mapping](https://docs.starburst. ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:-----------------------------------------------------------|:------------------------| +| :------ | :--------- | :--------------------------------------------------------- | :---------------------- | | 0.0.1 | 2023-03-28 | [\#24620](https://github.com/airbytehq/airbyte/pull/24620) | Initial public release. | diff --git a/docs/integrations/destinations/streamr.md b/docs/integrations/destinations/streamr.md index 40f6a0c76a5c..97fbe15765df 100644 --- a/docs/integrations/destinations/streamr.md +++ b/docs/integrations/destinations/streamr.md @@ -2,12 +2,12 @@ ## Features -| Feature | Support | Notes | -| :---------------------------- | :-----: | :------------------------------------------------------------------------------------------- | -| Full Refresh Sync | ❌ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Incremental - Append Sync | ✅ | | -| Incremental - Deduped History | ❌ | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | +| Feature | Support | Notes | +| :----------------------------- | :-----: | :----------------------------------------------------------------------------------- | +| Full Refresh Sync | ❌ | Warning: this mode deletes all previously synced data in the configured bucket path. 
| +| Incremental - Append Sync | ✅ | | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | The Streamr destination allows you to sync data to Streamr - The decentralized real‑time data network. diff --git a/docs/integrations/destinations/teradata.md b/docs/integrations/destinations/teradata.md index 6ea038fce3ea..d8144b9a4623 100644 --- a/docs/integrations/destinations/teradata.md +++ b/docs/integrations/destinations/teradata.md @@ -6,17 +6,17 @@ This page guides you through the process of setting up the Teradata destination To use the Teradata destination connector, you'll need: -* Access to a Teradata Vantage instance +- Access to a Teradata Vantage instance - **Note:** If you need a new instance of Vantage, you can install a free version called Vantage Express in the cloud on [Google Cloud](https://quickstarts.teradata.com/vantage.express.gcp.html), [Azure](https://quickstarts.teradata.com/run-vantage-express-on-microsoft-azure.html), and [AWS](https://quickstarts.teradata.com/run-vantage-express-on-aws.html). You can also run Vantage Express on your local machine using [VMware](https://quickstarts.teradata.com/getting.started.vmware.html), [VirtualBox](https://quickstarts.teradata.com/getting.started.vbox.html), or [UTM](https://quickstarts.teradata.com/getting.started.utm.html). + **Note:** If you need a new instance of Vantage, you can install a free version called Vantage Express in the cloud on [Google Cloud](https://quickstarts.teradata.com/vantage.express.gcp.html), [Azure](https://quickstarts.teradata.com/run-vantage-express-on-microsoft-azure.html), and [AWS](https://quickstarts.teradata.com/run-vantage-express-on-aws.html). You can also run Vantage Express on your local machine using [VMware](https://quickstarts.teradata.com/getting.started.vmware.html), [VirtualBox](https://quickstarts.teradata.com/getting.started.vbox.html), or [UTM](https://quickstarts.teradata.com/getting.started.utm.html). You'll need the following information to configure the Teradata destination: -* **Host** - The host name of the Teradata Vantage instance. -* **Username** -* **Password** -* **Default Schema Name** - Specify the schema (or several schemas separated by commas) to be set in the search-path. These schemas will be used to resolve unqualified object names used in statements executed over this connection. -* **JDBC URL Params** (optional) +- **Host** - The host name of the Teradata Vantage instance. +- **Username** +- **Password** +- **Default Schema Name** - Specify the schema (or several schemas separated by commas) to be set in the search-path. These schemas will be used to resolve unqualified object names used in statements executed over this connection. +- **JDBC URL Params** (optional) [Refer to this guide for more details](https://downloads.teradata.com/doc/connectivity/jdbc/reference/current/jdbcug_chapter_2.html#BGBHDDGB) @@ -26,23 +26,21 @@ You'll need the following information to configure the Teradata destination: Each stream will be output into its own table in Teradata. Each table will contain 3 columns: -* `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. The column type in Teradata is `VARCHAR(256)`. -* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. The column type in Teradata is `TIMESTAMP(6)`. -* `_airbyte_data`: a json blob representing with the event data. The column type in Teradata is `JSON`. 
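As a quick sanity check after a sync, you can query these columns directly in Teradata. This is a sketch only: the schema (`airbyte_td`) and table name (`my_stream`) are illustrative, since the actual table name depends on the stream being synced and the default schema you configured above.

```sql
-- Sketch: schema and table names are illustrative; use the schema configured above
-- and the table Airbyte created for your stream.
SELECT _airbyte_ab_id,
       _airbyte_emitted_at,
       _airbyte_data
FROM airbyte_td.my_stream
ORDER BY _airbyte_emitted_at DESC;
```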
- +- `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. The column type in Teradata is `VARCHAR(256)`. +- `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. The column type in Teradata is `TIMESTAMP(6)`. +- `_airbyte_data`: a json blob representing with the event data. The column type in Teradata is `JSON`. ### Features The Teradata destination connector supports the following[ sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): - -| Feature | Supported?\(Yes/No\) | Notes | -| :--- | :--- | :--- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | | -| Namespaces | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | Yes | | ### Performance considerations @@ -52,8 +50,8 @@ following[ sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-s You need a Teradata user with the following permissions: -* can create tables and write permission. -* can create schemas e.g: +- can create tables and write permission. +- can create schemas e.g: You can create such a user by running: @@ -64,6 +62,7 @@ GRANT ALL on dbc to airbyte_user; ``` You can also use a pre-existing user but we highly recommend creating a dedicated user for Airbyte. + ### Setup guide #### Set up the Teradata Destination connector @@ -83,7 +82,7 @@ You can also use a pre-existing user but we highly recommend creating a dedicate ## CHANGELOG -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:--------------------------------------------------------------|:---------------------------------| -| 0.1.0 | 2022-12-13 | https://github.com/airbytehq/airbyte/pull/20428 | New Destination Teradata Vantage | -| 0.1.1 | 2023-03-03 | https://github.com/airbytehq/airbyte/pull/21760 | Added SSL support | \ No newline at end of file +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :---------------------------------------------- | :------------------------------- | +| 0.1.0 | 2022-12-13 | https://github.com/airbytehq/airbyte/pull/20428 | New Destination Teradata Vantage | +| 0.1.1 | 2023-03-03 | https://github.com/airbytehq/airbyte/pull/21760 | Added SSL support | diff --git a/docs/integrations/destinations/tidb.md b/docs/integrations/destinations/tidb.md index 42f31fa36531..c7f646c4bedd 100644 --- a/docs/integrations/destinations/tidb.md +++ b/docs/integrations/destinations/tidb.md @@ -6,13 +6,13 @@ This page guides you through the process of setting up the TiDB destination conn ## Features -| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :---- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | Yes | | -| Namespaces | Yes | | -| SSH Tunnel Connection | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | Yes | | +| Namespaces | Yes | | +| SSH Tunnel Connection | Yes | | #### Output Schema diff --git a/docs/integrations/destinations/typesense.md b/docs/integrations/destinations/typesense.md index 3bf9aac71c75..445071ba88d2 100644 --- 
a/docs/integrations/destinations/typesense.md +++ b/docs/integrations/destinations/typesense.md @@ -16,12 +16,12 @@ Each stream will be output into its own collection in Typesense. If an id column #### Features -| Feature | Supported?\(Yes/No\) | Notes | -| :---------------------------- | :------------------- | :------------------------------------------------------------------------------------------- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | -| Namespaces | No | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | No | | ## Getting started @@ -36,5 +36,5 @@ The setup only requires two fields. First is the `host` which is the address at ## Changelog | Version | Date | Pull Request | Subject | -| :-------| :--------- | :------------------------------------------------------- | :-------------------------| +| :------ | :--------- | :------------------------------------------------------- | :------------------------ | | 0.1.0 | 2022-10-28 | [18349](https://github.com/airbytehq/airbyte/pull/18349) | New Typesense destination | diff --git a/docs/integrations/destinations/vertica.md b/docs/integrations/destinations/vertica.md index 6d5ba806dd25..cd829121f50a 100644 --- a/docs/integrations/destinations/vertica.md +++ b/docs/integrations/destinations/vertica.md @@ -6,21 +6,21 @@ This page guides you through the process of setting up the vertica destination c To use the Vertica destination, you'll need: -* A V -ertica server version 11.0 or above +- A V + ertica server version 11.0 or above Airbyte Cloud only supports connecting to your Vertica instances with SSL or TLS encryption. TLS is used by default. Other than that, you can proceed with the open-source instructions below. You'll need the following information to configure the Vertica destination: -* **Host** - The host name of the server. -* **Port** - The port number the server is listening on. Defaults to the VSQL™ standard port number (5433). -* **Username** -* **Password** -* **Default Schema Name** - Specify the schema (or several schemas separated by commas) to be set in the search-path. These schemas will be used to resolve unqualified object names used in statements executed over this connection. -* **Database** - The database name. The default is to connect to a database with the same name as the user name. -* **JDBC URL Params** (optional) +- **Host** - The host name of the server. +- **Port** - The port number the server is listening on. Defaults to the VSQL™ standard port number (5433). +- **Username** +- **Password** +- **Default Schema Name** - Specify the schema (or several schemas separated by commas) to be set in the search-path. These schemas will be used to resolve unqualified object names used in statements executed over this connection. +- **Database** - The database name. The default is to connect to a database with the same name as the user name. +- **JDBC URL Params** (optional) [Refer to this guide for more details](https://www.vertica.com/docs/12.0.4/HTML/Content/Authoring/ConnectingToVertica/ClientJDBC/JDBCConnectionProperties.htm) @@ -35,8 +35,8 @@ may need to allow access from the IP you're using to expose Airbyte. 
You need a Vertica user with the following permissions: -* can create tables and write rows. -* can create schemas e.g: +- can create tables and write rows. +- can create schemas e.g: You can create such a user by running: @@ -58,9 +58,9 @@ synced data from Airbyte. From [Vertica SQL Identifiers syntax](https://www.vertica.com/docs/12.0.x/HTML/Content/Authoring/ConnectingToVertica/ClientJDBC/ExecutingQueriesThroughJDBC.htm?tocpath=Connecting%20to%20Vertica%7CClient%20Libraries%7CProgramming%20JDBC%20Client%20Applications%7C_____4): -* SQL identifiers and key words must begin with a letter \(a-z, but also letters with diacritical +- SQL identifiers and key words must begin with a letter \(a-z, but also letters with diacritical marks and non-Latin letters\) or an underscore \(\_\). -* Subsequent characters in an identifier or key word can be letters, underscores, digits \(0-9\), or +- Subsequent characters in an identifier or key word can be letters, underscores, digits \(0-9\), or dollar signs \($\). Note that dollar signs are not allowed in identifiers according to the SQL standard, @@ -68,16 +68,16 @@ From [Vertica SQL Identifiers syntax](https://www.vertica.com/docs/12.0.x/HTML/C that contains digits or starts or ends with an underscore, so identifiers of this form are safe against possible conflict with future extensions of the standard. -* The system uses no more than NAMEDATALEN-1 bytes of an identifier; longer names can be written in +- The system uses no more than NAMEDATALEN-1 bytes of an identifier; longer names can be written in commands, but they will be truncated. By default, NAMEDATALEN is 64 so the maximum identifier length is 63 bytes -* Quoted identifiers can contain any character, except the character with code zero. \(To include a +- Quoted identifiers can contain any character, except the character with code zero. \(To include a double quote, write two double quotes.\) This allows constructing table or column names that would otherwise not be possible, such as ones containing spaces or ampersands. The length limitation still applies. -* Quoting an identifier also makes it case-sensitive, whereas unquoted names are always folded to +- Quoting an identifier also makes it case-sensitive, whereas unquoted names are always folded to lower case. -* In order to make your applications portable and less error-prone, use consistent quoting with each name (either always quote it or never quote it). +- In order to make your applications portable and less error-prone, use consistent quoting with each name (either always quote it or never quote it). Note, that Airbyte Vertica destination will create tables and schemas using the Unquoted identifiers when possible or fallback to Quoted Identifiers if the names are containing special @@ -92,19 +92,19 @@ characters. 4. Enter a name for your source. 5. For the **Host**, **Port**, and **DB Name**, enter the hostname, port number, and name for your Vertica database. 6. List the **Default Schemas**. - :::note - The schema names are case sensitive. The 'public' schema is set by default. Multiple schemas may be used at one time. No schemas set explicitly - will sync all of existing. - ::: + :::note + The schema names are case sensitive. The 'public' schema is set by default. Multiple schemas may be used at one time. No schemas set explicitly - will sync all of existing. + ::: 7. For **User** and **Password**, enter the username and password you created in [Step 1](#step-1-optional-create-a-dedicated-read-only-user). -9. 
For Airbyte Open Source, toggle the switch to connect using SSL. For Airbyte Cloud uses SSL by default. -10. For SSL Modes, select: - - **disable** to disable encrypted communication between Airbyte and the source - - **allow** to enable encrypted communication only when required by the source - - **prefer** to allow unencrypted communication only when the source doesn't support encryption - - **require** to always require encryption. Note: The connection will fail if the source doesn't support encryption. - - **verify-ca** to always require encryption and verify that the source has a valid SSL certificate - - **verify-full** to always require encryption and verify the identity of the source -11. To customize the JDBC connection beyond common options, specify additional supported [JDBC URL parameters](https://www.vertica.com/docs/12.0.x/HTML/Content/Authoring/ConnectingToVertica/ClientJDBC/JDBCConnectionProperties.htm) as key-value pairs separated by the symbol & in the **JDBC URL Parameters (Advanced)** field. +8. For Airbyte Open Source, toggle the switch to connect using SSL. For Airbyte Cloud uses SSL by default. +9. For SSL Modes, select: + - **disable** to disable encrypted communication between Airbyte and the source + - **allow** to enable encrypted communication only when required by the source + - **prefer** to allow unencrypted communication only when the source doesn't support encryption + - **require** to always require encryption. Note: The connection will fail if the source doesn't support encryption. + - **verify-ca** to always require encryption and verify that the source has a valid SSL certificate + - **verify-full** to always require encryption and verify the identity of the source +10. To customize the JDBC connection beyond common options, specify additional supported [JDBC URL parameters](https://www.vertica.com/docs/12.0.x/HTML/Content/Authoring/ConnectingToVertica/ClientJDBC/JDBCConnectionProperties.htm) as key-value pairs separated by the symbol & in the **JDBC URL Parameters (Advanced)** field. Example: key1=value1&key2=value2&key3=value3 @@ -118,14 +118,17 @@ characters. :::warning This is an advanced configuration option. Users are advised to use it with caution. ::: + 11. For SSH Tunnel Method, select: + - **No Tunnel** for a direct connection to the database - **SSH Key Authentication** to use an RSA Private as your secret for establishing the SSH tunnel - **Password Authentication** to use a password as your secret for establishing the SSH tunnel - + :::warning Since Airbyte Cloud requires encrypted communication, select **SSH Key Authentication** or **Password Authentication** if you selected **disable**, **allow**, or **prefer** as the **SSL Mode**; otherwise, the connection will fail. ::: + 12. Click **Set up destination**. ## Supported sync modes @@ -133,12 +136,12 @@ characters. 
The Vertica destination connector supports the following[ sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -| Feature | Supported?\(Yes/No\) | Notes | -|:------------------------------|:---------------------|:------| -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | No | | -| Incremental - Deduped History | No | | -| Namespaces | No | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental - Append Sync | No | | +| Incremental - Append + Deduped | No | | +| Namespaces | No | | ## Schema map @@ -146,15 +149,15 @@ following[ sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-s Each stream will be mapped to a separate table in Vertica. Each table will contain 3 columns: -* `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. The column type in +- `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. The column type in Vertica is `VARCHAR`. -* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. +- `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. The column type in Vertica is `TIMESTAMP WITH TIME ZONE`. -* `_airbyte_data`: a json blob representing with the event data. The column type in Vertica +- `_airbyte_data`: a json blob representing with the event data. The column type in Vertica is `JSONB`. ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------| :--- |:---------------------------------------------| +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :--------------------------------------------------------- | :---------------------- | | 0.1.0 | 2023-05-29 | [\#25682](https://github.com/airbytehq/airbyte/pull/25682) | Add Vertica Destination | diff --git a/docs/integrations/destinations/weaviate.md b/docs/integrations/destinations/weaviate.md index 25812fd2b92b..3b54c45a0d6e 100644 --- a/docs/integrations/destinations/weaviate.md +++ b/docs/integrations/destinations/weaviate.md @@ -2,13 +2,13 @@ ## Features -| Feature | Supported?\(Yes/No\) | Notes | -| :--- | :--- | :--- | -| Full Refresh Sync | No | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | No | | -| Namespaces | No | | -| Provide vector | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | No | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduped | No | | +| Namespaces | No | | +| Provide vector | Yes | | #### Output Schema @@ -18,15 +18,17 @@ in the Weaviate class. **Uploading Vectors:** Use the vectors configuration if you want to upload vectors from a source database into Weaviate. You can do this by specifying the stream name and vector field name in the following format: + ``` ., . ``` + For example, if you have a table named `my_table` and the vector is stored using the column `vector` then you should use the following `vectors`configuration: `my_table.vector`. Dynamic Schema: Weaviate will automatically create a schema for the stream if no class was defined unless you have disabled the Dynamic Schema feature in Weaviate. You can also create the class in Weaviate in advance -if you need more control over the schema in Weaviate. +if you need more control over the schema in Weaviate. 
IDs: If your source table has an int based id stored as field name `id` then the ID will automatically be converted to a UUID. Weaviate only supports the ID to be a UUID. @@ -48,7 +50,7 @@ password. To use the Weaviate destination, you'll need: -* A Weaviate cluster version 21.8.10.19 or above +- A Weaviate cluster version 21.8.10.19 or above #### Configure Network Access @@ -58,24 +60,21 @@ Make sure your Weaviate database can be accessed by Airbyte. If your database is You need a Weaviate user or use a Weaviate instance that's accessible to all - ### Setup the Weaviate Destination in Airbyte You should now have all the requirements needed to configure Weaviate as a destination in the UI. You'll need the following information to configure the Weaviate destination: -* **URL** for example http://localhost:8080 or https://my-wcs.semi.network -* **Username** (Optional) -* **Password** (Optional) -* **Batch Size** (Optional, defaults to 100) -* **Vectors** a comma separated list of `` to specify the field -* **ID Schema** a comma separated list of `` to specify the field +- **URL** for example http://localhost:8080 or https://my-wcs.semi.network +- **Username** (Optional) +- **Password** (Optional) +- **Batch Size** (Optional, defaults to 100) +- **Vectors** a comma separated list of `` to specify the field +- **ID Schema** a comma separated list of `` to specify the field name that contains the ID of a record - ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------| :--- |:---------------------------------------------| +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :--------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------- | | 0.1.1 | 2022-02-08 | [\#22527](https://github.com/airbytehq/airbyte/pull/22527) | Multiple bug fixes: Support String based IDs, arrays of uknown type and additionalProperties of type object and array of objects | -| 0.1.0 | 2022-12-06 | [\#20094](https://github.com/airbytehq/airbyte/pull/20094) | Add Weaviate destination | - +| 0.1.0 | 2022-12-06 | [\#20094](https://github.com/airbytehq/airbyte/pull/20094) | Add Weaviate destination | diff --git a/docs/integrations/sources/alloydb.md b/docs/integrations/sources/alloydb.md index 75f6f9c10fda..f9858f2a542b 100644 --- a/docs/integrations/sources/alloydb.md +++ b/docs/integrations/sources/alloydb.md @@ -20,7 +20,6 @@ If your goal is to maintain a snapshot of your table in the destination but the If your dataset is small and you just want a snapshot of your table in the destination, consider using [Full Refresh replication](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite) for your table instead of CDC. - ### Step 1: (Optional) Create a dedicated read-only user We recommend creating a dedicated read-only user for better permission control and auditing. Alternatively, you can use an existing AlloyDB user in your database. @@ -71,6 +70,7 @@ The workaround for partial table syncing is to create a view on the specific col ``` CREATE VIEW as SELECT FROM ; ``` + ``` GRANT SELECT ON TABLE IN SCHEMA to ; ``` @@ -123,16 +123,17 @@ When using an SSH tunnel, you are configuring Airbyte to connect to an intermedi To connect to a AlloyDB instance via an SSH tunnel: 1. 
While [setting up](#setup-guide) the AlloyDB source connector, from the SSH tunnel dropdown, select: - - SSH Key Authentication to use an RSA Private as your secret for establishing the SSH tunnel - - Password Authentication to use a password as your secret for establishing the SSH Tunnel + - SSH Key Authentication to use an RSA Private as your secret for establishing the SSH tunnel + - Password Authentication to use a password as your secret for establishing the SSH Tunnel 2. For **SSH Tunnel Jump Server Host**, enter the hostname or IP address for the intermediate (bastion) server that Airbyte will connect to. 3. For **SSH Connection Port**, enter the port on the bastion server. The default port for SSH connections is 22. 4. For **SSH Login Username**, enter the username to use when connecting to the bastion server. **Note:** This is the operating system username and not the AlloyDB username. 5. For authentication: - - If you selected **SSH Key Authentication**, set the **SSH Private Key** to the [RSA Private Key](#generating-an-rsa-private-key​) that you are using to create the SSH connection. - - If you selected **Password Authentication**, enter the password for the operating system user to connect to the bastion server. **Note:** This is the operating system password and not the AlloyDB password. + - If you selected **SSH Key Authentication**, set the **SSH Private Key** to the [RSA Private Key](#generating-an-rsa-private-key​) that you are using to create the SSH connection. + - If you selected **Password Authentication**, enter the password for the operating system user to connect to the bastion server. **Note:** This is the operating system password and not the AlloyDB password. #### Generating an RSA Private Key​ + The connector expects an RSA key in PEM format. To generate this key, run: ``` @@ -154,8 +155,8 @@ Airbyte uses [logical replication](https://www.postgresql.org/docs/10/logical-re - The records produced by `DELETE` statements only contain primary keys. All other data fields are unset. - Log-based replication only works for master instances of AlloyDB. - Using logical replication increases disk space used on the database server. The additional data is stored until it is consumed. - - Set frequent syncs for CDC to ensure that the data doesn't fill up your disk space. - - If you stop syncing a CDC-configured AlloyDB instance with Airbyte, delete the replication slot. Otherwise, it may fill up your disk space. + - Set frequent syncs for CDC to ensure that the data doesn't fill up your disk space. + - If you stop syncing a CDC-configured AlloyDB instance with Airbyte, delete the replication slot. Otherwise, it may fill up your disk space. 
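For example, you can list the existing replication slots and drop one that is no longer needed using standard PostgreSQL catalog queries. This is a sketch; the slot name `airbyte_slot` is illustrative, so use the slot name configured for your connection.

```sql
-- List logical replication slots and whether they are currently in use.
SELECT slot_name, plugin, active, restart_lsn
FROM pg_replication_slots;

-- After permanently stopping a CDC sync, drop its slot so WAL no longer accumulates on disk.
SELECT pg_drop_replication_slot('airbyte_slot');
```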
### Setting up CDC for AlloyDB @@ -232,7 +233,7 @@ The AlloyDB source connector supports the following [sync modes](https://docs.ai - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental Sync - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) ## Supported cursors @@ -253,74 +254,74 @@ The AlloyDB source connector supports the following [sync modes](https://docs.ai - `BINARY/BLOB` ## Data type mapping + The AlloyDb is a fully managed PostgreSQL-compatible database service. According to Postgres [documentation](https://www.postgresql.org/docs/14/datatype.html), Postgres data types are mapped to the following data types when synchronizing data. You can check the test values examples [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java). If you can't find the data type you are looking for or have any problems feel free to add a new test! -| Postgres Type | Resulting Type | Notes | -|:--------------------------------------|:---------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `bigint` | number | | -| `bigserial`, `serial8` | number | | -| `bit` | string | Fixed-length bit string (e.g. "0100"). | -| `bit varying`, `varbit` | string | Variable-length bit string (e.g. "0100"). | -| `boolean`, `bool` | boolean | | -| `box` | string | | -| `bytea` | string | Variable length binary string with hex output format prefixed with "\x" (e.g. "\x6b707a"). | -| `character`, `char` | string | | -| `character varying`, `varchar` | string | | -| `cidr` | string | | -| `circle` | string | | -| `date` | string | Parsed as ISO8601 date time at midnight. CDC mode doesn't support era indicators. Issue: [#14590](https://github.com/airbytehq/airbyte/issues/14590) | -| `double precision`, `float`, `float8` | number | `Infinity`, `-Infinity`, and `NaN` are not supported and converted to `null`. Issue: [#8902](https://github.com/airbytehq/airbyte/issues/8902). | -| `hstore` | string | | -| `inet` | string | | -| `integer`, `int`, `int4` | number | | -| `interval` | string | | -| `json` | string | | -| `jsonb` | string | | -| `line` | string | | -| `lseg` | string | | -| `macaddr` | string | | -| `macaddr8` | string | | -| `money` | number | | -| `numeric`, `decimal` | number | `Infinity`, `-Infinity`, and `NaN` are not supported and converted to `null`. Issue: [#8902](https://github.com/airbytehq/airbyte/issues/8902). | -| `path` | string | | -| `pg_lsn` | string | | -| `point` | string | | -| `polygon` | string | | -| `real`, `float4` | number | | -| `smallint`, `int2` | number | | -| `smallserial`, `serial2` | number | | -| `serial`, `serial4` | number | | -| `text` | string | | -| `time` | string | Parsed as a time string without a time-zone in the ISO-8601 calendar system. 
| -| `timetz` | string | Parsed as a time string with time-zone in the ISO-8601 calendar system. | -| `timestamp` | string | Parsed as a date-time string without a time-zone in the ISO-8601 calendar system. | -| `timestamptz` | string | Parsed as a date-time string with time-zone in the ISO-8601 calendar system. | -| `tsquery` | string | | -| `tsvector` | string | | -| `uuid` | string | | -| `xml` | string | | -| `enum` | string | | -| `tsrange` | string | | -| `array` | array | E.g. "[\"10001\",\"10002\",\"10003\",\"10004\"]". | -| composite type | string | | +| Postgres Type | Resulting Type | Notes | +| :------------------------------------ | :------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- | +| `bigint` | number | | +| `bigserial`, `serial8` | number | | +| `bit` | string | Fixed-length bit string (e.g. "0100"). | +| `bit varying`, `varbit` | string | Variable-length bit string (e.g. "0100"). | +| `boolean`, `bool` | boolean | | +| `box` | string | | +| `bytea` | string | Variable length binary string with hex output format prefixed with "\x" (e.g. "\x6b707a"). | +| `character`, `char` | string | | +| `character varying`, `varchar` | string | | +| `cidr` | string | | +| `circle` | string | | +| `date` | string | Parsed as ISO8601 date time at midnight. CDC mode doesn't support era indicators. Issue: [#14590](https://github.com/airbytehq/airbyte/issues/14590) | +| `double precision`, `float`, `float8` | number | `Infinity`, `-Infinity`, and `NaN` are not supported and converted to `null`. Issue: [#8902](https://github.com/airbytehq/airbyte/issues/8902). | +| `hstore` | string | | +| `inet` | string | | +| `integer`, `int`, `int4` | number | | +| `interval` | string | | +| `json` | string | | +| `jsonb` | string | | +| `line` | string | | +| `lseg` | string | | +| `macaddr` | string | | +| `macaddr8` | string | | +| `money` | number | | +| `numeric`, `decimal` | number | `Infinity`, `-Infinity`, and `NaN` are not supported and converted to `null`. Issue: [#8902](https://github.com/airbytehq/airbyte/issues/8902). | +| `path` | string | | +| `pg_lsn` | string | | +| `point` | string | | +| `polygon` | string | | +| `real`, `float4` | number | | +| `smallint`, `int2` | number | | +| `smallserial`, `serial2` | number | | +| `serial`, `serial4` | number | | +| `text` | string | | +| `time` | string | Parsed as a time string without a time-zone in the ISO-8601 calendar system. | +| `timetz` | string | Parsed as a time string with time-zone in the ISO-8601 calendar system. | +| `timestamp` | string | Parsed as a date-time string without a time-zone in the ISO-8601 calendar system. | +| `timestamptz` | string | Parsed as a date-time string with time-zone in the ISO-8601 calendar system. | +| `tsquery` | string | | +| `tsvector` | string | | +| `uuid` | string | | +| `xml` | string | | +| `enum` | string | | +| `tsrange` | string | | +| `array` | array | E.g. "[\"10001\",\"10002\",\"10003\",\"10004\"]". | +| composite type | string | | ## Limitations - The AlloyDB source connector currently does not handle schemas larger than 4MB. - The AlloyDB source connector does not alter the schema present in your database. Depending on the destination connected to this source, however, the schema may be altered. See the destination's documentation for more details. 
- The following two schema evolution actions are currently supported: - - Adding/removing tables without resetting the entire connection at the destination - Caveat: In the CDC mode, adding a new table to a connection may become a temporary bottleneck. When a new table is added, the next sync job takes a full snapshot of the new table before it proceeds to handle any changes. - - Resetting a single table within the connection without resetting the rest of the destination tables in that connection + - Adding/removing tables without resetting the entire connection at the destination + Caveat: In the CDC mode, adding a new table to a connection may become a temporary bottleneck. When a new table is added, the next sync job takes a full snapshot of the new table before it proceeds to handle any changes. + - Resetting a single table within the connection without resetting the rest of the destination tables in that connection - Changing a column data type or removing a column might break connections. - ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------- | | 3.1.3 | 2023-08-03 | [28708](https://github.com/airbytehq/airbyte/pull/28708) | Enable checkpointing snapshots in CDC connections | | 3.1.2 | 2023-08-01 | [28954](https://github.com/airbytehq/airbyte/pull/28954) | Fix an issue that prevented use of tables with names containing uppercase letters | | 3.1.1 | 2023-07-31 | [28892](https://github.com/airbytehq/airbyte/pull/28892) | Fix an issue that prevented use of cursor columns with names containing uppercase letters | diff --git a/docs/integrations/sources/apple-search-ads.md b/docs/integrations/sources/apple-search-ads.md index cf1423033a52..231e45cb1b98 100644 --- a/docs/integrations/sources/apple-search-ads.md +++ b/docs/integrations/sources/apple-search-ads.md @@ -1,14 +1,18 @@ # Apple Search Ads + This page contains the setup guide and reference information for the Apple Search Ads source connector. ## Setup guide + ### Step 1: Set up Apple Search Ads + 1. With an administrator account, [create an API user role](https://developer.apple.com/documentation/apple_search_ads/implementing_oauth_for_the_apple_search_ads_api) from the Apple Search Ads UI. 2. Then [implement OAuth for your API user](https://developer.apple.com/documentation/apple_search_ads/implementing_oauth_for_the_apple_search_ads_api) in order to the required Client Secret and Client Id. - ### Step 2: Set up the source connector in Airbyte + #### For Airbyte Open Source + 1. Log in to your Airbyte Open Source account. 2. Click **Sources** and then click **+ New source**. 3. On the Set up the source page, select **Apple Search Ads** from the **Source type** dropdown. @@ -19,33 +23,39 @@ This page contains the setup guide and reference information for the Apple Searc 8. Click **Set up source**. 
## Supported sync modes
+
The Apple Search Ads source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):
-* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/glossary#full-refresh-sync)
-* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
-* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/glossary#full-refresh-sync)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
+- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)

## Supported Streams
+
The Apple Ads source connector supports the following streams. For more information, see the [Apple Search Ads API](https://developer.apple.com/documentation/apple_search_ads).

### Base streams
+
- [campaigns](https://developer.apple.com/documentation/apple_search_ads/get_all_campaigns)
- [adgroups](https://developer.apple.com/documentation/apple_search_ads/get_all_ad_groups)
- [keywords](https://developer.apple.com/documentation/apple_search_ads/get_all_targeting_keywords_in_an_ad_group)

### Report Streams
+
- [campaigns_report_daily](https://developer.apple.com/documentation/apple_search_ads/get_campaign-level_reports)
- [adgroups_report_daily](https://developer.apple.com/documentation/apple_search_ads/get__ad_group-level_reports)
- [keywords_report_daily](https://developer.apple.com/documentation/apple_search_ads/get_keyword-level_reports)

### Report aggregation
+
The Apple Search Ads currently offers [aggregation](https://developer.apple.com/documentation/apple_search_ads/reportingrequest) at hourly, daily, weekly, or monthly level.

However, at this moment and as indicated in the stream names, the connector only offers data with daily aggregation.

-
## Changelog
-| Version | Date | Pull Request | Subject |
-|:--------|:-----------|:--------------------------------------------------------|:-------------------------------------------------------------------------------------|
+
+| Version | Date | Pull Request | Subject |
+| :------ | :--------- | :------------------------------------------------------- | :----------------------------------------------------------------------------------- |
| 0.1.1 | 2023-07-11 | [28153](https://github.com/airbytehq/airbyte/pull/28153) | Fix manifest duplicate key (no change in behavior for the syncs) |
| 0.1.0 | 2022-11-17 | [19557](https://github.com/airbytehq/airbyte/pull/19557) | Initial release with campaigns, adgroups & keywords streams (base and daily reports) |
diff --git a/docs/integrations/sources/bing-ads.md b/docs/integrations/sources/bing-ads.md
index c83fbfa9afe8..258189441a01 100644
--- a/docs/integrations/sources/bing-ads.md
+++ b/docs/integrations/sources/bing-ads.md
@@ -1,8 +1,11 @@
# Bing Ads
+
This page contains the setup guide and reference information for the Bing Ads source connector.

## Setup guide
+
### Step 1: Set up Bing Ads
+
1. [Register your application](https://docs.microsoft.com/en-us/advertising/guides/authentication-oauth-register?view=bingads-13) in the Azure portal.
2. [Request user consent](https://docs.microsoft.com/en-us/advertising/guides/authentication-oauth-consent?view=bingads-13l) to get the authorization code. 3. Use the authorization code to [get a refresh token](https://docs.microsoft.com/en-us/advertising/guides/authentication-oauth-get-tokens?view=bingads-13). @@ -25,7 +28,9 @@ The tenant is used in the authentication URL, for example: `https://login.micros ### Step 2: Set up the source connector in Airbyte + **For Airbyte Cloud:** + 1. Log in to your [Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. 2. Click **Sources** and then click **+ New source**. 3. On the Set up the source page, select **Bing Ads** from the **Source type** dropdown. @@ -36,11 +41,13 @@ The tenant is used in the authentication URL, for example: `https://login.micros 8. For **Lookback window** (also known as attribution or conversion window) enter the number of **days** to look into the past. If your conversion window has an hours/minutes granularity, round it up to the number of days exceeding. If you're not using performance report streams in incremental mode, let it with 0 default value. 9. Click **Authenticate your Bing Ads account**. 10. Log in and authorize the Bing Ads account. -11. Click **Set up source**. +11. Click **Set up source**. + **For Airbyte Open Source:** + 1. Log in to your Airbyte Open Source account. 2. Click **Sources** and then click **+ New source**. 3. On the Set up the source page, select **Bing Ads** from the **Source type** dropdown. @@ -53,22 +60,27 @@ The tenant is used in the authentication URL, for example: `https://login.micros ## Supported sync modes + The Bing Ads source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) -* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) -* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) + +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) ## Supported Streams + The Bing Ads source connector supports the following streams. For more information, see the [Bing Ads API](https://docs.microsoft.com/en-us/advertising/guides/?view=bingads-13). 
### Basic streams + - [accounts](https://docs.microsoft.com/en-us/advertising/customer-management-service/searchaccounts?view=bingads-13) - [ad_groups](https://docs.microsoft.com/en-us/advertising/campaign-management-service/getadgroupsbycampaignid?view=bingads-13) - [ads](https://docs.microsoft.com/en-us/advertising/campaign-management-service/getadsbyadgroupid?view=bingads-13) - [campaigns](https://docs.microsoft.com/en-us/advertising/campaign-management-service/getcampaignsbyaccountid?view=bingads-13) ### Report Streams + - [account_performance_report_hourly](https://docs.microsoft.com/en-us/advertising/reporting-service/accountperformancereportrequest?view=bingads-13) - [account_performance_report_daily](https://docs.microsoft.com/en-us/advertising/reporting-service/accountperformancereportrequest?view=bingads-13) - [account_performance_report_weekly](https://docs.microsoft.com/en-us/advertising/reporting-service/accountperformancereportrequest?view=bingads-13) @@ -92,6 +104,7 @@ The Bing Ads source connector supports the following streams. For more informati - [keyword_performance_report_monthly](https://docs.microsoft.com/en-us/advertising/reporting-service/keywordperformancereportrequest?view=bingads-13) ### Report aggregation + All reports synced by this connector can be [aggregated](https://docs.microsoft.com/en-us/advertising/reporting-service/reportaggregation?view=bingads-13) using hourly, daily, weekly, or monthly time windows. For example, if you select a report with daily aggregation, the report will contain a row for each day for the duration of the report. Each row will indicate the number of impressions recorded on that day. @@ -99,33 +112,35 @@ For example, if you select a report with daily aggregation, the report will cont A report's aggregation window is indicated in its name. For example, `account_performance_report_hourly` is the Account Performance Reported aggregated using an hourly window. ## Performance considerations + The Bing Ads API limits the number of requests for all Microsoft Advertising clients. You can find detailed info [here](https://docs.microsoft.com/en-us/advertising/guides/services-protocol?view=bingads-13#throttling). ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------| -| 0.1.24 | 2023-06-22 | [27619](https://github.com/airbytehq/airbyte/pull/27619) | Retry request after facing temporary name resolution error. | -| 0.1.23 | 2023-05-11 | [25996](https://github.com/airbytehq/airbyte/pull/25996) | Implement a retry logic if SSL certificate validation fails. | -| 0.1.22 | 2023-05-08 | [24223](https://github.com/airbytehq/airbyte/pull/24223) | Add CampaignLabels report column in campaign performance report | -| 0.1.21 | 2023-04-28 | [25668](https://github.com/airbytehq/airbyte/pull/25668) | Add undeclared fields to accounts, campaigns, campaign_performance_report, keyword_performance_report and account_performance_report streams | -| 0.1.20 | 2023-03-09 | [23663](https://github.com/airbytehq/airbyte/pull/23663) | Add lookback window for performance reports in incremental mode | -| 0.1.19 | 2023-03-08 | [23868](https://github.com/airbytehq/airbyte/pull/23868) | Add dimensional-type columns for reports. 
| -| 0.1.18 | 2023-01-30 | [22073](https://github.com/airbytehq/airbyte/pull/22073) | Fix null values in the `Keyword` column of `keyword_performance_report` streams | -| 0.1.17 | 2022-12-10 | [20005](https://github.com/airbytehq/airbyte/pull/20005) | Add `Keyword` to `keyword_performance_report` stream | -| 0.1.16 | 2022-10-12 | [17873](https://github.com/airbytehq/airbyte/pull/17873) | Fix: added missing campaign types in (Audience, Shopping and DynamicSearchAds) in campaigns stream | -| 0.1.15 | 2022-10-03 | [17505](https://github.com/airbytehq/airbyte/pull/17505) | Fix: limit cache size for ServiceClient instances | -| 0.1.14 | 2022-09-29 | [17403](https://github.com/airbytehq/airbyte/pull/17403) | Fix: limit cache size for ReportingServiceManager instances | -| 0.1.13 | 2022-09-29 | [17386](https://github.com/airbytehq/airbyte/pull/17386) | Migrate to per-stream states. | -| 0.1.12 | 2022-09-05 | [16335](https://github.com/airbytehq/airbyte/pull/16335) | Added backoff for socket.timeout | -| 0.1.11 | 2022-08-25 | [15684](https://github.com/airbytehq/airbyte/pull/15684) (published in [15987](https://github.com/airbytehq/airbyte/pull/15987)) | Fixed log messages being unreadable | -| 0.1.10 | 2022-08-12 | [15602](https://github.com/airbytehq/airbyte/pull/15602) | Fixed bug caused Hourly Reports to crash due to invalid fields set | -| 0.1.9 | 2022-08-02 | [14862](https://github.com/airbytehq/airbyte/pull/14862) | Added missing columns | -| 0.1.8 | 2022-06-15 | [13801](https://github.com/airbytehq/airbyte/pull/13801) | All reports `hourly/daily/weekly/monthly` will be generated by default, these options are removed from input configuration | -| 0.1.7 | 2022-05-17 | [12937](https://github.com/airbytehq/airbyte/pull/12937) | Added OAuth2.0 authentication method, removed `redirect_uri` from input configuration | -| 0.1.6 | 2022-04-30 | [12500](https://github.com/airbytehq/airbyte/pull/12500) | Improve input configuration copy | -| 0.1.5 | 2022-01-01 | [11652](https://github.com/airbytehq/airbyte/pull/11652) | Rebump attempt after DockerHub failure at registring the 0.1.4 | -| 0.1.4 | 2022-03-22 | [11311](https://github.com/airbytehq/airbyte/pull/11311) | Added optional Redirect URI & Tenant ID to spec | -| 0.1.3 | 2022-01-14 | [9510](https://github.com/airbytehq/airbyte/pull/9510) | Fixed broken dependency that blocked connector's operations | -| 0.1.2 | 2021-12-14 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update titles and descriptions | -| 0.1.1 | 2021-08-31 | [5750](https://github.com/airbytehq/airbyte/pull/5750) | Added reporting streams\) | -| 0.1.0 | 2021-07-22 | [4911](https://github.com/airbytehq/airbyte/pull/4911) | Initial release supported core streams \(Accounts, Campaigns, Ads, AdGroups\) | + +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------- | +| 0.1.24 | 2023-06-22 | [27619](https://github.com/airbytehq/airbyte/pull/27619) | Retry request after facing temporary name resolution error. | +| 0.1.23 | 2023-05-11 | [25996](https://github.com/airbytehq/airbyte/pull/25996) | Implement a retry logic if SSL certificate validation fails. 
| +| 0.1.22 | 2023-05-08 | [24223](https://github.com/airbytehq/airbyte/pull/24223) | Add CampaignLabels report column in campaign performance report | +| 0.1.21 | 2023-04-28 | [25668](https://github.com/airbytehq/airbyte/pull/25668) | Add undeclared fields to accounts, campaigns, campaign_performance_report, keyword_performance_report and account_performance_report streams | +| 0.1.20 | 2023-03-09 | [23663](https://github.com/airbytehq/airbyte/pull/23663) | Add lookback window for performance reports in incremental mode | +| 0.1.19 | 2023-03-08 | [23868](https://github.com/airbytehq/airbyte/pull/23868) | Add dimensional-type columns for reports. | +| 0.1.18 | 2023-01-30 | [22073](https://github.com/airbytehq/airbyte/pull/22073) | Fix null values in the `Keyword` column of `keyword_performance_report` streams | +| 0.1.17 | 2022-12-10 | [20005](https://github.com/airbytehq/airbyte/pull/20005) | Add `Keyword` to `keyword_performance_report` stream | +| 0.1.16 | 2022-10-12 | [17873](https://github.com/airbytehq/airbyte/pull/17873) | Fix: added missing campaign types in (Audience, Shopping and DynamicSearchAds) in campaigns stream | +| 0.1.15 | 2022-10-03 | [17505](https://github.com/airbytehq/airbyte/pull/17505) | Fix: limit cache size for ServiceClient instances | +| 0.1.14 | 2022-09-29 | [17403](https://github.com/airbytehq/airbyte/pull/17403) | Fix: limit cache size for ReportingServiceManager instances | +| 0.1.13 | 2022-09-29 | [17386](https://github.com/airbytehq/airbyte/pull/17386) | Migrate to per-stream states. | +| 0.1.12 | 2022-09-05 | [16335](https://github.com/airbytehq/airbyte/pull/16335) | Added backoff for socket.timeout | +| 0.1.11 | 2022-08-25 | [15684](https://github.com/airbytehq/airbyte/pull/15684) (published in [15987](https://github.com/airbytehq/airbyte/pull/15987)) | Fixed log messages being unreadable | +| 0.1.10 | 2022-08-12 | [15602](https://github.com/airbytehq/airbyte/pull/15602) | Fixed bug caused Hourly Reports to crash due to invalid fields set | +| 0.1.9 | 2022-08-02 | [14862](https://github.com/airbytehq/airbyte/pull/14862) | Added missing columns | +| 0.1.8 | 2022-06-15 | [13801](https://github.com/airbytehq/airbyte/pull/13801) | All reports `hourly/daily/weekly/monthly` will be generated by default, these options are removed from input configuration | +| 0.1.7 | 2022-05-17 | [12937](https://github.com/airbytehq/airbyte/pull/12937) | Added OAuth2.0 authentication method, removed `redirect_uri` from input configuration | +| 0.1.6 | 2022-04-30 | [12500](https://github.com/airbytehq/airbyte/pull/12500) | Improve input configuration copy | +| 0.1.5 | 2022-01-01 | [11652](https://github.com/airbytehq/airbyte/pull/11652) | Rebump attempt after DockerHub failure at registring the 0.1.4 | +| 0.1.4 | 2022-03-22 | [11311](https://github.com/airbytehq/airbyte/pull/11311) | Added optional Redirect URI & Tenant ID to spec | +| 0.1.3 | 2022-01-14 | [9510](https://github.com/airbytehq/airbyte/pull/9510) | Fixed broken dependency that blocked connector's operations | +| 0.1.2 | 2021-12-14 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update titles and descriptions | +| 0.1.1 | 2021-08-31 | [5750](https://github.com/airbytehq/airbyte/pull/5750) | Added reporting streams\) | +| 0.1.0 | 2021-07-22 | [4911](https://github.com/airbytehq/airbyte/pull/4911) | Initial release supported core streams \(Accounts, Campaigns, Ads, AdGroups\) | diff --git a/docs/integrations/sources/delighted.md b/docs/integrations/sources/delighted.md index 155a6a8163a0..b76d5f585a00 
100644
--- a/docs/integrations/sources/delighted.md
+++ b/docs/integrations/sources/delighted.md
@@ -3,13 +3,15 @@ This page contains the setup guide and reference information for the [Delighted](https://delighted.com/) source connector.
 ## Prerequisites
+
 - A Delighted API Key.
 - A desired start date and time. Only data added on and after this point will be replicated.
 ## Setup guide
+
 ### Step 1: Obtain a Delighted API Key
-To set up the Delighted source connector, you'll need a Delighted API key. For detailed instructions, please refer to the
+To set up the Delighted source connector, you'll need a Delighted API key. For detailed instructions, please refer to the
 [official Delighted documentation](https://app.delighted.com/docs/api).
 ### Step 2: Set up the Delighted connector in Airbyte
@@ -21,7 +23,7 @@ To set up the Delighted source connector, you'll need a Delighted API key. For d
 5. In the **Replication Start Date** field, enter the desired UTC date and time. Only the data added on and after this date will be replicated.
 :::note
-If you are configuring this connector programmatically, please format your date as such: `yyyy-mm-ddThh:mm:ssZ`. For example, an input of `2022-05-30T14:50:00Z` signifies a start date of May 30th, 2022 at 2:50 PM UTC. For help converting UTC to your local time,
+If you are configuring this connector programmatically, please format your date as such: `yyyy-mm-ddThh:mm:ssZ`. For example, an input of `2022-05-30T14:50:00Z` signifies a start date of May 30th, 2022 at 2:50 PM UTC. For help converting UTC to your local time,
 [use a UTC Time Zone Converter](https://dateful.com/convert/utc).
 :::
@@ -31,26 +33,26 @@ If you are configuring this connector programmatically, please format your date
 The Delighted source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):
-* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
-* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
-* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
+- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)
 ## Supported streams
 This source is capable of syncing the following core streams:
-* [Bounced People](https://app.delighted.com/docs/api/listing-bounced-people)
-* [People](https://app.delighted.com/docs/api/listing-people)
-* [Survey Responses](https://app.delighted.com/docs/api/listing-survey-responses)
-* [Unsubscribed People](https://app.delighted.com/docs/api/listing-unsubscribed-people)
+- [Bounced People](https://app.delighted.com/docs/api/listing-bounced-people)
+- [People](https://app.delighted.com/docs/api/listing-people)
+- [Survey Responses](https://app.delighted.com/docs/api/listing-survey-responses)
+- [Unsubscribed People](https://app.delighted.com/docs/api/listing-unsubscribed-people)
 ## Changelog
 | Version | Date | Pull Request | Subject |
-|:--------|:-----------|:---------------------------------------------------------|:-----------------------------------------------------------------------------------------------------| -| 0.2.2 | 2023-03-09 | [23909](https://github.com/airbytehq/airbyte/pull/23909) | Updated the input config pattern to accept both `RFC3339` and `datetime string` formats in UI | -| 0.2.1 | 2023-02-14 | [23009](https://github.com/airbytehq/airbyte/pull/23009) |Specified date formatting in specification | +| :------ | :--------- | :------------------------------------------------------- | :--------------------------------------------------------------------------------------------------- | +| 0.2.2 | 2023-03-09 | [23909](https://github.com/airbytehq/airbyte/pull/23909) | Updated the input config pattern to accept both `RFC3339` and `datetime string` formats in UI | +| 0.2.1 | 2023-02-14 | [23009](https://github.com/airbytehq/airbyte/pull/23009) | Specified date formatting in specification | | 0.2.0 | 2022-11-22 | [19822](https://github.com/airbytehq/airbyte/pull/19822) | Migrate to Low code + certify to Beta | | 0.1.4 | 2022-06-10 | [13439](https://github.com/airbytehq/airbyte/pull/13439) | Change since parameter input to iso date | | 0.1.3 | 2022-01-31 | [9550](https://github.com/airbytehq/airbyte/pull/9550) | Output only records in which cursor field is greater than the value in state for incremental streams | diff --git a/docs/integrations/sources/facebook-marketing.md b/docs/integrations/sources/facebook-marketing.md index adf01c4fef82..45fe2f987c79 100644 --- a/docs/integrations/sources/facebook-marketing.md +++ b/docs/integrations/sources/facebook-marketing.md @@ -43,28 +43,28 @@ This page guides you through the process of setting up the Facebook Marketing so 8. (Optional) Toggle the **Fetch Thumbnail Images** button to fetch the `thumbnail_url` and store the result in `thumbnail_data_url` for each [Ad Creative](https://developers.facebook.com/docs/marketing-api/creative/). 9. (Optional) In the Custom Insights section. A list which contains ad statistics entries, each entry must have a name and can contain fields, breakdowns or action_breakdowns. Click on "add" to fill this field. - To retrieve specific fields from Facebook Ads Insights combined with other breakdowns, you can choose which fields and breakdowns to sync. - We recommend following the Facebook Marketing [documentation](https://developers.facebook.com/docs/marketing-api/insights/breakdowns) to understand the breakdown limitations. Some fields can not be requested and many others only work when combined with specific fields. For example, the breakdown `app_id` is only supported with the `total_postbacks` field. - - To configure Custom Insights: - - 1. For **Name**, enter a name for the insight. This will be used as the Airbyte stream name - 2. For **Level**, enter the level of the fields you want to pull from the Facebook Marketing API. By default, 'ad'. You can specify also account, campaign or adset. - 3. For **Fields**, enter a list of the fields you want to pull from the Facebook Marketing API. - 4. For **End Date**, enter the date in the `YYYY-MM-DDTHH:mm:ssZ` format. The data added on and before this date will be replicated. If this field is blank, Airbyte will replicate the latest data. - 5. For **Breakdowns**, enter a list of the breakdowns you want to configure. - 6. For **Start Date**, enter the date in the `YYYY-MM-DDTHH:mm:ssZ` format. The data added on and after this date will be replicated. 
If this field is blank, Airbyte will replicate all data. - 7. For **Action Breakdown**, enter a list of the action breakdowns you want to configure. - 8. For **Action Report Time**, enter the action report time you want to configure (mixed, conversion or impression). - 9. For **Custom Insights Lookback Window**, fill in the appropriate value. See [more](#facebook-marketing-attribution-reporting) on this parameter. - 10. Click **Done**. - 12. For **Page Size of Requests**, fill in the page size in case pagination kicks in. Feel free to ignore it, the default value should work in most cases. - 13. For **Insights Lookback Window**, fill in the appropriate value. Facebook freezes insight data 28 days after it was generated, which means that all data from the past 28 days may have changed since we last emitted it, so you can retrieve refreshed insights from the past by setting this parameter. If you set a custom lookback window value in Facebook account, please provide the same value here. See [more](#facebook-marketing-attribution-reporting) on this parameter. - 14. Click **Set up source**. - - :::warning - Additional streams for Facebook Marketing are dynamically created based on the specified Custom Insights. For an existing Facebook Marketing source, when you are updating or removing Custom Insights, you should also ensure that any connections syncing to these streams are either disabled or have had their source schema refreshed. - ::: + To retrieve specific fields from Facebook Ads Insights combined with other breakdowns, you can choose which fields and breakdowns to sync. + We recommend following the Facebook Marketing [documentation](https://developers.facebook.com/docs/marketing-api/insights/breakdowns) to understand the breakdown limitations. Some fields can not be requested and many others only work when combined with specific fields. For example, the breakdown `app_id` is only supported with the `total_postbacks` field. + + To configure Custom Insights: + + 1. For **Name**, enter a name for the insight. This will be used as the Airbyte stream name + 2. For **Level**, enter the level of the fields you want to pull from the Facebook Marketing API. By default, 'ad'. You can specify also account, campaign or adset. + 3. For **Fields**, enter a list of the fields you want to pull from the Facebook Marketing API. + 4. For **End Date**, enter the date in the `YYYY-MM-DDTHH:mm:ssZ` format. The data added on and before this date will be replicated. If this field is blank, Airbyte will replicate the latest data. + 5. For **Breakdowns**, enter a list of the breakdowns you want to configure. + 6. For **Start Date**, enter the date in the `YYYY-MM-DDTHH:mm:ssZ` format. The data added on and after this date will be replicated. If this field is blank, Airbyte will replicate all data. + 7. For **Action Breakdown**, enter a list of the action breakdowns you want to configure. + 8. For **Action Report Time**, enter the action report time you want to configure (mixed, conversion or impression). + 9. For **Custom Insights Lookback Window**, fill in the appropriate value. See [more](#facebook-marketing-attribution-reporting) on this parameter. + 10. Click **Done**. + 11. For **Page Size of Requests**, fill in the page size in case pagination kicks in. Feel free to ignore it, the default value should work in most cases. + 12. For **Insights Lookback Window**, fill in the appropriate value. 
Facebook freezes insight data 28 days after it was generated, which means that all data from the past 28 days may have changed since we last emitted it, so you can retrieve refreshed insights from the past by setting this parameter. If you set a custom lookback window value in Facebook account, please provide the same value here. See [more](#facebook-marketing-attribution-reporting) on this parameter. + 13. Click **Set up source**. + + :::warning + Additional streams for Facebook Marketing are dynamically created based on the specified Custom Insights. For an existing Facebook Marketing source, when you are updating or removing Custom Insights, you should also ensure that any connections syncing to these streams are either disabled or have had their source schema refreshed. + ::: @@ -95,7 +95,7 @@ The Facebook Marketing source connector supports the following sync modes: - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) (except for the AdCreatives and AdAccount tables) -- [Incremental Sync - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) (except for the AdCreatives and AdAccount tables) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) (except for the AdCreatives and AdAccount tables) ## Supported Streams @@ -167,11 +167,11 @@ Please be informed that the connector uses the `lookback_window` parameter to pe ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 1.1.2 | 2023-08-03 | [29042](https://github.com/airbytehq/airbyte/pull/29042) | Fix broken `advancedAuth` references for `spec` | -| 1.1.1 | 2023-07-26 | [27996](https://github.com/airbytehq/airbyte/pull/27996) | remove reference to authSpecification | -| 1.1.0 | 2023-07-11 | [26345](https://github.com/airbytehq/airbyte/pull/26345) | add new `action_report_time` attribute to `AdInsights` class | -| 1.0.1 | 2023-07-07 | [27979](https://github.com/airbytehq/airbyte/pull/27979) | Added the ability to restore the reduced request record limit after the successful retry, and handle the `unknown error` (code 99) with the retry strategy | +| :------ | :--------- | :------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| 1.1.2 | 2023-08-03 | [29042](https://github.com/airbytehq/airbyte/pull/29042) | Fix broken `advancedAuth` references for `spec` | +| 1.1.1 | 2023-07-26 | [27996](https://github.com/airbytehq/airbyte/pull/27996) | remove reference to authSpecification | +| 1.1.0 | 2023-07-11 | [26345](https://github.com/airbytehq/airbyte/pull/26345) | add new 
`action_report_time` attribute to `AdInsights` class | +| 1.0.1 | 2023-07-07 | [27979](https://github.com/airbytehq/airbyte/pull/27979) | Added the ability to restore the reduced request record limit after the successful retry, and handle the `unknown error` (code 99) with the retry strategy | | 1.0.0 | 2023-07-05 | [27563](https://github.com/airbytehq/airbyte/pull/27563) | Migrate to FB SDK version 17 | | 0.5.0 | 2023-06-26 | [27728](https://github.com/airbytehq/airbyte/pull/27728) | License Update: Elv2 | | 0.4.3 | 2023-05-12 | [27483](https://github.com/airbytehq/airbyte/pull/27483) | Reduce replication start date by one more day | diff --git a/docs/integrations/sources/freshdesk.md b/docs/integrations/sources/freshdesk.md index 0f802c46234a..b56115b5e34b 100644 --- a/docs/integrations/sources/freshdesk.md +++ b/docs/integrations/sources/freshdesk.md @@ -18,45 +18,45 @@ To set up the Freshdesk source connector, you'll need the Freshdesk [domain URL] 8. For **Requests per minute**, enter the number of requests per minute that this source allowed to use. The Freshdesk rate limit is 50 requests per minute per app per account. 9. Click **Set up source**. -## Supported sync modes +## Supported sync modes -* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) -* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) -* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) ## Supported Streams Several output streams are available from this source: -* [Agents](https://developers.freshdesk.com/api/#agents) -* [Business Hours](https://developers.freshdesk.com/api/#business-hours) -* [Canned Responses](https://developers.freshdesk.com/api/#canned-responses) -* [Canned Response Folders](https://developers.freshdesk.com/api/#list_all_canned_response_folders) -* [Companies](https://developers.freshdesk.com/api/#companies) -* [Contacts](https://developers.freshdesk.com/api/#contacts) \(Native Incremental Sync\) -* [Conversations](https://developers.freshdesk.com/api/#conversations) -* [Discussion Categories](https://developers.freshdesk.com/api/#category_attributes) -* [Discussion Comments](https://developers.freshdesk.com/api/#comment_attributes) -* [Discussion Forums](https://developers.freshdesk.com/api/#forum_attributes) -* [Discussion Topics](https://developers.freshdesk.com/api/#topic_attributes) -* [Email Configs](https://developers.freshdesk.com/api/#email-configs) -* [Email Mailboxes](https://developers.freshdesk.com/api/#email-mailboxes) -* [Groups](https://developers.freshdesk.com/api/#groups) -* [Products](https://developers.freshdesk.com/api/#products) -* [Roles](https://developers.freshdesk.com/api/#roles) -* [Satisfaction Ratings](https://developers.freshdesk.com/api/#satisfaction-ratings) -* [Scenario Automations](https://developers.freshdesk.com/api/#scenario-automations) 
-* [Settings](https://developers.freshdesk.com/api/#settings) -* [Skills](https://developers.freshdesk.com/api/#skills) -* [SLA Policies](https://developers.freshdesk.com/api/#sla-policies) -* [Solution Articles](https://developers.freshdesk.com/api/#solution_article_attributes) -* [Solution Categories](https://developers.freshdesk.com/api/#solution_category_attributes) -* [Solution Folders](https://developers.freshdesk.com/api/#solution_folder_attributes) -* [Surveys](https://developers.freshdesk.com/api/#surveys) -* [Tickets](https://developers.freshdesk.com/api/#tickets) \(Native Incremental Sync\) -* [Ticket Fields](https://developers.freshdesk.com/api/#ticket-fields) -* [Time Entries](https://developers.freshdesk.com/api/#time-entries) +- [Agents](https://developers.freshdesk.com/api/#agents) +- [Business Hours](https://developers.freshdesk.com/api/#business-hours) +- [Canned Responses](https://developers.freshdesk.com/api/#canned-responses) +- [Canned Response Folders](https://developers.freshdesk.com/api/#list_all_canned_response_folders) +- [Companies](https://developers.freshdesk.com/api/#companies) +- [Contacts](https://developers.freshdesk.com/api/#contacts) \(Native Incremental Sync\) +- [Conversations](https://developers.freshdesk.com/api/#conversations) +- [Discussion Categories](https://developers.freshdesk.com/api/#category_attributes) +- [Discussion Comments](https://developers.freshdesk.com/api/#comment_attributes) +- [Discussion Forums](https://developers.freshdesk.com/api/#forum_attributes) +- [Discussion Topics](https://developers.freshdesk.com/api/#topic_attributes) +- [Email Configs](https://developers.freshdesk.com/api/#email-configs) +- [Email Mailboxes](https://developers.freshdesk.com/api/#email-mailboxes) +- [Groups](https://developers.freshdesk.com/api/#groups) +- [Products](https://developers.freshdesk.com/api/#products) +- [Roles](https://developers.freshdesk.com/api/#roles) +- [Satisfaction Ratings](https://developers.freshdesk.com/api/#satisfaction-ratings) +- [Scenario Automations](https://developers.freshdesk.com/api/#scenario-automations) +- [Settings](https://developers.freshdesk.com/api/#settings) +- [Skills](https://developers.freshdesk.com/api/#skills) +- [SLA Policies](https://developers.freshdesk.com/api/#sla-policies) +- [Solution Articles](https://developers.freshdesk.com/api/#solution_article_attributes) +- [Solution Categories](https://developers.freshdesk.com/api/#solution_category_attributes) +- [Solution Folders](https://developers.freshdesk.com/api/#solution_folder_attributes) +- [Surveys](https://developers.freshdesk.com/api/#surveys) +- [Tickets](https://developers.freshdesk.com/api/#tickets) \(Native Incremental Sync\) +- [Ticket Fields](https://developers.freshdesk.com/api/#ticket-fields) +- [Time Entries](https://developers.freshdesk.com/api/#time-entries) ## Performance considerations @@ -64,11 +64,10 @@ The Freshdesk connector should not run into Freshdesk API limitations under norm If you don't use the start date Freshdesk will retrieve only the last 30 days. More information [here](https://developers.freshdesk.com/api/#list_all_tickets). 
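
To see what that default window means in practice, here is a minimal sketch (not the connector's own implementation) that asks the Freshdesk ticket API for tickets updated in the last 30 days. The subdomain and API key are placeholders, and the `updated_since` filter and basic-auth scheme follow Freshdesk's public API documentation.

```python
from datetime import datetime, timedelta, timezone

import requests

DOMAIN = "your-company"               # hypothetical Freshdesk subdomain
API_KEY = "YOUR_FRESHDESK_API_KEY"    # placeholder credential

# With no start date configured, fall back to 30 days ago, mirroring the
# API behaviour described above.
start_date = datetime.now(timezone.utc) - timedelta(days=30)

response = requests.get(
    f"https://{DOMAIN}.freshdesk.com/api/v2/tickets",
    params={
        "updated_since": start_date.strftime("%Y-%m-%dT%H:%M:%SZ"),
        "per_page": 100,
    },
    auth=(API_KEY, "X"),  # Freshdesk uses the API key as the basic-auth username
    timeout=30,
)
response.raise_for_status()
print(f"Fetched {len(response.json())} tickets updated since {start_date:%Y-%m-%d}")
```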
- ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :------------------------------------------------------------------------------------ | | 3.0.4 | 2023-06-24 | [27680](https://github.com/airbytehq/airbyte/pull/27680) | Fix formatting | | 3.0.3 | 2023-06-02 | [26978](https://github.com/airbytehq/airbyte/pull/26978) | Skip the stream if subscription level had changed during sync | | 3.0.2 | 2023-02-06 | [21970](https://github.com/airbytehq/airbyte/pull/21970) | Enable availability strategy for all streams | diff --git a/docs/integrations/sources/gitlab.md b/docs/integrations/sources/gitlab.md index 9a14441f6e15..d69193e12ebb 100644 --- a/docs/integrations/sources/gitlab.md +++ b/docs/integrations/sources/gitlab.md @@ -10,6 +10,7 @@ This page contains the setup guide and reference information for the Gitlab Sour - GitLab Projects (Optional) + **For Airbyte Cloud:** - Personal Access Token (see [personal access token](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html)) @@ -17,6 +18,7 @@ This page contains the setup guide and reference information for the Gitlab Sour + **For Airbyte Open Source:** - Personal Access Token (see [personal access token](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html)) @@ -29,12 +31,15 @@ This page contains the setup guide and reference information for the Gitlab Sour Create a [GitLab Account](https://gitlab.com) or set up a local instance of GitLab. + **Airbyte Open Source additional setup steps** Log into [GitLab](https://gitlab.com) and then generate a [personal access token](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html). Your token should have the `read_api` scope, that Grants read access to the API, including all groups and projects, the container registry, and the package registry. + + ### Step 2: Set up the GitLab connector in Airbyte **For Airbyte Cloud:** @@ -55,6 +60,7 @@ Log into [GitLab](https://gitlab.com) and then generate a [personal access token + **For Airbyte Open Source:** 1. Authenticate with **Personal Access Token**. 
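
If you want to confirm that the token works before creating the source, a minimal check against the GitLab REST API might look like the sketch below. The token value is a placeholder; a `200` response confirms the `read_api` scope is sufficient for read-only access.

```python
import requests

GITLAB_API_URL = "https://gitlab.com/api/v4"   # or your self-managed instance URL
PRIVATE_TOKEN = "YOUR_PERSONAL_ACCESS_TOKEN"   # token with the read_api scope

# List a few projects the token can read; an authorization error here means
# the token or its scope needs to be fixed before configuring the connector.
response = requests.get(
    f"{GITLAB_API_URL}/projects",
    headers={"PRIVATE-TOKEN": PRIVATE_TOKEN},
    params={"membership": "true", "per_page": 5},
    timeout=30,
)
response.raise_for_status()
for project in response.json():
    print(project["path_with_namespace"])
```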
@@ -64,34 +70,34 @@ Log into [GitLab](https://gitlab.com) and then generate a [personal access token The Gitlab Source connector supports the following [ sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) -* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) -* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) ## Supported Streams This connector outputs the following streams: -* [Branches](https://docs.gitlab.com/ee/api/branches.html) -* [Commits](https://docs.gitlab.com/ee/api/commits.html) \(Incremental\) -* [Issues](https://docs.gitlab.com/ee/api/issues.html) \(Incremental\) -* [Group Issue Boards](https://docs.gitlab.com/ee/api/group_boards.html) -* [Pipelines](https://docs.gitlab.com/ee/api/pipelines.html) \(Incremental\) -* [Jobs](https://docs.gitlab.com/ee/api/jobs.html) -* [Projects](https://docs.gitlab.com/ee/api/projects.html) -* [Project Milestones](https://docs.gitlab.com/ee/api/milestones.html) -* [Project Merge Requests](https://docs.gitlab.com/ee/api/merge_requests.html) \(Incremental\) -* [Users](https://docs.gitlab.com/ee/api/users.html) -* [Groups](https://docs.gitlab.com/ee/api/groups.html) -* [Group Milestones](https://docs.gitlab.com/ee/api/group_milestones.html) -* [Group and Project members](https://docs.gitlab.com/ee/api/members.html) -* [Tags](https://docs.gitlab.com/ee/api/tags.html) -* [Releases](https://docs.gitlab.com/ee/api/releases/index.html) -* [Group Labels](https://docs.gitlab.com/ee/api/group_labels.html) -* [Project Labels](https://docs.gitlab.com/ee/api/labels.html) -* [Epics](https://docs.gitlab.com/ee/api/epics.html) \(only available for GitLab Ultimate and GitLab.com Gold accounts\) -* [Epic Issues](https://docs.gitlab.com/ee/api/epic_issues.html) \(only available for GitLab Ultimate and GitLab.com Gold accounts\) +- [Branches](https://docs.gitlab.com/ee/api/branches.html) +- [Commits](https://docs.gitlab.com/ee/api/commits.html) \(Incremental\) +- [Issues](https://docs.gitlab.com/ee/api/issues.html) \(Incremental\) +- [Group Issue Boards](https://docs.gitlab.com/ee/api/group_boards.html) +- [Pipelines](https://docs.gitlab.com/ee/api/pipelines.html) \(Incremental\) +- [Jobs](https://docs.gitlab.com/ee/api/jobs.html) +- [Projects](https://docs.gitlab.com/ee/api/projects.html) +- [Project Milestones](https://docs.gitlab.com/ee/api/milestones.html) +- [Project Merge Requests](https://docs.gitlab.com/ee/api/merge_requests.html) \(Incremental\) +- [Users](https://docs.gitlab.com/ee/api/users.html) +- [Groups](https://docs.gitlab.com/ee/api/groups.html) +- [Group Milestones](https://docs.gitlab.com/ee/api/group_milestones.html) +- [Group and Project members](https://docs.gitlab.com/ee/api/members.html) +- 
[Tags](https://docs.gitlab.com/ee/api/tags.html) +- [Releases](https://docs.gitlab.com/ee/api/releases/index.html) +- [Group Labels](https://docs.gitlab.com/ee/api/group_labels.html) +- [Project Labels](https://docs.gitlab.com/ee/api/labels.html) +- [Epics](https://docs.gitlab.com/ee/api/epics.html) \(only available for GitLab Ultimate and GitLab.com Gold accounts\) +- [Epic Issues](https://docs.gitlab.com/ee/api/epic_issues.html) \(only available for GitLab Ultimate and GitLab.com Gold accounts\) ## Additional information @@ -104,7 +110,7 @@ Gitlab has the [rate limits](https://docs.gitlab.com/ee/user/gitlab_com/index.ht ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:-------------------------------------------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :----------------------------------------------------------------------------------------- | | 1.6.0 | 2023-06-30 | [27869](https://github.com/airbytehq/airbyte/pull/27869) | Add `shared_runners_setting` field to groups | | 1.5.1 | 2023-06-24 | [27679](https://github.com/airbytehq/airbyte/pull/27679) | Fix formatting | | 1.5.0 | 2023-06-15 | [27392](https://github.com/airbytehq/airbyte/pull/27392) | Make API URL an optional parameter in spec. | diff --git a/docs/integrations/sources/google-ads.md b/docs/integrations/sources/google-ads.md index 228d6d7defaf..cfabd4349ad4 100644 --- a/docs/integrations/sources/google-ads.md +++ b/docs/integrations/sources/google-ads.md @@ -9,7 +9,7 @@ This page contains the setup guide and reference information for the Google Ads - (For Airbyte Open Source): - A Developer Token - OAuth credentials to authenticate your Google account - + ## Setup guide @@ -24,10 +24,10 @@ To set up the Google Ads source connector with Airbyte Open Source, you will nee 2. To apply for the developer token, please follow [Google's instructions](https://developers.google.com/google-ads/api/docs/first-call/dev-token). 3. When you apply for the token, make sure to include the following: - - Why you need the token (example: Want to run some internal analytics) - - That you will be using the Airbyte Open Source project - - That you have full access to the code base (because we're open source) - - That you have full access to the server running the code (because you're self-hosting Airbyte) + - Why you need the token (example: Want to run some internal analytics) + - That you will be using the Airbyte Open Source project + - That you have full access to the code base (because we're open source) + - That you have full access to the server running the code (because you're self-hosting Airbyte) :::note You will _not_ be able to access your data via the Google Ads API until this token is approved. You cannot use a test developer token; it has to be at least a basic developer token. The approval process typically takes around 24 hours. @@ -61,7 +61,7 @@ To set up Google Ads as a source in Airbyte Cloud: 7. Enter a **Start Date** using the provided datepicker, or by programmatically entering the date in YYYY-MM-DD format. The data added on and after this date will be replicated. 8. (Optional) You can use the **Custom GAQL Queries** field to enter a custom query using Google Ads Query Language. Click **Add** and enter your query, as well as the desired name of the table for this data in the destination. Multiple queries can be provided. 
For more information on formulating these queries, refer to our [guide below](#custom-query-understanding-google-ads-query-language).
9. (Required for Manager accounts) If accessing your account through a Google Ads Manager account, you must enter the [**Customer ID**](https://developers.google.com/google-ads/api/docs/concepts/call-structure#cid) of the Manager account.
-10. (Optional) Enter a **Conversion Window**. This is the number of days after an ad interaction during which a conversion is recorded in Google Ads. For more information on this topic, refer to the [Google Ads Help Center](https://support.google.com/google-ads/answer/3123169?hl=en). This field defaults to 14 days.
+10. (Optional) Enter a **Conversion Window**. This is the number of days after an ad interaction during which a conversion is recorded in Google Ads. For more information on this topic, refer to the [Google Ads Help Center](https://support.google.com/google-ads/answer/3123169?hl=en). This field defaults to 14 days.
11. (Optional) Enter an **End Date** in YYYY-MM-DD format. Any data added after this date will not be replicated. Leaving this field blank will replicate all data from the start date onward.
12. Click **Set up source** and wait for the tests to complete.
@@ -95,7 +95,7 @@ The Google Ads source connector supports the following [sync modes](https://docs
 - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
 - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
 - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-- [Incremental Sync - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)
 ## Supported Streams
@@ -116,7 +116,7 @@ The Google Ads source connector can sync the following tables. It can also sync
 Note that `ad_groups`, `ad_group_ads`, and `campaigns` contain a `labels` field, which should be joined against their respective `*_labels` streams if you want to view the actual labels. For example, the `ad_groups` stream contains an `ad_group.labels` field, which you would join against the `ad_group_labels` stream's `label.resource_name` field.
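
As an illustration of that join, the sketch below uses hypothetical rows shaped like the streams described above; the exact field names in your destination may differ depending on normalization settings.

```python
# Hypothetical rows as they might land in a destination after a sync;
# field names follow the stream descriptions above.
ad_group_labels = [
    {"label.resource_name": "customers/123/labels/111", "label.name": "Brand"},
    {"label.resource_name": "customers/123/labels/222", "label.name": "Competitor"},
]
ad_groups = [
    {"ad_group.id": 987, "ad_group.labels": ["customers/123/labels/111"]},
]

# Build a lookup keyed on label.resource_name, then resolve each ad group's
# label resource names to their display names.
label_names = {row["label.resource_name"]: row["label.name"] for row in ad_group_labels}
for ad_group in ad_groups:
    names = [label_names.get(resource) for resource in ad_group["ad_group.labels"]]
    print(ad_group["ad_group.id"], names)
```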
### Report Tables - + - [account_performance_report](https://developers.google.com/google-ads/api/docs/migration/mapping#account_performance) - [ad_groups](https://developers.google.com/google-ads/api/fields/v14/ad_group) - [ad_group_ad_report](https://developers.google.com/google-ads/api/docs/migration/mapping#ad_performance) @@ -183,10 +183,10 @@ Due to a limitation in the Google Ads API which does not allow getting performan ## Changelog | Version | Date | Pull Request | Subject | -|:---------|:-----------|:---------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------| -| `0.7.4` | 2023-07-28 | [28832](https://github.com/airbytehq/airbyte/pull/28832) | Update field descriptions | +| :------- | :--------- | :------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------- | +| `0.7.4` | 2023-07-28 | [28832](https://github.com/airbytehq/airbyte/pull/28832) | Update field descriptions | | `0.7.3` | 2023-07-24 | [28510](https://github.com/airbytehq/airbyte/pull/28510) | Set dates with client's timezone | -| `0.7.2` | 2023-07-20 | [28535](https://github.com/airbytehq/airbyte/pull/28535) | UI improvement: Make the query field in custom reports a multi-line string field | +| `0.7.2` | 2023-07-20 | [28535](https://github.com/airbytehq/airbyte/pull/28535) | UI improvement: Make the query field in custom reports a multi-line string field | | `0.7.1` | 2023-07-17 | [28365](https://github.com/airbytehq/airbyte/pull/28365) | 0.3.1 and 0.3.2 follow up: make today the end date, not yesterday | | `0.7.0` | 2023-07-12 | [28246](https://github.com/airbytehq/airbyte/pull/28246) | Add new streams: labels, criterions, biddig strategies | | `0.6.1` | 2023-07-12 | [28230](https://github.com/airbytehq/airbyte/pull/28230) | Reduce amount of logs produced by the connector while working with big amount of data | @@ -263,4 +263,3 @@ Due to a limitation in the Google Ads API which does not allow getting performan | `0.1.3` | 2021-07-23 | [4788](https://github.com/airbytehq/airbyte/pull/4788) | Support main streams, fix bug with exception `DATE_RANGE_TOO_NARROW` for incremental streams | | `0.1.2` | 2021-07-06 | [4539](https://github.com/airbytehq/airbyte/pull/4539) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support | | `0.1.1` | 2021-06-23 | [4288](https://github.com/airbytehq/airbyte/pull/4288) | Fix `Bugfix: Correctly declare required parameters` | - diff --git a/docs/integrations/sources/google-analytics-data-api.md b/docs/integrations/sources/google-analytics-data-api.md index 397790541234..1a9ad3dc014f 100644 --- a/docs/integrations/sources/google-analytics-data-api.md +++ b/docs/integrations/sources/google-analytics-data-api.md @@ -68,7 +68,7 @@ The Google Analytics source connector supports the following [sync modes](https: - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Incremental - Append + 
Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) ## Supported Streams @@ -110,7 +110,7 @@ This connector outputs the following incremental streams: ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :---------------------------------------------------------------------------- | | 1.1.2 | 2023-07-03 | [27909](https://github.com/airbytehq/airbyte/pull/27909) | Limit the page size of custom report streams | | 1.1.1 | 2023-06-26 | [27718](https://github.com/airbytehq/airbyte/pull/27718) | Limit the page size when calling `check()` | | 1.1.0 | 2023-06-26 | [27738](https://github.com/airbytehq/airbyte/pull/27738) | License Update: Elv2 | diff --git a/docs/integrations/sources/google-analytics-v4.md b/docs/integrations/sources/google-analytics-v4.md index 38d24e195af4..7b49e28ceb25 100644 --- a/docs/integrations/sources/google-analytics-v4.md +++ b/docs/integrations/sources/google-analytics-v4.md @@ -73,7 +73,7 @@ The Google Analytics source connector supports the following [sync modes](https: - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental Sync - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) :::caution diff --git a/docs/integrations/sources/google-search-console.md b/docs/integrations/sources/google-search-console.md index f5925e6c4b46..524bd199bdac 100644 --- a/docs/integrations/sources/google-search-console.md +++ b/docs/integrations/sources/google-search-console.md @@ -94,7 +94,7 @@ The Google Search Console Source connector supports the following [ sync modes]( - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) :::note The granularity for the cursor is 1 day, so Incremental Sync in Append mode may result in duplicating the data. 
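
To illustrate why that matters, the sketch below collapses duplicated daily rows to the latest record per primary key, which is conceptually what the Append + Deduped mode (or a downstream dedup step) does for you. The column names are illustrative only.

```python
# Toy records as they might accumulate in a raw "append" table after two
# overlapping daily syncs; (site_url, date) acts as the primary key here.
records = [
    {"site_url": "sc-domain:example.com", "date": "2023-08-01", "clicks": 10, "_emitted_at": 1},
    {"site_url": "sc-domain:example.com", "date": "2023-08-01", "clicks": 12, "_emitted_at": 2},
    {"site_url": "sc-domain:example.com", "date": "2023-08-02", "clicks": 7, "_emitted_at": 2},
]

# Keep only the most recently emitted row per primary key.
latest = {}
for record in records:
    key = (record["site_url"], record["date"])
    if key not in latest or record["_emitted_at"] > latest[key]["_emitted_at"]:
        latest[key] = record

deduped = list(latest.values())
print(deduped)  # one row per (site_url, date), with the newest click counts
```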
@@ -137,7 +137,7 @@ This connector attempts to back off gracefully when it hits Reports API's rate l ## Changelog | Version | Date | Pull Request | Subject | -|:---------|:-----------|:--------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------| +| :------- | :--------- | :------------------------------------------------------------------------------------------------------------ | :----------------------------------------------------------------------------------------------------------------------------- | | `1.2.1` | 2023-07-04 | [27952](https://github.com/airbytehq/airbyte/pull/27952) | Removed deprecated `searchType`, added `discover`(Discover results) and `googleNews`(Results from news.google.com, etc.) types | | `1.2.0` | 2023-06-29 | [27831](https://github.com/airbytehq/airbyte/pull/27831) | Add new streams | | `1.1.0` | 2023-06-26 | [27738](https://github.com/airbytehq/airbyte/pull/27738) | License Update: Elv2 | diff --git a/docs/integrations/sources/greenhouse.md b/docs/integrations/sources/greenhouse.md index 40f41158557a..4c169e791c30 100644 --- a/docs/integrations/sources/greenhouse.md +++ b/docs/integrations/sources/greenhouse.md @@ -12,49 +12,49 @@ To set up the Greenhouse source connector, you'll need the [Harvest API key](htt 2. Click **Sources** and then click **+ New source**. 3. On the Set up the source page, select **Greenhouse** from the Source type dropdown. 4. Enter the name for the Greenhouse connector. -4. Enter your [**Harvest API Key**](https://developers.greenhouse.io/harvest.html#authentication) that you obtained from Greenhouse. -5. Click **Set up source**. +5. Enter your [**Harvest API Key**](https://developers.greenhouse.io/harvest.html#authentication) that you obtained from Greenhouse. +6. Click **Set up source**. 
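
If you want to sanity-check the key before running a sync, a minimal request against the Harvest API might look like the sketch below. The key is a placeholder, and the endpoint and basic-auth scheme (API key as the username, blank password) follow Greenhouse's Harvest API documentation.

```python
import requests

HARVEST_API_KEY = "YOUR_GREENHOUSE_HARVEST_API_KEY"  # placeholder credential

# A 200 response confirms the key is valid and has Harvest read permissions;
# a 401 means the key or its permissions need to be fixed first.
response = requests.get(
    "https://harvest.greenhouse.io/v1/candidates",
    params={"per_page": 1},
    auth=(HARVEST_API_KEY, ""),  # API key as basic-auth username, blank password
    timeout=30,
)
response.raise_for_status()
print(response.json())
```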
## Supported sync modes The Greenhouse source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) -* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) -* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) ## Supported Streams -* [Activity Feed](https://developers.greenhouse.io/harvest.html#get-retrieve-activity-feed) -* [Applications](https://developers.greenhouse.io/harvest.html#get-list-applications) -* [Applications Interviews](https://developers.greenhouse.io/harvest.html#get-list-scheduled-interviews-for-application) -* [Approvals](https://developers.greenhouse.io/harvest.html#get-list-approvals-for-job) -* [Candidates](https://developers.greenhouse.io/harvest.html#get-list-candidates) -* [Close Reasons](https://developers.greenhouse.io/harvest.html#get-list-close-reasons) -* [Custom Fields](https://developers.greenhouse.io/harvest.html#get-list-custom-fields) -* [Degrees](https://developers.greenhouse.io/harvest.html#get-list-degrees) -* [Departments](https://developers.greenhouse.io/harvest.html#get-list-departments) -* [Disciplines](https://developers.greenhouse.io/harvest.html#get-list-approvals-for-job) -* [EEOC](https://developers.greenhouse.io/harvest.html#get-list-eeoc) -* [Email Templates](https://developers.greenhouse.io/harvest.html#get-list-email-templates) -* [Interviews](https://developers.greenhouse.io/harvest.html#get-list-scheduled-interviews) -* [Job Posts](https://developers.greenhouse.io/harvest.html#get-list-job-posts) -* [Job Stages](https://developers.greenhouse.io/harvest.html#get-list-job-stages) -* [Jobs](https://developers.greenhouse.io/harvest.html#get-list-jobs) -* [Job Openings](https://developers.greenhouse.io/harvest.html#get-list-job-openings) -* [Jobs Stages](https://developers.greenhouse.io/harvest.html#get-list-job-stages-for-job) -* [Offers](https://developers.greenhouse.io/harvest.html#get-list-offers) -* [Offices](https://developers.greenhouse.io/harvest.html#get-list-offices) -* [Prospect Pools](https://developers.greenhouse.io/harvest.html#get-list-prospect-pools) -* [Rejection Reasons](https://developers.greenhouse.io/harvest.html#get-list-rejection-reasons) -* [Schools](https://developers.greenhouse.io/harvest.html#get-list-schools) -* [Scorecards](https://developers.greenhouse.io/harvest.html#get-list-scorecards) -* [Sources](https://developers.greenhouse.io/harvest.html#get-list-sources) -* [Tags](https://developers.greenhouse.io/harvest.html#get-list-candidate-tags) -* [Users](https://developers.greenhouse.io/harvest.html#get-list-users) -* [User Permissions](https://developers.greenhouse.io/harvest.html#get-list-job-permissions) -* [User 
Roles](https://developers.greenhouse.io/harvest.html#the-user-role-object) +- [Activity Feed](https://developers.greenhouse.io/harvest.html#get-retrieve-activity-feed) +- [Applications](https://developers.greenhouse.io/harvest.html#get-list-applications) +- [Applications Interviews](https://developers.greenhouse.io/harvest.html#get-list-scheduled-interviews-for-application) +- [Approvals](https://developers.greenhouse.io/harvest.html#get-list-approvals-for-job) +- [Candidates](https://developers.greenhouse.io/harvest.html#get-list-candidates) +- [Close Reasons](https://developers.greenhouse.io/harvest.html#get-list-close-reasons) +- [Custom Fields](https://developers.greenhouse.io/harvest.html#get-list-custom-fields) +- [Degrees](https://developers.greenhouse.io/harvest.html#get-list-degrees) +- [Departments](https://developers.greenhouse.io/harvest.html#get-list-departments) +- [Disciplines](https://developers.greenhouse.io/harvest.html#get-list-approvals-for-job) +- [EEOC](https://developers.greenhouse.io/harvest.html#get-list-eeoc) +- [Email Templates](https://developers.greenhouse.io/harvest.html#get-list-email-templates) +- [Interviews](https://developers.greenhouse.io/harvest.html#get-list-scheduled-interviews) +- [Job Posts](https://developers.greenhouse.io/harvest.html#get-list-job-posts) +- [Job Stages](https://developers.greenhouse.io/harvest.html#get-list-job-stages) +- [Jobs](https://developers.greenhouse.io/harvest.html#get-list-jobs) +- [Job Openings](https://developers.greenhouse.io/harvest.html#get-list-job-openings) +- [Jobs Stages](https://developers.greenhouse.io/harvest.html#get-list-job-stages-for-job) +- [Offers](https://developers.greenhouse.io/harvest.html#get-list-offers) +- [Offices](https://developers.greenhouse.io/harvest.html#get-list-offices) +- [Prospect Pools](https://developers.greenhouse.io/harvest.html#get-list-prospect-pools) +- [Rejection Reasons](https://developers.greenhouse.io/harvest.html#get-list-rejection-reasons) +- [Schools](https://developers.greenhouse.io/harvest.html#get-list-schools) +- [Scorecards](https://developers.greenhouse.io/harvest.html#get-list-scorecards) +- [Sources](https://developers.greenhouse.io/harvest.html#get-list-sources) +- [Tags](https://developers.greenhouse.io/harvest.html#get-list-candidate-tags) +- [Users](https://developers.greenhouse.io/harvest.html#get-list-users) +- [User Permissions](https://developers.greenhouse.io/harvest.html#get-list-job-permissions) +- [User Roles](https://developers.greenhouse.io/harvest.html#the-user-role-object) ## Performance considerations @@ -63,8 +63,8 @@ The Greenhouse connector should not run into Greenhouse API limitations under no ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 0.4.2 | 2023-08-02 | [28969](https://github.com/airbytehq/airbyte/pull/28969) | Update CDK version | +| :------ | :--------- | :------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 0.4.2 | 2023-08-02 | [28969](https://github.com/airbytehq/airbyte/pull/28969) | Update CDK version | | 0.4.1 | 2023-06-28 | [27773](https://github.com/airbytehq/airbyte/pull/27773) | Update 
following state breaking changes | | 0.4.0 | 2023-04-26 | [25332](https://github.com/airbytehq/airbyte/pull/25332) | Add new streams: `ActivityFeed`, `Approvals`, `Disciplines`, `Eeoc`, `EmailTemplates`, `Offices`, `ProspectPools`, `Schools`, `Tags`, `UserPermissions`, `UserRoles` | | 0.3.1 | 2023-03-06 | [23231](https://github.com/airbytehq/airbyte/pull/23231) | Publish using low-code CDK Beta version | diff --git a/docs/integrations/sources/harvest.md b/docs/integrations/sources/harvest.md index d0bb23ecf711..31ca38f85400 100644 --- a/docs/integrations/sources/harvest.md +++ b/docs/integrations/sources/harvest.md @@ -9,6 +9,7 @@ To set up the Harvest source connector, you'll need the [Harvest Account ID and ## Setup guide + **For Airbyte Cloud:** 1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces). @@ -22,6 +23,7 @@ To set up the Harvest source connector, you'll need the [Harvest Account ID and + **For Airbyte Open Source:** 1. Navigate to the Airbyte Open Source dashboard. @@ -38,38 +40,38 @@ To set up the Harvest source connector, you'll need the [Harvest Account ID and The Harvest source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) -* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) -* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) ## Supported Streams -* [Client Contacts](https://help.getharvest.com/api-v2/clients-api/clients/contacts/) \(Incremental\) -* [Clients](https://help.getharvest.com/api-v2/clients-api/clients/clients/) \(Incremental\) -* [Company](https://help.getharvest.com/api-v2/company-api/company/company/) -* [Invoice Messages](https://help.getharvest.com/api-v2/invoices-api/invoices/invoice-messages/) \(Incremental\) -* [Invoice Payments](https://help.getharvest.com/api-v2/invoices-api/invoices/invoice-payments/) \(Incremental\) -* [Invoices](https://help.getharvest.com/api-v2/invoices-api/invoices/invoices/) \(Incremental\) -* [Invoice Item Categories](https://help.getharvest.com/api-v2/invoices-api/invoices/invoice-item-categories/) \(Incremental\) -* [Estimate Messages](https://help.getharvest.com/api-v2/estimates-api/estimates/estimate-messages/) \(Incremental\) -* [Estimates](https://help.getharvest.com/api-v2/estimates-api/estimates/estimates/) \(Incremental\) -* [Estimate Item Categories](https://help.getharvest.com/api-v2/estimates-api/estimates/estimate-item-categories/) \(Incremental\) -* [Expenses](https://help.getharvest.com/api-v2/expenses-api/expenses/expenses/) \(Incremental\) -* [Expense Categories](https://help.getharvest.com/api-v2/expenses-api/expenses/expense-categories/) \(Incremental\) -* [Tasks](https://help.getharvest.com/api-v2/tasks-api/tasks/tasks/) \(Incremental\) -* [Time 
Entries](https://help.getharvest.com/api-v2/timesheets-api/timesheets/time-entries/) \(Incremental\) -* [Project User Assignments](https://help.getharvest.com/api-v2/projects-api/projects/user-assignments/) \(Incremental\) -* [Project Task Assignments](https://help.getharvest.com/api-v2/projects-api/projects/task-assignments/) \(Incremental\) -* [Projects](https://help.getharvest.com/api-v2/projects-api/projects/projects/) \(Incremental\) -* [Roles](https://help.getharvest.com/api-v2/roles-api/roles/roles/) \(Incremental\) -* [User Billable Rates](https://help.getharvest.com/api-v2/users-api/users/billable-rates/) -* [User Cost Rates](https://help.getharvest.com/api-v2/users-api/users/cost-rates/) -* [User Project Assignments](https://help.getharvest.com/api-v2/users-api/users/project-assignments/) \(Incremental\) -* [Expense Reports](https://help.getharvest.com/api-v2/reports-api/reports/expense-reports/) -* [Uninvoiced Report](https://help.getharvest.com/api-v2/reports-api/reports/uninvoiced-report/) -* [Time Reports](https://help.getharvest.com/api-v2/reports-api/reports/time-reports/) -* [Project Budget Report](https://help.getharvest.com/api-v2/reports-api/reports/project-budget-report/) +- [Client Contacts](https://help.getharvest.com/api-v2/clients-api/clients/contacts/) \(Incremental\) +- [Clients](https://help.getharvest.com/api-v2/clients-api/clients/clients/) \(Incremental\) +- [Company](https://help.getharvest.com/api-v2/company-api/company/company/) +- [Invoice Messages](https://help.getharvest.com/api-v2/invoices-api/invoices/invoice-messages/) \(Incremental\) +- [Invoice Payments](https://help.getharvest.com/api-v2/invoices-api/invoices/invoice-payments/) \(Incremental\) +- [Invoices](https://help.getharvest.com/api-v2/invoices-api/invoices/invoices/) \(Incremental\) +- [Invoice Item Categories](https://help.getharvest.com/api-v2/invoices-api/invoices/invoice-item-categories/) \(Incremental\) +- [Estimate Messages](https://help.getharvest.com/api-v2/estimates-api/estimates/estimate-messages/) \(Incremental\) +- [Estimates](https://help.getharvest.com/api-v2/estimates-api/estimates/estimates/) \(Incremental\) +- [Estimate Item Categories](https://help.getharvest.com/api-v2/estimates-api/estimates/estimate-item-categories/) \(Incremental\) +- [Expenses](https://help.getharvest.com/api-v2/expenses-api/expenses/expenses/) \(Incremental\) +- [Expense Categories](https://help.getharvest.com/api-v2/expenses-api/expenses/expense-categories/) \(Incremental\) +- [Tasks](https://help.getharvest.com/api-v2/tasks-api/tasks/tasks/) \(Incremental\) +- [Time Entries](https://help.getharvest.com/api-v2/timesheets-api/timesheets/time-entries/) \(Incremental\) +- [Project User Assignments](https://help.getharvest.com/api-v2/projects-api/projects/user-assignments/) \(Incremental\) +- [Project Task Assignments](https://help.getharvest.com/api-v2/projects-api/projects/task-assignments/) \(Incremental\) +- [Projects](https://help.getharvest.com/api-v2/projects-api/projects/projects/) \(Incremental\) +- [Roles](https://help.getharvest.com/api-v2/roles-api/roles/roles/) \(Incremental\) +- [User Billable Rates](https://help.getharvest.com/api-v2/users-api/users/billable-rates/) +- [User Cost Rates](https://help.getharvest.com/api-v2/users-api/users/cost-rates/) +- [User Project Assignments](https://help.getharvest.com/api-v2/users-api/users/project-assignments/) \(Incremental\) +- [Expense Reports](https://help.getharvest.com/api-v2/reports-api/reports/expense-reports/) +- [Uninvoiced 
Report](https://help.getharvest.com/api-v2/reports-api/reports/uninvoiced-report/) +- [Time Reports](https://help.getharvest.com/api-v2/reports-api/reports/time-reports/) +- [Project Budget Report](https://help.getharvest.com/api-v2/reports-api/reports/project-budget-report/) ## Performance considerations @@ -78,7 +80,7 @@ The connector is restricted by the [Harvest rate limits](https://help.getharvest ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:-----------------------------------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :--------------------------------------------------------------------------------- | | 0.1.18 | 2023-05-29 | [26714](https://github.com/airbytehq/airbyte/pull/26714) | Remove `authSpecification` from spec in favour of `advancedAuth` | | 0.1.17 | 2023-03-03 | [22983](https://github.com/airbytehq/airbyte/pull/22983) | Specified date formatting in specification | | 0.1.16 | 2023-02-07 | [22417](https://github.com/airbytehq/airbyte/pull/22417) | Turn on default HttpAvailabilityStrategy | diff --git a/docs/integrations/sources/instagram.md b/docs/integrations/sources/instagram.md index af5f2dcc1d88..d16ba32b25a8 100644 --- a/docs/integrations/sources/instagram.md +++ b/docs/integrations/sources/instagram.md @@ -4,16 +4,18 @@ This page contains the setup guide and reference information for the Instagram s ## Prerequisites -* [Meta for Developers account](https://developers.facebook.com) -* [Instagram business account](https://www.facebook.com/business/help/898752960195806) to your Facebook page -* [Instagram Graph API](https://developers.facebook.com/docs/instagram-api/) to your Facebook app -* [Facebook OAuth Reference](https://developers.facebook.com/docs/instagram-basic-display-api/reference) -* [Facebook ad account ID number](https://www.facebook.com/business/help/1492627900875762) (you'll use this to configure Instagram as a source in Airbyte) +- [Meta for Developers account](https://developers.facebook.com) +- [Instagram business account](https://www.facebook.com/business/help/898752960195806) to your Facebook page +- [Instagram Graph API](https://developers.facebook.com/docs/instagram-api/) to your Facebook app +- [Facebook OAuth Reference](https://developers.facebook.com/docs/instagram-basic-display-api/reference) +- [Facebook ad account ID number](https://www.facebook.com/business/help/1492627900875762) (you'll use this to configure Instagram as a source in Airbyte) ## Setup Guide + ### Set up the Instagram connector in Airbyte + **For Airbyte Cloud:** 1. Log in to your [Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. @@ -27,6 +29,7 @@ This page contains the setup guide and reference information for the Instagram s + **For Airbyte Open Source:** 1. Log in to your Airbyte Open Source account. @@ -36,15 +39,17 @@ This page contains the setup guide and reference information for the Instagram s 5. Click **Authenticate your Instagram account**. 6. Log in and authorize the Instagram account. 7. Enter the **Start Date** in YYYY-MM-DDTHH:mm:ssZ format. All data generated after this date will be replicated. If this field is blank, Airbyte will replicate all data. -9. Click **Set up source**. +8. Click **Set up source**. 
## Supported sync modes
+
The Instagram source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):
-* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
-* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
-* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
+- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)
:::note
@@ -53,50 +58,50 @@ Incremental sync modes are only available for the [User Insights](https://develo
:::
## Supported Streams
+
The Instagram source connector supports the following streams. For more information, see the [Instagram Graph API](https://developers.facebook.com/docs/instagram-api/) and [Instagram Insights API documentation](https://developers.facebook.com/docs/instagram-api/guides/insights/).
-* [User](https://developers.facebook.com/docs/instagram-api/reference/ig-user)
- * [User Insights](https://developers.facebook.com/docs/instagram-api/reference/ig-user/insights)
-* [Media](https://developers.facebook.com/docs/instagram-api/reference/ig-user/media)
- * [Media Insights](https://developers.facebook.com/docs/instagram-api/reference/ig-media/insights)
-* [Stories](https://developers.facebook.com/docs/instagram-api/reference/ig-user/stories/)
- * [Story Insights](https://developers.facebook.com/docs/instagram-api/reference/ig-media/insights)
+- [User](https://developers.facebook.com/docs/instagram-api/reference/ig-user)
+ - [User Insights](https://developers.facebook.com/docs/instagram-api/reference/ig-user/insights)
+- [Media](https://developers.facebook.com/docs/instagram-api/reference/ig-user/media)
+ - [Media Insights](https://developers.facebook.com/docs/instagram-api/reference/ig-media/insights)
+- [Stories](https://developers.facebook.com/docs/instagram-api/reference/ig-user/stories/)
+ - [Story Insights](https://developers.facebook.com/docs/instagram-api/reference/ig-media/insights)
### Rate Limiting and Performance Considerations
Instagram limits the number of requests that can be made at a time, but the Instagram connector gracefully handles rate limiting. See Facebook's [documentation on rate limiting](https://developers.facebook.com/docs/graph-api/overview/rate-limiting/#instagram-graph-api) for more information.
-
## Data type map
+
AirbyteRecords are required to conform to the [Airbyte type](https://docs.airbyte.com/understanding-airbyte/supported-data-types/) system. This means that all sources must produce schemas and records within these types and all destinations must handle records that conform to this type system.
| Integration Type | Airbyte Type | -|:-----------------|:-------------| +| :--------------- | :----------- | | `string` | `string` | | `number` | `number` | | `array` | `array` | | `object` | `object` | - ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----|:-------------|:--------| -| 1.0.11 | 2023-08-03 | [29031](https://github.com/airbytehq/airbyte/pull/29031) | Reverted `advancedAuth` spec changes | -| 1.0.10 | 2023-08-01 | [28910](https://github.com/airbytehq/airbyte/pull/28910) | Updated `advancedAuth` broken references | -| 1.0.9 | 2023-07-01 | [27908](https://github.com/airbytehq/airbyte/pull/27908) | Fix bug when `user_lifetime_insights` stream returns `Key Error (end_time)`, refactored `state` to use `IncrementalMixin` | -| 1.0.8 | 2023-05-26 | [26767](https://github.com/airbytehq/airbyte/pull/26767) | Handle permission error for `insights` | -| 1.0.7 | 2023-05-26 | [26656](https://github.com/airbytehq/airbyte/pull/26656) | Remove `authSpecification` from connector specification in favour of `advancedAuth` | -| 1.0.6 | 2023-03-28 | [26599](https://github.com/airbytehq/airbyte/pull/26599) | Handle error for Media posted before business account conversion | -| 1.0.5 | 2023-03-28 | [24634](https://github.com/airbytehq/airbyte/pull/24634) | Add user-friendly message for no instagram_business_accounts case | -| 1.0.4 | 2023-03-15 | [23671](https://github.com/airbytehq/airbyte/pull/23671) | Add info about main permissions in spec and doc links in error message to navigate user | -| 1.0.3 | 2023-03-14 | [24043](https://github.com/airbytehq/airbyte/pull/24043) | Do not emit incomplete records for `user_insights` stream | -| 1.0.2 | 2023-03-14 | [24042](https://github.com/airbytehq/airbyte/pull/24042) | Test publish flow | -| 1.0.1 | 2023-01-19 | [21602](https://github.com/airbytehq/airbyte/pull/21602) | Handle abnormally large state values | -| 1.0.0 | 2022-09-23 | [17110](https://github.com/airbytehq/airbyte/pull/17110) | Remove custom read function and migrate to per-stream state | -| 0.1.11 | 2022-09-08 | [16428](https://github.com/airbytehq/airbyte/pull/16428) | Fix requests metrics for Reels media product type | -| 0.1.10 | 2022-09-05 | [16340](https://github.com/airbytehq/airbyte/pull/16340) | Update to latest version of the CDK (v0.1.81) | -| 0.1.9 | 2021-09-30 | [6438](https://github.com/airbytehq/airbyte/pull/6438) | Annotate Oauth2 flow initialization parameters in connector specification | -| 0.1.8 | 2021-08-11 | [5354](https://github.com/airbytehq/airbyte/pull/5354) | Added check for empty state and fixed tests | -| 0.1.7 | 2021-07-19 | [4805](https://github.com/airbytehq/airbyte/pull/4805) | Add support for previous `STATE` format | -| 0.1.6 | 2021-07-07 | [4210](https://github.com/airbytehq/airbyte/pull/4210) | Refactor connector to use CDK: - improve error handling - fix sync fail with HTTP status 400 - integrate SAT | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------ | +| 1.0.11 | 2023-08-03 | [29031](https://github.com/airbytehq/airbyte/pull/29031) | Reverted `advancedAuth` spec changes | +| 1.0.10 | 2023-08-01 | [28910](https://github.com/airbytehq/airbyte/pull/28910) | Updated `advancedAuth` broken references | +| 1.0.9 | 2023-07-01 | [27908](https://github.com/airbytehq/airbyte/pull/27908) | Fix bug when `user_lifetime_insights` stream returns 
`Key Error (end_time)`, refactored `state` to use `IncrementalMixin` | +| 1.0.8 | 2023-05-26 | [26767](https://github.com/airbytehq/airbyte/pull/26767) | Handle permission error for `insights` | +| 1.0.7 | 2023-05-26 | [26656](https://github.com/airbytehq/airbyte/pull/26656) | Remove `authSpecification` from connector specification in favour of `advancedAuth` | +| 1.0.6 | 2023-03-28 | [26599](https://github.com/airbytehq/airbyte/pull/26599) | Handle error for Media posted before business account conversion | +| 1.0.5 | 2023-03-28 | [24634](https://github.com/airbytehq/airbyte/pull/24634) | Add user-friendly message for no instagram_business_accounts case | +| 1.0.4 | 2023-03-15 | [23671](https://github.com/airbytehq/airbyte/pull/23671) | Add info about main permissions in spec and doc links in error message to navigate user | +| 1.0.3 | 2023-03-14 | [24043](https://github.com/airbytehq/airbyte/pull/24043) | Do not emit incomplete records for `user_insights` stream | +| 1.0.2 | 2023-03-14 | [24042](https://github.com/airbytehq/airbyte/pull/24042) | Test publish flow | +| 1.0.1 | 2023-01-19 | [21602](https://github.com/airbytehq/airbyte/pull/21602) | Handle abnormally large state values | +| 1.0.0 | 2022-09-23 | [17110](https://github.com/airbytehq/airbyte/pull/17110) | Remove custom read function and migrate to per-stream state | +| 0.1.11 | 2022-09-08 | [16428](https://github.com/airbytehq/airbyte/pull/16428) | Fix requests metrics for Reels media product type | +| 0.1.10 | 2022-09-05 | [16340](https://github.com/airbytehq/airbyte/pull/16340) | Update to latest version of the CDK (v0.1.81) | +| 0.1.9 | 2021-09-30 | [6438](https://github.com/airbytehq/airbyte/pull/6438) | Annotate Oauth2 flow initialization parameters in connector specification | +| 0.1.8 | 2021-08-11 | [5354](https://github.com/airbytehq/airbyte/pull/5354) | Added check for empty state and fixed tests | +| 0.1.7 | 2021-07-19 | [4805](https://github.com/airbytehq/airbyte/pull/4805) | Add support for previous `STATE` format | +| 0.1.6 | 2021-07-07 | [4210](https://github.com/airbytehq/airbyte/pull/4210) | Refactor connector to use CDK: - improve error handling - fix sync fail with HTTP status 400 - integrate SAT | diff --git a/docs/integrations/sources/iterable.md b/docs/integrations/sources/iterable.md index 08adb7f61c38..bc5fcb91cd3a 100644 --- a/docs/integrations/sources/iterable.md +++ b/docs/integrations/sources/iterable.md @@ -20,57 +20,57 @@ To set up the Iterable source connector, you'll need the Iterable [`Server-side` The Iterable source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) -* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) -* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental - Append + 
Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)
## Supported Streams
-* [Campaigns](https://api.iterable.com/api/docs#campaigns_campaigns)
-* [Campaign Metrics](https://api.iterable.com/api/docs#campaigns_metrics)
-* [Channels](https://api.iterable.com/api/docs#channels_channels)
-* [Email Bounce](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [Email Click](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [Email Complaint](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [Email Open](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [Email Send](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [Email Send Skip](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [Email Subscribe](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [Email Unsubscribe](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [Events](https://api.iterable.com/api/docs#events_User_events)
-* [Lists](https://api.iterable.com/api/docs#lists_getLists)
-* [List Users](https://api.iterable.com/api/docs#lists_getLists_0)
-* [Message Types](https://api.iterable.com/api/docs#messageTypes_messageTypes)
-* [Metadata](https://api.iterable.com/api/docs#metadata_list_tables)
-* [Templates](https://api.iterable.com/api/docs#templates_getTemplates) \(Incremental\)
-* [Users](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [PushSend](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [PushSendSkip](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [PushOpen](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [PushUninstall](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [PushBounce](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [WebPushSend](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [WebPushClick](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [WebPushSendSkip](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [InAppSend](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [InAppOpen](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [InAppClick](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [InAppClose](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [InAppDelete](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [InAppDelivery](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [InAppSendSkip](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [InboxSession](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [InboxMessageImpression](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [SmsSend](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [SmsBounce](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [SmsClick](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [SmsReceived](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\)
-* [SmsSendSkip](https://api.iterable.com/api/docs#export_exportDataJson) 
\(Incremental\) -* [SmsUsageInfo](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) -* [Purchase](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) -* [CustomEvent](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) -* [HostedUnsubscribeClick](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [Campaigns](https://api.iterable.com/api/docs#campaigns_campaigns) +- [Campaign Metrics](https://api.iterable.com/api/docs#campaigns_metrics) +- [Channels](https://api.iterable.com/api/docs#channels_channels) +- [Email Bounce](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [Email Click](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [Email Complaint](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [Email Open](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [Email Send](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [Email Send Skip](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [Email Subscribe](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [Email Unsubscribe](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [Events](https://api.iterable.com/api/docs#events_User_events) +- [Lists](https://api.iterable.com/api/docs#lists_getLists) +- [List Users](https://api.iterable.com/api/docs#lists_getLists_0) +- [Message Types](https://api.iterable.com/api/docs#messageTypes_messageTypes) +- [Metadata](https://api.iterable.com/api/docs#metadata_list_tables) +- [Templates](https://api.iterable.com/api/docs#templates_getTemplates) \(Incremental\) +- [Users](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [PushSend](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [PushSendSkip](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [PushOpen](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [PushUninstall](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [PushBounce](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [WebPushSend](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [WebPushClick](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [WebPushSendSkip](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [InAppSend](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [InAppOpen](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [InAppClick](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [InAppClose](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [InAppDelete](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [InAppDelivery](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [InAppSendSkip](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [InboxSession](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [InboxMessageImpression](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [SmsSend](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [SmsBounce](https://api.iterable.com/api/docs#export_exportDataJson) 
\(Incremental\) +- [SmsClick](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [SmsReceived](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [SmsSendSkip](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [SmsUsageInfo](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [Purchase](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [CustomEvent](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) +- [HostedUnsubscribeClick](https://api.iterable.com/api/docs#export_exportDataJson) \(Incremental\) ## Additional notes @@ -79,7 +79,7 @@ The Iterable source connector supports the following [sync modes](https://docs.a ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------- | | 0.1.30 | 2023-07-19 | [28457](https://github.com/airbytehq/airbyte/pull/28457) | Fixed TypeError for StreamSlice in debug mode | | 0.1.29 | 2023-05-24 | [26459](https://github.com/airbytehq/airbyte/pull/26459) | Added requests reading timeout 300 seconds | | 0.1.28 | 2023-05-12 | [26014](https://github.com/airbytehq/airbyte/pull/26014) | Improve 500 handling for Events stream | @@ -104,4 +104,3 @@ The Iterable source connector supports the following [sync modes](https://docs.a | 0.1.9 | 2021-10-06 | [5915](https://github.com/airbytehq/airbyte/pull/5915) | Enable campaign_metrics stream | | 0.1.8 | 2021-09-20 | [5915](https://github.com/airbytehq/airbyte/pull/5915) | Add new streams: campaign_metrics, events | | 0.1.7 | 2021-09-20 | [6242](https://github.com/airbytehq/airbyte/pull/6242) | Updated schema for: campaigns, lists, templates, metadata | - diff --git a/docs/integrations/sources/jira.md b/docs/integrations/sources/jira.md index 63b86cfb5574..7b5772e1e429 100644 --- a/docs/integrations/sources/jira.md +++ b/docs/integrations/sources/jira.md @@ -25,7 +25,7 @@ This page contains the setup guide and reference information for the Jira source 5. Enter the **Domain** for your Jira account, e.g. `airbyteio.atlassian.net`. 6. Enter the **Email** for your Jira account which you used to generate the API token. This field is used for Authorization to your account by BasicAuth. 7. Enter the list of **Projects (Optional)** for which you need to replicate data, or leave it empty if you want to replicate data for all projects. -8. Enter the **Start Date (Optional)** from which you'd like to replicate data for Jira in the format YYYY-MM-DDTHH:MM:SSZ. All data generated after this date will be replicated, or leave it empty if you want to replicate all data. Note that it will be used only in the following streams:BoardIssues, IssueComments, IssueProperties, IssueRemoteLinks, IssueVotes, IssueWatchers, IssueWorklogs, Issues, PullRequests, SprintIssues. For other streams it will replicate all data. +8. Enter the **Start Date (Optional)** from which you'd like to replicate data for Jira in the format YYYY-MM-DDTHH:MM:SSZ. All data generated after this date will be replicated, or leave it empty if you want to replicate all data. 
Note that it will be used only in the following streams: BoardIssues, IssueComments, IssueProperties, IssueRemoteLinks, IssueVotes, IssueWatchers, IssueWorklogs, Issues, PullRequests, SprintIssues. For other streams it will replicate all data.
9. Toggle **Expand Issue Changelog** allows you to get a list of recent updates to every issue in the Issues stream.
10. Toggle **Render Issue Fields** allows returning field values rendered in HTML format in the Issues stream.
11. Toggle **Enable Experimental Streams** enables experimental PullRequests stream.
@@ -37,7 +37,7 @@ The Jira source connector supports the following [sync modes](https://docs.airby
- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-- [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)
## Troubleshooting
@@ -47,60 +47,60 @@ Check out common troubleshooting issues for the Jira connector on our Airbyte Fo
This connector outputs the following full refresh streams:
-* [Application roles](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-application-roles/#api-rest-api-3-applicationrole-get)
-* [Avatars](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-avatars/#api-rest-api-3-avatar-type-system-get)
-* [Boards](https://developer.atlassian.com/cloud/jira/software/rest/api-group-other-operations/#api-agile-1-0-board-get)
-* [Dashboards](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-dashboards/#api-rest-api-3-dashboard-get)
-* [Filters](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-filters/#api-rest-api-3-filter-search-get)
-* [Filter sharing](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-filter-sharing/#api-rest-api-3-filter-id-permission-get)
-* [Groups](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-groups/#api-rest-api-3-groups-picker-get)
-* [Issue fields](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-fields/#api-rest-api-3-field-get)
-* [Issue field configurations](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-field-configurations/#api-rest-api-3-fieldconfiguration-get)
-* [Issue custom field contexts](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-custom-field-contexts/#api-rest-api-3-field-fieldid-context-get)
-* [Issue link types](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-link-types/#api-rest-api-3-issuelinktype-get)
-* [Issue navigator settings](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-navigator-settings/#api-rest-api-3-settings-columns-get)
-* [Issue notification schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-notification-schemes/#api-rest-api-3-notificationscheme-get)
-* [Issue priorities](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-priorities/#api-rest-api-3-priority-get)
-* [Issue 
properties](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-properties/#api-rest-api-3-issue-issueidorkey-properties-propertykey-get) -* [Issue remote links](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-remote-links/#api-rest-api-3-issue-issueidorkey-remotelink-get) -* [Issue resolutions](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-resolutions/#api-rest-api-3-resolution-search-get) -* [Issue security schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-security-schemes/#api-rest-api-3-issuesecurityschemes-get) -* [Issue type schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-type-schemes/#api-rest-api-3-issuetypescheme-get) -* [Issue type screen schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-type-screen-schemes/#api-rest-api-3-issuetypescreenscheme-get) -* [Issue votes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-votes/#api-group-issue-votes) -* [Issue watchers](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-watchers/#api-rest-api-3-issue-issueidorkey-watchers-get) -* [Jira settings](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-jira-settings/#api-rest-api-3-application-properties-get) -* [Labels](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-labels/#api-rest-api-3-label-get) -* [Permissions](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-permissions/#api-rest-api-3-mypermissions-get) -* [Permission schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-permission-schemes/#api-rest-api-3-permissionscheme-get) -* [Projects](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-projects/#api-rest-api-3-project-search-get) -* [Project avatars](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-avatars/#api-rest-api-3-project-projectidorkey-avatars-get) -* [Project categories](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-categories/#api-rest-api-3-projectcategory-get) -* [Project components](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-components/#api-rest-api-3-project-projectidorkey-component-get) -* [Project email](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-email/#api-rest-api-3-project-projectid-email-get) -* [Project permission schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-permission-schemes/#api-group-project-permission-schemes) -* [Project types](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-types/#api-rest-api-3-project-type-get) -* [Project versions](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-versions/#api-rest-api-3-project-projectidorkey-version-get) -* [Screens](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-screens/#api-rest-api-3-screens-get) -* [Screen tabs](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-screen-tabs/#api-rest-api-3-screens-screenid-tabs-get) -* [Screen tab fields](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-screen-tab-fields/#api-rest-api-3-screens-screenid-tabs-tabid-fields-get) -* [Screen 
schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-screen-schemes/#api-rest-api-3-screenscheme-get) -* [Sprints](https://developer.atlassian.com/cloud/jira/software/rest/api-group-board/#api-rest-agile-1-0-board-boardid-sprint-get) -* [Time tracking](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-time-tracking/#api-rest-api-3-configuration-timetracking-list-get) -* [Users](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-user-search/#api-rest-api-3-user-search-get) -* [UsersGroupsDetailed](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-users/#api-rest-api-3-user-get) -* [Workflows](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-workflows/#api-rest-api-3-workflow-search-get) -* [Workflow schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-workflow-schemes/#api-rest-api-3-workflowscheme-get) -* [Workflow statuses](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-workflow-statuses/#api-rest-api-3-status-get) -* [Workflow status categories](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-workflow-status-categories/#api-rest-api-3-statuscategory-get) +- [Application roles](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-application-roles/#api-rest-api-3-applicationrole-get) +- [Avatars](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-avatars/#api-rest-api-3-avatar-type-system-get) +- [Boards](https://developer.atlassian.com/cloud/jira/software/rest/api-group-other-operations/#api-agile-1-0-board-get) +- [Dashboards](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-dashboards/#api-rest-api-3-dashboard-get) +- [Filters](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-filters/#api-rest-api-3-filter-search-get) +- [Filter sharing](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-filter-sharing/#api-rest-api-3-filter-id-permission-get) +- [Groups](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-groups/#api-rest-api-3-groups-picker-get) +- [Issue fields](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-fields/#api-rest-api-3-field-get) +- [Issue field configurations](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-field-configurations/#api-rest-api-3-fieldconfiguration-get) +- [Issue custom field contexts](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-custom-field-contexts/#api-rest-api-3-field-fieldid-context-get) +- [Issue link types](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-link-types/#api-rest-api-3-issuelinktype-get) +- [Issue navigator settings](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-navigator-settings/#api-rest-api-3-settings-columns-get) +- [Issue notification schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-notification-schemes/#api-rest-api-3-notificationscheme-get) +- [Issue priorities](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-priorities/#api-rest-api-3-priority-get) +- [Issue properties](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-properties/#api-rest-api-3-issue-issueidorkey-properties-propertykey-get) +- [Issue remote 
links](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-remote-links/#api-rest-api-3-issue-issueidorkey-remotelink-get) +- [Issue resolutions](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-resolutions/#api-rest-api-3-resolution-search-get) +- [Issue security schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-security-schemes/#api-rest-api-3-issuesecurityschemes-get) +- [Issue type schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-type-schemes/#api-rest-api-3-issuetypescheme-get) +- [Issue type screen schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-type-screen-schemes/#api-rest-api-3-issuetypescreenscheme-get) +- [Issue votes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-votes/#api-group-issue-votes) +- [Issue watchers](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-watchers/#api-rest-api-3-issue-issueidorkey-watchers-get) +- [Jira settings](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-jira-settings/#api-rest-api-3-application-properties-get) +- [Labels](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-labels/#api-rest-api-3-label-get) +- [Permissions](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-permissions/#api-rest-api-3-mypermissions-get) +- [Permission schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-permission-schemes/#api-rest-api-3-permissionscheme-get) +- [Projects](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-projects/#api-rest-api-3-project-search-get) +- [Project avatars](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-avatars/#api-rest-api-3-project-projectidorkey-avatars-get) +- [Project categories](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-categories/#api-rest-api-3-projectcategory-get) +- [Project components](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-components/#api-rest-api-3-project-projectidorkey-component-get) +- [Project email](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-email/#api-rest-api-3-project-projectid-email-get) +- [Project permission schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-permission-schemes/#api-group-project-permission-schemes) +- [Project types](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-types/#api-rest-api-3-project-type-get) +- [Project versions](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-project-versions/#api-rest-api-3-project-projectidorkey-version-get) +- [Screens](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-screens/#api-rest-api-3-screens-get) +- [Screen tabs](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-screen-tabs/#api-rest-api-3-screens-screenid-tabs-get) +- [Screen tab fields](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-screen-tab-fields/#api-rest-api-3-screens-screenid-tabs-tabid-fields-get) +- [Screen schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-screen-schemes/#api-rest-api-3-screenscheme-get) +- [Sprints](https://developer.atlassian.com/cloud/jira/software/rest/api-group-board/#api-rest-agile-1-0-board-boardid-sprint-get) +- [Time 
tracking](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-time-tracking/#api-rest-api-3-configuration-timetracking-list-get) +- [Users](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-user-search/#api-rest-api-3-user-search-get) +- [UsersGroupsDetailed](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-users/#api-rest-api-3-user-get) +- [Workflows](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-workflows/#api-rest-api-3-workflow-search-get) +- [Workflow schemes](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-workflow-schemes/#api-rest-api-3-workflowscheme-get) +- [Workflow statuses](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-workflow-statuses/#api-rest-api-3-status-get) +- [Workflow status categories](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-workflow-status-categories/#api-rest-api-3-statuscategory-get) This connector outputs the following incremental streams: -* [Board issues](https://developer.atlassian.com/cloud/jira/software/rest/api-group-board/#api-rest-agile-1-0-board-boardid-issue-get) -* [Issue comments](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-comments/#api-rest-api-3-issue-issueidorkey-comment-get) -* [Issue worklogs](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-worklogs/#api-rest-api-3-issue-issueidorkey-worklog-get) -* [Issues](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-search/#api-rest-api-3-search-get) -* [Sprint issues](https://developer.atlassian.com/cloud/jira/software/rest/api-group-sprint/#api-rest-agile-1-0-sprint-sprintid-issue-get) +- [Board issues](https://developer.atlassian.com/cloud/jira/software/rest/api-group-board/#api-rest-agile-1-0-board-boardid-issue-get) +- [Issue comments](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-comments/#api-rest-api-3-issue-issueidorkey-comment-get) +- [Issue worklogs](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-worklogs/#api-rest-api-3-issue-issueidorkey-worklog-get) +- [Issues](https://developer.atlassian.com/cloud/jira/platform/rest/v3/api-group-issue-search/#api-rest-api-3-search-get) +- [Sprint issues](https://developer.atlassian.com/cloud/jira/software/rest/api-group-sprint/#api-rest-agile-1-0-sprint-sprintid-issue-get) If there are more endpoints you'd like Airbyte to support, please [create an issue.](https://github.com/airbytehq/airbyte/issues/new/choose) @@ -112,7 +112,7 @@ While they will not cause a sync to fail, they may not be able to pull any data. Use the "Enable Experimental Streams" option when setting up the source to allow or disallow these tables to be selected when configuring a connection. -* Pull Requests (currently only GitHub PRs are supported) +- Pull Requests (currently only GitHub PRs are supported) ## Troubleshooting @@ -125,8 +125,8 @@ The Jira connector should not run into Jira API limitations under normal usage. 
## CHANGELOG | Version | Date | Pull Request | Subject | -|:--------|:-----------|:-----------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------| -| 0.3.12 | 2023-06-01 | [\#26652](https://github.com/airbytehq/airbyte/pull/26652) | Expand on `leads` for `projects` stream | +| :------ | :--------- | :--------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------- | +| 0.3.12 | 2023-06-01 | [\#26652](https://github.com/airbytehq/airbyte/pull/26652) | Expand on `leads` for `projects` stream | | 0.3.11 | 2023-06-01 | [\#26906](https://github.com/airbytehq/airbyte/pull/26906) | Handle project permissions error | | 0.3.10 | 2023-05-26 | [\#26652](https://github.com/airbytehq/airbyte/pull/26652) | Fixed bug when `board` doesn't support `sprints` | | 0.3.9 | 2023-05-16 | [\#26114](https://github.com/airbytehq/airbyte/pull/26114) | Update fields info in docs and spec, update to latest airbyte-cdk | @@ -157,6 +157,5 @@ The Jira connector should not run into Jira API limitations under normal usage. | 0.2.7 | 2021-07-19 | [\#4817](https://github.com/airbytehq/airbyte/pull/4817) | Fixed `labels` schema properties issue. | | 0.2.6 | 2021-06-15 | [\#4113](https://github.com/airbytehq/airbyte/pull/4113) | Fixed `user` stream with the correct endpoint and query param. | | 0.2.5 | 2021-06-09 | [\#3973](https://github.com/airbytehq/airbyte/pull/3973) | Added `AIRBYTE_ENTRYPOINT` in base Docker image for Kubernetes support. | -| 0.2.4 | | | Implementing base\_read acceptance test dived by stream groups. | +| 0.2.4 | | | Implementing base_read acceptance test dived by stream groups. | | 0.2.3 | | | Implementing incremental sync. Migrated to airbyte-cdk. Adding all available entities in Jira Cloud. | - diff --git a/docs/integrations/sources/klaviyo.md b/docs/integrations/sources/klaviyo.md index 505d4d5eda0c..2c953f7943b1 100644 --- a/docs/integrations/sources/klaviyo.md +++ b/docs/integrations/sources/klaviyo.md @@ -6,7 +6,6 @@ This page contains the setup guide and reference information for the Klaviyo sou To set up the Klaviyo source connector, you'll need the [Klaviyo Private API key](https://help.klaviyo.com/hc/en-us/articles/115005062267-How-to-Manage-Your-Account-s-API-Keys#your-private-api-keys3). - ## Set up the Klaviyo connector in Airbyte 1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) or navigate to the Airbyte Open Source dashboard. @@ -14,27 +13,27 @@ To set up the Klaviyo source connector, you'll need the [Klaviyo Private API key 3. On the Set up the source page, select **Klaviyo** from the Source type dropdown. 4. Enter the name for the Klaviyo connector. 5. For **Api Key**, enter the Klaviyo [Private API key](https://help.klaviyo.com/hc/en-us/articles/115005062267-How-to-Manage-Your-Account-s-API-Keys#your-private-api-keys3). -6. For **Start Date**, enter the date in YYYY-MM-DD format. The data added on and after this date will be replicated. +6. For **Start Date**, enter the date in YYYY-MM-DD format. The data added on and after this date will be replicated. 7. Click **Set up source**. 
## Supported sync modes
The Klaviyo source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):
-* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
-* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
-* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
+- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)
## Supported Streams
-* [Campaigns](https://developers.klaviyo.com/en/v1-2/reference/get-campaigns#get-campaigns)
-* [Events](https://developers.klaviyo.com/en/v1-2/reference/metrics-timeline)
-* [GlobalExclusions](https://developers.klaviyo.com/en/v1-2/reference/get-global-exclusions)
-* [Lists](https://developers.klaviyo.com/en/v1-2/reference/get-lists)
-* [Metrics](https://developers.klaviyo.com/en/v1-2/reference/get-metrics)
-* [Flows](https://developers.klaviyo.com/en/reference/get_flows)
-* [Profiles](https://developers.klaviyo.com/en/reference/get_profiles)
+- [Campaigns](https://developers.klaviyo.com/en/v1-2/reference/get-campaigns#get-campaigns)
+- [Events](https://developers.klaviyo.com/en/v1-2/reference/metrics-timeline)
+- [GlobalExclusions](https://developers.klaviyo.com/en/v1-2/reference/get-global-exclusions)
+- [Lists](https://developers.klaviyo.com/en/v1-2/reference/get-lists)
+- [Metrics](https://developers.klaviyo.com/en/v1-2/reference/get-metrics)
+- [Flows](https://developers.klaviyo.com/en/reference/get_flows)
+- [Profiles](https://developers.klaviyo.com/en/reference/get_profiles)
## Performance considerations
@@ -45,7 +44,7 @@ The Klaviyo connector should not run into Klaviyo API limitations under normal u
## Data type map
| Integration Type | Airbyte Type | Notes |
-|:-----------------|:-------------|:------|
+| :--------------- | :----------- | :---- |
| `string` | `string` | |
| `number` | `number` | |
| `array` | `array` | |
@@ -54,7 +53,7 @@ The Klaviyo connector should not run into Klaviyo API limitations under normal u
## Changelog
| Version | Date | Pull Request | Subject |
-|:---------|:-----------|:-----------------------------------------------------------|:------------------------------------------------------------------------------------------|
+| :------- | :--------- | :--------------------------------------------------------- | :---------------------------------------------------------------------------------------- |
| `0.3.2` | 2023-06-20 | [27498](https://github.com/airbytehq/airbyte/pull/27498) | Do not store state in the future |
| `0.3.1` | 2023-06-08 | [27162](https://github.com/airbytehq/airbyte/pull/27162) | Anonymize check connection error message |
| `0.3.0` | 2023-02-18 | [23236](https://github.com/airbytehq/airbyte/pull/23236) | Add ` Email Templates` stream |
@@ -63,9 +62,9 @@ The Klaviyo connector should not run into Klaviyo API limitations under normal u
| `0.1.12` | 2023-01-30 | 
[22071](https://github.com/airbytehq/airbyte/pull/22071) | Fix `Events` stream schema | | `0.1.11` | 2023-01-27 | [22012](https://github.com/airbytehq/airbyte/pull/22012) | Set `AvailabilityStrategy` for streams explicitly to `None` | | `0.1.10` | 2022-09-29 | [17422](https://github.com/airbytehq/airbyte/issues/17422) | Update CDK dependency | -| `0.1.9` | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/issues/17304) | Migrate to per-stream state. | +| `0.1.9` | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/issues/17304) | Migrate to per-stream state. | | `0.1.6` | 2022-07-20 | [14872](https://github.com/airbytehq/airbyte/issues/14872) | Increase test coverage | -| `0.1.5` | 2022-07-12 | [14617](https://github.com/airbytehq/airbyte/issues/14617) | Set max\_retries = 10 for `lists` stream. | +| `0.1.5` | 2022-07-12 | [14617](https://github.com/airbytehq/airbyte/issues/14617) | Set max_retries = 10 for `lists` stream. | | `0.1.4` | 2022-04-15 | [11723](https://github.com/airbytehq/airbyte/issues/11723) | Enhance klaviyo source for flows stream and update to events stream. | | `0.1.3` | 2021-12-09 | [8592](https://github.com/airbytehq/airbyte/pull/8592) | Improve performance, make Global Exclusions stream incremental and enable Metrics stream. | | `0.1.2` | 2021-10-19 | [6952](https://github.com/airbytehq/airbyte/pull/6952) | Update schema validation in SAT | diff --git a/docs/integrations/sources/mixpanel.md b/docs/integrations/sources/mixpanel.md index 04aa49e276c8..cec9ca3ec089 100644 --- a/docs/integrations/sources/mixpanel.md +++ b/docs/integrations/sources/mixpanel.md @@ -17,30 +17,31 @@ To set up the Mixpanel source connector, you'll need a Mixpanel [Service Account 7. For **Attribution Window**, enter the number of days for the length of the attribution window. 8. For **Project Timezone**, enter the [timezone](https://help.mixpanel.com/hc/en-us/articles/115004547203-Manage-Timezones-for-Projects-in-Mixpanel) for your Mixpanel project. 9. For **Start Date**, enter the date in YYYY-MM-DD format. The data added on and after this date will be replicated. If left blank, the connector will replicate data from up to one year ago by default. -10. For **End Date**, enter the date in YYYY-MM-DD format. +10. For **End Date**, enter the date in YYYY-MM-DD format. 11. For **Region**, enter the [region](https://help.mixpanel.com/hc/en-us/articles/360039135652-Data-Residency-in-EU) for your Mixpanel project. 12. For **Date slicing window**, enter the number of days to slice through data. If you encounter RAM usage issues due to a huge amount of data in each window, try using a lower value for this parameter. 13. Click **Set up source**. 
## Supported sync modes
+
The Mixpanel source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):
-* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
-* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
-* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
+- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)
Note: Incremental sync returns duplicated \(old records\) for the state date due to API filter limitation, which is granular to the whole day only.
### Supported Streams
-* [Export](https://developer.mixpanel.com/reference/raw-event-export) \(Incremental\)
-* [Engage](https://developer.mixpanel.com/reference/engage-query) \(Incremental\)
-* [Funnels](https://developer.mixpanel.com/reference/funnels-query) \(Incremental\)
-* [Revenue](https://developer.mixpanel.com/reference/engage-query) \(Incremental\)
-* [Annotations](https://developer.mixpanel.com/reference/overview-1) \(Full table\)
-* [Cohorts](https://developer.mixpanel.com/reference/cohorts-list) \(Incremental\)
-* [Cohort Members](https://developer.mixpanel.com/reference/engage-query) \(Incremental\)
+- [Export](https://developer.mixpanel.com/reference/raw-event-export) \(Incremental\)
+- [Engage](https://developer.mixpanel.com/reference/engage-query) \(Incremental\)
+- [Funnels](https://developer.mixpanel.com/reference/funnels-query) \(Incremental\)
+- [Revenue](https://developer.mixpanel.com/reference/engage-query) \(Incremental\)
+- [Annotations](https://developer.mixpanel.com/reference/overview-1) \(Full table\)
+- [Cohorts](https://developer.mixpanel.com/reference/cohorts-list) \(Incremental\)
+- [Cohort Members](https://developer.mixpanel.com/reference/engage-query) \(Incremental\)
## Performance considerations
@@ -49,10 +50,10 @@ Syncing huge date windows may take longer due to Mixpanel's low API rate-limits
## CHANGELOG
| Version | Date | Pull Request | Subject |
-|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------------------------------------|
+| :------ | :--------- | :------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------- |
| 0.1.37 | 2022-07-20 | [27932](https://github.com/airbytehq/airbyte/pull/27932) | Fix spec: change start/end date format to `date` |
| 0.1.36 | 2022-06-27 | [27752](https://github.com/airbytehq/airbyte/pull/27752) | Partially revert version 0.1.32; Use exponential backoff; |
-| 0.1.35 | 2022-06-12 | [27252](https://github.com/airbytehq/airbyte/pull/27252) | Add should_retry False for 402 error |
+| 0.1.35 | 2022-06-12 | [27252](https://github.com/airbytehq/airbyte/pull/27252) | Add should_retry False for 402 error |
| 0.1.34 | 2022-05-15 | 
[21837](https://github.com/airbytehq/airbyte/pull/21837) | Add "insert_id" field to "export" stream schema | | 0.1.33 | 2023-04-25 | [25543](https://github.com/airbytehq/airbyte/pull/25543) | Set should_retry for 104 error in stream export | | 0.1.32 | 2023-04-11 | [25056](https://github.com/airbytehq/airbyte/pull/25056) | Set HttpAvailabilityStrategy, add exponential backoff, streams export and annotations add undeclared fields | @@ -87,5 +88,3 @@ Syncing huge date windows may take longer due to Mixpanel's low API rate-limits | 0.1.2 | 2021-11-02 | [7439](https://github.com/airbytehq/airbyte/issues/7439) | Added delay for all streams to match API limitation of requests rate | | 0.1.1 | 2021-09-16 | [6075](https://github.com/airbytehq/airbyte/issues/6075) | Added option to select project region | | 0.1.0 | 2021-07-06 | [3698](https://github.com/airbytehq/airbyte/issues/3698) | Created CDK native mixpanel connector | - - diff --git a/docs/integrations/sources/notion.md b/docs/integrations/sources/notion.md index ec8ac13c342b..74ed432faa9c 100644 --- a/docs/integrations/sources/notion.md +++ b/docs/integrations/sources/notion.md @@ -3,20 +3,22 @@ This page contains the setup guide and reference information for the Notion source connector. ## Prerequisites + - Access to a Notion workspace ## Setup guide​ To authenticate the Notion source connector, you need to use **one** of the following two methods: + - OAuth2.0 authorization (recommended for Airbyte Cloud) - Access Token :::note -**For Airbyte Cloud users:** We highly recommend using OAuth2.0 authorization to connect to Notion, as this method significantly simplifies the setup process. If you use OAuth2.0 authorization in Airbyte Cloud, you do **not** need to create and configure a new integration in Notion. Instead, you can proceed straight to +**For Airbyte Cloud users:** We highly recommend using OAuth2.0 authorization to connect to Notion, as this method significantly simplifies the setup process. If you use OAuth2.0 authorization in Airbyte Cloud, you do **not** need to create and configure a new integration in Notion. Instead, you can proceed straight to [setting up the connector in Airbyte](#step-3-set-up-the-notion-connector-in-airbyte). ::: -We have provided a quick setup guide for creating an integration in Notion below. If you would like more detailed information and context on Notion integrations, or experience any difficulties with the integration setup process, please refer to the +We have provided a quick setup guide for creating an integration in Notion below. If you would like more detailed information and context on Notion integrations, or experience any difficulties with the integration setup process, please refer to the [official Notion documentation](https://developers.notion.com/docs). ### Step 1: Create an integration in Notion​ @@ -33,14 +35,17 @@ You must be the owner of the Notion workspace to create a new integration associ ### Step 2: Set permissions and acquire authorization credentials #### Access Token (Cloud and Open Source) + If you are authenticating via Access Token, you will need to manually set permissions for each page you want to share with Airbyte. -1. Navigate to the page(s) you want to share with Airbyte. Click the **•••** menu at the top right of the page, select **Add connections**, and choose the integration you created in Step 1. -2. Once you have selected all the pages to share, you can find and copy the Access Token from the **Secrets** tab of your Notion integration's page. 
Then proceed to
-[setting up the connector in Airbyte](#step-2-set-up-the-notion-connector-in-airbyte).
+1. Navigate to the page(s) you want to share with Airbyte. Click the **•••** menu at the top right of the page, select **Add connections**, and choose the integration you created in Step 1.
+2. Once you have selected all the pages to share, you can find and copy the Access Token from the **Secrets** tab of your Notion integration's page. Then proceed to
+   [setting up the connector in Airbyte](#step-2-set-up-the-notion-connector-in-airbyte).
+
#### OAuth2.0 (Open Source only)
+
If you are authenticating via OAuth2.0 for Airbyte Open Source, you will need to make your integration public and acquire your Client ID, Client Secret and Access Token.

1. Navigate to the **Distribution** tab in your integration page, and toggle the switch to make the integration public.
@@ -58,10 +63,12 @@ If you are authenticating via OAuth2.0 for Airbyte Open Source, you will need to
5. Choose the method of authentication from the dropdown menu:

#### Authentication for Airbyte Cloud
+
- **OAuth2.0** (Recommended): Click **Authenticate your Notion account**. When the popup appears, click **Select pages**. Check the pages you want to give Airbyte access to, and click **Allow access**.
- **Access Token**: Copy and paste the Access Token found in the **Secrets** tab of your Notion integration's page.

#### Authentication for Airbyte Open Source
+
- **Access Token**: Copy and paste the Access Token found in the **Secrets** tab of your Notion integration's page.
- **OAuth2.0**: Copy and paste the Client ID, Client Secret and Access Token you acquired.

@@ -71,19 +78,20 @@ If you are authenticating via OAuth2.0 for Airbyte Open Source, you will need to
## Supported sync modes

The Notion source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):
-* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
-* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
-* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) (partially)
-* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) (partially)
+- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)

## Supported Streams

The Notion source connector supports the following streams. For more information, see the [Notion API](https://developers.notion.com/reference/intro).
-* [blocks](https://developers.notion.com/reference/retrieve-a-block) -* [databases](https://developers.notion.com/reference/retrieve-a-database) -* [pages](https://developers.notion.com/reference/retrieve-a-page) -* [users](https://developers.notion.com/reference/get-user) +- [blocks](https://developers.notion.com/reference/retrieve-a-block) +- [databases](https://developers.notion.com/reference/retrieve-a-database) +- [pages](https://developers.notion.com/reference/retrieve-a-page) +- [users](https://developers.notion.com/reference/get-user) :::note @@ -98,7 +106,7 @@ The connector is restricted by Notion [request limits](https://developers.notion ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:-----------------------------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :--------------------------------------------------------------------------- | | 1.1.1 | 2023-06-14 | [26535](https://github.com/airbytehq/airbyte/pull/26535) | Migrate from deprecated `authSpecification` to `advancedAuth` | | 1.1.0 | 2023-06-08 | [27170](https://github.com/airbytehq/airbyte/pull/27170) | Fix typo in `blocks` schema | | 1.0.9 | 2023-06-08 | [27062](https://github.com/airbytehq/airbyte/pull/27062) | Skip streams with `invalid_start_cursor` error | diff --git a/docs/integrations/sources/onesignal.md b/docs/integrations/sources/onesignal.md index 1e0c708f8587..9f740480c7a8 100644 --- a/docs/integrations/sources/onesignal.md +++ b/docs/integrations/sources/onesignal.md @@ -1,14 +1,15 @@ # OneSignal + This page contains the setup guide and reference information for the OneSignal source connector. ## Prerequisites -* [User Auth Key](https://documentation.onesignal.com/docs/accounts-and-keys#user-auth-key) -* Applications [credentials](https://documentation.onesignal.com/docs/accounts-and-keys) \(App Id & REST API Key\) +- [User Auth Key](https://documentation.onesignal.com/docs/accounts-and-keys#user-auth-key) +- Applications [credentials](https://documentation.onesignal.com/docs/accounts-and-keys) \(App Id & REST API Key\) ## Setup guide -### Step 1: Set up OneSignal +### Step 1: Set up OneSignal ### Step 2: Set up the OneSignal connector in Airbyte @@ -26,7 +27,6 @@ This page contains the setup guide and reference information for the OneSignal s 7. Enter the Start Date in format `YYYY-MM-DDTHH:mm:ssZ` 8. Enter Outcome names as comma separated values, e.g. `os__session_duration.count,os__click.count,` see the [API docs](https://documentation.onesignal.com/reference/view-outcomes) for more details. - #### For Airbyte Open Source: 1. Navigate to the Airbyte Open Source dashboard. @@ -41,22 +41,21 @@ This page contains the setup guide and reference information for the OneSignal s 7. Enter the Start Date in format `YYYY-MM-DDTHH:mm:ssZ` 8. Enter Outcome names as comma separated values, e.g. `os__session_duration.count,os__click.count,` see the [API docs](https://documentation.onesignal.com/reference/view-outcomes) for more details. 
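Before saving the connector, it can be worth confirming outside Airbyte that the User Auth Key is valid. The snippet below is only an illustrative sketch: it assumes OneSignal's standard v1 REST endpoint for listing apps, and the key value is a placeholder you need to replace.

```
# Hypothetical sanity check: list the apps visible to your User Auth Key.
# <USER_AUTH_KEY> is a placeholder for the key from your OneSignal account settings.
curl -s -H "Authorization: Basic <USER_AUTH_KEY>" "https://onesignal.com/api/v1/apps"
```

A JSON array in the response confirms the key works; the App Id and REST API Key for the app you want to sync can then be copied into the fields above.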
-
## Supported sync modes

The OneSignal source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):
-* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
-* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
-* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
+- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)

## Supported Streams

-* [Apps](https://documentation.onesignal.com/reference/view-apps-apps)
-* [Devices](https://documentation.onesignal.com/reference/view-devices) \(Incremental\)
-* [Notifications](https://documentation.onesignal.com/reference/view-notification) \(Incremental\)
-* [Outcomes](https://documentation.onesignal.com/reference/view-outcomes)
+- [Apps](https://documentation.onesignal.com/reference/view-apps-apps)
+- [Devices](https://documentation.onesignal.com/reference/view-devices) \(Incremental\)
+- [Notifications](https://documentation.onesignal.com/reference/view-notification) \(Incremental\)
+- [Outcomes](https://documentation.onesignal.com/reference/view-outcomes)

## Performance considerations

@@ -65,20 +64,18 @@ The connector is restricted by normal OneSignal [rate limits](https://documentat

## Data type mapping

| Integration Type | Airbyte Type | Notes |
-|:-----------------|:-------------|:------|
+| :--------------- | :----------- | :---- |
| `string` | `string` | |
| `integer` | `integer` | |
| `number` | `number` | |
| `array` | `array` | |
| `object` | `object` | |
-
## Changelog

| Version | Date | Pull Request | Subject |
-|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------|
+| :------ | :--------- | :------------------------------------------------------- | :------------------------------------------- |
| 1.0.0 | 2023-03-14 | [24076](https://github.com/airbytehq/airbyte/pull/24076) | Update connectors spec; fix incremental sync |
| 0.1.2 | 2021-12-07 | [8582](https://github.com/airbytehq/airbyte/pull/8582) | Update connector fields title/description |
| 0.1.1 | 2021-11-10 | [7617](https://github.com/airbytehq/airbyte/pull/7617) | Fix get_update state |
| 0.1.0 | 2021-10-13 | [6998](https://github.com/airbytehq/airbyte/pull/6998) | Initial Release |
-
diff --git a/docs/integrations/sources/pinterest.md b/docs/integrations/sources/pinterest.md
index 5c9925291c2a..bce016844a49 100644
--- a/docs/integrations/sources/pinterest.md
+++ b/docs/integrations/sources/pinterest.md
@@ -9,6 +9,7 @@ To set up the Pinterest source connector with Airbyte Open Source, you'll need y

## Setup guide

+
**For Airbyte Cloud:**

1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account.
@@ -21,6 +22,7 @@ To set up the Pinterest source connector with Airbyte Open Source, you'll need y


+
**For Airbyte Open Source:**

1. Navigate to the Airbyte Open Source dashboard.
@@ -36,33 +38,33 @@ To set up the Pinterest source connector with Airbyte Open Source, you'll need y

The Pinterest source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):

-* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
-* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
-* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
+- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)

## Supported Streams

-* [Account analytics](https://developers.pinterest.com/docs/api/v5/#operation/user_account/analytics) \(Incremental\)
-* [Boards](https://developers.pinterest.com/docs/api/v5/#operation/boards/list) \(Full table\)
-  * [Board sections](https://developers.pinterest.com/docs/api/v5/#operation/board_sections/list) \(Full table\)
-  * [Pins on board section](https://developers.pinterest.com/docs/api/v5/#operation/board_sections/list_pins) \(Full table\)
-  * [Pins on board](https://developers.pinterest.com/docs/api/v5/#operation/boards/list_pins) \(Full table\)
-* [Ad accounts](https://developers.pinterest.com/docs/api/v5/#operation/ad_accounts/list) \(Full table\)
-  * [Ad account analytics](https://developers.pinterest.com/docs/api/v5/#operation/ad_account/analytics) \(Incremental\)
-  * [Campaigns](https://developers.pinterest.com/docs/api/v5/#operation/campaigns/list) \(Incremental\)
-  * [Campaign analytics](https://developers.pinterest.com/docs/api/v5/#operation/campaigns/list) \(Incremental\)
-  * [Campaign Analytics Report](https://developers.pinterest.com/docs/api/v5/#operation/analytics/create_report) \(Incremental\)
-  * [Ad groups](https://developers.pinterest.com/docs/api/v5/#operation/ad_groups/list) \(Incremental\)
-  * [Ad group analytics](https://developers.pinterest.com/docs/api/v5/#operation/ad_groups/analytics) \(Incremental\)
-  * [Ads](https://developers.pinterest.com/docs/api/v5/#operation/ads/list) \(Incremental\)
-  * [Ad analytics](https://developers.pinterest.com/docs/api/v5/#operation/ads/analytics) \(Incremental\)
+- [Account analytics](https://developers.pinterest.com/docs/api/v5/#operation/user_account/analytics) \(Incremental\)
+- [Boards](https://developers.pinterest.com/docs/api/v5/#operation/boards/list) \(Full table\)
+  - [Board sections](https://developers.pinterest.com/docs/api/v5/#operation/board_sections/list) \(Full table\)
+  - [Pins on board section](https://developers.pinterest.com/docs/api/v5/#operation/board_sections/list_pins) \(Full table\)
+  - [Pins on board](https://developers.pinterest.com/docs/api/v5/#operation/boards/list_pins) \(Full table\)
+- [Ad accounts](https://developers.pinterest.com/docs/api/v5/#operation/ad_accounts/list) \(Full table\)
+  - [Ad account analytics](https://developers.pinterest.com/docs/api/v5/#operation/ad_account/analytics) \(Incremental\)
+  -
[Campaigns](https://developers.pinterest.com/docs/api/v5/#operation/campaigns/list) \(Incremental\) + - [Campaign analytics](https://developers.pinterest.com/docs/api/v5/#operation/campaigns/list) \(Incremental\) + - [Campaign Analytics Report](https://developers.pinterest.com/docs/api/v5/#operation/analytics/create_report) \(Incremental\) + - [Ad groups](https://developers.pinterest.com/docs/api/v5/#operation/ad_groups/list) \(Incremental\) + - [Ad group analytics](https://developers.pinterest.com/docs/api/v5/#operation/ad_groups/analytics) \(Incremental\) + - [Ads](https://developers.pinterest.com/docs/api/v5/#operation/ads/list) \(Incremental\) + - [Ad analytics](https://developers.pinterest.com/docs/api/v5/#operation/ads/analytics) \(Incremental\) ## Performance considerations The connector is restricted by the Pinterest [requests limitation](https://developers.pinterest.com/docs/api/v5/#tag/Rate-limits). -##### Rate Limits +##### Rate Limits - Analytics streams: 300 calls per day / per user \ - Ad accounts streams (Campaigns, Ad groups, Ads): 1000 calls per min / per user / per app \ @@ -71,7 +73,7 @@ The connector is restricted by the Pinterest [requests limitation](https://devel ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :---------------------------------------------------------------------------------------------------- | | 0.6.0 | 2023-07-25 | [28672](https://github.com/airbytehq/airbyte/pull/28672) | Add report stream for `CAMPAIGN` level | | 0.5.3 | 2023-07-05 | [27964](https://github.com/airbytehq/airbyte/pull/27964) | Add `id` field to `owner` field in `ad_accounts` stream | | 0.5.2 | 2023-06-02 | [26949](https://github.com/airbytehq/airbyte/pull/26949) | Update `BoardPins` stream with `note` property | diff --git a/docs/integrations/sources/postgres.md b/docs/integrations/sources/postgres.md index 625f2f17e4b8..94b803cd6085 100644 --- a/docs/integrations/sources/postgres.md +++ b/docs/integrations/sources/postgres.md @@ -268,14 +268,15 @@ If you know there are database changes to be synced, but the connector cannot re In [Step 2](#step-2-set-up-the-postgres-connector-in-airbyte) of the connector setup guide, enter the replication slot and publication you just created. ## Xmin replication mode + Xmin replication is a new cursor-less replication method for Postgres. Cursorless syncs enable syncing new or updated rows without explicitly choosing a cursor field. The xmin system column which is available in all Postgres databases is used to track inserts and updates to your source data. This is a good solution if: + - There is not a well-defined cursor candidate to use for Standard incremental mode. - You want to replace a previously configured full-refresh sync. - You are replicating Postgres tables less than 500GB. 
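To get a concrete feel for what the xmin cursor tracks, you can inspect the system column directly with `psql`. This is an illustrative sketch only: the connection details and table name are placeholders, and the cast is just a convenient way to display the raw transaction id.

```
# Hypothetical example: view the xmin values that xmin replication uses to detect changes.
psql "host=<host> dbname=<database> user=<user>" \
  -c "SELECT xmin::text::bigint AS row_xmin, * FROM public.<your_table> ORDER BY row_xmin DESC LIMIT 5;"
```

Recently inserted or updated rows generally carry higher `xmin` values, which is what lets the connector find new and changed records without a user-defined cursor column.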
-
## Supported sync modes

The Postgres source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):
@@ -283,7 +284,7 @@ The Postgres source connector supports the following [sync modes](https://docs.a

- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-- [Incremental Sync - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)

## Supported cursors

@@ -406,14 +407,14 @@ Some larger tables may encounter an error related to the temporary file size lim
## Changelog

| Version | Date | Pull Request | Subject |
-|---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| ------- | ---------- | -------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --- |
| 3.1.3 | 2023-08-03 | [28708](https://github.com/airbytehq/airbyte/pull/28708) | Enable checkpointing snapshots in CDC connections |
| 3.1.2 | 2023-08-01 | [28954](https://github.com/airbytehq/airbyte/pull/28954) | Fix an issue that prevented use of tables with names containing uppercase letters |
| 3.1.1 | 2023-07-31 | [28892](https://github.com/airbytehq/airbyte/pull/28892) | Fix an issue that prevented use of cursor columns with names containing uppercase letters |
| 3.1.0 | 2023-07-25 | [28339](https://github.com/airbytehq/airbyte/pull/28339) | Checkpointing initial load for incremental syncs: enabled for xmin and cursor based only. |
| 3.0.2 | 2023-07-18 | [28336](https://github.com/airbytehq/airbyte/pull/28336) | Add full-refresh mode back to Xmin syncs. |
| 3.0.1 | 2023-07-14 | [28345](https://github.com/airbytehq/airbyte/pull/28345) | Increment patch to trigger a rebuild |
-| 3.0.0 | 2023-07-12 | [27442](https://github.com/airbytehq/airbyte/pull/27442) | Set _ab_cdc_lsn as the source defined cursor for CDC mode to prepare for Destination v2 normalization |
+| 3.0.0 | 2023-07-12 | [27442](https://github.com/airbytehq/airbyte/pull/27442) | Set \_ab_cdc_lsn as the source defined cursor for CDC mode to prepare for Destination v2 normalization |
| 2.1.1 | 2023-07-06 | [26723](https://github.com/airbytehq/airbyte/pull/26723) | Add new xmin replication method.
| | 2.1.0 | 2023-06-26 | [27737](https://github.com/airbytehq/airbyte/pull/27737) | License Update: Elv2 | | 2.0.34 | 2023-06-20 | [27212](https://github.com/airbytehq/airbyte/pull/27212) | Fix silent exception swallowing in StreamingJdbcDatabase | @@ -509,13 +510,13 @@ Some larger tables may encounter an error related to the temporary file size lim | 0.4.43 | 2022-08-03 | [15226](https://github.com/airbytehq/airbyte/pull/15226) | Make connectionTimeoutMs configurable through JDBC url parameters | | 0.4.42 | 2022-08-03 | [15273](https://github.com/airbytehq/airbyte/pull/15273) | Fix a bug in `0.4.36` and correctly parse the CDC initial record waiting time | | 0.4.41 | 2022-08-03 | [15077](https://github.com/airbytehq/airbyte/pull/15077) | Sync data from beginning if the LSN is no longer valid in CDC | -| | 2022-08-03 | [14903](https://github.com/airbytehq/airbyte/pull/14903) | Emit state messages more frequently (⛔ this version has a bug; use `1.0.1` instead | +| | 2022-08-03 | [14903](https://github.com/airbytehq/airbyte/pull/14903) | Emit state messages more frequently (⛔ this version has a bug; use `1.0.1` instead | | 0.4.40 | 2022-08-03 | [15187](https://github.com/airbytehq/airbyte/pull/15187) | Add support for BCE dates/timestamps | | | 2022-08-03 | [14534](https://github.com/airbytehq/airbyte/pull/14534) | Align regular and CDC integration tests and data mappers | | 0.4.39 | 2022-08-02 | [14801](https://github.com/airbytehq/airbyte/pull/14801) | Fix multiple log bindings | | 0.4.38 | 2022-07-26 | [14362](https://github.com/airbytehq/airbyte/pull/14362) | Integral columns are now discovered as int64 fields. | | 0.4.37 | 2022-07-22 | [14714](https://github.com/airbytehq/airbyte/pull/14714) | Clarified error message when invalid cursor column selected | -| 0.4.36 | 2022-07-21 | [14451](https://github.com/airbytehq/airbyte/pull/14451) | Make initial CDC waiting time configurable (⛔ this version has a bug and will not work; use `0.4.42` instead) | | +| 0.4.36 | 2022-07-21 | [14451](https://github.com/airbytehq/airbyte/pull/14451) | Make initial CDC waiting time configurable (⛔ this version has a bug and will not work; use `0.4.42` instead) | | | 0.4.35 | 2022-07-14 | [14574](https://github.com/airbytehq/airbyte/pull/14574) | Removed additionalProperties:false from JDBC source connectors | | 0.4.34 | 2022-07-17 | [13840](https://github.com/airbytehq/airbyte/pull/13840) | Added the ability to connect using different SSL modes and SSL certificates. 
|
| 0.4.33 | 2022-07-14 | [14586](https://github.com/airbytehq/airbyte/pull/14586) | Validate source JDBC url parameters |
diff --git a/docs/integrations/sources/prestashop.md b/docs/integrations/sources/prestashop.md
index 7f3cc086c344..d939a2113057 100644
--- a/docs/integrations/sources/prestashop.md
+++ b/docs/integrations/sources/prestashop.md
@@ -33,7 +33,7 @@ The PrestaShop source connector supports the following [ sync modes](https://doc

- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-- [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)

## Supported Streams

@@ -103,7 +103,7 @@ If there are more endpoints you'd like Airbyte to support, please [create an iss
## CHANGELOG

| Version | Date | Pull Request | Subject |
-|:--------|:-----------|:----------------------------------------------------------|:-----------------------------------------------------|
+| :------ | :--------- | :-------------------------------------------------------- | :--------------------------------------------------- |
| 1.0.0 | 2023-06-26 | [27716](https://github.com/airbytehq/airbyte/pull/27716) | update schema; remove empty datetime fields |
| 0.3.1 | 2023-02-13 | [22905](https://github.com/airbytehq/airbyte/pull/22905) | Specified date formatting in specification |
| 0.3.0 | 2022-11-08 | [#18927](https://github.com/airbytehq/airbyte/pull/18927) | Migrate connector from Alpha (Python) to Beta (YAML) |
diff --git a/docs/integrations/sources/quickbooks.md b/docs/integrations/sources/quickbooks.md
index ac6481bd8003..5a9abd6ee759 100644
--- a/docs/integrations/sources/quickbooks.md
+++ b/docs/integrations/sources/quickbooks.md
@@ -37,6 +37,7 @@ This page contains the setup guide and reference information for the QuickBooks

+
**For Airbyte Open Source:**

1. **Client ID** - The OAuth2.0 application ID
@@ -53,10 +54,10 @@

The Quickbooks Source connector supports the following [ sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):

-* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
-* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
-* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
+- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)

## Supported Streams

@@ -94,7 +95,7 @@ This Source is capable of syncing the following [Streams](https://developer.intu
## Data type map

| Integration Type | Airbyte Type | Notes |
-|:-----------------|:-------------|:------|
+| :--------------- | :----------- | :---- |
| `string` | `string` | |
| `number` | `number` | |
| `array` | `array` | |
| `object` | `object` | |

## Changelog

-| Version | Date | Pull Request | Subject |
-|:--------|:-----------|:-------------------------------------------------------------|:-------------------------------------------------------------------|
-| `2.0.4` | 2023-06-28 | [27803](https://github.com/airbytehq/airbyte/pull/27803) | Update following state breaking changes |
-| `2.0.3` | 2023-06-08 | [27148](https://github.com/airbytehq/airbyte/pull/27148) | Update description and example values of a Start Date in spec.json |
-| `2.0.2` | 2023-06-07 | [26722](https://github.com/airbytehq/airbyte/pull/27053) | Update CDK version and adjust authenticator configuration |
-| `2.0.1` | 2023-05-28 | [26722](https://github.com/airbytehq/airbyte/pull/26722) | Change datatype for undisclosed amount field in payments |
-| `2.0.0` | 2023-04-11 | [25045](https://github.com/airbytehq/airbyte/pull/25045) | Fix datetime format, disable OAuth button in cloud |
-| `1.0.0` | 2023-03-20 | [24324](https://github.com/airbytehq/airbyte/pull/24324) | Migrate to Low-Code |
-| `0.1.5` | 2022-02-17 | [10346](https://github.com/airbytehq/airbyte/pull/10346) | Update label `Quickbooks` -> `QuickBooks` |
-| `0.1.4` | 2021-12-20 | [8960](https://github.com/airbytehq/airbyte/pull/8960) | Update connector fields title/description |
-| `0.1.3` | 2021-08-10 | [4986](https://github.com/airbytehq/airbyte/pull/4986) | Using number data type for decimal fields instead string |
-| `0.1.2` | 2021-07-06 | [4539](https://github.com/airbytehq/airbyte/pull/4539) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support |
+| Version | Date | Pull Request | Subject |
+| :------ | :--------- | :------------------------------------------------------- | :----------------------------------------------------------------- |
+| `2.0.4` | 2023-06-28 | [27803](https://github.com/airbytehq/airbyte/pull/27803) | Update following state breaking changes |
+| `2.0.3` | 2023-06-08 |
[27148](https://github.com/airbytehq/airbyte/pull/27148) | Update description and example values of a Start Date in spec.json |
+| `2.0.2` | 2023-06-07 | [26722](https://github.com/airbytehq/airbyte/pull/27053) | Update CDK version and adjust authenticator configuration |
+| `2.0.1` | 2023-05-28 | [26722](https://github.com/airbytehq/airbyte/pull/26722) | Change datatype for undisclosed amount field in payments |
+| `2.0.0` | 2023-04-11 | [25045](https://github.com/airbytehq/airbyte/pull/25045) | Fix datetime format, disable OAuth button in cloud |
+| `1.0.0` | 2023-03-20 | [24324](https://github.com/airbytehq/airbyte/pull/24324) | Migrate to Low-Code |
+| `0.1.5` | 2022-02-17 | [10346](https://github.com/airbytehq/airbyte/pull/10346) | Update label `Quickbooks` -> `QuickBooks` |
+| `0.1.4` | 2021-12-20 | [8960](https://github.com/airbytehq/airbyte/pull/8960) | Update connector fields title/description |
+| `0.1.3` | 2021-08-10 | [4986](https://github.com/airbytehq/airbyte/pull/4986) | Using number data type for decimal fields instead string |
+| `0.1.2` | 2021-07-06 | [4539](https://github.com/airbytehq/airbyte/pull/4539) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support |
diff --git a/docs/integrations/sources/redshift.md b/docs/integrations/sources/redshift.md
index 8d858da8e302..6b764eb72475 100644
--- a/docs/integrations/sources/redshift.md
+++ b/docs/integrations/sources/redshift.md
@@ -27,9 +27,9 @@ The Redshift source does not alter the schema present in your warehouse. Dependi

#### Incremental Sync

-The Redshift source connector supports incremental syncs. To setup an incremental sync for a table in Redshift in the Airbyte UI, you must setup a [user-defined cursor field](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/#user-defined-cursor) such as an `updated_at` column. The connector relies on this column to know which records were updated since the last sync it ran. See the [incremental sync docs](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) for more information.
+The Redshift source connector supports incremental syncs. To set up an incremental sync for a table in Redshift in the Airbyte UI, you must set up a [user-defined cursor field](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/#user-defined-cursor) such as an `updated_at` column. The connector relies on this column to know which records were updated since the last sync it ran. See the [incremental sync docs](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) for more information.

-Defining a cursor field allows you to run incremental-append syncs. To run [incremental-dedupe](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) syncs, you'll need to tell the connector which column(s) to use as a primary key. See the [incremental-dedupe sync docs](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) for more information.
+Defining a cursor field allows you to run incremental-append syncs. To run [incremental-dedupe](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) syncs, you'll need to tell the connector which column(s) to use as a primary key. See the [incremental-dedupe sync docs](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) for more information.
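Conceptually, each incremental sync behaves like a query that filters on the cursor column. The sketch below is illustrative only; the cluster endpoint, table, column, and cursor value are placeholders.

```
# Hypothetical illustration of a cursor-based incremental read against Redshift.
# Airbyte effectively requests rows whose cursor value is newer than the one saved from the last sync.
psql "host=<cluster-endpoint> port=5439 dbname=<database> user=<user>" \
  -c "SELECT * FROM public.<your_table> WHERE updated_at > '2023-08-01 00:00:00' ORDER BY updated_at;"
```

If `updated_at` is not reliably populated on every insert and update, records can be missed, so pick the cursor column with that in mind.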
## Getting started diff --git a/docs/integrations/sources/salesforce.md b/docs/integrations/sources/salesforce.md index 7a98220ba50d..aefe09215296 100644 --- a/docs/integrations/sources/salesforce.md +++ b/docs/integrations/sources/salesforce.md @@ -46,9 +46,9 @@ If you are using Airbyte Open Source, you will need to obtain the following OAut To obtain these credentials, follow [this walkthrough](https://medium.com/@bpmmendis94/obtain-access-refresh-tokens-from-salesforce-rest-api-a324fe4ccd9b) with the following modifications: - 1. If your Salesforce URL is not in the `X.salesforce.com` format, use your Salesforce domain name. For example, if your Salesforce URL is `awesomecompany.force.com` then use that instead of `awesomecompany.salesforce.com`. - 2. When running a curl command, run it with the `-L` option to follow any redirects. - 3. If you [created a read-only user](https://docs.google.com/document/d/1wZR8pz4MRdc2zUculc9IqoF8JxN87U40IqVnTtcqdrI/edit#heading=h.w5v6h7b2a9y4), use the user credentials when logging in to generate OAuth tokens. +1. If your Salesforce URL is not in the `X.salesforce.com` format, use your Salesforce domain name. For example, if your Salesforce URL is `awesomecompany.force.com` then use that instead of `awesomecompany.salesforce.com`. +2. When running a curl command, run it with the `-L` option to follow any redirects. +3. If you [created a read-only user](https://docs.google.com/document/d/1wZR8pz4MRdc2zUculc9IqoF8JxN87U40IqVnTtcqdrI/edit#heading=h.w5v6h7b2a9y4), use the user credentials when logging in to generate OAuth tokens. @@ -59,12 +59,12 @@ To obtain these credentials, follow [this walkthrough](https://medium.com/@bpmme 3. Find and select **Salesforce** from the list of available sources. 4. Enter a **Source name** of your choosing to help you identify this source. 5. To authenticate: - -**For Airbyte Cloud**: Click **Authenticate your account** to authorize your Salesforce account. Airbyte will authenticate the Salesforce account you are already logged in to. Please make sure you are logged into the right account. - - -**For Airbyte Open Source**: Enter your Client ID, Client Secret, and Refresh Token. - + + **For Airbyte Cloud**: Click **Authenticate your account** to authorize your Salesforce account. Airbyte will authenticate the Salesforce account you are already logged in to. Please make sure you are logged into the right account. + + + **For Airbyte Open Source**: Enter your Client ID, Client Secret, and Refresh Token. + 6. Toggle whether your Salesforce account is a [Sandbox account](https://help.salesforce.com/s/articleView?id=sf.deploy_sandboxes_parent.htm&type=5) or a production account. 7. (Optional) For **Start Date**, use the provided datepicker or enter the date programmatically in either `YYYY-MM-DD` or `YYYY-MM-DDTHH:MM:SSZ` format. The data added on and after this date will be replicated. If this field is left blank, Airbyte will replicate the data for the last two years by default. Please note that timestamps are in [UTC](https://www.utctime.net/). 8. (Optional) In the **Filter Salesforce Object** section, you may choose to target specific data for replication. To do so, click **Add**, then select the relevant criteria from the **Search criteria** dropdown. For **Search value**, add the search terms relevant to you. You may add multiple filters. If no filters are specified, Airbyte will replicate all data. 
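If you generated the Client ID, Client Secret, and Refresh Token for Airbyte Open Source, you can sanity-check them with a quick token exchange before entering them above. This is a sketch, not part of the official setup: the placeholder values must be replaced, sandbox accounts use `test.salesforce.com` instead of `login.salesforce.com`, and the `-L` flag follows redirects as noted in the walkthrough modifications.

```
# Hypothetical check: exchange the refresh token for an access token.
curl -L -X POST "https://login.salesforce.com/services/oauth2/token" \
  -d "grant_type=refresh_token" \
  -d "client_id=<CLIENT_ID>" \
  -d "client_secret=<CLIENT_SECRET>" \
  -d "refresh_token=<REFRESH_TOKEN>"
```

A JSON response containing an `access_token` indicates the credentials are usable; an error usually points to a mismatched Connected App or a revoked token.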
@@ -77,7 +77,7 @@ The Salesforce source connector supports the following sync modes:

- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-- (Recommended)[ Incremental Sync - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- (Recommended) [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)

### Incremental Deletes sync

@@ -85,7 +85,7 @@ The Salesforce connector retrieves deleted records from Salesforce. For the stre

## Performance considerations

-The Salesforce connector is restricted by Salesforce’s [Daily Rate Limits](https://developer.salesforce.com/docs/atlas.en-us.salesforce_app_limits_cheatsheet.meta/salesforce_app_limits_cheatsheet/salesforce_app_limits_platform_api.htm). The connector syncs data until it hits the daily rate limit, then ends the sync early with success status, and starts the next sync from where it left off. Note that picking up from where it ends will work only for incremental sync, which is why we recommend using the [Incremental Sync - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) sync mode.
+The Salesforce connector is restricted by Salesforce’s [Daily Rate Limits](https://developer.salesforce.com/docs/atlas.en-us.salesforce_app_limits_cheatsheet.meta/salesforce_app_limits_cheatsheet/salesforce_app_limits_platform_api.htm). The connector syncs data until it hits the daily rate limit, then ends the sync early with success status, and starts the next sync from where it left off. Note that picking up from where it ends will work only for incremental sync, which is why we recommend using the [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) sync mode.
## Supported Objects @@ -130,7 +130,7 @@ Now that you have set up the Salesforce source connector, check out the followin ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------- | | 2.1.1 | 2023-07-06 | [28021](https://github.com/airbytehq/airbyte/pull/28021) | Several Vulnerabilities Fixes; switched to use alpine instead of slim, CVE-2022-40897, CVE-2023-29383, CVE-2023-31484, CVE-2016-2781 | | 2.1.0 | 2023-06-26 | [27726](https://github.com/airbytehq/airbyte/pull/27726) | License Update: Elv2 | | 2.0.14 | 2023-05-04 | [25794](https://github.com/airbytehq/airbyte/pull/25794) | Avoid pandas inferring wrong data types by forcing all data type as object | diff --git a/docs/integrations/sources/salesloft.md b/docs/integrations/sources/salesloft.md index 2b25039436e2..0a14fdfa419f 100644 --- a/docs/integrations/sources/salesloft.md +++ b/docs/integrations/sources/salesloft.md @@ -8,6 +8,7 @@ This page contains the setup guide and reference information for the Salesloft S - Start date + **For Airbyte Open Source:** - Salesloft API Key (see [API Key Authentication](https://developers.salesloft.com/api.html#!/Topic/apikey)) @@ -20,12 +21,15 @@ This page contains the setup guide and reference information for the Salesloft S Create a [Salesloft Account](https://salesloft.com). + **Airbyte Open Source additional setup steps** Log into [Salesloft](https://salesloft.com) and then generate an [API Key](https://developers.salesloft.com/api.html#!/Topic/apikey). + + ### Step 2: Set up the Salesloft connector in Airbyte **For Airbyte Cloud:** @@ -41,6 +45,7 @@ Log into [Salesloft](https://salesloft.com) and then generate an [API Key](https + **For Airbyte Open Source:** 1. Authenticate with **API Key**. 
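A quick way to confirm the key works before finishing the setup is to call the REST API directly. This is an illustrative sketch that assumes Salesloft's standard v2 endpoint and bearer-style authorization header; the key value is a placeholder.

```
# Hypothetical check: request a single page of people with the API Key generated above.
curl -s -H "Authorization: Bearer <SALESLOFT_API_KEY>" "https://api.salesloft.com/v2/people.json?per_page=1"
```

A `200` response with a JSON body means the key is active and can read your team's data.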
@@ -50,44 +55,44 @@ Log into [Salesloft](https://salesloft.com) and then generate an [API Key](https

The Salesloft Source connector supports the following [ sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):

-* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
-* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
-* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
-* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history)
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
+- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)

## Supported Streams

This connector outputs the following streams:

-* [CadenceMemberships](https://developers.salesloft.com/api.html#!/Cadence_Memberships/get_v2_cadence_memberships_json)
-* [Cadences](https://developers.salesloft.com/api.html#!/Cadences/get_v2_cadences_json)
-* [People](https://developers.salesloft.com/api.html#!/People/get_v2_people_json)
-* [Users](https://developers.salesloft.com/api.html#!/Users/get_v2_users_json)
-* [Emails](https://developers.salesloft.com/api.html#!/Emails/get_v2_activities_emails_json)
-* [Account Stages](https://developers.salesloft.com/api.html#!/Account_Stages/get_v2_account_stages_json)
-* [Account Tiers](https://developers.salesloft.com/api.html#!/Account_Tiers/get_v2_account_tiers_json)
-* [Accounts](https://developers.salesloft.com/api.html#!/Accounts/get_v2_accounts_json)
-* [Actions](https://developers.salesloft.com/api.html#!/Actions/get_v2_actions_json)
-* [Calls](https://developers.salesloft.com/api.html#!/Calls/get_v2_activities_calls_json)
-* [Emails Templates](https://developers.salesloft.com/api.html#!/Email_Templates/get_v2_email_templates_json)
-* [Emails Template Attachements](https://developers.salesloft.com/api.html#!/Email_Template_Attachments/get_v2_email_template_attachments_json)
-* [Imports](https://developers.salesloft.com/api.html#!/Imports/get_v2_imports_json)
-* [Notes](https://developers.salesloft.com/api.html#!/Notes/get_v2_notes_json)
-* [Person Stages](https://developers.salesloft.com/api.html#!/Person_Stages/get_v2_person_stages_json)
-* [Phone Number Assignments](https://developers.salesloft.com/api.html#!/Phone_Number_Assignments/get_v2_phone_number_assignments_json)
-* [Steps](https://developers.salesloft.com/api.html#!/Steps/get_v2_steps_json)
-* [Team Templates](https://developers.salesloft.com/api.html#!/Team_Templates/get_v2_team_templates_json)
-* [Team Template Attachements](https://developers.salesloft.com/api.html#!/Team_Template_Attachments/get_v2_team_template_attachments_json)
-* [CRM Activities](https://developers.salesloft.com/api.html#!/CRM_Activities/get_v2_crm_activities_json)
-* [CRM Users](https://developers.salesloft.com/api.html#!/Crm_Users/get_v2_crm_users_json)
-* [Groups](https://developers.salesloft.com/api.html#!/Groups/get_v2_groups_json)
-* [Successes](https://developers.salesloft.com/api.html#!/Successes/get_v2_successes_json)
-* [Call Data
Records](https://developers.salesloft.com/api.html#!/Call_Data_Records/get_v2_call_data_records_json) -* [Call Dispositions](https://developers.salesloft.com/api.html#!/Call_Dispositions/get_v2_call_dispositions_json) -* [Call Sentiments](https://developers.salesloft.com/api.html#!/Call_Sentiments/get_v2_call_sentiments_json) -* [Custom Fields](https://developers.salesloft.com/api.html#!/Custom_Fields/get_v2_custom_fields_json) -* [Meetings](https://developers.salesloft.com/api.html#!/Meetings/get_v2_meetings_json) -* [Searches](https://developers.salesloft.com/api.html#!/Searches/post_v2_searches_json) +- [CadenceMemberships](https://developers.salesloft.com/api.html#!/Cadence_Memberships/get_v2_cadence_memberships_json) +- [Cadences](https://developers.salesloft.com/api.html#!/Cadences/get_v2_cadences_json) +- [People](https://developers.salesloft.com/api.html#!/People/get_v2_people_json) +- [Users](https://developers.salesloft.com/api.html#!/Users/get_v2_users_json) +- [Emails](https://developers.salesloft.com/api.html#!/Emails/get_v2_activities_emails_json) +- [Account Stages](https://developers.salesloft.com/api.html#!/Account_Stages/get_v2_account_stages_json) +- [Account Tiers](https://developers.salesloft.com/api.html#!/Account_Tiers/get_v2_account_tiers_json) +- [Accounts](https://developers.salesloft.com/api.html#!/Accounts/get_v2_accounts_json) +- [Actions](https://developers.salesloft.com/api.html#!/Actions/get_v2_actions_json) +- [Calls](https://developers.salesloft.com/api.html#!/Calls/get_v2_activities_calls_json) +- [Emails Templates](https://developers.salesloft.com/api.html#!/Email_Templates/get_v2_email_templates_json) +- [Emails Template Attachements](https://developers.salesloft.com/api.html#!/Email_Template_Attachments/get_v2_email_template_attachments_json) +- [Imports](https://developers.salesloft.com/api.html#!/Imports/get_v2_imports_json) +- [Notes](https://developers.salesloft.com/api.html#!/Notes/get_v2_notes_json) +- [Person Stages](https://developers.salesloft.com/api.html#!/Person_Stages/get_v2_person_stages_json) +- [Phone Number Assignments](https://developers.salesloft.com/api.html#!/Phone_Number_Assignments/get_v2_phone_number_assignments_json) +- [Steps](https://developers.salesloft.com/api.html#!/Steps/get_v2_steps_json) +- [Team Templates](https://developers.salesloft.com/api.html#!/Team_Templates/get_v2_team_templates_json) +- [Team Template Attachements](https://developers.salesloft.com/api.html#!/Team_Template_Attachments/get_v2_team_template_attachments_json) +- [CRM Activities](https://developers.salesloft.com/api.html#!/CRM_Activities/get_v2_crm_activities_json) +- [CRM Users](https://developers.salesloft.com/api.html#!/Crm_Users/get_v2_crm_users_json) +- [Groups](https://developers.salesloft.com/api.html#!/Groups/get_v2_groups_json) +- [Successes](https://developers.salesloft.com/api.html#!/Successes/get_v2_successes_json) +- [Call Data Records](https://developers.salesloft.com/api.html#!/Call_Data_Records/get_v2_call_data_records_json) +- [Call Dispositions](https://developers.salesloft.com/api.html#!/Call_Dispositions/get_v2_call_dispositions_json) +- [Call Sentiments](https://developers.salesloft.com/api.html#!/Call_Sentiments/get_v2_call_sentiments_json) +- [Custom Fields](https://developers.salesloft.com/api.html#!/Custom_Fields/get_v2_custom_fields_json) +- [Meetings](https://developers.salesloft.com/api.html#!/Meetings/get_v2_meetings_json) +- [Searches](https://developers.salesloft.com/api.html#!/Searches/post_v2_searches_json) ## 
Performance considerations @@ -96,7 +101,7 @@ Salesloft has the [rate limits](hhttps://developers.salesloft.com/api.html#!/Top ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :---------------------------------------------------------------- | | 1.2.0 | 2023-06-20 | [27505](https://github.com/airbytehq/airbyte/pull/27505) | Added new streams (Call Data Records, Call Dispositions, ... ) | | 1.1.1 | 2023-06-17 | [27484](https://github.com/airbytehq/airbyte/pull/27484) | Bump version on py files updates | | 1.1.0 | 2023-05-17 | [26188](https://github.com/airbytehq/airbyte/pull/26188) | Added `latest_active_date` field to the `Cadences` stream schema. | diff --git a/docs/integrations/sources/sentry.md b/docs/integrations/sources/sentry.md index 2addcbb1ff8c..2f19ae726dfd 100644 --- a/docs/integrations/sources/sentry.md +++ b/docs/integrations/sources/sentry.md @@ -22,17 +22,17 @@ To set up the Sentry source connector, you'll need the Sentry [project name](htt The Sentry source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) -* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) -* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) ## Supported Streams -* [Events](https://docs.sentry.io/api/events/list-a-projects-events/) -* [Issues](https://docs.sentry.io/api/events/list-a-projects-issues/) -* [Projects](https://docs.sentry.io/api/projects/list-your-projects/) -* [Releases](https://docs.sentry.io/api/releases/list-an-organizations-releases/) +- [Events](https://docs.sentry.io/api/events/list-a-projects-events/) +- [Issues](https://docs.sentry.io/api/events/list-a-projects-issues/) +- [Projects](https://docs.sentry.io/api/projects/list-your-projects/) +- [Releases](https://docs.sentry.io/api/releases/list-an-organizations-releases/) ## Data type map @@ -45,21 +45,21 @@ The Sentry source connector supports the following [sync modes](https://docs.air ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------| -| 0.2.2 | 2023-05-02 | [25759](https://github.com/airbytehq/airbyte/pull/25759) | Change stream that used in check_connection | -| 0.2.1 | 2023-04-27 | [25602](https://github.com/airbytehq/airbyte/pull/25602) | Add validation of project and organization names during connector setup | -| 0.2.0 | 2023-04-03 | [23923](https://github.com/airbytehq/airbyte/pull/23923) | Add 
Releases stream | -| 0.1.12 | 2023-03-01 | [23619](https://github.com/airbytehq/airbyte/pull/23619) | Fix bug when `stream state` is `None` or any other bad value occurs | -| 0.1.11 | 2023-02-02 | [22303](https://github.com/airbytehq/airbyte/pull/22303) | Turn ON default AvailabilityStrategy | -| 0.1.10 | 2023-01-27 | [22041](https://github.com/airbytehq/airbyte/pull/22041) | Set `AvailabilityStrategy` for streams explicitly to `None` | -| 0.1.9 | 2022-12-20 | [21864](https://github.com/airbytehq/airbyte/pull/21864) | Add state persistence to incremental sync | -| 0.1.8 | 2022-12-20 | [20709](https://github.com/airbytehq/airbyte/pull/20709) | Add incremental sync | -| 0.1.7 | 2022-09-30 | [17466](https://github.com/airbytehq/airbyte/pull/17466) | Migrate to per-stream states | -| 0.1.6 | 2022-08-29 | [16112](https://github.com/airbytehq/airbyte/pull/16112) | Revert back to the Python CDK | -| 0.1.5 | 2022-08-24 | [15911](https://github.com/airbytehq/airbyte/pull/15911) | Bugfix to allowing reading schemas at runtime | -| 0.1.4 | 2022-08-19 | [15800](https://github.com/airbytehq/airbyte/pull/15800) | Bugfix to allow reading sentry.yaml at runtime | -| 0.1.3 | 2022-08-17 | [15734](https://github.com/airbytehq/airbyte/pull/15734) | Fix yaml based on the new schema validator | -| 0.1.2 | 2021-12-28 | [15345](https://github.com/airbytehq/airbyte/pull/15345) | Migrate to config-based framework | -| 0.1.1 | 2021-12-28 | [8628](https://github.com/airbytehq/airbyte/pull/8628) | Update fields in source-connectors specifications | -| 0.1.0 | 2021-10-12 | [6975](https://github.com/airbytehq/airbyte/pull/6975) | New Source: Sentry | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :---------------------------------------------------------------------- | +| 0.2.2 | 2023-05-02 | [25759](https://github.com/airbytehq/airbyte/pull/25759) | Change stream that used in check_connection | +| 0.2.1 | 2023-04-27 | [25602](https://github.com/airbytehq/airbyte/pull/25602) | Add validation of project and organization names during connector setup | +| 0.2.0 | 2023-04-03 | [23923](https://github.com/airbytehq/airbyte/pull/23923) | Add Releases stream | +| 0.1.12 | 2023-03-01 | [23619](https://github.com/airbytehq/airbyte/pull/23619) | Fix bug when `stream state` is `None` or any other bad value occurs | +| 0.1.11 | 2023-02-02 | [22303](https://github.com/airbytehq/airbyte/pull/22303) | Turn ON default AvailabilityStrategy | +| 0.1.10 | 2023-01-27 | [22041](https://github.com/airbytehq/airbyte/pull/22041) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.1.9 | 2022-12-20 | [21864](https://github.com/airbytehq/airbyte/pull/21864) | Add state persistence to incremental sync | +| 0.1.8 | 2022-12-20 | [20709](https://github.com/airbytehq/airbyte/pull/20709) | Add incremental sync | +| 0.1.7 | 2022-09-30 | [17466](https://github.com/airbytehq/airbyte/pull/17466) | Migrate to per-stream states | +| 0.1.6 | 2022-08-29 | [16112](https://github.com/airbytehq/airbyte/pull/16112) | Revert back to the Python CDK | +| 0.1.5 | 2022-08-24 | [15911](https://github.com/airbytehq/airbyte/pull/15911) | Bugfix to allowing reading schemas at runtime | +| 0.1.4 | 2022-08-19 | [15800](https://github.com/airbytehq/airbyte/pull/15800) | Bugfix to allow reading sentry.yaml at runtime | +| 0.1.3 | 2022-08-17 | [15734](https://github.com/airbytehq/airbyte/pull/15734) | Fix yaml based on the new schema validator | +| 0.1.2 | 2021-12-28 | 
[15345](https://github.com/airbytehq/airbyte/pull/15345) | Migrate to config-based framework | +| 0.1.1 | 2021-12-28 | [8628](https://github.com/airbytehq/airbyte/pull/8628) | Update fields in source-connectors specifications | +| 0.1.0 | 2021-10-12 | [6975](https://github.com/airbytehq/airbyte/pull/6975) | New Source: Sentry | diff --git a/docs/integrations/sources/sftp-bulk.md b/docs/integrations/sources/sftp-bulk.md index f652a7a04668..8b1095a0a2c2 100644 --- a/docs/integrations/sources/sftp-bulk.md +++ b/docs/integrations/sources/sftp-bulk.md @@ -1,4 +1,5 @@ # SFTP Bulk + This page contains the setup guide and reference information for the SFTP Bulk source connector. This connector provides the following features not found in the standard SFTP source connector: @@ -9,11 +10,12 @@ This connector provides the following features not found in the standard SFTP so ## Prerequisites -* Access to a remote server that supports SFTP -* Host address -* Valid username and password associated with the host server +- Access to a remote server that supports SFTP +- Host address +- Valid username and password associated with the host server ## Setup guide + ### Step 1: Set up SFTP authentication To set up the SFTP connector, you will need to select at least _one_ of the following authentication methods: @@ -24,9 +26,9 @@ To set up the SFTP connector, you will need to select at least _one_ of the foll To set up key pair authentication, you may use the following steps as a guide: 1. Open your terminal or command prompt and use the `ssh-keygen` command to generate a new key pair. -:::note -If your operating system does not support the `ssh-keygen` command, you can use a third-party tool like [PuTTYgen](https://www.puttygen.com/) to generate the key pair instead. -::: + :::note + If your operating system does not support the `ssh-keygen` command, you can use a third-party tool like [PuTTYgen](https://www.puttygen.com/) to generate the key pair instead. + ::: 2. You will be prompted for a location to save the keys, and a passphrase to secure the private key. You can press enter to accept the default location and opt out of a passphrase if desired. Your two keys will be generated in the designated location as two separate files. The private key will usually be saved as `id_rsa`, while the public key will be saved with the `.pub` extension (`id_rsa.pub`). @@ -47,17 +49,17 @@ Depending on factors such as your operating system and the specific SSH implemen ssh @ ``` -For more information on SSH key pair authentication, please refer to the +For more information on SSH key pair authentication, please refer to the [official documentation](https://www.ssh.com/academy/ssh/keygen). ### Step 2: Set up the SFTP connector in Airbyte 1. [Log in to your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account, or navigate to your Airbyte Open Source dashboard. 2. In the left navigation bar, click **Sources**. In the top-right corner, click **+ New source**. -3. Find and select **SFTP** from the list of available sources. - -**For Airbyte Cloud users**: If you do not see the **SFTP Bulk** source listed, please make sure the **Alpha** checkbox at the top of the page is checked. - +3. Find and select **SFTP** from the list of available sources. + + **For Airbyte Cloud users**: If you do not see the **SFTP Bulk** source listed, please make sure the **Alpha** checkbox at the top of the page is checked. + 4. Enter a **Source name** of your choosing. 5. Enter your **Username**, as well as the **Host Address** and **Port**. 
The default port for SFTP is 22. If your remote server is using a different port, please enter it here. 6. Enter your authentication credentials for the SFTP server (**Password** or **Private Key**). If you are authenticating with a private key, you can upload the file containing the private key (usually named `rsa_id`) using the Upload file button. @@ -101,14 +103,13 @@ This pattern will filter for files that match the format `log-YYYYMMDD`, where ` The SFTP Bulk source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -| Feature | Support | Notes | -|:------------------------------|:--------:|:--------------------------------------------------------------------------------------| -| Full Refresh - Overwrite | ✅ | | -| Full Refresh - Append Sync | ✅ | | -| Incremental - Append | ✅ | | -| Incremental - Deduped History | ❌ | | -| Namespaces | ❌ | | - +| Feature | Support | Notes | +| :----------------------------- | :-----: | :---- | +| Full Refresh - Overwrite | ✅ | | +| Full Refresh - Append Sync | ✅ | | +| Incremental - Append | ✅ | | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ❌ | | ## Supported streams @@ -117,8 +118,8 @@ More formats \(e.g. Apache Avro\) will be supported in the future. ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:-------------|:----------------| +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :-------------------------------------------------------- | :---------------------------- | | 0.1.2 | 2023-04-19 | [#19224](https://github.com/airbytehq/airbyte/pull/19224) | Support custom CSV separators | -| 0.1.1 | 2023-03-17 | [#24180](https://github.com/airbytehq/airbyte/pull/24180) | Fix field order | -| 0.1.0 | 2021-24-05 | | Initial version | +| 0.1.1 | 2023-03-17 | [#24180](https://github.com/airbytehq/airbyte/pull/24180) | Fix field order | +| 0.1.0 | 2021-24-05 | | Initial version | diff --git a/docs/integrations/sources/sftp.md b/docs/integrations/sources/sftp.md index b5e45f0d5daf..8d6d84e942d9 100644 --- a/docs/integrations/sources/sftp.md +++ b/docs/integrations/sources/sftp.md @@ -1,13 +1,15 @@ # SFTP + This page contains the setup guide and reference information for the SFTP source connector. ## Prerequisites -* Access to a remote server that supports SFTP -* Host address -* Valid username and password associated with the host server +- Access to a remote server that supports SFTP +- Host address +- Valid username and password associated with the host server ## Setup guide + ### Step 1: Set up SFTP authentication To set up the SFTP connector, you will need to select _one_ of the following authentication methods: @@ -18,9 +20,9 @@ To set up the SFTP connector, you will need to select _one_ of the following aut To set up key pair authentication, you may use the following steps as a guide: 1. Open your terminal or command prompt and use the `ssh-keygen` command to generate a new key pair. -:::note -If your operating system does not support the `ssh-keygen` command, you can use a third-party tool like [PuTTYgen](https://www.puttygen.com/) to generate the key pair instead. -::: + :::note + If your operating system does not support the `ssh-keygen` command, you can use a third-party tool like [PuTTYgen](https://www.puttygen.com/) to generate the key pair instead. + ::: 2. You will be prompted for a location to save the keys, and a passphrase to secure the private key. 
You can press enter to accept the default location and opt out of a passphrase if desired. Your two keys will be generated in the designated location as two separate files. The private key will usually be saved as `id_rsa`, while the public key will be saved with the `.pub` extension (`id_rsa.pub`). @@ -41,17 +43,17 @@ Depending on factors such as your operating system and the specific SSH implemen ssh @ ``` -For more information on SSH key pair authentication, please refer to the +For more information on SSH key pair authentication, please refer to the [official documentation](https://www.ssh.com/academy/ssh/keygen). ### Step 2: Set up the SFTP connector in Airbyte 1. [Log in to your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account, or navigate to your Airbyte Open Source dashboard. 2. In the left navigation bar, click **Sources**. In the top-right corner, click **+ New source**. -3. Find and select **SFTP** from the list of available sources. - -**For Airbyte Cloud users**: If you do not see the **SFTP** source listed, please make sure the **Alpha** checkbox at the top of the page is checked. - +3. Find and select **SFTP** from the list of available sources. + + **For Airbyte Cloud users**: If you do not see the **SFTP** source listed, please make sure the **Alpha** checkbox at the top of the page is checked. + 4. Enter a **Source name** of your choosing. 5. Enter your **Username**, as well as the **Host Address** and **Port**. The default port for SFTP is 22. If your remote server is using a different port, please enter it here. 6. In the **Authentication** section, use the dropdown menu to select **Password Authentication** or **SSH Key Authentication**, then fill in the required credentials. If you are authenticating with a private key, you can upload the file containing the private key (usually named `rsa_id`) using the **Upload file** button. @@ -89,24 +91,22 @@ This pattern will filter for files that match the format `log-YYYYMMDD`, where ` The SFTP source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -| Feature | Support | Notes | -|:------------------------------|:-------:|:-------------------------------------------------------------------------------------| -| Full Refresh - Overwrite | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Full Refresh - Append Sync | ❌ | | -| Incremental - Append | ❌ | | -| Incremental - Deduped History | ❌ | | -| Namespaces | ❌ | | - +| Feature | Support | Notes | +| :----------------------------- | :-----: | :----------------------------------------------------------------------------------- | +| Full Refresh - Overwrite | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | +| Full Refresh - Append Sync | ❌ | | +| Incremental - Append | ❌ | | +| Incremental - Append + Deduped | ❌ | | +| Namespaces | ❌ | | ## Supported streams This source provides a single stream per file with a dynamic schema. The current supported file types are CSV and JSON. More formats \(e.g. Apache Avro\) will be supported in the future. 
- ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:-------------|:----------------| -| 0.1.2 | 2022-06-17 | [13864](https://github.com/airbytehq/airbyte/pull/13864) | Updated stacktrace format for any trace message errors | -| 0.1.0 | 2021-24-05 | | Initial version | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :----------------------------------------------------- | +| 0.1.2 | 2022-06-17 | [13864](https://github.com/airbytehq/airbyte/pull/13864) | Updated stacktrace format for any trace message errors | +| 0.1.0 | 2021-24-05 | | Initial version | diff --git a/docs/integrations/sources/square.md b/docs/integrations/sources/square.md index d1e9075bde04..0893780d3f72 100644 --- a/docs/integrations/sources/square.md +++ b/docs/integrations/sources/square.md @@ -6,8 +6,8 @@ This page contains the setup guide and reference information for the Square sour To set up the Square source connector with Airbyte, you'll need to create your Square Application and use Personal token or Oauth access token. - ## Setup guide + ### Step 1: Set up Square 1. Create [Square Application](https://developer.squareup.com/apps) @@ -21,25 +21,26 @@ To set up the Square source connector with Airbyte, you'll need to create your S 2. In the left navigation bar, click **Sources**. In the top-right corner, click **+ New source**. 3. On the Set up the source page, enter the name for the Square connector and select **Square** from the Source type dropdown. 4. Choose authentication method: - * Api-Key - * Fill in API key token with "Access token" from Square Application settings page (Credentials on the left) - * Oauth authentication - * Fill in Client ID and Client secret with data from Square Application settings page (Oauth on the left) - * Fill in refresh token with one obtained during the authentication process + - Api-Key + - Fill in API key token with "Access token" from Square Application settings page (Credentials on the left) + - Oauth authentication + - Fill in Client ID and Client secret with data from Square Application settings page (Oauth on the left) + - Fill in refresh token with one obtained during the authentication process 5. Choose if your account is sandbox 6. Choose start date 7. Choose if you would like to include Deleted objects (for streams: Items, Categories, Discounts, Taxes) ### For Airbyte OSS: + 1. Navigate to the Airbyte Open Source dashboard. -2. Set the name for your source. +2. Set the name for your source. 3. On the Set up the source page, enter the name for the Square connector and select **Square** from the Source type dropdown. 4. Choose authentication method: - * Api-Key - * Fill in API key token with "Access token" from Square Application settings page (Credentials on the left) - * Oauth authentication - * Fill in Client ID and Client secret with data from Square Application settings page (Oauth on the left) - * Fill in refresh token with one obtained during the authentication process + - Api-Key + - Fill in API key token with "Access token" from Square Application settings page (Credentials on the left) + - Oauth authentication + - Fill in Client ID and Client secret with data from Square Application settings page (Oauth on the left) + - Fill in refresh token with one obtained during the authentication process 5. Choose if your account is sandbox 6. Choose start date 7. 
Choose if you would like to include Deleted objects (for streams: Items, Categories, Discounts, Taxes) @@ -47,35 +48,35 @@ To set up the Square source connector with Airbyte, you'll need to create your S ## Supported sync modes The Square source connector supports the following [ sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) -* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) -* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) + +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams -* [Items](https://developer.squareup.com/explorer/square/catalog-api/search-catalog-objects) \(Incremental\) -* [Categories](https://developer.squareup.com/explorer/square/catalog-api/search-catalog-objects) \(Incremental\) -* [Discounts](https://developer.squareup.com/explorer/square/catalog-api/search-catalog-objects) \(Incremental\) -* [Taxes](https://developer.squareup.com/explorer/square/catalog-api/search-catalog-objects) \(Incremental\) -* [ModifierLists](https://developer.squareup.com/explorer/square/catalog-api/search-catalog-objects) \(Incremental\) -* [Payments](https://developer.squareup.com/reference/square_2022-10-19/payments-api/list-payments) \(Incremental\) -* [Refunds](https://developer.squareup.com/reference/square_2022-10-19/refunds-api/list-payment-refunds) \(Incremental\) -* [Locations](https://developer.squareup.com/explorer/square/locations-api/list-locations) -* [Team Members](https://developer.squareup.com/reference/square_2022-10-19/team-api/search-team-members) -* [List Team Member Wages](https://developer.squareup.com/explorer/square/labor-api/list-team-member-wages) -* [Customers](https://developer.squareup.com/explorer/square/customers-api/list-customers) -* [Shifts](https://developer.squareup.com/reference/square/labor-api/search-shifts) -* [Orders](https://developer.squareup.com/reference/square/orders-api/search-orders) +- [Items](https://developer.squareup.com/explorer/square/catalog-api/search-catalog-objects) \(Incremental\) +- [Categories](https://developer.squareup.com/explorer/square/catalog-api/search-catalog-objects) \(Incremental\) +- [Discounts](https://developer.squareup.com/explorer/square/catalog-api/search-catalog-objects) \(Incremental\) +- [Taxes](https://developer.squareup.com/explorer/square/catalog-api/search-catalog-objects) \(Incremental\) +- [ModifierLists](https://developer.squareup.com/explorer/square/catalog-api/search-catalog-objects) \(Incremental\) +- [Payments](https://developer.squareup.com/reference/square_2022-10-19/payments-api/list-payments) \(Incremental\) +- [Refunds](https://developer.squareup.com/reference/square_2022-10-19/refunds-api/list-payment-refunds) \(Incremental\) +-
[Locations](https://developer.squareup.com/explorer/square/locations-api/list-locations) +- [Team Members](https://developer.squareup.com/reference/square_2022-10-19/team-api/search-team-members) +- [List Team Member Wages](https://developer.squareup.com/explorer/square/labor-api/list-team-member-wages) +- [Customers](https://developer.squareup.com/explorer/square/customers-api/list-customers) +- [Shifts](https://developer.squareup.com/reference/square/labor-api/search-shifts) +- [Orders](https://developer.squareup.com/reference/square/orders-api/search-orders) ## Connector-specific features & highlights Useful links: -* [Square API Explorer](https://developer.squareup.com/explorer/square) -* [Square API Docs](https://developer.squareup.com/reference/square) -* [Square Developer Dashboard](https://developer.squareup.com/apps) - +- [Square API Explorer](https://developer.squareup.com/explorer/square) +- [Square API Docs](https://developer.squareup.com/reference/square) +- [Square Developer Dashboard](https://developer.squareup.com/apps) ## Performance considerations (if any) @@ -85,28 +86,27 @@ Exponential [Backoff](https://developer.squareup.com/forums/t/current-square-api ## Data type map | Integration Type | Airbyte Type | Notes | -|:-----------------|:-------------|:------| +| :--------------- | :----------- | :---- | | `string` | `string` | | | `integer` | `integer` | | | `array` | `array` | | | `object` | `object` | | | `boolean` | `boolean` | | - ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------| -| 1.1.2 | 2023-07-10 | [28019](https://github.com/airbytehq/airbyte/pull/28019) | fix display order of spec fields | -| 1.1.1 | 2023-06-28 | [27762](https://github.com/airbytehq/airbyte/pull/27762) | Update following state breaking changes | -| 1.1.0 | 2023-05-24 | [26485](https://github.com/airbytehq/airbyte/pull/26485) | Remove deprecated authSpecification in favour of advancedAuth | -| 1.0.1 | 2023-05-03 | [25784](https://github.com/airbytehq/airbyte/pull/25784) | Fix Authenticator | -| 1.0.0 | 2023-05-03 | [25784](https://github.com/airbytehq/airbyte/pull/25784) | Fix Authenticator | -| 0.2.2 | 2023-03-22 | [22867](https://github.com/airbytehq/airbyte/pull/22867) | Specified date formatting in specification | -| 0.2.1 | 2023-03-06 | [23231](https://github.com/airbytehq/airbyte/pull/23231) | Publish using low-code CDK Beta version | -| 0.2.0 | 2022-11-14 | [19369](https://github.com/airbytehq/airbyte/pull/19369) | Migrate to low code (YAML); update API to version 2022-10-19; update docs | -| 0.1.4 | 2021-12-02 | [6842](https://github.com/airbytehq/airbyte/pull/6842) | Added oauth support | -| 0.1.3 | 2021-12-06 | [8425](https://github.com/airbytehq/airbyte/pull/8425) | Update title, description fields in spec | -| 0.1.2 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | -| 0.1.1 | 2021-07-09 | [4645](https://github.com/airbytehq/airbyte/pull/4645) | Update \_send\_request method due to Airbyte CDK changes | -| 0.1.0 | 2021-06-30 | [4439](https://github.com/airbytehq/airbyte/pull/4439) | Initial release supporting the Square API | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :------------------------------------------------------------------------ | +| 1.1.2 | 2023-07-10 | 
[28019](https://github.com/airbytehq/airbyte/pull/28019) | fix display order of spec fields | +| 1.1.1 | 2023-06-28 | [27762](https://github.com/airbytehq/airbyte/pull/27762) | Update following state breaking changes | +| 1.1.0 | 2023-05-24 | [26485](https://github.com/airbytehq/airbyte/pull/26485) | Remove deprecated authSpecification in favour of advancedAuth | +| 1.0.1 | 2023-05-03 | [25784](https://github.com/airbytehq/airbyte/pull/25784) | Fix Authenticator | +| 1.0.0 | 2023-05-03 | [25784](https://github.com/airbytehq/airbyte/pull/25784) | Fix Authenticator | +| 0.2.2 | 2023-03-22 | [22867](https://github.com/airbytehq/airbyte/pull/22867) | Specified date formatting in specification | +| 0.2.1 | 2023-03-06 | [23231](https://github.com/airbytehq/airbyte/pull/23231) | Publish using low-code CDK Beta version | +| 0.2.0 | 2022-11-14 | [19369](https://github.com/airbytehq/airbyte/pull/19369) | Migrate to low code (YAML); update API to version 2022-10-19; update docs | +| 0.1.4 | 2021-12-02 | [6842](https://github.com/airbytehq/airbyte/pull/6842) | Added oauth support | +| 0.1.3 | 2021-12-06 | [8425](https://github.com/airbytehq/airbyte/pull/8425) | Update title, description fields in spec | +| 0.1.2 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | +| 0.1.1 | 2021-07-09 | [4645](https://github.com/airbytehq/airbyte/pull/4645) | Update \_send_request method due to Airbyte CDK changes | +| 0.1.0 | 2021-06-30 | [4439](https://github.com/airbytehq/airbyte/pull/4439) | Initial release supporting the Square API | diff --git a/docs/integrations/sources/strava.md b/docs/integrations/sources/strava.md index 2d8669942f8d..0ecc8c38641e 100644 --- a/docs/integrations/sources/strava.md +++ b/docs/integrations/sources/strava.md @@ -5,74 +5,79 @@ This page guides you through the process of setting up the Strava source connect ## Prerequisites Scopes: -* `activity:read_all` + +- `activity:read_all` ## Setup guide + ### Step 1: Set up Strava + **For Airbyte Open Source:** Follow these steps to get the required credentials and inputs: -* `client_id` and `client_secret` - * [Create a Strava account](https://developers.strava.com/docs/getting-started/#account) - * Continue to follow the instructions from the doc above to obtain `client_id` and `client_secret` -* `refresh_token` - * Enter this URL into your browser (make sure to add your `client_id` from previous step: - * `https://www.strava.com/oauth/authorize?client_id=[REPLACE_WITH_YOUR_CLIENT_ID]&response_type=code&redirect_uri=https://localhost/exchange_token&approval_prompt=force&scope=activity:read_all` - * Authorize through the UI - * Browser will redirect you to an empty page with a URL similar to `https://localhost/exchange_token?state=&code=b55003496d87a9f0b694ca1680cd5690d27d9d28&scope=activity:read_all` - * Copy the authorization code above (in this example it would be `b55003496d87a9f0b694ca1680cd5690d27d9d28`) - * Make a cURL request to exchange the authorization code and scope for a refresh token: - * ``` - curl -X POST https://www.strava.com/oauth/token \ - -F client_id=YOUR_CLIENT_ID \ - -F client_secret=YOUR_CLIENT_SECRET \ - -F code=AUTHORIZATION_CODE \ - -F grant_type=authorization_code - ``` - * The resulting json will contain the `refresh_token` - * Example Result: - * ``` - { - "token_type": "Bearer", - "expires_at": 1562908002, - "expires_in": 21600, - "refresh_token": "REFRESHTOKEN", - "access_token": "ACCESSTOKEN", - "athlete": { - "id": 123456, - "username": "MeowTheCat", - 
"resource_state": 2, - "firstname": "Meow", - "lastname": "TheCat", - "city": "", - "state": "", - "country": null, - ... - } - } - ``` - * Refer to Strava's [Getting Started - Oauth](https://developers.strava.com/docs/getting-started/#oauth) or [Authentication](https://developers.strava.com/docs/authentication/) documents for more information -* `athlete_id` - * Go to your athlete page by clicking your name on the [Strava dashboard](https://www.strava.com/dashboard) or click on "My Profile" on the drop down after hovering on your top bar icon - * The number at the end of the url will be your `athlete_id`. For example `17831421` would be the `athlete_id` for https://www.strava.com/athletes/17831421 + +- `client_id` and `client_secret` + - [Create a Strava account](https://developers.strava.com/docs/getting-started/#account) + - Continue to follow the instructions from the doc above to obtain `client_id` and `client_secret` +- `refresh_token` + - Enter this URL into your browser (make sure to add your `client_id` from previous step: + - `https://www.strava.com/oauth/authorize?client_id=[REPLACE_WITH_YOUR_CLIENT_ID]&response_type=code&redirect_uri=https://localhost/exchange_token&approval_prompt=force&scope=activity:read_all` + - Authorize through the UI + - Browser will redirect you to an empty page with a URL similar to `https://localhost/exchange_token?state=&code=b55003496d87a9f0b694ca1680cd5690d27d9d28&scope=activity:read_all` + - Copy the authorization code above (in this example it would be `b55003496d87a9f0b694ca1680cd5690d27d9d28`) + - Make a cURL request to exchange the authorization code and scope for a refresh token: + - ``` + curl -X POST https://www.strava.com/oauth/token \ + -F client_id=YOUR_CLIENT_ID \ + -F client_secret=YOUR_CLIENT_SECRET \ + -F code=AUTHORIZATION_CODE \ + -F grant_type=authorization_code + ``` + - The resulting json will contain the `refresh_token` + - Example Result: + - ``` + { + "token_type": "Bearer", + "expires_at": 1562908002, + "expires_in": 21600, + "refresh_token": "REFRESHTOKEN", + "access_token": "ACCESSTOKEN", + "athlete": { + "id": 123456, + "username": "MeowTheCat", + "resource_state": 2, + "firstname": "Meow", + "lastname": "TheCat", + "city": "", + "state": "", + "country": null, + ... + } + } + ``` + - Refer to Strava's [Getting Started - Oauth](https://developers.strava.com/docs/getting-started/#oauth) or [Authentication](https://developers.strava.com/docs/authentication/) documents for more information +- `athlete_id` + - Go to your athlete page by clicking your name on the [Strava dashboard](https://www.strava.com/dashboard) or click on "My Profile" on the drop down after hovering on your top bar icon + - The number at the end of the url will be your `athlete_id`. For example `17831421` would be the `athlete_id` for https://www.strava.com/athletes/17831421 + **For Airbyte Cloud:** -* `athlete_id` - * Go to your athlete page by clicking your name on the [Strava dashboard](https://www.strava.com/dashboard) or click on "My Profile" on the drop down after hovering on your top bar icon - * The number at the end of the url will be your `athlete_id`. For example `17831421` would be the `athlete_id` for https://www.strava.com/athletes/17831421 +- `athlete_id` + - Go to your athlete page by clicking your name on the [Strava dashboard](https://www.strava.com/dashboard) or click on "My Profile" on the drop down after hovering on your top bar icon + - The number at the end of the url will be your `athlete_id`. 
For example `17831421` would be the `athlete_id` for https://www.strava.com/athletes/17831421 - ### Step 2: Set up the source connector in Airbyte + **For Airbyte Cloud:** 1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. @@ -85,12 +90,13 @@ Follow these steps to get the required credentials and inputs: + **For Airbyte Open Source:** 1. Go to local Airbyte page. 2. In the left navigation bar, click **Sources**. In the top-right corner, click **+ new source**. 3. On the source setup page, select **Strava** from the Source type dropdown and enter a name for this connector. -4. Add **Client ID**, **Client Secret** and **Refresh Token** +4. Add **Client ID**, **Client Secret** and **Refresh Token** 5. Set required **Athlete ID** and **Start Date** 6. Click `Set up source`. @@ -102,12 +108,12 @@ The Strava source connector supports the following [sync modes](https://docs.air - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental Sync - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported streams -* [Athlete Stats](https://developers.strava.com/docs/reference/#api-Athletes-getStats) -* [Activities](https://developers.strava.com/docs/reference/#api-Activities-getLoggedInAthleteActivities) \(Incremental\) +- [Athlete Stats](https://developers.strava.com/docs/reference/#api-Athletes-getStats) +- [Activities](https://developers.strava.com/docs/reference/#api-Activities-getLoggedInAthleteActivities) \(Incremental\) ## Performance considerations More information about Strava rate limits and adjustments to those limits can be ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:-----------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :--------------------------------------------------------- | | 0.1.4 | 2023-03-23 | [24368](https://github.com/airbytehq/airbyte/pull/24368) | Add date-time format for input | | 0.1.3 | 2023-03-15 | [24101](https://github.com/airbytehq/airbyte/pull/24101) | certified to beta, fixed spec, fixed SAT, added unit tests | | 0.1.2 | 2021-12-15 | [8799](https://github.com/airbytehq/airbyte/pull/8799) | Implement OAuth 2.0 support | | 0.1.1 | 2021-12-06 | [8425](https://github.com/airbytehq/airbyte/pull/8425) | Update title, description fields in spec | | 0.1.0 | 2021-10-18 | [7151](https://github.com/airbytehq/airbyte/pull/7151) | Initial release supporting Strava API | - diff --git a/docs/integrations/sources/surveycto.md b/docs/integrations/sources/surveycto.md index 1e399fec4bf7..3e017bb8f4a8 100644 --- a/docs/integrations/sources/surveycto.md +++ b/docs/integrations/sources/surveycto.md @@ -11,20 +11,22 @@ This page guides you through the process of setting up the SurveyCTO source conn - Start Date `Start Date default` ## How to setup a SurveyCTO Account + - create the account - create your form - publish your form - give your user an API consumer permission to the existing role
or create a user with that role and permission. ## Set up the SurveyCTO source connection + 1. Log into your [Airbyte Cloud](https://cloud.airbyte.com/workspaces) or Airbyte Open Source account. 2. Click **Sources** and then click **+ New source**. 3. On the Set up the source page, select **Survey CTO** from the Source type dropdown. 4. Enter a name for your source. -5. Enter a Server name for your SurveyCTO account. +5. Enter a Server name for your SurveyCTO account. 6. Enter a Username for SurveyCTO account. 7. Enter a Password for SurveyCTO account. -8. Form ID's (We can multiple forms id here to pull from) +8. Form IDs (you can pass multiple form IDs here to pull from) 9. Start Date (This can be pass to pull the data from particular date) 10. Click **Set up source**. @@ -32,10 +34,10 @@ This page guides you through the process of setting up the SurveyCTO source conn The SurveyCTO source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) -* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) -* [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -* (Recommended)[ Incremental Sync - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- (Recommended) [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams @@ -46,6 +48,6 @@ The SurveyCTO source connector supports the following streams: ## Changelog | Version | Date | Pull Request | Subject | -| 0.1.2 | 2023-07-27 | [28512](https://github.com/airbytehq/airbyte/pull/28512) | Added Check Connection | -| 0.1.1 | 2023-04-25 | [24784](https://github.com/airbytehq/airbyte/pull/24784) | Fix incremental sync | -| 0.1.0 | 2022-11-16 | [19371](https://github.com/airbytehq/airbyte/pull/19371) | SurveyCTO Source Connector | +| 0.1.2 | 2023-07-27 | [28512](https://github.com/airbytehq/airbyte/pull/28512) | Added Check Connection | +| 0.1.1 | 2023-04-25 | [24784](https://github.com/airbytehq/airbyte/pull/24784) | Fix incremental sync | +| 0.1.0 | 2022-11-16 | [19371](https://github.com/airbytehq/airbyte/pull/19371) | SurveyCTO Source Connector | diff --git a/docs/integrations/sources/tempo.md b/docs/integrations/sources/tempo.md index 7e9b57e6ea1e..9a4c773b7003 100644 --- a/docs/integrations/sources/tempo.md +++ b/docs/integrations/sources/tempo.md @@ -4,16 +4,16 @@ This page contains the setup guide and reference information for the Tempo sourc ## Prerequisites -* API Token +- API Token ## Setup guide + ### Step 1: Set up Tempo Source Tempo is designed to interact with the data your permissions give you access to. To do so, you will need to generate a Tempo OAuth 2.0 token for an individual user. Go to **Tempo > Settings**, scroll down to **Data Access** and select **API integration**. - ## Step 2: Set up the Tempo connector in Airbyte 1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account.
@@ -22,31 +22,30 @@ Go to **Tempo > Settings**, scroll down to **Data Access** and select **API i 4. Enter your API token that you obtained from Tempo. 5. Click **Set up source**. - ## Supported sync modes The Tempo source connector supports the following [ sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) -* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) -* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams This connector outputs the following streams: -* [Accounts](https://apidocs.tempo.io/#tag/Accounts) -* [Customers](https://apidocs.tempo.io/#tag/Customers) -* [Worklogs](https://apidocs.tempo.io/#tag/Worklogs) -* [Workload Schemes](https://apidocs.tempo.io/#tag/Workload-Schemes) +- [Accounts](https://apidocs.tempo.io/#tag/Accounts) +- [Customers](https://apidocs.tempo.io/#tag/Customers) +- [Worklogs](https://apidocs.tempo.io/#tag/Worklogs) +- [Workload Schemes](https://apidocs.tempo.io/#tag/Workload-Schemes) If there are more endpoints you'd like Airbyte to support, please [create an issue.](https://github.com/airbytehq/airbyte/issues/new/choose) ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:----------------------------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :-------------------------------------------------------- | | 0.3.1 | 2023-03-06 | [23231](https://github.com/airbytehq/airbyte/pull/23231) | Publish using low-code CDK Beta version | | 0.3.0 | 2022-11-02 | [18936](https://github.com/airbytehq/airbyte/pull/18936) | Migrate to low code + certify to Beta + migrate to API v4 | | 0.2.6 | 2022-09-08 | [16361](https://github.com/airbytehq/airbyte/pull/16361) | Avoid infinite loop for non-paginated APIs | diff --git a/docs/integrations/sources/woocommerce.md b/docs/integrations/sources/woocommerce.md index 9550c6255e20..c1459977b429 100644 --- a/docs/integrations/sources/woocommerce.md +++ b/docs/integrations/sources/woocommerce.md @@ -33,6 +33,7 @@ You will need to generate new API key with read permissions and use `Customer ke 6. Choose start date you want to start sync from. 7. (Optional) Fill in Conversion Window. + ### For Airbyte OSS: 1. Navigate to the Airbyte Open Source dashboard.
@@ -52,8 +53,9 @@ following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-s - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) + ## Supported Streams - [Coupons](https://woocommerce.github.io/woocommerce-rest-api-docs/#coupons) \(Incremental\) @@ -97,11 +99,11 @@ Useful links: ## Changelog -| Version | Date | Pull Request | Subject | -| :------ | :--------- | :------------------------------------------------------- | :---------------------------------------------------------------------- | -| 0.2.3 | 2023-06-02 | [26955](https://github.com/airbytehq/airbyte/pull/26955) | Added `block_context` and `author` properties to the `Products` stream | -| 0.2.2 | 2023-03-03 | [23599](https://github.com/airbytehq/airbyte/pull/23599) | Fix pagination and removed lookback window | -| 0.2.1 | 2023-02-10 | [22821](https://github.com/airbytehq/airbyte/pull/22821) | Specified date formatting in specification | -| 0.2.0 | 2022-11-30 | [19903](https://github.com/airbytehq/airbyte/pull/19903) | Migrate to low-code; Certification to Beta | -| 0.1.1 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | -| 0.1.0 | 2021-09-09 | [5955](https://github.com/airbytehq/airbyte/pull/5955) | Initial Release. Source WooCommerce | +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :--------------------------------------------------------------------- | +| 0.2.3 | 2023-06-02 | [26955](https://github.com/airbytehq/airbyte/pull/26955) | Added `block_context` and `author` properties to the `Products` stream | +| 0.2.2 | 2023-03-03 | [23599](https://github.com/airbytehq/airbyte/pull/23599) | Fix pagination and removed lookback window | +| 0.2.1 | 2023-02-10 | [22821](https://github.com/airbytehq/airbyte/pull/22821) | Specified date formatting in specification | +| 0.2.0 | 2022-11-30 | [19903](https://github.com/airbytehq/airbyte/pull/19903) | Migrate to low-code; Certification to Beta | +| 0.1.1 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | +| 0.1.0 | 2021-09-09 | [5955](https://github.com/airbytehq/airbyte/pull/5955) | Initial Release. Source WooCommerce | diff --git a/docs/integrations/sources/yandex-metrica.md b/docs/integrations/sources/yandex-metrica.md index d988711a3ca6..f1cb3079b9bb 100644 --- a/docs/integrations/sources/yandex-metrica.md +++ b/docs/integrations/sources/yandex-metrica.md @@ -34,7 +34,6 @@ This page contains the setup guide and reference information for the Yandex Metr 7. Enter the Start Date in format `YYYY-MM-DD`. 8. Enter the End Date in format `YYYY-MM-DD` (Optional). - #### For Airbyte Open Source: 1. Navigate to the Airbyte Open Source dashboard.
@@ -50,15 +49,15 @@ This page contains the setup guide and reference information for the Yandex Metr The Yandex Metrica source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) -* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) -* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams -* [Views](https://yandex.com/dev/metrika/doc/api2/logs/fields/hits.html) \(Incremental\). -* [Sessions](https://yandex.com/dev/metrika/doc/api2/logs/fields/visits.html) \(Incremental\). +- [Views](https://yandex.com/dev/metrika/doc/api2/logs/fields/hits.html) \(Incremental\). +- [Sessions](https://yandex.com/dev/metrika/doc/api2/logs/fields/visits.html) \(Incremental\). ## Performance considerations @@ -79,7 +78,7 @@ Because of the way API works some syncs may take a long time to finish. Timeout ## Data type mapping | Integration Type | Airbyte Type | Notes | -|:-----------------|:-------------|:------| +| :--------------- | :----------- | :---- | | `string` | `string` | | | `integer` | `integer` | | | `number` | `number` | | @@ -89,6 +88,6 @@ Because of the way API works some syncs may take a long time to finish. Timeout ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:----------------------------------------| +| :------ | :--------- | :------------------------------------------------------- | :-------------------------------------- | | 1.0.0 | 2023-03-20 | [24188](https://github.com/airbytehq/airbyte/pull/24188) | Migrate to Beta; Change state structure | | 0.1.0 | 2022-09-09 | [15061](https://github.com/airbytehq/airbyte/pull/15061) | 🎉 New Source: Yandex metrica | diff --git a/docs/integrations/sources/zendesk-chat.md b/docs/integrations/sources/zendesk-chat.md index 098b276c8491..1a1cd588376f 100644 --- a/docs/integrations/sources/zendesk-chat.md +++ b/docs/integrations/sources/zendesk-chat.md @@ -12,6 +12,7 @@ This page contains the setup guide and reference information for the Zendesk Cha ## Setup guide + **For Airbyte Cloud:** 1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. @@ -25,6 +26,7 @@ This page contains the setup guide and reference information for the Zendesk Cha + **For Airbyte Open Source:** 1. Navigate to the Airbyte Open Source dashboard.
@@ -41,25 +43,25 @@ This page contains the setup guide and reference information for the Zendesk Cha The Zendesk Chat source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): -* [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) -* [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) -* [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -* [Incremental - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams -* [Accounts](https://developer.zendesk.com/rest_api/docs/chat/accounts#show-account) -* [Agents](https://developer.zendesk.com/rest_api/docs/chat/agents#list-agents) \(Incremental\) -* [Agent Timelines](https://developer.zendesk.com/rest_api/docs/chat/incremental_export#incremental-agent-timeline-export) \(Incremental\) -* [Chats](https://developer.zendesk.com/rest_api/docs/chat/chats#list-chats) -* [Shortcuts](https://developer.zendesk.com/rest_api/docs/chat/shortcuts#list-shortcuts) -* [Triggers](https://developer.zendesk.com/rest_api/docs/chat/triggers#list-triggers) -* [Bans](https://developer.zendesk.com/rest_api/docs/chat/bans#list-bans) \(Incremental\) -* [Departments](https://developer.zendesk.com/rest_api/docs/chat/departments#list-departments) -* [Goals](https://developer.zendesk.com/rest_api/docs/chat/goals#list-goals) -* [Skills](https://developer.zendesk.com/rest_api/docs/chat/skills#list-skills) -* [Roles](https://developer.zendesk.com/rest_api/docs/chat/roles#list-roles) -* [Routing Settings](https://developer.zendesk.com/rest_api/docs/chat/routing_settings#show-account-routing-settings) +- [Accounts](https://developer.zendesk.com/rest_api/docs/chat/accounts#show-account) +- [Agents](https://developer.zendesk.com/rest_api/docs/chat/agents#list-agents) \(Incremental\) +- [Agent Timelines](https://developer.zendesk.com/rest_api/docs/chat/incremental_export#incremental-agent-timeline-export) \(Incremental\) +- [Chats](https://developer.zendesk.com/rest_api/docs/chat/chats#list-chats) +- [Shortcuts](https://developer.zendesk.com/rest_api/docs/chat/shortcuts#list-shortcuts) +- [Triggers](https://developer.zendesk.com/rest_api/docs/chat/triggers#list-triggers) +- [Bans](https://developer.zendesk.com/rest_api/docs/chat/bans#list-bans) \(Incremental\) +- [Departments](https://developer.zendesk.com/rest_api/docs/chat/departments#list-departments) +- [Goals](https://developer.zendesk.com/rest_api/docs/chat/goals#list-goals) +- [Skills](https://developer.zendesk.com/rest_api/docs/chat/skills#list-skills) +- [Roles](https://developer.zendesk.com/rest_api/docs/chat/roles#list-roles) +- [Routing Settings](https://developer.zendesk.com/rest_api/docs/chat/routing_settings#show-account-routing-settings) ## Performance considerations @@ -77,11 +79,11 @@ The connector is restricted by Zendesk's [requests limitation](https://developer ## Changelog | Version
| Date | Pull Request | Subject | -|:--------| :--------- | :------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------- | -| 0.1.14 | 2023-02-10 | [24190](https://github.com/airbytehq/airbyte/pull/24190) | Fix remove too high min/max from account stream | -| 0.1.13 | 2023-02-10 | [22819](https://github.com/airbytehq/airbyte/pull/22819) | Specified date formatting in specification | -| 0.1.12 | 2023-01-27 | [22026](https://github.com/airbytehq/airbyte/pull/22026) | Set `AvailabilityStrategy` for streams explicitly to `None` | -| 0.1.11 | 2022-10-18 | [17745](https://github.com/airbytehq/airbyte/pull/17745) | Add Engagements Stream and fix infity looping | +| :------ | :--------- | :------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------- | +| 0.1.14 | 2023-02-10 | [24190](https://github.com/airbytehq/airbyte/pull/24190) | Fix remove too high min/max from account stream | +| 0.1.13 | 2023-02-10 | [22819](https://github.com/airbytehq/airbyte/pull/22819) | Specified date formatting in specification | +| 0.1.12 | 2023-01-27 | [22026](https://github.com/airbytehq/airbyte/pull/22026) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.1.11 | 2022-10-18 | [17745](https://github.com/airbytehq/airbyte/pull/17745) | Add Engagements Stream and fix infity looping | | 0.1.10 | 2022-09-28 | [17326](https://github.com/airbytehq/airbyte/pull/17326) | Migrate to per-stream states. | | 0.1.9 | 2022-08-23 | [15879](https://github.com/airbytehq/airbyte/pull/15879) | Corrected specification and stream schemas to support backward capability | | 0.1.8 | 2022-06-28 | [13387](https://github.com/airbytehq/airbyte/pull/13387) | Add state checkpoint to allow long runs | diff --git a/docs/understanding-airbyte/basic-normalization.md b/docs/understanding-airbyte/basic-normalization.md index 1e0eb605f975..bbebf577fdd7 100644 --- a/docs/understanding-airbyte/basic-normalization.md +++ b/docs/understanding-airbyte/basic-normalization.md @@ -25,6 +25,7 @@ Basic Normalization uses a fixed set of rules to map a json object from a source ``` The destination connectors produce the following raw table in the destination database: + ```sql CREATE TABLE "_airbyte_raw_cars" ( -- metadata added by airbyte @@ -53,6 +54,7 @@ CREATE TABLE "cars" ( ## Normalization metadata columns You'll notice that some metadata are added to keep track of important information about each record. + - Some are introduced at the destination connector level: These are propagated by the normalization process from the raw table to the final table - `_airbyte_ab_id`: uuid value assigned by connectors to each row of the data written in the destination. - `_airbyte_emitted_at`: time at which the record was emitted and recorded by destination connector. @@ -61,6 +63,7 @@ You'll notice that some metadata are added to keep track of important informatio - `_airbyte_normalized_at`: time at which the record was last normalized (useful to track when incremental transformations are performed) Additional metadata columns can be added on some tables depending on the usage: + - On the Slowly Changing Dimension (SCD) tables: - `_airbyte_start_at`: equivalent to the cursor column defined on the table, denotes when the row was first seen - `_airbyte_end_at`: denotes until when the row was seen with these particular values. 
If this column is not NULL, then the record has been updated and is no longer the most up to date one. If NULL, then the row is the latest version for the record. @@ -89,19 +92,19 @@ To summarize, we can represent the ELT process in the diagram below. These are s In Airbyte, the current normalization option is implemented using a dbt Transformer composed of: -* Airbyte base-normalization python package to generate dbt SQL models files -* dbt to compile and executes the models on top of the data in the destinations that supports it. +- Airbyte base-normalization python package to generate dbt SQL models files +- dbt to compile and executes the models on top of the data in the destinations that supports it. ## Destinations that Support Basic Normalization -* [BigQuery](../integrations/destinations/bigquery.md) -* [MS Server SQL](../integrations/destinations/mssql.md) -* [MySQL](../integrations/destinations/mysql.md) - * The server must support the `WITH` keyword. - * Require MySQL >= 8.0, or MariaDB >= 10.2.1. -* [Postgres](../integrations/destinations/postgres.md) -* [Redshift](../integrations/destinations/redshift.md) -* [Snowflake](../integrations/destinations/snowflake.md) +- [BigQuery](../integrations/destinations/bigquery.md) +- [MS Server SQL](../integrations/destinations/mssql.md) +- [MySQL](../integrations/destinations/mysql.md) + - The server must support the `WITH` keyword. + - Require MySQL >= 8.0, or MariaDB >= 10.2.1. +- [Postgres](../integrations/destinations/postgres.md) +- [Redshift](../integrations/destinations/redshift.md) +- [Snowflake](../integrations/destinations/snowflake.md) Basic Normalization can be configured when you're creating the connection between your Connection Setup and after in the Transformation Tab. Select the option: **Normalized tabular data**. @@ -114,16 +117,16 @@ Airbyte tracks types using JsonSchema's primitive types. Here is how these types Airbyte uses the types described in the catalog to determine the correct type for each column. It does not try to use the values themselves to infer the type. -| JsonSchema Type | Resulting Type | Notes | -| :--- | :--- | :--- | -| `number` | float | | -| `integer` | integer | | -| `string` | string | | -| `bit` | boolean | | -| `boolean` | boolean | | -| `string` with format label `date-time`| timestamp with timezone | | -| `array` | new table | see [nesting](basic-normalization.md#Nesting) | -| `object` | new table | see [nesting](basic-normalization.md#Nesting) | +| JsonSchema Type | Resulting Type | Notes | +| :------------------------------------- | :---------------------- | :-------------------------------------------- | +| `number` | float | | +| `integer` | integer | | +| `string` | string | | +| `bit` | boolean | | +| `boolean` | boolean | | +| `string` with format label `date-time` | timestamp with timezone | | +| `array` | new table | see [nesting](basic-normalization.md#Nesting) | +| `object` | new table | see [nesting](basic-normalization.md#Nesting) | ### Nesting @@ -254,11 +257,11 @@ For example, if we had a `cars` table with a nested column `cars` containing an The expanded table would have a conflict in terms of naming since both are named `cars`. 
To avoid name collisions and ensure a more consistent naming scheme, Basic Normalization chooses the expanded name as follows: -* `cars` for the original parent table -* `cars_da3_cars` for the expanded nested columns following this naming scheme in 3 parts: `__` -* Json path: The entire json path string with '\_' characters used as delimiters to reach the table that contains the nested column name. -* Hash: Hash of the entire json path to reach the nested column reduced to 3 characters. This is to make sure we have a unique name \(in case part of the name gets truncated, see below\) -* Nested column name: name of the column being expanded into its own table. +- `cars` for the original parent table +- `cars_da3_cars` for the expanded nested columns following this naming scheme in 3 parts: `__` +- Json path: The entire json path string with '\_' characters used as delimiters to reach the table that contains the nested column name. +- Hash: Hash of the entire json path to reach the nested column reduced to 3 characters. This is to make sure we have a unique name \(in case part of the name gets truncated, see below\) +- Nested column name: name of the column being expanded into its own table. By following this strategy, nested columns should "never" collide with other table names. If it does, an exception will probably be thrown either by the normalization process or by dbt that runs afterward. @@ -300,18 +303,18 @@ However, in the rare cases where these limits are reached: As an example from the hubspot source, we could have the following tables with nested columns: -| Description | Example 1 | Example 2 | -| :--- | :--- | :--- | -| Original Stream Name | companies | deals | -| Json path to the nested column | `companies/property_engagements_last_meeting_booked_campaign` | `deals/properties/engagements_last_meeting_booked_medium` | -| Final table name of expanded nested column on BigQuery | companies\_2e8\_property\_engag**ements\_last\_meeting\_bo**oked\_campaign | deals\_prop**erties**\_6e6\_engagements\_l**ast\_meeting\_**booked\_medium | -| Final table name of expanded nested column on Postgres | companies\_2e8\_property\_engag**\_\_**oked\_campaign | deals\_prop\_6e6\_engagements\_l**\_\_**booked\_medium | +| Description | Example 1 | Example 2 | +| :----------------------------------------------------- | :------------------------------------------------------------------ | :-------------------------------------------------------------------- | +| Original Stream Name | companies | deals | +| Json path to the nested column | `companies/property_engagements_last_meeting_booked_campaign` | `deals/properties/engagements_last_meeting_booked_medium` | +| Final table name of expanded nested column on BigQuery | companies_2e8_property_engag**ements_last_meeting_bo**oked_campaign | deals_prop**erties**\_6e6_engagements_l**ast_meeting\_**booked_medium | +| Final table name of expanded nested column on Postgres | companies_2e8_property_engag**\_\_**oked_campaign | deals_prop_6e6_engagements_l**\_\_**booked_medium | As mentioned in the overview: -* Airbyte places the json blob version of your data in a table called `_airbyte_raw_`. -* If basic normalization is turned on, it will place a separate copy of the data in a table called ``. -* In certain pathological cases, basic normalization is required to generate large models with many columns and multiple intermediate transformation steps for a stream. 
This may break down the "ephemeral" materialization strategy and require the use of additional intermediate views or tables instead. As a result, you may notice additional temporary tables being generated in the destination to handle these checkpoints. +- Airbyte places the json blob version of your data in a table called `_airbyte_raw_`. +- If basic normalization is turned on, it will place a separate copy of the data in a table called ``. +- In certain pathological cases, basic normalization is required to generate large models with many columns and multiple intermediate transformation steps for a stream. This may break down the "ephemeral" materialization strategy and require the use of additional intermediate views or tables instead. As a result, you may notice additional temporary tables being generated in the destination to handle these checkpoints. ## UI Configurations @@ -321,7 +324,7 @@ To enable basic normalization \(which is optional\), you can toggle it on or dis ## Incremental runs -When the source is configured with sync modes compatible with incremental transformations (using append on destination) such as ( [full_refresh_append](connections/full-refresh-append.md), [incremental append](connections/incremental-append.md) or [incremental deduped history](connections/incremental-deduped-history.md)), only rows that have changed in the source are transferred over the network and written by the destination connector. +When the source is configured with sync modes compatible with incremental transformations (using append on destination) such as ( [full_refresh_append](connections/full-refresh-append.md), [incremental append](connections/incremental-append.md) or [incremental deduped history](connections/incremental-append-deduped.md)), only rows that have changed in the source are transferred over the network and written by the destination connector. Normalization will then try to build the normalized tables incrementally as the rows in the raw tables that have been created or updated since the last time dbt ran. As such, on each dbt run, the models get built incrementally. This limits the amount of data that needs to be transformed, vastly reducing the runtime of the transformations. This improves warehouse performance and reduces compute costs. Because normalization can be either run incrementally and, or, in full refresh, a technical column `_airbyte_normalized_at` can serve to track when was the last time a record has been transformed and written by normalization. This may greatly diverge from the `_airbyte_emitted_at` value as the normalized tables could be totally re-built at a latter time from the data stored in the `_airbyte_raw` tables. @@ -333,15 +336,15 @@ Normalization produces tables that are partitioned, clustered, sorted or indexed In general, normalization needs to do lookup on the last emitted_at column to know if a record is freshly produced and need to be incrementally processed or not. But in certain models, such as SCD tables for example, we also need to retrieve older data to update their type 2 SCD end_date and active_row flags, thus a different partitioning scheme is used to optimize that use case. 
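As a rough, simplified sketch of the incremental pattern described above (not the exact SQL that normalization generates), a dbt model can restrict reprocessing to newly emitted rows by filtering on the `_airbyte_emitted_at` metadata column; the source name `airbyte_raw` and the `cars` stream below are illustrative only:

```sql
-- Simplified illustration of an incremental normalization model: on incremental
-- runs, only rows emitted after the latest already-normalized watermark are read.
{{ config(materialized='incremental') }}

select
    _airbyte_ab_id,
    _airbyte_emitted_at,
    _airbyte_data
from {{ source('airbyte_raw', '_airbyte_raw_cars') }}
{% if is_incremental() %}
  where _airbyte_emitted_at > (select max(_airbyte_emitted_at) from {{ this }})
{% endif %}
```

The SCD models mentioned above cannot rely on this filter alone, since older rows may also need their `_airbyte_end_at` and active-row flags updated.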
-On Postgres destination, an additional table suffixed with `_stg` for every stream replicated in [incremental deduped history](connections/incremental-deduped-history.md) needs to be persisted (in a different staging schema) for incremental transformations to work because of a [limitation](https://github.com/dbt-labs/docs.getdbt.com/issues/335#issuecomment-694199569). +On Postgres destination, an additional table suffixed with `_stg` for every stream replicated in [incremental deduped history](connections/incremental-append-deduped.md) needs to be persisted (in a different staging schema) for incremental transformations to work because of a [limitation](https://github.com/dbt-labs/docs.getdbt.com/issues/335#issuecomment-694199569). ## Extending Basic Normalization Note that all the choices made by Normalization as described in this documentation page in terms of naming (and more) could be overridden by your own custom choices. To do so, you can follow the following tutorials: -* to build a [custom SQL view](../operator-guides/transformation-and-normalization/transformations-with-sql.md) with your own naming conventions -* to export, edit and run [custom dbt normalization](../operator-guides/transformation-and-normalization/transformations-with-dbt.md) yourself -* or further, you can configure the use of a custom dbt project within Airbyte by following [this guide](../operator-guides/transformation-and-normalization/transformations-with-airbyte.md). +- to build a [custom SQL view](../operator-guides/transformation-and-normalization/transformations-with-sql.md) with your own naming conventions +- to export, edit and run [custom dbt normalization](../operator-guides/transformation-and-normalization/transformations-with-dbt.md) yourself +- or further, you can configure the use of a custom dbt project within Airbyte by following [this guide](../operator-guides/transformation-and-normalization/transformations-with-airbyte.md). 
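As a rough illustration of the first option above (a custom SQL view with your own naming conventions), a hand-written view can be defined directly over a raw table. The sketch below assumes a Postgres destination and the `cars` stream from earlier; the extracted fields `make` and `model` are hypothetical and depend on your source schema:

```sql
-- Hypothetical custom view over the raw table (Postgres syntax). The JSON
-- fields 'make' and 'model' are placeholders for whatever your source emits.
CREATE VIEW cars_custom AS
SELECT
    _airbyte_ab_id,
    _airbyte_emitted_at,
    _airbyte_data ->> 'make'  AS car_make,
    _airbyte_data ->> 'model' AS car_model
FROM _airbyte_raw_cars;
```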
## CHANGELOG @@ -351,71 +354,71 @@ Note that Basic Normalization is packaged in a docker image `airbyte/normalizati Therefore, in order to "upgrade" to the desired normalization version, you need to use the corresponding Airbyte version that it's being released in: -| Airbyte Version | Normalization Version | Date | Pull Request | Subject | -|:----------------|:---------------------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------| +| Airbyte Version | Normalization Version | Date | Pull Request | Subject | +| :-------------- | :------------------------- | :--------- | :----------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | | 0.4.3 | 2023-05-11 | [\#25993](https://github.com/airbytehq/airbyte/pull/25993) | Fix bug in source-postgres CDC for multiple updates on a single PK in a single transaction (destinations MySQL, MSSQL, TiDB may still be affected in certain cases) | -| | 0.4.2 | 2023-05-03 | [\#25771](https://github.com/airbytehq/airbyte/pull/25771) | Remove old VARCHAR to SUPER migration functionality for destination Redshift | -| | 0.4.1 | 2023-04-26 | [\#25591](https://github.com/airbytehq/airbyte/pull/25591) | Pin MarkupSafe library for Oracle normalization to fix build. | -| | 0.4.0 | 2023-03-23 | [\#22381](https://github.com/airbytehq/airbyte/pull/22381) | Prevent normalization from creating unnecessary duplicates in nested tables. 
| -| | 0.2.27 | 2023-03-15 | [\#24077](https://github.com/airbytehq/airbyte/pull/24077) | Add more bigquery reserved words | -| | 0.2.26 | 2023-02-15 | [\#19573](https://github.com/airbytehq/airbyte/pull/19573) | Update Clickhouse dbt version to 1.4.0 | -| | 0.3.2 (broken, do not use) | 2023-01-31 | [\#22165](https://github.com/airbytehq/airbyte/pull/22165) | Fix support for non-object top-level schemas | -| | 0.3.1 (broken, do not use) | 2023-01-31 | [\#22161](https://github.com/airbytehq/airbyte/pull/22161) | Fix handling for combined primitive types | -| | 0.3.0 (broken, do not use) | 2023-01-30 | [\#19721](https://github.com/airbytehq/airbyte/pull/19721) | Update normalization to airbyte-protocol v1.0.0 | -| | 0.2.25 | 2022-12-05 | [\#19573](https://github.com/airbytehq/airbyte/pull/19573) | Update Clickhouse dbt version | -| | 0.2.24 | 2022-11-01 | [\#18015](https://github.com/airbytehq/airbyte/pull/18015) | Add a drop table hook that drops *_scd tables after overwrite/reset | -| | 0.2.23 | 2022-10-12 | [\#17483](https://github.com/airbytehq/airbyte/pull/17483) (published in [\#17896](https://github.com/airbytehq/airbyte/pull/17896)) | Remove unnecessary `Native Port` config option | -| | 0.2.22 | 2022-09-05 | [\#16339](https://github.com/airbytehq/airbyte/pull/16339) | Update Clickhouse DBT to 1.1.8 | -| | 0.2.21 | 2022-09-09 | [\#15833](https://github.com/airbytehq/airbyte/pull/15833/) | SSH Tunnel: allow using OPENSSH key format (published in [\#16545](https://github.com/airbytehq/airbyte/pull/16545)) | -| | 0.2.20 | 2022-08-30 | [\#15592](https://github.com/airbytehq/airbyte/pull/15592) | Add TiDB support | -| | 0.2.19 | 2022-08-21 | [\#14897](https://github.com/airbytehq/airbyte/pull/14897) | Update Clickhouse DBT to 1.1.7 | -| | 0.2.16 | 2022-08-04 | [\#14295](https://github.com/airbytehq/airbyte/pull/14295) | Fixed SSH tunnel port usage | -| | 0.2.14 | 2022-08-01 | [\#14790](https://github.com/airbytehq/airbyte/pull/14790) | Add and persist job failures for Normalization | -| | 0.2.13 | 2022-07-27 | [\#14683](https://github.com/airbytehq/airbyte/pull/14683) | Quote schema name to allow reserved keywords | -| | 0.2.12 | 2022-07-26 | [\#14362](https://github.com/airbytehq/airbyte/pull/14362) | Handle timezone in date-time format. Parse date correct in clickhouse. | -| | 0.2.11 | 2022-07-26 | [\#13591](https://github.com/airbytehq/airbyte/pull/13591) | Updated support for integer columns. 
| -| | 0.2.10 | 2022-07-18 | [\#14792](https://github.com/airbytehq/airbyte/pull/14792) | Add support for key pair auth for snowflake | -| | 0.2.9 | 2022-07-06 | [\#14485](https://github.com/airbytehq/airbyte/pull/14485) | BigQuery partition pruning otimization | -| | 0.2.8 | 2022-07-13 | [\#14522](https://github.com/airbytehq/airbyte/pull/14522) | BigQuery replaces `NULL` array entries with the string value `"NULL"` | -| | 0.2.7 | 2022-07-05 | [\#11694](https://github.com/airbytehq/airbyte/pull/11694) | Do not return NULL for MySQL column values > 512 chars | -| | 0.2.6 | 2022-06-16 | [\#13894](https://github.com/airbytehq/airbyte/pull/13894) | Fix incorrect jinja2 macro `json_extract_array` call | -| | 0.2.5 | 2022-06-15 | [\#11470](https://github.com/airbytehq/airbyte/pull/11470) | Upgrade MySQL to dbt 1.0.0 | -| | 0.2.4 | 2022-06-14 | [\#12846](https://github.com/airbytehq/airbyte/pull/12846) | CDC correctly deletes propagates deletions to final tables | -| | 0.2.3 | 2022-06-10 | [\#11204](https://github.com/airbytehq/airbyte/pull/11204) | MySQL: add support for SSh tunneling | -| | 0.2.2 | 2022-06-02 | [\#13289](https://github.com/airbytehq/airbyte/pull/13289) | BigQuery use `json_extract_string_array` for array of simple type elements | -| | 0.2.1 | 2022-05-17 | [\#12924](https://github.com/airbytehq/airbyte/pull/12924) | Fixed checking --event-buffer-size on old dbt crashed entrypoint.sh | -| | 0.2.0 | 2022-05-15 | [\#12745](https://github.com/airbytehq/airbyte/pull/12745) | Snowflake: add datetime without timezone | -| | 0.1.78 | 2022-05-06 | [\#12305](https://github.com/airbytehq/airbyte/pull/12305) | Mssql: use NVARCHAR and datetime2 by default | -| 0.36.2-alpha | 0.1.77 | 2022-04-19 | [\#12064](https://github.com/airbytehq/airbyte/pull/12064) | Add support redshift SUPER type | -| 0.35.65-alpha | 0.1.75 | 2022-04-09 | [\#11511](https://github.com/airbytehq/airbyte/pull/11511) | Move DBT modules from `/tmp/dbt_modules` to `/dbt` | -| 0.35.61-alpha | 0.1.74 | 2022-03-24 | [\#10905](https://github.com/airbytehq/airbyte/pull/10905) | Update clickhouse dbt version | -| 0.35.60-alpha | 0.1.73 | 2022-03-25 | [\#11267](https://github.com/airbytehq/airbyte/pull/11267) | Set `--event-buffer-size` to reduce memory usage | -| 0.35.59-alpha | 0.1.72 | 2022-03-24 | [\#11093](https://github.com/airbytehq/airbyte/pull/11093) | Added Snowflake OAuth2.0 support | -| 0.35.53-alpha | 0.1.71 | 2022-03-14 | [\#11077](https://github.com/airbytehq/airbyte/pull/11077) | Enable BigQuery to handle project ID embedded inside dataset ID | -| 0.35.49-alpha | 0.1.70 | 2022-03-11 | [\#11051](https://github.com/airbytehq/airbyte/pull/11051) | Upgrade dbt to 1.0.0 (except for MySQL and Oracle) | -| 0.35.45-alpha | 0.1.69 | 2022-03-04 | [\#10754](https://github.com/airbytehq/airbyte/pull/10754) | Enable Clickhouse normalization over SSL | -| 0.35.32-alpha | 0.1.68 | 2022-02-20 | [\#10485](https://github.com/airbytehq/airbyte/pull/10485) | Fix row size too large for table with numerous `string` fields | -| | 0.1.66 | 2022-02-04 | [\#9341](https://github.com/airbytehq/airbyte/pull/9341) | Fix normalization for bigquery datasetId and tables | -| 0.35.13-alpha | 0.1.65 | 2021-01-28 | [\#9846](https://github.com/airbytehq/airbyte/pull/9846) | Tweak dbt multi-thread parameter down | -| 0.35.12-alpha | 0.1.64 | 2021-01-28 | [\#9793](https://github.com/airbytehq/airbyte/pull/9793) | Support PEM format for ssh-tunnel keys | -| 0.35.4-alpha | 0.1.63 | 2021-01-07 | [\#9301](https://github.com/airbytehq/airbyte/pull/9301) | 
Fix Snowflake prefix tables starting with numbers | -| | 0.1.62 | 2021-01-07 | [\#9340](https://github.com/airbytehq/airbyte/pull/9340) | Use TCP-port support for clickhouse | -| | 0.1.62 | 2021-01-07 | [\#9063](https://github.com/airbytehq/airbyte/pull/9063) | Change Snowflake-specific materialization settings | -| | 0.1.62 | 2021-01-07 | [\#9317](https://github.com/airbytehq/airbyte/pull/9317) | Fix issue with quoted & case sensitive columns | -| | 0.1.62 | 2021-01-07 | [\#9281](https://github.com/airbytehq/airbyte/pull/9281) | Fix SCD partition by float columns in BigQuery | -| 0.32.11-alpha | 0.1.61 | 2021-12-02 | [\#8394](https://github.com/airbytehq/airbyte/pull/8394) | Fix incremental queries not updating empty tables | -| | 0.1.61 | 2021-12-01 | [\#8378](https://github.com/airbytehq/airbyte/pull/8378) | Fix un-nesting queries and add proper ref hints | -| 0.32.5-alpha | 0.1.60 | 2021-11-22 | [\#8088](https://github.com/airbytehq/airbyte/pull/8088) | Speed-up incremental queries for SCD table on Snowflake | -| 0.30.32-alpha | 0.1.59 | 2021-11-08 | [\#7669](https://github.com/airbytehq/airbyte/pull/7169) | Fix nested incremental dbt | -| 0.30.24-alpha | 0.1.57 | 2021-10-26 | [\#7162](https://github.com/airbytehq/airbyte/pull/7162) | Implement incremental dbt updates | -| 0.30.16-alpha | 0.1.52 | 2021-10-07 | [\#6379](https://github.com/airbytehq/airbyte/pull/6379) | Handle empty string for date and date-time format | -| | 0.1.51 | 2021-10-08 | [\#6799](https://github.com/airbytehq/airbyte/pull/6799) | Added support for ad\_cdc\_log\_pos while normalization | -| | 0.1.50 | 2021-10-07 | [\#6079](https://github.com/airbytehq/airbyte/pull/6079) | Added support for MS SQL Server normalization | -| | 0.1.49 | 2021-10-06 | [\#6709](https://github.com/airbytehq/airbyte/pull/6709) | Forward destination dataset location to dbt profiles | -| 0.29.17-alpha | 0.1.47 | 2021-09-20 | [\#6317](https://github.com/airbytehq/airbyte/pull/6317) | MySQL: updated MySQL normalization with using SSH tunnel | -| | 0.1.45 | 2021-09-18 | [\#6052](https://github.com/airbytehq/airbyte/pull/6052) | Snowflake: accept any date-time format | -| 0.29.8-alpha | 0.1.40 | 2021-08-18 | [\#5433](https://github.com/airbytehq/airbyte/pull/5433) | Allow optional credentials\_json for BigQuery | -| 0.29.5-alpha | 0.1.39 | 2021-08-11 | [\#4557](https://github.com/airbytehq/airbyte/pull/4557) | Handle date times and solve conflict name btw stream/field | -| 0.28.2-alpha | 0.1.38 | 2021-07-28 | [\#5027](https://github.com/airbytehq/airbyte/pull/5027) | Handle quotes in column names when parsing JSON blob | -| 0.27.5-alpha | 0.1.37 | 2021-07-22 | [\#3947](https://github.com/airbytehq/airbyte/pull/4881/) | Handle `NULL` cursor field values when deduping | -| 0.27.2-alpha | 0.1.36 | 2021-07-09 | [\#3947](https://github.com/airbytehq/airbyte/pull/4163/) | Enable normalization for MySQL destination | +| | 0.4.2 | 2023-05-03 | [\#25771](https://github.com/airbytehq/airbyte/pull/25771) | Remove old VARCHAR to SUPER migration functionality for destination Redshift | +| | 0.4.1 | 2023-04-26 | [\#25591](https://github.com/airbytehq/airbyte/pull/25591) | Pin MarkupSafe library for Oracle normalization to fix build. | +| | 0.4.0 | 2023-03-23 | [\#22381](https://github.com/airbytehq/airbyte/pull/22381) | Prevent normalization from creating unnecessary duplicates in nested tables. 
| +| | 0.2.27 | 2023-03-15 | [\#24077](https://github.com/airbytehq/airbyte/pull/24077) | Add more bigquery reserved words | +| | 0.2.26 | 2023-02-15 | [\#19573](https://github.com/airbytehq/airbyte/pull/19573) | Update Clickhouse dbt version to 1.4.0 | +| | 0.3.2 (broken, do not use) | 2023-01-31 | [\#22165](https://github.com/airbytehq/airbyte/pull/22165) | Fix support for non-object top-level schemas | +| | 0.3.1 (broken, do not use) | 2023-01-31 | [\#22161](https://github.com/airbytehq/airbyte/pull/22161) | Fix handling for combined primitive types | +| | 0.3.0 (broken, do not use) | 2023-01-30 | [\#19721](https://github.com/airbytehq/airbyte/pull/19721) | Update normalization to airbyte-protocol v1.0.0 | +| | 0.2.25 | 2022-12-05 | [\#19573](https://github.com/airbytehq/airbyte/pull/19573) | Update Clickhouse dbt version | +| | 0.2.24 | 2022-11-01 | [\#18015](https://github.com/airbytehq/airbyte/pull/18015) | Add a drop table hook that drops \*\_scd tables after overwrite/reset | +| | 0.2.23 | 2022-10-12 | [\#17483](https://github.com/airbytehq/airbyte/pull/17483) (published in [\#17896](https://github.com/airbytehq/airbyte/pull/17896)) | Remove unnecessary `Native Port` config option | +| | 0.2.22 | 2022-09-05 | [\#16339](https://github.com/airbytehq/airbyte/pull/16339) | Update Clickhouse DBT to 1.1.8 | +| | 0.2.21 | 2022-09-09 | [\#15833](https://github.com/airbytehq/airbyte/pull/15833/) | SSH Tunnel: allow using OPENSSH key format (published in [\#16545](https://github.com/airbytehq/airbyte/pull/16545)) | +| | 0.2.20 | 2022-08-30 | [\#15592](https://github.com/airbytehq/airbyte/pull/15592) | Add TiDB support | +| | 0.2.19 | 2022-08-21 | [\#14897](https://github.com/airbytehq/airbyte/pull/14897) | Update Clickhouse DBT to 1.1.7 | +| | 0.2.16 | 2022-08-04 | [\#14295](https://github.com/airbytehq/airbyte/pull/14295) | Fixed SSH tunnel port usage | +| | 0.2.14 | 2022-08-01 | [\#14790](https://github.com/airbytehq/airbyte/pull/14790) | Add and persist job failures for Normalization | +| | 0.2.13 | 2022-07-27 | [\#14683](https://github.com/airbytehq/airbyte/pull/14683) | Quote schema name to allow reserved keywords | +| | 0.2.12 | 2022-07-26 | [\#14362](https://github.com/airbytehq/airbyte/pull/14362) | Handle timezone in date-time format. Parse date correct in clickhouse. | +| | 0.2.11 | 2022-07-26 | [\#13591](https://github.com/airbytehq/airbyte/pull/13591) | Updated support for integer columns. 
| +| | 0.2.10 | 2022-07-18 | [\#14792](https://github.com/airbytehq/airbyte/pull/14792) | Add support for key pair auth for snowflake | +| | 0.2.9 | 2022-07-06 | [\#14485](https://github.com/airbytehq/airbyte/pull/14485) | BigQuery partition pruning otimization | +| | 0.2.8 | 2022-07-13 | [\#14522](https://github.com/airbytehq/airbyte/pull/14522) | BigQuery replaces `NULL` array entries with the string value `"NULL"` | +| | 0.2.7 | 2022-07-05 | [\#11694](https://github.com/airbytehq/airbyte/pull/11694) | Do not return NULL for MySQL column values > 512 chars | +| | 0.2.6 | 2022-06-16 | [\#13894](https://github.com/airbytehq/airbyte/pull/13894) | Fix incorrect jinja2 macro `json_extract_array` call | +| | 0.2.5 | 2022-06-15 | [\#11470](https://github.com/airbytehq/airbyte/pull/11470) | Upgrade MySQL to dbt 1.0.0 | +| | 0.2.4 | 2022-06-14 | [\#12846](https://github.com/airbytehq/airbyte/pull/12846) | CDC correctly deletes propagates deletions to final tables | +| | 0.2.3 | 2022-06-10 | [\#11204](https://github.com/airbytehq/airbyte/pull/11204) | MySQL: add support for SSh tunneling | +| | 0.2.2 | 2022-06-02 | [\#13289](https://github.com/airbytehq/airbyte/pull/13289) | BigQuery use `json_extract_string_array` for array of simple type elements | +| | 0.2.1 | 2022-05-17 | [\#12924](https://github.com/airbytehq/airbyte/pull/12924) | Fixed checking --event-buffer-size on old dbt crashed entrypoint.sh | +| | 0.2.0 | 2022-05-15 | [\#12745](https://github.com/airbytehq/airbyte/pull/12745) | Snowflake: add datetime without timezone | +| | 0.1.78 | 2022-05-06 | [\#12305](https://github.com/airbytehq/airbyte/pull/12305) | Mssql: use NVARCHAR and datetime2 by default | +| 0.36.2-alpha | 0.1.77 | 2022-04-19 | [\#12064](https://github.com/airbytehq/airbyte/pull/12064) | Add support redshift SUPER type | +| 0.35.65-alpha | 0.1.75 | 2022-04-09 | [\#11511](https://github.com/airbytehq/airbyte/pull/11511) | Move DBT modules from `/tmp/dbt_modules` to `/dbt` | +| 0.35.61-alpha | 0.1.74 | 2022-03-24 | [\#10905](https://github.com/airbytehq/airbyte/pull/10905) | Update clickhouse dbt version | +| 0.35.60-alpha | 0.1.73 | 2022-03-25 | [\#11267](https://github.com/airbytehq/airbyte/pull/11267) | Set `--event-buffer-size` to reduce memory usage | +| 0.35.59-alpha | 0.1.72 | 2022-03-24 | [\#11093](https://github.com/airbytehq/airbyte/pull/11093) | Added Snowflake OAuth2.0 support | +| 0.35.53-alpha | 0.1.71 | 2022-03-14 | [\#11077](https://github.com/airbytehq/airbyte/pull/11077) | Enable BigQuery to handle project ID embedded inside dataset ID | +| 0.35.49-alpha | 0.1.70 | 2022-03-11 | [\#11051](https://github.com/airbytehq/airbyte/pull/11051) | Upgrade dbt to 1.0.0 (except for MySQL and Oracle) | +| 0.35.45-alpha | 0.1.69 | 2022-03-04 | [\#10754](https://github.com/airbytehq/airbyte/pull/10754) | Enable Clickhouse normalization over SSL | +| 0.35.32-alpha | 0.1.68 | 2022-02-20 | [\#10485](https://github.com/airbytehq/airbyte/pull/10485) | Fix row size too large for table with numerous `string` fields | +| | 0.1.66 | 2022-02-04 | [\#9341](https://github.com/airbytehq/airbyte/pull/9341) | Fix normalization for bigquery datasetId and tables | +| 0.35.13-alpha | 0.1.65 | 2021-01-28 | [\#9846](https://github.com/airbytehq/airbyte/pull/9846) | Tweak dbt multi-thread parameter down | +| 0.35.12-alpha | 0.1.64 | 2021-01-28 | [\#9793](https://github.com/airbytehq/airbyte/pull/9793) | Support PEM format for ssh-tunnel keys | +| 0.35.4-alpha | 0.1.63 | 2021-01-07 | [\#9301](https://github.com/airbytehq/airbyte/pull/9301) | 
Fix Snowflake prefix tables starting with numbers | +| | 0.1.62 | 2021-01-07 | [\#9340](https://github.com/airbytehq/airbyte/pull/9340) | Use TCP-port support for clickhouse | +| | 0.1.62 | 2021-01-07 | [\#9063](https://github.com/airbytehq/airbyte/pull/9063) | Change Snowflake-specific materialization settings | +| | 0.1.62 | 2021-01-07 | [\#9317](https://github.com/airbytehq/airbyte/pull/9317) | Fix issue with quoted & case sensitive columns | +| | 0.1.62 | 2021-01-07 | [\#9281](https://github.com/airbytehq/airbyte/pull/9281) | Fix SCD partition by float columns in BigQuery | +| 0.32.11-alpha | 0.1.61 | 2021-12-02 | [\#8394](https://github.com/airbytehq/airbyte/pull/8394) | Fix incremental queries not updating empty tables | +| | 0.1.61 | 2021-12-01 | [\#8378](https://github.com/airbytehq/airbyte/pull/8378) | Fix un-nesting queries and add proper ref hints | +| 0.32.5-alpha | 0.1.60 | 2021-11-22 | [\#8088](https://github.com/airbytehq/airbyte/pull/8088) | Speed-up incremental queries for SCD table on Snowflake | +| 0.30.32-alpha | 0.1.59 | 2021-11-08 | [\#7669](https://github.com/airbytehq/airbyte/pull/7169) | Fix nested incremental dbt | +| 0.30.24-alpha | 0.1.57 | 2021-10-26 | [\#7162](https://github.com/airbytehq/airbyte/pull/7162) | Implement incremental dbt updates | +| 0.30.16-alpha | 0.1.52 | 2021-10-07 | [\#6379](https://github.com/airbytehq/airbyte/pull/6379) | Handle empty string for date and date-time format | +| | 0.1.51 | 2021-10-08 | [\#6799](https://github.com/airbytehq/airbyte/pull/6799) | Added support for ad_cdc_log_pos while normalization | +| | 0.1.50 | 2021-10-07 | [\#6079](https://github.com/airbytehq/airbyte/pull/6079) | Added support for MS SQL Server normalization | +| | 0.1.49 | 2021-10-06 | [\#6709](https://github.com/airbytehq/airbyte/pull/6709) | Forward destination dataset location to dbt profiles | +| 0.29.17-alpha | 0.1.47 | 2021-09-20 | [\#6317](https://github.com/airbytehq/airbyte/pull/6317) | MySQL: updated MySQL normalization with using SSH tunnel | +| | 0.1.45 | 2021-09-18 | [\#6052](https://github.com/airbytehq/airbyte/pull/6052) | Snowflake: accept any date-time format | +| 0.29.8-alpha | 0.1.40 | 2021-08-18 | [\#5433](https://github.com/airbytehq/airbyte/pull/5433) | Allow optional credentials_json for BigQuery | +| 0.29.5-alpha | 0.1.39 | 2021-08-11 | [\#4557](https://github.com/airbytehq/airbyte/pull/4557) | Handle date times and solve conflict name btw stream/field | +| 0.28.2-alpha | 0.1.38 | 2021-07-28 | [\#5027](https://github.com/airbytehq/airbyte/pull/5027) | Handle quotes in column names when parsing JSON blob | +| 0.27.5-alpha | 0.1.37 | 2021-07-22 | [\#3947](https://github.com/airbytehq/airbyte/pull/4881/) | Handle `NULL` cursor field values when deduping | +| 0.27.2-alpha | 0.1.36 | 2021-07-09 | [\#3947](https://github.com/airbytehq/airbyte/pull/4163/) | Enable normalization for MySQL destination | diff --git a/docs/understanding-airbyte/connections/README.md b/docs/understanding-airbyte/connections/README.md index 401f139dbd7b..49a0756a43d9 100644 --- a/docs/understanding-airbyte/connections/README.md +++ b/docs/understanding-airbyte/connections/README.md @@ -2,11 +2,11 @@ A connection is a configuration for syncing data between a source and a destination. To setup a connection, a user must configure things such as: -* Sync schedule: when to trigger a sync of the data. -* Destination [Namespace](../namespaces.md) and stream names: where the data will end up being written. 
-* A catalog selection: which [streams and fields](../airbyte-protocol.md#catalog) to replicate from the source -* Sync mode: how streams should be replicated \(read and write\): -* Optional transformations: how to convert Airbyte protocol messages \(raw JSON blob\) data into some other data representations. +- Sync schedule: when to trigger a sync of the data. +- Destination [Namespace](../namespaces.md) and stream names: where the data will end up being written. +- A catalog selection: which [streams and fields](../airbyte-protocol.md#catalog) to replicate from the source +- Sync mode: how streams should be replicated \(read and write\): +- Optional transformations: how to convert Airbyte protocol messages \(raw JSON blob\) data into some other data representations. ## Sync schedules @@ -14,17 +14,17 @@ Sync schedules are explained below. For information about catalog selections, se Syncs will be triggered by either: -* A manual request \(i.e: clicking the "Sync Now" button in the UI\) -* A schedule +- A manual request \(i.e: clicking the "Sync Now" button in the UI\) +- A schedule When a scheduled connection is first created, a sync is executed as soon as possible. After that, a sync is run once the time since the last sync \(whether it was triggered manually or due to a schedule\) has exceeded the schedule interval. For example, consider the following illustrative scenario: -* **October 1st, 2pm**, a user sets up a connection to sync data every 24 hours. -* **October 1st, 2:01pm**: sync job runs -* **October 2nd, 2:01pm:** 24 hours have passed since the last sync, so a sync is triggered. -* **October 2nd, 5pm**: The user manually triggers a sync from the UI -* **October 3rd, 2:01pm:** since the last sync was less than 24 hours ago, no sync is run -* **October 3rd, 5:01pm:** It has been more than 24 hours since the last sync, so a sync is run +- **October 1st, 2pm**, a user sets up a connection to sync data every 24 hours. +- **October 1st, 2:01pm**: sync job runs +- **October 2nd, 2:01pm:** 24 hours have passed since the last sync, so a sync is triggered. +- **October 2nd, 5pm**: The user manually triggers a sync from the UI +- **October 3rd, 2:01pm:** since the last sync was less than 24 hours ago, no sync is run +- **October 3rd, 5:01pm:** It has been more than 24 hours since the last sync, so a sync is run ## Destination namespace @@ -46,8 +46,8 @@ A sync mode governs how Airbyte reads from a source and writes to a destination. 1. The first part of the name denotes how the source connector reads data from the source: 1. Incremental: Read records added to the source since the last sync job. \(The first sync using Incremental is equivalent to a Full Refresh\) - * Method 1: Using a cursor. Generally supported by all connectors whose data source allows extracting records incrementally. - * Method 2: Using change data capture. Only supported by some sources. See [CDC](../cdc.md) for more info. + - Method 1: Using a cursor. Generally supported by all connectors whose data source allows extracting records incrementally. + - Method 2: Using change data capture. Only supported by some sources. See [CDC](../cdc.md) for more info. 2. Full Refresh: Read everything in the source. 2. The second part of the sync mode name denotes how the destination connector writes data. This is not affected by how the source connector produced the data: 1. Overwrite: Overwrite by first deleting existing data in the destination. 
@@ -56,10 +56,10 @@ A sync mode governs how Airbyte reads from a source and writes to a destination. A sync mode is therefore, a combination of a source and destination mode together. The UI exposes the following options, whenever both source and destination connectors are capable to support it for the corresponding stream: -* [Full Refresh Overwrite](full-refresh-overwrite.md): Sync the whole stream and replace data in destination by overwriting it. -* [Full Refresh Append](full-refresh-append.md): Sync the whole stream and append data in destination. -* [Incremental Append](incremental-append.md): Sync new records from stream and append data in destination. -* [Incremental Deduped History](incremental-deduped-history.md): Sync new records from stream and append data in destination, also provides a de-duplicated view mirroring the state of the stream in the source. +- [Full Refresh Overwrite](full-refresh-overwrite.md): Sync the whole stream and replace data in destination by overwriting it. +- [Full Refresh Append](full-refresh-append.md): Sync the whole stream and append data in destination. +- [Incremental Append](incremental-append.md): Sync new records from stream and append data in destination. +- [Incremental Append + Deduped](incremental-append-deduped.md): Sync new records from stream and append data in destination, also provides a de-duplicated view mirroring the state of the stream in the source. ## Optional operations @@ -69,9 +69,9 @@ As described by the [Airbyte Protocol from the Airbyte Specifications](../airbyt On top of this replication, Airbyte provides the option to enable or disable an additional transformation step at the end of the sync called [basic normalization](../basic-normalization.md). This operation is: -* Only available for destinations that support dbt execution -* Automatically generates a pipeline or DAG of dbt transformation models to convert JSON blob objects into normalized tables -* Runs and applies these dbt models to the data written in the destination +- Only available for destinations that support dbt execution +- Automatically generates a pipeline or DAG of dbt transformation models to convert JSON blob objects into normalized tables +- Runs and applies these dbt models to the data written in the destination :::note @@ -82,4 +82,3 @@ Normalizing data may cause an increase in your destination's compute cost. This ### Custom sync operations Further operations can be included in a sync on top of Airbyte basic normalization \(or even to replace it completely\). See [operations](../operations.md) for more details. - diff --git a/docs/understanding-airbyte/connections/incremental-deduped-history.md b/docs/understanding-airbyte/connections/incremental-append-deduped.md similarity index 58% rename from docs/understanding-airbyte/connections/incremental-deduped-history.md rename to docs/understanding-airbyte/connections/incremental-append-deduped.md index be02105b8bfe..beff559938b0 100644 --- a/docs/understanding-airbyte/connections/incremental-deduped-history.md +++ b/docs/understanding-airbyte/connections/incremental-append-deduped.md @@ -1,21 +1,19 @@ -# Incremental Sync - Deduped History +# Incremental Sync - Append + Deduped ## High-Level Context This connector syncs data **incrementally**, which means that only new or modified data will be synced. In contrast with the [Incremental Append mode](./incremental-append.md), this mode updates rows that have been modified instead of adding a new version of the row with the updated data. 
Simply put, if you've synced a row before and it has since been updated, this mode will combine the two rows
-in the destination and use the updated data. On the other hand, the [Incremental Append mode](./incremental-append.md) would just add a new row with the updated data.
+in the destination and use the most recent data. On the other hand, the [Incremental Append mode](./incremental-append.md) would just add a new row with the updated data.

## Overview

-Airbyte supports syncing data in **Incremental Deduped History** mode i.e:
+Airbyte supports syncing data in **Incremental Append Deduped** mode i.e:

1. **Incremental** means syncing only replicate _new_ or _modified_ data. This prevents re-fetching data that you have already replicated from a source. If the sync is running for the first time, it is equivalent to a [Full Refresh](full-refresh-append.md) since all data will be considered as _new_.
-2. **Deduped** means that data in the final table will be unique per primary key \(unlike [Append modes](incremental-append.md)\). This is determined by sorting the data using the cursor field and keeping only the latest de-duplicated data row. In dimensional data warehouse jargon defined by Ralph Kimball, this is referred as a Slowly Changing Dimension \(SCD\) table of type 1.
-3. **History** means that an additional intermediate table is created in which data is being continuously appended to \(with duplicates exactly like [Append modes](incremental-append.md)\). With the use of primary key fields, it is identifying effective `start` and `end` dates of each row of a record. In dimensional data warehouse jargon, this is referred as a Slowly Changing Dimension \(SCD\) table of type 2.
+2. **Append** means that this incremental data is added to existing tables in your data warehouse.
+3. **Deduped** means that data in the final table will be unique per primary key \(unlike [Append modes](incremental-append.md)\). This is determined by sorting the data using the cursor field and keeping only the latest de-duplicated data row.

-In this flavor of incremental, records in the warehouse destination will never be deleted in the history tables \(named with a `_scd` suffix\), but might not exist in the final table. A copy of each new or updated record is _appended_ to the history data in the warehouse. Only the `end` date column is mutated when a new version of the same record is inserted to denote effective date ranges of a row. This means you can find multiple copies of the same record in the destination warehouse. We provide an "at least once" guarantee of replicating each record that is present when the sync runs.
-
-On the other hand, records in the final destination can potentially be deleted as they are de-duplicated. You should not find multiple copies of the same primary key as these should be unique in that table.
+Records in the final destination can potentially be deleted as they are de-duplicated, or if your source supports emitting record deletions (e.g. a CDC database source). You should not find multiple copies of the same primary key as these should be unique in that table.

## Definitions

@@ -42,17 +40,16 @@ Assume that `updated_at` is our `cursor_field` and `name` is the `primary_key`. 
In the next sync, the delta contains the following record: -| name | deceased | updated_at | -| :--------- | :------- | :--------- | -| Louis XVII | false | 1785 | +| name | deceased | updated_at | +| :-------- | :------- | :--------- | +| Louis XVI | false | 1785 | At the end of this incremental sync, the data warehouse would now contain: | name | deceased | updated_at | | :--------------- | :------- | :--------- | -| Louis XVI | false | 1754 | | Marie Antoinette | false | 1755 | -| Louis XVII | false | 1785 | +| Louis XVI | false | 1785 | ### Updating a Record @@ -63,24 +60,11 @@ Let's assume that our warehouse contains all the data that it did at the end of | Louis XVI | true | 1793 | | Marie Antoinette | true | 1793 | -The output we expect to see in the warehouse is as follows: - -In the history table: - -| name | deceased | updated_at | start_at | end_at | -| :--------------- | :------- | :--------- | :------- | :----- | -| Louis XVI | false | 1754 | 1754 | 1793 | -| Louis XVI | true | 1793 | 1793 | NULL | -| Louis XVII | false | 1785 | 1785 | NULL | -| Marie Antoinette | false | 1755 | 1755 | 1793 | -| Marie Antoinette | true | 1793 | 1793 | NULL | - In the final de-duplicated table: | name | deceased | updated_at | | :--------------- | :------- | :--------- | | Louis XVI | true | 1793 | -| Louis XVII | false | 1785 | | Marie Antoinette | true | 1793 | ## Source-Defined Cursor @@ -125,37 +109,13 @@ Due to the use of a cursor column, if modifications to the underlying records ar select * from table where cursor_field > 'last_sync_max_cursor_field_value' ``` -Let's say the following data already exists into our data warehouse. - -| name | deceased | updated_at | -| :--------------- | :------- | :--------- | -| Louis XVI | false | 1754 | -| Marie Antoinette | false | 1755 | - -At the start of the next sync, the source data contains the following new record: - -| name | deceased | updated_at | -| :-------- | :------- | :--------- | -| Louis XVI | true | 1754 | - -At the end of the second incremental sync, the data warehouse would still contain data from the first sync because the delta record did not provide a valid value for the cursor field \(the cursor field is not greater than last sync's max value, `1754 < 1755`\), so it is not emitted by the source as a new or modified record. - -| name | deceased | updated_at | -| :--------------- | :------- | :--------- | -| Louis XVI | false | 1754 | -| Marie Antoinette | false | 1755 | - -Similarly, if multiple modifications are made during the same day to the same records. If the frequency of the sync is not granular enough \(for example, set for every 24h\), then intermediate modifications to the data are not going to be detected and emitted. Only the state of data at the time the sync runs will be reflected in the destination. - -Those concerns could be solved by using a different incremental approach based on binary logs, Write-Ahead-Logs \(WAL\), or also called [Change Data Capture \(CDC\)](../cdc.md). - -The current behavior of **Incremental** is not able to handle source schema changes yet, for example, when a column is added, renamed or deleted from an existing table etc. It is recommended to trigger a [Full refresh - Overwrite](full-refresh-overwrite.md) to correctly replicate the data to the destination with the new schema changes. - -Additionally, this sync mode is only supported for destinations where dbt/normalization is possible for the moment. 
The de-duplicating logic is indeed implemented as dbt models as part of a sequence of transformations applied after the Extract and Load activities \(thus, an ELT approach\). Nevertheless, it is theoretically possible that destinations can handle directly this logic \(maybe in the future\) before actually writing records to the destination \(as in traditional ETL manner\), but that's not the way it is implemented at this time.
-
-If you are not satisfied with how transformations are applied on top of the appended data, you can find more relevant SQL transformations you might need to do on your data in the [Connecting EL with T using SQL \(part 1/2\)](../../operator-guides/transformation-and-normalization/transformations-with-sql.md)
-
## Related information

- [An overview of Airbyte’s replication modes](https://airbyte.com/blog/understanding-data-replication-modes).
- [Explore Airbyte’s incremental data synchronization](https://airbyte.com/tutorials/incremental-data-synchronization).
+
+---
+
+**Note**:
+
+Previous versions of Airbyte destinations supported SCD tables, which would store every entry seen for a record. This was removed with Destinations V2 and [Typing and Deduplication](/understanding-airbyte/typing-deduping.md).
diff --git a/docs/understanding-airbyte/connections/incremental-append.md b/docs/understanding-airbyte/connections/incremental-append.md
index a779d0b1d13c..c380d2226912 100644
--- a/docs/understanding-airbyte/connections/incremental-append.md
+++ b/docs/understanding-airbyte/connections/incremental-append.md
@@ -80,7 +80,7 @@ Some sources cannot define the cursor without user input. For example, in the [p

 As demonstrated in the examples above, with **Incremental Append,** a record which was updated in the source will be appended to the destination rather than updated in-place. This means that if data in the source uses a primary key \(e.g: `user_id` in the `users` table\), then the destination will end up having multiple records with the same primary key value.

-However, some use cases require only the latest snapshot of the data. This is available by using other flavors of sync modes such as [Incremental - Deduped History](incremental-deduped-history.md) instead.
+However, some use cases require only the latest snapshot of the data. This is available by using other flavors of sync modes such as [Incremental - Append + Deduped](incremental-append-deduped.md) instead.

 Note that in **Incremental Append**, the size of the data in your warehouse increases monotonically since an updated record in the source is appended to the destination rather than updated in-place. 
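To make the difference between plain appends and the "Append + Deduped" flavour concrete, here is a rough SQL sketch of the de-duplication idea. It is not the exact query Airbyte's generated dbt models or the newer Typing & Deduplication step run; the table name `persons_append` is an assumption, and the columns echo the `name` / `updated_at` example used earlier in this page.

```sql
-- Illustrative sketch: from an append-only table that may hold several versions
-- of the same primary key, keep only the newest row per key, chosen by the cursor.
SELECT name, deceased, updated_at
FROM (
  SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY name ORDER BY updated_at DESC) AS version_rank
  FROM persons_append  -- assumed name for the appended (pre-dedup) table
) ranked
WHERE version_rank = 1;
```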
diff --git a/docusaurus/sidebars.js b/docusaurus/sidebars.js index 5bc638c52eed..a83274e98ebc 100644 --- a/docusaurus/sidebars.js +++ b/docusaurus/sidebars.js @@ -1,7 +1,7 @@ -const fs = require('fs'); -const path = require('path'); +const fs = require("fs"); +const path = require("path"); -const connectorsDocsRoot = '../docs/integrations'; +const connectorsDocsRoot = "../docs/integrations"; const sourcesDocs = `${connectorsDocsRoot}/sources`; const destinationDocs = `${connectorsDocsRoot}/destinations`; @@ -10,231 +10,231 @@ function getFilenamesInDir(prefix, dir, excludes) { .readdirSync(dir) .filter( (fileName) => - !(fileName.endsWith('.inapp.md') || fileName.endsWith('-migrations.md')) + !(fileName.endsWith(".inapp.md") || fileName.endsWith("-migrations.md")) ) - .map((fileName) => fileName.replace('.md', '')) + .map((fileName) => fileName.replace(".md", "")) .filter((fileName) => excludes.indexOf(fileName.toLowerCase()) === -1) .map((filename) => { - return { type: 'doc', id: path.join(prefix, filename) }; + return { type: "doc", id: path.join(prefix, filename) }; }); } function getSourceConnectors() { - return getFilenamesInDir('integrations/sources/', sourcesDocs, ['readme']); + return getFilenamesInDir("integrations/sources/", sourcesDocs, ["readme"]); } function getDestinationConnectors() { - return getFilenamesInDir('integrations/destinations/', destinationDocs, [ - 'readme', + return getFilenamesInDir("integrations/destinations/", destinationDocs, [ + "readme", ]); } const sectionHeader = (title) => ({ - type: 'html', + type: "html", value: title, - className: 'navbar__category', + className: "navbar__category", }); const buildAConnector = { - type: 'category', - label: 'Build a Connector', + type: "category", + label: "Build a Connector", items: [ { - type: 'doc', - label: 'Overview', - id: 'connector-development/README', + type: "doc", + label: "Overview", + id: "connector-development/README", }, { - type: 'category', - label: 'Connector Builder', + type: "category", + label: "Connector Builder", items: [ - 'connector-development/connector-builder-ui/overview', - 'connector-development/connector-builder-ui/connector-builder-compatibility', - 'connector-development/connector-builder-ui/tutorial', + "connector-development/connector-builder-ui/overview", + "connector-development/connector-builder-ui/connector-builder-compatibility", + "connector-development/connector-builder-ui/tutorial", { - type: 'category', - label: 'Concepts', + type: "category", + label: "Concepts", items: [ - 'connector-development/connector-builder-ui/authentication', - 'connector-development/connector-builder-ui/record-processing', - 'connector-development/connector-builder-ui/pagination', - 'connector-development/connector-builder-ui/incremental-sync', - 'connector-development/connector-builder-ui/partitioning', - 'connector-development/connector-builder-ui/error-handling', + "connector-development/connector-builder-ui/authentication", + "connector-development/connector-builder-ui/record-processing", + "connector-development/connector-builder-ui/pagination", + "connector-development/connector-builder-ui/incremental-sync", + "connector-development/connector-builder-ui/partitioning", + "connector-development/connector-builder-ui/error-handling", ], }, ], }, { - type: 'category', - label: 'Low-code connector development', + type: "category", + label: "Low-code connector development", items: [ { - label: 'Low-code CDK Intro', - type: 'doc', - id: 'connector-development/config-based/low-code-cdk-overview', + 
label: "Low-code CDK Intro", + type: "doc", + id: "connector-development/config-based/low-code-cdk-overview", }, { - type: 'category', - label: 'Tutorial', + type: "category", + label: "Tutorial", items: [ - 'connector-development/config-based/tutorial/getting-started', - 'connector-development/config-based/tutorial/create-source', - 'connector-development/config-based/tutorial/install-dependencies', - 'connector-development/config-based/tutorial/connecting-to-the-API-source', - 'connector-development/config-based/tutorial/reading-data', - 'connector-development/config-based/tutorial/incremental-reads', - 'connector-development/config-based/tutorial/testing', + "connector-development/config-based/tutorial/getting-started", + "connector-development/config-based/tutorial/create-source", + "connector-development/config-based/tutorial/install-dependencies", + "connector-development/config-based/tutorial/connecting-to-the-API-source", + "connector-development/config-based/tutorial/reading-data", + "connector-development/config-based/tutorial/incremental-reads", + "connector-development/config-based/tutorial/testing", ], }, { - type: 'category', - label: 'Understanding the YAML file', + type: "category", + label: "Understanding the YAML file", link: { - type: 'doc', - id: 'connector-development/config-based/understanding-the-yaml-file/yaml-overview', + type: "doc", + id: "connector-development/config-based/understanding-the-yaml-file/yaml-overview", }, items: [ { type: `category`, label: `Requester`, link: { - type: 'doc', - id: 'connector-development/config-based/understanding-the-yaml-file/requester', + type: "doc", + id: "connector-development/config-based/understanding-the-yaml-file/requester", }, items: [ - 'connector-development/config-based/understanding-the-yaml-file/request-options', - 'connector-development/config-based/understanding-the-yaml-file/authentication', - 'connector-development/config-based/understanding-the-yaml-file/error-handling', + "connector-development/config-based/understanding-the-yaml-file/request-options", + "connector-development/config-based/understanding-the-yaml-file/authentication", + "connector-development/config-based/understanding-the-yaml-file/error-handling", ], }, - 'connector-development/config-based/understanding-the-yaml-file/incremental-syncs', - 'connector-development/config-based/understanding-the-yaml-file/pagination', - 'connector-development/config-based/understanding-the-yaml-file/partition-router', - 'connector-development/config-based/understanding-the-yaml-file/record-selector', - 'connector-development/config-based/understanding-the-yaml-file/reference', + "connector-development/config-based/understanding-the-yaml-file/incremental-syncs", + "connector-development/config-based/understanding-the-yaml-file/pagination", + "connector-development/config-based/understanding-the-yaml-file/partition-router", + "connector-development/config-based/understanding-the-yaml-file/record-selector", + "connector-development/config-based/understanding-the-yaml-file/reference", ], }, - 'connector-development/config-based/advanced-topics', + "connector-development/config-based/advanced-topics", ], }, { - type: 'category', - label: 'Connector Development Kit', + type: "category", + label: "Connector Development Kit", link: { - type: 'doc', - id: 'connector-development/cdk-python/README', + type: "doc", + id: "connector-development/cdk-python/README", }, items: [ - 'connector-development/cdk-python/basic-concepts', - 'connector-development/cdk-python/schemas', - 
'connector-development/cdk-python/full-refresh-stream', - 'connector-development/cdk-python/incremental-stream', - 'connector-development/cdk-python/http-streams', - 'connector-development/cdk-python/python-concepts', - 'connector-development/cdk-python/stream-slices', + "connector-development/cdk-python/basic-concepts", + "connector-development/cdk-python/schemas", + "connector-development/cdk-python/full-refresh-stream", + "connector-development/cdk-python/incremental-stream", + "connector-development/cdk-python/http-streams", + "connector-development/cdk-python/python-concepts", + "connector-development/cdk-python/stream-slices", ], }, { - type: 'category', - label: 'Testing Connectors', + type: "category", + label: "Testing Connectors", link: { - type: 'doc', - id: 'connector-development/testing-connectors/README', + type: "doc", + id: "connector-development/testing-connectors/README", }, items: [ - 'connector-development/testing-connectors/connector-acceptance-tests-reference', - 'connector-development/testing-connectors/testing-a-local-catalog-in-development', + "connector-development/testing-connectors/connector-acceptance-tests-reference", + "connector-development/testing-connectors/testing-a-local-catalog-in-development", ], }, { - type: 'category', - label: 'Tutorials', + type: "category", + label: "Tutorials", items: [ - 'connector-development/tutorials/cdk-speedrun', + "connector-development/tutorials/cdk-speedrun", { - type: 'category', - label: 'Python CDK: Creating a HTTP API Source', + type: "category", + label: "Python CDK: Creating a HTTP API Source", items: [ - 'connector-development/tutorials/cdk-tutorial-python-http/getting-started', - 'connector-development/tutorials/cdk-tutorial-python-http/creating-the-source', - 'connector-development/tutorials/cdk-tutorial-python-http/install-dependencies', - 'connector-development/tutorials/cdk-tutorial-python-http/define-inputs', - 'connector-development/tutorials/cdk-tutorial-python-http/connection-checking', - 'connector-development/tutorials/cdk-tutorial-python-http/declare-schema', - 'connector-development/tutorials/cdk-tutorial-python-http/read-data', - 'connector-development/tutorials/cdk-tutorial-python-http/use-connector-in-airbyte', - 'connector-development/tutorials/cdk-tutorial-python-http/test-your-connector', + "connector-development/tutorials/cdk-tutorial-python-http/getting-started", + "connector-development/tutorials/cdk-tutorial-python-http/creating-the-source", + "connector-development/tutorials/cdk-tutorial-python-http/install-dependencies", + "connector-development/tutorials/cdk-tutorial-python-http/define-inputs", + "connector-development/tutorials/cdk-tutorial-python-http/connection-checking", + "connector-development/tutorials/cdk-tutorial-python-http/declare-schema", + "connector-development/tutorials/cdk-tutorial-python-http/read-data", + "connector-development/tutorials/cdk-tutorial-python-http/use-connector-in-airbyte", + "connector-development/tutorials/cdk-tutorial-python-http/test-your-connector", ], }, - 'connector-development/tutorials/building-a-python-source', - 'connector-development/tutorials/building-a-python-destination', - 'connector-development/tutorials/building-a-java-destination', - 'connector-development/tutorials/profile-java-connector-memory', + "connector-development/tutorials/building-a-python-source", + "connector-development/tutorials/building-a-python-destination", + "connector-development/tutorials/building-a-java-destination", + 
"connector-development/tutorials/profile-java-connector-memory", ], }, - 'connector-development/connector-specification-reference', - 'connector-development/schema-reference', - 'connector-development/connector-metadata-file', - 'connector-development/best-practices', - 'connector-development/ux-handbook', + "connector-development/connector-specification-reference", + "connector-development/schema-reference", + "connector-development/connector-metadata-file", + "connector-development/best-practices", + "connector-development/ux-handbook", ], }; const connectorCatalog = { - type: 'category', - label: 'Connector Catalog', + type: "category", + label: "Connector Catalog", link: { - type: 'doc', - id: 'integrations/README', + type: "doc", + id: "integrations/README", }, items: [ { - type: 'category', - label: 'Sources', + type: "category", + label: "Sources", link: { - type: 'generated-index', + type: "generated-index", }, items: getSourceConnectors(), }, { - type: 'category', - label: 'Destinations', + type: "category", + label: "Destinations", link: { - type: 'generated-index', + type: "generated-index", }, items: getDestinationConnectors(), }, { - type: 'doc', - id: 'integrations/custom-connectors', + type: "doc", + id: "integrations/custom-connectors", }, ], }; const contributeToAirbyte = { - type: 'category', - label: 'Contribute to Airbyte', + type: "category", + label: "Contribute to Airbyte", link: { - type: 'doc', - id: 'contributing-to-airbyte/README', + type: "doc", + id: "contributing-to-airbyte/README", }, items: [ - 'contributing-to-airbyte/issues-and-requests', - 'contributing-to-airbyte/change-cdk-connector', - 'contributing-to-airbyte/submit-new-connector', - 'contributing-to-airbyte/writing-docs', + "contributing-to-airbyte/issues-and-requests", + "contributing-to-airbyte/change-cdk-connector", + "contributing-to-airbyte/submit-new-connector", + "contributing-to-airbyte/writing-docs", { - type: 'category', - label: 'Resources', + type: "category", + label: "Resources", items: [ - 'contributing-to-airbyte/resources/pull-requests-handbook', - 'contributing-to-airbyte/resources/code-style', - 'contributing-to-airbyte/resources/developing-locally', - 'contributing-to-airbyte/resources/developing-on-docker', - 'contributing-to-airbyte/resources/gradle', - 'contributing-to-airbyte/resources/python-gradle-setup', + "contributing-to-airbyte/resources/pull-requests-handbook", + "contributing-to-airbyte/resources/code-style", + "contributing-to-airbyte/resources/developing-locally", + "contributing-to-airbyte/resources/developing-on-docker", + "contributing-to-airbyte/resources/gradle", + "contributing-to-airbyte/resources/python-gradle-setup", ], }, ], @@ -242,259 +242,259 @@ const contributeToAirbyte = { const airbyteCloud = [ { - type: 'doc', - label: 'Getting Started', - id: 'cloud/getting-started-with-airbyte-cloud', + type: "doc", + label: "Getting Started", + id: "cloud/getting-started-with-airbyte-cloud", }, - 'cloud/core-concepts', + "cloud/core-concepts", { - type: 'category', - label: 'Using Airbyte Cloud', + type: "category", + label: "Using Airbyte Cloud", link: { - type: 'generated-index', + type: "generated-index", }, items: [ - 'cloud/managing-airbyte-cloud/edit-stream-configuration', - 'cloud/managing-airbyte-cloud/manage-schema-changes', - 'cloud/managing-airbyte-cloud/manage-data-residency', - 'cloud/managing-airbyte-cloud/manage-credits', - 'cloud/managing-airbyte-cloud/review-sync-summary', - 'cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications', - 
'cloud/managing-airbyte-cloud/dbt-cloud-integration', - 'cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace', - 'cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits', - 'cloud/managing-airbyte-cloud/review-connection-state', + "cloud/managing-airbyte-cloud/edit-stream-configuration", + "cloud/managing-airbyte-cloud/manage-schema-changes", + "cloud/managing-airbyte-cloud/manage-data-residency", + "cloud/managing-airbyte-cloud/manage-credits", + "cloud/managing-airbyte-cloud/review-sync-summary", + "cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications", + "cloud/managing-airbyte-cloud/dbt-cloud-integration", + "cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace", + "cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits", + "cloud/managing-airbyte-cloud/review-connection-state", ], }, ]; const ossGettingStarted = { - type: 'category', - label: 'Getting Started', + type: "category", + label: "Getting Started", link: { - type: 'generated-index', + type: "generated-index", }, items: [ - 'quickstart/deploy-airbyte', - 'quickstart/add-a-source', - 'quickstart/add-a-destination', - 'quickstart/set-up-a-connection', + "quickstart/deploy-airbyte", + "quickstart/add-a-source", + "quickstart/add-a-destination", + "quickstart/set-up-a-connection", ], }; const deployAirbyte = { - type: 'category', - label: 'Deploy Airbyte', + type: "category", + label: "Deploy Airbyte", link: { - type: 'generated-index', + type: "generated-index", }, items: [ { - type: 'doc', - label: 'On your local machine', - id: 'deploying-airbyte/local-deployment', + type: "doc", + label: "On your local machine", + id: "deploying-airbyte/local-deployment", }, { - type: 'doc', - label: 'On AWS EC2', - id: 'deploying-airbyte/on-aws-ec2', + type: "doc", + label: "On AWS EC2", + id: "deploying-airbyte/on-aws-ec2", }, { - type: 'doc', - label: 'On Azure', - id: 'deploying-airbyte/on-azure-vm-cloud-shell', + type: "doc", + label: "On Azure", + id: "deploying-airbyte/on-azure-vm-cloud-shell", }, { - type: 'doc', - label: 'On Google (GCP)', - id: 'deploying-airbyte/on-gcp-compute-engine', + type: "doc", + label: "On Google (GCP)", + id: "deploying-airbyte/on-gcp-compute-engine", }, { - type: 'doc', - label: 'On Kubernetes using Kustomize', - id: 'deploying-airbyte/on-kubernetes', + type: "doc", + label: "On Kubernetes using Kustomize", + id: "deploying-airbyte/on-kubernetes", }, { - type: 'doc', - label: 'On Kubernetes using Helm', - id: 'deploying-airbyte/on-kubernetes-via-helm', + type: "doc", + label: "On Kubernetes using Helm", + id: "deploying-airbyte/on-kubernetes-via-helm", }, { - type: 'doc', - label: 'On Restack', - id: 'deploying-airbyte/on-restack', + type: "doc", + label: "On Restack", + id: "deploying-airbyte/on-restack", }, { - type: 'doc', - label: 'On Plural', - id: 'deploying-airbyte/on-plural', + type: "doc", + label: "On Plural", + id: "deploying-airbyte/on-plural", }, { - type: 'doc', - label: 'On Oracle Cloud', - id: 'deploying-airbyte/on-oci-vm', + type: "doc", + label: "On Oracle Cloud", + id: "deploying-airbyte/on-oci-vm", }, { - type: 'doc', - label: 'On DigitalOcean', - id: 'deploying-airbyte/on-digitalocean-droplet', + type: "doc", + label: "On DigitalOcean", + id: "deploying-airbyte/on-digitalocean-droplet", }, ], }; const operatorGuide = { - type: 'category', - label: 'Manage Airbyte', + type: "category", + label: "Manage Airbyte", link: { - type: 'generated-index', + type: "generated-index", }, items: [ - 'operator-guides/upgrading-airbyte', - 'operator-guides/reset', 
- 'operator-guides/configuring-airbyte-db', - 'operator-guides/configuring-connector-resources', - 'operator-guides/browsing-output-logs', - 'operator-guides/using-the-airflow-airbyte-operator', - 'operator-guides/using-prefect-task', - 'operator-guides/using-dagster-integration', - 'operator-guides/using-kestra-plugin', - 'operator-guides/locating-files-local-destination', - 'operator-guides/collecting-metrics', + "operator-guides/upgrading-airbyte", + "operator-guides/reset", + "operator-guides/configuring-airbyte-db", + "operator-guides/configuring-connector-resources", + "operator-guides/browsing-output-logs", + "operator-guides/using-the-airflow-airbyte-operator", + "operator-guides/using-prefect-task", + "operator-guides/using-dagster-integration", + "operator-guides/using-kestra-plugin", + "operator-guides/locating-files-local-destination", + "operator-guides/collecting-metrics", { - type: 'category', - label: 'Transformations and Normalization', + type: "category", + label: "Transformations and Normalization", items: [ - 'operator-guides/transformation-and-normalization/transformations-with-sql', - 'operator-guides/transformation-and-normalization/transformations-with-dbt', - 'operator-guides/transformation-and-normalization/transformations-with-airbyte', + "operator-guides/transformation-and-normalization/transformations-with-sql", + "operator-guides/transformation-and-normalization/transformations-with-dbt", + "operator-guides/transformation-and-normalization/transformations-with-airbyte", ], }, - 'operator-guides/configuring-airbyte', - 'operator-guides/using-custom-connectors', - 'operator-guides/scaling-airbyte', - 'operator-guides/configuring-sync-notifications', + "operator-guides/configuring-airbyte", + "operator-guides/using-custom-connectors", + "operator-guides/scaling-airbyte", + "operator-guides/configuring-sync-notifications", ], }; const understandingAirbyte = { - type: 'category', - label: 'Understand Airbyte', + type: "category", + label: "Understand Airbyte", items: [ - 'understanding-airbyte/beginners-guide-to-catalog', - 'understanding-airbyte/airbyte-protocol', - 'understanding-airbyte/airbyte-protocol-docker', - 'understanding-airbyte/basic-normalization', - 'understanding-airbyte/typing-deduping', + "understanding-airbyte/beginners-guide-to-catalog", + "understanding-airbyte/airbyte-protocol", + "understanding-airbyte/airbyte-protocol-docker", + "understanding-airbyte/basic-normalization", + "understanding-airbyte/typing-deduping", { - type: 'category', - label: 'Connections and Sync Modes', + type: "category", + label: "Connections and Sync Modes", items: [ { - type: 'doc', - label: 'Connections Overview', - id: 'understanding-airbyte/connections/README', + type: "doc", + label: "Connections Overview", + id: "understanding-airbyte/connections/README", }, - 'understanding-airbyte/connections/full-refresh-overwrite', - 'understanding-airbyte/connections/full-refresh-append', - 'understanding-airbyte/connections/incremental-append', - 'understanding-airbyte/connections/incremental-deduped-history', + "understanding-airbyte/connections/full-refresh-overwrite", + "understanding-airbyte/connections/full-refresh-append", + "understanding-airbyte/connections/incremental-append", + "understanding-airbyte/connections/incremental-append-deduped", ], }, - 'understanding-airbyte/operations', - 'understanding-airbyte/high-level-view', - 'understanding-airbyte/jobs', - 'understanding-airbyte/tech-stack', - 'understanding-airbyte/cdc', - 'understanding-airbyte/namespaces', 
- 'understanding-airbyte/supported-data-types', - 'understanding-airbyte/json-avro-conversion', - 'understanding-airbyte/database-data-catalog', + "understanding-airbyte/operations", + "understanding-airbyte/high-level-view", + "understanding-airbyte/jobs", + "understanding-airbyte/tech-stack", + "understanding-airbyte/cdc", + "understanding-airbyte/namespaces", + "understanding-airbyte/supported-data-types", + "understanding-airbyte/json-avro-conversion", + "understanding-airbyte/database-data-catalog", ], }; const security = { - type: 'doc', - id: 'operator-guides/security', + type: "doc", + id: "operator-guides/security", }; const support = { - type: 'doc', - id: 'operator-guides/contact-support', + type: "doc", + id: "operator-guides/contact-support", }; module.exports = { mySidebar: [ { - type: 'doc', - label: 'Start here', - id: 'readme', + type: "doc", + label: "Start here", + id: "readme", }, - sectionHeader('Airbyte Connectors'), + sectionHeader("Airbyte Connectors"), connectorCatalog, buildAConnector, - sectionHeader('Airbyte Cloud'), + sectionHeader("Airbyte Cloud"), ...airbyteCloud, - sectionHeader('Airbyte Open Source (OSS)'), + sectionHeader("Airbyte Open Source (OSS)"), ossGettingStarted, deployAirbyte, operatorGuide, { - type: 'doc', - id: 'troubleshooting', + type: "doc", + id: "troubleshooting", }, - sectionHeader('Developer Guides'), + sectionHeader("Developer Guides"), { - type: 'doc', - id: 'api-documentation', + type: "doc", + id: "api-documentation", }, { - type: 'doc', - id: 'cli-documentation', + type: "doc", + id: "cli-documentation", }, understandingAirbyte, contributeToAirbyte, - sectionHeader('Resources'), + sectionHeader("Resources"), support, security, { - type: 'category', - label: 'Project Overview', + type: "category", + label: "Project Overview", items: [ { - type: 'link', - label: 'Roadmap', - href: 'https://go.airbyte.com/roadmap', + type: "link", + label: "Roadmap", + href: "https://go.airbyte.com/roadmap", }, - 'project-overview/product-release-stages', - 'project-overview/slack-code-of-conduct', - 'project-overview/code-of-conduct', + "project-overview/product-release-stages", + "project-overview/slack-code-of-conduct", + "project-overview/code-of-conduct", { - type: 'link', - label: 'Airbyte Repository', - href: 'https://github.com/airbytehq/airbyte', + type: "link", + label: "Airbyte Repository", + href: "https://github.com/airbytehq/airbyte", }, { - type: 'category', - label: 'Licenses', + type: "category", + label: "Licenses", link: { - type: 'doc', - id: 'project-overview/licenses/README', + type: "doc", + id: "project-overview/licenses/README", }, items: [ - 'project-overview/licenses/license-faq', - 'project-overview/licenses/elv2-license', - 'project-overview/licenses/mit-license', - 'project-overview/licenses/examples', + "project-overview/licenses/license-faq", + "project-overview/licenses/elv2-license", + "project-overview/licenses/mit-license", + "project-overview/licenses/examples", ], }, ], }, { - type: 'category', - label: 'Release Notes', + type: "category", + label: "Release Notes", link: { - type: 'generated-index', + type: "generated-index", }, items: [ /* @@ -510,18 +510,18 @@ module.exports = { ], }, */ - 'release_notes/june_2023', - 'release_notes/may_2023', - 'release_notes/april_2023', - 'release_notes/march_2023', - 'release_notes/february_2023', - 'release_notes/january_2023', - 'release_notes/december_2022', - 'release_notes/november_2022', - 'release_notes/october_2022', - 'release_notes/september_2022', - 
'release_notes/august_2022', - 'release_notes/july_2022', + "release_notes/june_2023", + "release_notes/may_2023", + "release_notes/april_2023", + "release_notes/march_2023", + "release_notes/february_2023", + "release_notes/january_2023", + "release_notes/december_2022", + "release_notes/november_2022", + "release_notes/october_2022", + "release_notes/september_2022", + "release_notes/august_2022", + "release_notes/july_2022", ], }, ], From 2ddd33a09a5eab11c74610b00a2cfa1384916b67 Mon Sep 17 00:00:00 2001 From: evantahler Date: Tue, 8 Aug 2023 09:30:18 -0700 Subject: [PATCH 2/2] fix links --- .../connector-builder-ui/incremental-sync.md | 4 ++-- .../connector-builder-ui/record-processing.mdx | 2 +- docs/integrations/destinations/cumulio.md | 12 ++++++------ docs/integrations/destinations/snowflake.md | 2 +- docs/integrations/sources/alloydb.md | 2 +- docs/integrations/sources/apple-search-ads.md | 2 +- docs/integrations/sources/bing-ads.md | 2 +- docs/integrations/sources/delighted.md | 2 +- docs/integrations/sources/facebook-marketing.md | 2 +- docs/integrations/sources/freshdesk.md | 2 +- docs/integrations/sources/gitlab.md | 2 +- docs/integrations/sources/google-ads.md | 2 +- .../sources/google-analytics-data-api.md | 2 +- docs/integrations/sources/google-analytics-v4.md | 2 +- docs/integrations/sources/google-search-console.md | 2 +- docs/integrations/sources/greenhouse.md | 2 +- docs/integrations/sources/harvest.md | 2 +- docs/integrations/sources/instagram.md | 2 +- docs/integrations/sources/iterable.md | 2 +- docs/integrations/sources/jira.md | 2 +- docs/integrations/sources/klaviyo.md | 2 +- docs/integrations/sources/mixpanel.md | 2 +- docs/integrations/sources/notion.md | 2 +- docs/integrations/sources/onesignal.md | 2 +- docs/integrations/sources/pinterest.md | 2 +- docs/integrations/sources/postgres.md | 2 +- docs/integrations/sources/prestashop.md | 2 +- docs/integrations/sources/quickbooks.md | 2 +- docs/integrations/sources/redshift.md | 4 ++-- docs/integrations/sources/salesforce.md | 4 ++-- docs/integrations/sources/salesloft.md | 2 +- docs/integrations/sources/sentry.md | 2 +- docs/integrations/sources/square.md | 2 +- docs/integrations/sources/strava.md | 2 +- docs/integrations/sources/surveycto.md | 2 +- docs/integrations/sources/tempo.md | 2 +- docs/integrations/sources/woocommerce.md | 2 +- docs/integrations/sources/yandex-metrica.md | 2 +- docs/integrations/sources/zendesk-chat.md | 2 +- 39 files changed, 47 insertions(+), 47 deletions(-) diff --git a/docs/connector-development/connector-builder-ui/incremental-sync.md b/docs/connector-development/connector-builder-ui/incremental-sync.md index 83252fe9248c..c6f780fe12cc 100644 --- a/docs/connector-development/connector-builder-ui/incremental-sync.md +++ b/docs/connector-development/connector-builder-ui/incremental-sync.md @@ -13,7 +13,7 @@ To use incremental syncs, the API endpoint needs to fullfil the following requir - It's possible to filter/request records by the cursor field - The records are sorted in ascending order based on their cursor field -The knowledge of a cursor value also allows the Airbyte system to automatically keep a history of changes to records in the destination. To learn more about how different modes of incremental syncs, check out the [Incremental Sync - Append](/understanding-airbyte/connections/incremental-append/) and [Incremental Sync - Append + Deduped](/understanding-airbyte/connections/incremental-apped-deduped) pages. 
+The knowledge of a cursor value also allows the Airbyte system to automatically keep a history of changes to records in the destination. To learn more about how different modes of incremental syncs, check out the [Incremental Sync - Append](/understanding-airbyte/connections/incremental-append/) and [Incremental Sync - Append + Deduped](/understanding-airbyte/connections/incremental-append-deduped) pages. ## Configuration @@ -139,7 +139,7 @@ Some APIs update records over time but do not allow to filter or search by modif In these cases, there are two options: -- **Do not use incremental sync** and always sync the full set of records to always have a consistent state, losing the advantages of reduced load and [automatic history keeping in the destination](/understanding-airbyte/connections/incremental-apped-deduped) +- **Do not use incremental sync** and always sync the full set of records to always have a consistent state, losing the advantages of reduced load and [automatic history keeping in the destination](/understanding-airbyte/connections/incremental-append-deduped) - **Configure the "Lookback window"** to not only sync exclusively new records, but resync some portion of records before the cutoff date to catch changes that were made to existing records, trading off data consistency and the amount of synced records. In the case of the API of The Guardian, news articles tend to only be updated for a few days after the initial release date, so this strategy should be able to catch most updates without having to resync all articles. Reiterating the example from above with a "Lookback window" of 2 days configured, let's assume the last encountered article looked like this: diff --git a/docs/connector-development/connector-builder-ui/record-processing.mdx b/docs/connector-development/connector-builder-ui/record-processing.mdx index c83cb3b4fc3b..d5ac0dbb88de 100644 --- a/docs/connector-development/connector-builder-ui/record-processing.mdx +++ b/docs/connector-development/connector-builder-ui/record-processing.mdx @@ -321,7 +321,7 @@ Besides bringing the records in the right shape, it's important to communicate s ### Primary key -The "Primary key" field specifies how to uniquely identify a record. This is important for downstream de-duplication of records (e.g. by the [incremental sync - Append + Deduped sync mode](/understanding-airbyte/connections/incremental-apped-deduped)). +The "Primary key" field specifies how to uniquely identify a record. This is important for downstream de-duplication of records (e.g. by the [incremental sync - Append + Deduped sync mode](/understanding-airbyte/connections/incremental-append-deduped)). In a lot of cases, like for the EmailOctopus example from above, there is a dedicated id field that can be used for this purpose. It's important that the value of the id field is guaranteed to only occur once for a single record. 
diff --git a/docs/integrations/destinations/cumulio.md b/docs/integrations/destinations/cumulio.md index 9e554b682bc3..6cae834e0cd0 100644 --- a/docs/integrations/destinations/cumulio.md +++ b/docs/integrations/destinations/cumulio.md @@ -25,12 +25,12 @@ _If you have any questions or want to get started with Cumul.io, don't hesitate ### Sync modes support -| [Sync modes](https://docs.airbyte.com/understanding-airbyte/connections/#sync-modes) | Supported?\(Yes/No\) | Notes | -| :---------------------------------------------------------------------------------------------------------------------- | :------------------- | :---------------------------------------------------- | -| [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append/) | Yes | / | -| [Full Refresh - Replace](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) | Yes | / | -| [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/) | Yes | / | -| [Incremental - Append + Deduped ](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) | No | Cumul.io's data warehouse does not support dbt (yet). | +| [Sync modes](https://docs.airbyte.com/understanding-airbyte/connections/#sync-modes) | Supported?\(Yes/No\) | Notes | +| :----------------------------------------------------------------------------------------------------------------------- | :------------------- | :---------------------------------------------------- | +| [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append/) | Yes | / | +| [Full Refresh - Replace](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) | Yes | / | +| [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/) | Yes | / | +| [Incremental - Append + Deduped ](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) | No | Cumul.io's data warehouse does not support dbt (yet). 
| ### Airbyte Features support diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md index 84a84fb1ff46..e5bb95e04269 100644 --- a/docs/integrations/destinations/snowflake.md +++ b/docs/integrations/destinations/snowflake.md @@ -244,7 +244,7 @@ The Snowflake destination supports the following sync modes: - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Snowflake tutorials diff --git a/docs/integrations/sources/alloydb.md b/docs/integrations/sources/alloydb.md index f9858f2a542b..83e77de41bcc 100644 --- a/docs/integrations/sources/alloydb.md +++ b/docs/integrations/sources/alloydb.md @@ -233,7 +233,7 @@ The AlloyDB source connector supports the following [sync modes](https://docs.ai - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported cursors diff --git a/docs/integrations/sources/apple-search-ads.md b/docs/integrations/sources/apple-search-ads.md index 231e45cb1b98..ffba4f33a596 100644 --- a/docs/integrations/sources/apple-search-ads.md +++ b/docs/integrations/sources/apple-search-ads.md @@ -29,7 +29,7 @@ The Apple Search Ads source connector supports the following [sync modes](https: - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/glossary#full-refresh-sync) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/bing-ads.md b/docs/integrations/sources/bing-ads.md index 258189441a01..d8be549e86dc 100644 --- a/docs/integrations/sources/bing-ads.md +++ b/docs/integrations/sources/bing-ads.md @@ -66,7 +66,7 @@ The Bing Ads source connector supports the following [sync modes](https://docs.a - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + 
Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/delighted.md b/docs/integrations/sources/delighted.md index b76d5f585a00..5491a650b605 100644 --- a/docs/integrations/sources/delighted.md +++ b/docs/integrations/sources/delighted.md @@ -36,7 +36,7 @@ The Delighted source connector supports the following [sync modes](https://docs. - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported streams diff --git a/docs/integrations/sources/facebook-marketing.md b/docs/integrations/sources/facebook-marketing.md index 45fe2f987c79..923af9abea34 100644 --- a/docs/integrations/sources/facebook-marketing.md +++ b/docs/integrations/sources/facebook-marketing.md @@ -95,7 +95,7 @@ The Facebook Marketing source connector supports the following sync modes: - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) (except for the AdCreatives and AdAccount tables) -- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) (except for the AdCreatives and AdAccount tables) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) (except for the AdCreatives and AdAccount tables) ## Supported Streams diff --git a/docs/integrations/sources/freshdesk.md b/docs/integrations/sources/freshdesk.md index b56115b5e34b..126173fefbbe 100644 --- a/docs/integrations/sources/freshdesk.md +++ b/docs/integrations/sources/freshdesk.md @@ -23,7 +23,7 @@ To set up the Freshdesk source connector, you'll need the Freshdesk [domain URL] - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/gitlab.md b/docs/integrations/sources/gitlab.md index d69193e12ebb..caf45350163f 100644 --- a/docs/integrations/sources/gitlab.md +++ b/docs/integrations/sources/gitlab.md @@ -73,7 +73,7 @@ The Gitlab Source connector supports the following [ sync modes](https://docs.ai - [Full Refresh - 
Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/google-ads.md b/docs/integrations/sources/google-ads.md index cfabd4349ad4..1bbe68939567 100644 --- a/docs/integrations/sources/google-ads.md +++ b/docs/integrations/sources/google-ads.md @@ -95,7 +95,7 @@ The Google Ads source connector supports the following [sync modes](https://docs - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/google-analytics-data-api.md b/docs/integrations/sources/google-analytics-data-api.md index 1a9ad3dc014f..bb5d41180d31 100644 --- a/docs/integrations/sources/google-analytics-data-api.md +++ b/docs/integrations/sources/google-analytics-data-api.md @@ -68,7 +68,7 @@ The Google Analytics source connector supports the following [sync modes](https: - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/google-analytics-v4.md b/docs/integrations/sources/google-analytics-v4.md index 7b49e28ceb25..2b32a9d7d7a6 100644 --- a/docs/integrations/sources/google-analytics-v4.md +++ b/docs/integrations/sources/google-analytics-v4.md @@ -73,7 +73,7 @@ The Google Analytics source connector supports the following [sync modes](https: - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) :::caution diff --git a/docs/integrations/sources/google-search-console.md b/docs/integrations/sources/google-search-console.md index 
524bd199bdac..59883a322322 100644 --- a/docs/integrations/sources/google-search-console.md +++ b/docs/integrations/sources/google-search-console.md @@ -94,7 +94,7 @@ The Google Search Console Source connector supports the following [ sync modes]( - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) :::note The granularity for the cursor is 1 day, so Incremental Sync in Append mode may result in duplicating the data. diff --git a/docs/integrations/sources/greenhouse.md b/docs/integrations/sources/greenhouse.md index 4c169e791c30..50ef4dc63de0 100644 --- a/docs/integrations/sources/greenhouse.md +++ b/docs/integrations/sources/greenhouse.md @@ -22,7 +22,7 @@ The Greenhouse source connector supports the following [sync modes](https://docs - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/harvest.md b/docs/integrations/sources/harvest.md index 31ca38f85400..e53ff9afbe96 100644 --- a/docs/integrations/sources/harvest.md +++ b/docs/integrations/sources/harvest.md @@ -43,7 +43,7 @@ The Harvest source connector supports the following [sync modes](https://docs.ai - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/instagram.md b/docs/integrations/sources/instagram.md index d16ba32b25a8..cfe5d8b7c9a6 100644 --- a/docs/integrations/sources/instagram.md +++ b/docs/integrations/sources/instagram.md @@ -49,7 +49,7 @@ The Instagram source connector supports the following [sync modes](https://docs. 
- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) :::note diff --git a/docs/integrations/sources/iterable.md b/docs/integrations/sources/iterable.md index bc5fcb91cd3a..ec0bb73ea0fb 100644 --- a/docs/integrations/sources/iterable.md +++ b/docs/integrations/sources/iterable.md @@ -23,7 +23,7 @@ The Iterable source connector supports the following [sync modes](https://docs.a - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/jira.md b/docs/integrations/sources/jira.md index 7b5772e1e429..aea4d3cac20d 100644 --- a/docs/integrations/sources/jira.md +++ b/docs/integrations/sources/jira.md @@ -37,7 +37,7 @@ The Jira source connector supports the following [sync modes](https://docs.airby - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Troubleshooting diff --git a/docs/integrations/sources/klaviyo.md b/docs/integrations/sources/klaviyo.md index 2c953f7943b1..e8d1ce170503 100644 --- a/docs/integrations/sources/klaviyo.md +++ b/docs/integrations/sources/klaviyo.md @@ -23,7 +23,7 @@ The Klaviyo source connector supports the following [sync modes](https://docs.ai - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/mixpanel.md b/docs/integrations/sources/mixpanel.md index cec9ca3ec089..b0f31a65d224 100644 --- a/docs/integrations/sources/mixpanel.md +++ b/docs/integrations/sources/mixpanel.md @@ -29,7 +29,7 @@ The Mixpanel source connector supports the 
following [sync modes](https://docs.a - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) Note: Incremental sync returns duplicated \(old records\) for the state date due to API filter limitation, which is granular to the whole day only. diff --git a/docs/integrations/sources/notion.md b/docs/integrations/sources/notion.md index 74ed432faa9c..b465bb9c4786 100644 --- a/docs/integrations/sources/notion.md +++ b/docs/integrations/sources/notion.md @@ -82,7 +82,7 @@ The Notion source connector supports the following [sync modes](https://docs.air - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) (partially) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/onesignal.md b/docs/integrations/sources/onesignal.md index 9f740480c7a8..bb2f124b6680 100644 --- a/docs/integrations/sources/onesignal.md +++ b/docs/integrations/sources/onesignal.md @@ -48,7 +48,7 @@ The OneSignal source connector supports the following [sync modes](https://docs. - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/pinterest.md b/docs/integrations/sources/pinterest.md index bce016844a49..a5ddc684e83c 100644 --- a/docs/integrations/sources/pinterest.md +++ b/docs/integrations/sources/pinterest.md @@ -41,7 +41,7 @@ The Pinterest source connector supports the following [sync modes](https://docs. 
- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/postgres.md b/docs/integrations/sources/postgres.md index 94b803cd6085..056a583985cb 100644 --- a/docs/integrations/sources/postgres.md +++ b/docs/integrations/sources/postgres.md @@ -284,7 +284,7 @@ The Postgres source connector supports the following [sync modes](https://docs.a - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported cursors diff --git a/docs/integrations/sources/prestashop.md b/docs/integrations/sources/prestashop.md index d939a2113057..3a6fa7a7ff04 100644 --- a/docs/integrations/sources/prestashop.md +++ b/docs/integrations/sources/prestashop.md @@ -33,7 +33,7 @@ The PrestaShop source connector supports the following [ sync modes](https://doc - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/quickbooks.md b/docs/integrations/sources/quickbooks.md index 5a9abd6ee759..f5f905cd95ed 100644 --- a/docs/integrations/sources/quickbooks.md +++ b/docs/integrations/sources/quickbooks.md @@ -57,7 +57,7 @@ The Quickbooks Source connector supports the following [ sync modes](https://doc - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/redshift.md b/docs/integrations/sources/redshift.md index 6b764eb72475..dafe396d2684 100644 --- a/docs/integrations/sources/redshift.md +++ 
b/docs/integrations/sources/redshift.md @@ -27,9 +27,9 @@ The Redshift source does not alter the schema present in your warehouse. Dependi #### Incremental Sync -The Redshift source connector supports incremental syncs. To setup an incremental sync for a table in Redshift in the Airbyte UI, you must setup a [user-defined cursor field](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/#user-defined-cursor) such as an `updated_at` column. The connector relies on this column to know which records were updated since the last sync it ran. See the [incremental sync docs](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) for more information. +The Redshift source connector supports incremental syncs. To setup an incremental sync for a table in Redshift in the Airbyte UI, you must setup a [user-defined cursor field](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/#user-defined-cursor) such as an `updated_at` column. The connector relies on this column to know which records were updated since the last sync it ran. See the [incremental sync docs](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) for more information. -Defining a cursor field allows you to run incremental-append syncs. To run [incremental-dedupe](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) syncs, you'll need to tell the connector which column(s) to use as a primary key. See the [incremental-dedupe sync docs](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) for more information. +Defining a cursor field allows you to run incremental-append syncs. To run [incremental-dedupe](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) syncs, you'll need to tell the connector which column(s) to use as a primary key. See the [incremental-dedupe sync docs](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) for more information. ## Getting started diff --git a/docs/integrations/sources/salesforce.md b/docs/integrations/sources/salesforce.md index aefe09215296..ba5d388273c4 100644 --- a/docs/integrations/sources/salesforce.md +++ b/docs/integrations/sources/salesforce.md @@ -77,7 +77,7 @@ The Salesforce source connector supports the following sync modes: - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- (Recommended)[ Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- (Recommended)[ Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ### Incremental Deletes sync @@ -85,7 +85,7 @@ The Salesforce connector retrieves deleted records from Salesforce. For the stre ## Performance considerations -The Salesforce connector is restricted by Salesforce’s [Daily Rate Limits](https://developer.salesforce.com/docs/atlas.en-us.salesforce_app_limits_cheatsheet.meta/salesforce_app_limits_cheatsheet/salesforce_app_limits_platform_api.htm). 
The connector syncs data until it hits the daily rate limit, then ends the sync early with success status, and starts the next sync from where it left off. Note that picking up from where it ends will work only for incremental sync, which is why we recommend using the [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) sync mode. +The Salesforce connector is restricted by Salesforce’s [Daily Rate Limits](https://developer.salesforce.com/docs/atlas.en-us.salesforce_app_limits_cheatsheet.meta/salesforce_app_limits_cheatsheet/salesforce_app_limits_platform_api.htm). The connector syncs data until it hits the daily rate limit, then ends the sync early with success status, and starts the next sync from where it left off. Note that picking up from where it ends will work only for incremental sync, which is why we recommend using the [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) sync mode. ## Supported Objects diff --git a/docs/integrations/sources/salesloft.md b/docs/integrations/sources/salesloft.md index 0a14fdfa419f..be63f2cd8d7f 100644 --- a/docs/integrations/sources/salesloft.md +++ b/docs/integrations/sources/salesloft.md @@ -58,7 +58,7 @@ The Salesloft Source connector supports the following [ sync modes](https://docs - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/sentry.md b/docs/integrations/sources/sentry.md index 2f19ae726dfd..2aad6fe489aa 100644 --- a/docs/integrations/sources/sentry.md +++ b/docs/integrations/sources/sentry.md @@ -25,7 +25,7 @@ The Sentry source connector supports the following [sync modes](https://docs.air - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/square.md b/docs/integrations/sources/square.md index 0893780d3f72..2dfa772284b3 100644 --- a/docs/integrations/sources/square.md +++ b/docs/integrations/sources/square.md @@ -52,7 +52,7 @@ The Square source connector supports the following [ sync modes](https://docs.ai - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + 
Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/strava.md b/docs/integrations/sources/strava.md index 0ecc8c38641e..c2d187919952 100644 --- a/docs/integrations/sources/strava.md +++ b/docs/integrations/sources/strava.md @@ -108,7 +108,7 @@ The Strava source connector supports the following [sync modes](https://docs.air - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported streams diff --git a/docs/integrations/sources/surveycto.md b/docs/integrations/sources/surveycto.md index 3e017bb8f4a8..70a373ad8617 100644 --- a/docs/integrations/sources/surveycto.md +++ b/docs/integrations/sources/surveycto.md @@ -37,7 +37,7 @@ The SurveyCTO source connector supports the following [sync modes](https://docs. - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- (Recommended)[ Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- (Recommended)[ Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/tempo.md b/docs/integrations/sources/tempo.md index 9a4c773b7003..68f40b69be28 100644 --- a/docs/integrations/sources/tempo.md +++ b/docs/integrations/sources/tempo.md @@ -29,7 +29,7 @@ The Tempo source connector supports the following [ sync modes](https://docs.air - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/woocommerce.md b/docs/integrations/sources/woocommerce.md index c1459977b429..e97eb3a6749d 100644 --- a/docs/integrations/sources/woocommerce.md +++ b/docs/integrations/sources/woocommerce.md @@ -53,7 +53,7 @@ following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-s - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - 
[Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/yandex-metrica.md b/docs/integrations/sources/yandex-metrica.md index f1cb3079b9bb..e38e7c8404b0 100644 --- a/docs/integrations/sources/yandex-metrica.md +++ b/docs/integrations/sources/yandex-metrica.md @@ -52,7 +52,7 @@ The Yandex Metrica source connector supports the following [sync modes](https:// - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams diff --git a/docs/integrations/sources/zendesk-chat.md b/docs/integrations/sources/zendesk-chat.md index 1a1cd588376f..eab821953fd8 100644 --- a/docs/integrations/sources/zendesk-chat.md +++ b/docs/integrations/sources/zendesk-chat.md @@ -46,7 +46,7 @@ The Zendesk Chat source connector supports the following [sync modes](https://do - [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) - [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) - [Incremental - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) -- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-apped-deduped) +- [Incremental - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) ## Supported Streams