diff --git a/llama_hub/airbyte_cdk/.gitignore b/llama_hub/airbyte_cdk/.gitignore new file mode 100644 index 0000000000..5890b098b7 --- /dev/null +++ b/llama_hub/airbyte_cdk/.gitignore @@ -0,0 +1 @@ +test.py diff --git a/llama_hub/airbyte_cdk/README.md b/llama_hub/airbyte_cdk/README.md new file mode 100644 index 0000000000..5a45496fef --- /dev/null +++ b/llama_hub/airbyte_cdk/README.md @@ -0,0 +1,54 @@ +# Airbyte CDK Loader + +The Airbyte CDK Loader is a shim for sources created using the [Airbyte Python CDK](https://docs.airbyte.com/connector-development/cdk-python/). It allows you to load data from any Airbyte source into LlamaIndex. + +## Installation + +* Install llama_hub: `pip install llama_hub` +* Install airbyte-cdk: `pip install airbyte-cdk` +* Install a source via git (or implement your own): `pip install "git+https://github.com/airbytehq/airbyte.git@master#egg=source_github&subdirectory=airbyte-integrations/connectors/source-github"` + +## Usage + +Implement and import your own source. You can find lots of resources for how to achieve this on the [Airbyte documentation page](https://docs.airbyte.com/connector-development/). + +Here's an example usage of the AirbyteCDKReader. + +```python +from llama_index import download_loader +from llama_hub.airbyte_cdk.base import AirbyteCDKReader +from source_github.source import SourceGithub # this is just an example, you can use any source here - this one is installed from the Airbyte GitHub repo via pip install "git+https://github.com/airbytehq/airbyte.git@master#egg=source_github&subdirectory=airbyte-integrations/connectors/source-github" + + +github_config = { + # ... +} +reader = AirbyteCDKReader(source_class=SourceGithub,config=github_config) +documents = reader.load_data(stream_name="issues") +``` + +By default all fields are stored as metadata in the documents and the text is set to the JSON representation of all the fields. 
Construct the text of the document yourself by passing a `record_handler` to the reader (the example assumes `from llama_index.readers.schema.base import Document`): +```python +def handle_record(record, id): + return Document(doc_id=id, text=record.data["title"], extra_info=record.data) + +reader = AirbyteCDKReader(source_class=SourceGithub,config=github_config, record_handler=handle_record) +``` + +## Lazy loads + +The `reader.load_data` method collects all documents and returns them as a list. With a large number of documents, this can consume a lot of memory. By using `reader.lazy_load_data` instead, an iterator is returned which can be consumed document by document without the need to keep all documents in memory. + +## Incremental loads + +If a stream supports it, this loader can be used to load data incrementally (returning only documents that weren't loaded last time or were updated in the meantime): +```python + +reader = AirbyteCDKReader(source_class=SourceGithub,config=github_config) +documents = reader.load_data(stream_name="issues") +current_state = reader.last_state # can be pickled away or stored otherwise + +updated_documents = reader.load_data(stream_name="issues", state=current_state) # only loads documents that were updated since last time +``` + +This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples. 
diff --git a/llama_hub/airbyte_cdk/__init__.py b/llama_hub/airbyte_cdk/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/llama_hub/airbyte_cdk/base.py b/llama_hub/airbyte_cdk/base.py new file mode 100644 index 0000000000..ca58072ac7 --- /dev/null +++ b/llama_hub/airbyte_cdk/base.py @@ -0,0 +1,55 @@ +import json +from typing import Any, Callable, Iterator, List, Mapping, Optional + +from llama_index.readers.base import BaseReader +from llama_index.readers.schema.base import Document + +RecordHandler = Callable[[Any, Optional[str]], Document] + + +class AirbyteCDKReader(BaseReader): + """AirbyteCDKReader reader. + + Retrieve documents from an Airbyte source implemented using the CDK. + + Args: + source_class: The Airbyte source class. + config: The config object for the Airbyte source. + """ + + def __init__( + self, + source_class: Any, + config: Mapping[str, Any], + record_handler: Optional[RecordHandler] = None, + ) -> None: + """Initialize with parameters.""" + + from airbyte_cdk.sources.embedded.base_integration import BaseEmbeddedIntegration + from airbyte_cdk.sources.embedded.runner import CDKRunner + from airbyte_cdk.models.airbyte_protocol import AirbyteRecordMessage + + class CDKIntegration(BaseEmbeddedIntegration): + def _handle_record( + self, record: AirbyteRecordMessage, id: Optional[str] + ) -> Document: + if record_handler: + return record_handler(record, id) + return Document( + doc_id=id, text=json.dumps(record.data), extra_info=record.data + ) + + self._integration = CDKIntegration( + config=config, + runner=CDKRunner(source=source_class(), name=source_class.__name__), + ) + + def load_data(self, *args: Any, **kwargs: Any) -> List[Document]: + return list(self.lazy_load_data(*args, **kwargs)) + + def lazy_load_data(self, *args: Any, **kwargs: Any) -> Iterator[Document]: + return self._integration._load_data(*args, **kwargs) + + @property + def last_state(self): + return self._integration.last_state diff --git 
a/llama_hub/airbyte_cdk/requirements.txt b/llama_hub/airbyte_cdk/requirements.txt new file mode 100644 index 0000000000..2f8bee370d --- /dev/null +++ b/llama_hub/airbyte_cdk/requirements.txt @@ -0,0 +1,2 @@ +airbyte-cdk +airbyte-protocol-models \ No newline at end of file diff --git a/llama_hub/airbyte_gong/README.md b/llama_hub/airbyte_gong/README.md new file mode 100644 index 0000000000..9b0b290e8c --- /dev/null +++ b/llama_hub/airbyte_gong/README.md @@ -0,0 +1,62 @@ +# Airbyte Gong Loader + +The Airbyte Gong Loader allows you to access different Gong objects. + +## Installation + +* Install llama_hub: `pip install llama_hub` +* Install the gong source: `pip install airbyte-source-gong` + +## Usage + +Here's an example usage of the AirbyteGongReader. + +```python +from llama_hub.airbyte_gong.base import AirbyteGongReader + +gong_config = { + # ... +} +reader = AirbyteGongReader(config=gong_config) +documents = reader.load_data(stream_name="calls") +``` + +## Configuration + +Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/gong/) for details about how to configure the reader. +The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-gong/source_gong/spec.yaml](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-gong/source_gong/spec.yaml). + +The general shape looks like this: +```python +{ + "access_key": "", + "access_key_secret": "", + "start_date": "", +} +``` + +By default all fields are stored as metadata in the documents and the text is set to the JSON representation of all the fields. 
Construct the text of the document by passing a `record_handler` to the reader: +```python +def handle_record(record, id): + return Document(doc_id=id, text=record.data["title"], extra_info=record.data) + +reader = AirbyteGongReader(config=gong_config, record_handler=handle_record) +``` + +## Lazy loads + +The `reader.load_data` endpoint will collect all documents and return them as a list. If there are a large number of documents, this can cause issues. By using `reader.lazy_load_data` instead, an iterator is returned which can be consumed document by document without the need to keep all documents in memory. + +## Incremental loads + +This loader supports loading data incrementally (only returning documents that weren't loaded last time or got updated in the meantime): +```python + +reader = AirbyteGongReader(config={...}) +documents = reader.load_data(stream_name="calls") +current_state = reader.last_state # can be pickled away or stored otherwise + +updated_documents = reader.load_data(stream_name="calls", state=current_state) # only loads documents that were updated since last time +``` + +This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples. diff --git a/llama_hub/airbyte_gong/__init__.py b/llama_hub/airbyte_gong/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/llama_hub/airbyte_gong/base.py b/llama_hub/airbyte_gong/base.py new file mode 100644 index 0000000000..1fe7c66420 --- /dev/null +++ b/llama_hub/airbyte_gong/base.py @@ -0,0 +1,22 @@ +from typing import Any, Mapping, Optional +from llama_hub.airbyte_cdk.base import AirbyteCDKReader, RecordHandler + + +class AirbyteGongReader(AirbyteCDKReader): + """AirbyteGongReader reader. 
+ + Retrieve documents from Gong + + Args: + config: The config object for the gong source. + """ + + def __init__( + self, + config: Mapping[str, Any], + record_handler: Optional[RecordHandler] = None, + ) -> None: + """Initialize with parameters.""" + import source_gong + + super().__init__(source_class=source_gong.SourceGong, config=config, record_handler=record_handler) diff --git a/llama_hub/airbyte_gong/requirements.txt b/llama_hub/airbyte_gong/requirements.txt new file mode 100644 index 0000000000..eb567a43c7 --- /dev/null +++ b/llama_hub/airbyte_gong/requirements.txt @@ -0,0 +1 @@ +airbyte-source-gong \ No newline at end of file diff --git a/llama_hub/airbyte_hubspot/README.md b/llama_hub/airbyte_hubspot/README.md new file mode 100644 index 0000000000..fee2ff0135 --- /dev/null +++ b/llama_hub/airbyte_hubspot/README.md @@ -0,0 +1,64 @@ +# Airbyte Hubspot Loader + +The Airbyte Hubspot Loader allows you to access different Hubspot objects. + +## Installation + +* Install llama_hub: `pip install llama_hub` +* Install the hubspot source: `pip install airbyte-source-hubspot` + +## Usage + +Here's an example usage of the AirbyteHubspotReader. + +```python +from llama_hub.airbyte_hubspot.base import AirbyteHubspotReader + +hubspot_config = { + # ... +} +reader = AirbyteHubspotReader(config=hubspot_config) +documents = reader.load_data(stream_name="products") +``` + +## Configuration + +Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/hubspot/) for details about how to configure the reader. +The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.yaml](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.yaml). 
+ +The general shape looks like this: +```python +{ + "start_date": "", + "credentials": { + "credentials_title": "Private App Credentials", + "access_token": "" + } +} +``` + +By default all fields are stored as metadata in the documents and the text is set to the JSON representation of all the fields. Construct the text of the document by passing a `record_handler` to the reader: +```python +def handle_record(record, id): + return Document(doc_id=id, text=record.data["title"], extra_info=record.data) + +reader = AirbyteHubspotReader(config=hubspot_config, record_handler=handle_record) +``` + +## Lazy loads + +The `reader.load_data` endpoint will collect all documents and return them as a list. If there are a large number of documents, this can cause issues. By using `reader.lazy_load_data` instead, an iterator is returned which can be consumed document by document without the need to keep all documents in memory. + +## Incremental loads + +This loader supports loading data incrementally (only returning documents that weren't loaded last time or got updated in the meantime): +```python + +reader = AirbyteHubspotReader(config={...}) +documents = reader.load_data(stream_name="products") +current_state = reader.last_state # can be pickled away or stored otherwise + +updated_documents = reader.load_data(stream_name="products", state=current_state) # only loads documents that were updated since last time +``` + +This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples. 
diff --git a/llama_hub/airbyte_hubspot/__init__.py b/llama_hub/airbyte_hubspot/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/llama_hub/airbyte_hubspot/base.py b/llama_hub/airbyte_hubspot/base.py new file mode 100644 index 0000000000..8fad703181 --- /dev/null +++ b/llama_hub/airbyte_hubspot/base.py @@ -0,0 +1,22 @@ +from typing import Any, Mapping, Optional +from llama_hub.airbyte_cdk.base import AirbyteCDKReader, RecordHandler + + +class AirbyteHubspotReader(AirbyteCDKReader): + """AirbyteHubspotReader reader. + + Retrieve documents from Hubspot + + Args: + config: The config object for the hubspot source. + """ + + def __init__( + self, + config: Mapping[str, Any], + record_handler: Optional[RecordHandler] = None, + ) -> None: + """Initialize with parameters.""" + import source_hubspot + + super().__init__(source_class=source_hubspot.SourceHubspot, config=config, record_handler=record_handler) diff --git a/llama_hub/airbyte_hubspot/requirements.txt b/llama_hub/airbyte_hubspot/requirements.txt new file mode 100644 index 0000000000..3245554bf1 --- /dev/null +++ b/llama_hub/airbyte_hubspot/requirements.txt @@ -0,0 +1 @@ +airbyte-source-hubspot \ No newline at end of file diff --git a/llama_hub/airbyte_salesforce/README.md b/llama_hub/airbyte_salesforce/README.md new file mode 100644 index 0000000000..1c379687ed --- /dev/null +++ b/llama_hub/airbyte_salesforce/README.md @@ -0,0 +1,69 @@ +# Airbyte Salesforce Loader + +The Airbyte Salesforce Loader allows you to access different Salesforce objects. + +## Installation + +* Install llama_hub: `pip install llama_hub` +* Install the salesforce source: `pip install airbyte-source-salesforce` + +## Usage + +Here's an example usage of the AirbyteSalesforceReader. + +```python +from llama_hub.airbyte_salesforce.base import AirbyteSalesforceReader + +salesforce_config = { + # ... 
+} +reader = AirbyteSalesforceReader(config=salesforce_config) +documents = reader.load_data(stream_name="asset") +``` + +## Configuration + +Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/salesforce/) for details about how to configure the reader. +The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.yaml](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.yaml). + +The general shape looks like this: +```python +{ + "client_id": "", + "client_secret": "", + "refresh_token": "", + "start_date": "", + "is_sandbox": False, # set to True if you're using a sandbox environment + "streams_criteria": [ # Array of filters for salesforce objects that should be loadable + {"criteria": "exacts", "value": "Account"}, # Exact name of salesforce object + {"criteria": "starts with", "value": "Asset"}, # Prefix of the name + # Other allowed criteria: ends with, contains, starts not with, ends not with, not contains, not exacts + ], +} +``` + +By default all fields are stored as metadata in the documents and the text is set to the JSON representation of all the fields. Construct the text of the document by passing a `record_handler` to the reader: +```python +def handle_record(record, id): + return Document(doc_id=id, text=record.data["title"], extra_info=record.data) + +reader = AirbyteSalesforceReader(config=salesforce_config, record_handler=handle_record) +``` + +## Lazy loads + +The `reader.load_data` endpoint will collect all documents and return them as a list. If there are a large number of documents, this can cause issues. By using `reader.lazy_load_data` instead, an iterator is returned which can be consumed document by document without the need to keep all documents in memory. 
+ +## Incremental loads + +This loader supports loading data incrementally (only returning documents that weren't loaded last time or got updated in the meantime): +```python + +reader = AirbyteSalesforceReader(config={...}) +documents = reader.load_data(stream_name="asset") +current_state = reader.last_state # can be pickled away or stored otherwise + +updated_documents = reader.load_data(stream_name="asset", state=current_state) # only loads documents that were updated since last time +``` + +This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples. diff --git a/llama_hub/airbyte_salesforce/__init__.py b/llama_hub/airbyte_salesforce/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/llama_hub/airbyte_salesforce/base.py b/llama_hub/airbyte_salesforce/base.py new file mode 100644 index 0000000000..83b5afcbaf --- /dev/null +++ b/llama_hub/airbyte_salesforce/base.py @@ -0,0 +1,22 @@ +from typing import Any, Mapping, Optional +from llama_hub.airbyte_cdk.base import AirbyteCDKReader, RecordHandler + + +class AirbyteSalesforceReader(AirbyteCDKReader): + """AirbyteSalesforceReader reader. + + Retrieve documents from Salesforce + + Args: + config: The config object for the salesforce source. 
+ """ + + def __init__( + self, + config: Mapping[str, Any], + record_handler: Optional[RecordHandler] = None, + ) -> None: + """Initialize with parameters.""" + import source_salesforce + + super().__init__(source_class=source_salesforce.SourceSalesforce, config=config, record_handler=record_handler) diff --git a/llama_hub/airbyte_salesforce/requirements.txt b/llama_hub/airbyte_salesforce/requirements.txt new file mode 100644 index 0000000000..2b9a2e11c9 --- /dev/null +++ b/llama_hub/airbyte_salesforce/requirements.txt @@ -0,0 +1 @@ +airbyte-source-salesforce \ No newline at end of file diff --git a/llama_hub/airbyte_shopify/README.md b/llama_hub/airbyte_shopify/README.md new file mode 100644 index 0000000000..5268147a03 --- /dev/null +++ b/llama_hub/airbyte_shopify/README.md @@ -0,0 +1,65 @@ +# Airbyte Shopify Loader + +The Airbyte Shopify Loader allows you to access different Shopify objects. + +## Installation + +* Install llama_hub: `pip install llama_hub` +* Install the shopify source: `pip install airbyte-source-shopify` + +## Usage + +Here's an example usage of the AirbyteShopifyReader. + +```python +from llama_hub.airbyte_shopify.base import AirbyteShopifyReader + +shopify_config = { + # ... +} +reader = AirbyteShopifyReader(config=shopify_config) +documents = reader.load_data(stream_name="orders") +``` + +## Configuration + +Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/shopify/) for details about how to configure the reader. +The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-shopify/source_shopify/spec.json](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-shopify/source_shopify/spec.json). 
+ +The general shape looks like this: +```python +{ + "start_date": "", + "shop": "", + "credentials": { + "auth_method": "api_password", + "api_password": "" + } +} +``` + +By default all fields are stored as metadata in the documents and the text is set to the JSON representation of all the fields. Construct the text of the document by passing a `record_handler` to the reader: +```python +def handle_record(record, id): + return Document(doc_id=id, text=record.data["title"], extra_info=record.data) + +reader = AirbyteShopifyReader(config=shopify_config, record_handler=handle_record) +``` + +## Lazy loads + +The `reader.load_data` endpoint will collect all documents and return them as a list. If there are a large number of documents, this can cause issues. By using `reader.lazy_load_data` instead, an iterator is returned which can be consumed document by document without the need to keep all documents in memory. + +## Incremental loads + +This loader supports loading data incrementally (only returning documents that weren't loaded last time or got updated in the meantime): +```python + +reader = AirbyteShopifyReader(config={...}) +documents = reader.load_data(stream_name="orders") +current_state = reader.last_state # can be pickled away or stored otherwise + +updated_documents = reader.load_data(stream_name="orders", state=current_state) # only loads documents that were updated since last time +``` + +This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples. 
diff --git a/llama_hub/airbyte_shopify/__init__.py b/llama_hub/airbyte_shopify/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/llama_hub/airbyte_shopify/base.py b/llama_hub/airbyte_shopify/base.py new file mode 100644 index 0000000000..bfaec86cae --- /dev/null +++ b/llama_hub/airbyte_shopify/base.py @@ -0,0 +1,22 @@ +from typing import Any, Mapping, Optional +from llama_hub.airbyte_cdk.base import AirbyteCDKReader, RecordHandler + + +class AirbyteShopifyReader(AirbyteCDKReader): + """AirbyteShopifyReader reader. + + Retrieve documents from Shopify + + Args: + config: The config object for the shopify source. + """ + + def __init__( + self, + config: Mapping[str, Any], + record_handler: Optional[RecordHandler] = None, + ) -> None: + """Initialize with parameters.""" + import source_shopify + + super().__init__(source_class=source_shopify.SourceShopify, config=config, record_handler=record_handler) diff --git a/llama_hub/airbyte_shopify/requirements.txt b/llama_hub/airbyte_shopify/requirements.txt new file mode 100644 index 0000000000..9f1119673e --- /dev/null +++ b/llama_hub/airbyte_shopify/requirements.txt @@ -0,0 +1 @@ +airbyte-source-shopify \ No newline at end of file diff --git a/llama_hub/airbyte_stripe/README.md b/llama_hub/airbyte_stripe/README.md new file mode 100644 index 0000000000..d65d425883 --- /dev/null +++ b/llama_hub/airbyte_stripe/README.md @@ -0,0 +1,63 @@ +# Airbyte Stripe Loader + +The Airbyte Stripe Loader allows you to access different Stripe objects. + +## Installation + +* Install llama_hub: `pip install llama_hub` +* Install the stripe source: `pip install airbyte-source-stripe` + +## Usage + +Here's an example usage of the AirbyteStripeReader. + +```python +from llama_hub.airbyte_stripe.base import AirbyteStripeReader + +stripe_config = { + # ... 
+} +reader = AirbyteStripeReader(config=stripe_config) +documents = reader.load_data(stream_name="invoices") +``` + +## Configuration + +Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/stripe/) for details about how to configure the reader. +The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml). + +The general shape looks like this: +```python + +{ + "client_secret": "", + "account_id": "", + "start_date": "", +} +``` + +By default all fields are stored as metadata in the documents and the text is set to the JSON representation of all the fields. Construct the text of the document by passing a `record_handler` to the reader: +```python +def handle_record(record, id): + return Document(doc_id=id, text=record.data["title"], extra_info=record.data) + +reader = AirbyteStripeReader(config=stripe_config, record_handler=handle_record) +``` + +## Lazy loads + +The `reader.load_data` endpoint will collect all documents and return them as a list. If there are a large number of documents, this can cause issues. By using `reader.lazy_load_data` instead, an iterator is returned which can be consumed document by document without the need to keep all documents in memory. 
+ +## Incremental loads + +This loader supports loading data incrementally (only returning documents that weren't loaded last time or got updated in the meantime): +```python + +reader = AirbyteStripeReader(config={...}) +documents = reader.load_data(stream_name="invoices") +current_state = reader.last_state # can be pickled away or stored otherwise + +updated_documents = reader.load_data(stream_name="invoices", state=current_state) # only loads documents that were updated since last time +``` + +This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples. diff --git a/llama_hub/airbyte_stripe/__init__.py b/llama_hub/airbyte_stripe/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/llama_hub/airbyte_stripe/base.py b/llama_hub/airbyte_stripe/base.py new file mode 100644 index 0000000000..9b23eab09b --- /dev/null +++ b/llama_hub/airbyte_stripe/base.py @@ -0,0 +1,22 @@ +from typing import Any, Mapping, Optional +from llama_hub.airbyte_cdk.base import AirbyteCDKReader, RecordHandler + + +class AirbyteStripeReader(AirbyteCDKReader): + """AirbyteStripeReader reader. + + Retrieve documents from Stripe + + Args: + config: The config object for the stripe source. 
+ """ + + def __init__( + self, + config: Mapping[str, Any], + record_handler: Optional[RecordHandler] = None, + ) -> None: + """Initialize with parameters.""" + import source_stripe + + super().__init__(source_class=source_stripe.SourceStripe, config=config, record_handler=record_handler) diff --git a/llama_hub/airbyte_stripe/requirements.txt b/llama_hub/airbyte_stripe/requirements.txt new file mode 100644 index 0000000000..4a0e864730 --- /dev/null +++ b/llama_hub/airbyte_stripe/requirements.txt @@ -0,0 +1 @@ +airbyte-source-stripe \ No newline at end of file diff --git a/llama_hub/airbyte_typeform/README.md b/llama_hub/airbyte_typeform/README.md new file mode 100644 index 0000000000..cc83e5073d --- /dev/null +++ b/llama_hub/airbyte_typeform/README.md @@ -0,0 +1,65 @@ +# Airbyte Typeform Loader + +The Airbyte Typeform Loader allows you to access different Typeform objects. + +## Installation + +* Install llama_hub: `pip install llama_hub` +* Install the typeform source: `pip install airbyte-source-typeform` + +## Usage + +Here's an example usage of the AirbyteTypeformReader. + +```python +from llama_hub.airbyte_typeform.base import AirbyteTypeformReader + +typeform_config = { + # ... +} +reader = AirbyteTypeformReader(config=typeform_config) +documents = reader.load_data(stream_name="forms") +``` + +## Configuration + +Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/typeform/) for details about how to configure the reader. +The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-typeform/source_typeform/spec.json](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-typeform/source_typeform/spec.json). 
+ +The general shape looks like this: +```python +{ + "credentials": { + "auth_type": "Private Token", + "access_token": "" + }, + "start_date": "", + "form_ids": [""] # if omitted, records from all forms will be loaded +} +``` + +By default all fields are stored as metadata in the documents and the text is set to the JSON representation of all the fields. Construct the text of the document by passing a `record_handler` to the reader: +```python +def handle_record(record, id): + return Document(doc_id=id, text=record.data["title"], extra_info=record.data) + +reader = AirbyteTypeformReader(config=typeform_config, record_handler=handle_record) +``` + +## Lazy loads + +The `reader.load_data` endpoint will collect all documents and return them as a list. If there are a large number of documents, this can cause issues. By using `reader.lazy_load_data` instead, an iterator is returned which can be consumed document by document without the need to keep all documents in memory. + +## Incremental loads + +This loader supports loading data incrementally (only returning documents that weren't loaded last time or got updated in the meantime): +```python + +reader = AirbyteTypeformReader(config={...}) +documents = reader.load_data(stream_name="forms") +current_state = reader.last_state # can be pickled away or stored otherwise + +updated_documents = reader.load_data(stream_name="forms", state=current_state) # only loads documents that were updated since last time +``` + +This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/jerryjliu/gpt_index/tree/main/gpt_index) and/or subsequently used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples. 
diff --git a/llama_hub/airbyte_typeform/__init__.py b/llama_hub/airbyte_typeform/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/llama_hub/airbyte_typeform/base.py b/llama_hub/airbyte_typeform/base.py new file mode 100644 index 0000000000..d7136f0c66 --- /dev/null +++ b/llama_hub/airbyte_typeform/base.py @@ -0,0 +1,22 @@ +from typing import Any, Mapping, Optional +from llama_hub.airbyte_cdk.base import AirbyteCDKReader, RecordHandler + + +class AirbyteTypeformReader(AirbyteCDKReader): + """AirbyteTypeformReader reader. + + Retrieve documents from Typeform + + Args: + config: The config object for the typeform source. + """ + + def __init__( + self, + config: Mapping[str, Any], + record_handler: Optional[RecordHandler] = None, + ) -> None: + """Initialize with parameters.""" + import source_typeform + + super().__init__(source_class=source_typeform.SourceTypeform, config=config, record_handler=record_handler) diff --git a/llama_hub/airbyte_typeform/requirements.txt b/llama_hub/airbyte_typeform/requirements.txt new file mode 100644 index 0000000000..41719e100a --- /dev/null +++ b/llama_hub/airbyte_typeform/requirements.txt @@ -0,0 +1 @@ +airbyte-source-typeform \ No newline at end of file diff --git a/llama_hub/airbyte_zendesk_support/README.md b/llama_hub/airbyte_zendesk_support/README.md new file mode 100644 index 0000000000..079d3695e1 --- /dev/null +++ b/llama_hub/airbyte_zendesk_support/README.md @@ -0,0 +1,66 @@ +# Airbyte Zendesk Support Loader + +The Airbyte Zendesk Support Loader allows you to access different Zendesk Support objects. + +## Installation + +* Install llama_hub: `pip install llama_hub` +* Install the zendesk_support source: `pip install airbyte-source-zendesk-support` + +## Usage + +Here's an example usage of the AirbyteZendeskSupportReader. + +```python +from llama_hub.airbyte_zendesk_support.base import AirbyteZendeskSupportReader + +zendesk_support_config = { + # ... 
+} +reader = AirbyteZendeskSupportReader(config=zendesk_support_config) +documents = reader.load_data(stream_name="tickets") +``` + +## Configuration + +Check out the [Airbyte documentation page](https://docs.airbyte.com/integrations/sources/zendesk-support/) for details about how to configure the reader. +The JSON schema the config object should adhere to can be found on Github: [https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/spec.json](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/spec.json). + +The general shape looks like this: +```python +{ + "subdomain": "", + "start_date": "", + "credentials": { + "credentials": "api_token", + "email": "", + "api_token": "" + } +} +``` + +By default all fields are stored as metadata in the documents and the text is set to the JSON representation of all the fields. Construct the text of the document by passing a `record_handler` to the reader: +```python +def handle_record(record, id): + return Document(doc_id=id, text=record.data["title"], extra_info=record.data) + +reader = AirbyteZendeskSupportReader(config=zendesk_support_config, record_handler=handle_record) +``` + +## Lazy loads + +The `reader.load_data` endpoint will collect all documents and return them as a list. If there are a large number of documents, this can cause issues. By using `reader.lazy_load_data` instead, an iterator is returned which can be consumed document by document without the need to keep all documents in memory. 
class AirbyteZendeskSupportReader(AirbyteCDKReader):
    """Reader that loads Zendesk Support records through the Airbyte source.

    Thin convenience wrapper around ``AirbyteCDKReader``: the source class is
    fixed to ``source_zendesk_support.SourceZendeskSupport``, so callers only
    supply the connector configuration (and, optionally, a record handler).

    Args:
        config: The config object for the zendesk_support source.
        record_handler: Optional handler forwarded unchanged to
            ``AirbyteCDKReader``.
    """

    def __init__(
        self,
        config: Mapping[str, Any],
        record_handler: Optional[RecordHandler] = None,
    ) -> None:
        """Resolve the Zendesk Support source class and delegate to the base reader."""
        # Imported inside the constructor rather than at module level, so
        # importing this module does not by itself require the connector
        # package to be installed.
        import source_zendesk_support

        zendesk_source_cls = source_zendesk_support.SourceZendeskSupport
        super().__init__(
            source_class=zendesk_source_cls,
            config=config,
            record_handler=record_handler,
        )