diff --git a/.wordlist.txt b/.wordlist.txt index caca19a0..4f42089b 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -123,6 +123,7 @@ SSL SSO Sample SearchRequest +Secret ShardIteratorType Signifier SnapshotData @@ -346,6 +347,7 @@ schedulable schemaless schemas sdk +secret signup signups sinked diff --git a/docs/api.yml b/docs/api.yml index 90747629..58d73734 100644 --- a/docs/api.yml +++ b/docs/api.yml @@ -67,6 +67,7 @@ sidebar: - "api-reference/source_connectors/pubsub" - "api-reference/source_connectors/redshift" - "api-reference/source_connectors/s3" + - "api-reference/source_connectors/secret" - "api-reference/source_connectors/snowflake" - "api-reference/source_connectors/webhook" diff --git a/docs/examples/api-reference/sources/kafka.py b/docs/examples/api-reference/sources/kafka.py index 4856556a..570ed0e5 100644 --- a/docs/examples/api-reference/sources/kafka.py +++ b/docs/examples/api-reference/sources/kafka.py @@ -39,6 +39,66 @@ class SomeDataset: client.commit(message="some commit msg", datasets=[SomeDataset]) +@mock +def test_kafka_source_with_secret(client): + os.environ["SCHEMA_REGISTRY_URL"] = "http://localhost:8081" + # docsnip secret + from fennel.connectors import source, Kafka, Avro + from fennel.datasets import dataset, field + from fennel.integrations.aws import Secret + + # docsnip-highlight start + aws_secret = Secret( + arn="arn:aws:secretsmanager:us-east-1:123456789012:secret:my-secret-name-I4hSKr", + role_arn="arn:aws:iam::123456789012:role/secret-access-role", + ) + # docsnip-highlight end + + # secret with above arn has content like below + # { + # "kafka": { + # "username": "actual-kafka-username", + # "password": "actual-kafka-password" + # }, + # "schema_registry": { + # "username": "actual-schema-registry-username", + # "password": "actual-schema-registry-password" + # } + # } + + kafka = Kafka( + name="my_kafka", + bootstrap_servers="localhost:9092", # could come via os env var too + security_protocol="SASL_PLAINTEXT", + 
sasl_mechanism="PLAIN", + # docsnip-highlight start + sasl_plain_username=aws_secret["kafka"]["username"], + sasl_plain_password=aws_secret["kafka"]["password"], + # docsnip-highlight end + ) + avro = Avro( + registry="confluent", + url=os.environ["SCHEMA_REGISTRY_URL"], + # docsnip-highlight start + username=aws_secret["schema_registry"]["username"], + password=aws_secret["schema_registry"]["password"], + # docsnip-highlight end + ) + + # docsnip-highlight start + @source(kafka.topic("user", format=avro), disorder="14d", cdc="upsert") + # docsnip-highlight end + @dataset + class SomeDataset: + uid: int = field(key=True) + email: str + timestamp: datetime + + # /docsnip + + client.commit(message="some commit msg", datasets=[SomeDataset]) + + @mock def test_kafka_with_avro(client): os.environ["KAFKA_USERNAME"] = "test" diff --git a/docs/pages/api-reference/sink_connectors/kafka.md b/docs/pages/api-reference/sink_connectors/kafka.md index c5155131..14ee2352 100644 --- a/docs/pages/api-reference/sink_connectors/kafka.md +++ b/docs/pages/api-reference/sink_connectors/kafka.md @@ -30,11 +30,11 @@ Protocol used to communicate with the brokers. SASL mechanism to use for authentication. - + SASL username. - + SASL password. diff --git a/docs/pages/api-reference/sink_connectors/s3.md b/docs/pages/api-reference/sink_connectors/s3.md index 3a1b0ff8..16817a3f 100644 --- a/docs/pages/api-reference/sink_connectors/s3.md +++ b/docs/pages/api-reference/sink_connectors/s3.md @@ -16,7 +16,7 @@ AWS Access Key ID. This field is not required if role-based access is used or if the bucket is public. - + AWS Secret Access Key. This field is not required if role-based access is used or if the bucket is public. 
diff --git a/docs/pages/api-reference/sink_connectors/snowflake.md b/docs/pages/api-reference/sink_connectors/snowflake.md index b45943f0..c5b05b0b 100644 --- a/docs/pages/api-reference/sink_connectors/snowflake.md +++ b/docs/pages/api-reference/sink_connectors/snowflake.md @@ -38,12 +38,12 @@ The name of the database where the data has to be sinked. The schema where the required data has to be sinked. - + The username which should be used to access Snowflake. This username should have required permissions to assume the provided `role`. - + The password associated with the username. diff --git a/docs/pages/api-reference/source_connectors/avro.md b/docs/pages/api-reference/source_connectors/avro.md index 80024b89..112c8622 100644 --- a/docs/pages/api-reference/source_connectors/avro.md +++ b/docs/pages/api-reference/source_connectors/avro.md @@ -22,7 +22,7 @@ time. The URL where the schema registry is hosted. - + User name to access the schema registry (assuming the registry requires authentication). If user name is provided, corresponding password must also be provided. @@ -31,11 +31,11 @@ Assuming authentication is needed, either username/password must be provided or a token, but not both. - + The password associated with the username. - + Token to be used for authentication with the schema registry. Only one of username/password or token must be provided. diff --git a/docs/pages/api-reference/source_connectors/bigquery.md b/docs/pages/api-reference/source_connectors/bigquery.md index d4010561..885d3ab9 100644 --- a/docs/pages/api-reference/source_connectors/bigquery.md +++ b/docs/pages/api-reference/source_connectors/bigquery.md @@ -19,7 +19,7 @@ The project ID of the Google Cloud project containing the BigQuery dataset. The ID of the BigQuery dataset containing the table(s) to replicate. - + A dictionary containing the credentials for the Service Account to use to access BigQuery. See below for instructions on how to obtain this. 
diff --git a/docs/pages/api-reference/source_connectors/kafka.md b/docs/pages/api-reference/source_connectors/kafka.md index ccb8a71e..8176c14a 100644 --- a/docs/pages/api-reference/source_connectors/kafka.md +++ b/docs/pages/api-reference/source_connectors/kafka.md @@ -30,11 +30,11 @@ Protocol used to communicate with the brokers. SASL mechanism to use for authentication. - + SASL username. - + SASL password. diff --git a/docs/pages/api-reference/source_connectors/mongo.md b/docs/pages/api-reference/source_connectors/mongo.md index 86275265..b04470f7 100644 --- a/docs/pages/api-reference/source_connectors/mongo.md +++ b/docs/pages/api-reference/source_connectors/mongo.md @@ -19,12 +19,12 @@ The hostname of the database. The name of the Mongo database to establish a connection with. - + The username which should be used to access the database. This username should have access to the database `db_name`. - + The password associated with the username. diff --git a/docs/pages/api-reference/source_connectors/mysql.md b/docs/pages/api-reference/source_connectors/mysql.md index d2877dcb..64dac58d 100644 --- a/docs/pages/api-reference/source_connectors/mysql.md +++ b/docs/pages/api-reference/source_connectors/mysql.md @@ -23,12 +23,12 @@ The port to connect to. The name of the MySQL database to establish a connection with. - + The username which should be used to access the database. This username should have access to the database `db_name`. - + The password associated with the username. diff --git a/docs/pages/api-reference/source_connectors/postgres.md b/docs/pages/api-reference/source_connectors/postgres.md index 02af2632..05aca298 100644 --- a/docs/pages/api-reference/source_connectors/postgres.md +++ b/docs/pages/api-reference/source_connectors/postgres.md @@ -23,12 +23,12 @@ The port to connect to. The name of the Postgres database to establish a connection with. - + The username which should be used to access the database. 
This username should have access to the database `db_name`. - + The password associated with the username. diff --git a/docs/pages/api-reference/source_connectors/protobuf.md b/docs/pages/api-reference/source_connectors/protobuf.md index f36d1e83..491227e6 100644 --- a/docs/pages/api-reference/source_connectors/protobuf.md +++ b/docs/pages/api-reference/source_connectors/protobuf.md @@ -22,7 +22,7 @@ time. The URL where the schema registry is hosted. - + User name to access the schema registry (assuming the registry requires authentication). If user name is provided, corresponding password must also be provided. @@ -31,11 +31,11 @@ Assuming authentication is needed, either username/password must be provided or a token, but not both. - + The password associated with the username. - + Token to be used for authentication with the schema registry. Only one of username/password or token must be provided. diff --git a/docs/pages/api-reference/source_connectors/pubsub.md b/docs/pages/api-reference/source_connectors/pubsub.md index 9f8df698..debd5af9 100644 --- a/docs/pages/api-reference/source_connectors/pubsub.md +++ b/docs/pages/api-reference/source_connectors/pubsub.md @@ -15,7 +15,7 @@ A name to identify the source. The name should be unique across all Fennel conne The project ID of the Google Cloud project containing the Pub/Sub topic - + A dictionary containing the credentials for the Service Account to use to access Pub/Sub. See below for instructions on how to obtain this. diff --git a/docs/pages/api-reference/source_connectors/redshift.md b/docs/pages/api-reference/source_connectors/redshift.md index d9d80d16..f9a95721 100644 --- a/docs/pages/api-reference/source_connectors/redshift.md +++ b/docs/pages/api-reference/source_connectors/redshift.md @@ -33,12 +33,12 @@ Do not set this parameter when using username/password for authentication The name of the database where the relevant data resides. - + The username which should be used to access the database. 
This username should have access to the database `db_name`. Do not set this parameter when using IAM authentication - + The password associated with the username. Do not set this parameter when using IAM authentication diff --git a/docs/pages/api-reference/source_connectors/s3.md b/docs/pages/api-reference/source_connectors/s3.md index ca9e0b61..cf189c2d 100644 --- a/docs/pages/api-reference/source_connectors/s3.md +++ b/docs/pages/api-reference/source_connectors/s3.md @@ -11,12 +11,12 @@ Data connector to source data from S3. A name to identify the source. The name should be unique across all Fennel connectors. - + AWS Access Key ID. This field is not required if role-based access is used or if the bucket is public. - + AWS Secret Access Key. This field is not required if role-based access is used or if the bucket is public. diff --git a/docs/pages/api-reference/source_connectors/secret.md b/docs/pages/api-reference/source_connectors/secret.md new file mode 100644 index 00000000..0c00bed1 --- /dev/null +++ b/docs/pages/api-reference/source_connectors/secret.md @@ -0,0 +1,64 @@ +--- +title: Secret +order: 0 +status: published +--- + +### Secret +Secret can be used to pass sensitive information like username/password to Fennel using an AWS Secrets Manager secret reference. + +In order to use Secret, one of the following must be done: +1. The Fennel Data access role should be given access to the secret. +2. Or a new role can be created with access to the secrets needed, and the Fennel Data access role can be added as a trusted entity for that new role, so that the new role can be assumed to access the secrets. + + +#### Parameters + + +The ARN of the secret. + + + +The optional ARN of the role to be assumed to access the secret. +This should be provided if a new role is created for the Fennel Data access role to assume. + + +

+
+```JSON message="Example permissions policy for the new role"
+{
+	"Version": "2012-10-17",
+	"Statement": [
+		{
+			"Sid": "VisualEditor0",
+			"Effect": "Allow",
+			"Action": [
+				"secretsmanager:GetResourcePolicy",
+				"secretsmanager:GetSecretValue",
+				"secretsmanager:DescribeSecret",
+				"secretsmanager:ListSecretVersionIds"
+			],
+			"Resource": "arn:aws:secretsmanager:us-east-1:123456789012:secret:my-secret-name-I4hSKr"
+		}
+	]
+}
+```
+
+```JSON message="Example trust relationship for the new role"
+{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Effect": "Allow",
+            "Principal": {
+                "AWS": [
+                    "arn:aws:iam::123456789012:role/FennelDataAccessRole"
+                ]
+            },
+            "Action": "sts:AssumeRole"
+        }
+    ]
+}
+```
diff --git a/docs/pages/api-reference/source_connectors/snowflake.md b/docs/pages/api-reference/source_connectors/snowflake.md
index 15502459..aaa2015c 100644
--- a/docs/pages/api-reference/source_connectors/snowflake.md
+++ b/docs/pages/api-reference/source_connectors/snowflake.md
@@ -38,12 +38,12 @@ The name of the database where the relevant data resides.
 The schema where the required data table(s) resides.
 
- + The username which should be used to access Snowflake. This username should have required permissions to assume the provided `role`. - + The password associated with the username. diff --git a/fennel/CHANGELOG.md b/fennel/CHANGELOG.md index e320f6e8..22324a01 100644 --- a/fennel/CHANGELOG.md +++ b/fennel/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## [1.5.46] - 2024-10-30 +- Add support for AWS Secrets Manager + ## [1.5.45] - 2024-10-30 - Remove print statement in dedup operator