delete old layer code #42

Merged 6 commits on Mar 13, 2024
Changes from all commits
Dockerfile (20 changes: 4 additions & 16 deletions)

@@ -29,38 +29,26 @@ COPY testdata testdata

# Build the app binaries
RUN go vet ./... && \
-CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags="-w -s" -o flake cmd/flake/main.go && \
-CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags="-w -s" -o snowflake-datalayer cmd/snowflake-datalayer/main.go && \
+CGO_ENABLED=0 GOOS=linux go build -ldflags="-w -s" -o snowflake-datalayer cmd/snowflake-datalayer/main.go && \
go test -v ./...

# enable the apps to run as any non root user
-RUN chgrp 0 flake && chmod g+X flake && \
-chgrp 0 snowflake-datalayer && chmod g+X snowflake-datalayer
+RUN chgrp 0 snowflake-datalayer && chmod g+X snowflake-datalayer

FROM scratch

WORKDIR /root/

COPY --from=builder /usr/share/zoneinfo /usr/share/zoneinfo
COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
-COPY --from=builder /app/flake .
COPY --from=builder /app/snowflake-datalayer .
COPY --from=builder /etc/passwd /etc/passwd

# server configs
ENV LOG_TYPE=json \
LOG_LEVEL=info \
-SERVICE_NAME=datahub-snowflake-datalayer \
+SERVICE_NAME=snowflake-datalayer \
PORT=8080 \
-SNOWFLAKE_USER=<user> \
-SNOWFLAKE_PASSWORD=<password> \
-SNOWFLAKE_ACCOUNT=<account> \
-SNOWFLAKE_DB=<db> \
-SNOWFLAKE_SCHEMA=<schema> \
-WELL_KNOWN=https://<auth.srv>/.well-known/jwks.json \
-ISSUER=https://<auth.srv> \
-AUDIENCE=https://<app.host> \
-AUTHENTICATOR=jwt \
HOME=/ \
USER=5678

@@ -71,4 +59,4 @@ EXPOSE 8080
USER 5678

# default command to run the app. override command with snowflake-datalayer to use v2
CMD ["./flake", "server"]
CMD ["./snowflake-datalayer"]
Makefile (42 changes: 27 additions & 15 deletions)

@@ -1,22 +1,34 @@
+define empty_config
+{
+  "layer_config": {
+    "service_name": "snowflake-local",
+    "port": "8080"
+  },
+  "system_config": {
+    "memory_headroom": 100,
+    "snowflake_db": "db",
+    "snowflake_schema": "schema",
+    "snowflake_user": "user",
+    "snowflake_account": "acct",
+    "snowflake_warehouse": "wh",
+    "snowflake_private_key": "MIIBUwIBADANBgkqhkiG9w0BAQEFAASCAT0wggE5AgEAAkEAxIXbFdo7AhvdobX4F+gjkgGD3wM2zH6GhvJSnCLmKvlYPGwwX9J+xgEBPLSEH4R4zW/YFySOYxGU/DboZIpXfwIDAQABAkBKOch643cgH8hBGMrAtNQihGH7bGpZKHzFIWdkQ6YtmmBu/O5FtBNJQgsFsWnOydURrJzGoG1ezMQArNBdFUUJAiEA40p9KnnaA/NWb608yolfArKHcQJ+iXx1d2HkeVMbCSUCIQDdWHj+0VWZ00iNh5plqFov8EKNAMImYEi/1geBHcQ20wIgeaNGovG9NDoI+xEqJHYp66ahh2A/WdLKho5UGH3aTSUCIBqeDgbOk5Wo87uZR/bblOTY5pfgNHi68WSoT0S2mKbjAiBnG28oMs8D+vGKZMawf2BKbq33MjRsMJmcjmMHJqy7ow=="
+  },
+  "dataset_definitions": []
+}
+endef
+export empty_config

build:
-go build -race -o bin/flake cmd/flake/main.go
+go build -race -o bin/snowflake-datalayer ./cmd/snowflake-datalayer

-.SHELL=bash
run:
-bin/flake server \
---port 9090 \
---log-level debug \
---authenticator jwt \
---well-known="http://localhost:8080/jwks/.well-known/jwks.json" \
---issuer="https://api.dev.mimiro.io" \
---audience="https://api.dev.mimiro.io" \
---snowflake-account=AUONOEH.LU43266 \
---snowflake-db=DATAHUB_MIMIRO \
---snowflake-schema=DATAHUB_TEST \
---snowflake-warehouse=DATAHUB_IMPORT

-docker:
-docker build . -t datahub-snowflake-datalayer
+mkdir -p /tmp/sfconf && echo $$empty_config > /tmp/sfconf/config.json && \
+DATALAYER_CONFIG_PATH=/tmp/sfconf go run ./cmd/snowflake-datalayer

+test:
+go vet ./...
+go test ./... -v

license:
go install github.com/google/addlicense; addlicense -c "MIMIRO AS" $(shell find . -iname "*.go")
README.md (190 changes: 82 additions & 108 deletions)

@@ -5,51 +5,42 @@
## Usage

Running from source:
```shell
-go run ./cmd/flake
-```
-
-Note that the server in default mode expects to be configured for jwt auth.
-Provide the following env vars to configure it:
-```shell
-WELL_KNOWN=https://<authservice>/jwks/.well-known/jwks.json
-ISSUER=https://<issuer>
-AUDIENCE=https://<audience>
+go run ./cmd/snowflake-datalayer
```

-You can run it without auth by setting the `AUTHENTICATOR=noop` env var.
-
-The app will run on port 8080 by default. You can change this by setting the `PORT` env var.

-## Connecting to Snowflake
-
-To connect to snowflake, prepare a snowflake user for the layer. Then, follow the instructions in the [snowflake docs](https://docs.snowflake.com/en/user-guide/key-pair-auth.html) to generate a keypair for the user.
-The following is the process summarized:
-
-### Generating compatible keypairs
-
-Generate your private key:
-
-```shell
-openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:2048 -pkeyopt rsa_keygen_pubexp:65537 | openssl pkcs8 -topk8 -nocrypt -outform der > rsa-2048-private-key.p8
-```
+The layer can be configured with a [common-datalayer configuration](https://github.com/mimiro-io/common-datalayer?tab=readme-ov-file#data-layer-configuration)
+file. Example for the `layer_config` section:

-Generate the public key:
-```shell
-openssl pkey -pubout -inform der -outform der -in rsa-2048-private-key.p8 -out rsa-2048-public-key.spki
+```json
+"layer_config": {
+  "service_name": "snowflake",
+  "port": "8080",
+  "config_refresh_interval": "600s",
+  "log_level": "trace",
+  "log_format": "json"
+},
```

-Generate base64 url encoded strings from the key files:
+Additionally, the layer allows the following environment variables to override
+system settings:

```shell
-openssl base64 -in rsa-2048-private-key.p8 -out rsa-2048-private-key.base64.p8
-openssl base64 -in rsa-2048-public-key.spki -out rsa-2048-public-key.base64.spki
+MEMORY_HEADROOM=100 # reject new requests when the layer has less than this many MB of free memory
+SNOWFLAKE_PRIVATE_KEY=base64 encoded private key
+SNOWFLAKE_USER=snowflake user
+SNOWFLAKE_ACCOUNT=snowflake account
+SNOWFLAKE_DB=snowflake database
+SNOWFLAKE_SCHEMA=snowflake schema
+SNOWFLAKE_WAREHOUSE=snowflake warehouse
```
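As a usage sketch, these overrides can be set inline when starting the layer from source; every value below is a placeholder:

```shell
MEMORY_HEADROOM=150 \
SNOWFLAKE_USER=loader \
SNOWFLAKE_ACCOUNT=myorg-myaccount \
SNOWFLAKE_DB=mydb \
SNOWFLAKE_SCHEMA=public \
SNOWFLAKE_WAREHOUSE=compute_wh \
go run ./cmd/snowflake-datalayer
```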

-You then need to update your user in Snowflake with the public key (not base64).
-```
-ALTER USER <DB username> SET RSA_PUBLIC_KEY='<paste pub key here>'
-```
+## Connecting to Snowflake
+
+To connect to Snowflake, prepare a Snowflake user for the layer.
+Then, follow the instructions in the [snowflake docs](https://docs.snowflake.com/en/user-guide/key-pair-auth.html)
+to generate a key pair for the user.

When running the server, you need to provide the private key as a base64 encoded string.
It can be set in the env var `SNOWFLAKE_PRIVATE_KEY`.
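The summarized openssl steps were dropped from the README in this PR in favor of the Snowflake docs link; for reference, a sketch of the flow as it appeared in the old text (file names and the user name are placeholders):

```shell
# generate an unencrypted PKCS#8 private key
openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:2048 -pkeyopt rsa_keygen_pubexp:65537 \
  | openssl pkcs8 -topk8 -nocrypt -outform der > rsa-2048-private-key.p8

# derive the public key and register it (not base64) on the Snowflake user:
#   ALTER USER <DB username> SET RSA_PUBLIC_KEY='<paste pub key here>'
openssl pkey -pubout -inform der -outform der -in rsa-2048-private-key.p8 -out rsa-2048-public-key.spki

# base64 encode the private key for the SNOWFLAKE_PRIVATE_KEY env var
openssl base64 -in rsa-2048-private-key.p8 -out rsa-2048-private-key.base64.p8
```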
@@ -75,12 +66,14 @@ a deleted state. use the `deleted` field to filter them out.

If a target table contains valid UDA entities in json format, the layer can read from the table without any configuration.
Prerequisites:
-- The table must contain a column named `ENTITY` which contains the entity.
-- Entities are fully expanded, i.e. no namespace prefixes are used.
-- Chronology is reflected by the table's natural order. The latest entity is the last row in the table.
+
+- The table must contain a column named `ENTITY` which contains the entity.
+- Entities are fully expanded, i.e. no namespace prefixes are used.
+- Chronology is reflected by the table's natural order. The latest entity is the last row in the table.

To use convention based reading, construct a dataset name in this form:
-```
+
+```sql
<database>.<schema>.<table>
```

@@ -97,50 +90,36 @@ curl http://<layerhost>/datasets/<database>.<schema>.<table>/entities
The layer uses the `dataset_definitions` part of the common-datalayer configuration to configure the datasets.
For details on the configuration options, see the [documentation](https://github.com/mimiro-io/common-datalayer#data-layer-configuration).

-This configuration format can be read from a file or from an url. The layer will look
-for the env var `CONFIG_LOCATION`. It will also re-read the configuration file every minute
-to allow for dynamic configuration of datasets. The interval can be configured by setting whole
-second values in the env var `CONFIG_LOADER_INTERVAL`.
-
-In case a JWT secured URL is used, these configuration options to the layer can be used:
-```shell
-CONFIG_LOADER_CLIENT_ID
-CONFIG_LOADER_CLIENT_SECRET
-CONFIG_LOADER_AUDIENCE
-CONFIG_LOADER_GRANT_TYPE
-CONFIG_LOADER_AUTH_ENDPOINT
-```
-
### Writing to Snowflake

To configure a dataset for writing, add a dataset definition to the configuration with the following fields:
Note that `source_config` is optional. If not provided here, the layer uses the dataset name as the table name,
and the database and schema from the environment variables `SNOWFLAKE_DB` and `SNOWFLAKE_SCHEMA`.

-```json
+```javascript
{
"name": "name of the dataset (uri path)",
"source_config": {
"table_name": "name of the table in snowflake",
"schema": "name of the schema in snowflake",
"database": "name of the database in snowflake"
},
"incoming_mapping_config": {
"base_uri": "http://example.com",
"property_mappings": [{
"Custom": {
"expression": "expression to extract the value from the entity"
}, // optional, if set, the layer will use this expression to extract the value from the entity
"required": true,
"entity_property": "property name in the entity",
"property": "name of the column in the table",
"datatype": "integer", // snowflake datatype, must be compatible with the value
"is_reference": false, // if true, the value is looked up in the references part of the entity
"is_identity": false,
"is_deleted": false,
"is_recorded": false
}]
}
"name": "name of the dataset (uri path)",
"source_config": {
"table_name": "name of the table in snowflake",
"schema": "name of the schema in snowflake",
"database": "name of the database in snowflake"
},
"incoming_mapping_config": {
"base_uri": "http://example.com",
"property_mappings": [{
"Custom": {
"expression": "expression to extract the value from the entity"
}, // optional, if set, the layer will use this expression to extract the value from the entity
"required": true,
"entity_property": "property name in the entity",
"property": "name of the column in the table",
"datatype": "integer", // snowflake datatype, must be compatible with the value
"is_reference": false, // if true, the value is looked up in the references part of the entity
"is_identity": false,
"is_deleted": false,
"is_recorded": false
}]
}
}
```
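Once such a definition is loaded, entities can be pushed to the dataset. This is a sketch assuming the standard UDA entities endpoint (the same path the curl read example above uses) and a layer on localhost; the dataset name, namespace, and payload values are placeholders:

```shell
curl -X POST "http://localhost:8080/datasets/<dataset-name>/entities" \
  -H "Content-Type: application/json" \
  -d '[
    {"id": "@context", "namespaces": {"ex": "http://example.com/"}},
    {"id": "ex:1", "props": {"ex:name": "example"}, "refs": {}}
  ]'
```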

@@ -161,44 +140,39 @@ If a mapping contains a custom expression, it will be applied instead of the default.
This can be used to insert static values into the table, or to wrap the json-path based entity access expressions with
additional SQL transformations. Possible use cases include unpacking of array values or nested entities.
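As an illustration, a mapping that fills a column with a static value via a custom expression might look like the snippet below; the column name and expression are hypothetical, and the expression must be SQL that Snowflake accepts for your data:

```json
"property_mappings": [{
  "Custom": { "expression": "'manual-import'" },
  "property": "SOURCE_SYSTEM",
  "datatype": "varchar"
}]
```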


### Reading from Snowflake

The layer can be configured to read from tables that do not follow the convention described above.
To do so, create a dataset configuration for the layer. The configuration is a json object with the following fields:

-```json
+```javascript
{
"dataset_definitions": [
{
"name": "name of the dataset (uri path)",
"source_config": {
"table_name": "name of the table in snowflake",
"schema": "name of the schema in snowflake",
"database": "name of the database in snowflake",
"raw_column": "optional name of the column containing a raw json entity"
},
"outgoing_mapping_config": { // optional, not used when a raw_column is configured
"base_uri": "http://example.com",
"constructions": [{
"property": "name",
"operation": "replace",
"args": ["arg1", "arg2", "arg3"]
}],
"property_mappings": [{
"required": true,
"entity_property": "property name in the entity",
"property": "name of the column in the table",
"datatype": "int", // conversion hint for the layer
"is_reference": false, // if true, the value is treated as a reference to another entity
"uri_value_pattern": "http://example.com/{value}",// optional, if set, the value used as string template to construct a property value
"is_identity": false,
"default_value": "default"
}],
"map_all": true
}
}
]
"dataset_definitions": [{
"name": "name of the dataset (uri path)",
"source_config": {
"table_name": "name of the table in snowflake",
"schema": "name of the schema in snowflake",
"database": "name of the database in snowflake",
"raw_column": "optional name of the column containing a raw json entity"
},
"outgoing_mapping_config": { // optional, not used when a raw_column is configured
"base_uri": "http://example.com",
"constructions": [{
"property": "name",
"operation": "replace",
"args": ["arg1", "arg2", "arg3"]
}], "property_mappings": [{
"required": true,
"entity_property": "property name in the entity",
"property": "name of the column in the table",
"datatype": "int", // conversion hint for the layer
"is_reference": false, // if true, the value is treated as a reference to another entity
"uri_value_pattern": "http://example.com/{value}", // optional, if set, the value used as string template to construct a property value
"is_identity": false,
"default_value": "default"
}],
"map_all": true
}
}]
}
```
