Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deploy with Modal: fix indenting #2040

Merged
merged 2 commits into from
Nov 8, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,62 +1,64 @@
import os

import modal
from tests.pipeline.utils import assert_load_info

# @@@DLT_SNIPPET_START modal_image
# Define the Modal Image
image = modal.Image.debian_slim().pip_install(
"dlt>=1.1.0",
"dlt[duckdb]", # destination
"dlt[sql_database]", # source (MySQL)
"pymysql", # database driver for MySQL source
)
from tests.pipeline.utils import assert_load_info

app = modal.App("example-dlt", image=image)

# Modal Volume used to store the duckdb database file
vol = modal.Volume.from_name("duckdb-vol", create_if_missing=True)
# @@@DLT_SNIPPET_END modal_image
def test_modal_snippet() -> None:
# @@@DLT_SNIPPET_START modal_image
# Define the Modal Image
image = modal.Image.debian_slim().pip_install(
"dlt>=1.1.0",
"dlt[duckdb]", # destination
"dlt[sql_database]", # source (MySQL)
"pymysql", # database driver for MySQL source
)

app = modal.App("example-dlt", image=image)

# @@@DLT_SNIPPET_START modal_function
@app.function(
volumes={"/data/": vol},
schedule=modal.Period(days=1),
secrets=[modal.Secret.from_name("sql-secret")],
)
def load_tables() -> None:
import dlt
from dlt.sources.sql_database import sql_database
# Modal Volume used to store the duckdb database file
vol = modal.Volume.from_name("duckdb-vol", create_if_missing=True)
# @@@DLT_SNIPPET_END modal_image

# Define the source database credentials; in production, you would save this as a Modal Secret which can be referenced here as an environment variable
os.environ["SOURCES__SQL_DATABASE__CREDENTIALS"] = (
"mysql+pymysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam"
# @@@DLT_SNIPPET_START modal_function
@app.function(
volumes={"/data/": vol},
schedule=modal.Period(days=1),
secrets=[modal.Secret.from_name("sql-secret")],
serialized=True,
)
# Load tables "family" and "genome"
source = sql_database().with_resources("family", "genome")
def load_tables() -> None:
import dlt
from dlt.sources.sql_database import sql_database

# Create dlt pipeline object
pipeline = dlt.pipeline(
pipeline_name="sql_to_duckdb_pipeline",
destination=dlt.destinations.duckdb(
"/data/rfam.duckdb"
), # write the duckdb database file to this file location, which will get mounted to the Modal Volume
dataset_name="sql_to_duckdb_pipeline_data",
progress="log", # output progress of the pipeline
)
# Define the source database credentials; in production, you would save this as a Modal Secret which can be referenced here as an environment variable
os.environ["SOURCES__SQL_DATABASE__CREDENTIALS"] = (
"mysql+pymysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam"
)
# Load tables "family" and "genome"
source = sql_database().with_resources("family", "genome")

# Create dlt pipeline object
pipeline = dlt.pipeline(
pipeline_name="sql_to_duckdb_pipeline",
destination=dlt.destinations.duckdb(
"/data/rfam.duckdb"
), # write the duckdb database file to this file location, which will get mounted to the Modal Volume
dataset_name="sql_to_duckdb_pipeline_data",
progress="log", # output progress of the pipeline
)

# Run the pipeline
load_info = pipeline.run(source)
# Run the pipeline
load_info = pipeline.run(source)

# Print run statistics
print(load_info)
# Print run statistics
print(load_info)
# @@@DLT_SNIPPET_END modal_function

assert_load_info(load_info)
assert_load_info(load_info)


def test_modal_snippet() -> None:
import pytest
from modal.exception import ExecutionError

Expand Down
Loading