Skip to content

Commit

Permalink
Refactor entrypoints (#175)
Browse files Browse the repository at this point in the history
  • Loading branch information
vijayvammi authored Jan 7, 2025
1 parent 8c47dde commit 43d777d
Show file tree
Hide file tree
Showing 71 changed files with 2,515 additions and 2,226 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ docs/
.tox/
.scripts/
.tests/
.venv/
minikube/
1 change: 1 addition & 0 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ jobs:
fetch-depth: 0
- name: Install uv
run: curl -LsSf https://astral.sh/uv/install.sh | sh

- name: "Set up Python"
run: uv python install
- run: |
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,5 @@ cov.xml
.DS_Store

data/

minikube/
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.9
3.10
22 changes: 22 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Ubuntu 24.04 base image; Python itself is installed later via uv
# (the original header claiming "Python 3.8" did not match the FROM line).
FROM ubuntu:24.04

LABEL maintainer="vijay.vammi@astrazeneca.com"

# git + curl + CA certs are needed to fetch dependencies; clear the apt
# cache afterwards to keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    ca-certificates \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install a pinned version (0.5.12) of the uv package manager, then put it on PATH.
ADD https://astral.sh/uv/0.5.12/install.sh /uv-installer.sh
RUN sh /uv-installer.sh && rm /uv-installer.sh
ENV PATH="/root/.local/bin/:$PATH"

COPY . /app
WORKDIR /app

# Install Python and sync all project dependencies (including extras) from the
# internal Artifactory mirror, honouring the lockfile (--frozen).
RUN uv python install && \
    uv sync --index https://artifactory.astrazeneca.net/api/pypi/pypi-virtual/simple/ --frozen --all-extras

# Make the project virtualenv the default interpreter for containers.
ENV PATH="/app/.venv/bin:$PATH"
File renamed without changes.
1 change: 0 additions & 1 deletion examples/01-tasks/python_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def main():
pipeline = Pipeline(steps=[hello_task])

pipeline.execute()

return pipeline


Expand Down
23 changes: 23 additions & 0 deletions examples/11-jobs/catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from examples.common.functions import write_files
from runnable import Catalog, Job, PythonTask

print("Running catalog.py")


def main():
    """Build and execute a job whose outputs are stored in the catalog."""
    # After the task runs, put these generated artifacts into the catalog.
    outputs = Catalog(put=["df.csv", "data_folder/data.txt"])

    data_task = PythonTask(
        name="generate_data",
        function=write_files,
        catalog=outputs,
    )

    job = Job(name="catalog", task=data_task)
    _ = job.execute()

    return job


if __name__ == "__main__":
    main()
5 changes: 5 additions & 0 deletions examples/11-jobs/catalog.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
type: python
command: examples.common.functions.write_files
catalog:
- "*"
- data_folder/data.txt
41 changes: 41 additions & 0 deletions examples/11-jobs/k8s-job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
job-executor:
type: "k8s-job"
config:
jobSpec:
# activeDeadlineSeconds: Optional[int]
# selector: Optional[LabelSelector]
# ttlSecondsAfterFinished: Optional[int]
template:
# metadata:
# annotations: Optional[Dict[str, str]]
# generate_name: Optional[str] = run_id
# namespace: Optional[str] = "default"
spec:
# activeDeadlineSeconds: Optional[int]
# nodeSelector: Optional[Dict[str, str]]
# tolerations: Optional[List[Toleration]]
# volumes:
# - name: str
# hostPath:
# path: str
# serviceAccountName: Optional[str]
# restartPolicy: Optional[str] = Choose from [Always, OnFailure, Never]
container:
# command: List[str]
# env:
# - name: str
# value: str
image: runnable-m1
# imagePullPolicy: Optional[str] = choose from [Always, Never, IfNotPresent]
# resources:
# limits:
# cpu: str
# memory: str
# gpu: str
# requests:
# cpu: str
# memory: str
# gpu: str
# volumeMounts:
# - name: str
# mountPath: str
4 changes: 4 additions & 0 deletions examples/11-jobs/local-container.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
job-executor:
type: "local-container" # (1)
config:
docker_image: runnable-m1:latest # (2)
3 changes: 3 additions & 0 deletions examples/11-jobs/notebook.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# name: notebook job
type: notebook
command: examples/common/simple_notebook.ipynb # The path is relative to the root of the project.
47 changes: 47 additions & 0 deletions examples/11-jobs/passing_parameters_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
The below example shows how to set/get parameters in python
tasks of the pipeline.
The function, set_parameter, returns
- JSON serializable types
- pydantic models
- pandas dataframe, any "object" type
pydantic models are implicitly handled by runnable
but "object" types should be marked as "pickled".
Use pickled even for python data types is advised for
reasonably large collections.
Run the below example as:
python examples/03-parameters/passing_parameters_python.py
"""

from examples.common.functions import write_parameter
from runnable import Job, PythonTask, metric, pickled


def main():
write_parameters = PythonTask(
function=write_parameter,
returns=[
pickled("df"),
"integer",
"floater",
"stringer",
"pydantic_param",
metric("score"),
],
name="set_parameter",
)

job = Job(name="set_parameters", task=write_parameters)

job.execute()

return job


if __name__ == "__main__":
main()
11 changes: 11 additions & 0 deletions examples/11-jobs/python_parameters.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
type: python
command: examples.common.functions.write_parameter
returns:
- name: df
kind: object
- name: integer
- name: floater
- name: stringer
- name: pydantic_param
- name: score
kind: metric
44 changes: 44 additions & 0 deletions examples/11-jobs/python_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
You can execute this pipeline by:
python examples/01-tasks/python_tasks.py
The stdout of "Hello World!" would be captured as execution
log and stored in the catalog.
An example of the catalog structure:
.catalog
└── baked-heyrovsky-0602
└── hello.execution.log
2 directories, 1 file
The hello.execution.log has the captured stdout of "Hello World!".
"""

from examples.common.functions import hello
from runnable import Job, PythonTask


def main():
# Create a tasks which calls the function "hello"
# If this step executes successfully,
# the pipeline will terminate with success
hello_task = PythonTask(
name="hello",
function=hello,
terminate_with_success=True,
)

# The pipeline has only one step.
job = Job(name="hello", task=hello_task)

job.execute()

return job


if __name__ == "__main__":
main()
2 changes: 2 additions & 0 deletions examples/11-jobs/python_tasks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
type: python
command: examples.common.functions.hello # dotted path to the function.
20 changes: 20 additions & 0 deletions examples/11-jobs/scripts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
dag:
description: |
This is a sample pipeline with one step that
executes a shell command.
You can run this pipeline by:
runnable execute -f examples/11-jobs/scripts.yaml
For example:
.catalog
└── seasoned-perlman-1355
└── hello.execution.log
start_at: shell
steps:
shell:
type: task
command_type: shell
command: echo "hello world!!" # The path is relative to the root of the project.
next: success
14 changes: 7 additions & 7 deletions examples/common/simple_notebook_out.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "bd34d156",
"id": "215adf58",
"metadata": {
"ploomber": {
"timestamp_end": 1714453073.951735,
"timestamp_start": 1714453073.951505
"timestamp_end": 1735514269.76332,
"timestamp_start": 1735514269.76314
},
"tags": [
"injected-parameters"
Expand All @@ -24,8 +24,8 @@
"id": "3e98e89e-765c-42d4-81ea-c371c2eab14d",
"metadata": {
"ploomber": {
"timestamp_end": 1714453073.951955,
"timestamp_start": 1714453073.95176
"timestamp_end": 1735514269.763565,
"timestamp_start": 1735514269.763376
}
},
"outputs": [],
Expand All @@ -40,8 +40,8 @@
"id": "8eac7a3f",
"metadata": {
"ploomber": {
"timestamp_end": 1714453073.952089,
"timestamp_start": 1714453073.951969
"timestamp_end": 1735514269.763689,
"timestamp_start": 1735514269.763579
}
},
"outputs": [
Expand Down
12 changes: 6 additions & 6 deletions examples/configs/argo-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
executor:
pipeline-executor:
type: "argo" # (1)
config:
image: harbor.csis.astrazeneca.net/mlops/runnable:latest # (2)
Expand All @@ -7,12 +7,12 @@ executor:
- name: magnus-volume
mount_path: /mnt

run_log_store: # (4)
run-log-store: # (4)
type: chunked-fs
config:
log_folder: /mnt/run_log_store
# config:
# log_folder: /mnt/run_log_store

catalog:
type: file-system
config:
catalog_location: /mnt/catalog
# config:
# catalog_location: /mnt/catalog
2 changes: 1 addition & 1 deletion examples/configs/chunked-fs-run_log.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
run_log_store:
run-log-store:
type: chunked-fs
4 changes: 2 additions & 2 deletions examples/configs/default.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
pipeline-executor:
type: local # (1)

run_log_store:
run-log-store:
type: buffered # (2)

catalog:
Expand Down
6 changes: 3 additions & 3 deletions examples/configs/local-container.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
executor:
pipeline-executor:
type: "local-container" # (1)
config:
docker_image: runnable:latest # (2)
docker_image: runnable-m1:latest # (2)

run_log_store: # (4)
run-log-store: # (4)
type: chunked-fs
4 changes: 2 additions & 2 deletions examples/configs/mocked-config-debug.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
catalog:
type: file-system # (1)

run_log_store:
run-log-store:
type: file-system # (1)

executor:
pipeline-executor:
type: mocked
config:
patches:
Expand Down
4 changes: 2 additions & 2 deletions examples/configs/mocked-config-simple.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
catalog:
type: file-system # (1)

run_log_store:
run-log-store:
type: file-system # (1)

executor:
pipeline-executor:
type: mocked
4 changes: 2 additions & 2 deletions examples/configs/mocked-config-unittest.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
catalog:
type: file-system # (1)

run_log_store:
run-log-store:
type: file-system # (1)

executor:
pipeline-executor:
type: mocked
config:
patches:
Expand Down
4 changes: 2 additions & 2 deletions examples/configs/mocked-config.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
catalog:
type: file-system # (1)

run_log_store:
run-log-store:
type: file-system # (1)

executor:
pipeline-executor:
type: mocked
config:
patches:
Expand Down
Loading

0 comments on commit 43d777d

Please sign in to comment.