Skip to content

Commit

Permalink
adds tests to build containers, removes psutil
Browse files Browse the repository at this point in the history
  • Loading branch information
rudolfix committed Jun 2, 2023
1 parent 2022f4c commit c5cbece
Show file tree
Hide file tree
Showing 10 changed files with 210 additions and 128 deletions.
8 changes: 7 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,11 @@ __pycache__
.egg-info
_storage
_test_storage
.vscode
Dockerfile
.md
.md
_secrets
docs
tests
logs
experiments
37 changes: 37 additions & 0 deletions .github/workflows/test_build_images.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Verifies on every pull request that the dlt docker images still build.
name: test build docker images

on:
  pull_request:
    branches:
      - master
      - devel
  workflow_dispatch:

jobs:
  # reusable workflow; its `changes_outside_docs` output gates the build job below
  get_docs_changes:
    uses: ./.github/workflows/get_docs_changes.yml

  run_airflow:
    name: Build alpine and airflow images
    needs: get_docs_changes
    # skip the image build when the reusable workflow reports no changes outside docs
    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
    runs-on: ubuntu-latest

    steps:
      - name: Check out
        # pinned to a release tag instead of the moving `master` branch so the
        # job is reproducible and consistent with the other pinned actions
        uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          # quoted so the version is never parsed as a YAML float
          python-version: "3.10.x"

      - name: Install Poetry
        uses: snok/install-poetry@v1.3.2
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true

      - name: Build images
        # runs the `test-build-images` Makefile target
        run: make test-build-images
10 changes: 6 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,15 @@ reset-test-storage:
mkdir _storage
python3 tests/tools/create_storages.py

recreate-compiled-deps:
poetry export -f requirements.txt --output _gen_requirements.txt --without-hashes --extras gcp --extras redshift
grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt

build-library: dev
poetry version
poetry build

publish-library: build-library
poetry publish

# Test that the docker images build from the current checkout.
# Runs after `build-library`. Exports the poetry dependencies (with the gcp and
# redshift extras) to _gen_requirements.txt, greps them with the contents of
# compiled_packages.txt into compiled_requirements.txt, then builds
# deploy/dlt/Dockerfile.airflow and deploy/dlt/Dockerfile, passing the short
# commit hash and the poetry package version as build args.
test-build-images: build-library
poetry export -f requirements.txt --output _gen_requirements.txt --without-hashes --extras gcp --extras redshift
grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt
docker build -f deploy/dlt/Dockerfile.airflow --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" .
docker build -f deploy/dlt/Dockerfile --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" .
6 changes: 0 additions & 6 deletions compiled_requirements.txt

This file was deleted.

2 changes: 1 addition & 1 deletion deploy/dlt/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ ENV IMAGE_VERSION=${IMAGE_VERSION}

# install exactly the same version of the library we used to build
COPY dist/dlt-${IMAGE_VERSION}.tar.gz .
RUN pip3 install /tmp/pydlt/dlt-${IMAGE_VERSION}.tar.gz[gcp,redshift]
RUN pip3 install /tmp/pydlt/dlt-${IMAGE_VERSION}.tar.gz[gcp,redshift,duckdb]

WORKDIR /
RUN rm -r /tmp/pydlt
27 changes: 27 additions & 0 deletions deploy/dlt/Dockerfile.airflow
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Installs the locally built dlt source distribution on top of the Airflow image.
# Expects dist/dlt-<IMAGE_VERSION>.tar.gz to exist in the build context.
FROM apache/airflow:2.5.2-python3.8

# Metadata
LABEL org.label-schema.vendor="dltHub" \
    org.label-schema.url="https://dlthub.com" \
    org.label-schema.name="dlt" \
    org.label-schema.description="**data load tool (dlt)** is a simple, open source Python library that makes data loading easy."

# prepare dirs to install dlt
RUN mkdir -p /tmp/pydlt

WORKDIR /tmp/pydlt

# add build labels and envs
ARG COMMIT_SHA=""
ARG IMAGE_VERSION=""
# no spaces around "=": with spaces the instruction is read in the legacy
# `LABEL <key> <value>` form and the label value would not be the bare sha
LABEL commit_sha=${COMMIT_SHA}
LABEL version=${IMAGE_VERSION}
ENV COMMIT_SHA=${COMMIT_SHA}
ENV IMAGE_VERSION=${IMAGE_VERSION}

# install exactly the same version of the library we used to build
COPY dist/dlt-${IMAGE_VERSION}.tar.gz .
RUN pip3 install /tmp/pydlt/dlt-${IMAGE_VERSION}.tar.gz[gcp,redshift,duckdb]

# leave the temp dir before deleting it so the image has a valid working directory
WORKDIR /
RUN rm -r /tmp/pydlt
2 changes: 1 addition & 1 deletion dlt/common/data_writers/buffered.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def _rotate_file(self) -> None:

def _flush_items(self) -> None:
if len(self._buffered_items) > 0:
# we only open a writer when there are any files in the buffer and first flush is requested
# we only open a writer when there are any items in the buffer and first flush is requested
if not self._writer:
# create new writer and write header
if self._file_format_spec.is_binary_format:
Expand Down
16 changes: 13 additions & 3 deletions dlt/common/runtime/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import sys
import logging
import time
import psutil
from abc import ABC, abstractmethod
from collections import defaultdict
from typing import Any, ContextManager, Dict, Type, TYPE_CHECKING, DefaultDict, NamedTuple, Optional, Union, TextIO, TypeVar
Expand Down Expand Up @@ -120,6 +119,12 @@ def __init__(self, log_period: float = 1.0, logger: Union[logging.Logger, TextIO
self.counters: DefaultDict[str, int] = None
self.counter_info: Dict[str, LogCollector.CounterInfo] = None
self.messages: Dict[str, Optional[str]] = None
if dump_system_stats:
try:
import psutil
except ImportError:
self._log(logging.WARNING, "psutil dependency is not installed and mem stats will not be available. add psutil to your environment or pass dump_system_stats argument as False to disable warning.")
dump_system_stats = False
self.dump_system_stats = dump_system_stats
self.last_log_time: float = None

Expand Down Expand Up @@ -168,20 +173,25 @@ def dump_counters(self) -> None:
log_lines.append(counter_line.strip())

if self.dump_system_stats:
import psutil

process = psutil.Process(os.getpid())
mem_info = process.memory_info()
current_mem = mem_info.rss / (1024 ** 2) # Convert to MB
mem_percent = psutil.virtual_memory().percent
cpu_percent = process.cpu_percent()

log_lines.append(f"Memory usage: {current_mem:.2f} MB ({mem_percent:.2f}%) | CPU usage: {cpu_percent:.2f}%")

log_lines.append("")
log_message = "\n".join(log_lines)
if not self.logger:
# try to attach dlt logger
self.logger = dlt_logger.LOGGER
self._log(self.log_level, log_message)

def _log(self, log_level: int, log_message: str) -> None:
if isinstance(self.logger, logging.Logger):
self.logger.log(self.log_level, log_message)
self.logger.log(log_level, log_message)
else:
print(log_message, file=self.logger or sys.stdout)

Expand Down
Loading

0 comments on commit c5cbece

Please sign in to comment.