From 183be9e82632287c35081c4eefd81ff99d4bd4ba Mon Sep 17 00:00:00 2001 From: John Pruitt Date: Mon, 28 Oct 2024 16:08:19 -0500 Subject: [PATCH] chore: separate the dev/test/build between the projects --- .github/workflows/ci.yml | 68 ++-- DEVELOPMENT.md | 181 +++++---- .../extension/.dockerignore | 3 +- Dockerfile => projects/extension/Dockerfile | 4 +- Makefile => projects/extension/Makefile | 59 +-- projects/extension/ai/__init__.py | 2 +- build.py => projects/extension/build.py | 342 +++++------------- .../extension/requirements-dev.txt | 3 +- projects/extension/requirements-test.txt | 9 + projects/extension/sql/ai.control | 2 +- .../extension/tests/contents/test_contents.py | 4 +- .../tests/dump_restore/test_dump_restore.py | 12 +- .../tests/privileges/test_privileges.py | 4 +- projects/extension/tests/test_ollama.py | 4 +- .../tests/vectorizer_tool/__init__.py | 0 .../tests/vectorizer_tool/test_cli.py | 173 --------- projects/pgai/.dockerignore | 5 + projects/pgai/Makefile | 92 +++++ projects/pgai/requirements-dev.txt | 6 + .../pgai/requirements-test.txt | 2 - 20 files changed, 386 insertions(+), 589 deletions(-) rename .dockerignore => projects/extension/.dockerignore (62%) rename Dockerfile => projects/extension/Dockerfile (92%) rename Makefile => projects/extension/Makefile (58%) rename build.py => projects/extension/build.py (66%) rename requirements-dev.txt => projects/extension/requirements-dev.txt (76%) create mode 100644 projects/extension/requirements-test.txt delete mode 100644 projects/extension/tests/vectorizer_tool/__init__.py delete mode 100644 projects/extension/tests/vectorizer_tool/test_cli.py create mode 100644 projects/pgai/Makefile create mode 100644 projects/pgai/requirements-dev.txt rename requirements-test.txt => projects/pgai/requirements-test.txt (90%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 05b3d56b..1d438d56 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,7 +5,7 @@ permissions: contents: read jobs: - build-and-test: + build-and-test-extension: runs-on: ubuntu-latest steps: @@ -17,49 +17,75 @@ jobs: python-version: "3.10" cache: "pip" # caching pip dependencies - - run: pip install -r requirements-dev.txt - - - name: Build the pgai distributable and check artifacts - run: make build-vec - - name: Verify Docker installation run: | docker --version docker info - name: Build Docker image + working-directory: ./projects/extension run: make docker-build - name: Run Docker container + working-directory: ./projects/extension run: make docker-run - name: Build - run: docker exec pgai make build + run: docker exec pgai-ext make build - name: Lint SQL and Python - run: docker exec pgai make lint + run: docker exec pgai-ext make lint - name: Check Python Formatting - run: docker exec pgai make format-py + run: docker exec pgai-ext make format-py - - name: Install extension and vectorizer - run: | - docker exec pgai make install - docker exec pgai make install-vec + - name: Install extension + run: docker exec pgai-ext make install - name: Run test server - run: docker exec -d pgai make test-server + run: docker exec -d pgai-ext make test-server - name: Run tests - run: docker exec pgai make test - - - name: Run vectorizer tests - run: make test-vectorizer - - - name: Run type check - run: make type-check-py + run: docker exec pgai-ext make test - name: Stop and remove Docker container + working-directory: ./projects/extension run: | make docker-stop make docker-rm + + build-and-test-pgai: + runs-on: ubuntu-latest 
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: "pip" # caching pip dependencies
+
+      - name: Install dev/test dependencies
+        working-directory: ./projects/pgai
+        run: pip install -r requirements-dev.txt
+
+      - name: Lint
+        working-directory: ./projects/pgai
+        run: make lint
+
+      - name: Check Typing
+        working-directory: ./projects/pgai
+        run: make type-check
+
+      - name: Check Formatting
+        working-directory: ./projects/pgai
+        run: make format
+
+      - name: Run Tests
+        working-directory: ./projects/pgai
+        run: make test
+
+      - name: Build the pgai distributable and check artifacts
+        working-directory: ./projects/pgai
+        run: make build
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
index b64ddfe4..df82ca0c 100644
--- a/DEVELOPMENT.md
+++ b/DEVELOPMENT.md
@@ -1,15 +1,21 @@
 # Develop and test changes to pgai
 
 pgai brings embedding and generation AI models closer to the database. Want to contribute to the pgai project?
-Start here. This page shows you:
+Start here.
 
-- [The pgai development workflow](#the-pgai-development-workflow): build, run and test pgai in a Docker container.
-- [How to test pgai](#test-pgai): use the psql script to test your changes to pgai.
-- [The pgai build architecture](#the-pgai-build-architecture): work with multiple versions of pgai in the same environment.
+This project is organized as a monorepo with two distributable bodies of code:
+
+1. The pgai Postgres extension, located in [projects/extension](./projects/extension)
+2. The [pgai Python library/CLI](https://pypi.org/project/pgai/), located in [projects/pgai](./projects/pgai)
+
+This page shows you:
+
+- [How to work on the pgai extension](#working-on-the-pgai-extension)
+- [How to work on the pgai library](#working-on-the-pgai-library)
 
 ## PRs and commits
 
-The project uses [conventional commits][conventional-commits]. It's enforce by
+The project uses [conventional commits][conventional-commits]. It's enforced by
 CI; you won't be able to merge PRs if your commits do not comply. This helps us
 automate the release process, changelog generation, etc.
 
@@ -17,10 +23,18 @@
 If you don't want to wait for the CI to get feedback on your commit, you can
 install the git hook that checks your commits locally. To do so, run:
 
 ```bash
-make install-git-hooks
+cd projects/pgai
+make install-commit-hook
 ```
 
-## pgai Prerequisites
+## Working on the pgai extension
+
+- [pgai extension development prerequisites](#pgai-extension-development-prerequisites)
+- [The pgai extension development workflow](#the-pgai-extension-development-workflow)
+- [Controlling pgai extension tests](#controlling-pgai-extension-tests)
+- [The pgai extension architecture](#the-pgai-extension-architecture)
+
+### pgai extension development prerequisites
 
 To make changes to the pgai extension, do the following in your developer environment:
@@ -42,11 +56,11 @@ To make changes to the pgai extension, do the following in your developer enviro
    ollama pull llava:7b
    ```
 
-## The pgai development workflow
+### The pgai extension development workflow
 
 To make changes to pgai:
 
-1. Navigate to directory where you cloned the repo.
+1. Navigate to `projects/extension` in the directory where you cloned the repo.
 2. Build the docker image
    ```bash
    make docker-build
@@ -55,7 +69,7 @@ To make changes to pgai:
    ```bash
    make docker-run
    ```
-   The repo directory is mounted to `/pgai` in the running container.
+   The `projects/extension` directory is mounted to `/pgai` in the running container.
 4. 
Work inside the container:
 
    * **Docker shell**:
 
@@ -66,25 +80,31 @@ To make changes to pgai:
          ```
          You are logged in as root.
 
-      2. Install the extension
+      2. Build and install the extension
 
          ```bash
+         make build
          make install
          ```
 
       3. Run the unit tests
-
+
+         First, run the test-server in a second shell:
+         ```bash
+         make test-server
+         ```
+         Then run the tests in the first shell:
          ```bash
          make test
         ```
 
       4. Clean build artifacts
 
          ```bash
          make clean
          ```
 
       5. Uninstall the extension
 
          ```bash
          make uninstall
@@ -107,71 +127,60 @@ To make changes to pgai:
      ```
 
 
-## Test pgai
+### Controlling pgai extension tests
 
-The [tests](./tests) directory contains the unit tests.
+The [projects/extension/tests](./projects/extension/tests) directory contains the unit tests.
 
-To set up the pgai tests:
+To set up the tests:
 
-1. In a [.env](https://saurabh-kumar.com/python-dotenv/) file, add the variables associated with the component(s) you want to test:
+1. In a [.env](https://saurabh-kumar.com/python-dotenv/) file, use the following flags to enable/disable test suites:
+   ```text
+   # enable/disable tests
+   ENABLE_OPENAI_TESTS=1
+   ENABLE_OLLAMA_TESTS=1
+   ENABLE_ANTHROPIC_TESTS=1
+   ENABLE_COHERE_TESTS=1
+   ENABLE_VECTORIZER_TESTS=1
+   ENABLE_DUMP_RESTORE_TESTS=1
+   ENABLE_PRIVILEGES_TESTS=1
+   ENABLE_CONTENTS_TESTS=1
+   ENABLE_SECRETS_TESTS=1
+   ```
+
+2. Some tests require extra environment variables to be added to the .env file:
 
   **OpenAI**:
-
-   - ENABLE_OPENAI_TESTS - set to `1` to enable OpenAI unit tests.
   - OPENAI_API_KEY - an [OpenAI API Key](https://platform.openai.com/api-keys) for OpenAI unit testing.
 
  **Ollama**:
-
-   - ENABLE_OLLAMA_TESTS - set to `1` to enable Ollama unit tests.
  - OLLAMA_HOST - the URL to the Ollama instance to use for testing. For example, `http://host.docker.internal:11434`.
 
  **Anthropic**:
-
-   - ENABLE_ANTHROPIC_TESTS - set to `1` to enable Anthropic unit tests.
  - ANTHROPIC_API_KEY - an [Anthropic API Key](https://docs.anthropic.com/en/docs/quickstart#set-your-api-key) for Anthropic unit testing.
 
  **Cohere**:
-
-   - ENABLE_COHERE_TESTS - set to `1` to enable Cohere unit tests.
  - COHERE_API_KEY - a [Cohere API Key](https://docs.cohere.com/docs/rate-limits) for Cohere unit testing.
 
-2. If you have made changes to the source, from a Docker shell, install the extension:
-   ```bash
-   make docker-shell
-   make install
-   ```
-   You are in a Docker shell.
-
-3. Run the tests
-
-   ```bash
-   make test
-   ```
-
-   This runs pytest against the unit tests in the [./tests](./tests) directory:
-   1. Drops the `test` database.
-   2. Creates the `test` database.
-   3. Creates the `test` database user.
-   4. Runs the tests against the `test` database.
-   5. The `test` database and `test` user are left after the tests run for debugging
 
-Best practice is to add new tests when you commit new functionality.
 
-## The pgai architecture
+### The pgai extension architecture
 
-pgai consists of [SQL](./sql) scripts and a [Python](./src) package.
+pgai consists of [SQL](./projects/extension/sql) scripts and a [Python](./projects/extension/ai) package.
 
-* [Develop SQL in pgai](#develop-sql-in-pgai)
-* [Develop Python in pgai](#develop-python-in-pgai)
-* [Versions prior to 0.4.0](#versions-prior-to-040):
+* [Develop SQL in the pgai extension](#develop-sql-in-the-pgai-extension)
+* [Develop Python in the pgai extension](#develop-python-in-the-pgai-extension)
+* [Versions prior to 0.4.0](#versions-prior-to-040)
 
-### Develop SQL in pgai
+#### Develop SQL in the pgai extension
 
-SQL code used by pgai is maintained in [./sql](./sql).
+SQL code used by pgai is maintained in [./projects/extension/sql](./projects/extension/sql).
The SQL is organized into:
 
-* **Idempotent scripts**: maintained in [./sql/idempotent](./sql/idempotent).
+* **Idempotent scripts**: maintained in [./projects/extension/sql/idempotent](./projects/extension/sql/idempotent).
 
   Idempotent scripts consist of `CREATE OR REPLACE` style statements, usually as
   functions. They are executed in alphanumeric order every time you install or
   upgrade pgai. In general, it is safe to rename these scripts from one version to
   the next.
 
-* **Incremental scripts**: maintained in [./sql/incremental](./sql/incremental).
+* **Incremental scripts**: maintained in [./projects/extension/sql/incremental](./projects/extension/sql/incremental).
 
   Incremental files create tables and other stateful structures that should not be
   dropped when you upgrade from one version to another. Each incremental script
@@ -181,47 +190,46 @@ The SQL is organized into:
 
   Incremental scripts are executed in alphanumeric order on file name. Once an incremental script is published
   in a release, you must not rename it. To facilitate migration, each incremental file is
-  [wrapped](./sql/migration.sql). Each migration id is tracked in the `migration` table. For more information,
-  see [./sql/head.sql](./sql/head.sql).
+  [wrapped](./projects/extension/sql/migration.sql). Each migration id is tracked in the `migration` table. For more information,
+  see [./projects/extension/sql/head.sql](./projects/extension/sql/head.sql).
 
-* **Built scripts**: `./sql/ai--*.sql`
+* **Built scripts**: `./projects/extension/sql/ai--*.sql`
 
-  `make build-sql` "compiles" the idempotent and incremental scripts into the final
+  `make build` "compiles" the idempotent and incremental scripts into the final
   form that is installed into a postgres environment as an extension. A script
-  named `./sql/ai--<current-version>.sql` is built. For every prior version
+  named `./projects/extension/sql/ai--<current-version>.sql` is built. For every prior version
   (other than 0.1.0, 0.2.0, and 0.3.0), the file is copied to
-  `./sql/ai--<prior-version>--<current-version>.sql` to give postgres an upgrade
-  path from prior versions. The `./sql/ai.control` is also ensured to have the
+  `./projects/extension/sql/ai--<prior-version>--<current-version>.sql` to give postgres an upgrade
+  path from prior versions. The `./projects/extension/sql/ai.control` is also updated to have the
   correct version listed in it.
 
-  When you release a new version, add the `./sql/ai--*.sql` scripts to this repo with your
+  When you release a new version, add the `./projects/extension/sql/ai--*.sql` scripts to this repo with your
   pull request. The scripts from prior versions are checked in and should not be
   modified after having been released.
 
 If you are exclusively working on SQL, you may want to forego the high-level make
 targets in favor of the SQL-specific make targets:
 
-1. **Clean your environment**: run `make clean-sql` to delete `./sql/ai--*.sql`.
-
-   The `<current-version>` is defined in `versions()` in [./build.py](./build.py).
+1. **Clean your environment**: run `make clean-sql` to delete `./projects/extension/sql/ai--*.sql`.
 
-1. **Build pgai**: run `make build-sql` to compile idempotent and incremental scripts
-   into `./sql/ai--*.sql`.
-1. **Install pgai**: run `make install-sql` to install `./sql/ai--*.sql` and `./sql/ai*.control` into your local
-   environment.
+   The `<current-version>` is defined in `versions()` in [./projects/extension/build.py](./projects/extension/build.py).
+1. **Build pgai**: run `make build` to compile idempotent and incremental scripts
+   into `./projects/extension/sql/ai--*.sql`.
+1. 
**Install pgai**: run `make install-sql` to install `./projects/extension/sql/ai--*.sql` and `./projects/extension/sql/ai*.control` into your local
+   Postgres environment.
 
-### Develop Python in pgai
+#### Develop Python in the pgai extension
 
-Python code used by the pgai is maintained in [./src](./src).
+Python code used by the pgai extension is maintained in [./projects/extension/ai](./projects/extension/ai).
 
 Database functions written in
 [plpython3u](https://www.postgresql.org/docs/current/plpython.html) can import
 the modules in this package and any dependencies specified in
-[./src/pyproject.toml](./src/pyproject.toml). Including the following line at the
-beginning of the database function body will allow you to import. The
-build process replaces this comment line with Python code that makes this
-possible. Note that the leading four spaces are required.
+[./projects/extension/pyproject.toml](./projects/extension/pyproject.toml).
+Including the following line at the beginning of the database function body will
+allow you to import them. The build process replaces this comment line with Python
+code that makes this possible. Note that the leading four spaces are required.
 
 ```python
     #ADD-PYTHON-LIB-DIR
@@ -242,9 +250,40 @@ targets in favor of the Python-specific make targets:
    `/usr/local/lib/pgai`.
 
 
-### Versions prior to 0.4.0
+#### Versions prior to 0.4.0
 
 Prior to pgai v0.4.0, Python dependencies were installed system-wide. Until pgai
 versions 0.1 - 0.3 are deprecated, [old dependencies](./projects/extension/old_requirements.txt)
 are installed system-wide.
+
+## Working on the pgai library
+
+Working on the pgai library is like developing most Python
+libraries and applications. Use the [requirements-dev.txt](./projects/pgai/requirements-dev.txt)
+file to create a virtual env for development.
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -r requirements-dev.txt
+```
+
+Use the `help` target of the [Makefile](./projects/pgai/Makefile) to see what
+commands are available.
+
+```bash
+make help
+```
+
+Be sure to add unit tests to the [tests](./projects/pgai/tests) directory when
+you add or modify code. Use the following commands to check your work before
+submitting a PR:
+ +```bash +make test +make lint +make format +make type-check +``` + [conventional-commits]: https://www.conventionalcommits.org/en/v1.0.0/ diff --git a/.dockerignore b/projects/extension/.dockerignore similarity index 62% rename from .dockerignore rename to projects/extension/.dockerignore index 3e886ee5..319d2622 100644 --- a/.dockerignore +++ b/projects/extension/.dockerignore @@ -1,4 +1,5 @@ .env .git .github -.idea \ No newline at end of file +.idea +.venv \ No newline at end of file diff --git a/Dockerfile b/projects/extension/Dockerfile similarity index 92% rename from Dockerfile rename to projects/extension/Dockerfile index fc0371de..a8a8cf65 100644 --- a/Dockerfile +++ b/projects/extension/Dockerfile @@ -58,12 +58,10 @@ RUN set -eux; \ pip install /build/pgspot; \ rm -rf /build/pgspot -# install our dev/test python dependencies +# install our test python dependencies ENV PIP_BREAK_SYSTEM_PACKAGES=1 COPY requirements-test.txt /build/requirements-test.txt RUN pip install -r /build/requirements-test.txt -COPY projects/pgai/requirements.txt /build/requirements-pgai.txt -RUN pip install -r /build/requirements-pgai.txt RUN rm -r /build WORKDIR /pgai diff --git a/Makefile b/projects/extension/Makefile similarity index 58% rename from Makefile rename to projects/extension/Makefile index c456d8b3..a4c9dd93 100644 --- a/Makefile +++ b/projects/extension/Makefile @@ -22,10 +22,6 @@ clean-sql: clean-py: @./build.py clean-py -.PHONY: clean-vec -clean-vec: - @./build.py clean-vec - .PHONY: build build: @PG_BIN=$(PG_BIN) ./build.py build @@ -50,14 +46,6 @@ install-prior-py: install-py: @./build.py install-py -.PHONY: install-vec -install-vec: - @./build.py install-vec - -.PHONY: build-vec -build-vec: - @./build.py build-vec - .PHONY: uninstall uninstall: @PG_BIN=$(PG_BIN) ./build.py uninstall @@ -70,10 +58,6 @@ uninstall-sql: uninstall-py: @./build.py uninstall-py -.PHONY: uninstall-vec -uninstall-vec: - @./build.py uninstall-vec - .PHONY: build-sql build-sql: @./build.py build-sql @@ -82,14 +66,6 @@ build-sql: test-server: @./build.py test-server -.PHONY: vectorizer -vectorizer: - @./build.py vectorizer - -.PHONY: test-vectorizer -test-vectorizer: - @cd projects/pgai && pytest - .PHONY: test test: @./build.py test @@ -99,17 +75,13 @@ lint-sql: @./build.py lint-sql .PHONY: lint-py -lint-py: type-check-py +lint-py: @./build.py lint-py .PHONY: lint -lint: type-check-py +lint: @./build.py lint -.PHONY: type-check-py -type-check-py: - @./build.py type-check-py - .PHONY: format-py format-py: @./build.py format-py @@ -118,48 +90,27 @@ format-py: docker-build: @PG_MAJOR=$(PG_MAJOR) ./build.py docker-build -.PHONY: docker-build-vec -docker-build-vec: - @./build.py docker-build-vec - .PHONY: docker-run docker-run: @./build.py docker-run -.PHONY: docker-run-vec -docker-run-vec: - @./build.py docker-run-vec - .PHONY: docker-stop docker-stop: @./build.py docker-stop -.PHONY: docker-stop-vec -docker-stop-vec: - @./build.py docker-stop-vec - .PHONY: docker-rm docker-rm: @./build.py docker-rm -.PHONY: docker-rm-vec -docker-rm-vec: - @./build.py docker-rm-vec - .PHONY: run run: @PG_MAJOR=$(PG_MAJOR) PG_BIN=$(PG_BIN) ./build.py run - @docker exec -it -u postgres pgai /bin/bash -c "set -e; if [ -f .env ]; then set -a; source .env; set +a; fi; psql" + @docker exec -it -u postgres pgai-ext /bin/bash -c "set -e; if [ -f .env ]; then set -a; source .env; set +a; fi; psql" .PHONY: docker-shell docker-shell: - @docker exec -it -u root pgai /bin/bash + @docker exec -it -u root pgai-ext /bin/bash .PHONY: psql-shell 
psql-shell: - @docker exec -it -u postgres pgai /bin/bash -c "set -e; if [ -f .env ]; then set -a; source .env; set +a; fi; psql" - -.PHONY: install-commit-hook -install-commit-hook: - @curl --fail -o .git/hooks/commit-msg https://raw.githubusercontent.com/hazcod/semantic-commit-hook/master/commit-msg \ - && chmod 500 .git/hooks/commit-msg + @docker exec -it -u postgres pgai-ext /bin/bash -c "set -e; if [ -f .env ]; then set -a; source .env; set +a; fi; psql" diff --git a/projects/extension/ai/__init__.py b/projects/extension/ai/__init__.py index 6a9beea8..3d26edf7 100644 --- a/projects/extension/ai/__init__.py +++ b/projects/extension/ai/__init__.py @@ -1 +1 @@ -__version__ = "0.4.0" +__version__ = "0.4.1" diff --git a/build.py b/projects/extension/build.py similarity index 66% rename from build.py rename to projects/extension/build.py index ae8fef56..196e10fa 100755 --- a/build.py +++ b/projects/extension/build.py @@ -1,13 +1,14 @@ #!/usr/bin/env python3 -import os import platform import re +import os import shutil import subprocess import sys import tempfile from pathlib import Path + HELP = """Available targets: - help displays this message and exits - build-install runs build followed by install @@ -15,20 +16,16 @@ - install-sql installs the sql files into the postgres installation - install-prior-py installs the extension's python package for prior versions - install-py installs the extension's python package -- install-vec installs the vectorizer python tool - uninstall uninstalls the project - uninstall-sql removes the sql extension from the postgres installation - uninstall-py removes the extension's python package from the system -- uninstall-vec removes the vectorizer python tool from the system - build alias for build-sql - build-sql constructs the sql files for the extension - clean removes python build artifacts from the src dir - clean-sql removes sql file artifacts from the sql dir - clean-py removes python build artifacts from the extension src dir -- clean-vec removes python build artifacts from the vectorizer src dir - test runs the tests in the docker container - test-server runs the test http server in the docker container -- vectorizer runs the vectorizer python tool in the docker container - lint-sql runs pgspot against the `ai--.sql` file - lint-py runs ruff linter against the python source files - lint runs both sql and python linters @@ -37,15 +34,12 @@ - docker-run launches a container in docker using the docker image - docker-stop stops the container - docker-rm deletes the dev container -- docker-build-vec builds the docker image for the vectorizer tool -- docker-run-vec runs a docker container for the vectorizer tool -- docker-stop-vec stops the docker container for the vectorizer tool -- docker-rm-vec deletes the docker container for the vectorizer tool - run builds+runs the dev container and installs the extension""" def versions() -> list[str]: return [ + "0.4.1", "0.4.0", "0.3.0", "0.2.0", @@ -61,36 +55,34 @@ def prior_versions() -> list[str]: return versions()[1:] if len(versions()) > 1 else [] -def pg_major() -> str | None: - return os.getenv("PG_MAJOR") - - -def root_dir() -> Path: - return Path(__file__).resolve().parent - - -def sql_dir() -> Path: - return project_extension_dir().joinpath("sql").resolve() +def parse_version(version: str) -> tuple[int, int, int, str | None]: + parts = re.split(r"[.-]", version, 4) + return ( + int(parts[0]), + int(parts[1]), + int(parts[2]), + parts[3] if len(parts) > 3 else None, + ) -def projects_dir() -> Path: - 
return root_dir().joinpath("projects").resolve() +def git_tag(version: str) -> str: + return f"extension-{version}" -def project_extension_dir() -> Path: - return projects_dir().joinpath("extension").resolve() +def pg_major() -> str | None: + return os.getenv("PG_MAJOR") -def project_pgai_dir() -> Path: - return projects_dir().joinpath("pgai").resolve() +def ext_dir() -> Path: + return Path(__file__).resolve().parent -def incremental_sql_dir() -> Path: - return sql_dir().joinpath("incremental") +def sql_dir() -> Path: + return ext_dir() / "sql" def idempotent_sql_dir() -> Path: - return sql_dir().joinpath("idempotent") + return sql_dir() / "idempotent" def idempotent_sql_files() -> list[Path]: @@ -112,6 +104,10 @@ def check_idempotent_sql_files(paths: list[Path]) -> None: prev = this +def incremental_sql_dir() -> Path: + return sql_dir() / "incremental" + + def incremental_sql_files() -> list[Path]: paths = [x for x in incremental_sql_dir().glob("*.sql")] paths.sort() @@ -132,33 +128,30 @@ def check_incremental_sql_files(paths: list[Path]) -> None: def output_sql_file() -> Path: - return sql_dir().joinpath(f"ai--{this_version()}.sql") + return sql_dir() / f"ai--{this_version()}.sql" -def tests_dir() -> Path: - return project_extension_dir().joinpath("tests").absolute() - - -def where_am_i() -> str: - if "WHERE_AM_I" in os.environ and os.environ["WHERE_AM_I"] == "docker": - return "docker" - return "host" +def control_file() -> Path: + return sql_dir() / "ai.control" -def build_sql_control_file() -> None: - ctl_file = sql_dir().joinpath("ai.control") - content = ctl_file.read_text() +def build_control_file() -> None: + content = control_file().read_text() lines = [] for line in content.splitlines(keepends=True): if line.startswith("default_version"): lines.append(f"default_version='{this_version()}'\n") else: lines.append(line) - ctl_file.write_text("".join(lines)) + control_file().write_text("".join(lines)) + + +def sql_migration_file() -> Path: + return sql_dir() / "migration.sql" def build_incremental_sql_file(input_file: Path) -> str: - template = sql_dir().joinpath("migration.sql").read_text() + template = sql_migration_file().read_text() migration_name = input_file.name migration_body = input_file.read_text() version = this_version() @@ -194,15 +187,18 @@ def build_idempotent_sql_file(input_file: Path) -> str: ) # leading 4 spaces is intentional +def sql_head_file() -> Path: + return sql_dir() / "head.sql" + + def build_sql() -> None: - build_sql_control_file() + build_control_file() hr = "".rjust(80, "-") osf = output_sql_file() osf.unlink(missing_ok=True) with osf.open("w") as wf: wf.write(f"{hr}\n-- {this_version()}\n\n\n") - with sql_dir().joinpath("head.sql").open("r") as rf: - shutil.copyfileobj(rf, wf) + wf.write(sql_head_file().read_text()) wf.write("\n\n\n") files = incremental_sql_files() check_incremental_sql_files(files) @@ -233,7 +229,7 @@ def build_sql() -> None: def clean_sql() -> None: for f in sql_dir().glob(f"ai--*.*.*--{this_version()}.sql"): f.unlink(missing_ok=True) - sql_dir().joinpath(f"ai--{this_version()}.sql").unlink(missing_ok=True) + output_sql_file().unlink(missing_ok=True) def postgres_bin_dir() -> Path: @@ -242,8 +238,8 @@ def postgres_bin_dir() -> Path: return Path(bin_dir).resolve() else: bin_dir = Path(f"/usr/lib/postgresql/{pg_major()}/bin") - if bin_dir.exists(): - return bin_dir.absolute() + if bin_dir.is_dir(): + return bin_dir.resolve() else: p = shutil.which("pg_config") if not p: @@ -253,7 +249,7 @@ def postgres_bin_dir() -> Path: def 
pg_config() -> Path: - return postgres_bin_dir().joinpath("pg_config") + return postgres_bin_dir() / "pg_config" def extension_install_dir() -> Path: @@ -265,23 +261,26 @@ def extension_install_dir() -> Path: text=True, capture_output=True, ) - return Path(str(proc.stdout).strip()).resolve().joinpath("extension") + return Path(str(proc.stdout).strip()).resolve() / "extension" def install_sql() -> None: ext_dir = extension_install_dir() - if not ext_dir.exists(): + if not ext_dir.is_dir(): print(f"extension directory does not exist: {ext_dir}", file=sys.stderr) sys.exit(1) - this_sql_file = sql_dir().joinpath(f"ai--{this_version()}.sql") - if not this_sql_file.exists(): + this_sql_file = output_sql_file() + if not this_sql_file.is_file(): print(f"required sql file is missing: {this_sql_file}", file=sys.stderr) sys.exit(1) + if not control_file().is_file(): + print(f"required control file is missing: {control_file()}", file=sys.stderr) + sys.exit(1) for src in sql_dir().glob("ai*.control"): - dest = ext_dir.joinpath(src.name) + dest = ext_dir / src.name shutil.copyfile(src, dest) for src in sql_dir().glob("ai--*.sql"): - dest = ext_dir.joinpath(src.name) + dest = ext_dir / src.name shutil.copyfile(src, dest) @@ -308,8 +307,8 @@ def python_install_dir() -> Path: def install_old_py_deps() -> None: # this is necessary for versions prior to 0.4.0 # we will deprecate these versions and then get rid of this function - old_reqs_file = project_extension_dir().joinpath("old_requirements.txt").resolve() - if old_reqs_file.exists(): + old_reqs_file = ext_dir().joinpath("old_requirements.txt").resolve() + if old_reqs_file.is_file(): env = {k: v for k, v in os.environ.items()} env["PIP_BREAK_SYSTEM_PACKAGES"] = "1" subprocess.run( @@ -317,7 +316,7 @@ def install_old_py_deps() -> None: shell=True, check=True, env=env, - cwd=str(project_extension_dir()), + cwd=str(ext_dir()), ) @@ -341,13 +340,14 @@ def install_prior_py() -> None: continue tmp_dir = Path(tempfile.gettempdir()).joinpath("pgai", version) tmp_dir.mkdir(parents=True, exist_ok=True) + branch = git_tag(version) subprocess.run( - f"git clone https://github.com/timescale/pgai.git --branch {version} {tmp_dir}", + f"git clone https://github.com/timescale/pgai.git --branch {branch} {tmp_dir}", shell=True, check=True, env=os.environ, ) - tmp_src_dir = tmp_dir.joinpath("src").resolve() + tmp_src_dir = tmp_dir.joinpath("projects", "extension").resolve() subprocess.run( f'pip3 install -v --compile -t "{version_target_dir}" "{tmp_src_dir}"', check=True, @@ -364,7 +364,7 @@ def build_init_py() -> None: # function just ensures that you can't screw up the current version. The # only place you have to update the version when starting a new release is # in the versions() function. 
- init_py = project_extension_dir().joinpath("ai", "__init__.py").resolve() + init_py = ext_dir().joinpath("ai", "__init__.py").resolve() content = init_py.read_text() lines = [] for line in content.splitlines(keepends=True): @@ -391,28 +391,28 @@ def install_py() -> None: ): # delete package info if exists shutil.rmtree(d) subprocess.run( - f'pip3 install -v --no-deps --compile -t "{version_target_dir}" "{project_extension_dir()}"', + f'pip3 install -v --no-deps --compile -t "{version_target_dir}" "{ext_dir()}"', check=True, shell=True, env=os.environ, - cwd=str(project_extension_dir()), + cwd=str(ext_dir()), ) else: version_target_dir.mkdir(exist_ok=True) subprocess.run( - f'pip3 install -v --compile -t "{version_target_dir}" "{project_extension_dir()}"', + f'pip3 install -v --compile -t "{version_target_dir}" "{ext_dir()}"', check=True, shell=True, env=os.environ, - cwd=str(project_extension_dir()), + cwd=str(ext_dir()), ) def clean_py() -> None: - d = project_extension_dir().joinpath("build") + d = ext_dir().joinpath("build") if d.exists(): shutil.rmtree(d, ignore_errors=True) - d = project_extension_dir().joinpath("pgai.egg-info") + d = ext_dir().joinpath("pgai.egg-info") if d.exists(): shutil.rmtree(d, ignore_errors=True) @@ -421,87 +421,9 @@ def uninstall_py() -> None: shutil.rmtree(python_install_dir(), ignore_errors=True) -def pgai_version() -> str: - content = project_pgai_dir().joinpath("pgai", "__init__.py").resolve().read_text() - for line in content.splitlines(keepends=True): - if line.startswith("__version__"): - m = re.search(r'"(.*)"', line) - if not m: - print("could not determine the pgai version", file=sys.stderr) - sys.exit(1) - g = m.groups() - if not g or len(g) != 1: - print("could not determine the pgai version", file=sys.stderr) - sys.exit(1) - version = g[0].strip('"') - return version - print("could not determine the pgai version", file=sys.stderr) - sys.exit(1) - - -def pgai_wheel() -> Path: - return ( - project_pgai_dir() - .joinpath("dist", f"pgai-{pgai_version()}-py3-none-any.whl") - .resolve() - ) - - -def install_vectorizer() -> None: - if not pgai_wheel().is_file(): - build_vectorizer() - subprocess.run( - f'pip3 install -v --compile "{pgai_wheel()}"', - check=True, - shell=True, - env=os.environ, - cwd=str(project_pgai_dir()), - ) - - -def build_vectorizer() -> None: - subprocess.run( - "python3 -m build --sdist --wheel", - check=True, - shell=True, - env=os.environ, - cwd=str(project_pgai_dir()), - ) - subprocess.run( - "twine check dist/*", - check=True, - shell=True, - env=os.environ, - cwd=str(project_pgai_dir()), - ) - - -def clean_vectorizer() -> None: - d = project_pgai_dir().joinpath("build") - if d.exists(): - shutil.rmtree(d, ignore_errors=True) - d = project_pgai_dir().joinpath("pgai.egg-info") - if d.exists(): - shutil.rmtree(d, ignore_errors=True) - d = project_pgai_dir().joinpath("dist") - if d.exists(): - shutil.rmtree(d, ignore_errors=True) - - -def uninstall_vectorizer() -> None: - subprocess.run( - "pip3 uninstall -v -y pgai", - check=True, - shell=True, - env=os.environ, - cwd=str(project_pgai_dir()), - ) - - def uninstall() -> None: uninstall_sql() uninstall_py() - uninstall_vectorizer() def build() -> None: @@ -512,7 +434,6 @@ def install() -> None: install_prior_py() install_py() install_sql() - # installing the vectorizer cli tool should be explicit def build_install() -> None: @@ -523,13 +444,22 @@ def build_install() -> None: def clean() -> None: clean_sql() clean_py() - clean_vectorizer() + + +def tests_dir() -> Path: + return 
ext_dir().joinpath("tests").absolute() + + +def where_am_i() -> str: + if "WHERE_AM_I" in os.environ and os.environ["WHERE_AM_I"] == "docker": + return "docker" + return "host" def test_server() -> None: if where_am_i() == "host": - cmd = "docker exec -it -w /pgai/projects/extension/tests/vectorizer pgai fastapi dev server.py" - subprocess.run(cmd, shell=True, check=True, env=os.environ, cwd=root_dir()) + cmd = "docker exec -it -w /pgai/tests/vectorizer pgai-ext fastapi dev server.py" + subprocess.run(cmd, shell=True, check=True, env=os.environ, cwd=ext_dir()) else: cmd = "fastapi dev server.py" subprocess.run( @@ -541,21 +471,6 @@ def test_server() -> None: ) -def vectorizer() -> None: - if where_am_i() == "host": - cmd = "docker exec -it pgai pgai vectorizer-worker --version" - subprocess.run(cmd, shell=True, check=True, env=os.environ, cwd=root_dir()) - else: - cmd = "pgai vectorizer-worker --version" - subprocess.run( - cmd, - shell=True, - check=True, - env=os.environ, - cwd=root_dir(), - ) - - def test() -> None: subprocess.run("pytest", shell=True, check=True, env=os.environ, cwd=tests_dir()) @@ -573,9 +488,7 @@ def lint_sql() -> None: def lint_py() -> None: - subprocess.run( - f"ruff check {projects_dir()}", shell=True, check=True, env=os.environ - ) + subprocess.run(f"ruff check {ext_dir()}", shell=True, check=True, env=os.environ) def lint() -> None: @@ -585,13 +498,7 @@ def lint() -> None: def format_py() -> None: subprocess.run( - f"ruff format --diff {projects_dir()}", shell=True, check=True, env=os.environ - ) - - -def type_check_py() -> None: - subprocess.run( - "pyright", shell=True, check=True, env=os.environ, cwd=project_pgai_dir() + f"ruff format --diff {ext_dir()}", shell=True, check=True, env=os.environ ) @@ -601,39 +508,23 @@ def docker_build() -> None: else: rust_flags = "" subprocess.run( - f"""docker build --build-arg PG_MAJOR={pg_major()} {rust_flags} -t pgai .""", - shell=True, - check=True, - env=os.environ, - text=True, - cwd=root_dir(), - ) - - -def docker_build_vectorizer() -> None: - subprocess.run( - f"""docker build -t pgai/vectorizer:latest -t pgai/vectorizer:{this_version()} .""", + f"""docker build --build-arg PG_MAJOR={pg_major()} {rust_flags} -t pgai-ext .""", shell=True, check=True, env=os.environ, text=True, - cwd=project_pgai_dir(), + cwd=ext_dir(), ) def docker_run() -> None: # Set TESTCONTAINERS_HOST_OVERRIDE when running on MacOS. 
- env_var = "" - if platform.system() == "Darwin": - env_var = "-e TESTCONTAINERS_HOST_OVERRIDE=host.docker.internal" cmd = " ".join( [ - "docker run -d --name pgai -p 127.0.0.1:5432:5432 -e POSTGRES_HOST_AUTH_METHOD=trust", - "-v /var/run/docker.sock:/var/run/docker.sock", - f"--mount type=bind,src={root_dir()},dst=/pgai", - env_var, # Include the environment variable if on macOS + "docker run -d --name pgai-ext -p 127.0.0.1:5432:5432 -e POSTGRES_HOST_AUTH_METHOD=trust", + f"--mount type=bind,src={ext_dir()},dst=/pgai", "-e TEST_ENV_SECRET=super_secret", - "pgai", + "pgai-ext", "-c shared_preload_libraries='timescaledb, pgextwlist'", "-c extwlist.extensions='ai,vector'", ] @@ -641,40 +532,15 @@ def docker_run() -> None: subprocess.run(cmd, shell=True, check=True, env=os.environ, text=True) -def docker_run_vectorizer() -> None: - cmd = " ".join( - [ - f"docker run -d --name vectorizer pgai/vectorizer:{this_version()}", - ] - ) - subprocess.run(cmd, shell=True, check=True, env=os.environ, text=True) - - def docker_stop() -> None: subprocess.run( - """docker stop pgai""", shell=True, check=True, env=os.environ, text=True - ) - - -def docker_stop_vectorizer() -> None: - subprocess.run( - """docker stop vectorizer""", shell=True, check=True, env=os.environ, text=True + """docker stop pgai-ext""", shell=True, check=True, env=os.environ, text=True ) def docker_rm() -> None: subprocess.run( - """docker rm --force --volumes pgai""", - shell=True, - check=True, - env=os.environ, - text=True, - ) - - -def docker_rm_vectorizer() -> None: - subprocess.run( - """docker rm --force --volumes vectorizer""", + """docker rm --force --volumes pgai-ext""", shell=True, check=True, env=os.environ, @@ -685,12 +551,12 @@ def docker_rm_vectorizer() -> None: def run() -> None: docker_build() docker_run() - cmd = "docker exec pgai make build-install" - subprocess.run(cmd, shell=True, check=True, env=os.environ, cwd=root_dir()) - cmd = 'docker exec -u postgres pgai psql -c "create extension ai cascade"' - subprocess.run(cmd, shell=True, check=True, env=os.environ, cwd=root_dir()) - cmd = "docker exec -it -d -w /pgai/tests pgai fastapi dev server.py" - subprocess.run(cmd, shell=True, check=True, env=os.environ, cwd=root_dir()) + cmd = "docker exec pgai-ext make build-install" + subprocess.run(cmd, shell=True, check=True, env=os.environ, cwd=ext_dir()) + cmd = 'docker exec -u postgres pgai-ext psql -c "create extension ai cascade"' + subprocess.run(cmd, shell=True, check=True, env=os.environ, cwd=ext_dir()) + cmd = "docker exec -it -d -w /pgai/tests pgai-ext fastapi dev server.py" + subprocess.run(cmd, shell=True, check=True, env=os.environ, cwd=ext_dir()) if __name__ == "__main__": @@ -708,10 +574,6 @@ def run() -> None: install_prior_py() elif action == "install-py": install_py() - elif action == "install-vec": - install_vectorizer() - elif action == "build-vec": - build_vectorizer() elif action == "install-sql": install_sql() elif action == "build-sql": @@ -720,22 +582,16 @@ def run() -> None: clean_sql() elif action == "clean-py": clean_py() - elif action == "clean-vec": - clean_vectorizer() elif action == "clean": clean() elif action == "uninstall-py": uninstall_py() - elif action == "uninstall-vec": - uninstall_vectorizer() elif action == "uninstall-sql": uninstall_sql() elif action == "uninstall": uninstall() elif action == "test-server": test_server() - elif action == "vectorizer": - vectorizer() elif action == "test": test() elif action == "lint-sql": @@ -746,24 +602,14 @@ def run() -> None: lint() elif action 
== "format-py": format_py() - elif action == "type-check-py": - type_check_py() elif action == "docker-build": docker_build() - elif action == "docker-build-vec": - docker_build_vectorizer() elif action == "docker-run": docker_run() - elif action == "docker-run-vec": - docker_run_vectorizer() elif action == "docker-stop": docker_stop() - elif action == "docker-stop-vec": - docker_stop_vectorizer() elif action == "docker-rm": docker_rm() - elif action == "docker-rm-vec": - docker_rm_vectorizer() elif action == "run": run() else: diff --git a/requirements-dev.txt b/projects/extension/requirements-dev.txt similarity index 76% rename from requirements-dev.txt rename to projects/extension/requirements-dev.txt index 5f8622bd..16a234aa 100644 --- a/requirements-dev.txt +++ b/projects/extension/requirements-dev.txt @@ -3,5 +3,4 @@ # you may use this on your host to aid your IDE's code completion etc. capabilities -r requirements-test.txt --r projects/extension/requirements.txt --r projects/pgai/requirements.txt +-r requirements.txt diff --git a/projects/extension/requirements-test.txt b/projects/extension/requirements-test.txt new file mode 100644 index 00000000..de44c0f1 --- /dev/null +++ b/projects/extension/requirements-test.txt @@ -0,0 +1,9 @@ +# this requirements file is used in the development docker image for running tests, linting, etc. +# it is also referred to from requirements-dev.txt for use on host development machines + +ruff==0.6.9 +pytest==8.3.2 +python-dotenv==1.0.1 +fastapi==0.112.0 +fastapi-cli==0.0.5 +psycopg[binary]==3.2.1 diff --git a/projects/extension/sql/ai.control b/projects/extension/sql/ai.control index 74265e91..116b5836 100644 --- a/projects/extension/sql/ai.control +++ b/projects/extension/sql/ai.control @@ -1,6 +1,6 @@ # pgai extension comment='helper functions for ai workflows' -default_version='0.4.0' +default_version='0.4.1' requires='vector,plpython3u' relocatable=false schema=ai diff --git a/projects/extension/tests/contents/test_contents.py b/projects/extension/tests/contents/test_contents.py index c5d0c1f5..13dc50dd 100644 --- a/projects/extension/tests/contents/test_contents.py +++ b/projects/extension/tests/contents/test_contents.py @@ -22,7 +22,7 @@ def where_am_i() -> str: def docker_dir() -> str: - return "/pgai/projects/extension/tests/contents" + return "/pgai/tests/contents" def host_dir() -> Path: @@ -41,7 +41,7 @@ def init() -> None: ] ) if where_am_i() != "docker": - cmd = f"docker exec -w {docker_dir()} pgai {cmd}" + cmd = f"docker exec -w {docker_dir()} pgai-ext {cmd}" subprocess.run(cmd, check=True, shell=True, env=os.environ, cwd=str(host_dir())) diff --git a/projects/extension/tests/dump_restore/test_dump_restore.py b/projects/extension/tests/dump_restore/test_dump_restore.py index 07d91e04..1d27453c 100644 --- a/projects/extension/tests/dump_restore/test_dump_restore.py +++ b/projects/extension/tests/dump_restore/test_dump_restore.py @@ -25,7 +25,7 @@ def where_am_i() -> str: def docker_dir() -> str: - return "/pgai/projects/extension/tests/dump_restore" + return "/pgai/tests/dump_restore" def host_dir() -> Path: @@ -69,7 +69,7 @@ def dump_db() -> None: ] ) if where_am_i() != "docker": - cmd = f"docker exec -w {docker_dir()} pgai {cmd}" + cmd = f"docker exec -w {docker_dir()} pgai-ext {cmd}" subprocess.run(cmd, check=True, shell=True, env=os.environ, cwd=str(host_dir())) @@ -86,7 +86,7 @@ def restore_db() -> None: ] ) if where_am_i() != "docker": - cmd = f"docker exec -w {docker_dir()} pgai {cmd}" + cmd = f"docker exec -w {docker_dir()} 
pgai-ext {cmd}" subprocess.run(cmd, check=True, shell=True, env=os.environ, cwd=str(host_dir())) @@ -103,7 +103,7 @@ def snapshot_db(dbname: str) -> None: ] ) if where_am_i() != "docker": - cmd = f"docker exec -w {docker_dir()} pgai {cmd}" + cmd = f"docker exec -w {docker_dir()} pgai-ext {cmd}" subprocess.run(cmd, check=True, shell=True, env=os.environ, cwd=str(host_dir())) @@ -117,7 +117,7 @@ def init_src() -> None: ] ) if where_am_i() != "docker": - cmd = f"docker exec -w {docker_dir()} pgai {cmd}" + cmd = f"docker exec -w {docker_dir()} pgai-ext {cmd}" subprocess.run(cmd, check=True, shell=True, env=os.environ, cwd=str(host_dir())) @@ -136,7 +136,7 @@ def after_dst() -> None: ] ) if where_am_i() != "docker": - cmd = f"docker exec -w {docker_dir()} pgai {cmd}" + cmd = f"docker exec -w {docker_dir()} pgai-ext {cmd}" subprocess.run(cmd, check=True, shell=True, env=os.environ, cwd=str(host_dir())) diff --git a/projects/extension/tests/privileges/test_privileges.py b/projects/extension/tests/privileges/test_privileges.py index 14d0d79d..40b67b00 100644 --- a/projects/extension/tests/privileges/test_privileges.py +++ b/projects/extension/tests/privileges/test_privileges.py @@ -22,7 +22,7 @@ def where_am_i() -> str: def docker_dir() -> str: - return "/pgai/projects/extension/tests/privileges" + return "/pgai/tests/privileges" def host_dir() -> Path: @@ -46,7 +46,7 @@ def psql_file(user, dbname, file: str) -> None: ] ) if where_am_i() != "docker": - cmd = f"docker exec -w {docker_dir()} pgai {cmd}" + cmd = f"docker exec -w {docker_dir()} pgai-ext {cmd}" subprocess.run(cmd, check=True, shell=True, env=os.environ, cwd=str(host_dir())) diff --git a/projects/extension/tests/test_ollama.py b/projects/extension/tests/test_ollama.py index 967a294c..3a4ee5ca 100644 --- a/projects/extension/tests/test_ollama.py +++ b/projects/extension/tests/test_ollama.py @@ -139,7 +139,7 @@ def test_ollama_image(cur_with_ollama_host): select ai.ollama_generate ( 'llava:7b' , 'Please describe this image.' 
- , images=> array[pg_read_binary_file('/pgai/projects/extension/tests/postgresql-vs-pinecone.jpg')] + , images=> array[pg_read_binary_file('/pgai/tests/postgresql-vs-pinecone.jpg')] , system_prompt=>'you are a helpful assistant' , embedding_options=> jsonb_build_object ( 'seed', 42 @@ -209,7 +209,7 @@ def test_ollama_chat_complete_image(cur_with_ollama_host): ( jsonb_build_object ( 'role', 'user' , 'content', 'describe this image' - , 'images', jsonb_build_array(encode(pg_read_binary_file('/pgai/projects/extension/tests/postgresql-vs-pinecone.jpg'), 'base64')) + , 'images', jsonb_build_array(encode(pg_read_binary_file('/pgai/tests/postgresql-vs-pinecone.jpg'), 'base64')) ) ) , chat_options=> jsonb_build_object diff --git a/projects/extension/tests/vectorizer_tool/__init__.py b/projects/extension/tests/vectorizer_tool/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/projects/extension/tests/vectorizer_tool/test_cli.py b/projects/extension/tests/vectorizer_tool/test_cli.py deleted file mode 100644 index 52726381..00000000 --- a/projects/extension/tests/vectorizer_tool/test_cli.py +++ /dev/null @@ -1,173 +0,0 @@ -import os -import subprocess -from pathlib import Path - -import psycopg -import pytest -from psycopg.rows import namedtuple_row - -# skip tests in this module if disabled -enable_vectorizer_tool_tests = os.getenv("ENABLE_VECTORIZER_TOOL_TESTS") -if not enable_vectorizer_tool_tests or enable_vectorizer_tool_tests == "0": - pytest.skip(allow_module_level=True) - - -def db_url(user: str, dbname: str) -> str: - return f"postgres://{user}@127.0.0.1:5432/{dbname}" - - -def create_database(dbname: str) -> None: - with psycopg.connect( - db_url(user="postgres", dbname="postgres"), autocommit=True - ) as con: - with con.cursor() as cur: - cur.execute(f"drop database if exists {dbname} with (force)") - cur.execute(f"create database {dbname}") - - -def vectorizer_src_dir() -> Path: - p = Path(__file__).parent.parent.parent.parent.joinpath("pgai").resolve() - return p - - -def tiktoken_cache_dir() -> Path: - return vectorizer_src_dir().parent.joinpath("pgai", "tiktoken_cache").resolve() - - -@pytest.fixture(scope="module", autouse=True) -def create_tiktoken_cache_dir_if_missing() -> None: - d = tiktoken_cache_dir() - if not d.is_dir(): - d.mkdir(exist_ok=True, parents=True) - - -def test_bad_db_url(): - _db_url = db_url("postgres", "this_is_not_a_db") - env = os.environ.copy() - env["VECTORIZER_DB_URL"] = _db_url - env["TIKTOKEN_CACHE_DIR"] = str(tiktoken_cache_dir()) - env["OPENAI_API_KEY"] = "this_is_not_a_key" - p = subprocess.run( - "python3 -m pgai", - shell=True, - text=True, - capture_output=True, - env=env, - cwd=vectorizer_src_dir(), - ) - assert p.returncode == 1 - assert 'FATAL: database "this_is_not_a_db" does not exist' in str(p.stderr) - env.pop("VECTORIZER_DB_URL") - p = subprocess.run( - f"python3 -m pgai -d '{_db_url}'", - shell=True, - text=True, - capture_output=True, - env=env, - cwd=vectorizer_src_dir(), - ) - assert p.returncode == 1 - assert 'FATAL: database "this_is_not_a_db" does not exist' in str(p.stderr) - - -def test_pgai_not_installed(): - db = "vcli1" - create_database(db) - _db_url = db_url("postgres", db) - env = os.environ.copy() - env["VECTORIZER_DB_URL"] = _db_url - env["TIKTOKEN_CACHE_DIR"] = str(tiktoken_cache_dir()) - env["OPENAI_API_KEY"] = "this_is_not_a_key" - p = subprocess.run( - "python3 -m pgai", - shell=True, - capture_output=True, - text=True, - env=env, - cwd=vectorizer_src_dir(), - ) - assert p.returncode == 1 - assert 
"the pgai extension is not installed" in str(p.stdout) - env.pop("VECTORIZER_DB_URL") - p = subprocess.run( - f"python3 -m pgai -d '{_db_url}'", - shell=True, - capture_output=True, - text=True, - env=env, - cwd=vectorizer_src_dir(), - ) - assert p.returncode == 1 - assert "the pgai extension is not installed" in str(p.stdout) - - -def test_vectorizer_cli(): - db = "vcli2" - create_database(db) - _db_url = db_url("postgres", db) - with psycopg.connect(_db_url, autocommit=True, row_factory=namedtuple_row) as con: - with con.cursor() as cur: - cur.execute("create extension if not exists vectorscale cascade") - cur.execute("create extension if not exists ai cascade") - cur.execute("create extension if not exists timescaledb") - cur.execute("drop table if exists note0") - cur.execute(""" - create table note0 - ( id bigint not null primary key generated always as identity - , note text not null - ) - """) - # insert 5 rows into source - cur.execute(""" - insert into note0 (note) - select 'how much wood would a woodchuck chuck if a woodchuck could chuck wood' - from generate_series(1, 5) - """) - # insert 5 rows into source - cur.execute(""" - insert into note0 (note) - select 'if a woodchuck could chuck wood, a woodchuck would chuck as much wood as he could' - from generate_series(1, 5) - """) - # create a vectorizer for the table - cur.execute(""" - select ai.create_vectorizer - ( 'note0'::regclass - , embedding=>ai.embedding_openai('text-embedding-3-small', 3) - , chunking=>ai.chunking_character_text_splitter('note') - , scheduling=> - ai.scheduling_timescaledb - ( interval '5m' - , initial_start=>'2050-01-06'::timestamptz - , timezone=>'America/Chicago' - ) - , indexing=>ai.indexing_diskann(min_rows=>10) - , grant_to=>null - , enqueue_existing=>true - ) - """) - vectorizer_id = cur.fetchone()[0] - cur.execute("select * from ai.vectorizer where id = %s", (vectorizer_id,)) - vectorizer = cur.fetchone() - - env = os.environ.copy() - env["TIKTOKEN_CACHE_DIR"] = str(tiktoken_cache_dir()) - env["VECTORIZER_DB_URL"] = _db_url - subprocess.run( - f"python3 -m pgai -i {vectorizer_id}", - shell=True, - check=True, - capture_output=True, - text=True, - env=env, - cwd=vectorizer_src_dir(), - ) - - with psycopg.connect(_db_url, autocommit=True, row_factory=namedtuple_row) as con: - with con.cursor() as cur: - cur.execute(f""" - select count(*) - from {vectorizer.target_schema}.{vectorizer.target_table} - """) - count = cur.fetchone()[0] - assert count == 10 diff --git a/projects/pgai/.dockerignore b/projects/pgai/.dockerignore index e79ed427..5e9f63e1 100644 --- a/projects/pgai/.dockerignore +++ b/projects/pgai/.dockerignore @@ -1,3 +1,8 @@ build vectorizer.egg-info Dockerfile +.env +.git +.github +.idea +.venv \ No newline at end of file diff --git a/projects/pgai/Makefile b/projects/pgai/Makefile new file mode 100644 index 00000000..9d5d0f29 --- /dev/null +++ b/projects/pgai/Makefile @@ -0,0 +1,92 @@ + +VERSION := $(shell awk "/^__version__ = .*/ {gsub(/__version__ = |\"/, \"\"); print}" ./pgai/__init__.py) + +.PHONY: default +default: help + +.PHONY: help +help: + @echo "pgai makefile commands:" + @echo + @echo "Development commands:" + @echo " help Show this help message" + @echo " show-version Display the current pgai version" + @echo " clean Remove build artifacts and temporary files" + @echo " build Build source distribution and wheel package" + @echo " install Install the wheel package locally" + @echo " uninstall Remove the installed pgai package" + @echo + @echo "Testing and quality:" + @echo " 
test Run pytest test suite" + @echo " lint Run ruff linter checks" + @echo " type-check Run pyright type checking" + @echo " format Show code formatting differences" + @echo + @echo "Docker commands:" + @echo " docker-build Build Docker image with version tag" + @echo " docker-run Run the Docker container in detached mode" + @echo " docker-stop Stop the running Docker container" + @echo " docker-rm Remove the Docker container and its volumes" + @echo + @echo "Git hooks:" + @echo " install-commit-hook Install semantic commit message hook" + +.PHONY: show-version +show-version: + @echo "pgai version is: $(VERSION)" + +.PHONY: clean +clean: + @rm -rf ./build + @rm -rf ./pgai.egg-info + @rm -rf ./dist + +.PHONY: build +build: + @python3 -m build --sdist --wheel + @twine check ./dist/* + +.PHONY: install +install: + @pip3 install -v --compile "./dist/pgai-$(VERSION)-py3-none-any.whl" + +.PHONY: uninstall +uninstall: + @pip3 uninstall -v -y pgai + +.PHONY: test +test: + @pytest + +.PHONY: lint +lint: + @ruff check ./ + +.PHONY: type-check +type-check: + @pyright ./ + +.PHONY: format +format: + @ruff format --diff ./ + +.PHONY: docker-build +docker-build: + @docker build -t pgai-cli:latest -t "pgai-cli:$(VERSION)" . + +.PHONY: docker-run +docker-run: + @docker run -d --name pgai-cli "pgai-cli:$(VERSION)" + +.PHONY: docker-stop +docker-stop: + @docker stop pgai-cli + +.PHONY: docker-rm +docker-rm: + @docker rm --force --volumes pgai-cli + +.PHONY: install-commit-hook +install-commit-hook: + @cd ../.. && curl --fail -o .git/hooks/commit-msg https://raw.githubusercontent.com/hazcod/semantic-commit-hook/master/commit-msg \ + && chmod 500 .git/hooks/commit-msg diff --git a/projects/pgai/requirements-dev.txt b/projects/pgai/requirements-dev.txt new file mode 100644 index 00000000..16a234aa --- /dev/null +++ b/projects/pgai/requirements-dev.txt @@ -0,0 +1,6 @@ +# this requirements file is optional +# if you are editing python on your host machine (as opposed to inside the docker container) +# you may use this on your host to aid your IDE's code completion etc. capabilities + +-r requirements-test.txt +-r requirements.txt diff --git a/requirements-test.txt b/projects/pgai/requirements-test.txt similarity index 90% rename from requirements-test.txt rename to projects/pgai/requirements-test.txt index 80ccb632..e20eb2b1 100644 --- a/requirements-test.txt +++ b/projects/pgai/requirements-test.txt @@ -4,8 +4,6 @@ ruff==0.6.9 pytest==8.3.2 python-dotenv==1.0.1 -fastapi==0.112.0 -fastapi-cli==0.0.5 vcrpy==6.0.1 pyright==1.1.385 psycopg[binary]==3.2.1