diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/.gitattributes b/vector-databases/vertex-ai-embeddings-vector-mariadb/.gitattributes new file mode 100644 index 0000000..3b41682 --- /dev/null +++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/.gitattributes @@ -0,0 +1,2 @@ +/mvnw text eol=lf +*.cmd text eol=crlf diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/.gitignore b/vector-databases/vertex-ai-embeddings-vector-mariadb/.gitignore new file mode 100644 index 0000000..549e00a --- /dev/null +++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/.gitignore @@ -0,0 +1,33 @@ +HELP.md +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ + +### STS ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### IntelliJ IDEA ### +.idea +*.iws +*.iml +*.ipr + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/.mvn/wrapper/maven-wrapper.properties b/vector-databases/vertex-ai-embeddings-vector-mariadb/.mvn/wrapper/maven-wrapper.properties new file mode 100644 index 0000000..d58dfb7 --- /dev/null +++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/.mvn/wrapper/maven-wrapper.properties @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +wrapperVersion=3.3.2 +distributionType=only-script +distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/README.md b/vector-databases/vertex-ai-embeddings-vector-mariadb/README.md new file mode 100644 index 0000000..cf601e4 --- /dev/null +++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/README.md @@ -0,0 +1,133 @@ +# Vertex AI Embeddings with a MariaDB Vector Store (Spring AI) + +## Overview + +This project serves as an example of how to leverage the Spring AI framework to build applications that utilize generative AI capabilities, specifically focusing on vector embeddings and similarity search. + +**Key Features & Technologies:** + +* **Spring AI:** Utilizes the Spring AI library to simplify the integration of AI functionalities into a Spring Boot application. +* **Google Cloud Vertex AI Embeddings:** Demonstrates how to generate text embeddings using Google Cloud's Vertex AI PaLM 2 embedding models. These embeddings convert textual data into numerical vectors, capturing semantic meaning. +* **MariaDB as a Vector Database:** Shows how MariaDB, with its vector storage and search capabilities (e.g., using its `VECTOR` data type and functions like `VEC_DISTANCE_COSINE`), can be used to store and perform similarity searches on the generated Vertex AI embeddings.
+* **Vector Search/Similarity Search:** The core functionality involves ingesting data, generating embeddings for it, storing these embeddings in MariaDB, and then performing queries to find items semantically similar to a given input query. + +**Purpose:** + +The primary goal of this example is to provide developers with a practical, hands-on demonstration of: +1. Generating high-quality text embeddings with Vertex AI. +2. Storing these vector embeddings efficiently in MariaDB. +3. Performing semantic similarity searches against the stored vectors using Spring AI abstractions. +4. Integrating these components within a standard Spring Boot application. + +## Prerequisites + +Before you begin, ensure you have the following dependencies and prerequisites met: + +* You have installed [Google Cloud SDK](https://cloud.google.com/sdk/docs/install) (which includes `gcloud`). +* You have a Google Cloud Project. +* You have appropriate permissions to authenticate and access resources in your Google Cloud Project. +* You have Java 17 or later installed and a reachable MariaDB server with vector support (Maven itself is supplied by the bundled `./mvnw` wrapper). + +## Setup + +Follow these steps to get your development environment set up: + +### 1. Clone the Repository (if applicable) + +```bash +git clone <repository-url> +cd <repository-directory> +``` + +### 2. Set Up Environment Variables + +This project requires certain environment variables to be set. The most common one is `PROJECT_ID`. + +**Option A: Using a `.env` file (Recommended for local development)** + +Create a file named `.env` in the root of your project directory. Add your environment variables there: + +```env +# .env +PROJECT_ID="your-gcp-project-id" +# Add other environment variables as needed +# ANOTHER_VAR="some-value" +``` + +**Important:** Add `.env` to your `.gitignore` file to prevent committing sensitive information. + +```gitignore +# .gitignore +.env +``` + +**Option B: Exporting directly in your shell** + +You can set environment variables directly in your terminal session.
These will be lost when the session ends. + +```bash +export PROJECT_ID="your-gcp-project-id" +# export ANOTHER_VAR="some-value" +``` + +For persistent shell-specific environment variables, add these export commands to your shell's configuration file (e.g., `~/.bashrc`, `~/.zshrc`). + +### 3. Configure Google Cloud Application Default Credentials (ADC) + +Application Default Credentials (ADC) provide a way for your application to authenticate to Google Cloud services. + +1. **Log in to gcloud:** + If you haven't already, authenticate `gcloud` with your Google Cloud account: + ```bash + gcloud auth login + ``` + Follow the on-screen instructions to authorize `gcloud`. + +2. **Set up Application Default Credentials:** + This command will store your user credentials in a well-known location on your local machine, which Google Cloud client libraries can automatically find. + ```bash + gcloud auth application-default login + ``` + This will also open a browser window for you to authenticate. + + Your application will now be able to use these credentials to authenticate to Google Cloud services when running locally, provided the authenticated user has the necessary IAM permissions for the resources your application needs to access. + +### 4. Build and Configure the Project + +* Build the project and download its dependencies: `./mvnw clean package`. +* Database setup: make sure the MariaDB instance the application is configured to use is running and reachable. +* Set `app.rag.pdf.folderPath` to the folder that holds the PDF documents used by the RAG service. + +## Running the Application + +Start the application with `./mvnw spring-boot:run`, then send a request such as `GET http://localhost:8080/embeddingsMessage?message=...` (see `calls.http`). + +## Debugging + +If you are using VS Code, ensure your `.vscode/launch.json` is configured to pass environment variables. For example: + +```json +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Launch Program", + "type": "java",
+ "request": "launch", + "mainClass": "com.vectors.ai.AiKbApplication", + "env": { + "PROJECT_ID": "your-gcp-project-id" + // You can also use "${env:PROJECT_ID}" if it's set in your shell + } + } + ] +} +``` + +Replace `"your-gcp-project-id"` with your actual Project ID or use environment variable substitution if you prefer. + +--- + +Remember to replace placeholders like `<repository-url>`, `<repository-directory>`, `"your-gcp-project-id"`, and any other project-specific details with your actual information. + +This README provides a good starting point. You can expand it further with more specific details about your project, such as deployment instructions, contribution guidelines, or licensing information. \ No newline at end of file diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/calls.http b/vector-databases/vertex-ai-embeddings-vector-mariadb/calls.http new file mode 100644 index 0000000..4cacd90 --- /dev/null +++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/calls.http @@ -0,0 +1,9 @@ +#GET http://localhost:8080/scrapeAll HTTP/1.1 +#content-type: application/json + +#{ +# "text": "What does my insurance policy cover?", +# "rag": true +#} + +GET http://localhost:8080/embeddingsMessage?message="How can i submit expense report of my last trip?" HTTP/1.1 diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/mvnw b/vector-databases/vertex-ai-embeddings-vector-mariadb/mvnw new file mode 100644 index 0000000..19529dd --- /dev/null +++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/mvnw @@ -0,0 +1,259 @@ +#!/bin/sh +# ---------------------------------------------------------------------------- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Apache Maven Wrapper startup batch script, version 3.3.2 +# +# Optional ENV vars +# ----------------- +# JAVA_HOME - location of a JDK home dir, required when download maven via java source +# MVNW_REPOURL - repo url base for downloading maven distribution +# MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven +# MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output +# ---------------------------------------------------------------------------- + +set -euf +[ "${MVNW_VERBOSE-}" != debug ] || set -x + +# OS specific support. +native_path() { printf %s\\n "$1"; } +case "$(uname)" in +CYGWIN* | MINGW*) + [ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")" + native_path() { cygpath --path --windows "$1"; } + ;; +esac + +# set JAVACMD and JAVACCMD +set_java_home() { + # For Cygwin and MinGW, ensure paths are in Unix format before anything is touched + if [ -n "${JAVA_HOME-}" ]; then + if [ -x "$JAVA_HOME/jre/sh/java" ]; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACCMD="$JAVA_HOME/jre/sh/javac" + else + JAVACMD="$JAVA_HOME/bin/java" + JAVACCMD="$JAVA_HOME/bin/javac" + + if [ ! -x "$JAVACMD" ] || [ ! 
-x "$JAVACCMD" ]; then + echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2 + echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2 + return 1 + fi + fi + else + JAVACMD="$( + 'set' +e + 'unset' -f command 2>/dev/null + 'command' -v java + )" || : + JAVACCMD="$( + 'set' +e + 'unset' -f command 2>/dev/null + 'command' -v javac + )" || : + + if [ ! -x "${JAVACMD-}" ] || [ ! -x "${JAVACCMD-}" ]; then + echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2 + return 1 + fi + fi +} + +# hash string like Java String::hashCode +hash_string() { + str="${1:-}" h=0 + while [ -n "$str" ]; do + char="${str%"${str#?}"}" + h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296)) + str="${str#?}" + done + printf %x\\n $h +} + +verbose() { :; } +[ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; } + +die() { + printf %s\\n "$1" >&2 + exit 1 +} + +trim() { + # MWRAPPER-139: + # Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds. + # Needed for removing poorly interpreted newline sequences when running in more + # exotic environments such as mingw bash on Windows. 
+ printf "%s" "${1}" | tr -d '[:space:]' +} + +# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties +while IFS="=" read -r key value; do + case "${key-}" in + distributionUrl) distributionUrl=$(trim "${value-}") ;; + distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;; + esac +done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties" +[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties" + +case "${distributionUrl##*/}" in +maven-mvnd-*bin.*) + MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ + case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in + *AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;; + :Darwin*x86_64) distributionPlatform=darwin-amd64 ;; + :Darwin*arm64) distributionPlatform=darwin-aarch64 ;; + :Linux*x86_64*) distributionPlatform=linux-amd64 ;; + *) + echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2 + distributionPlatform=linux-amd64 + ;; + esac + distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip" + ;; +maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;; +*) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;; +esac + +# apply MVNW_REPOURL and calculate MAVEN_HOME +# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-,maven-mvnd--}/ +[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}" +distributionUrlName="${distributionUrl##*/}" +distributionUrlNameMain="${distributionUrlName%.*}" +distributionUrlNameMain="${distributionUrlNameMain%-bin}" +MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}" +MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")" + +exec_maven() { + unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || : + exec "$MAVEN_HOME/bin/$MVN_CMD" 
"$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD" +} + +if [ -d "$MAVEN_HOME" ]; then + verbose "found existing MAVEN_HOME at $MAVEN_HOME" + exec_maven "$@" +fi + +case "${distributionUrl-}" in +*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;; +*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;; +esac + +# prepare tmp dir +if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then + clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; } + trap clean HUP INT TERM EXIT +else + die "cannot create temp dir" +fi + +mkdir -p -- "${MAVEN_HOME%/*}" + +# Download and Install Apache Maven +verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." +verbose "Downloading from: $distributionUrl" +verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" + +# select .zip or .tar.gz +if ! command -v unzip >/dev/null; then + distributionUrl="${distributionUrl%.zip}.tar.gz" + distributionUrlName="${distributionUrl##*/}" +fi + +# verbose opt +__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR='' +[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v + +# normalize http auth +case "${MVNW_PASSWORD:+has-password}" in +'') MVNW_USERNAME='' MVNW_PASSWORD='' ;; +has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;; +esac + +if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then + verbose "Found wget ... using wget" + wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl" +elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then + verbose "Found curl ... 
using curl" + curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl" +elif set_java_home; then + verbose "Falling back to use Java to download" + javaSource="$TMP_DOWNLOAD_DIR/Downloader.java" + targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName" + cat >"$javaSource" <<-END + public class Downloader extends java.net.Authenticator + { + protected java.net.PasswordAuthentication getPasswordAuthentication() + { + return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() ); + } + public static void main( String[] args ) throws Exception + { + setDefault( new Downloader() ); + java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() ); + } + } + END + # For Cygwin/MinGW, switch paths to Windows format before running javac and java + verbose " - Compiling Downloader.java ..." + "$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java" + verbose " - Running Downloader.java ..." + "$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")" +fi + +# If specified, validate the SHA-256 sum of the Maven distribution zip file +if [ -n "${distributionSha256Sum-}" ]; then + distributionSha256Result=false + if [ "$MVN_CMD" = mvnd.sh ]; then + echo "Checksum validation is not supported for maven-mvnd." >&2 + echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." 
>&2 + exit 1 + elif command -v sha256sum >/dev/null; then + if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then + distributionSha256Result=true + fi + elif command -v shasum >/dev/null; then + if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then + distributionSha256Result=true + fi + else + echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2 + echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2 + exit 1 + fi + if [ $distributionSha256Result = false ]; then + echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2 + echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2 + exit 1 + fi +fi + +# unzip and move +if command -v unzip >/dev/null; then + unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip" +else + tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar" +fi +printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url" +mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME" + +clean || : +exec_maven "$@" diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/mvnw.cmd b/vector-databases/vertex-ai-embeddings-vector-mariadb/mvnw.cmd new file mode 100644 index 0000000..249bdf3 --- /dev/null +++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/mvnw.cmd @@ -0,0 +1,149 @@ +<# : batch portion +@REM ---------------------------------------------------------------------------- +@REM Licensed to the Apache Software Foundation (ASF) under one +@REM 
or more contributor license agreements. See the NOTICE file +@REM distributed with this work for additional information +@REM regarding copyright ownership. The ASF licenses this file +@REM to you under the Apache License, Version 2.0 (the +@REM "License"); you may not use this file except in compliance +@REM with the License. You may obtain a copy of the License at +@REM +@REM http://www.apache.org/licenses/LICENSE-2.0 +@REM +@REM Unless required by applicable law or agreed to in writing, +@REM software distributed under the License is distributed on an +@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@REM KIND, either express or implied. See the License for the +@REM specific language governing permissions and limitations +@REM under the License. +@REM ---------------------------------------------------------------------------- + +@REM ---------------------------------------------------------------------------- +@REM Apache Maven Wrapper startup batch script, version 3.3.2 +@REM +@REM Optional ENV vars +@REM MVNW_REPOURL - repo url base for downloading maven distribution +@REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven +@REM MVNW_VERBOSE - true: enable verbose log; others: silence the output +@REM ---------------------------------------------------------------------------- + +@IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0) +@SET __MVNW_CMD__= +@SET __MVNW_ERROR__= +@SET __MVNW_PSMODULEP_SAVE=%PSModulePath% +@SET PSModulePath= +@FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @( + IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B) +) +@SET PSModulePath=%__MVNW_PSMODULEP_SAVE% +@SET __MVNW_PSMODULEP_SAVE= +@SET __MVNW_ARG0_NAME__= +@SET MVNW_USERNAME= +@SET MVNW_PASSWORD= +@IF NOT "%__MVNW_CMD__%"=="" 
(%__MVNW_CMD__% %*) +@echo Cannot start maven from wrapper >&2 && exit /b 1 +@GOTO :EOF +: end batch / begin powershell #> + +$ErrorActionPreference = "Stop" +if ($env:MVNW_VERBOSE -eq "true") { + $VerbosePreference = "Continue" +} + +# calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties +$distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl +if (!$distributionUrl) { + Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties" +} + +switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) { + "maven-mvnd-*" { + $USE_MVND = $true + $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip" + $MVN_CMD = "mvnd.cmd" + break + } + default { + $USE_MVND = $false + $MVN_CMD = $script -replace '^mvnw','mvn' + break + } +} + +# apply MVNW_REPOURL and calculate MAVEN_HOME; the repo path segment is /maven/mvnd/ for mvnd and /org/apache/maven/ for plain Maven (same mapping as the mvnw shell script) +# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-,maven-mvnd--}/ +if ($env:MVNW_REPOURL) { + $MVNW_REPO_PATTERN = if ($USE_MVND) { "/maven/mvnd/" } else { "/org/apache/maven/" } + $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace '^.*'+$MVNW_REPO_PATTERN,'')" +} +$distributionUrlName = $distributionUrl -replace '^.*/','' +$distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$','' +$MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain" +if ($env:MAVEN_USER_HOME) { + $MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain" +} +$MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join '' +$MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME" + +if (Test-Path -Path "$MAVEN_HOME" -PathType Container) { + Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME" + Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" + exit $?
+} + +if (! $distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) { + Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl" +} + +# prepare tmp dir +$TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile +$TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir" +$TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null +trap { + if ($TMP_DOWNLOAD_DIR.Exists) { + try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } + catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } + } +} + +New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null + +# Download and Install Apache Maven +Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." +Write-Verbose "Downloading from: $distributionUrl" +Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" + +$webclient = New-Object System.Net.WebClient +if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) { + $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD) +} +[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 +$webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null + +# If specified, validate the SHA-256 sum of the Maven distribution zip file +$distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum +if ($distributionSha256Sum) { + if ($USE_MVND) { + Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." 
+ } + Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash + if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) { + Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property." + } +} + +# unzip and move +Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null +Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null +try { + Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null +} catch { + if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) { + Write-Error "fail to move MAVEN_HOME" + } +} finally { + try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } + catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } +} + +Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/pom.xml b/vector-databases/vertex-ai-embeddings-vector-mariadb/pom.xml new file mode 100644 index 0000000..f7ddc75 --- /dev/null +++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/pom.xml @@ -0,0 +1,163 @@ + + + 4.0.0 + + org.springframework.boot + spring-boot-starter-parent + 3.4.6 + + + com.vectors.ai + spring-ai-vertex-ai-vector-mariadb + 0.0.1-SNAPSHOT + war + spring-ai-vertex-ai-vector-mariadb + Demo project for Spring Boot, implementing vector db using maria DB, and vertex ai for embeddings + + + + + + + + + + + + + + + 17 + + + + + org.springframework.boot + spring-boot-starter-actuator + + + + org.springframework.boot + spring-boot-starter-data-rest + + + org.springframework.boot + spring-boot-starter-web + + + + org.projectlombok + lombok + true + + + org.springframework.boot + spring-boot-starter-tomcat + provided + + + 
org.springframework.boot + spring-boot-starter-test + test + + + + org.springframework.ai + spring-ai-starter-model-vertex-ai-gemini + + + org.springframework.ai + spring-ai-vertex-ai-embedding + + + org.springframework.ai + spring-ai-starter-model-vertex-ai-embedding + + + org.springframework.ai + spring-ai-advisors-vector-store + + + + org.springframework.ai + spring-ai-starter-vector-store-mariadb + + + org.jsoup + jsoup + 1.20.1 + + + org.mariadb.jdbc + mariadb-java-client + 3.5.3 + + + org.apache.pdfbox + pdfbox + 3.0.2 + + + + + + org.springframework.ai + spring-ai-bom + 1.0.0-SNAPSHOT + pom + import + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + org.projectlombok + lombok + + + + + + org.springframework.boot + spring-boot-maven-plugin + + + + org.projectlombok + lombok + + + + + + + + + spring-milestones + Spring Milestones + https://repo.spring.io/milestone + + false + + + + spring-snapshots + Spring Snapshots + https://repo.spring.io/snapshot + + false + + + + diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/AiKbApplication.java b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/AiKbApplication.java new file mode 100644 index 0000000..317fe04 --- /dev/null +++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/AiKbApplication.java @@ -0,0 +1,11 @@ + +package com.vectors.ai; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +@SpringBootApplication +public class AiKbApplication { + + public static void main(String[] args) { + SpringApplication.run(AiKbApplication.class, args); + } +} diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/ChatController.java b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/ChatController.java new file mode 100644 index 0000000..aef6a45 --- 
/dev/null
+++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/ChatController.java
@@ -0,0 +1,155 @@
+package com.vectors.ai;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import org.springframework.ai.chat.messages.Message;
+import org.springframework.ai.chat.messages.UserMessage;
+import org.springframework.ai.chat.model.ChatResponse;
+import org.springframework.ai.chat.prompt.Prompt;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.support.ToolCallbacks;
+import org.springframework.ai.tool.ToolCallback;
+import org.springframework.ai.vertexai.gemini.VertexAiGeminiChatModel;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.ai.vertexai.gemini.VertexAiGeminiChatOptions;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestMethod;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+import org.springframework.web.client.RestTemplate;
+import org.springframework.beans.factory.annotation.Value;
+import com.google.cloud.vertexai.VertexAI;
+
+import jakarta.annotation.PostConstruct;
+
+/**
+ * REST endpoints that forward user messages to a Vertex AI Gemini chat model.
+ *
+ * The model is built with GetUserFunction registered as a tool. The
+ * /embeddingsMessage endpoint augments the question with documents returned by
+ * RagService; the /functionMessage endpoint replays the conversation history.
+ *
+ * NOTE(review): the history is one list shared by every caller of this
+ * controller and is not synchronized - confirm that is acceptable beyond a
+ * single-user demo.
+ */
+@RestController
+public class ChatController {
+
+    @Value("${spring.ai.vertex.ai.gemini.project-id}")
+    private String projectId;
+    @Value("${spring.ai.vertex.ai.gemini.location}")
+    private String location;
+    @Value("${spring.ai.model}")
+    private String model;
+
+    private VertexAiGeminiChatOptions options;
+    private VertexAiGeminiChatModel chatModel;
+
+    // Messages exchanged so far, in order of arrival.
+    private List<Message> conversationHistory = new ArrayList<>();
+    @Autowired
+    private RagService ragService;
+
+    /**
+     * Builds the chat options and the Gemini chat model from the injected
+     * project id, location and model name.
+     */
+    @PostConstruct
+    public void init() {
+
+        ToolCallback[] getUserFunction = ToolCallbacks
+                .from(new GetUserFunction());
+
+        options = VertexAiGeminiChatOptions.builder()
+                .model(model)
+                .temperature(0.4)
+                // tool call backs are not compatible with google search
+                // .googleSearchRetrieval(true)
+                .toolCallbacks(getUserFunction)
+                .build();
+
+        chatModel = VertexAiGeminiChatModel.builder()
+                .vertexAI(new VertexAI(projectId, location))
+                .defaultOptions(options)
+                .build();
+    }
+
+    /**
+     * Answers a question using retrieval-augmented generation.
+     *
+     * Documents similar to the message are fetched through RagService and their
+     * formatted content is prepended to the question, which is then sent to the
+     * model as a single user message. The question, the retrieved context (when
+     * there is any) and the model's reply are appended to the history.
+     *
+     * @param message the user's question; defaults to "Tell me a joke"
+     * @return the text of the model's reply
+     */
+    @RequestMapping(path="/embeddingsMessage", method=RequestMethod.GET)
+    public String requestEmbeddingsMessage(@RequestParam(value = "message", defaultValue = "Tell me a joke") String message) {
+
+        List<Document> relevantDocs = ragService.getEmbeddings(message);
+
+        StringBuilder contextBuilder = new StringBuilder();
+        if (relevantDocs != null && !relevantDocs.isEmpty()) {
+            contextBuilder.append("Use the following information to answer the question:\n\n");
+            for (Document doc : relevantDocs) {
+                contextBuilder.append("Context Document ").append(":\n");
+                contextBuilder.append(doc.getFormattedContent());
+                contextBuilder.append("\n\n");
+            }
+        }
+        String context = contextBuilder.toString();
+
+        // Adding the prompt to the history
+        conversationHistory.add(new UserMessage(message));
+        // Adding context from relevant files to the history; skipped when nothing
+        // was retrieved so the history never holds an empty message
+        if (!context.isEmpty()) {
+            conversationHistory.add(new UserMessage(context));
+        }
+        // Preparing the prompt with the augmented data
+        String augmentedQuery = context + "User Question: " + message;
+        Prompt prompt = new Prompt(Collections.singletonList(new UserMessage(augmentedQuery)));
+        // Calling the model
+        ChatResponse response = this.chatModel.call(prompt);
+
+        return recordReply(response);
+    }
+
+    /**
+     * Sends the message to the model together with the whole history.
+     *
+     * @param message the user's message; defaults to "Tell me a joke"
+     * @return the text of the model's reply
+     */
+    @RequestMapping(path="/functionMessage", method=RequestMethod.GET)
+    public String requestFunctionMessage(@RequestParam(value = "message", defaultValue = "Tell me a joke") String message) {
+
+        // Adding the prompt to the history
+        conversationHistory.add(new UserMessage(message));
+        Prompt prompt = new Prompt(conversationHistory);
+        // Calling the model
+        ChatResponse response = this.chatModel.call(prompt);
+
+        return recordReply(response);
+    }
+
+    /**
+     * Appends the model's reply to the history and returns its text.
+     *
+     * The reply is stored as the message object the model produced instead of
+     * being wrapped in a UserMessage, so replaying the history does not present
+     * the model's own words as user input.
+     *
+     * @param response the response returned by the chat model
+     * @return the text of the response's result output
+     */
+    private String recordReply(ChatResponse response) {
+        // Adding the model response to the history
+        conversationHistory.add(response.getResult().getOutput());
+        return response.getResult().getOutput().getText();
+    }
+}
\ No newline at end of file
diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/GetUserFunction.java b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/GetUserFunction.java
new file mode 100644
index 0000000..a196580
--- /dev/null
+++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/GetUserFunction.java
@@ -0,0 +1,27 @@
+package com.vectors.ai;
+import org.springframework.ai.tool.annotation.Tool;
+import org.springframework.ai.tool.annotation.ToolParam;
+
+/**
+ * Tool exposed to the chat model for looking up a user by id.
+ */
+public class GetUserFunction {
+
+    /**
+     * Returns a description of the user with the given id.
+     *
+     * @param userId the id to look up; only "123" is known in this example
+     * @return a sentence with the user's name and email, or a "not found" message
+     */
+    @Tool(name = "getUser",description = "Return the user email based on userId parameter")
+    public String getUser(@ToolParam String userId) {
+        // In a real application, you would fetch user data from a database or external service
+        // For this example, we'll return a dummy response
+        if ("123".equals(userId)) {
+            return "User with ID 123 found. Name: John Doe, Email: john.doe@example.com";
+        } else {
+            return "User with ID " + userId + " not found.";
+        }
+
+    }
+}
diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/RagService.java b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/RagService.java
new file mode 100644
index 0000000..b36d6e8
--- /dev/null
+++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/RagService.java
@@ -0,0 +1,191 @@
+package com.vectors.ai;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.transformer.splitter.TokenTextSplitter;
+import org.springframework.ai.vectorstore.SearchRequest;
+import org.springframework.ai.vectorstore.VectorStore;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.jdbc.core.JdbcTemplate;
+import org.springframework.stereotype.Service;
+
+import jakarta.annotation.PostConstruct;
+
+/**
+ * Loads the PDF files of a classpath folder into the vector store at startup
+ * and runs similarity searches against it.
+ */
+@Service
+public class RagService {
+
+    private static final Logger logger = LoggerFactory.getLogger(RagService.class);
+
+    @Autowired
+    private VectorStore vectorStore;
+
+    @Value("${app.rag.pdf.folderPath}")
+    private String pdfFolderPath;
+    private Optional<JdbcTemplate> nativeClient;
+    private JdbcTemplate jdbc;
+    @Value("${spring.ai.embedding.chunk-size}")
+    private Integer chunkSize;
+    // Counts the stored rows whose metadata "source" equals the given file name.
+    private static final String checkIfFileExistQuery="SELECT COUNT(*) FROM vector_store WHERE JSON_UNQUOTE(JSON_EXTRACT(metadata, '$.source')) = ?";
+
+    /**
+     * Captures the vector store's native JDBC client, when it offers one, and
+     * then loads the PDF folder.
+     *
+     * @throws URISyntaxException if the folder's resource URL is not a valid URI
+     */
+    @PostConstruct
+    public void init() throws URISyntaxException {
+        // loadSampleDocuments();
+        this.nativeClient = vectorStore.getNativeClient();
+        // Use the native client for MariaDB-specific operations
+        this.jdbc = this.nativeClient.orElse(null);
+        if (this.jdbc == null) {
+            logger.warn("Vector store exposes no native JDBC client; already-processed PDFs cannot be detected.");
+        }
+        loadPDFEmbeddings();
+    }
+
+    // Loading example files to the vector search
+    public void loadSampleDocuments() {
+        List<Document> documents = List.of(
+                new Document("Spring AI rocks Spring AI rocks Spring AI rocks Spring AI rocks Spring AI rocks", Map.of("source", "sample-doc-1")),
+                new Document("The World is Big and Salvation Lurks Around the Corner", Map.of("source", "sample-doc-2")),
+                new Document("You walk forward facing the past and you turn back toward the future.", Map.of("source", "sample-doc-3")));
+        // Add the documents to MariaDB
+        vectorStore.add(documents);
+        logger.info("Loaded {} sample documents into the vector store.", documents.size());
+    }
+
+    /**
+     * Parses every PDF in the configured classpath folder, splits the text into
+     * chunks and adds the chunks to the vector store.
+     *
+     * Files whose name is already present in the store are skipped. Nothing is
+     * loaded, and a warning is logged, when the folder is missing, is not a
+     * directory, or is not on the local file system.
+     *
+     * @throws URISyntaxException if the folder's resource URL is not a valid URI
+     */
+    public void loadPDFEmbeddings() throws URISyntaxException {
+        logger.info("Attempting to load PDF documents from folder: {}", pdfFolderPath);
+
+        // getResource returns null for a missing resource; check before using it
+        URL resource = RagService.class.getClassLoader().getResource(pdfFolderPath);
+        if (resource == null) {
+            logger.warn("PDF folder path does not exist or is not a directory: {}", pdfFolderPath);
+            return;
+        }
+        URI uri = resource.toURI();
+        // Paths.get(URI) only resolves against an installed file system; a folder
+        // inside a packaged archive is reported with another scheme
+        if (!"file".equals(uri.getScheme())) {
+            logger.warn("PDF folder {} is not on the local file system ({}), skipping.", pdfFolderPath, uri);
+            return;
+        }
+
+        Path folderPath = Paths.get(uri);
+        if (!Files.isDirectory(folderPath)) {
+            logger.warn("PDF folder path does not exist or is not a directory: {}", pdfFolderPath);
+            return;
+        }
+
+        List<Document> pdfDocuments = new ArrayList<>();
+        TokenTextSplitter textSplitter = TokenTextSplitter.builder()
+                .withChunkSize(chunkSize)
+                .build();
+
+        logger.info("Using TokenTextSplitter with chunkSize={}", chunkSize);
+
+        try (DirectoryStream<Path> stream = Files.newDirectoryStream(folderPath, "*.pdf")) {
+            for (Path pdfFile : stream) {
+                String fileName = pdfFile.getFileName().toString();
+                // Checking if file was processed already
+                if (isAlreadyStored(fileName)) {
+                    logger.info("PDF already processed and found in vector store, skipping: {}", fileName);
+                    continue; // Skip to the next file
+                }
+
+                try (PDDocument document = Loader.loadPDF(pdfFile.toFile())) {
+
+                    PDFTextStripper pdfStripper = new PDFTextStripper();
+                    String text = pdfStripper.getText(document);
+                    // You can add more metadata if needed, e.g., last modified date, etc.
+                    Map<String, Object> metadata = Map.of("source", fileName, "filePath", pdfFile.toString());
+                    pdfDocuments.add(new Document(text, metadata));
+                    logger.info("Successfully parsed and created document for: {}", pdfFile.getFileName());
+                } catch (IOException e) {
+                    logger.error("Error processing PDF file {}: {}", pdfFile.getFileName(), e.getMessage(), e);
+                }
+            }
+        } catch (IOException e) {
+            logger.error("Error reading PDF directory {}: {}", pdfFolderPath, e.getMessage(), e);
+            return;
+        }
+
+        if (!pdfDocuments.isEmpty()) {
+            logger.info("Splitting {} PDF documents into chunks...", pdfDocuments.size());
+            List<Document> splittedDocuments = textSplitter.split(pdfDocuments);
+            vectorStore.add(splittedDocuments);
+            logger.info("Successfully added {} chunks from {} PDF documents to the vector store.", splittedDocuments.size(), pdfDocuments.size());
+        } else {
+            logger.info("No PDF documents found or processed in folder: {}", pdfFolderPath);
+        }
+    }
+
+    /**
+     * Tells whether the vector store already holds content from the given file.
+     *
+     * @param fileName the PDF file name stored as the "source" metadata value
+     * @return true when at least one stored row has that source; false when there
+     *         is none or when no JDBC client is available to check
+     */
+    private boolean isAlreadyStored(String fileName) {
+        if (jdbc == null) {
+            return false;
+        }
+        Integer count = jdbc.queryForObject(checkIfFileExistQuery, Integer.class, fileName);
+        return count != null && count > 0;
+    }
+
+    /**
+     * Retrieves the documents most similar to a query.
+     *
+     * @param query the text to search for
+     * @return up to two matching documents; empty when the store returns none
+     */
+    public List<Document> getEmbeddings(String query) {
+        // Retrieve documents similar to a query
+        SearchRequest searchRequest = SearchRequest.builder()
+                .query(query)
+                .topK(2) // Consider making topK configurable
+                .build();
+        List<Document> results = vectorStore.similaritySearch(searchRequest);
+        if (results == null) {
+            results = List.of();
+        }
+        logger.info("Found {} documents for query '{}': {}", results.size(), query, results);
+        return results;
+    }
+}
diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/ServletInitializer.java
b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/ServletInitializer.java
new file mode 100644
index 0000000..832995a
--- /dev/null
+++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/java/com/vectors/ai/ServletInitializer.java
@@ -0,0 +1,16 @@
+package com.vectors.ai;
+
+import org.springframework.boot.builder.SpringApplicationBuilder;
+import org.springframework.boot.web.servlet.support.SpringBootServletInitializer;
+
+/**
+ * Servlet initializer that registers AiKbApplication as the application source.
+ */
+public class ServletInitializer extends SpringBootServletInitializer {
+
+    @Override
+    protected SpringApplicationBuilder configure(SpringApplicationBuilder application) {
+        return application.sources(AiKbApplication.class);
+    }
+
+}
diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/application.properties b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/application.properties
new file mode 100644
index 0000000..6657ca9
--- /dev/null
+++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/application.properties
@@ -0,0 +1,29 @@
+spring.application.name=spring-ai-kb-vector-db
+
+# Classpath folder scanned for PDF files at startup
+app.rag.pdf.folderPath=embeddings_pdfs
+
+# Vertex AI Gemini chat model
+spring.ai.model=gemini-2.0-flash
+spring.ai.vertex.ai.gemini.project-id=${PROJECT_ID}
+spring.ai.vertex.ai.gemini.location=us-west1
+
+# Vertex AI text embeddings; dimensions is kept equal to spring.ai.vectorstore.mariadb.dimensions
+spring.ai.vertex.ai.embedding.project-id=${PROJECT_ID}
+spring.ai.vertex.ai.embedding.location=us-west1
+spring.ai.vertex.ai.embedding.text.options.model=text-embedding-004
+spring.ai.vertex.ai.embedding.text.options.dimensions=768
+# Chunk size passed to the text splitter when loading PDFs
+spring.ai.embedding.chunk-size=200
+
+# MariaDB connection; credentials match composer.yaml
+spring.datasource.url=jdbc:mariadb://localhost:3306/vector_db
+spring.datasource.username=root
+spring.datasource.password=password
+
+# MariaDB vector store
+spring.ai.vectorstore.mariadb.initialize-schema=true
+spring.ai.vectorstore.mariadb.schema-name=vector_db
+spring.ai.vectorstore.mariadb.table-name=vector_store
+spring.ai.vectorstore.mariadb.distance-type=COSINE
+spring.ai.vectorstore.mariadb.dimensions=768
\ No newline at end of file
diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/composer.yaml b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/composer.yaml
new file mode 100644
index 0000000..900d0de
--- /dev/null
+++ b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/composer.yaml
@@ -0,0 +1,14 @@
+# Use root/password as user/password credentials
+
+services:
+
+  db:
+    image: mariadb
+    restart: always
+    environment:
+      MARIADB_ROOT_PASSWORD: password
+      # Create the database named in spring.datasource.url on first start
+      MARIADB_DATABASE: vector_db
+    ports:
+      # Publish the port used by spring.datasource.url (localhost:3306)
+      - "3306:3306"
\ No newline at end of file
diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank - Strategic Plan for Fiscal Year 2024-2025.pdf b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank - Strategic Plan for Fiscal Year 2024-2025.pdf
new file mode 100644
index 0000000..da69aa8
Binary files /dev/null and b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank - Strategic Plan for Fiscal Year 2024-2025.pdf differ
diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank Business Travel Guide.pdf b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank Business Travel Guide.pdf
new file mode 100644
index 0000000..dda0413
Binary files /dev/null and b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank Business Travel Guide.pdf differ
diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank Company Culture.pdf b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank Company Culture.pdf
new file mode 100644
index 0000000..baa4dad
Binary files /dev/null and
b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank Company Culture.pdf differ diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank Founding Story.pdf b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank Founding Story.pdf new file mode 100644 index 0000000..12e39ab Binary files /dev/null and b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank Founding Story.pdf differ diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank New Employee Guide.pdf b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank New Employee Guide.pdf new file mode 100644 index 0000000..1f83b46 Binary files /dev/null and b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank New Employee Guide.pdf differ diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank Organizations & Roles.pdf b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank Organizations & Roles.pdf new file mode 100644 index 0000000..8de717a Binary files /dev/null and b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Cymbal Bank Organizations & Roles.pdf differ diff --git a/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Project Plan_ Implementation of New Digital Banking Platform for Cymbal Bank.pdf b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Project Plan_ Implementation of New Digital Banking Platform for Cymbal Bank.pdf new file mode 100644 index 0000000..ccdbeca Binary files /dev/null and 
b/vector-databases/vertex-ai-embeddings-vector-mariadb/src/main/resources/embeddings_pdfs/Project Plan_ Implementation of New Digital Banking Platform for Cymbal Bank.pdf differ