diff --git a/apps/proxy/.env.example b/apps/proxy/.env.example
index c87db2f7..adf3c16f 100644
--- a/apps/proxy/.env.example
+++ b/apps/proxy/.env.example
@@ -3,6 +3,14 @@ AWS_ENDPOINT_URL_S3=http://host.docker.internal:54321/storage/v1/s3
 AWS_S3_BUCKET=s3fs
 AWS_SECRET_ACCESS_KEY=850181e4652dd023b7a98c58ae0d2d34bd487ee0cc3254aed6eda37307425907
 AWS_REGION=local
+# Cache disk usage threshold, as a percentage of total disk space
+CACHE_DISK_USAGE_THRESHOLD=90
+CACHE_PATH=/var/lib/postgres-new/cache
+# Minimum interval between cache cleanup runs, in hours
+CACHE_SCHEDULE_INTERVAL=1
+CACHE_TIMESTAMP_FILE=/var/lib/postgres-new/delete_cache_last_run
+# Cache time to live, in hours
+CACHE_TTL=24
 S3FS_MOUNT=/mnt/s3
 SUPABASE_SERVICE_ROLE_KEY=""
 SUPABASE_URL=""
diff --git a/apps/proxy/fly.toml b/apps/proxy/fly.toml
index d1d5a7dc..9810d8c4 100644
--- a/apps/proxy/fly.toml
+++ b/apps/proxy/fly.toml
@@ -6,6 +6,15 @@ primary_region = 'iad'
 dockerfile = "Dockerfile"
 ignorefile = ".dockerignore"
 
+[env]
+CACHE_DISK_USAGE_THRESHOLD = "90"
+CACHE_SCHEDULE_INTERVAL = "1"
+CACHE_TIMESTAMP_FILE = "/var/lib/postgres-new/delete_cache_last_run"
+CACHE_TTL = "24"
+CACHE_PATH = "/var/lib/postgres-new/cache"
+S3FS_MOUNT = "/mnt/s3"
+WILDCARD_DOMAIN = "db.postgres.new"
+
 [[services]]
 internal_port = 5432
 protocol = "tcp"
diff --git a/apps/proxy/package.json b/apps/proxy/package.json
index ae0d28c0..dd74a2c7 100644
--- a/apps/proxy/package.json
+++ b/apps/proxy/package.json
@@ -14,7 +14,8 @@
     "@supabase/supabase-js": "^2.45.1",
     "find-up": "^7.0.0",
     "pg-gateway": "0.3.0-alpha.6",
-    "tar": "^7.4.3"
+    "tar": "^7.4.3",
+    "zod": "^3.23.8"
   },
   "devDependencies": {
     "@postgres-new/supabase": "*",
diff --git a/apps/proxy/src/delete-cache.ts b/apps/proxy/src/delete-cache.ts
new file mode 100644
index 00000000..df430acd
--- /dev/null
+++ b/apps/proxy/src/delete-cache.ts
@@ -0,0 +1,111 @@
+import * as fs from 'node:fs/promises'
+import * as path from 'node:path'
+import { exec } from 'node:child_process'
+import { promisify } from 'node:util'
+import { env } from './env.js'
+const execAsync = promisify(exec)
+
+async function deleteOldFolders() {
+  const now = Date.now()
+  const ttlInMillis = env.CACHE_TTL * 60 * 60 * 1000
+
+  try {
+    const folders = await fs.readdir(env.CACHE_PATH)
+    for (const folder of folders) {
+      const folderPath = path.join(env.CACHE_PATH, folder)
+      const stats = await fs.stat(folderPath)
+
+      if (stats.isDirectory() && now - stats.mtimeMs > ttlInMillis) {
+        await fs.rm(folderPath, { recursive: true, force: true })
+        console.log(`Deleted folder: ${folderPath}`)
+      }
+    }
+  } catch (err) {
+    console.error('Failed to delete old folders:', err)
+  }
+}
+
+async function scriptAlreadyRan() {
+  try {
+    const lastRun = parseInt(await fs.readFile(env.CACHE_TIMESTAMP_FILE, 'utf8'), 10)
+    const now = Math.floor(Date.now() / 1000)
+    const diff = now - lastRun
+    // Timestamps are stored in seconds, so the interval must be converted to seconds as well
+    return diff < env.CACHE_SCHEDULE_INTERVAL * 60 * 60
+  } catch (err) {
+    // The timestamp file does not exist yet, so the script has never run
+    if (err instanceof Error && 'code' in err && err.code === 'ENOENT') {
+      return false
+    }
+    throw err
+  }
+}
+
+async function updateTimestampFile() {
+  const now = Math.floor(Date.now() / 1000).toString()
+  await fs.writeFile(env.CACHE_TIMESTAMP_FILE, now)
+}
+
+/**
+ * Get the used disk space of the root filesystem as a percentage
+ */
+async function getDiskUsage() {
+  // awk 'NR==2 {print $5}' prints the 5th column of the second line of df's output, which is the use percentage
+  // sed 's/%//' strips the trailing % so the value can be parsed as an integer
+  const command = `df / | awk 'NR==2 {print $5}' | sed 's/%//'`
+  const { stdout } = await execAsync(command)
+  return parseInt(stdout.trim(), 10)
+}
+
+async function getFoldersByModificationTime() {
+  const folders = await fs.readdir(env.CACHE_PATH, { withFileTypes: true })
+  const folderStats = await Promise.all(
+    folders
+      .filter((dirent) => dirent.isDirectory())
+      .map(async (dirent) => {
+        const fullPath = path.join(env.CACHE_PATH, dirent.name)
+        const stats = await fs.stat(fullPath)
+        return { path: fullPath, mtime: stats.mtime.getTime() }
+      })
+  )
+  return folderStats.sort((a, b) => a.mtime - b.mtime).map((folder) => folder.path)
+}
+
+export async function deleteCache() {
+  if (await scriptAlreadyRan()) {
+    console.log(`Script already ran in the last ${env.CACHE_SCHEDULE_INTERVAL} hours, skipping.`)
+    return
+  }
+
+  await updateTimestampFile()
+
+  // Always delete old folders based on TTL
+  await deleteOldFolders()
+
+  let diskUsage = await getDiskUsage()
+
+  // If disk usage is at or above the threshold, delete additional old folders
+  if (diskUsage >= env.CACHE_DISK_USAGE_THRESHOLD) {
+    console.log(
+      `Disk usage is at ${diskUsage}%, which is above the threshold of ${env.CACHE_DISK_USAGE_THRESHOLD}%.`
+    )
+
+    const folders = await getFoldersByModificationTime()
+
+    // Delete the least recently modified folders one by one until disk usage drops below the threshold
+    for (const folder of folders) {
+      console.log(`Deleting folder: ${folder}`)
+      await fs.rm(folder, { recursive: true, force: true })
+
+      diskUsage = await getDiskUsage()
+      if (diskUsage < env.CACHE_DISK_USAGE_THRESHOLD) {
+        console.log(`Disk usage is now at ${diskUsage}%, which is below the threshold.`)
+        break
+      }
+    }
+  } else {
+    console.log(
+      `Disk usage is at ${diskUsage}%, which is below the threshold of ${env.CACHE_DISK_USAGE_THRESHOLD}%.`
+    )
+  }
+}
diff --git a/apps/proxy/src/env.ts b/apps/proxy/src/env.ts
new file mode 100644
index 00000000..cb36f7c9
--- /dev/null
+++ b/apps/proxy/src/env.ts
@@ -0,0 +1,21 @@
+import { z } from 'zod'
+
+export const env = z
+  .object({
+    AWS_ACCESS_KEY_ID: z.string(),
+    AWS_ENDPOINT_URL_S3: z.string(),
+    AWS_S3_BUCKET: z.string(),
+    AWS_SECRET_ACCESS_KEY: z.string(),
+    AWS_REGION: z.string(),
+    CACHE_DISK_USAGE_THRESHOLD: z.string().transform((val) => parseInt(val, 10)),
+    CACHE_PATH: z.string(),
+    CACHE_SCHEDULE_INTERVAL: z.string().transform((val) => parseInt(val, 10)),
+    CACHE_TIMESTAMP_FILE: z.string(),
+    CACHE_TTL: z.string().transform((val) => parseInt(val, 10)),
+    DATA_MOUNT: z.string(),
+    S3FS_MOUNT: z.string(),
+    SUPABASE_SERVICE_ROLE_KEY: z.string(),
+    SUPABASE_URL: z.string(),
+    WILDCARD_DOMAIN: z.string(),
+  })
+  .parse(process.env)
diff --git a/apps/proxy/src/index.ts b/apps/proxy/src/index.ts
index b8939c2f..894ff143 100644
--- a/apps/proxy/src/index.ts
+++ b/apps/proxy/src/index.ts
@@ -10,12 +10,15 @@ import { PostgresConnection, ScramSha256Data, TlsOptions } from 'pg-gateway'
 import { createClient } from '@supabase/supabase-js'
 import type { Database } from '@postgres-new/supabase'
 import { findUp } from 'find-up'
+import { env } from './env.js'
+import { deleteCache } from './delete-cache.js'
+import path from 'node:path'
+
+const supabaseUrl = env.SUPABASE_URL
+const supabaseKey = env.SUPABASE_SERVICE_ROLE_KEY
+const s3fsMount = env.S3FS_MOUNT
+const wildcardDomain = env.WILDCARD_DOMAIN
-const supabaseUrl = process.env.SUPABASE_URL ?? 'http://127.0.0.1:54321'
-const supabaseKey = process.env.SUPABASE_SERVICE_ROLE_KEY ?? ''
-const dataMount = process.env.DATA_MOUNT ?? './data'
-const s3fsMount = process.env.S3FS_MOUNT ?? './s3'
-const wildcardDomain = process.env.WILDCARD_DOMAIN ?? 'db.example.com'
 
 const packageLockJsonPath = await findUp('package-lock.json')
 if (!packageLockJsonPath) {
   throw new Error('package-lock.json not found')
@@ -31,10 +34,9 @@ const pgliteVersion = `(PGlite ${packageLockJson.packages['node_modules/@electri
 
 const dumpDir = `${s3fsMount}/dbs`
 const tlsDir = `${s3fsMount}/tls`
-const dbDir = `${dataMount}/dbs`
 
 await mkdir(dumpDir, { recursive: true })
-await mkdir(dbDir, { recursive: true })
+await mkdir(env.CACHE_PATH, { recursive: true })
 await mkdir(tlsDir, { recursive: true })
 
 const tls: TlsOptions = {
@@ -77,6 +79,10 @@ const supabase = createClient<Database>(supabaseUrl, supabaseKey)
 const server = net.createServer((socket) => {
   let db: PGliteInterface
 
+  deleteCache().catch((err) => {
+    console.error(`Error deleting cache: ${err}`)
+  })
+
   const connection = new PostgresConnection(socket, {
     serverVersion: async () => {
       const {
@@ -161,12 +167,12 @@ const server = net.createServer((socket) => {
 
       console.log(`Serving database '${databaseId}'`)
 
-      const dbPath = `${dbDir}/${databaseId}`
+      const dbPath = path.join(env.CACHE_PATH, databaseId)
 
       if (!(await fileExists(dbPath))) {
         console.log(`Database '${databaseId}' is not cached, downloading...`)
 
-        const dumpPath = `${dumpDir}/${databaseId}.tar.gz`
+        const dumpPath = path.join(dumpDir, `${databaseId}.tar.gz`)
 
         if (!(await fileExists(dumpPath))) {
           connection.sendError({
diff --git a/package-lock.json b/package-lock.json
index 4eb6f9b6..19ec721a 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -20,7 +20,8 @@
         "@supabase/supabase-js": "^2.45.1",
         "find-up": "^7.0.0",
         "pg-gateway": "0.3.0-alpha.6",
-        "tar": "^7.4.3"
+        "tar": "^7.4.3",
+        "zod": "^3.23.8"
       },
       "devDependencies": {
         "@postgres-new/supabase": "*",
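
A note on `getDiskUsage` in `delete-cache.ts`: shelling out to `df | awk | sed` works, but Node.js 18.15+ ships `statfs` in `node:fs/promises`, which avoids the subprocess entirely. A minimal sketch of that alternative, assuming Node.js >= 18.15; `getDiskUsagePercent` is a hypothetical name, and the rounding only approximates `df`'s Use% calculation:

```ts
import { statfs } from 'node:fs/promises'

// Hypothetical replacement for parsing `df` output (requires Node.js >= 18.15).
async function getDiskUsagePercent(mountPoint = '/'): Promise<number> {
  const { blocks, bfree, bavail } = await statfs(mountPoint)
  // df reports Use% as used / (used + blocks available to unprivileged users),
  // rounded up, so this mirrors that calculation approximately.
  const used = blocks - bfree
  return Math.ceil((used / (used + bavail)) * 100)
}
```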
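On `env.ts`: `z.string().transform((val) => parseInt(val, 10))` converts but never rejects, so a value like `CACHE_TTL=abc` parses to `NaN` and flows through silently. Since zod >= 3.20 provides `z.coerce.number()`, the numeric variables could be both coerced and validated at startup. A sketch of that alternative, using the same variable names; the bounds shown are assumptions, not values from this PR:

```ts
import { z } from 'zod'

// Sketch only: z.coerce.number() runs Number() on the raw string, then the
// usual number checks reject NaN and out-of-range values at startup.
const cacheEnv = z.object({
  CACHE_DISK_USAGE_THRESHOLD: z.coerce.number().int().min(1).max(100),
  CACHE_SCHEDULE_INTERVAL: z.coerce.number().positive(), // hours
  CACHE_TTL: z.coerce.number().positive(), // hours
})
```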
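One more readability note on `delete-cache.ts`: the TTL check works in epoch milliseconds (`stats.mtimeMs`), while the last-run check works in epoch seconds (the timestamp file stores `Math.floor(Date.now() / 1000)`). Hypothetical helpers like these, not part of the PR, would keep each conversion explicit at its call site and make a mixed-unit comparison harder to reintroduce:

```ts
// Hypothetical unit helpers, not part of this PR.
const hoursToMillis = (hours: number): number => hours * 60 * 60 * 1000
const hoursToSeconds = (hours: number): number => hours * 60 * 60

// TTL check compares epoch milliseconds:
//   now - stats.mtimeMs > hoursToMillis(env.CACHE_TTL)
// Last-run check compares epoch seconds:
//   nowSeconds - lastRun < hoursToSeconds(env.CACHE_SCHEDULE_INTERVAL)
```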