Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,30 @@ jobs:
"${IMAGE_BASE}:${{ github.sha }}-arm64"
docker manifest push "${IMAGE_BASE}:${{ github.sha }}"

# Process docs embeddings (after ECR images are pushed)
# Check if docs changed
check-docs-changes:
name: Check Docs Changes
runs-on: blacksmith-4vcpu-ubuntu-2404
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
outputs:
docs_changed: ${{ steps.filter.outputs.docs }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2 # Need at least 2 commits to detect changes
- uses: dorny/paths-filter@v3
id: filter
with:
filters: |
docs:
- 'apps/docs/content/docs/en/**'
- 'apps/sim/scripts/process-docs.ts'
- 'apps/sim/lib/chunkers/**'

# Process docs embeddings (only when docs change, after ECR images are pushed)
process-docs:
name: Process Docs
needs: build-amd64
if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging')
needs: [build-amd64, check-docs-changes]
if: needs.check-docs-changes.outputs.docs_changed == 'true'
uses: ./.github/workflows/docs-embeddings.yml
secrets: inherit
4 changes: 2 additions & 2 deletions .github/workflows/docs-embeddings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
process-docs-embeddings:
name: Process Documentation Embeddings
runs-on: blacksmith-8vcpu-ubuntu-2404
if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging'
if: github.ref == 'refs/heads/main'

steps:
- name: Checkout code
Expand Down Expand Up @@ -41,6 +41,6 @@ jobs:
- name: Process docs embeddings
working-directory: ./apps/sim
env:
DATABASE_URL: ${{ github.ref == 'refs/heads/main' && secrets.DATABASE_URL || secrets.STAGING_DATABASE_URL }}
DATABASE_URL: ${{ secrets.DATABASE_URL }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: bun run scripts/process-docs.ts --clear
16 changes: 11 additions & 5 deletions apps/sim/scripts/process-docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ interface ProcessingOptions {
*/
async function processDocs(options: ProcessingOptions = {}) {
const config = {
docsPath: options.docsPath || path.join(process.cwd(), '../../apps/docs/content/docs'),
docsPath: options.docsPath || path.join(process.cwd(), '../../apps/docs/content/docs/en'),
baseUrl: options.baseUrl || (isDev ? 'http://localhost:4000' : 'https://docs.sim.ai'),
chunkSize: options.chunkSize || 1024,
minChunkSize: options.minChunkSize || 100,
Expand Down Expand Up @@ -216,25 +216,31 @@ async function main() {

Usage: bun run process-docs.ts [options]

By default, processes English (en) documentation only.
Note: Use --clear flag when changing language scope to remove old embeddings.

Options:
--clear Clear existing embeddings before processing
--dry-run Process and display results without saving to DB
--verbose Show detailed output including text previews
--path <path> Custom path to docs directory
--path <path> Custom path to docs directory (default: docs/en)
--url <url> Custom base URL for links
--chunk-size <n> Custom chunk size in tokens (default: 1024)
--help, -h Show this help message

Examples:
# Dry run to test chunking
# Dry run to test chunking (English docs)
bun run process-docs.ts --dry-run

# Process and save to database
# Process and save to database (English docs)
bun run process-docs.ts

# Clear existing and reprocess
# Clear existing and reprocess (English docs)
bun run process-docs.ts --clear

# Process a different language
bun run process-docs.ts --path ../../apps/docs/content/docs/es

# Custom path with verbose output
bun run process-docs.ts --path ./my-docs --verbose
`)
Expand Down