Skip to content

Commit 4cbcede

Browse files
committed
sec: Add Gitleaks pre-commit hook and configuration to prevent secret leaks
- Add Gitleaks v8.18.1 to pre-commit hooks for local secret detection
- Create .gitleaks.toml with custom rules for IBM Cloud API keys
- Add Terraform .tfvars and Ansible playbook secret detection
- Configure allowlist for false positives (.example files, docs, tests)
- Auto-fix ansible-lint warnings in deploy-roks-milvus-operator.yml

This prevents secrets from being committed locally, complementing CI/CD security scans (Gitleaks + TruffleHog) that run on PR creation. Fixes security gap where --no-verify could bypass detect-secrets.

Related to PR #678 (closed), PR #679 (clean)
1 parent 0773a81 commit 4cbcede

File tree

4 files changed

+296
-317
lines changed

4 files changed

+296
-317
lines changed

.gitleaks.toml

Lines changed: 42 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -1,182 +1,53 @@
1-
# .gitleaks.toml
1+
# Gitleaks configuration for RAG Modulo
2+
# Detects secrets in code, including IBM Cloud API keys, AWS keys, and more
23

3-
[allowlist]
4-
description = "Allowlist for paths and commits that are known to be safe."
5-
paths = [
6-
'''gitleaks.toml''',
7-
'''(.*?)(go.sum|go.mod|vendor)''',
8-
'''(.*?)(package.json|package-lock.json|npm-shrinkwrap.json)''',
9-
'''(.*?)(Pipfile|Pipfile.lock|poetry.lock)''',
10-
'''(.*?)(Gemfile.lock|gems.locked)''',
11-
'''(.*?)(Cargo.lock)''',
12-
'''(.*?)(yarn.lock)''',
13-
'''(.*?)(composer.lock)''',
14-
'''(.*?)(.snap)''',
15-
'''(.*?)(\.md|\.txt)''', # Documentation files
16-
'''env\.example''', # Example env files
17-
'''env\..*\.example''', # env.dev.example, env.jules.example, etc.
18-
'''(.*?)test_.*\.py''', # Test files with fixtures
19-
'''(.*?)tests/fixtures/.*''', # Test fixtures
20-
'''deployment/scripts/.*''', # Deployment scripts with env var templates
21-
'''scripts/.*\.sh''', # All deployment/setup scripts
22-
'''scripts/ibm-create-secrets\.sh''', # IBM secret creation scripts
23-
'''deployment/k8s/.*/secrets/.*''', # Kubernetes secret templates
24-
'''\.github/workflows/.*''', # GitHub Actions workflows (use ${{ secrets.* }})
25-
'''docker-compose.*\.yml''', # Docker compose files with env var templates
26-
'''\.env\..*''', # Environment template files (.env.local, .env.development, etc.)
27-
'''(^|/)\.env$''', # Local .env file (should be in .gitignore anyway)
28-
'''(^|/)backend/\.env$''', # Backend .env file
29-
'''(^|/)frontend/\.env$''', # Frontend .env file
30-
]
31-
32-
# Stopwords to avoid false positives
33-
stopwords = [
34-
"example",
35-
"sample",
36-
"test",
37-
"mock",
38-
"dummy",
39-
"placeholder",
40-
]
41-
42-
[[rules]]
43-
id = "aws-access-token"
44-
description = "AWS Access Token"
45-
regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
46-
tags = ["key", "AWS"]
47-
48-
[[rules]]
49-
id = "github-pat"
50-
description = "GitHub Personal Access Token"
51-
regex = '''ghp_[0-9a-zA-Z]{36}'''
52-
tags = ["key", "GitHub"]
53-
54-
[[rules]]
55-
id = "github-fine-grained-pat"
56-
description = "GitHub Fine-Grained Personal Access Token"
57-
regex = '''github_pat_[0-9a-zA-Z]{22}_[0-9a-zA-Z]{59}'''
58-
tags = ["key", "GitHub"]
59-
60-
[[rules]]
61-
id = "github-app-token"
62-
description = "GitHub App Token"
63-
regex = '''(ghu|ghs)_[0-9a-zA-Z]{36}'''
64-
tags = ["key", "GitHub"]
65-
66-
[[rules]]
67-
id = "github-refresh-token"
68-
description = "GitHub Refresh Token"
69-
regex = '''ghr_[0-9a-zA-Z]{76}'''
70-
tags = ["key", "GitHub"]
71-
72-
[[rules]]
73-
id = "slack-token"
74-
description = "Slack Token"
75-
regex = '''xox[baprs]-([0-9a-zA-Z-]{10,48})?'''
76-
tags = ["key", "Slack"]
77-
78-
[[rules]]
79-
id = "stripe-sk"
80-
description = "Stripe Secret Key"
81-
regex = '''sk_live_[0-9a-zA-Z]{24}'''
82-
tags = ["key", "Stripe"]
83-
84-
[[rules]]
85-
id = "stripe-rk"
86-
description = "Stripe Restricted Key"
87-
regex = '''rk_live_[0-9a-zA-Z]{24}'''
88-
tags = ["key", "Stripe"]
89-
90-
[[rules]]
91-
id = "private-key"
92-
description = "Private Key"
93-
regex = '''-----BEGIN ((EC|PGP|OPENSSH|RSA|DSA) )?PRIVATE KEY( BLOCK)?-----'''
94-
tags = ["key", "Asymmetric"]
4+
title = "RAG Modulo Gitleaks Configuration"
955

96-
[[rules]]
97-
id = "watsonx-api-key"
98-
description = "WatsonX API Key"
99-
regex = '''(?i)(WATSONX_APIKEY|WATSONX_API_KEY)\s*[=:]\s*['"]?([a-zA-Z0-9_-]{32,})['"]?'''
100-
tags = ["key", "WatsonX"]
101-
102-
[[rules]]
103-
id = "anthropic-api-key"
104-
description = "Anthropic API Key"
105-
regex = '''(?i)ANTHROPIC_API_KEY\s*[=:]\s*['"]?(sk-ant-[a-zA-Z0-9_-]{32,})['"]?'''
106-
tags = ["key", "Anthropic"]
107-
108-
[[rules]]
109-
id = "mlflow-credentials"
110-
description = "MLFlow Credentials"
111-
regex = '''(?i)MLFLOW_TRACKING_(USERNAME|PASSWORD)\s*[=:]\s*['"]?([^'"\s]{3,})['"]?'''
112-
tags = ["credentials", "MLFlow"]
113-
114-
[[rules]]
115-
id = "minio-credentials"
116-
description = "MinIO Credentials"
117-
regex = '''(?i)MINIO_ROOT_(USER|PASSWORD)\s*[=:]\s*['"]?([^'"\s]{3,})['"]?'''
118-
tags = ["credentials", "MinIO"]
119-
120-
[[rules]]
121-
id = "postgres-password"
122-
description = "PostgreSQL Password"
123-
regex = '''(?i)(POSTGRES_PASSWORD|COLLECTIONDB_PASSWORD)\s*[=:]\s*['"]?([^'"\s]{3,})['"]?'''
124-
tags = ["password", "PostgreSQL"]
6+
[extend]
7+
# Build on Gitleaks' built-in default ruleset; the [[rules]] entries in this file are added on top of it
8+
useDefault = true  # NOTE(review): this change drops the project-specific WatsonX, MLFlow, MinIO, PostgreSQL and JWT rules - confirm the default ruleset still covers those secrets
1259

10+
# Additional custom rules for IBM Cloud and infrastructure secrets
12611
[[rules]]
127-
id = "jwt-secret-key"
128-
description = "JWT Secret Key"
129-
regex = '''(?i)JWT_SECRET_KEY\s*[=:]\s*['"]?([a-zA-Z0-9_-]{32,})['"]?'''
130-
tags = ["secret", "JWT"]
131-
132-
[[rules]]
133-
id = "openai-api-key"
134-
description = "OpenAI API Key"
135-
regex = '''sk-[a-zA-Z0-9]{48}'''
136-
tags = ["key", "OpenAI"]
137-
138-
[[rules]]
139-
id = "openai-project-key"
140-
description = "OpenAI Project API Key"
141-
regex = '''sk-proj-[a-zA-Z0-9]{48}'''
142-
tags = ["key", "OpenAI"]
143-
144-
[[rules]]
145-
id = "google-gemini-api-key"
146-
description = "Google Gemini API Key"
147-
regex = '''AIza[0-9A-Za-z\\-_]{35}'''
148-
tags = ["key", "Google", "Gemini"]
12+
id = "ibm-cloud-api-key"
13+
description = "IBM Cloud API Key"
14+
regex = '''(?i)(?:ibm_?cloud_api_key|ic_api_key)["']?\s*[=:]\s*["']?([A-Za-z0-9_-]{44})(?:[^A-Za-z0-9_-]|$)'''  # exactly 44 key characters assigned (= or :) to one of the key names; a bare 44-char pattern would flag any long token in a file that mentions a keyword
15+
keywords = [  # pre-filter: the regex is only tried on content containing one of these strings
16+
"ibm_cloud_api_key",
17+
"ibmcloud_api_key",
18+
"IC_API_KEY",
19+
]
14920

15021
[[rules]]
151-
id = "azure-storage-account-key"
152-
description = "Azure Storage Account Key"
153-
regex = '''(?i)(AZURE_STORAGE_ACCOUNT_KEY|AZURE_STORAGE_CONNECTION_STRING)\s*[=:]\s*['"]?([a-zA-Z0-9+/=]{88})['"]?'''
154-
tags = ["key", "Azure", "Storage"]
22+
id = "terraform-sensitive-values"
23+
description = "Terraform sensitive values in .tfvars files"
24+
path = '''\.tfvars$'''  # restricts this rule to files whose path ends in .tfvars
25+
regex = '''(?i)(api_key|password|secret|token)\s*=\s*["'][^"']+["']'''  # case-insensitive: a name ending in api_key/password/secret/token assigned a non-empty quoted value
15526

15627
[[rules]]
157-
id = "azure-subscription-key"
158-
description = "Azure Subscription Key"
159-
regex = '''[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}'''
160-
keywords = ["azure", "subscription"]
161-
tags = ["key", "Azure"]
28+
id = "ansible-vault-unencrypted"
29+
description = "Unencrypted secrets in Ansible files"
30+
path = '''(ansible|playbook).*\.ya?ml$'''  # only .yml/.yaml files whose path contains "ansible" or "playbook"; NOTE(review): playbooks stored elsewhere are not covered by this rule - confirm against the repo layout
31+
regex = '''(?i)(api_key|password|secret|token):\s*["']?[A-Za-z0-9_-]{20,}["']?'''  # case-insensitive: a key ending in api_key/password/secret/token whose value is 20+ letters, digits, "_" or "-", optionally quoted
16232

163-
[[rules]]
164-
id = "gcp-service-account-key"
165-
description = "GCP Service Account Key (JSON)"
166-
regex = '''"type":\s*"service_account"'''
167-
keywords = ["private_key", "project_id"]
168-
tags = ["key", "GCP", "ServiceAccount"]
33+
# Allowlist for false positives: findings matching any regex below are ignored
34+
[allowlist]
35+
description = "Allowlist for safe patterns"
36+
regexes = [
37+
'''YOUR_.*_HERE''', # Example placeholders such as YOUR_API_KEY_HERE
38+
'''example\.com''', # Example domains
39+
'''test-.*-key''', # Test keys
40+
'''mock-.*''', # Mock values; NOTE(review): unanchored, so anything containing "mock-" is allowlisted - confirm this breadth is intended
41+
'''\$\{.*\}''', # ${...} interpolation, e.g. environment variable references
42+
]
16943

170-
[[rules]]
171-
id = "gcp-api-key"
172-
description = "GCP API Key"
173-
regex = '''AIza[0-9A-Za-z\\-_]{35}'''
174-
keywords = ["gcp", "google-cloud", "googleapis"]
175-
tags = ["key", "GCP"]
44+
paths = [
45+
'''\.example$''', # Example files (any path ending in .example)
46+
'''\.tfvars\.example$''', # Terraform example files (already matched by \.example$ above)
47+
'''\.env\.example$''', # Env example files (already matched by \.example$ above)
48+
'''^docs/''', # Documentation: paths starting with docs/
49+
'''^tests/fixtures/''', # Test fixtures: paths starting with tests/fixtures/
50+
]
17651

177-
[[rules]]
178-
id = "high-entropy-strings"
179-
description = "High Entropy String (possible secret)"
180-
regex = '''[a-zA-Z0-9+/=]{32,}'''
181-
entropy = 4.5
182-
tags = ["entropy"]
52+
# Commits to exclude (if any)
53+
commits = []

.pre-commit-config.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,15 @@ repos:
5151
- id: detect-secrets
5252
args: [--baseline .secrets.baseline]
5353

54+
# Gitleaks - catches secrets missed by detect-secrets
55+
- repo: https://github.com/gitleaks/gitleaks
56+
rev: v8.18.1
57+
hooks:
58+
- id: gitleaks
59+
name: Gitleaks secret scanning
60+
entry: gitleaks protect --verbose --redact --staged  # scan staged changes only; --redact keeps secret values out of the hook output
61+
language: golang  # let pre-commit build gitleaks from the pinned rev; "system" would run whatever gitleaks is on PATH (or fail if none is installed), making rev meaningless
62+
5463
# Python hooks - must match CI configuration exactly
5564
# Poetry moved to root (October 2025) - pyproject.toml now at root level
5665
# CI runs from root with: poetry run ruff check backend/ --config pyproject.toml

0 commit comments

Comments
 (0)