Skip to content

Commit a019e9b

Browse files
committed
add x-robots-tag validation
1 parent 98440df commit a019e9b

File tree

6 files changed

+548
-5
lines changed

6 files changed

+548
-5
lines changed

src/internal/errors/codes.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,14 @@ export const ERRORS = {
235235
message: `mime type ${mimeType} is not supported`,
236236
}),
237237

238+
InvalidXRobotsTag: (message: string) =>
239+
new StorageBackendError({
240+
error: 'invalid_x_robots_tag',
241+
code: ErrorCode.InvalidRequest,
242+
httpStatusCode: 400,
243+
message: `Invalid X-Robots-Tag header: ${message}`,
244+
}),
245+
238246
InvalidRange: () =>
239247
new StorageBackendError({
240248
error: 'invalid_range',

src/storage/renderer/renderer.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { ObjectMetadata } from '../backend'
33
import { Readable } from 'stream'
44
import { getConfig } from '../../config'
55
import { Obj } from '../schemas'
6+
import { validateXRobotsTag } from '@storage/validators/x-robots-tag'
67

78
export interface RenderOptions {
89
bucket: string
@@ -74,14 +75,23 @@ export abstract class Renderer {
7475
data: AssetResponse,
7576
options: RenderOptions
7677
) {
78+
let xRobotsTag = 'none'
79+
if (options.xRobotsTag) {
80+
try {
81+
// allow overriding x-robots-tag header only with valid values
82+
validateXRobotsTag(options.xRobotsTag)
83+
xRobotsTag = options.xRobotsTag
84+
} catch {}
85+
}
86+
7787
response
7888
.status(data.metadata.httpStatusCode ?? 200)
7989
.header('Accept-Ranges', 'bytes')
8090
.header('Content-Type', normalizeContentType(data.metadata.mimetype))
8191
.header('ETag', data.metadata.eTag)
8292
.header('Content-Length', data.metadata.contentLength)
8393
.header('Last-Modified', data.metadata.lastModified?.toUTCString())
84-
.header('X-Robots-Tag', options.xRobotsTag || 'none')
94+
.header('X-Robots-Tag', xRobotsTag)
8595

8696
if (options.expires) {
8797
response.header('Expires', options.expires)

src/storage/uploader.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import { getConfig } from '../config'
1212
import { logger, logSchema } from '@internal/monitoring'
1313
import { Readable } from 'stream'
1414
import { StorageObjectLocator } from '@storage/locator'
15+
import { validateXRobotsTag } from './validators/x-robots-tag'
1516

1617
const { storageS3Bucket, uploadFileSizeLimitStandard } = getConfig()
1718

@@ -308,6 +309,10 @@ export async function fileUploadFromRequest(
308309
const contentType = request.headers['content-type']
309310
const xRobotsTag = request.headers['x-robots-tag'] as string | undefined
310311

312+
if (xRobotsTag) {
313+
validateXRobotsTag(xRobotsTag)
314+
}
315+
311316
let body: Readable
312317
let userMetadata: Record<string, unknown> | undefined
313318
let mimeType: string
src/storage/validators/x-robots-tag.ts

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
import { ERRORS } from '@internal/errors'
2+
3+
// Directives that are complete on their own, i.e. never followed by ": value".
const SIMPLE_RULES = [
  'all',
  'noindex',
  'nofollow',
  'none',
  'nosnippet',
  'indexifembedded',
  'notranslate',
  'noimageindex',
] as const

// Directives that must be written as "name: value".
const PARAMETRIC_RULES = [
  'max-snippet',
  'max-image-preview',
  'max-video-preview',
  'unavailable_after',
] as const

// "a|b|c" alternations of the directive names, reused when composing regexes.
const simpleRulesPattern = [...SIMPLE_RULES].join('|')
const parametricRulesPattern = [...PARAMETRIC_RULES].join('|')

// Matches a part that is exactly one simple directive.
const SIMPLE_RULE_REGEX = new RegExp('^(' + simpleRulesPattern + ')$')

// Matches a whole "name: value" part; group 1 is the name, group 2 the raw value.
const PARAMETRIC_RULE_REGEX = new RegExp('^(' + parametricRulesPattern + '):\\s*(.*)$')

// Matches only the leading "name:" of a parametric directive; group 1 is the name.
const PARAMETRIC_RULE_START_REGEX = new RegExp('^(' + parametricRulesPattern + '):')

// Values accepted for the "max-image-preview" directive.
const VALID_IMAGE_PREVIEW_VALUES = new Set(['none', 'standard', 'large'])
27+
28+
/**
29+
* Validates the X-Robots-Tag header value according to MDN specification
30+
* @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Robots-Tag
31+
*
32+
* @param value - The X-Robots-Tag header value to validate
33+
* @throws {Error} If the header value is invalid
34+
*/
35+
export function validateXRobotsTag(value: string): void {
36+
if (!value || typeof value !== 'string') {
37+
throw ERRORS.InvalidXRobotsTag('X-Robots-Tag header value must be a non-empty string')
38+
}
39+
40+
const trimmedValue = value.trim()
41+
if (!trimmedValue) {
42+
throw ERRORS.InvalidXRobotsTag('X-Robots-Tag header value must be a non-empty string')
43+
}
44+
45+
const parts = splitRules(trimmedValue)
46+
47+
for (const part of parts) {
48+
if (!part) {
49+
throw ERRORS.InvalidXRobotsTag('X-Robots-Tag header contains empty rule')
50+
}
51+
52+
// Check if this is a parametric rule
53+
const parametricMatch = part.match(PARAMETRIC_RULE_REGEX)
54+
if (parametricMatch) {
55+
const [, ruleName, ruleValue] = parametricMatch
56+
validateParametricRule(ruleName, ruleValue.trim(), VALID_IMAGE_PREVIEW_VALUES)
57+
continue
58+
}
59+
60+
// Check if this is a simple rule
61+
if (SIMPLE_RULE_REGEX.test(part)) {
62+
continue
63+
}
64+
65+
// Check if this has a colon (could be user agent prefix)
66+
const colonIndex = part.indexOf(':')
67+
if (colonIndex !== -1) {
68+
const beforeColon = part.substring(0, colonIndex).trim()
69+
const afterColon = part.substring(colonIndex + 1).trim()
70+
71+
if (!afterColon) {
72+
throw ERRORS.InvalidXRobotsTag(
73+
`X-Robots-Tag user agent "${beforeColon}" has no rules specified`
74+
)
75+
}
76+
77+
// Recursively validate user agent rules
78+
validateXRobotsTag(afterColon)
79+
continue
80+
}
81+
82+
throw ERRORS.InvalidXRobotsTag(`Invalid X-Robots-Tag rule: "${part}"`)
83+
}
84+
}
85+
86+
/**
87+
* Splits rules by comma, handling parametric rules with dates that contain commas
88+
*/
89+
function splitRules(value: string): string[] {
90+
const parts: string[] = []
91+
let remaining = value
92+
93+
while (remaining) {
94+
remaining = remaining.trim()
95+
if (!remaining) break
96+
97+
const match = remaining.match(PARAMETRIC_RULE_START_REGEX)
98+
if (match) {
99+
const ruleName = match[1]
100+
101+
// For unavailable_after, extract date value (may contain commas)
102+
if (ruleName === 'unavailable_after') {
103+
// Build regex to find end of date by looking for comma + known rule or user agent
104+
const endPattern = new RegExp(
105+
`unavailable_after:\\s*(.+?)(?:,\\s*(?:${simpleRulesPattern}|${parametricRulesPattern}|[a-zA-Z0-9_-]+:)|$)`
106+
)
107+
const dateEndMatch = remaining.match(endPattern)
108+
109+
if (dateEndMatch) {
110+
const fullRule = `unavailable_after: ${dateEndMatch[1].trim()}`
111+
parts.push(fullRule)
112+
remaining = remaining.substring(fullRule.length).replace(/^,\s*/, '').trim()
113+
} else {
114+
parts.push(remaining)
115+
remaining = ''
116+
}
117+
continue
118+
}
119+
}
120+
121+
// Default: split by comma (for other parametric rules and simple rules)
122+
const nextComma = remaining.indexOf(',')
123+
if (nextComma === -1) {
124+
parts.push(remaining)
125+
remaining = ''
126+
} else {
127+
parts.push(remaining.substring(0, nextComma).trim())
128+
remaining = remaining.substring(nextComma + 1).trim()
129+
}
130+
}
131+
132+
return parts
133+
}
134+
135+
/**
136+
* Validates a parametric rule value
137+
*/
138+
function validateParametricRule(
139+
ruleName: string,
140+
ruleValue: string,
141+
validImagePreviewValues: Set<string>
142+
): void {
143+
if (!ruleValue) {
144+
throw ERRORS.InvalidXRobotsTag(`X-Robots-Tag rule "${ruleName}" requires a value`)
145+
}
146+
147+
switch (ruleName) {
148+
case 'max-snippet': {
149+
const num = parseInt(ruleValue, 10)
150+
if (isNaN(num) || num < 0) {
151+
throw ERRORS.InvalidXRobotsTag(
152+
`X-Robots-Tag "max-snippet" value must be a non-negative number, got: "${ruleValue}"`
153+
)
154+
}
155+
break
156+
}
157+
158+
case 'max-image-preview': {
159+
if (!validImagePreviewValues.has(ruleValue)) {
160+
throw ERRORS.InvalidXRobotsTag(
161+
`X-Robots-Tag "max-image-preview" value must be one of: none, standard, large, got: "${ruleValue}"`
162+
)
163+
}
164+
break
165+
}
166+
167+
case 'max-video-preview': {
168+
const num = parseInt(ruleValue, 10)
169+
if (isNaN(num) || num < -1) {
170+
throw ERRORS.InvalidXRobotsTag(
171+
`X-Robots-Tag "max-video-preview" value must be a number >= -1, got: "${ruleValue}"`
172+
)
173+
}
174+
break
175+
}
176+
177+
case 'unavailable_after': {
178+
// Check if it's a valid date string (try parsing it)
179+
const date = new Date(ruleValue)
180+
if (isNaN(date.getTime())) {
181+
throw ERRORS.InvalidXRobotsTag(
182+
`X-Robots-Tag "unavailable_after" value must be a valid date, got: "${ruleValue}"`
183+
)
184+
}
185+
break
186+
}
187+
}
188+
}

src/test/object.test.ts

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2563,14 +2563,15 @@ describe('x-robots-tag header', () => {
25632563
test('defaults x-robots-tag header to none if not specified', async () => {
25642564
const objPath = `${X_ROBOTS_TEST_BUCKET}/test-file-1.txt`
25652565

2566-
await appInstance.inject({
2566+
const createResponse = await appInstance.inject({
25672567
method: 'POST',
25682568
url: `/object/${objPath}`,
25692569
payload: new File(['test'], 'file.txt'),
25702570
headers: {
25712571
authorization: `Bearer ${await serviceKeyAsync}`,
25722572
},
25732573
})
2574+
expect(createResponse.statusCode).toBe(200)
25742575

25752576
const response = await appInstance.inject({
25762577
method: 'GET',
@@ -2579,13 +2580,14 @@ describe('x-robots-tag header', () => {
25792580
authorization: `Bearer ${await serviceKeyAsync}`,
25802581
},
25812582
})
2583+
expect(response.statusCode).toBe(200)
25822584
expect(response.headers['x-robots-tag']).toBe('none')
25832585
})
25842586

25852587
test('uses provided x-robots-tag header if set', async () => {
25862588
const objPath = `${X_ROBOTS_TEST_BUCKET}/test-file-2.txt`
25872589

2588-
await appInstance.inject({
2590+
const createResponse = await appInstance.inject({
25892591
method: 'POST',
25902592
url: `/object/${objPath}`,
25912593
payload: new File(['test'], 'file.txt'),
@@ -2594,6 +2596,7 @@ describe('x-robots-tag header', () => {
25942596
'x-robots-tag': 'all',
25952597
},
25962598
})
2599+
expect(createResponse.statusCode).toBe(200)
25972600

25982601
const response = await appInstance.inject({
25992602
method: 'GET',
@@ -2602,13 +2605,14 @@ describe('x-robots-tag header', () => {
26022605
authorization: `Bearer ${await serviceKeyAsync}`,
26032606
},
26042607
})
2608+
expect(response.statusCode).toBe(200)
26052609
expect(response.headers['x-robots-tag']).toBe('all')
26062610
})
26072611

26082612
test('updates x-robots-tag header on upsert', async () => {
26092613
const objPath = `${X_ROBOTS_TEST_BUCKET}/test-file-3.txt`
26102614

2611-
await appInstance.inject({
2615+
const createResponse = await appInstance.inject({
26122616
method: 'POST',
26132617
url: `/object/${objPath}`,
26142618
payload: new File(['test'], 'file.txt'),
@@ -2617,6 +2621,7 @@ describe('x-robots-tag header', () => {
26172621
'x-robots-tag': 'max-snippet: 10, notranslate',
26182622
},
26192623
})
2624+
expect(createResponse.statusCode).toBe(200)
26202625

26212626
const response = await appInstance.inject({
26222627
method: 'GET',
@@ -2625,9 +2630,10 @@ describe('x-robots-tag header', () => {
26252630
authorization: `Bearer ${await serviceKeyAsync}`,
26262631
},
26272632
})
2633+
expect(response.statusCode).toBe(200)
26282634
expect(response.headers['x-robots-tag']).toBe('max-snippet: 10, notranslate')
26292635

2630-
await appInstance.inject({
2636+
const createResponse2 = await appInstance.inject({
26312637
method: 'POST',
26322638
url: `/object/${objPath}`,
26332639
payload: new File(['test'], 'file.txt'),
@@ -2637,6 +2643,7 @@ describe('x-robots-tag header', () => {
26372643
'x-robots-tag': 'nofollow',
26382644
},
26392645
})
2646+
expect(createResponse2.statusCode).toBe(200)
26402647

26412648
const response2 = await appInstance.inject({
26422649
method: 'GET',
@@ -2645,6 +2652,28 @@ describe('x-robots-tag header', () => {
26452652
authorization: `Bearer ${await serviceKeyAsync}`,
26462653
},
26472654
})
2655+
expect(response2.statusCode).toBe(200)
26482656
expect(response2.headers['x-robots-tag']).toBe('nofollow')
26492657
})
2658+
2659+
test('rejects invalid x-robots-tag header with proper error', async () => {
2660+
const objPath = `${X_ROBOTS_TEST_BUCKET}/test-file-invalid.txt`
2661+
2662+
const createResponse = await appInstance.inject({
2663+
method: 'POST',
2664+
url: `/object/${objPath}`,
2665+
payload: new File(['test'], 'file.txt'),
2666+
headers: {
2667+
authorization: `Bearer ${await serviceKeyAsync}`,
2668+
'x-robots-tag': 'invalidrule',
2669+
},
2670+
})
2671+
2672+
expect(createResponse.statusCode).toBe(400)
2673+
expect(createResponse.json()).toMatchObject({
2674+
statusCode: '400',
2675+
error: 'invalid_x_robots_tag',
2676+
message: 'Invalid X-Robots-Tag header: Invalid X-Robots-Tag rule: "invalidrule"',
2677+
})
2678+
})
26502679
})

0 commit comments

Comments
 (0)