From 7ba9c3aaf77d32dcc367db6ef16dda37f6a5fd55 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 4 Sep 2024 22:31:36 -0700 Subject: [PATCH 1/2] indexer: fix parsing POST request when using Map, check for case-insensitive 'content-type' header --- package.json | 2 +- src/lib/indexer.ts | 4 ++-- src/lib/utils.ts | 24 ++++++++++++++++++++---- src/lib/warcrecord.ts | 4 ++-- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/package.json b/package.json index fb4215a..643803b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "warcio", - "version": "2.3.0", + "version": "2.3.1", "keywords": [ "WARC", "web archiving" diff --git a/src/lib/indexer.ts b/src/lib/indexer.ts index 79d912d..6bf95ac 100644 --- a/src/lib/indexer.ts +++ b/src/lib/indexer.ts @@ -304,8 +304,8 @@ export class CDXIndexer extends Indexer { if (postToGetUrl(request)) { requestBody = request.requestBody; - record.method = method; - record.requestBody = requestBody; + //record.method = method; + //record.requestBody = requestBody; url = request.url; } } diff --git a/src/lib/utils.ts b/src/lib/utils.ts index 41d1615..7687923 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -62,7 +62,24 @@ export function postToGetUrl(request: Request) { return false; } - const requestMime = (headers.get("content-type") || "").split(";")[0]; + const getContentType = (headers: Headers | Map) : string => { + const ct = headers.get("content-type"); + if (ct) { + return ct; + } + if (!(headers instanceof Headers)) { + for (const [key, value] of headers.entries()) { + if (key && key.toLowerCase() === "content-type") { + return value; + } + } + } + return ""; + } + + const contentType = getContentType(headers); + + const requestMime = contentType.split(";")[0]; function decodeIfNeeded( postData: Uint8Array | string | undefined | null, @@ -93,13 +110,12 @@ export function postToGetUrl(request: Request) { break; case "multipart/form-data": { - const content_type = headers.get("content-type"); - if (!content_type) { + if (!contentType) { throw new Error( "utils cannot call postToGetURL when missing content-type header", ); } - query = mfdToQueryString(decodeIfNeeded(postData), content_type); + query = mfdToQueryString(decodeIfNeeded(postData), contentType); break; } diff --git a/src/lib/warcrecord.ts b/src/lib/warcrecord.ts index 5546f9a..5b619ac 100644 --- a/src/lib/warcrecord.ts +++ b/src/lib/warcrecord.ts @@ -176,8 +176,8 @@ export class WARCRecord extends BaseAsyncIterReader { _offset: number | undefined = 0; _length = 0; - method: string | undefined = ""; - requestBody = ""; + //method: string | undefined = ""; + //requestBody = ""; _urlkey = ""; constructor({ From f1b5a0d57660d2f5c00b54982f409fde073c0122 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 4 Sep 2024 22:51:54 -0700 Subject: [PATCH 2/2] remove commented out --- src/lib/indexer.ts | 2 -- src/lib/warcrecord.ts | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/lib/indexer.ts b/src/lib/indexer.ts index 6bf95ac..8219547 100644 --- a/src/lib/indexer.ts +++ b/src/lib/indexer.ts @@ -304,8 +304,6 @@ export class CDXIndexer extends Indexer { if (postToGetUrl(request)) { requestBody = request.requestBody; - //record.method = method; - //record.requestBody = requestBody; url = request.url; } } diff --git a/src/lib/warcrecord.ts b/src/lib/warcrecord.ts index 5b619ac..a8a3b37 100644 --- a/src/lib/warcrecord.ts +++ b/src/lib/warcrecord.ts @@ -176,8 +176,6 @@ export class WARCRecord extends BaseAsyncIterReader { _offset: number | undefined = 0; _length = 0; - //method: string | undefined = ""; - //requestBody = ""; _urlkey = ""; constructor({