Skip to content

Commit

Permalink
Make multipart parsing more forgiving: start-of-line and casing (#1164)
Browse files Browse the repository at this point in the history
* Make multipart parsing more forgiving: start-of-line and casing

I had a multipart response I wanted to parse, and Julia failed to parse
it. These changes let it parse:
1. The content-disposition was the _second_ line in each part, with the
   content-type coming first, so the `^` anchor was failing to match.
2. The `content-type:` key was lower-cased, not Title-Cased as expected.

I don't know whether these are generally correct, but they worked in my case.

* Add unit test
  • Loading branch information
NHDaly authored Apr 6, 2024
1 parent 2de2c78 commit ec69a01
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/parsemultipart.jl
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ end

# Module-level vector of `Parsers.RegexAndMatchData`, created empty here.
# NOTE(review): presumably filled with the result of `content_disposition_regex_f()`
# elsewhere in the file — confirm against the rest of the module.
const content_disposition_regex = Parsers.RegexAndMatchData[]
# Build the `Parsers.RegexAndMatchData` used to recognise a part's
# `Content-Disposition: form-data; ...` header line, pass it to `Parsers.init!`,
# and return the result of that call.
#
# NOTE(review): `r` is assigned twice below and only the second value reaches
# `Parsers.init!`. This looks like the removed/added pair of a diff hunk whose
# +/- markers were lost — confirm that only the second assignment should remain.
function content_disposition_regex_f()
# Earlier pattern: exact `Content-Disposition` casing; without the `m` flag, `^`
# anchors only at the start of the searched string.
r = Parsers.RegexAndMatchData(r"^Content-Disposition:[ \t]*form-data;[ \t]*(.*)\r\n"x)
# Newer pattern: accepts `C`/`c` and `D`/`d` in the header name, and adds the `m`
# (multiline) flag so `^` can match at the start of any line, allowing the header
# to appear after another header such as content-type. The single capture group
# holds everything after `form-data;` up to the CRLF.
r = Parsers.RegexAndMatchData(r"^[Cc]ontent-[Dd]isposition:[ \t]*form-data;[ \t]*(.*)\r\n"mx)
Parsers.init!(r)
end

Expand Down
26 changes: 23 additions & 3 deletions test/parsemultipart.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ function generate_test_body()
"Content-Type: application/json",
"",
"{\"data\": [\"this is json data\"]}",
"----------------------------918073721150061572809433",
"content-type: text/plain",
"content-disposition: form-data; name=\"key3\"",
"",
"This file has lower-cased content- keys, and disposition comes second.",
"----------------------------918073721150061572809433--",
"",
], "\r\n"))
Expand Down Expand Up @@ -122,8 +127,13 @@ end
@test (startIndex + endIndexOffset) == endIndex

(isTerminatingDelimiter, startIndex, endIndex) = find_multipart_boundary(body, delimiter, start = startIndex + 3)
@test isTerminatingDelimiter
@test !isTerminatingDelimiter
@test 804 == startIndex
@test (startIndex + endIndexOffset) == endIndex

(isTerminatingDelimiter, startIndex, endIndex) = find_multipart_boundary(body, delimiter, start = startIndex + 3)
@test isTerminatingDelimiter
@test 1003 == startIndex
# +2 because of the two additional '--' characters
@test (startIndex + endIndexOffset + 2) == endIndex
end
Expand All @@ -133,7 +143,7 @@ end
@test HTTP.parse_multipart_form(generate_non_multi_test_request()) === nothing

multiparts = HTTP.parse_multipart_form(generate_test_request())
@test 5 == length(multiparts)
@test 6 == length(multiparts)

@test "multipart.txt" === multiparts[1].filename
@test "namevalue" === multiparts[1].name
Expand All @@ -159,11 +169,16 @@ end
@test "json_file1" === multiparts[5].name
@test "application/json" === multiparts[5].contenttype
@test """{"data": ["this is json data"]}""" === String(read(multiparts[5].data))

@test multiparts[6].filename === nothing
@test "key3" === multiparts[6].name
@test "text/plain" === multiparts[6].contenttype
@test "This file has lower-cased content- keys, and disposition comes second." === String(read(multiparts[6].data))
end

@testset "parse_multipart_form response" begin
multiparts = HTTP.parse_multipart_form(generate_test_response())
@test 5 == length(multiparts)
@test 6 == length(multiparts)

@test "multipart.txt" === multiparts[1].filename
@test "namevalue" === multiparts[1].name
Expand All @@ -189,5 +204,10 @@ end
@test "json_file1" === multiparts[5].name
@test "application/json" === multiparts[5].contenttype
@test """{"data": ["this is json data"]}""" === String(read(multiparts[5].data))

@test multiparts[6].filename === nothing
@test "key3" === multiparts[6].name
@test "text/plain" === multiparts[6].contenttype
@test "This file has lower-cased content- keys, and disposition comes second." === String(read(multiparts[6].data))
end
end

0 comments on commit ec69a01

Please sign in to comment.