From 573fb6238574fa58108517e0dc8b46ec32f70511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Helmut=20H=C3=A4nsel?= Date: Mon, 28 Aug 2023 10:08:54 +0200 Subject: [PATCH] add handling of multipart/mixed --- src/multipart.jl | 37 ++++++++++--- src/parsemultipart.jl | 121 ++++++++++++++++++++++++++++++------------ 2 files changed, 117 insertions(+), 41 deletions(-) diff --git a/src/multipart.jl b/src/multipart.jl index f03f2bbbb..1395750b6 100644 --- a/src/multipart.jl +++ b/src/multipart.jl @@ -1,6 +1,6 @@ module Forms -export Form, Multipart, content_type +export Form, Multipart, content_type, multipart_request using ..IOExtras, ..Sniff, ..Conditions import ..HTTP # for doc references @@ -11,6 +11,7 @@ mutable struct Form <: IO index::Int mark::Int boundary::String + type::Symbol end Form(f::Form) = f @@ -101,7 +102,9 @@ headers = [] HTTP.post(url, headers, body) ``` """ -function Form(d; boundary=string(rand(UInt128), base=16)) +function Form(d; boundary=string(rand(UInt128), base=16), type = :formdata) + @require type ∈ [:formdata, :mixed] + # https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html bcharsnospace = raw"\w'\(\)\+,-\./:=\?" boundary_re = Regex("^[$bcharsnospace ]{0,69}[$bcharsnospace]\$") @@ -112,9 +115,9 @@ function Form(d; boundary=string(rand(UInt128), base=16)) len = length(d) for (i, (k, v)) in enumerate(d) write(io, (i == 1 ? "" : "\r\n") * "--" * boundary * "\r\n") - write(io, "Content-Disposition: form-data; name=\"$k\"") + type == :mixed || write(io, "Content-Disposition: form-data; name=\"$k\"") if isa(v, IO) - writemultipartheader(io, v) + writemultipartheader(io, v, type) seekstart(io) push!(data, io) push!(data, v) @@ -128,7 +131,7 @@ function Form(d; boundary=string(rand(UInt128), base=16)) write(io, "\r\n--" * boundary * "--" * "\r\n") seekstart(io) push!(data, io) - return Form(data, 1, -1, boundary) + return Form(data, 1, -1, boundary, type) end function writemultipartheader(io::IOBuffer, i::IOStream) @@ -195,6 +198,10 @@ function Multipart(f::Union{AbstractString, Nothing}, data::T, ct::AbstractStrin return Multipart{T}(f, data, String(ct), String(cte), String(name)) end +function Form(v::Vector{<:Multipart}; boundary=string(rand(UInt128), base=16)) + Form(Pair[i => m for (i, m) in enumerate(v)]; boundary, type = :mixed) +end + function Base.show(io::IO, m::Multipart{T}) where {T} items = ["data=::$T", "contenttype=\"$(m.contenttype)\"", "contenttransferencoding=\"$(m.contenttransferencoding)\")"] m.filename === nothing || pushfirst!(items, "filename=\"$(m.filename)\"") @@ -209,8 +216,10 @@ Base.mark(m::Multipart{T}) where {T} = mark(m.data) Base.reset(m::Multipart{T}) where {T} = reset(m.data) Base.seekstart(m::Multipart{T}) where {T} = seekstart(m.data) -function writemultipartheader(io::IOBuffer, i::Multipart) - if i.filename === nothing +function writemultipartheader(io::IOBuffer, i::Multipart, type = :formdata) + if type == :mixed + # don't write a new line + elseif i.filename === nothing write(io, "\r\n") else write(io, "; filename=\"$(i.filename)\"\r\n") @@ -222,6 +231,18 @@ function writemultipartheader(io::IOBuffer, i::Multipart) end content_type(f::Form) = "Content-Type" => - "multipart/form-data; boundary=$(f.boundary)" + "multipart/$(f.type == :formdata ? "form-data" : f.type); boundary=$(f.boundary)" + +function multipart_request(method, url, ct::AbstractString="", cte::AbstractString=""; body = HTTP.nobody, headers = [], query...) + target = HTTP.unescapeuri(string(HTTP.URI(HTTP.URI(url); query))) + header = HTTP.MessageRequest.mkreqheaders(headers, true) + req = HTTP.Messages.Request(uppercase("$method"), target, header, body) + + io = IOBuffer() + write(io, req) + seekstart(io) + + HTTP.Multipart(nothing, io, ct, cte) +end end # module \ No newline at end of file diff --git a/src/parsemultipart.jl b/src/parsemultipart.jl index 95ccce1ef..28c1c8920 100644 --- a/src/parsemultipart.jl +++ b/src/parsemultipart.jl @@ -3,7 +3,7 @@ module MultiPartParsing import ..access_threaded using ..Messages, ..Forms, ..Parsers -export parse_multipart_form +export parse_multipart, parse_multipart_form, parse_multipart_mixed const CR_BYTE = 0x0d # \r const LF_BYTE = 0x0a # \n @@ -154,65 +154,115 @@ end Parse a single multi-part chunk into a Multipart object. This will decode the header and extract the contents from the byte array. """ -function parse_multipart_chunk(chunk) +function parse_multipart_chunk(chunk; require_contentdisposition::Bool=true) startIndex, end_index = find_header_boundary(chunk) header = SubString(unsafe_string(pointer(chunk, startIndex), end_index - startIndex + 1)) content = view(chunk, end_index+1:lastindex(chunk)) # find content disposition re = access_threaded(content_disposition_regex_f, content_disposition_regex) - if !Parsers.exec(re, header) + content_disposition_available = Parsers.exec(re, header) + if !content_disposition_available && require_contentdisposition @warn "Content disposition is not specified dropping the chunk." String(chunk) - return nothing # Specifying content disposition is mandatory + return nothing # Specifying content disposition is mandatory for form-data end - content_disposition = Parsers.group(1, re, header) - re_flag = access_threaded(content_disposition_flag_regex_f, content_disposition_flag_regex) - re_pair = access_threaded(content_disposition_pair_regex_f, content_disposition_pair_regex) name = nothing filename = nothing - while !isempty(content_disposition) - if Parsers.exec(re_pair, content_disposition) - key = Parsers.group(1, re_pair, content_disposition) - value = Parsers.group(2, re_pair, content_disposition) - if key == "name" - name = value - elseif key == "filename" - filename = value + if content_disposition_available + content_disposition = Parsers.group(1, re, header) + + re_flag = access_threaded(content_disposition_flag_regex_f, content_disposition_flag_regex) + re_pair = access_threaded(content_disposition_pair_regex_f, content_disposition_pair_regex) + while !isempty(content_disposition) + if Parsers.exec(re_pair, content_disposition) + key = Parsers.group(1, re_pair, content_disposition) + value = Parsers.group(2, re_pair, content_disposition) + if key == "name" + name = value + elseif key == "filename" + filename = value + else + # do stuff with other content disposition key-value pairs + end + content_disposition = Parsers.nextbytes(re_pair, content_disposition) + elseif Parsers.exec(re_flag, content_disposition) + # do stuff with content disposition flags + content_disposition = Parsers.nextbytes(re_flag, content_disposition) else - # do stuff with other content disposition key-value pairs + break end - content_disposition = Parsers.nextbytes(re_pair, content_disposition) - elseif Parsers.exec(re_flag, content_disposition) - # do stuff with content disposition flags - content_disposition = Parsers.nextbytes(re_flag, content_disposition) - else - break end - end - - name === nothing && return + name === nothing && return + end re_ct = access_threaded(content_type_regex_f, content_type_regex) contenttype = Parsers.exec(re_ct, header) ? Parsers.group(1, re_ct, header) : "text/plain" - return Multipart(filename, IOBuffer(content), contenttype, "", name) + return Multipart(filename, IOBuffer(content), contenttype, "", name === nothing ? "" : name) +end + +""" + parse_multipart_data(::Type{Response}, m::Multipart) + +Parse data of a mixed multipart response into a HTTP.Response +""" +function parse_multipart_data(::Type{Response}, m::Multipart) + seekstart(m.data) + content = read(m.data) + startIndex, end_index = find_header_boundary(content) + + header = SubString(unsafe_string(pointer(content, startIndex), end_index - startIndex + 1)) + response_content = view(content, end_index+1:lastindex(content)) + response = Response(response_content) + header = Messages.parse_start_line!(header, response) + Messages.parse_header_fields!(header, response) + return response +end + +""" + parse_multipart_data(f::Function, m::Multipart) + +Parse multipart data by parsing function `f` +""" +function parse_multipart_data(f::Function, m::Multipart) + seekstart(m.data) + content = read(m.data) + + return f(content) end +parse_multipart_data(::Type{Response}, mm::Vector{Multipart}) = parse_multipart_data.(Ref(Response), mm) + +""" + parse_multipart(::Type{T}, msg::Message) where T + +Parse multipart message into a type T. This function only forwards the type `T` to the +function `parse_multipart_data(::Type{T}, m::Multipart)`, which needs to be declared by the user. +""" +parse_multipart(::Type{T}, msg::Message) where T = parse_multipart_data(T, parse_multipart(msg)) + +""" + parse_multipart(f::Function, msg::Message) where T + +Parse multipart message by applying a parsing function `f(content::Vector{UInt8})` to the multipart parts. +""" +parse_multipart(f::Function, msg::Message) where T = parse_multipart_data.(f, parse_multipart(msg)) + """ parse_multipart_body(body, boundary)::Vector{Multipart} Parse the multipart body received from the client breaking it into the various chunks which are returned as an array of Multipart objects. """ -function parse_multipart_body(body::AbstractVector{UInt8}, boundary::AbstractString)::Vector{Multipart} +function parse_multipart_body(body::AbstractVector{UInt8}, boundary::AbstractString; require_contentdisposition::Bool = true)::Vector{Multipart} multiparts = Multipart[] idxs = find_multipart_boundaries(body, codeunits(boundary)) length(idxs) > 1 || (return multiparts) for i in 1:length(idxs)-1 chunk = view(body, idxs[i][2]+1:idxs[i+1][1]-1) - push!(multiparts, parse_multipart_chunk(chunk)) + push!(multiparts, parse_multipart_chunk(chunk; require_contentdisposition)) end return multiparts end @@ -232,19 +282,24 @@ that the boundary delimiter does not need to have '-' characters, but a line usi the boundary delimiter will start with '--' and end in \r\n. [RFC2046 5.1](https://tools.ietf.org/html/rfc2046#section-5.1.1) """ -function parse_multipart_form(msg::Message)::Union{Vector{Multipart}, Nothing} - # parse boundary from Content-Type - m = match(r"multipart/form-data; boundary=(.*)$", msg["Content-Type"]) +function parse_multipart(msg::Message, required_type = nothing)::Union{Vector{Multipart}, Nothing} + # parse multipart type and boundary from Content-Type + m = match(r"multipart/([^;]*); boundary=(.*)$", msg["Content-Type"]) m === nothing && return nothing - boundary_delimiter = m[1] + type = Symbol(replace(m[1], '-' => "")) + boundary_delimiter = m[2] + required_type !== nothing && required_type != type && return nothing # [RFC2046 5.1.1](https://tools.ietf.org/html/rfc2046#section-5.1.1) length(boundary_delimiter) > 70 && error("boundary delimiter must not be greater than 70 characters") - return parse_multipart_body(payload(msg), boundary_delimiter) + return parse_multipart_body(payload(msg), boundary_delimiter; require_contentdisposition = (type == :formdata)) end +parse_multipart_form(msg::Message) = parse_multipart(msg, :formdata) +parse_multipart_mixed(msg::Message) = parse_multipart(msg, :mixed) + function __init__() nt = isdefined(Base.Threads, :maxthreadid) ? Threads.maxthreadid() : Threads.nthreads() resize!(empty!(content_disposition_regex), nt)