Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LibWeb: Implement formData() for "multipart/form-data" #3036

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
245 changes: 241 additions & 4 deletions Libraries/LibWeb/Fetch/Body.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* SPDX-License-Identifier: BSD-2-Clause
*/

#include <AK/GenericLexer.h>
#include <AK/TypeCasts.h>
#include <LibJS/Runtime/ArrayBuffer.h>
#include <LibJS/Runtime/Completion.h>
Expand All @@ -16,10 +17,13 @@
#include <LibWeb/Bindings/MainThreadVM.h>
#include <LibWeb/DOMURL/URLSearchParams.h>
#include <LibWeb/Fetch/Body.h>
#include <LibWeb/Fetch/Infrastructure/HTTP.h>
#include <LibWeb/Fetch/Infrastructure/HTTP/Bodies.h>
#include <LibWeb/FileAPI/Blob.h>
#include <LibWeb/FileAPI/File.h>
#include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
#include <LibWeb/Infra/JSON.h>
#include <LibWeb/Infra/Strings.h>
#include <LibWeb/MimeSniff/MimeType.h>
#include <LibWeb/Streams/ReadableStream.h>
#include <LibWeb/WebIDL/Promise.h>
Expand Down Expand Up @@ -137,10 +141,15 @@ WebIDL::ExceptionOr<JS::Value> package_data(JS::Realm& realm, ByteBuffer bytes,
case PackageDataType::FormData:
// If mimeType’s essence is "multipart/form-data", then:
if (mime_type.has_value() && mime_type->essence() == "multipart/form-data"sv) {
// FIXME: 1. Parse bytes, using the value of the `boundary` parameter from mimeType, per the rules set forth in Returning Values from Forms: multipart/form-data. [RFC7578]
// FIXME: 2. If that fails for some reason, then throw a TypeError.
// FIXME: 3. Return a new FormData object, appending each entry, resulting from the parsing operation, to its entry list.
return JS::js_null();
// 1. Parse bytes, using the value of the `boundary` parameter from mimeType, per the rules set forth in Returning Values from Forms: multipart/form-data. [RFC7578]
auto error_or_entry_list = parse_multipart_form_data(realm, bytes, mime_type.value());

// 2. If that fails for some reason, then throw a TypeError.
F3n67u marked this conversation as resolved.
Show resolved Hide resolved
if (error_or_entry_list.is_error())
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, MUST(String::formatted("Failed to parse multipart form data: {}", error_or_entry_list.release_error().message)) };

// 3. Return a new FormData object, appending each entry, resulting from the parsing operation, to its entry list.
return TRY(XHR::FormData::create(realm, error_or_entry_list.release_value()));
}
// Otherwise, if mimeType’s essence is "application/x-www-form-urlencoded", then:
else if (mime_type.has_value() && mime_type->essence() == "application/x-www-form-urlencoded"sv) {
Expand Down Expand Up @@ -231,4 +240,232 @@ WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> consume_body(JS::Realm& realm, Bod
return promise;
}

// https://andreubotella.github.io/multipart-form-data/#parse-a-multipart-form-data-name
// Parses the quoted value of a `name="..."` / `filename="..."` parameter.
// `lexer` must be positioned on the byte right after the opening 0x22 ("). On success the lexer is
// left right after the closing 0x22 (") and the unescaped, UTF-8 decoded name is returned.
// Returns a MultipartParsingError if the value is not terminated by 0x22 (").
static MultipartParsingErrorOr<String> parse_multipart_form_data_name(GenericLexer& lexer)
{
    // 1. Assert: The byte at (position - 1) is 0x22 (").
    VERIFY(lexer.peek(-1) == '"');

    // 2. Let name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x22 ("), given position.
    auto name = lexer.consume_until(is_any_of("\n\r\""sv));

    // 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1.
    if (!lexer.consume_specific('"'))
        return MultipartParsingError { MUST(String::formatted("Expected \" at position {}", lexer.tell())) };

    // 4. Replace any occurrence of the following subsequences in name with the given byte:
    //    - "%0A" with 0x0A (LF)
    //    - "%0D" with 0x0D (CR)
    //    - "%22" with 0x22 (")
    StringBuilder builder;
    for (size_t i = 0; i < name.length(); ++i) {
        // Only a '%' with at least two bytes after it can start one of the escapes.
        if (name[i] == '%' && i + 2 < name.length()) {
            auto subsequence = name.substring_view(i, 3);
            if (subsequence == "%0A"sv) {
                builder.append('\n');
                i += 2; // Skip the two bytes that completed the escape.
                continue;
            }
            if (subsequence == "%0D"sv) {
                builder.append('\r');
                i += 2;
                continue;
            }
            if (subsequence == "%22"sv) {
                builder.append('"');
                i += 2;
                continue;
            }
        }

        // No substitution applied; keep the byte as is.
        builder.append(name[i]);
    }

    // 5. Return the UTF-8 decoding without BOM of name.
    // NOTE: The bytes come straight from the request/response body and are not guaranteed to be valid UTF-8,
    //       so they are decoded (with replacement characters) instead of being adopted without validation.
    return String::from_utf8_with_replacement_character(builder.string_view(), String::WithBOMHandling::No);
}

// https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers
// Reads the header lines of a single part, starting at the lexer's current position, and stops in
// front of the empty line (CR LF) that terminates them. Yields the collected name, filename and
// content type, or a MultipartParsingError describing the first violation encountered.
static MultipartParsingErrorOr<MultiPartFormDataHeader> parse_multipart_form_data_header(GenericLexer& lexer)
{
    // 1. Let name, filename and contentType be null.
    MultiPartFormDataHeader parsed;

    // 2. While true:
    for (;;) {
        // 1. If position points to a sequence of bytes starting with 0x0D 0x0A (CR LF):
        if (lexer.next_is("\r\n"sv)) {
            // 1. If name is null, return failure.
            // 2. Return name, filename and contentType.
            if (parsed.name.has_value())
                return parsed;
            return MultipartParsingError { "Missing name parameter in Content-Disposition header"_string };
        }

        // 2. Let header name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x3A (:), given position.
        // 3. Remove any HTTP tab or space bytes from the start or end of header name.
        auto const field_name = lexer.consume_until(is_any_of("\n\r:"sv)).trim(Infrastructure::HTTP_TAB_OR_SPACE, TrimMode::Both);

        // 4. If header name does not match the field-name token production, return failure.
        bool const is_valid_field_name = Infrastructure::is_header_name(field_name.bytes());
        if (!is_valid_field_name)
            return MultipartParsingError { MUST(String::formatted("Invalid header name {}", field_name)) };

        // 5. If the byte at position is not 0x3A (:), return failure.
        // 6. Advance position by 1.
        if (!lexer.consume_specific(':'))
            return MultipartParsingError { MUST(String::formatted("Expected : at position {}", lexer.tell())) };

        // 7. Collect a sequence of bytes that are HTTP tab or space bytes given position. (Do nothing with those bytes.)
        lexer.ignore_while(Infrastructure::is_http_tab_or_space);

        // 8. Byte-lowercase header name and switch on the result:
        if (field_name.equals_ignoring_ascii_case("content-disposition"sv)) {
            // -> `content-disposition`
            // 1. Set name and filename to null.
            parsed.name.clear();
            parsed.filename.clear();

            // 2. If position does not point to a sequence of bytes starting with `form-data; name="`, return failure.
            // 3. Advance position so it points at the byte after the next 0x22 (") byte (the one in the sequence of bytes matched above).
            if (!lexer.consume_specific("form-data; name=\""sv))
                return MultipartParsingError { MUST(String::formatted("Expected `form-data; name=\"` at position {}", lexer.tell())) };

            // 4. Set name to the result of parsing a multipart/form-data name given input and position, if the result is not failure. Otherwise, return failure.
            parsed.name = TRY(parse_multipart_form_data_name(lexer));

            // 5. If position points to a sequence of bytes starting with `; filename="`:
            //    1. Advance position so it points at the byte after the next 0x22 (") byte (the one in the sequence of bytes matched above).
            //    2. Set filename to the result of parsing a multipart/form-data name given input and position, if the result is not failure. Otherwise, return failure.
            if (lexer.consume_specific("; filename=\""sv))
                parsed.filename = TRY(parse_multipart_form_data_name(lexer));
        } else if (field_name.equals_ignoring_ascii_case("content-type"sv)) {
            // -> `content-type`
            // 1. Let header value be the result of collecting a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position.
            // 2. Remove any HTTP tab or space bytes from the end of header value.
            auto const field_value = lexer.consume_until(Infrastructure::is_http_newline).trim(Infrastructure::HTTP_TAB_OR_SPACE, TrimMode::Right);

            // 3. Set contentType to the isomorphic decoding of header value.
            parsed.content_type = Infra::isomorphic_decode(field_value.bytes());
        } else {
            // -> Otherwise
            // 1. Collect a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position. (Do nothing with those bytes.)
            lexer.ignore_until(Infrastructure::is_http_newline);
        }

        // 9. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2 (past the newline).
        if (!lexer.consume_specific("\r\n"sv))
            return MultipartParsingError { MUST(String::formatted("Expected CRLF at position {}", lexer.tell())) };
    }
}

// https://andreubotella.github.io/multipart-form-data/#multipart-form-data-parser
// Parses `input` as a multipart/form-data payload whose delimiter is taken from the `boundary`
// parameter of `mime_type`.
// Returns the parsed entries in payload order; parts that carry a filename become File objects
// created in `realm`, all other parts become strings.
// Returns a MultipartParsingError if the boundary parameter is missing or the payload is malformed.
MultipartParsingErrorOr<Vector<XHR::FormDataEntry>> parse_multipart_form_data(JS::Realm& realm, StringView input, MimeSniff::MimeType const& mime_type)
{
    // 1. Assert: mimeType’s essence is "multipart/form-data".
    VERIFY(mime_type.essence() == "multipart/form-data"sv);

    // 2. If mimeType’s parameters["boundary"] does not exist, return failure. Otherwise, let boundary be the result of UTF-8 decoding mimeType’s parameters["boundary"].
    auto maybe_boundary = mime_type.parameters().get("boundary"sv);
    if (!maybe_boundary.has_value())
        return MultipartParsingError { "Missing boundary parameter in Content-Type header"_string };
    auto boundary = maybe_boundary.release_value();

    // 3. Let entry list be an empty entry list.
    Vector<XHR::FormDataEntry> entry_list;

    // 4. Let position be a pointer to a byte in input, initially pointing at the first byte.
    GenericLexer lexer(input);

    // Every delimiter line in the payload is `--` followed by the boundary; build it once up front.
    auto boundary_with_dashes = MUST(String::formatted("--{}", boundary));

    // 5. While true:
    while (true) {
        // 1. If position points to a sequence of bytes starting with 0x2D 0x2D (`--`) followed by boundary, advance position by 2 + the length of boundary. Otherwise, return failure.
        if (!lexer.consume_specific(boundary_with_dashes))
            return MultipartParsingError { MUST(String::formatted("Expected `--` followed by boundary at position {}", lexer.tell())) };

        // 2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A (`--` followed by CR LF) followed by the end of input, return entry list.
        // NOTE: Comparing against everything that is left (rather than just a prefix) enforces the
        //       "followed by the end of input" part, so trailing bytes after the closing delimiter are rejected below.
        if (lexer.remaining() == "--\r\n"sv)
            return entry_list;

        // 3. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure.
        // 4. Advance position by 2. (This skips past the newline.)
        if (!lexer.consume_specific("\r\n"sv))
            return MultipartParsingError { MUST(String::formatted("Expected CRLF at position {}", lexer.tell())) };

        // 5. Let name, filename and contentType be the result of parsing multipart/form-data headers on input and position, if the result is not failure. Otherwise, return failure.
        auto header = TRY(parse_multipart_form_data_header(lexer));

        // 6. Advance position by 2. (This skips past the empty line that marks the end of the headers.)
        lexer.ignore(2);

        // 7. Let body be the empty byte sequence.
        // 8. Body loop: While position is not past the end of input:
        //    1. Append the code point at position to body.
        //    2. If body ends with boundary:
        //       1. Remove the last 4 + (length of boundary) bytes from body.
        //       2. Decrease position by 4 + (length of boundary).
        //       3. Break out of body loop.
        // NOTE: Instead of appending byte by byte, everything up to the next delimiter is taken at once.
        //       The two bytes in front of the delimiter are handed back to the lexer so that step 9 can
        //       verify that they really are CR LF.
        auto body = lexer.consume_until(boundary_with_dashes.bytes_as_string_view());
        if (lexer.next_is(boundary_with_dashes.bytes_as_string_view())) {
            constexpr size_t trailing_crlf_length = 2;
            if (body.length() >= trailing_crlf_length) {
                body = body.substring_view(0, body.length() - trailing_crlf_length);
                lexer.retreat(trailing_crlf_length);
            }
        }

        // 9. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2.
        if (!lexer.consume_specific("\r\n"sv))
            return MultipartParsingError { MUST(String::formatted("Expected CRLF at position {}", lexer.tell())) };

        // 10. If filename is not null:
        Optional<XHR::FormDataEntryValue> value;
        if (header.filename.has_value()) {
            // 1. If contentType is null, set contentType to "text/plain".
            if (!header.content_type.has_value())
                header.content_type = "text/plain"_string;

            // 2. If contentType is not an ASCII string, set contentType to the empty string.
            if (!all_of(header.content_type->code_points(), is_ascii)) {
                header.content_type = ""_string;
            }

            // 3. Let value be a new File object with name filename, type contentType, and body body.
            auto blob = FileAPI::Blob::create(realm, MUST(ByteBuffer::copy(body.bytes())), header.content_type.release_value());
            FileAPI::FilePropertyBag options {};
            options.type = blob->type();
            auto file = MUST(FileAPI::File::create(realm, { GC::make_root(blob) }, header.filename.release_value(), move(options)));
            value = GC::make_root(file);
        }
        // 11. Otherwise:
        else {
            // 1. Let value be the UTF-8 decoding without BOM of body.
            value = String::from_utf8_with_replacement_character(body, String::WithBOMHandling::No);
        }

        // 12. Assert: name is a scalar value string and value is either a scalar value string or a File object.
        VERIFY(header.name.has_value() && value.has_value());

        // 13. Create an entry with name and value, and append it to entry list.
        entry_list.empend(header.name.release_value(), value.release_value());
    }
}

}
22 changes: 22 additions & 0 deletions Libraries/LibWeb/Fetch/Body.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#pragma once

#include <AK/Forward.h>
#include <AK/HashMap.h>
#include <AK/Optional.h>
#include <AK/String.h>
#include <LibGC/Ptr.h>
#include <LibJS/Forward.h>
#include <LibWeb/Forward.h>
Expand All @@ -23,6 +26,24 @@ enum class PackageDataType {
Text,
};

// The values collected from the headers of a single multipart/form-data part.
// Each field stays empty until the corresponding header or parameter has been parsed.
struct MultiPartFormDataHeader {
// Value of the `name` parameter of the part's Content-Disposition header.
Optional<String> name;
// Value of the optional `filename` parameter of the part's Content-Disposition header.
Optional<String> filename;
// Isomorphic decoding of the part's Content-Type header value.
Optional<String> content_type;
};

// A Content-Disposition header split into its type and its parameters.
// NOTE(review): nothing in the multipart parser shown alongside this declaration refers to this struct;
//               confirm it is used elsewhere before relying on it.
struct ContentDispositionHeader {
// Presumably the disposition type (e.g. `form-data`) - TODO confirm against the code that fills it in.
String type;
// Presumably parameter name -> parameter value, kept in insertion order - TODO confirm.
OrderedHashMap<String, String> parameters;
};

// Error produced when parsing a multipart/form-data payload fails.
struct MultipartParsingError {
// Human-readable description of what went wrong.
String message;
};

// Result type of the multipart/form-data parsing functions: either a T or a MultipartParsingError.
template<typename T>
using MultipartParsingErrorOr = ErrorOr<T, MultipartParsingError>;

// https://fetch.spec.whatwg.org/#body-mixin
class BodyMixin {
public:
Expand All @@ -49,5 +70,6 @@ class BodyMixin {

[[nodiscard]] WebIDL::ExceptionOr<JS::Value> package_data(JS::Realm&, ByteBuffer, PackageDataType, Optional<MimeSniff::MimeType> const&);
[[nodiscard]] WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> consume_body(JS::Realm&, BodyMixin const&, PackageDataType);
// Parses `input` as a multipart/form-data payload, using the `boundary` parameter of `mime_type`,
// and returns the resulting form data entries or a MultipartParsingError on failure.
[[nodiscard]] MultipartParsingErrorOr<Vector<XHR::FormDataEntry>> parse_multipart_form_data(JS::Realm&, StringView input, MimeSniff::MimeType const& mime_type);

}
5 changes: 5 additions & 0 deletions Libraries/LibWeb/Fetch/Infrastructure/HTTP.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ constexpr bool is_http_tab_or_space(u32 const code_point)
return code_point == 0x09 || code_point == 0x20;
}

// Returns true if code_point is 0x0A (LF) or 0x0D (CR).
constexpr bool is_http_newline(u32 const code_point)
{
    switch (code_point) {
    case 0x0A: // LF
    case 0x0D: // CR
        return true;
    default:
        return false;
    }
}

enum class HttpQuotedStringExtractValue {
No,
Yes,
Expand Down
5 changes: 5 additions & 0 deletions Libraries/LibWeb/XHR/FormData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ WebIDL::ExceptionOr<GC::Ref<FormData>> FormData::create(JS::Realm& realm, Vector
return construct_impl(realm, move(list));
}

// Creates a FormData object in `realm` from an already built entry list.
// Takes the list by value and moves it into construct_impl().
WebIDL::ExceptionOr<GC::Ref<FormData>> FormData::create(JS::Realm& realm, Vector<FormDataEntry> entry_list)
{
return construct_impl(realm, move(entry_list));
}

FormData::FormData(JS::Realm& realm, Vector<FormDataEntry> entry_list)
: PlatformObject(realm)
, m_entry_list(move(entry_list))
Expand Down
1 change: 1 addition & 0 deletions Libraries/LibWeb/XHR/FormData.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class FormData : public Bindings::PlatformObject {
static WebIDL::ExceptionOr<GC::Ref<FormData>> construct_impl(JS::Realm&, Vector<FormDataEntry> entry_list);

static WebIDL::ExceptionOr<GC::Ref<FormData>> create(JS::Realm&, Vector<DOMURL::QueryParam> entry_list);
static WebIDL::ExceptionOr<GC::Ref<FormData>> create(JS::Realm&, Vector<FormDataEntry> entry_list);

WebIDL::ExceptionOr<void> append(String const& name, String const& value);
WebIDL::ExceptionOr<void> append(String const& name, GC::Ref<FileAPI::Blob> const& blob_value, Optional<String> const& filename = {});
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Harness status: OK

Found 3 tests

3 Pass
Pass Consume empty response.formData() as FormData
Pass Consume empty request.formData() as FormData
Pass Consume multipart/form-data headers case-insensitively
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Harness status: OK

Found 14 tests

13 Pass
1 Fail
Pass Consume request's body as text
Pass Consume request's body as blob
Pass Consume request's body as arrayBuffer
Pass Consume request's body as json (error case)
Pass Consume request's body as formData with correct multipart type (error case)
Pass Consume request's body as formData with correct urlencoded type
Pass Consume request's body as formData without correct type (error case)
Pass Consume empty blob request body as arrayBuffer
Pass Consume empty text request body as arrayBuffer
Pass Consume empty blob request body as text
Pass Consume empty text request body as text
Pass Consume empty URLSearchParams request body as text
Fail Consume empty FormData request body as text
Pass Consume empty ArrayBuffer request body as text
Loading
Loading