LadybirdBrowser · F3n67u · Dec 25, 2024
diff --git a/Libraries/LibWeb/Fetch/Body.cpp b/Libraries/LibWeb/Fetch/Body.cpp
@@ -5,6 +5,7 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
+#include <AK/GenericLexer.h>
 #include <AK/TypeCasts.h>
 #include <LibJS/Runtime/ArrayBuffer.h>
 #include <LibJS/Runtime/Completion.h>
@@ -16,10 +17,13 @@
 #include <LibWeb/Bindings/MainThreadVM.h>
 #include <LibWeb/DOMURL/URLSearchParams.h>
 #include <LibWeb/Fetch/Body.h>
+#include <LibWeb/Fetch/Infrastructure/HTTP.h>
 #include <LibWeb/Fetch/Infrastructure/HTTP/Bodies.h>
 #include <LibWeb/FileAPI/Blob.h>
+#include <LibWeb/FileAPI/File.h>
 #include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
 #include <LibWeb/Infra/JSON.h>
+#include <LibWeb/Infra/Strings.h>
 #include <LibWeb/MimeSniff/MimeType.h>
 #include <LibWeb/Streams/ReadableStream.h>
 #include <LibWeb/WebIDL/Promise.h>
@@ -137,10 +141,15 @@ WebIDL::ExceptionOr<JS::Value> package_data(JS::Realm& realm, ByteBuffer bytes,
     case PackageDataType::FormData:
         // If mimeType’s essence is "multipart/form-data", then:
         if (mime_type.has_value() && mime_type->essence() == "multipart/form-data"sv) {
-            // FIXME: 1. Parse bytes, using the value of the `boundary` parameter from mimeType, per the rules set forth in Returning Values from Forms: multipart/form-data. [RFC7578]
-            // FIXME: 2. If that fails for some reason, then throw a TypeError.
-            // FIXME: 3. Return a new FormData object, appending each entry, resulting from the parsing operation, to its entry list.
-            return JS::js_null();
+            // 1. Parse bytes, using the value of the `boundary` parameter from mimeType, per the rules set forth in Returning Values from Forms: multipart/form-data. [RFC7578]
+            auto error_or_entry_list = parse_multipart_form_data(realm, bytes, mime_type.value());
+
+            // 2. If that fails for some reason, then throw a TypeError.
+            if (error_or_entry_list.is_error())
+                return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, MUST(String::formatted("Failed to parse multipart form data: {}", error_or_entry_list.release_error().message)) };
+
+            // 3. Return a new FormData object, appending each entry, resulting from the parsing operation, to its entry list.
+            return TRY(XHR::FormData::create(realm, error_or_entry_list.release_value()));
         }
         // Otherwise, if mimeType’s essence is "application/x-www-form-urlencoded", then:
         else if (mime_type.has_value() && mime_type->essence() == "application/x-www-form-urlencoded"sv) {
@@ -231,4 +240,232 @@ WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> consume_body(JS::Realm& realm, Bod
     return promise;
 }
 
+// https://andreubotella.github.io/multipart-form-data/#parse-a-multipart-form-data-name
+// Parses a quoted `name`/`filename` value. On entry the lexer sits just past the opening 0x22 (");
+// on success it sits just past the closing one. Fails if the value is not terminated by 0x22 (").
+// The escapes %0A, %0D and %22 are undone and the result is UTF-8 decoded with replacement, so
+// ill-formed bytes from the body never end up unvalidated inside the returned String.
+static MultipartParsingErrorOr<String> parse_multipart_form_data_name(GenericLexer& lexer)
+{
+    // 1. Assert: The byte at (position - 1) is 0x22 (").
+    VERIFY(lexer.peek(-1) == '"');
+
+    // 2. Let name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x22 ("), given position.
+    auto name = lexer.consume_until(is_any_of("\n\r\""sv));
+
+    // 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1.
+    if (!lexer.consume_specific('"'))
+        return MultipartParsingError { MUST(String::formatted("Expected \" at position {}", lexer.tell())) };
+
+    // 4. Replace any occurrence of the following subsequences in name with the given byte:
+    //    - "%0A" with 0x0A (LF)
+    //    - "%0D" with 0x0D (CR)
+    //    - "%22" with 0x22 (")
+    StringBuilder builder;
+    for (size_t i = 0; i < name.length(); ++i) {
+        // An escape needs the '%' plus two more bytes; anything shorter is copied verbatim.
+        if (name[i] == '%' && i + 2 < name.length()) {
+            auto escape = name.substring_view(i + 1, 2);
+            Optional<char> replacement;
+            if (escape == "0A"sv)
+                replacement = '\n';
+            else if (escape == "0D"sv)
+                replacement = '\r';
+            else if (escape == "22"sv)
+                replacement = '"';
+            if (replacement.has_value()) {
+                builder.append(replacement.value());
+                i += 2; // Skip the two bytes that followed '%'.
+                continue;
+            }
+        }
+        builder.append(name[i]);
+    }
+
+    // 5. Return the UTF-8 decoding without BOM of name.
+    return String::from_utf8_with_replacement_character(builder.string_view(), String::WithBOMHandling::No);
+}
+
+// https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers
+// Reads the header lines of a single part, beginning at the lexer's current position.
+// Only `content-disposition` and `content-type` are interpreted (matched ASCII case-insensitively);
+// the value of every other header is skipped up to the end of its line.
+// On success the lexer is left on the CR LF of the empty line that ends the headers, which is not
+// consumed here; `filename` and `content_type` stay unset when the part did not supply them.
+// Fails with a MultipartParsingError on malformed syntax or when no `name` was supplied.
+static MultipartParsingErrorOr<MultiPartFormDataHeader> parse_multipart_form_data_header(GenericLexer& lexer)
+{
+    // 1. Let name, filename and contentType be null.
+    MultiPartFormDataHeader header;
+
+    // 2. While true:
+    while (true) {
+        // 1. If position points to a sequence of bytes starting with 0x0D 0x0A (CR LF):
+        if (lexer.next_is("\r\n"sv)) {
+            // 1. If name is null, return failure.
+            if (!header.name.has_value())
+                return MultipartParsingError { "Missing name parameter in Content-Disposition header"_string };
+
+            // 2. Return name, filename and contentType.
+            return header;
+        }
+
+        // 2. Let header name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x3A (:), given position.
+        auto header_name = lexer.consume_until(is_any_of("\n\r:"sv));
+
+        // 3. Remove any HTTP tab or space bytes from the start or end of header name.
+        header_name = header_name.trim(Infrastructure::HTTP_TAB_OR_SPACE, TrimMode::Both);
+
+        // 4. If header name does not match the field-name token production, return failure.
+        if (!Infrastructure::is_header_name(header_name.bytes()))
+            return MultipartParsingError { MUST(String::formatted("Invalid header name {}", header_name)) };
+
+        // 5. If the byte at position is not 0x3A (:), return failure.
+        // 6. Advance position by 1.
+        if (!lexer.consume_specific(':'))
+            return MultipartParsingError { MUST(String::formatted("Expected : at position {}", lexer.tell())) };
+
+        // 7. Collect a sequence of bytes that are HTTP tab or space bytes given position. (Do nothing with those bytes.)
+        lexer.ignore_while(Infrastructure::is_http_tab_or_space);
+
+        // 8. Byte-lowercase header name and switch on the result:
+        // -> `content-disposition`
+        if (header_name.equals_ignoring_ascii_case("content-disposition"sv)) {
+            // 1. Set name and filename to null.
+            header.name.clear();
+            header.filename.clear();
+
+            // 2. If position does not point to a sequence of bytes starting with `form-data; name="`, return failure.
+            // 3. Advance position so it points at the byte after the next 0x22 (") byte (the one in the sequence of bytes matched above).
+            if (!lexer.consume_specific("form-data; name=\""sv))
+                return MultipartParsingError { MUST(String::formatted("Expected `form-data; name=\"` at position {}", lexer.tell())) };
+
+            // 4. Set name to the result of parsing a multipart/form-data name given input and position, if the result is not failure. Otherwise, return failure.
+            header.name = TRY(parse_multipart_form_data_name(lexer));
+
+            // 5. If position points to a sequence of bytes starting with `; filename="`:
+            //     1. Advance position so it points at the byte after the next 0x22 (") byte (the one in the sequence of bytes matched above).
+            if (lexer.consume_specific("; filename=\""sv)) {
+                // 2. Set filename to the result of parsing a multipart/form-data name given input and position, if the result is not failure. Otherwise, return failure.
+                header.filename = TRY(parse_multipart_form_data_name(lexer));
+            }
+        }
+        // -> `content-type`
+        else if (header_name.equals_ignoring_ascii_case("content-type"sv)) {
+            // 1. Let header value be the result of collecting a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position.
+            auto header_value = lexer.consume_until(Infrastructure::is_http_newline);
+
+            // 2. Remove any HTTP tab or space bytes from the end of header value.
+            header_value = header_value.trim(Infrastructure::HTTP_TAB_OR_SPACE, TrimMode::Right);
+
+            // 3. Set contentType to the isomorphic decoding of header value.
+            header.content_type = Infra::isomorphic_decode(header_value.bytes());
+        }
+        // -> Otherwise
+        else {
+            // 1. Collect a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position. (Do nothing with those bytes.)
+            lexer.ignore_until(Infrastructure::is_http_newline);
+        }
+
+        // 9. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2 (past the newline).
+        if (!lexer.consume_specific("\r\n"sv))
+            return MultipartParsingError { MUST(String::formatted("Expected CRLF at position {}", lexer.tell())) };
+    }
+    // NOTE: Not reached; the loop above only ever exits through one of its return statements.
+}
+
+// https://andreubotella.github.io/multipart-form-data/#multipart-form-data-parser
+// Parses `input` as a multipart/form-data body whose delimiter is the `boundary` parameter of `mime_type`.
+// Returns the entries in the order their parts appear: a part with a filename becomes a File created in `realm`,
+// any other part becomes a string. Every syntax deviation, including bytes after the closing delimiter, is an error.
+MultipartParsingErrorOr<Vector<XHR::FormDataEntry>> parse_multipart_form_data(JS::Realm& realm, StringView input, MimeSniff::MimeType const& mime_type)
+{
+    // 1. Assert: mimeType’s essence is "multipart/form-data".
+    VERIFY(mime_type.essence() == "multipart/form-data"sv);
+
+    // 2. If mimeType’s parameters["boundary"] does not exist, return failure. Otherwise, let boundary be the result of UTF-8 decoding mimeType’s parameters["boundary"].
+    auto maybe_boundary = mime_type.parameters().get("boundary"sv);
+    if (!maybe_boundary.has_value())
+        return MultipartParsingError { "Missing boundary parameter in Content-Type header"_string };
+    auto boundary = maybe_boundary.release_value();
+    auto boundary_with_dashes = MUST(String::formatted("--{}", boundary));
+
+    // 3. Let entry list be an empty entry list.
+    Vector<XHR::FormDataEntry> entry_list;
+
+    // 4. Let position be a pointer to a byte in input, initially pointing at the first byte.
+    GenericLexer lexer(input);
+
+    // 5. While true:
+    while (true) {
+        // 1. If position points to a sequence of bytes starting with 0x2D 0x2D (`--`) followed by boundary, advance position by 2 + the length of boundary. Otherwise, return failure.
+        if (!lexer.consume_specific(boundary_with_dashes))
+            return MultipartParsingError { MUST(String::formatted("Expected `--` followed by boundary at position {}", lexer.tell())) };
+
+        // 2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A (`--` followed by CR LF) followed by the end of input, return entry list.
+        if (input.substring_view(lexer.tell()) == "--\r\n"sv)
+            return entry_list;
+
+        // 3. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure.
+        // 4. Advance position by 2. (This skips past the newline.)
+        if (!lexer.consume_specific("\r\n"sv))
+            return MultipartParsingError { MUST(String::formatted("Expected CRLF at position {}", lexer.tell())) };
+
+        // 5. Let name, filename and contentType be the result of parsing multipart/form-data headers on input and position, if the result is not failure. Otherwise, return failure.
+        auto header = TRY(parse_multipart_form_data_header(lexer));
+
+        // 6. Advance position by 2. (This skips past the empty line that marks the end of the headers.)
+        lexer.ignore(2);
+
+        // 7. Let body be the empty byte sequence.
+        // 8. Body loop: While position is not past the end of input:
+        //      1. Append the code point at position to body.
+        //      2. If body ends with boundary:
+        //          1. Remove the last 4 + (length of boundary) bytes from body.
+        //          2. Decrease position by 4 + (length of boundary).
+        //          3. Break out of body loop.
+        // NOTE: The scan stops at `--boundary`, so the two bytes before it are still in body; they are handed back to the lexer for step 9 to check.
+        constexpr size_t trailing_crlf_length = 2;
+        auto body = lexer.consume_until(boundary_with_dashes.bytes_as_string_view());
+        if (lexer.next_is(boundary_with_dashes.bytes_as_string_view()) && body.length() >= trailing_crlf_length) {
+            body = body.substring_view(0, body.length() - trailing_crlf_length);
+            lexer.retreat(trailing_crlf_length);
+        }
+
+        // 9. If position does not point to a sequence of bytes starting with 0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2.
+        if (!lexer.consume_specific("\r\n"sv))
+            return MultipartParsingError { MUST(String::formatted("Expected CRLF at position {}", lexer.tell())) };
+
+        // 10. If filename is not null:
+        Optional<XHR::FormDataEntryValue> value;
+        if (header.filename.has_value()) {
+            // 1. If contentType is null, set contentType to "text/plain".
+            if (!header.content_type.has_value())
+                header.content_type = "text/plain"_string;
+
+            // 2. If contentType is not an ASCII string, set contentType to the empty string.
+            if (!all_of(header.content_type->code_points(), is_ascii))
+                header.content_type = ""_string;
+
+            // 3. Let value be a new File object with name filename, type contentType, and body body.
+            auto blob = FileAPI::Blob::create(realm, MUST(ByteBuffer::copy(body.bytes())), header.content_type.release_value());
+            FileAPI::FilePropertyBag options {};
+            options.type = blob->type();
+            auto file = MUST(FileAPI::File::create(realm, { GC::make_root(blob) }, header.filename.release_value(), move(options)));
+            value = GC::make_root(file);
+        }
+        // 11. Otherwise:
+        else {
+            // 1. Let value be the UTF-8 decoding without BOM of body.
+            value = String::from_utf8_with_replacement_character(body, String::WithBOMHandling::No);
+        }
+
+        // 12. Assert: name is a scalar value string and value is either a scalar value string or a File object.
+        VERIFY(header.name.has_value() && value.has_value());
+
+        // 13. Create an entry with name and value, and append it to entry list.
+        entry_list.empend(header.name.release_value(), value.release_value());
+    }
+}
+
 }
diff --git a/Libraries/LibWeb/Fetch/Body.h b/Libraries/LibWeb/Fetch/Body.h
@@ -8,6 +8,9 @@
 #pragma once
 
 #include <AK/Forward.h>
+#include <AK/HashMap.h>
+#include <AK/Optional.h>
+#include <AK/String.h>
 #include <LibGC/Ptr.h>
 #include <LibJS/Forward.h>
 #include <LibWeb/Forward.h>
@@ -23,6 +26,24 @@ enum class PackageDataType {
     Text,
 };
 
+struct MultiPartFormDataHeader { // Values gathered from the header lines of one multipart/form-data part.
+    Optional<String> name;         // `name` parameter of the part's Content-Disposition header.
+    Optional<String> filename;     // `filename` parameter of that header, when one was given.
+    Optional<String> content_type; // Value of the part's Content-Type header, when one was given.
+};
+
+struct ContentDispositionHeader { // NOTE(review): not referenced by any code visible in this patch; confirm it is needed.
+    String type;                               // Presumably the disposition type (e.g. `form-data`); TODO confirm.
+    OrderedHashMap<String, String> parameters; // Presumably the header's parameters keyed by name; TODO confirm.
+};
+
+struct MultipartParsingError { // Failure value returned by the multipart/form-data parsing functions.
+    String message; // Human-readable description of what the parser expected or found missing.
+};
+
+template<typename T>
+using MultipartParsingErrorOr = ErrorOr<T, MultipartParsingError>; // Holds either a T or a MultipartParsingError.
+
 // https://fetch.spec.whatwg.org/#body-mixin
 class BodyMixin {
 public:
@@ -49,5 +70,6 @@ class BodyMixin {
 
 [[nodiscard]] WebIDL::ExceptionOr<JS::Value> package_data(JS::Realm&, ByteBuffer, PackageDataType, Optional<MimeSniff::MimeType> const&);
 [[nodiscard]] WebIDL::ExceptionOr<GC::Ref<WebIDL::Promise>> consume_body(JS::Realm&, BodyMixin const&, PackageDataType);
+[[nodiscard]] MultipartParsingErrorOr<Vector<XHR::FormDataEntry>> parse_multipart_form_data(JS::Realm&, StringView input, MimeSniff::MimeType const& mime_type);
 
 }
diff --git a/Libraries/LibWeb/Fetch/Infrastructure/HTTP.h b/Libraries/LibWeb/Fetch/Infrastructure/HTTP.h
@@ -37,6 +37,11 @@ constexpr bool is_http_tab_or_space(u32 const code_point)
     return code_point == 0x09 || code_point == 0x20;
 }
 
+constexpr bool is_http_newline(u32 const code_point)
+{
+    return code_point == 0x0D || code_point == 0x0A; // 0x0D (CR) or 0x0A (LF)
+}
+
 enum class HttpQuotedStringExtractValue {
     No,
     Yes,

diff --git a/Libraries/LibWeb/XHR/FormData.cpp b/Libraries/LibWeb/XHR/FormData.cpp
@@ -50,6 +50,11 @@ WebIDL::ExceptionOr<GC::Ref<FormData>> FormData::create(JS::Realm& realm, Vector
     return construct_impl(realm, move(list));
 }
 
+WebIDL::ExceptionOr<GC::Ref<FormData>> FormData::create(JS::Realm& realm, Vector<FormDataEntry> entry_list)
+{
+    return construct_impl(realm, move(entry_list)); // Hands the already-built entry list to construct_impl as-is.
+}
+
 FormData::FormData(JS::Realm& realm, Vector<FormDataEntry> entry_list)
     : PlatformObject(realm)
     , m_entry_list(move(entry_list))

diff --git a/Libraries/LibWeb/XHR/FormData.h b/Libraries/LibWeb/XHR/FormData.h
@@ -28,6 +28,7 @@ class FormData : public Bindings::PlatformObject {
     static WebIDL::ExceptionOr<GC::Ref<FormData>> construct_impl(JS::Realm&, Vector<FormDataEntry> entry_list);
 
     static WebIDL::ExceptionOr<GC::Ref<FormData>> create(JS::Realm&, Vector<DOMURL::QueryParam> entry_list);
+    static WebIDL::ExceptionOr<GC::Ref<FormData>> create(JS::Realm&, Vector<FormDataEntry> entry_list);
 
     WebIDL::ExceptionOr<void> append(String const& name, String const& value);
     WebIDL::ExceptionOr<void> append(String const& name, GC::Ref<FileAPI::Blob> const& blob_value, Optional<String> const& filename = {});

diff --git a/Tests/LibWeb/Text/expected/wpt-import/fetch/api/body/formdata.any.txt b/Tests/LibWeb/Text/expected/wpt-import/fetch/api/body/formdata.any.txt
@@ -0,0 +1,8 @@
+Harness status: OK
+
+Found 3 tests
+
+3 Pass
+Pass	Consume empty response.formData() as FormData
+Pass	Consume empty request.formData() as FormData
+Pass	Consume multipart/form-data headers case-insensitively
diff --git a/Tests/LibWeb/Text/expected/wpt-import/fetch/api/request/request-consume-empty.any.txt b/Tests/LibWeb/Text/expected/wpt-import/fetch/api/request/request-consume-empty.any.txt
@@ -0,0 +1,20 @@
+Harness status: OK
+
+Found 14 tests
+
+13 Pass
+1 Fail
+Pass	Consume request's body as text
+Pass	Consume request's body as blob
+Pass	Consume request's body as arrayBuffer
+Pass	Consume request's body as json (error case)
+Pass	Consume request's body as formData with correct multipart type (error case)
+Pass	Consume request's body as formData with correct urlencoded type
+Pass	Consume request's body as formData without correct type (error case)
+Pass	Consume empty blob request body as arrayBuffer
+Pass	Consume empty text request body as arrayBuffer
+Pass	Consume empty blob request body as text
+Pass	Consume empty text request body as text
+Pass	Consume empty URLSearchParams request body as text
+Fail	Consume empty FormData request body as text
+Pass	Consume empty ArrayBuffer request body as text