diff --git a/src/HttpHeader.cc b/src/HttpHeader.cc index a251328c3df..09677769a89 100644 --- a/src/HttpHeader.cc +++ b/src/HttpHeader.cc @@ -16,6 +16,7 @@ #include "base64.h" #include "globals.h" #include "http/ContentLengthInterpreter.h" +#include "http/one/FieldParser.h" #include "HttpHdrCc.h" #include "HttpHdrContRange.h" #include "HttpHdrScTarget.h" // also includes HttpHdrSc.h @@ -1391,115 +1392,32 @@ HttpHeaderEntry::~HttpHeaderEntry() HttpHeaderEntry * HttpHeaderEntry::parse(const char *field_start, const char *field_end, const http_hdr_owner_type msgType) { - /* note: name_start == field_start */ - const char *name_end = (const char *)memchr(field_start, ':', field_end - field_start); - int name_len = name_end ? name_end - field_start :0; - const char *value_start = field_start + name_len + 1; /* skip ':' */ - /* note: value_end == field_end */ + try { + Http1::FieldParser tok(SBuf(field_start, (field_end-field_start)), msgType); + debugs(55, 8, "parsing field-line: near '" << + Raw("buf", tok.remaining().rawContent(), std::min(tok.remaining().length(), SBuf::size_type(100))) << "'"); - ++ HeaderEntryParsedCount; + SBuf theName; + SBuf theValue; + tok.parseFieldLine(theName, theValue); - /* do we have a valid field name within this field? */ + /* is it a "known" field? */ + auto id = Http::HeaderLookupTable.lookup(theName.rawContent(), theName.length()).id; + debugs(55, 8, "got hdr-id=" << id); + if (id == Http::HdrType::BAD_HDR) + id = Http::HdrType::OTHER; - if (!name_len || name_end > field_end) - return nullptr; - - if (name_len > 65534) { - /* String must be LESS THAN 64K and it adds a terminating NULL */ - // TODO: update this to show proper name_len in Raw markup, but not print all that - debugs(55, 2, "ignoring huge header field (" << Raw("field_start", field_start, 100) << "...)"); - return nullptr; - } - - /* - * RFC 7230 section 3.2.4: - * "No whitespace is allowed between the header field-name and colon. - * ... - * A server MUST reject any received request message that contains - * whitespace between a header field-name and colon with a response code - * of 400 (Bad Request). A proxy MUST remove any such whitespace from a - * response message before forwarding the message downstream." - */ - if (xisspace(field_start[name_len - 1])) { - - if (msgType == hoRequest) - return nullptr; - - // for now, also let relaxed parser remove this BWS from any non-HTTP messages - const bool stripWhitespace = (msgType == hoReply) || - Config.onoff.relaxed_header_parser; - if (!stripWhitespace) - return nullptr; // reject if we cannot strip - - debugs(55, Config.onoff.relaxed_header_parser <= 0 ? 1 : 2, - "WARNING: Whitespace after header name in '" << getStringPrefix(field_start, field_end-field_start) << "'"); - - while (name_len > 0 && xisspace(field_start[name_len - 1])) - --name_len; - - if (!name_len) { - debugs(55, 2, "found header with only whitespace for name"); - return nullptr; - } - } - - /* RFC 7230 section 3.2: - * - * header-field = field-name ":" OWS field-value OWS - * field-name = token - * token = 1*TCHAR - */ - for (const char *pos = field_start; pos < (field_start+name_len); ++pos) { - if (!CharacterSet::TCHAR[*pos]) { - debugs(55, 2, "found header with invalid characters in " << - Raw("field-name", field_start, min(name_len,100)) << "..."); - return nullptr; - } - } - - /* now we know we can parse it */ - - debugs(55, 9, "parsing HttpHeaderEntry: near '" << getStringPrefix(field_start, field_end-field_start) << "'"); - - /* is it a "known" field? */ - Http::HdrType id = Http::HeaderLookupTable.lookup(field_start,name_len).id; - debugs(55, 9, "got hdr-id=" << id); - - SBuf theName; - - String value; - - if (id == Http::HdrType::BAD_HDR) - id = Http::HdrType::OTHER; - - /* set field name */ - if (id == Http::HdrType::OTHER) - theName.append(field_start, name_len); - else - theName = Http::HeaderLookupTable.lookup(id).name; - - /* trim field value */ - while (value_start < field_end && xisspace(*value_start)) - ++value_start; + debugs(55, 7, "got field-line: '" << theName << ": " << theValue << "'"); + ++ HeaderEntryParsedCount; + ++ headerStatsTable[id].seenCount; - while (value_start < field_end && xisspace(field_end[-1])) - --field_end; + return new HttpHeaderEntry(id, theName, theValue.c_str()); - if (field_end - value_start > 65534) { - /* String must be LESS THAN 64K and it adds a terminating NULL */ - debugs(55, 2, "WARNING: found '" << theName << "' header of " << (field_end - value_start) << " bytes"); + } catch (...) { + debugs(11, 2, "WARNING: rejected field-line with " << CurrentException); + ++ headerStatsTable[Http::HdrType::BAD_HDR].seenCount; return nullptr; } - - /* set field value */ - value.assign(value_start, field_end - value_start); - - if (id != Http::HdrType::BAD_HDR) - ++ headerStatsTable[id].seenCount; - - debugs(55, 9, "parsed HttpHeaderEntry: '" << theName << ": " << value << "'"); - - return new HttpHeaderEntry(id, theName, value.termedBuf()); } HttpHeaderEntry * diff --git a/src/http/one/FieldParser.cc b/src/http/one/FieldParser.cc new file mode 100644 index 00000000000..5ffa1033a73 --- /dev/null +++ b/src/http/one/FieldParser.cc @@ -0,0 +1,111 @@ +/* + * Copyright (C) 1996-2023 The Squid Software Foundation and contributors + * + * Squid software is distributed under GPLv2+ license and includes + * contributions from numerous individuals and organizations. + * Please see the COPYING and CONTRIBUTORS files for details. + */ + +#include "squid.h" +#include "base/Raw.h" +#include "http/one/FieldParser.h" +#include "sbuf/Stream.h" +#include "SquidConfig.h" +#include "debug/Stream.h" + +Http::One::FieldParser::FieldParser(const SBuf &aBuf, const http_hdr_owner_type &aType) : + Http::One::Parser(), + msgType(aType), + tok(aBuf) +{} + +/** + * RFC 9112 section 5: + * + * field-line = field-name ":" OWS field-value OWS + */ +void +Http::One::FieldParser::parseFieldLine(SBuf &name, SBuf &value) +{ + name = parseFieldName(); // consumes ':' delimiter + value = parseFieldValue(); + + if (!tok.atEnd()) + skipLineTerminator(tok); +} + +/** + * RFC 9110 section 5.1: + * + * field-name = token + * token = 1*TCHAR + */ +SBuf +Http::One::FieldParser::parseFieldName() +{ + auto name = tok.prefix("field-name", CharacterSet::TCHAR); + + /* + * RFC 9112 section 5.1: + * "No whitespace is allowed between the field name and colon. + * ... + * A server MUST reject, with a response status code of 400 (Bad Request), + * any received request message that contains whitespace between a header + * field name and colon. A proxy MUST remove any such whitespace from a + * response message before forwarding the message downstream." + */ + const bool stripWhitespace = (msgType == hoReply) || Config.onoff.relaxed_header_parser; + if (stripWhitespace && tok.skipAll(Http1::Parser::WhitespaceCharacters())) { + // TODO: reduce log spam from 'tok.buf()' below + debugs(11, Config.onoff.relaxed_header_parser <= 0 ? 2 : 3, + "WARNING: Whitespace after field-name '" << name << tok.buf() << "'"); + } + + if(!tok.skip(':')) + throw TextException("invalid field-name", Here()); + + if (name.length() == 0) + throw TextException("missing field-name", Here()); + + if (name.length() > 65534) { + /* String must be LESS THAN 64K and it adds a terminating NULL */ + // TODO: update this to show proper name.length() in Raw markup, but not print all that + throw TextException(ToSBuf("huge field-line (", Raw("field-name", name.c_str(), 100), "...)"), Here()); + } + + return name; +} + +/** + * RFC 9110 section 5.1: + * + * field-value = *field-content + * field-content = field-vchar + * [ 1*( SP / HTAB / field-vchar ) field-vchar ] + * field-vchar = VCHAR / obs-text + * obs-text = %x80-FF + */ +SBuf +Http::One::FieldParser::parseFieldValue() +{ + static const CharacterSet fvChars = (CharacterSet::VCHAR + CharacterSet::OBSTEXT).rename("field-value"); + auto value = tok.prefix("field-value", fvChars); + + /** + * RFC 9110 section 5.5: + * field value does not include leading or trailing whitespace. + * ... parsing implementation MUST exclude such whitespace prior + * to evaluating the field value + */ + const auto start = value.findFirstNotOf(Http1::Parser::WhitespaceCharacters()); + const auto end = value.findLastNotOf(Http1::Parser::WhitespaceCharacters()); + value.chop(start, (end-start)); + + if (value.length() > 65534) { + /* String must be LESS THAN 64K and it adds a terminating NULL */ + // TODO: update this to show proper length() in Raw markup, but not print all that + throw TextException(ToSBuf("huge field-line (", Raw("field-value", value.c_str(), 100), "...)"), Here()); + } + + return value; +} diff --git a/src/http/one/FieldParser.h b/src/http/one/FieldParser.h new file mode 100644 index 00000000000..d953dad6571 --- /dev/null +++ b/src/http/one/FieldParser.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 1996-2023 The Squid Software Foundation and contributors + * + * Squid software is distributed under GPLv2+ license and includes + * contributions from numerous individuals and organizations. + * Please see the COPYING and CONTRIBUTORS files for details. + */ + +#ifndef SQUID_SRC_HTTP_ONE_FIELDPARSER_H +#define SQUID_SRC_HTTP_ONE_FIELDPARSER_H + +#include "http/one/Parser.h" +#include "HttpHeader.h" +#include "parser/Tokenizer.h" + +namespace Http { +namespace One { + +/** HTTP/1.x header field parser + * + * Works on a raw character I/O buffer and tokenizes the content into + * the field-lines of an HTTP/1 message: + * + * RFC 9112 section 5: + * field-line = field-name ":" OWS field-value OWS + */ +class FieldParser : public Http1::Parser +{ +public: + FieldParser(const SBuf &, const http_hdr_owner_type &); + + /// extract a field name and value from the buffer being parsed. + void parseFieldLine(SBuf &name, SBuf &value); + + /* Http1::Parser API */ + void clear() { tok.reset(SBuf()); } + bool parse(const SBuf &newBuf) override { const bool e = tok.atEnd(); tok.reset(newBuf); return e; } + +private: + SBuf parseFieldName(); + SBuf parseFieldValue(); + + /* Http1::Parser API */ + size_type firstLineSize() const override { return 0; } + + // Whether a request or response message is being parsed. + // Some parser validation and tolerance depends on type. + const http_hdr_owner_type msgType; + + // low-level tokenizer to use for parsing. + // owns and manages the buffer being parsed. + Http1::Parser::Tokenizer tok; +}; + +} // namespace One +} // namespace Http + +#endif /* SQUID_SRC_HTTP_ONE_FIELDPARSER_H */ diff --git a/src/http/one/Makefile.am b/src/http/one/Makefile.am index e196047cdbb..ffef8fdbd1e 100644 --- a/src/http/one/Makefile.am +++ b/src/http/one/Makefile.am @@ -10,6 +10,8 @@ include $(top_srcdir)/src/Common.am noinst_LTLIBRARIES = libhttp1.la libhttp1_la_SOURCES = \ + FieldParser.cc \ + FieldParser.h \ Parser.cc \ Parser.h \ RequestParser.cc \