Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade HTTP header field-line parser #1908

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 20 additions & 102 deletions src/HttpHeader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "base64.h"
#include "globals.h"
#include "http/ContentLengthInterpreter.h"
#include "http/one/FieldParser.h"
#include "HttpHdrCc.h"
#include "HttpHdrContRange.h"
#include "HttpHdrScTarget.h" // also includes HttpHdrSc.h
Expand Down Expand Up @@ -1391,115 +1392,32 @@ HttpHeaderEntry::~HttpHeaderEntry()
HttpHeaderEntry *
HttpHeaderEntry::parse(const char *field_start, const char *field_end, const http_hdr_owner_type msgType)
{
/* note: name_start == field_start */
const char *name_end = (const char *)memchr(field_start, ':', field_end - field_start);
int name_len = name_end ? name_end - field_start :0;
const char *value_start = field_start + name_len + 1; /* skip ':' */
/* note: value_end == field_end */
try {
Http1::FieldParser tok(SBuf(field_start, (field_end-field_start)), msgType);
debugs(55, 8, "parsing field-line: near '" <<
Raw("buf", tok.remaining().rawContent(), std::min(tok.remaining().length(), SBuf::size_type(100))) << "'");

++ HeaderEntryParsedCount;
SBuf theName;
SBuf theValue;
tok.parseFieldLine(theName, theValue);

/* do we have a valid field name within this field? */
/* is it a "known" field? */
auto id = Http::HeaderLookupTable.lookup(theName.rawContent(), theName.length()).id;
debugs(55, 8, "got hdr-id=" << id);
if (id == Http::HdrType::BAD_HDR)
id = Http::HdrType::OTHER;

if (!name_len || name_end > field_end)
return nullptr;

if (name_len > 65534) {
/* String must be LESS THAN 64K and it adds a terminating NULL */
// TODO: update this to show proper name_len in Raw markup, but not print all that
debugs(55, 2, "ignoring huge header field (" << Raw("field_start", field_start, 100) << "...)");
return nullptr;
}

/*
* RFC 7230 section 3.2.4:
* "No whitespace is allowed between the header field-name and colon.
* ...
* A server MUST reject any received request message that contains
* whitespace between a header field-name and colon with a response code
* of 400 (Bad Request). A proxy MUST remove any such whitespace from a
* response message before forwarding the message downstream."
*/
if (xisspace(field_start[name_len - 1])) {

if (msgType == hoRequest)
return nullptr;

// for now, also let relaxed parser remove this BWS from any non-HTTP messages
const bool stripWhitespace = (msgType == hoReply) ||
Config.onoff.relaxed_header_parser;
if (!stripWhitespace)
return nullptr; // reject if we cannot strip

debugs(55, Config.onoff.relaxed_header_parser <= 0 ? 1 : 2,
"WARNING: Whitespace after header name in '" << getStringPrefix(field_start, field_end-field_start) << "'");

while (name_len > 0 && xisspace(field_start[name_len - 1]))
--name_len;

if (!name_len) {
debugs(55, 2, "found header with only whitespace for name");
return nullptr;
}
}

/* RFC 7230 section 3.2:
*
* header-field = field-name ":" OWS field-value OWS
* field-name = token
* token = 1*TCHAR
*/
for (const char *pos = field_start; pos < (field_start+name_len); ++pos) {
if (!CharacterSet::TCHAR[*pos]) {
debugs(55, 2, "found header with invalid characters in " <<
Raw("field-name", field_start, min(name_len,100)) << "...");
return nullptr;
}
}

/* now we know we can parse it */

debugs(55, 9, "parsing HttpHeaderEntry: near '" << getStringPrefix(field_start, field_end-field_start) << "'");

/* is it a "known" field? */
Http::HdrType id = Http::HeaderLookupTable.lookup(field_start,name_len).id;
debugs(55, 9, "got hdr-id=" << id);

SBuf theName;

String value;

if (id == Http::HdrType::BAD_HDR)
id = Http::HdrType::OTHER;

/* set field name */
if (id == Http::HdrType::OTHER)
theName.append(field_start, name_len);
else
theName = Http::HeaderLookupTable.lookup(id).name;

/* trim field value */
while (value_start < field_end && xisspace(*value_start))
++value_start;
debugs(55, 7, "got field-line: '" << theName << ": " << theValue << "'");
++ HeaderEntryParsedCount;
++ headerStatsTable[id].seenCount;

while (value_start < field_end && xisspace(field_end[-1]))
--field_end;
return new HttpHeaderEntry(id, theName, theValue.c_str());

if (field_end - value_start > 65534) {
/* String must be LESS THAN 64K and it adds a terminating NULL */
debugs(55, 2, "WARNING: found '" << theName << "' header of " << (field_end - value_start) << " bytes");
} catch (...) {
debugs(11, 2, "WARNING: rejected field-line with " << CurrentException);
++ headerStatsTable[Http::HdrType::BAD_HDR].seenCount;
return nullptr;
}

/* set field value */
value.assign(value_start, field_end - value_start);

if (id != Http::HdrType::BAD_HDR)
++ headerStatsTable[id].seenCount;

debugs(55, 9, "parsed HttpHeaderEntry: '" << theName << ": " << value << "'");

return new HttpHeaderEntry(id, theName, value.termedBuf());
}

HttpHeaderEntry *
Expand Down
113 changes: 113 additions & 0 deletions src/http/one/FieldParser.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Copyright (C) 1996-2023 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
* Please see the COPYING and CONTRIBUTORS files for details.
*/

#include "squid.h"
#include "base/Raw.h"
#include "http/one/FieldParser.h"
#include "sbuf/Stream.h"
#include "SquidConfig.h"
#include "debug/Stream.h"

Http::One::FieldParser::FieldParser(const SBuf &aBuf, const http_hdr_owner_type &aType) :
Http::One::Parser(),
msgType(aType),
tok(aBuf)
{}

/**
* RFC 9112 section 5:
*
* field-line = field-name ":" OWS field-value OWS
*/
void
Http::One::FieldParser::parseFieldLine(SBuf &name, SBuf &value)
{
name = parseFieldName(); // consumes ':' delimiter
value = parseFieldValue();

if (!tok.atEnd())
skipLineTerminator(tok);
}

/**
* RFC 9110 section 5.1:
*
* field-name = token
* token = 1*TCHAR
*/
SBuf
Http::One::FieldParser::parseFieldName()
{
// TODO: handle pseudo-header which begin with ':'
yadij marked this conversation as resolved.
Show resolved Hide resolved

auto name = tok.prefix("field-name", CharacterSet::TCHAR);

/*
* RFC 9112 section 5.1:
* "No whitespace is allowed between the field name and colon.
* ...
* A server MUST reject, with a response status code of 400 (Bad Request),
* any received request message that contains whitespace between a header
* field name and colon. A proxy MUST remove any such whitespace from a
* response message before forwarding the message downstream."
*/
const bool stripWhitespace = (msgType == hoReply) || Config.onoff.relaxed_header_parser;
if (stripWhitespace && tok.skipAll(Http1::Parser::WhitespaceCharacters())) {
// TODO: reduce log spam from 'tok.buf()' below
debugs(11, Config.onoff.relaxed_header_parser <= 0 ? 2 : 3,
"WARNING: Whitespace after field-name '" << name << tok.buf() << "'");
}

if(!tok.skip(':'))
throw TextException("invalid field-name", Here());

if (name.length() == 0)
throw TextException("missing field-name", Here());

if (name.length() > 65534) {
/* String must be LESS THAN 64K and it adds a terminating NULL */
// TODO: update this to show proper name.length() in Raw markup, but not print all that
throw TextException(ToSBuf("huge field-line (", Raw("field-name", name.c_str(), 100), "...)"), Here());
}

return name;
}

/**
* RFC 9110 section 5.1:
*
* field-value = *field-content
* field-content = field-vchar
* [ 1*( SP / HTAB / field-vchar ) field-vchar ]
* field-vchar = VCHAR / obs-text
* obs-text = %x80-FF
*/
SBuf
Http::One::FieldParser::parseFieldValue()
{
static const CharacterSet fvChars = (CharacterSet::VCHAR + CharacterSet::OBSTEXT).rename("field-value");
auto value = tok.prefix("field-value", fvChars);

/**
* RFC 9110 section 5.5:
* field value does not include leading or trailing whitespace.
* ... parsing implementation MUST exclude such whitespace prior
* to evaluating the field value
*/
const auto start = value.findFirstNotOf(Http1::Parser::WhitespaceCharacters());
const auto end = value.findLastNotOf(Http1::Parser::WhitespaceCharacters());
value.chop(start, (end-start));

if (value.length() > 65534) {
/* String must be LESS THAN 64K and it adds a terminating NULL */
// TODO: update this to show proper length() in Raw markup, but not print all that
throw TextException(ToSBuf("huge field-line (", Raw("field-value", value.c_str(), 100), "...)"), Here());
}

return value;
}
58 changes: 58 additions & 0 deletions src/http/one/FieldParser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (C) 1996-2023 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
* Please see the COPYING and CONTRIBUTORS files for details.
*/

#ifndef SQUID_SRC_HTTP_ONE_FIELDPARSER_H
#define SQUID_SRC_HTTP_ONE_FIELDPARSER_H

#include "http/one/Parser.h"
#include "HttpHeader.h"
#include "parser/Tokenizer.h"

namespace Http {
namespace One {

/** HTTP/1.x header field parser
*
* Works on a raw character I/O buffer and tokenizes the content into
* the field-lines of an HTTP/1 message:
*
* RFC 9112 section 5:
* field-line = field-name ":" OWS field-value OWS
*/
class FieldParser : public Http1::Parser
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please do not derive a field parser from a message prefix parser. There is no "X is a Y" relationship between these two concepts.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sigh. You vetoed extracting the useful parts of Http1::Parser out into a separate class this one should have been importing them from. I see this is going to have to be even simpler for you.

{
public:
FieldParser(const SBuf &, const http_hdr_owner_type &);

/// extract a field name and value from the buffer being parsed.
void parseFieldLine(SBuf &name, SBuf &value);

/* Http1::Parser API */
void clear() { tok.reset(SBuf()); }
bool parse(const SBuf &newBuf) override { const bool e = tok.atEnd(); tok.reset(newBuf); return e; }

private:
SBuf parseFieldName();
SBuf parseFieldValue();

/* Http1::Parser API */
size_type firstLineSize() const override { return 0; }

// Whether a request or response message is being parsed.
// Some parser validation and tolerance depends on type.
const http_hdr_owner_type msgType;

// low-level tokenizer to use for parsing.
// owns and manages the buffer being parsed.
Http1::Parser::Tokenizer tok;
};

} // namespace One
} // namespace Http

#endif /* SQUID_SRC_HTTP_ONE_FIELDPARSER_H */
2 changes: 2 additions & 0 deletions src/http/one/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ include $(top_srcdir)/src/Common.am
noinst_LTLIBRARIES = libhttp1.la

libhttp1_la_SOURCES = \
FieldParser.cc \
FieldParser.h \
Parser.cc \
Parser.h \
RequestParser.cc \
Expand Down
Loading