Skip to content

Commit

Permalink
bpo-46932: Update bundled libexpat to 2.4.7 (GH-31736)
Browse files Browse the repository at this point in the history
(cherry picked from commit 176835c)

Co-authored-by: Steve Dower <steve.dower@python.org>
  • Loading branch information
miss-islington and zooba authored Mar 7, 2022
1 parent e1639f3 commit f46a044
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 14 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update bundled libexpat to 2.4.7
22 changes: 18 additions & 4 deletions Modules/expat/expat.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Copyright (c) 2016 Cristian Rodríguez <crrodriguez@opensuse.org>
Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
Expand Down Expand Up @@ -174,8 +175,10 @@ struct XML_cp {
};

/* This is called for an element declaration. See above for
description of the model argument. It's the caller's responsibility
to free model when finished with it.
description of the model argument. It's the user code's responsibility
to free model when finished with it. See XML_FreeContentModel.
There is no need to free the model from the handler, it can be kept
around and freed at a later stage.
*/
typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData,
const XML_Char *name,
Expand Down Expand Up @@ -237,6 +240,17 @@ XML_ParserCreate(const XML_Char *encoding);
and the local part will be concatenated without any separator.
It is a programming error to use the separator '\0' with namespace
triplets (see XML_SetReturnNSTriplet).
If a namespace separator is chosen that can be part of a URI or
part of an XML name, splitting an expanded name back into its
1, 2 or 3 original parts on application level in the element handler
may end up vulnerable, so these are advised against; sane choices for
a namespace separator are e.g. '\n' (line feed) and '|' (pipe).
Note that Expat does not validate namespace URIs (beyond encoding)
against RFC 3986 today (and is not required to do so with regard to
the XML 1.0 namespaces specification) but it may start doing that
in future releases. Before that, an application using Expat must
be ready to receive namespace URIs containing non-URI characters.
*/
XMLPARSEAPI(XML_Parser)
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
Expand Down Expand Up @@ -317,7 +331,7 @@ typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData,
const XML_Char *pubid,
int has_internal_subset);

/* This is called for the start of the DOCTYPE declaration when the
/* This is called for the end of the DOCTYPE declaration when the
closing > is encountered, but after processing any external
subset.
*/
Expand Down Expand Up @@ -1041,7 +1055,7 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
*/
#define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 4
#define XML_MICRO_VERSION 6
#define XML_MICRO_VERSION 7

#ifdef __cplusplus
}
Expand Down
147 changes: 137 additions & 10 deletions Modules/expat/xmlparse.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6+)
/* fcb1a62fefa945567301146eb98e3ad3413e823a41c4378e84e8b6b6f308d824 (2.4.7+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
Expand Down Expand Up @@ -34,6 +34,7 @@
Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
Expand Down Expand Up @@ -133,7 +134,7 @@
* BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
* Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
* Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
* Windows >=Vista (rand_s): _WIN32. \
\
If insist on not using any of these, bypass this error by defining \
Expand Down Expand Up @@ -722,6 +723,7 @@ XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
return XML_ParserCreate_MM(encodingName, NULL, tmp);
}

// "xml=http://www.w3.org/XML/1998/namespace"
static const XML_Char implicitContext[]
= {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
Expand Down Expand Up @@ -3704,12 +3706,124 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
return XML_ERROR_NONE;
}

static XML_Bool
is_rfc3986_uri_char(XML_Char candidate) {
// For the RFC 3986 ANBF grammar see
// https://datatracker.ietf.org/doc/html/rfc3986#appendix-A

switch (candidate) {
// From rule "ALPHA" (uppercase half)
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
case 'H':
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case 'Q':
case 'R':
case 'S':
case 'T':
case 'U':
case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':

// From rule "ALPHA" (lowercase half)
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
case 'u':
case 'v':
case 'w':
case 'x':
case 'y':
case 'z':

// From rule "DIGIT"
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':

// From rule "pct-encoded"
case '%':

// From rule "unreserved"
case '-':
case '.':
case '_':
case '~':

// From rule "gen-delims"
case ':':
case '/':
case '?':
case '#':
case '[':
case ']':
case '@':

// From rule "sub-delims"
case '!':
case '$':
case '&':
case '\'':
case '(':
case ')':
case '*':
case '+':
case ',':
case ';':
case '=':
return XML_TRUE;

default:
return XML_FALSE;
}
}

/* addBinding() overwrites the value of prefix->binding without checking.
Therefore one must keep track of the old value outside of addBinding().
*/
static enum XML_Error
addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
const XML_Char *uri, BINDING **bindingsPtr) {
// "http://www.w3.org/XML/1998/namespace"
static const XML_Char xmlNamespace[]
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
Expand All @@ -3720,6 +3834,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
ASCII_e, '\0'};
static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
// "http://www.w3.org/2000/xmlns/"
static const XML_Char xmlnsNamespace[]
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
Expand Down Expand Up @@ -3760,14 +3875,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
isXMLNS = XML_FALSE;

// NOTE: While Expat does not validate namespace URIs against RFC 3986,
// we have to at least make sure that the XML processor on top of
// Expat (that is splitting tag names by namespace separator into
// 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
// by an attacker putting additional namespace separator characters
// into namespace declarations. That would be ambiguous and not to
// be expected.
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
// NOTE: While Expat does not validate namespace URIs against RFC 3986
// today (and is not REQUIRED to do so with regard to the XML 1.0
// namespaces specification) we have to at least make sure, that
// the application on top of Expat (that is likely splitting expanded
// element names ("qualified names") of form
// "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
// in its element handler code) cannot be confused by an attacker
// putting additional namespace separator characters into namespace
// declarations. That would be ambiguous and not to be expected.
//
// While the HTML API docs of function XML_ParserCreateNS have been
// advising against use of a namespace separator character that can
// appear in a URI for >20 years now, some widespread applications
// are using URI characters (':' (colon) in particular) for a
// namespace separator, in practice. To keep these applications
// functional, we only reject namespaces URIs containing the
// application-chosen namespace separator if the chosen separator
// is a non-URI character with regard to RFC 3986.
if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
&& ! is_rfc3986_uri_char(uri[len])) {
return XML_ERROR_SYNTAX;
}
}
Expand Down

0 comments on commit f46a044

Please sign in to comment.