Skip to content

Commit

Permalink
parser: Remove xmlErrEncoding
Browse files Browse the repository at this point in the history
Use xmlFatalErr or xmlCtxtErrIO.
  • Loading branch information
nwellnhof committed Dec 21, 2023
1 parent 9fbe46b commit 8d0aaf4
Show file tree
Hide file tree
Showing 13 changed files with 66 additions and 113 deletions.
15 changes: 1 addition & 14 deletions HTMLparser.c
Original file line number Diff line number Diff line change
Expand Up @@ -490,20 +490,7 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
}

encoding_error:
{
char buffer[150];

if (ctxt->input->end - ctxt->input->cur >= 4) {
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
} else {
snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);
}
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"Input is not proper UTF-8, indicate encoding !\n",
BAD_CAST buffer, NULL);
}
xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);

if ((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0)
xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
Expand Down
16 changes: 16 additions & 0 deletions error.c
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,23 @@ xmlReportError(xmlParserCtxtPtr ctxt, const xmlError *err)
}

if (ctxt != NULL) {
if ((input != NULL) &&
((input->buf == NULL) || (input->buf->encoder == NULL)) &&
(code == XML_ERR_INVALID_ENCODING) &&
(input->cur < input->end)) {
int i;

channel(data, "Bytes:");
for (i = 0; i < 4; i++) {
if (input->cur + i >= input->end)
break;
channel(data, " 0x%02X", input->cur[i]);
}
channel(data, "\n");
}

xmlParserPrintFileContextInternal(input, channel, data);

if (cur != NULL) {
if (cur->filename)
channel(data, "%s:%d: \n", cur->filename, cur->line);
Expand Down
4 changes: 0 additions & 4 deletions include/private/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,6 @@ XML_HIDDEN void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar *str1, const xmlChar *str2);
XML_HIDDEN void
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
const char *msg, const xmlChar *str1,
const xmlChar *str2) LIBXML_ATTR_FORMAT(3,0);
XML_HIDDEN void
xmlHaltParser(xmlParserCtxtPtr ctxt);
XML_HIDDEN int
xmlParserGrow(xmlParserCtxtPtr ctxt);
Expand Down
17 changes: 4 additions & 13 deletions parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -11877,19 +11877,10 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
done:
return(ret);
encoding_error:
if (ctxt->input->end - ctxt->input->cur < 4) {
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n",
NULL, NULL);
} else {
char buffer[150];

snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n%s",
BAD_CAST buffer, NULL);
/* Only report the first error */
if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
}
return(0);
}
Expand Down
71 changes: 9 additions & 62 deletions parserInternals.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,24 +245,6 @@ xmlErrParser(xmlParserCtxtPtr ctxt, xmlNodePtr node,
va_end(ap);
}

/**
* __xmlErrEncoding:
* @ctxt: an XML parser context
* @xmlerr: the error number
* @msg: the error message
* @str1: an string info
* @str2: an string info
*
* Handle an encoding error
*/
void
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
const char *msg, const xmlChar * str1, const xmlChar * str2)
{
xmlErrParser(ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
str1, str2, NULL, 0, msg, str1, str2);
}

/**
* xmlErrInternal:
* @ctxt: an XML parser context
Expand Down Expand Up @@ -659,21 +641,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
encoding_error:
/* Only report the first error */
if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
if ((ctxt == NULL) || (ctxt->input == NULL) ||
(ctxt->input->end - ctxt->input->cur < 4)) {
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n",
NULL, NULL);
} else {
char buffer[150];

snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n%s",
BAD_CAST buffer, NULL);
}
xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
}
ctxt->input->cur++;
Expand Down Expand Up @@ -809,20 +777,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
encoding_error:
/* Only report the first error */
if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
if (ctxt->input->end - ctxt->input->cur < 4) {
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n",
NULL, NULL);
} else {
char buffer[150];

snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n%s",
BAD_CAST buffer, NULL);
}
xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
}
*len = 1;
Expand Down Expand Up @@ -1050,14 +1005,9 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
}

if (res != 0) {
if (res == XML_ERR_UNSUPPORTED_ENCODING) {
const char *name = xmlGetCharEncodingName(enc);
const char *name = xmlGetCharEncodingName(enc);

__xmlErrEncoding(ctxt, res, "encoding not supported: %s\n",
BAD_CAST (name ? name : "<null>"), NULL);
} else {
xmlFatalErr(ctxt, res, NULL);
}
xmlFatalErr(ctxt, res, (name ? name : "<null>"));
return(-1);
}

Expand Down Expand Up @@ -1090,13 +1040,12 @@ xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) {
xmlCharEncodingHandlerPtr handler;
int res;

if (encoding == NULL)
return(-1);

res = xmlOpenCharEncodingHandler(encoding, &handler);
if (res != 0) {
if (res == XML_ERR_UNSUPPORTED_ENCODING)
__xmlErrEncoding(ctxt, res, "Unsupported encoding: %s\n",
(const xmlChar *) encoding, NULL);
else
xmlFatalErr(ctxt, res, NULL);
xmlFatalErr(ctxt, res, encoding);
return(-1);
}

Expand Down Expand Up @@ -1188,9 +1137,7 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
if (nbchars == XML_ENC_ERR_MEMORY) {
xmlErrMemory(ctxt);
} else if (nbchars < 0) {
xmlErrInternal(ctxt,
"switching encoding: encoder error\n",
NULL);
xmlCtxtErrIO(ctxt, in->error, NULL);
xmlHaltParser(ctxt);
return (-1);
}
Expand Down
2 changes: 1 addition & 1 deletion result/errors/754947.xml.ent
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
./test/errors/754947.xml:1: I/O error : Invalid bytes in character encoding
Bytes: 0xEE 0x5D 0x5D 0x3E
<d><![CDATA[0000000000000
^
2 changes: 1 addition & 1 deletion result/errors/754947.xml.err
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
./test/errors/754947.xml:1: I/O error : Invalid bytes in character encoding
Bytes: 0xEE 0x5D 0x5D 0x3E
<d><![CDATA[0000000000000
^
2 changes: 1 addition & 1 deletion result/errors/754947.xml.str
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
./test/errors/754947.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
./test/errors/754947.xml:1: I/O error : Invalid bytes in character encoding
Bytes: 0xEE 0x5D 0x5D 0x3E
<d><![CDATA[0000000000000
^
Expand Down
2 changes: 1 addition & 1 deletion result/errors/cdata.xml.ent
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding !
./test/errors/cdata.xml:2: I/O error : Invalid bytes in character encoding
Bytes: 0xE1 0x72 0x5D 0x5D
<A><![CDATA[C
^
2 changes: 1 addition & 1 deletion result/errors/cdata.xml.err
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding !
./test/errors/cdata.xml:2: I/O error : Invalid bytes in character encoding
Bytes: 0xE1 0x72 0x5D 0x5D
<A><![CDATA[C
^
2 changes: 1 addition & 1 deletion result/errors/cdata.xml.str
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
./test/errors/cdata.xml:2: parser error : Input is not proper UTF-8, indicate encoding !
./test/errors/cdata.xml:2: I/O error : Invalid bytes in character encoding
Bytes: 0xE1 0x72 0x5D 0x5D
<A><![CDATA[C
^
Expand Down
16 changes: 16 additions & 0 deletions runtest.c
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,23 @@ testStructuredErrorHandler(void *ctx ATTRIBUTE_UNUSED, const xmlError *err) {
return;

if (ctxt != NULL) {
/*
 * For XML_ERR_INVALID_ENCODING on an input that has no buffer or no
 * encoder, dump up to four bytes starting at the current position
 * before the file context is printed.
 */
if ((input != NULL) &&
    ((input->buf == NULL) || (input->buf->encoder == NULL)) &&
    (code == XML_ERR_INVALID_ENCODING) &&
    (input->cur < input->end)) {
    int i;

    channel(data, "Bytes:");
    for (i = 0; i < 4; i++) {
        /*
         * Fewer than four bytes remain: stop dumping, but only leave
         * the loop. The "Bytes:" line must still be terminated and the
         * rest of the error report must still be emitted.
         */
        if (input->cur + i >= input->end)
            break;
        channel(data, " 0x%02X", input->cur[i]);
    }
    channel(data, "\n");
}

xmlParserPrintFileContextInternal(input, channel, data);

if (cur != NULL) {
if (cur->filename)
channel(data, "%s:%d: \n", cur->filename, cur->line);
Expand Down
28 changes: 14 additions & 14 deletions testchar.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,9 +310,9 @@ static int testCharRangeByte1(xmlParserCtxtPtr ctxt) {
c = testCurrentChar(ctxt, &len);
if (c < 0)
continue;
if ((i == 0) || (i >= 0x80)) {
if (i >= 0x80) {
/* we must see an error there */
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char for Byte 0x%02X\n", i);
return(1);
Expand Down Expand Up @@ -349,7 +349,7 @@ static int testCharRangeByte2(xmlParserCtxtPtr ctxt) {

/* if first bit of first char is set, then second bit must too */
if ((i & 0x80) && ((i & 0x40) == 0)) {
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
i, j);
Expand All @@ -362,7 +362,7 @@ static int testCharRangeByte2(xmlParserCtxtPtr ctxt) {
* bits must be 10
*/
else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
i, j, c);
Expand All @@ -375,7 +375,7 @@ static int testCharRangeByte2(xmlParserCtxtPtr ctxt) {
* than 0x80, i.e. one of bits 5 to 1 of i must be set
*/
else if ((i & 0x80) && ((i & 0x1E) == 0)) {
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
i, j, c);
Expand All @@ -388,7 +388,7 @@ static int testCharRangeByte2(xmlParserCtxtPtr ctxt) {
* at least 3 bytes, but we give only 2 !
*/
else if ((i & 0xE0) == 0xE0) {
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
i, j);
Expand Down Expand Up @@ -446,7 +446,7 @@ static int testCharRangeByte3(xmlParserCtxtPtr ctxt) {
* at least 4 bytes, but we give only 3 !
*/
if ((i & 0xF0) == 0xF0) {
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
i, j, K, data[3]);
Expand All @@ -458,7 +458,7 @@ static int testCharRangeByte3(xmlParserCtxtPtr ctxt) {
* The second and the third bytes must start with 10
*/
else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80)) {
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
i, j, K);
Expand All @@ -472,7 +472,7 @@ static int testCharRangeByte3(xmlParserCtxtPtr ctxt) {
* the 6th bit of data[1] must be set
*/
else if (((i & 0xF) == 0) && ((j & 0x20) == 0)) {
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
i, j, K);
Expand All @@ -484,7 +484,7 @@ static int testCharRangeByte3(xmlParserCtxtPtr ctxt) {
* There are values that are not allowed in UTF-8
*/
else if ((value > 0xD7FF) && (value <0xE000)) {
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X\n",
value, i, j, K);
Expand Down Expand Up @@ -548,7 +548,7 @@ static int testCharRangeByte4(xmlParserCtxtPtr ctxt) {
* at least 5 bytes, but we give only 4 !
*/
if ((i & 0xF8) == 0xF8) {
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
i, j, K, data[3]);
Expand All @@ -561,7 +561,7 @@ static int testCharRangeByte4(xmlParserCtxtPtr ctxt) {
*/
else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80) ||
((L & 0xC0) != 0x80)) {
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
i, j, K, L);
Expand All @@ -575,7 +575,7 @@ static int testCharRangeByte4(xmlParserCtxtPtr ctxt) {
* the 6 or 5th bit of j must be set
*/
else if (((i & 0x7) == 0) && ((j & 0x30) == 0)) {
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
i, j, K, L);
Expand All @@ -588,7 +588,7 @@ static int testCharRangeByte4(xmlParserCtxtPtr ctxt) {
*/
else if (((value > 0xD7FF) && (value < 0xE000)) ||
(value > 0x10FFFF)) {
if (lastError != XML_ERR_INVALID_CHAR) {
if (lastError != XML_ERR_INVALID_ENCODING) {
fprintf(stderr,
"Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
value, i, j, K, L);
Expand Down

0 comments on commit 8d0aaf4

Please sign in to comment.