Skip to content

Commit

Permalink
PDFBOX-5957: refactor, add comments, improve logging
Browse files: browse the repository at this point in the history
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1923832 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
THausherr committed Feb 15, 2025
1 parent b9794d5 commit 58dd79c
Showing 1 changed file with 17 additions and 11 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -333,7 +333,10 @@ private boolean hasNoFollowingBinData() throws IOException
boolean noBinData = true;
int startOpIdx = -1;
int endOpIdx = -1;

String s = "";

LOG.debug("String after EI: '{}'", new String(binCharTestArr));

if (readBytes > 0)
{
for (int bIdx = 0; bIdx < readBytes; bIdx++)
Expand All @@ -360,34 +363,37 @@ else if (startOpIdx != -1 && endOpIdx == -1 &&
// PDFBOX-3742: just assuming that 1-3 non blanks is a PDF operator isn't enough
if (endOpIdx != -1 && startOpIdx != -1)
{
// usually, the operator here is Q, sometimes EMC (PDFBOX-2376), S (PDFBOX-3784).
String s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx);
// usually, the operator here is Q, sometimes EMC (PDFBOX-2376), S (PDFBOX-3784)
s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx);
if (!"Q".equals(s) && !"EMC".equals(s) && !"S".equals(s))
{
// operator is not Q, not EMC, not S -> assume binary data
noBinData = false;
}
}

// only if not close to eof
if (readBytes == MAX_BIN_CHAR_TEST_LENGTH)
// only if not close to EOF
if (startOpIdx != -1 && readBytes == MAX_BIN_CHAR_TEST_LENGTH)
{
// a PDF operator is 1-3 bytes long
if (startOpIdx != -1 && endOpIdx == -1)
if (endOpIdx == -1)
{
endOpIdx = MAX_BIN_CHAR_TEST_LENGTH;
s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx);
}
if (endOpIdx != -1 && startOpIdx != -1 && endOpIdx - startOpIdx > 3)
LOG.debug("startOpIdx: {} endOpIdx: {} s = {}", startOpIdx, endOpIdx, s);
// a PDF operator is 1-3 bytes long
if (endOpIdx - startOpIdx > 3)
{
noBinData = false;
noBinData = false; // "operator" too long, assume binary data
}
}
source.rewind(readBytes);
}
if (!noBinData)
{
LOG.warn(
"ignoring 'EI' assumed to be in the middle of inline image at stream offset {}",
source.getPosition());
"ignoring 'EI' assumed to be in the middle of inline image at stream offset {}, s = '{}'",
source.getPosition(), s);
}
return noBinData;
}
Expand Down

0 comments on commit 58dd79c

Please sign in to comment.