Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Let Lexer.getObj return a dummy-Cmd for commands that start with a non-visible ASCII character (issue 13999) #14002

Merged
merged 1 commit into from
Sep 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/core/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -1255,7 +1255,7 @@ class Lexer {
return Cmd.get("}");
case 0x29: // ')'
// Consume the current character in order to avoid permanently hanging
// the worker thread if `Lexer.getObject` is called from within a loop
// the worker thread if `Lexer.getObj` is called from within a loop
// containing try-catch statements, since we would otherwise attempt
// to parse the *same* character over and over (fixes issue8061.pdf).
this.nextChar();
Expand All @@ -1264,6 +1264,15 @@ class Lexer {

// Start reading a command.
let str = String.fromCharCode(ch);
// A valid command cannot start with a non-visible ASCII character,
// and the next character may be (the start of) a valid command.
if (ch < 0x20 || ch > 0x7f) {
const nextCh = this.peekChar();
if (nextCh >= 0x20 && nextCh <= 0x7f) {
this.nextChar();
return Cmd.get(str);
}
}
const knownCommands = this.knownCommands;
let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/issue13999.pdf.link
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/7147166/default.pdf
8 changes: 8 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -5885,5 +5885,13 @@
"rounds": 1,
"enableXfa": true,
"type": "eq"
},
{ "id": "issue13999",
"file": "pdfs/issue13999.pdf",
"md5": "8829c76887e7827390e9a5f73cab23f2",
"link": true,
"rounds": 1,
"lastPage": 1,
"type": "eq"
}
]
28 changes: 27 additions & 1 deletion test/unit/parser_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
* limitations under the License.
*/

import { Cmd, EOF, Name } from "../../src/core/primitives.js";
import { Lexer, Linearization, Parser } from "../../src/core/parser.js";
import { FormatError } from "../../src/shared/util.js";
import { Name } from "../../src/core/primitives.js";
import { StringStream } from "../../src/core/stream.js";

describe("parser", function () {
Expand Down Expand Up @@ -217,6 +217,32 @@ describe("parser", function () {
}
});
});

describe("getObj", function () {
  it(
    "should stop immediately when the start of a command is " +
      "a non-visible ASCII character (issue 13999)",
    function () {
      const lexer = new Lexer(new StringStream("\x14q\nQ"));

      // The leading control character must come back as a command of its
      // own, so that the "q" and "Q" operators following it are still
      // returned as separate commands rather than being merged into it.
      const expectedCommands = ["\x14", "q", "Q"];
      for (const expectedCmd of expectedCommands) {
        const token = lexer.getObj();
        expect(token instanceof Cmd).toEqual(true);
        expect(token.cmd).toEqual(expectedCmd);
      }

      // Once every command has been consumed, only EOF remains.
      expect(lexer.getObj()).toEqual(EOF);
    }
  );
});
});

describe("Linearization", function () {
Expand Down