Skip to content

Commit

Permalink
Merge pull request #2810 from cloudflare/dominik/fixes-import-parser-…
Browse files Browse the repository at this point in the history
…for-multi-line-strings

Fixes parsing of imports in multi-line string literals.
  • Loading branch information
dom96 authored Sep 30, 2024
2 parents 579048a + f5896ed commit 3800fd6
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 5 deletions.
72 changes: 72 additions & 0 deletions src/workerd/api/pyodide/pyodide-test.c++
Original file line number Diff line number Diff line change
Expand Up @@ -115,5 +115,77 @@ KJ_TEST("supports backslash") {
KJ_REQUIRE(result[5] == "c");
}

KJ_TEST("multiline-strings ignored") {
auto files = kj::heapArrayBuilder<kj::String>(4);
files.add(kj::str(R"SCRIPT(
FOO="""
import x
from y import z
"""
)SCRIPT"));
files.add(kj::str(R"SCRIPT(
FOO='''
import f
from g import z
'''
)SCRIPT"));
files.add(kj::str(R"SCRIPT(FOO = "\
import b \
")SCRIPT"));
files.add(kj::str("FOO=\"\"\" \n", R"SCRIPT(import x
from y import z
""")SCRIPT"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 0);
}

KJ_TEST("multiline-strings with imports in-between") {
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str(
R"SCRIPT(FOO="""
import x
from y import z
"""import q
import w
BAR="""
import e
"""
from t import u)SCRIPT"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 2);
KJ_REQUIRE(result[0] == "w");
KJ_REQUIRE(result[1] == "t");
}

KJ_TEST("import after string literal") {
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str(R"SCRIPT(import a
"import b)SCRIPT"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 1);
KJ_REQUIRE(result[0] == "a");
}

KJ_TEST("import after `i`") {
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str(R"SCRIPT(import a
iimport b)SCRIPT"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 1);
KJ_REQUIRE(result[0] == "a");
}

KJ_TEST("langchain import") {
auto files = kj::heapArrayBuilder<kj::String>(1);
files.add(kj::str(R"SCRIPT(from js import Response, console, URL
from langchain.chat_models import ChatOpenAI
import openai)SCRIPT"));
auto result = pyodide::ArtifactBundler::parsePythonScriptImports(files.finish());
KJ_REQUIRE(result.size() == 3);
KJ_REQUIRE(result[0] == "js");
KJ_REQUIRE(result[1] == "langchain.chat_models");
KJ_REQUIRE(result[2] == "openai");
}

} // namespace
} // namespace workerd::api
44 changes: 39 additions & 5 deletions src/workerd/api/pyodide/pyodide.c++
Original file line number Diff line number Diff line change
Expand Up @@ -207,12 +207,14 @@ kj::Array<kj::String> ArtifactBundler::parsePythonScriptImports(kj::Array<kj::St

int i = 0;
while (i < file.size()) {
auto keywordToParse = file[i] == 'i' ? "import"_kj : "from"_kj;
switch (file[i]) {
case 'i':
case 'f':
case 'f': {
auto keywordToParse = file[i] == 'i' ? "import"_kj : "from"_kj;
if (!parseKeyword(file, keywordToParse, i)) {
i += skipUntil(file, {'\n', '\r'}, i);
// We cannot simply skip the current char here, doing so would mean that
// `iimport x` would be parsed as a valid import.
i += skipUntil(file, {'\n', '\r', '"', '\''}, i);
continue;
}
i += keywordToParse.size(); // skip "import" or "from"
Expand Down Expand Up @@ -256,9 +258,41 @@ kj::Array<kj::String> ArtifactBundler::parsePythonScriptImports(kj::Array<kj::St
}
}
break;
}
case '"':
case '\'': {
char quote = file[i];
// Detect multi-line string literals `"""` and skip until the corresponding ending `"""`.
if (i + 2 < file.size() && file[i + 1] == quote && file[i + 2] == quote) {
i += 3; // skip start quotes.
// skip until terminating quotes.
while (i + 2 < file.size() && file[i + 1] != quote && file[i + 2] != quote) {
i += skipUntil(file, {quote}, i);
}
i += 3; // skip terminating quotes.
} else if (i + 2 < file.size() && file[i + 1] == '\\' &&
(file[i + 2] == '\n' || file[i + 2] == '\r')) {
// Detect string literal with backslash.
i += 3; // skip `"\<NL>`
// skip until quote, but ignore `\"`.
while (file[i] != quote && file[i - 1] != '\\') {
i += skipUntil(file, {quote}, i);
}
i += 1; // skip quote.
} else {
i += 1; // skip quote.
}

// skip until EOL so that we don't mistakenly parse and capture `"import x`.
i += skipUntil(file, {'\n', '\r', '"', '\''}, i);
break;
}
default:
// Skip to the next line.
i += skipUntil(file, {'\n', '\r'}, i);
// Skip to the next line or " or '
i += skipUntil(file, {'\n', '\r', '"', '\''}, i);
if (file[i] == '"' || file[i] == '\'') {
continue; // Allow the quotes to be handled above.
}
if (file[i] != '\0') {
i += skipChar(file, {'\n', '\r'}, i); // skip newline.
}
Expand Down

0 comments on commit 3800fd6

Please sign in to comment.