diff --git a/packages/jinja/src/index.ts b/packages/jinja/src/index.ts index 32635a2ea..9f28cf774 100644 --- a/packages/jinja/src/index.ts +++ b/packages/jinja/src/index.ts @@ -31,7 +31,7 @@ export class Template { this.parsed = parse(tokens); } - render(items: Record<string, unknown>): string { + render(items?: Record<string, unknown>): string { // Create a new environment for this template const env = new Environment(); @@ -44,8 +44,10 @@ export class Template { env.set("range", range); // Add user-defined variables - for (const [key, value] of Object.entries(items)) { - env.set(key, value); + if (items) { + for (const [key, value] of Object.entries(items)) { + env.set(key, value); + } } const interpreter = new Interpreter(env); diff --git a/packages/jinja/src/parser.ts b/packages/jinja/src/parser.ts index e99c1e6c1..f27cb52e7 100644 --- a/packages/jinja/src/parser.ts +++ b/packages/jinja/src/parser.ts @@ -343,7 +343,7 @@ export function parse(tokens: Token[]): Program { function parseCallMemberExpression(): Statement { // Handle member expressions recursively - const member = parseMemberExpression(); // foo.x + const member = parseMemberExpression(parsePrimaryExpression()); // foo.x if (is(TOKEN_TYPES.OpenParen)) { // foo.x() @@ -352,15 +352,17 @@ export function parse(tokens: Token[]): Program { return member; } - function parseCallExpression(callee: Statement): CallExpression { - let callExpression = new CallExpression(callee, parseArgs()); + function parseCallExpression(callee: Statement): Statement { + let expression: Statement = new CallExpression(callee, parseArgs()); + + expression = parseMemberExpression(expression); // foo.x().y if (is(TOKEN_TYPES.OpenParen)) { // foo.x()() - callExpression = parseCallExpression(callExpression); + expression = parseCallExpression(expression); } - return callExpression; + return expression; } function parseArgs(): Statement[] { @@ -433,9 +435,7 @@ export function parse(tokens: Token[]): Program { return slices[0] as Statement; // normal member expression 
} - function parseMemberExpression(): Statement { - let object = parsePrimaryExpression(); - + function parseMemberExpression(object: Statement): Statement { while (is(TOKEN_TYPES.Dot) || is(TOKEN_TYPES.OpenSquareBracket)) { const operator = tokens[current]; // . or [ ++current; diff --git a/packages/jinja/src/runtime.ts b/packages/jinja/src/runtime.ts index 474de75f7..0be95d8ad 100644 --- a/packages/jinja/src/runtime.ts +++ b/packages/jinja/src/runtime.ts @@ -117,6 +117,48 @@ export class StringValue extends RuntimeValue { return new StringValue(this.value.trimStart()); }), ], + [ + "split", + // follows Python's `str.split(sep=None, maxsplit=-1)` function behavior + // https://docs.python.org/3.13/library/stdtypes.html#str.split + new FunctionValue((args) => { + const sep = args[0] ?? new NullValue(); + if (!(sep instanceof StringValue || sep instanceof NullValue)) { + throw new Error("sep argument must be a string or null"); + } + const maxsplit = args[1] ?? new NumericValue(-1); + if (!(maxsplit instanceof NumericValue)) { + throw new Error("maxsplit argument must be a number"); + } + + let result = []; + if (sep instanceof NullValue) { + // If sep is not specified or is None, runs of consecutive whitespace are regarded as a single separator, and the + // result will contain no empty strings at the start or end if the string has leading or trailing whitespace. + // Trailing whitespace may be present when maxsplit is specified and there aren't sufficient matches in the string. + const text = this.value.trimStart(); + for (const { 0: match, index } of text.matchAll(/\S+/g)) { + if (maxsplit.value !== -1 && result.length >= maxsplit.value && index !== undefined) { + result.push(match + text.slice(index + match.length)); + break; + } + result.push(match); + } + } else { + // If sep is specified, consecutive delimiters are not grouped together and are deemed to delimit empty strings. 
+ if (sep.value === "") { + throw new Error("empty separator"); + } + result = this.value.split(sep.value); + if (maxsplit.value !== -1 && result.length > maxsplit.value) { + // Follow Python's behavior: If maxsplit is given, at most maxsplit splits are done, + // with any remaining text returned as the final element of the list. + result.push(result.splice(maxsplit.value).join(sep.value)); + } + } + return new ArrayValue(result.map((part) => new StringValue(part))); + }), + ], ]); } @@ -543,6 +585,8 @@ export class Interpreter { } }) ); + case "join": + return new StringValue(operand.value.map((x) => x.value).join("")); default: throw new Error(`Unknown ArrayValue filter: ${filter.value}`); } @@ -570,6 +614,7 @@ export class Interpreter { ) .join("\n") ); + case "join": case "string": return operand; // no-op default: @@ -610,6 +655,24 @@ export class Interpreter { throw new Error("If set, indent must be a number"); } return new StringValue(toJSON(operand, indent.value)); + } else if (filterName === "join") { + let value; + if (operand instanceof StringValue) { + // NOTE: string.split('') breaks for unicode characters + value = Array.from(operand.value); + } else if (operand instanceof ArrayValue) { + value = operand.value.map((x) => x.value); + } else { + throw new Error(`Cannot apply filter "${filterName}" to type: ${operand.type}`); + } + const [args, kwargs] = this.evaluateArguments(filter.args, environment); + + const separator = args.at(0) ?? kwargs.get("separator") ?? 
new StringValue(""); + if (!(separator instanceof StringValue)) { + throw new Error("separator must be a string"); + } + + return new StringValue(value.join(separator.value)); } if (operand instanceof ArrayValue) { diff --git a/packages/jinja/test/e2e.test.js b/packages/jinja/test/e2e.test.js index 833a1a854..3ce98e234 100644 --- a/packages/jinja/test/e2e.test.js +++ b/packages/jinja/test/e2e.test.js @@ -677,6 +677,20 @@ const TEST_CUSTOM_TEMPLATES = Object.freeze({ }, target: `<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nEnvironment: ipython\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a bot that responds to weather queries.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nGiven the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.\n\nRespond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables.\n\n{\n "type": "function",\n "function": {\n "name": "get_current_temperature",\n "description": "Get the current temperature at a location.",\n "parameters": {\n "type": "object",\n "properties": {\n "location": {\n "type": "string",\n "description": "The location to get the temperature for, in the format \\"City, Country\\""\n }\n },\n "required": [\n "location"\n ]\n },\n "return": {\n "type": "number",\n "description": "The current temperature at the specified location in the specified units, as a float."\n }\n }\n}\n\nHey, what's the temperature in Paris right now?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n`, }, + "deepseek-ai/DeepSeek-R1": { + chat_template: `{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set 
ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '\`\`\`json' + '\\n' + tool['function']['arguments'] + '\\n' + '\`\`\`' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '\`\`\`json' + '\\n' + tool['function']['arguments'] + '\\n' + '\`\`\`' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '\`\`\`json' + '\\n' + tool['function']['arguments'] + '\\n' + '\`\`\`' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set 
ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}`, + data: { + messages: [ + { role: "user", content: "Hi there." }, + { role: "assistant", content: '<think>The user said "Hi there."</think>Hi!' }, + { role: "user", content: "Tell me a joke." }, + ], + bos_token: "<|begin▁of▁sentence|>", + eos_token: "<|end▁of▁sentence|>", + add_generation_prompt: true, + }, + target: `<|begin▁of▁sentence|><|User|>Hi there.<|Assistant|>Hi!<|end▁of▁sentence|><|User|>Tell me a joke.<|Assistant|>`, + }, }); describe("End-to-end tests", () => { diff --git a/packages/jinja/test/templates.test.js b/packages/jinja/test/templates.test.js index 0a5f9130f..5f9f15a03 100644 --- a/packages/jinja/test/templates.test.js +++ b/packages/jinja/test/templates.test.js @@ -36,6 +36,7 @@ const TEST_STRINGS = { // Set variables VARIABLES: `{% set x = 'Hello' %}{% set y = 'World' %}{{ x + ' ' + y }}`, + VARIABLES_2: `{% set x = 'Hello'.split('el')[-1] %}{{ x }}`, // Numbers NUMBERS: `|{{ 5 }}|{{ -5 }}|{{ add(3, -1) }}|{{ (3 - 1) + (a - 5) - (a + 5)}}|`, @@ -154,6 +155,13 @@ const TEST_STRINGS = { RSTRIP: `{{ " test it ".rstrip() }}`, //lstrip LSTRIP: `{{ " test it ".lstrip() }}`, + + //split + SPLIT: `|{{ " test it ".split() | join("|") }}|`, + SPLIT_2: `|{{ " test it ".split(" ") | join("|") }}|`, + SPLIT_3: `|{{ " test it ".split(" ", 4) | join("|") }}|`, + SPLIT_4: `|{{ " 1 2 3 ".split() | tojson }}|{{ "babbaccabbb".split("b") | tojson }}|{{ "babbaccabbb".split("b", 2) | tojson }}|`, + SPLIT_5: `|{{ " 1 2 3 4 5 ".split(none, 0) | join(",") }}|{{ " 1 2 3 4 5 ".split(none, 3) | join(",") }}|{{ " 1 2 3 4 5 ".split(" ", 0) | join(",") }}|{{ " 1 2 3 4 5 ".split(" ", 3) | join(",") }}|{{ " 1 2 3 4 5 ".split(" ", 10) | join(",") }}|`, }; const TEST_PARSED = { @@ -678,6 +686,25 @@ const 
TEST_PARSED = { { value: "y", type: "Identifier" }, { value: "}}", type: "CloseExpression" }, ], + VARIABLES_2: [ + { value: "{%", type: "OpenStatement" }, + { value: "set", type: "Set" }, + { value: "x", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "Hello", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "split", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "el", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "[", type: "OpenSquareBracket" }, + { value: "-1", type: "NumericLiteral" }, + { value: "]", type: "CloseSquareBracket" }, + { value: "%}", type: "CloseStatement" }, + { value: "{{", type: "OpenExpression" }, + { value: "x", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + ], // Numbers NUMBERS: [ @@ -2798,6 +2825,178 @@ const TEST_PARSED = { { value: ")", type: "CloseParen" }, { value: "}}", type: "CloseExpression" }, ], + SPLIT: [ + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " test it ", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "split", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: ")", type: "CloseParen" }, + { value: "|", type: "Pipe" }, + { value: "join", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "|", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + ], + SPLIT_2: [ + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " test it ", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "split", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: " ", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "|", type: "Pipe" }, + { value: "join", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "|", type: "StringLiteral" }, + { value: ")", 
type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + ], + SPLIT_3: [ + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " test it ", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "split", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: " ", type: "StringLiteral" }, + { value: ",", type: "Comma" }, + { value: "4", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "|", type: "Pipe" }, + { value: "join", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "|", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + ], + SPLIT_4: [ + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " 1 2 3 ", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "split", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: ")", type: "CloseParen" }, + { value: "|", type: "Pipe" }, + { value: "tojson", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "babbaccabbb", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "split", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "b", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "|", type: "Pipe" }, + { value: "tojson", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "babbaccabbb", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "split", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "b", type: "StringLiteral" }, + { value: ",", type: "Comma" }, + { value: "2", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, 
+ { value: "|", type: "Pipe" }, + { value: "tojson", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + ], + SPLIT_5: [ + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " 1 2 3 4 5 ", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "split", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "none", type: "NullLiteral" }, + { value: ",", type: "Comma" }, + { value: "0", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "|", type: "Pipe" }, + { value: "join", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: ",", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " 1 2 3 4 5 ", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "split", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "none", type: "NullLiteral" }, + { value: ",", type: "Comma" }, + { value: "3", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "|", type: "Pipe" }, + { value: "join", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: ",", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " 1 2 3 4 5 ", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "split", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: " ", type: "StringLiteral" }, + { value: ",", type: "Comma" }, + { value: "0", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "|", type: "Pipe" }, + { value: "join", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: ",", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { 
value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " 1 2 3 4 5 ", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "split", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: " ", type: "StringLiteral" }, + { value: ",", type: "Comma" }, + { value: "3", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "|", type: "Pipe" }, + { value: "join", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: ",", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " 1 2 3 4 5 ", type: "StringLiteral" }, + { value: ".", type: "Dot" }, + { value: "split", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: " ", type: "StringLiteral" }, + { value: ",", type: "Comma" }, + { value: "10", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "|", type: "Pipe" }, + { value: "join", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: ",", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + ], }; const TEST_CONTEXT = { @@ -2838,6 +3037,7 @@ const TEST_CONTEXT = { // Set variables VARIABLES: {}, + VARIABLES_2: {}, // Numbers NUMBERS: { @@ -3055,9 +3255,16 @@ const TEST_CONTEXT = { MACROS_1: {}, MACROS_2: {}, - //STRIP + // Strip RSTRIP: {}, LSTRIP: {}, + + // Split + SPLIT: {}, + SPLIT_2: {}, + SPLIT_3: {}, + SPLIT_4: {}, + SPLIT_5: {}, }; const EXPECTED_OUTPUTS = { @@ -3092,6 +3299,7 @@ const EXPECTED_OUTPUTS = { // Set variables VARIABLES: "Hello World", + VARIABLES_2: "lo", // Numbers NUMBERS: "|5|-5|2|-8|", @@ -3209,11 +3417,18 @@ const EXPECTED_OUTPUTS = { // RSTRIP/LSTRIP RSTRIP: ` test it`, LSTRIP: `test it `, + + // Split + SPLIT: `|test|it|`, + 
SPLIT_2: `||||test|it|||`, + SPLIT_3: `||||test|it |`, + SPLIT_4: `|["1", "2", "3"]|["", "a", "", "acca", "", "", ""]|["", "a", "baccabbb"]|`, + SPLIT_5: `|1 2 3 4 5 |1,2,3,4 5 | 1 2 3 4 5 |,1,2,3 4 5 |,1,2,3,4,5,|`, }; describe("Templates", () => { describe("Lexing", () => { - it("should tokenize an input string", () => { + describe("should tokenize an input string", () => { for (const [name, text] of Object.entries(TEST_STRINGS)) { const tokens = tokenize(text); @@ -3221,11 +3436,12 @@ describe("Templates", () => { throw new Error(`Test case "${name}" not found`); } - if (tokens.length !== TEST_PARSED[name].length) { - console.log(tokens); - } - // console.log(tokens); - expect(tokens).toMatchObject(TEST_PARSED[name]); + it(name, () => { + if (tokens.length !== TEST_PARSED[name].length) { + console.error(tokens); + } + expect(tokens).toMatchObject(TEST_PARSED[name]); + }); } }); @@ -3233,35 +3449,28 @@ describe("Templates", () => { }); describe("Parsing and intepretation", () => { - const AST_CACHE = new Map(); - it("should generate an AST", () => { - // NOTE: In this test case, we just check that no error occurs + describe("should interpret an AST", () => { for (const [name, text] of Object.entries(TEST_PARSED)) { const ast = parse(text); - AST_CACHE.set(name, ast); - } - }); - - it("should interpret an AST", () => { - for (const [name, ast] of AST_CACHE.entries()) { if (TEST_CONTEXT[name] === undefined || EXPECTED_OUTPUTS[name] === undefined) { console.warn(`Skipping test case "${name}" due to missing context or expected output`); continue; } - - const env = new Environment(); - // Declare global variables - env.set("false", false); - env.set("true", true); - - // Add user-defined variables - for (const [key, value] of Object.entries(TEST_CONTEXT[name])) { - env.set(key, value); - } - - const interpreter = new Interpreter(env); - const result = interpreter.run(ast); - expect(result.value).toEqual(EXPECTED_OUTPUTS[name]); + it(name, () => { + const env = new 
Environment(); + // Declare global variables + env.set("false", false); + env.set("true", true); + + // Add user-defined variables + for (const [key, value] of Object.entries(TEST_CONTEXT[name])) { + env.set(key, value); + } + + const interpreter = new Interpreter(env); + const result = interpreter.run(ast); + expect(result.value).toEqual(EXPECTED_OUTPUTS[name]); + }); } }); });