From a970c92250942eb143e105fe16d72ff71a9277d7 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Tue, 21 May 2024 13:07:04 +0200 Subject: [PATCH 01/13] Add support for basic macros See https://jinja.palletsprojects.com/en/3.1.x/templates/#macros --- packages/jinja/src/ast.ts | 12 ++++++++ packages/jinja/src/lexer.ts | 4 +++ packages/jinja/src/parser.ts | 28 +++++++++++++++++ packages/jinja/src/runtime.ts | 26 ++++++++++++++++ packages/jinja/test/templates.test.js | 43 +++++++++++++++++++++++++++ 5 files changed, 113 insertions(+) diff --git a/packages/jinja/src/ast.ts b/packages/jinja/src/ast.ts index 287d7d29f..c9d885bbc 100644 --- a/packages/jinja/src/ast.ts +++ b/packages/jinja/src/ast.ts @@ -52,6 +52,18 @@ export class SetStatement extends Statement { } } +export class Macro extends Statement { + override type = "Macro"; + + constructor( + public name: Identifier, + public args: Expression[], + public body: Statement[] + ) { + super(); + } +} + /** * Expressions will result in a value at runtime (unlike statements). */ diff --git a/packages/jinja/src/lexer.ts b/packages/jinja/src/lexer.ts index 2143b984b..2db20b8d5 100644 --- a/packages/jinja/src/lexer.ts +++ b/packages/jinja/src/lexer.ts @@ -44,6 +44,8 @@ export const TOKEN_TYPES = Object.freeze({ And: "And", Or: "Or", Not: "UnaryOperator", + Macro: "Macro", + EndMacro: "EndMacro", }); export type TokenType = keyof typeof TOKEN_TYPES; @@ -65,6 +67,8 @@ const KEYWORDS = Object.freeze({ or: TOKEN_TYPES.Or, not: TOKEN_TYPES.Not, "not in": TOKEN_TYPES.NotIn, + macro: TOKEN_TYPES.Macro, + endmacro: TOKEN_TYPES.EndMacro, // Literals true: TOKEN_TYPES.BooleanLiteral, diff --git a/packages/jinja/src/parser.ts b/packages/jinja/src/parser.ts index 34f8f0cd5..6948e4717 100644 --- a/packages/jinja/src/parser.ts +++ b/packages/jinja/src/parser.ts @@ -21,6 +21,7 @@ import { SliceExpression, KeywordArgumentExpression, TupleLiteral, + Macro, } from "./ast"; /** @@ -90,6 +91,14 @@ export function parse(tokens: Token[]): Program { expect(TOKEN_TYPES.CloseStatement, "Expected %} token"); break; + case TOKEN_TYPES.Macro: + ++current; + result = parseMacroStatement(); + expect(TOKEN_TYPES.OpenStatement, "Expected {% token"); + expect(TOKEN_TYPES.EndMacro, "Expected endmacro token"); + expect(TOKEN_TYPES.CloseStatement, "Expected %} token"); + break; + case TOKEN_TYPES.For: ++current; result = parseForStatement(); @@ -173,6 +182,25 @@ export function parse(tokens: Token[]): Program { return new If(test, body, alternate); } + function parseMacroStatement(): Macro { + const name = parsePrimaryExpression(); + if (name.type !== "Identifier") { + throw new SyntaxError(`Expected identifier following macro statement`); + } + const args = parseArgs(); + expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token"); + + // Body of macro + const body: Statement[] = []; + + // Keep going until we hit {% endmacro + while (not(TOKEN_TYPES.OpenStatement, TOKEN_TYPES.EndMacro)) { + body.push(parseAny()); + } + + return new Macro(name as Identifier, args, body); + } + function parseExpressionSequence(primary = false): Statement { const fn = primary ? parsePrimaryExpression : parseExpression; const expressions = [fn()]; diff --git a/packages/jinja/src/runtime.ts b/packages/jinja/src/runtime.ts index e3d3c55ad..57103b753 100644 --- a/packages/jinja/src/runtime.ts +++ b/packages/jinja/src/runtime.ts @@ -19,6 +19,7 @@ import type { KeywordArgumentExpression, ObjectLiteral, TupleLiteral, + Macro, } from "./ast"; import { slice, titleCase } from "./utils"; @@ -823,6 +824,29 @@ export class Interpreter { return new StringValue(result); } + /** + * See https://jinja.palletsprojects.com/en/3.1.x/templates/#macros for more information. + */ + private evaluateMacro(node: Macro, environment: Environment): NullValue { + environment.setVariable( + node.name.value, + new FunctionValue((args, scope) => { + const macroScope = new Environment(scope); + for (let i = 0; i < node.args.length; i++) { + if (node.args[i].type !== "Identifier") { + throw new Error("Macro arguments must be identifiers"); + } + // TODO: if it is a keyword argument, use their name here + macroScope.setVariable((node.args[i] as Identifier).value, args[i]); + } + return this.evaluateBlock(node.body, macroScope); + }) + ); + + // Macros are not evaluated immediately, so we return null + return new NullValue(); + } + evaluate(statement: Statement | undefined, environment: Environment): AnyRuntimeValue { if (statement === undefined) return new UndefinedValue(); @@ -838,6 +862,8 @@ export class Interpreter { return this.evaluateIf(statement as If, environment); case "For": return this.evaluateFor(statement as For, environment); + case "Macro": + return this.evaluateMacro(statement as Macro, environment); // Expressions case "NumericLiteral": diff --git a/packages/jinja/test/templates.test.js b/packages/jinja/test/templates.test.js index 4d192c78e..2b0df1392 100644 --- a/packages/jinja/test/templates.test.js +++ b/packages/jinja/test/templates.test.js @@ -129,6 +129,9 @@ const TEST_STRINGS = { // Array operators ARRAY_OPERATORS: `{{ ([1, 2, 3] + [4, 5, 6]) | length }}`, + + // Macros + MACROS: `{% macro hello(name) %}{{ 'Hello ' + name }}{% endmacro %}|{{ hello('Bob') }}|{{ hello('Alice') }}|`, }; const TEST_PARSED = { @@ -2296,6 +2299,40 @@ const TEST_PARSED = { { value: "length", type: "Identifier" }, { value: "}}", type: "CloseExpression" }, ], + + // Macros + MACROS: [ + { value: "{%", type: "OpenStatement" }, + { value: "macro", type: "Macro" }, + { value: "hello", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "name", type: "Identifier" }, + { value: ")", type: "CloseParen" }, + { value: "%}", type: "CloseStatement" }, + { value: "{{", type: "OpenExpression" }, + { value: "Hello ", type: "StringLiteral" }, + { value: "+", type: "AdditiveBinaryOperator" }, + { value: "name", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "{%", type: "OpenStatement" }, + { value: "endmacro", type: "EndMacro" }, + { value: "%}", type: "CloseStatement" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "hello", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "Bob", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "hello", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "Alice", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + ], }; const TEST_CONTEXT = { @@ -2485,6 +2522,9 @@ const TEST_CONTEXT = { // Array operators ARRAY_OPERATORS: {}, + + // Macros + MACROS: {}, }; const EXPECTED_OUTPUTS = { @@ -2612,6 +2652,9 @@ const EXPECTED_OUTPUTS = { // Array operators ARRAY_OPERATORS: `6`, + + // Macros + MACROS: `|Hello Bob|Hello Alice|`, }; describe("Templates", () => { From bf41e5e3934e203bf2f2a319ce919c60272e6ff3 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Tue, 21 May 2024 16:15:07 +0200 Subject: [PATCH 02/13] Support macros with keyword arguments --- packages/jinja/src/runtime.ts | 47 ++++++++++++++++++++--- packages/jinja/test/templates.test.js | 54 +++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 6 deletions(-) diff --git a/packages/jinja/src/runtime.ts b/packages/jinja/src/runtime.ts index 57103b753..49c2f9f12 100644 --- a/packages/jinja/src/runtime.ts +++ b/packages/jinja/src/runtime.ts @@ -151,6 +151,13 @@ export class ObjectValue extends RuntimeValue> { ]); } +/** + * Represents a KeywordArguments value at runtime. + */ +export class KeywordArgumentsValue extends ObjectValue { + override type = "KeywordArgumentsValue"; +} + /** * Represents an Array value at runtime. */ @@ -654,10 +661,12 @@ export class Interpreter { } } if (kwargs.size > 0) { - args.push(new ObjectValue(kwargs)); + args.push(new KeywordArgumentsValue(kwargs)); } const fn = this.evaluate(expr.callee, environment); + // console.log('FUNCTION', fn); + // console.log('ARGS', args); if (fn.type !== "FunctionValue") { throw new Error(`Cannot call something that is not a function: got ${fn.type}`); } @@ -828,16 +837,42 @@ export class Interpreter { * See https://jinja.palletsprojects.com/en/3.1.x/templates/#macros for more information. */ private evaluateMacro(node: Macro, environment: Environment): NullValue { + // console.log('=========================') + // console.dir(node, { depth : null}) + // console.log('=========================') environment.setVariable( node.name.value, new FunctionValue((args, scope) => { const macroScope = new Environment(scope); - for (let i = 0; i < node.args.length; i++) { - if (node.args[i].type !== "Identifier") { - throw new Error("Macro arguments must be identifiers"); + + args = args.slice(); // Make a copy of the arguments + + // Separate positional and keyword arguments + let kwargs; + if (args.at(-1)?.type === "KeywordArgumentsValue") { + kwargs = args.pop() as KeywordArgumentsValue; + } + + // Assign values to all arguments defined by the node + for (let i = 0; i < node.args.length; ++i) { + const nodeArg = node.args[i]; + const passedArg = args[i]; + if (nodeArg.type === "Identifier") { + const identifier = nodeArg as Identifier; + if (!passedArg) { + throw new Error(`Missing positional argument: ${identifier.value}`); + } + macroScope.setVariable(identifier.value, passedArg); + } else if (nodeArg.type === "KeywordArgumentExpression") { + const kwarg = nodeArg as KeywordArgumentExpression; + const value = + passedArg ?? // Try positional arguments first + kwargs?.value.get(kwarg.key.value) ?? // Look in user-passed kwargs + this.evaluate(kwarg.value, macroScope); // Use the default defined by the node + macroScope.setVariable(kwarg.key.value, value); + } else { + throw new Error(`Unknown argument type: ${nodeArg.type}`); } - // TODO: if it is a keyword argument, use their name here - macroScope.setVariable((node.args[i] as Identifier).value, args[i]); } return this.evaluateBlock(node.body, macroScope); }) diff --git a/packages/jinja/test/templates.test.js b/packages/jinja/test/templates.test.js index 2b0df1392..11aa2e142 100644 --- a/packages/jinja/test/templates.test.js +++ b/packages/jinja/test/templates.test.js @@ -132,6 +132,7 @@ const TEST_STRINGS = { // Macros MACROS: `{% macro hello(name) %}{{ 'Hello ' + name }}{% endmacro %}|{{ hello('Bob') }}|{{ hello('Alice') }}|`, + MACROS_1: `{% macro hello(name, suffix='.') %}{{ 'Hello ' + name + suffix }}{% endmacro %}|{{ hello('A') }}|{{ hello('B', '!') }}|{{ hello('C', suffix='?') }}|`, }; const TEST_PARSED = { @@ -2333,6 +2334,57 @@ const TEST_PARSED = { { value: "}}", type: "CloseExpression" }, { value: "|", type: "Text" }, ], + MACROS_1: [ + { value: "{%", type: "OpenStatement" }, + { value: "macro", type: "Macro" }, + { value: "hello", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "name", type: "Identifier" }, + { value: ",", type: "Comma" }, + { value: "suffix", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: ".", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "%}", type: "CloseStatement" }, + { value: "{{", type: "OpenExpression" }, + { value: "Hello ", type: "StringLiteral" }, + { value: "+", type: "AdditiveBinaryOperator" }, + { value: "name", type: "Identifier" }, + { value: "+", type: "AdditiveBinaryOperator" }, + { value: "suffix", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "{%", type: "OpenStatement" }, + { value: "endmacro", type: "EndMacro" }, + { value: "%}", type: "CloseStatement" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "hello", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "A", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "hello", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "B", type: "StringLiteral" }, + { value: ",", type: "Comma" }, + { value: "!", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "hello", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "C", type: "StringLiteral" }, + { value: ",", type: "Comma" }, + { value: "suffix", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "?", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + ], }; const TEST_CONTEXT = { @@ -2525,6 +2577,7 @@ const TEST_CONTEXT = { // Macros MACROS: {}, + MACROS_1: {}, }; const EXPECTED_OUTPUTS = { @@ -2655,6 +2708,7 @@ const EXPECTED_OUTPUTS = { // Macros MACROS: `|Hello Bob|Hello Alice|`, + MACROS_1: `|Hello A.|Hello B!|Hello C?|`, }; describe("Templates", () => { From 3738c685f483d79712189be6196ae9634535246a Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Tue, 21 May 2024 16:31:40 +0200 Subject: [PATCH 03/13] Add another macros test --- packages/jinja/test/templates.test.js | 88 +++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/packages/jinja/test/templates.test.js b/packages/jinja/test/templates.test.js index 11aa2e142..038df37bc 100644 --- a/packages/jinja/test/templates.test.js +++ b/packages/jinja/test/templates.test.js @@ -133,6 +133,7 @@ const TEST_STRINGS = { // Macros MACROS: `{% macro hello(name) %}{{ 'Hello ' + name }}{% endmacro %}|{{ hello('Bob') }}|{{ hello('Alice') }}|`, MACROS_1: `{% macro hello(name, suffix='.') %}{{ 'Hello ' + name + suffix }}{% endmacro %}|{{ hello('A') }}|{{ hello('B', '!') }}|{{ hello('C', suffix='?') }}|`, + MACROS_2: `{% macro fn(x, y=2, z=3) %}{{ x + ',' + y + ',' + z }}{% endmacro %}|{{ fn(1) }}|{{ fn(1, 0) }}|{{ fn(1, 0, -1) }}|{{ fn(1, y=0, z=-1) }}|{{ fn(1, z=0) }}|`, }; const TEST_PARSED = { @@ -2385,6 +2386,91 @@ const TEST_PARSED = { { value: "}}", type: "CloseExpression" }, { value: "|", type: "Text" }, ], + MACROS_2: [ + { value: "{%", type: "OpenStatement" }, + { value: "macro", type: "Macro" }, + { value: "fn", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "x", type: "Identifier" }, + { value: ",", type: "Comma" }, + { value: "y", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "2", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "z", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "3", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "%}", type: "CloseStatement" }, + { value: "{{", type: "OpenExpression" }, + { value: "x", type: "Identifier" }, + { value: "+", type: "AdditiveBinaryOperator" }, + { value: ",", type: "StringLiteral" }, + { value: "+", type: "AdditiveBinaryOperator" }, + { value: "y", type: "Identifier" }, + { value: "+", type: "AdditiveBinaryOperator" }, + { value: ",", type: "StringLiteral" }, + { value: "+", type: "AdditiveBinaryOperator" }, + { value: "z", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "{%", type: "OpenStatement" }, + { value: "endmacro", type: "EndMacro" }, + { value: "%}", type: "CloseStatement" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "fn", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "1", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "fn", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "1", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "0", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "fn", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "1", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "0", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "-1", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "fn", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "1", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "y", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "0", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "z", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "-1", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "fn", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "1", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "z", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "0", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + ], }; const TEST_CONTEXT = { @@ -2578,6 +2664,7 @@ const TEST_CONTEXT = { // Macros MACROS: {}, MACROS_1: {}, + MACROS_2: {}, }; const EXPECTED_OUTPUTS = { @@ -2709,6 +2796,7 @@ const EXPECTED_OUTPUTS = { // Macros MACROS: `|Hello Bob|Hello Alice|`, MACROS_1: `|Hello A.|Hello B!|Hello C?|`, + MACROS_2: `|1,2,3|1,0,3|1,0,-1|1,0,-1|1,2,0|`, }; describe("Templates", () => { From 227d8d83d07ec544925a9edeb72103a82ce80c56 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Tue, 21 May 2024 16:45:42 +0200 Subject: [PATCH 04/13] Cleanup --- packages/jinja/src/runtime.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/packages/jinja/src/runtime.ts b/packages/jinja/src/runtime.ts index 49c2f9f12..e91fb5cd4 100644 --- a/packages/jinja/src/runtime.ts +++ b/packages/jinja/src/runtime.ts @@ -665,8 +665,6 @@ export class Interpreter { } const fn = this.evaluate(expr.callee, environment); - // console.log('FUNCTION', fn); - // console.log('ARGS', args); if (fn.type !== "FunctionValue") { throw new Error(`Cannot call something that is not a function: got ${fn.type}`); } @@ -837,9 +835,6 @@ export class Interpreter { * See https://jinja.palletsprojects.com/en/3.1.x/templates/#macros for more information. */ private evaluateMacro(node: Macro, environment: Environment): NullValue { - // console.log('=========================') - // console.dir(node, { depth : null}) - // console.log('=========================') environment.setVariable( node.name.value, new FunctionValue((args, scope) => { From 655fc1b2504e46dafc531dd77612c7722b1b6dbb Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sun, 16 Jun 2024 01:11:03 +0200 Subject: [PATCH 05/13] Add `tojson` filter for objects --- packages/jinja/src/runtime.ts | 30 ++++++++++++++++++++++++++ packages/jinja/test/templates.test.js | 31 +++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/packages/jinja/src/runtime.ts b/packages/jinja/src/runtime.ts index e91fb5cd4..ad0e710e2 100644 --- a/packages/jinja/src/runtime.ts +++ b/packages/jinja/src/runtime.ts @@ -533,6 +533,8 @@ export class Interpreter { ); case "length": return new NumericValue(operand.value.size); + case "tojson": + return new StringValue(toJSON(operand)); default: throw new Error(`Unknown ObjectValue filter: ${filter.value}`); } @@ -950,6 +952,8 @@ function convertToRuntimeValues(input: unknown): AnyRuntimeValue { return new StringValue(input); case "boolean": return new BooleanValue(input); + case "undefined": + return new UndefinedValue(); case "object": if (input === null) { return new NullValue(); @@ -972,3 +976,29 @@ function convertToRuntimeValues(input: unknown): AnyRuntimeValue { throw new Error(`Cannot convert to runtime value: ${input}`); } } + +/** + * Helper function to convert runtime values to JSON + * @param {AnyRuntimeValue} input The runtime value to convert + * @returns {string} JSON representation of the input + */ +function toJSON(input: AnyRuntimeValue): string { + switch (input.type) { + case "NullValue": + case "UndefinedValue": // JSON.stringify(undefined) -> undefined + return "null"; + case "NumericValue": + case "StringValue": + case "BooleanValue": + return JSON.stringify(input.value); + case "ArrayValue": + return `[${(input as ArrayValue).value.map(toJSON).join(", ")}]`; + case "ObjectValue": + return `{${Array.from((input as ObjectValue).value.entries()) + .map(([key, value]) => `"${key}": ${toJSON(value)}`) + .join(", ")}}`; + default: + // e.g., FunctionValue + throw new Error(`Cannot convert to JSON: ${input.type}`); + } +} diff --git a/packages/jinja/test/templates.test.js b/packages/jinja/test/templates.test.js index 038df37bc..94987c034 100644 --- a/packages/jinja/test/templates.test.js +++ b/packages/jinja/test/templates.test.js @@ -78,6 +78,7 @@ const TEST_STRINGS = { FILTER_OPERATOR_4: `{{ items | selectattr('key') | length }}`, FILTER_OPERATOR_5: `{{ messages | selectattr('role', 'equalto', 'system') | length }}`, FILTER_OPERATOR_6: `|{{ obj | length }}|{{ (obj | items)[1:] | length }}|`, + FILTER_OPERATOR_7: `{{ obj | tojson }}`, // Logical operators between non-Booleans BOOLEAN_NUMERICAL: `|{{ 1 and 2 }}|{{ 1 and 0 }}|{{ 0 and 1 }}|{{ 0 and 0 }}|{{ 1 or 2 }}|{{ 1 or 0 }}|{{ 0 or 1 }}|{{ 0 or 0 }}|{{ not 1 }}|{{ not 0 }}|`, @@ -1413,6 +1414,13 @@ const TEST_PARSED = { { value: "}}", type: "CloseExpression" }, { value: "|", type: "Text" }, ], + FILTER_OPERATOR_7: [ + { value: "{{", type: "OpenExpression" }, + { value: "obj", type: "Identifier" }, + { value: "|", type: "Pipe" }, + { value: "tojson", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + ], // Logical operators between non-Booleans BOOLEAN_NUMERICAL: [ @@ -2593,6 +2601,28 @@ const TEST_CONTEXT = { FILTER_OPERATOR_6: { obj: { a: 1, b: 2, c: 3 }, }, + FILTER_OPERATOR_7: { + obj: { + // Test basic types + string: "world", + number: 5, + boolean: true, + null: null, + // undefined: undefined, + + // Test arrays + array: [1, 2, 3], + + // Test objects + object: { key: "value" }, + + // Test formatting + special: `"',:[]{}#&*;=?/\\\`~@|!^%()-_+<>`, + + // Test unicode (https://github.com/huggingface/transformers/pull/31041) + unicode: { "안녕?": "🤗" }, + }, + }, // Logical operators between non-Booleans BOOLEAN_NUMERICAL: {}, @@ -2741,6 +2771,7 @@ const EXPECTED_OUTPUTS = { FILTER_OPERATOR_4: `2`, FILTER_OPERATOR_5: `1`, FILTER_OPERATOR_6: `|3|2|`, + FILTER_OPERATOR_7: `{"string": "world", "number": 5, "boolean": true, "null": null, "array": [1, 2, 3], "object": {"key": "value"}, "special": "\\"',:[]{}#&*;=?/\\\\\`~@|!^%()-_+<>", "unicode": {"안녕?": "🤗"}}`, // Logical operators between non-Booleans BOOLEAN_NUMERICAL: `|2|0|0|0|1|1|1|0|false|true|`, From e0d36757e0eb72815f517c2948e9ff66f785fcfe Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sun, 16 Jun 2024 01:18:54 +0200 Subject: [PATCH 06/13] Support arbitrary `tojson` types --- packages/jinja/src/runtime.ts | 6 ++-- packages/jinja/test/templates.test.js | 42 +++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/packages/jinja/src/runtime.ts b/packages/jinja/src/runtime.ts index ad0e710e2..fde608070 100644 --- a/packages/jinja/src/runtime.ts +++ b/packages/jinja/src/runtime.ts @@ -470,6 +470,10 @@ export class Interpreter { if (node.filter.type === "Identifier") { const filter = node.filter as Identifier; + if (filter.value === "tojson") { + return new StringValue(toJSON(operand)); + } + if (operand instanceof ArrayValue) { switch (filter.value) { case "list": @@ -533,8 +537,6 @@ export class Interpreter { ); case "length": return new NumericValue(operand.value.size); - case "tojson": - return new StringValue(toJSON(operand)); default: throw new Error(`Unknown ObjectValue filter: ${filter.value}`); } diff --git a/packages/jinja/test/templates.test.js b/packages/jinja/test/templates.test.js index 94987c034..aee56b679 100644 --- a/packages/jinja/test/templates.test.js +++ b/packages/jinja/test/templates.test.js @@ -78,7 +78,7 @@ const TEST_STRINGS = { FILTER_OPERATOR_4: `{{ items | selectattr('key') | length }}`, FILTER_OPERATOR_5: `{{ messages | selectattr('role', 'equalto', 'system') | length }}`, FILTER_OPERATOR_6: `|{{ obj | length }}|{{ (obj | items)[1:] | length }}|`, - FILTER_OPERATOR_7: `{{ obj | tojson }}`, + FILTER_OPERATOR_7: `|{{ obj | tojson }}|{{ "test" | tojson }}|{{ 1 | tojson }}|{{ true | tojson }}|{{ null | tojson }}|{{ [1,2,3] | tojson }}|`, // Logical operators between non-Booleans BOOLEAN_NUMERICAL: `|{{ 1 and 2 }}|{{ 1 and 0 }}|{{ 0 and 1 }}|{{ 0 and 0 }}|{{ 1 or 2 }}|{{ 1 or 0 }}|{{ 0 or 1 }}|{{ 0 or 0 }}|{{ not 1 }}|{{ not 0 }}|`, @@ -1415,11 +1415,49 @@ const TEST_PARSED = { { value: "|", type: "Text" }, ], FILTER_OPERATOR_7: [ + { value: "|", type: "Text" }, { value: "{{", type: "OpenExpression" }, { value: "obj", type: "Identifier" }, { value: "|", type: "Pipe" }, { value: "tojson", type: "Identifier" }, { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "test", type: "StringLiteral" }, + { value: "|", type: "Pipe" }, + { value: "tojson", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "1", type: "NumericLiteral" }, + { value: "|", type: "Pipe" }, + { value: "tojson", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "true", type: "BooleanLiteral" }, + { value: "|", type: "Pipe" }, + { value: "tojson", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "null", type: "Identifier" }, + { value: "|", type: "Pipe" }, + { value: "tojson", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "[", type: "OpenSquareBracket" }, + { value: "1", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "2", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "3", type: "NumericLiteral" }, + { value: "]", type: "CloseSquareBracket" }, + { value: "|", type: "Pipe" }, + { value: "tojson", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, ], // Logical operators between non-Booleans @@ -2771,7 +2809,7 @@ const EXPECTED_OUTPUTS = { FILTER_OPERATOR_4: `2`, FILTER_OPERATOR_5: `1`, FILTER_OPERATOR_6: `|3|2|`, - FILTER_OPERATOR_7: `{"string": "world", "number": 5, "boolean": true, "null": null, "array": [1, 2, 3], "object": {"key": "value"}, "special": "\\"',:[]{}#&*;=?/\\\\\`~@|!^%()-_+<>", "unicode": {"안녕?": "🤗"}}`, + FILTER_OPERATOR_7: `|{"string": "world", "number": 5, "boolean": true, "null": null, "array": [1, 2, 3], "object": {"key": "value"}, "special": "\\"',:[]{}#&*;=?/\\\\\`~@|!^%()-_+<>", "unicode": {"안녕?": "🤗"}}|"test"|1|true|null|[1, 2, 3]|`, // Logical operators between non-Booleans BOOLEAN_NUMERICAL: `|2|0|0|0|1|1|1|0|false|true|`, From 82a02ff6154aa991a1accd283268355a5ecbd49e Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sun, 16 Jun 2024 19:46:29 +0200 Subject: [PATCH 07/13] Add support for mapping list of objects to attributes --- packages/jinja/src/runtime.ts | 29 +++++++++++++++++++++++++++ packages/jinja/test/templates.test.js | 21 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/packages/jinja/src/runtime.ts b/packages/jinja/src/runtime.ts index fde608070..17525c9a9 100644 --- a/packages/jinja/src/runtime.ts +++ b/packages/jinja/src/runtime.ts @@ -276,6 +276,7 @@ export class Environment { ["defined", (operand) => operand.type !== "UndefinedValue"], ["undefined", (operand) => operand.type === "UndefinedValue"], ["equalto", (a, b) => a.value === b.value], + ["eq", (a, b) => a.value === b.value], ]); constructor(public parent?: Environment) {} @@ -586,6 +587,34 @@ export class Interpreter { return new ArrayValue(filtered); } + case "map": { + // Accumulate kwargs + const kwargs = new Map(); + for (const argument of filter.args) { + // TODO: Lazy evaluation of arguments + if (argument.type === "KeywordArgumentExpression") { + const kwarg = argument as KeywordArgumentExpression; + kwargs.set(kwarg.key.value, this.evaluate(kwarg.value, environment)); + } + } + if (kwargs.has("attribute")) { + // Mapping on attributes + const attr = kwargs.get("attribute"); + if (!(attr instanceof StringValue)) { + throw new Error("attribute must be a string"); + } + const defaultValue = kwargs.get("default"); + const mapped = operand.value.map((item) => { + if (!(item instanceof ObjectValue)) { + throw new Error("items in map must be an object"); + } + return item.value.get(attr.value) ?? defaultValue ?? new UndefinedValue(); + }); + return new ArrayValue(mapped); + } else { + throw new Error("`map` expressions without `attribute` set are not currently supported."); + } + } } throw new Error(`Unknown ArrayValue filter: ${filterName}`); } else { diff --git a/packages/jinja/test/templates.test.js b/packages/jinja/test/templates.test.js index aee56b679..4259e68e3 100644 --- a/packages/jinja/test/templates.test.js +++ b/packages/jinja/test/templates.test.js @@ -79,6 +79,7 @@ const TEST_STRINGS = { FILTER_OPERATOR_5: `{{ messages | selectattr('role', 'equalto', 'system') | length }}`, FILTER_OPERATOR_6: `|{{ obj | length }}|{{ (obj | items)[1:] | length }}|`, FILTER_OPERATOR_7: `|{{ obj | tojson }}|{{ "test" | tojson }}|{{ 1 | tojson }}|{{ true | tojson }}|{{ null | tojson }}|{{ [1,2,3] | tojson }}|`, + FILTER_OPERATOR_8: `{{ data | map(attribute='val') | list | tojson }}`, // Logical operators between non-Booleans BOOLEAN_NUMERICAL: `|{{ 1 and 2 }}|{{ 1 and 0 }}|{{ 0 and 1 }}|{{ 0 and 0 }}|{{ 1 or 2 }}|{{ 1 or 0 }}|{{ 0 or 1 }}|{{ 0 or 0 }}|{{ not 1 }}|{{ not 0 }}|`, @@ -1459,6 +1460,22 @@ const TEST_PARSED = { { value: "}}", type: "CloseExpression" }, { value: "|", type: "Text" }, ], + FILTER_OPERATOR_8: [ + { value: "{{", type: "OpenExpression" }, + { value: "data", type: "Identifier" }, + { value: "|", type: "Pipe" }, + { value: "map", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "attribute", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "val", type: "StringLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "|", type: "Pipe" }, + { value: "list", type: "Identifier" }, + { value: "|", type: "Pipe" }, + { value: "tojson", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + ], // Logical operators between non-Booleans BOOLEAN_NUMERICAL: [ @@ -2661,6 +2678,9 @@ const TEST_CONTEXT = { unicode: { "안녕?": "🤗" }, }, }, + FILTER_OPERATOR_8: { + data: [{ val: 1 }, { val: 2 }, { val: 3 }], + }, // Logical operators between non-Booleans BOOLEAN_NUMERICAL: {}, @@ -2810,6 +2830,7 @@ const EXPECTED_OUTPUTS = { FILTER_OPERATOR_5: `1`, FILTER_OPERATOR_6: `|3|2|`, FILTER_OPERATOR_7: `|{"string": "world", "number": 5, "boolean": true, "null": null, "array": [1, 2, 3], "object": {"key": "value"}, "special": "\\"',:[]{}#&*;=?/\\\\\`~@|!^%()-_+<>", "unicode": {"안녕?": "🤗"}}|"test"|1|true|null|[1, 2, 3]|`, + FILTER_OPERATOR_8: `[1, 2, 3]`, // Logical operators between non-Booleans BOOLEAN_NUMERICAL: `|2|0|0|0|1|1|1|0|false|true|`, From 029db6baefd46bedd8900dd2ba4aa1eb03a9ef3d Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Sun, 16 Jun 2024 19:51:49 +0200 Subject: [PATCH 08/13] Add e2e test for attribute mapping --- packages/jinja/test/e2e.test.js | 40 +++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/packages/jinja/test/e2e.test.js b/packages/jinja/test/e2e.test.js index 58e10f35e..4b716dd5b 100644 --- a/packages/jinja/test/e2e.test.js +++ b/packages/jinja/test/e2e.test.js @@ -76,6 +76,36 @@ const EXAMPLE_FUNCTION_CALLING_WITH_SYSTEM = [ { role: "user", content: "Hi, can you tell me the current stock price of AAPL?" }, ]; +// Adapted from https://huggingface.co/CISCai/Mistral-7B-Instruct-v0.3-SOTA-GGUF +const EXAMPLE_CHAT_WITH_TOOLS = [ + { + role: "user", + content: "What's the weather like in Oslo and Stockholm?", + }, +]; +const EXAMPLE_TOOLS = [ + { + type: "function", + function: { + name: "get_current_weather", + description: "Get the current weather in a given location", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: "The city and state, e.g. San Francisco, CA", + }, + unit: { + type: "string", + enum: ["celsius", "fahrenheit"], + }, + }, + required: ["location"], + }, + }, + }, +]; /** * Defined in https://github.com/huggingface/transformers * Keys correspond to `model_type` in the transformers repo. @@ -416,6 +446,16 @@ const TEST_CUSTOM_TEMPLATES = Object.freeze({ "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>Whats the biggest penguin in the world?<|END_OF_TURN_TOKEN|>" + '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write \'Action:\' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user\'s last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example:\n```json\n[\n {\n "tool_name": title of the tool in the specification,\n "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters\n }\n]```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>', }, + "CISCai/Mistral-7B-Instruct-v0.3-SOTA-GGUF": { + chat_template: `{{ bos_token }}{% set ns = namespace(lastuser=-1, system=false, functions=false) %}{% if tools %}{% for message in messages %}{% if message['role'] == 'user' %}{% set ns.lastuser = loop.index0 %}{% elif message['role'] == 'system' %}{% set ns.system = message['content'] %}{% endif %}{% endfor %}{% set ns.functions = tools|selectattr('type','eq','function')|map(attribute='function')|list|tojson %}{% endif %}{% for message in messages %}{% if message['role'] == 'user' %}{% if loop.index0 == ns.lastuser and ns.functions %}{{ '[AVAILABLE_TOOLS] ' }}{{ ns.functions }}{{ '[/AVAILABLE_TOOLS]' }}{% endif %}{{ '[INST] ' }}{% if loop.index0 == ns.lastuser and ns.system %}{{ ns.system + ' ' }}{% endif %}{{ message['content'] }}{{ '[/INST]' }}{% elif message['role'] == 'tool' %}{{ '[TOOL_RESULTS] ' }}{{ dict(call_id=message['tool_call_id'], content=message['content'])|tojson }}{{ '[/TOOL_RESULTS]' }}{% elif message['role'] == 'assistant' %}{% if message['tool_calls'] %}{{ '[TOOL_CALLS] [' }}{% for call in message['tool_calls'] %}{% if call['type'] == 'function' %}{{ dict(id=call['id'], name=call['function']['name'], arguments=call['function']['arguments'])|tojson }}{% endif %}{% if not loop.last %}{{ ', ' }}{% endif %}{% endfor %}{{ ']' }}{% else %}{{ message['content'] }}{% endif %}{{ eos_token }}{% endif %}{% endfor %}`, + data: { + messages: EXAMPLE_CHAT_WITH_TOOLS, + tools: EXAMPLE_TOOLS, + bos_token: "", + eos_token: "", + }, + target: `[AVAILABLE_TOOLS] [{"name": "get_current_weather", "description": "Get the current weather in a given location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}}, "required": ["location"]}}][/AVAILABLE_TOOLS][INST] What's the weather like in Oslo and Stockholm?[/INST]`, + }, }); describe("End-to-end tests", () => { From e7800d7830d8c470813a9fba9ec215fe1361c1e2 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Mon, 17 Jun 2024 15:37:17 +0200 Subject: [PATCH 09/13] Support capitalized booleans --- packages/jinja/src/lexer.ts | 7 +++++++ packages/jinja/src/parser.ts | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/jinja/src/lexer.ts b/packages/jinja/src/lexer.ts index 2db20b8d5..fd8ea58c1 100644 --- a/packages/jinja/src/lexer.ts +++ b/packages/jinja/src/lexer.ts @@ -73,6 +73,13 @@ const KEYWORDS = Object.freeze({ // Literals true: TOKEN_TYPES.BooleanLiteral, false: TOKEN_TYPES.BooleanLiteral, + + // NOTE: According to the Jinja docs: The special constants true, false, and none are indeed lowercase. + // Because that caused confusion in the past, (True used to expand to an undefined variable that was considered false), + // all three can now also be written in title case (True, False, and None). However, for consistency, (all Jinja identifiers are lowercase) + // you should use the lowercase versions. + True: TOKEN_TYPES.BooleanLiteral, + False: TOKEN_TYPES.BooleanLiteral, }); /** diff --git a/packages/jinja/src/parser.ts b/packages/jinja/src/parser.ts index 6948e4717..6e5529655 100644 --- a/packages/jinja/src/parser.ts +++ b/packages/jinja/src/parser.ts @@ -505,7 +505,7 @@ export function parse(tokens: Token[]): Program { return new StringLiteral(token.value); case TOKEN_TYPES.BooleanLiteral: ++current; - return new BooleanLiteral(token.value === "true"); + return new BooleanLiteral(token.value.toLowerCase() === "true"); case TOKEN_TYPES.Identifier: ++current; return new Identifier(token.value); From db6eeec342e2c3a959b6e6906cde6c569bb29ad9 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Mon, 17 Jun 2024 15:55:27 +0200 Subject: [PATCH 10/13] Add support for Command-R chat template --- packages/jinja/src/runtime.ts | 90 +++++++++++++++++-- packages/jinja/test/e2e.test.js | 63 ++++++++++++++ packages/jinja/test/templates.test.js | 120 +++++++++++++++++++++++++- 3 files changed, 265 insertions(+), 8 deletions(-) diff --git a/packages/jinja/src/runtime.ts b/packages/jinja/src/runtime.ts index 17525c9a9..24f08c7f4 100644 --- a/packages/jinja/src/runtime.ts +++ b/packages/jinja/src/runtime.ts @@ -520,6 +520,16 @@ export class Interpreter { return new StringValue(operand.value.charAt(0).toUpperCase() + operand.value.slice(1)); case "trim": return new StringValue(operand.value.trim()); + case "indent": + return new StringValue( + operand.value + .split("\n") + .map((x, i) => + // By default, don't indent the first line or empty lines + i === 0 || x.length === 0 ? x : " " + x + ) + .join("\n") + ); default: throw new Error(`Unknown StringValue filter: ${filter.value}`); } @@ -551,6 +561,20 @@ export class Interpreter { } const filterName = (filter.callee as Identifier).value; + if (filterName === "tojson") { + // Accumulate kwargs + const kwargs = new Map(); + for (const argument of filter.args) { + // TODO: Lazy evaluation of arguments + if (argument.type === "KeywordArgumentExpression") { + const kwarg = argument as KeywordArgumentExpression; + kwargs.set(kwarg.key.value, this.evaluate(kwarg.value, environment)); + } + } + const indent = kwargs.get("indent") ?? new NullValue(); + return new StringValue(toJSON(operand, indent.value)); + } + if (operand instanceof ArrayValue) { switch (filterName) { case "selectattr": { @@ -617,6 +641,43 @@ export class Interpreter { } } throw new Error(`Unknown ArrayValue filter: ${filterName}`); + } else if (operand instanceof StringValue) { + switch (filterName) { + case "indent": { + // https://jinja.palletsprojects.com/en/3.1.x/templates/#jinja-filters.indent + // Return a copy of the string with each line indented by 4 spaces. The first line and blank lines are not indented by default. + // Parameters: + // - width: Number of spaces, or a string, to indent by. + // - first: Don't skip indenting the first line. + // - blank: Don't skip indenting empty lines. + const args = []; + const kwargs = new Map(); + for (const argument of filter.args) { + // TODO: Lazy evaluation of arguments + if (argument.type === "KeywordArgumentExpression") { + const kwarg = argument as KeywordArgumentExpression; + kwargs.set(kwarg.key.value, this.evaluate(kwarg.value, environment)); + } else { + if (kwargs.size > 0) { + throw new Error("Positional arguments must come before keyword arguments"); + } + args.push(this.evaluate(argument, environment)); + } + } + + const width = args.at(0) ?? kwargs.get("width") ?? new NumericValue(4); + const first = args.at(1) ?? kwargs.get("first") ?? new BooleanValue(false); + const blank = args.at(2) ?? kwargs.get("blank") ?? new BooleanValue(false); + + const lines = operand.value.split("\n"); + const indent = " ".repeat(width.value); + const indented = lines.map((x, i) => + (!first.value && i === 0) || (!blank.value && x.length === 0) ? x : indent + x + ); + return new StringValue(indented.join("\n")); + } + } + throw new Error(`Unknown StringValue filter: ${filterName}`); } else { throw new Error(`Cannot apply filter "${filterName}" to type: ${operand.type}`); } @@ -1011,9 +1072,12 @@ function convertToRuntimeValues(input: unknown): AnyRuntimeValue { /** * Helper function to convert runtime values to JSON * @param {AnyRuntimeValue} input The runtime value to convert + * @param {number|null} [indent] The number of spaces to indent, or null for no indentation + * @param {number} [depth] The current depth of the object * @returns {string} JSON representation of the input */ -function toJSON(input: AnyRuntimeValue): string { +function toJSON(input: AnyRuntimeValue, indent?: number | null, depth?: number): string { + const currentDepth = depth ?? 0; switch (input.type) { case "NullValue": case "UndefinedValue": // JSON.stringify(undefined) -> undefined @@ -1023,11 +1087,25 @@ function toJSON(input: AnyRuntimeValue): string { case "BooleanValue": return JSON.stringify(input.value); case "ArrayValue": - return `[${(input as ArrayValue).value.map(toJSON).join(", ")}]`; - case "ObjectValue": - return `{${Array.from((input as ObjectValue).value.entries()) - .map(([key, value]) => `"${key}": ${toJSON(value)}`) - .join(", ")}}`; + case "ObjectValue": { + const indentValue = indent ? " ".repeat(indent) : ""; + const basePadding = "\n" + indentValue.repeat(currentDepth); + const childrenPadding = basePadding + indentValue; // Depth + 1 + + if (input.type === "ArrayValue") { + const core = (input as ArrayValue).value.map((x) => toJSON(x, indent, currentDepth + 1)); + return indent + ? `[${childrenPadding}${core.join(`,${childrenPadding}`)}${basePadding}]` + : `[${core.join(", ")}]`; + } else { + // ObjectValue + const core = Array.from((input as ObjectValue).value.entries()).map(([key, value]) => { + const v = `"${key}": ${toJSON(value, indent, currentDepth + 1)}`; + return indent ? `${childrenPadding}${v}` : v; + }); + return indent ? `{${core.join(",")}${basePadding}}` : `{${core.join(", ")}}`; + } + } default: // e.g., FunctionValue throw new Error(`Cannot convert to JSON: ${input.type}`); diff --git a/packages/jinja/test/e2e.test.js b/packages/jinja/test/e2e.test.js index 4b716dd5b..3ee26ad04 100644 --- a/packages/jinja/test/e2e.test.js +++ b/packages/jinja/test/e2e.test.js @@ -446,6 +446,69 @@ const TEST_CUSTOM_TEMPLATES = Object.freeze({ "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>Whats the biggest penguin in the world?<|END_OF_TURN_TOKEN|>" + '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write \'Action:\' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user\'s last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example:\n```json\n[\n {\n "tool_name": title of the tool in the specification,\n "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters\n }\n]```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>', }, + "CohereForAI/c4ai-command-r-v01 (JSON Schema)": { + chat_template: + '\n{%- macro json_to_python_type(json_spec) %}\n{%- set basic_type_map = {\n "string": "str",\n "number": "float",\n "integer": "int",\n "boolean": "bool"\n} %}\n\n{%- if basic_type_map[json_spec.type] is defined %}\n {{- basic_type_map[json_spec.type] }}\n{%- elif json_spec.type == "array" %}\n {{- "List[" + json_to_python_type(json_spec.items) + "]"}}\n{%- elif json_spec.type == "object" %}\n {{- "Dict[str, " + json_to_python_type(json_spec.additionalProperties) + \']\'}}\n{%- elif json_spec.type is iterable %}\n {{- "Union[" }}\n {%- for t in json_spec.type %}\n {{- json_to_python_type({"type": t}) }}\n {%- if not loop.last %}\n {{- "," }} \n {%- endif %}\n {%- endfor %}\n {{- "]" }}\n{%- else %}\n {{- "Any" }}\n{%- endif %}\n{%- endmacro %}\n' + + "\n{%- macro old_tool_parser(tools) %}\n{%- for tool in tools %}\n {%- if loop.index0 != 0 %}\n {{- '\\n\\n' }}\n {%- endif %}\n {{- '```python\\ndef ' + tool.name + '(' }}\n {%- for param_name, param_fields in tool.parameter_definitions.items() %}\n {%- if loop.index0 != 0 %}\n {{- ', '}}\n {%- endif %}\n {{- param_name + ': ' }}\n {%- if not param_fields.required %}\n {{- 'Optional[' + param_fields.type + '] = None'}}\n {%- else %}\n {{- param_fields.type }}\n {%- endif %}\n {%- endfor %}\n {{- ') -> List[Dict]:\\n \"\"\"'}}\n {{- tool.description }}\n {%- if tool.parameter_definitions|length != 0 %}\n {{- '\\n\\n Args:\\n '}}\n {%- for param_name, param_fields in tool.parameter_definitions.items() %}\n {%- if loop.index0 != 0 %}\n {{- '\\n ' }}\n {%- endif %}\n {{- param_name + ' ('}}\n {%- if not param_fields.required %}\n {{- 'Optional[' + param_fields.type + ']'}}\n {%- else %}\n {{- param_fields.type }}\n {%- endif %}\n {{- '): ' + param_fields.description }}\n {%- endfor %}\n {%- endif %}\n {{- '\\n \"\"\"\\n pass\\n```' }}\n{%- endfor %}\n{%- endmacro %}\n" + + "\n{%- macro new_tool_parser(tools) %}\n{%- for tool in tools %}\n {%- if loop.index0 != 0 %}\n {{- '\\n\\n'}}\n {%- endif %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {{-'```python\ndef ' + tool.name + '('}}\n {%- for param_name, param_fields in tool.parameters.properties.items() %}\n {%- if loop.index0 != 0 %}\n {{- ', '}}\n {%- endif %}\n {{-param_name + \": \"}} \n {%- if not param_name in tool.parameters.required %}\n {{-'Optional[' + json_to_python_type(param_fields) + '] = None'}}\n {%- else %}\n {{- json_to_python_type(param_fields) }}\n {%- endif %}\n {%- endfor %}\n {{- ') -> List[Dict]:\n \"\"\"'}}\n {{- tool.description }}\n {%- if tool.parameters.properties|length != 0 %}\n {{- '\\n\\n Args:\\n '}}\n {%- for param_name, param_fields in tool.parameters.properties.items() %}\n {%- if loop.index0 != 0 %}\n {{- '\\n ' }}\n {%- endif %}\n {{- param_name + ' ('}}\n {%- if not param_name in tool.parameters.required %}\n {{-'Optional[' + json_to_python_type(param_fields) + ']'}}\n {%- else %}\n {{- json_to_python_type(param_fields) }}\n {%- endif %}\n {{- '): ' + param_fields.description }}\n {%- endfor %}\n {%- endif %}\n {{- '\\n \"\"\"\\n pass\\n```' }}\n{%- endfor %}\n{%- endmacro %}\n" + + "\n{{- bos_token }}\n{%- if messages[0]['role'] == 'system' %}\n {%- set loop_messages = messages[1:] %}\n {%- set system_message = messages[0]['content'] %}\n{%- else %}\n {%- set loop_messages = messages %}\n {%- set system_message = '## Task and Context\\nYou help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user\\'s needs as best you can, which will be wide-ranging.\\n\\n## Style Guide\\nUnless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.' %}\n{%- endif %}" + + "\n{{- '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' }}\n{{- '# Safety Preamble' }}\n{{- '\nThe instructions in this section override those in the task description and style guide sections. Don\\'t answer questions that are harmful or immoral.' }}\n{{- '\n\n# System Preamble' }}\n{{- '\n## Basic Rules' }}\n{{- '\nYou are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user\\'s requests, you cite your sources in your answers, according to those instructions.' }}\n{{- '\n\n# User Preamble' }}\n{{- '\n' + system_message }}\n{{-'\n\n## Available Tools\nHere is a list of tools that you have available to you:\n\n'}}\n{%- set ns = namespace(new_tools=true) %}\n{%- for tool in tools %}\n {%- if tool.parameter_definitions is defined %}\n {%- set ns.new_tools = false %}\n {%- endif %}\n{%- endfor %}\n{%- if ns.new_tools %}\n {{- new_tool_parser(tools) }}\n{%- else %}\n {{- old_tool_parser(tools) }}\n{%- endif %}\n{{- '<|END_OF_TURN_TOKEN|>'}}\n{%- for message in loop_messages %}\n {%- set content = message['content'] %}\n {%- if message.role == 'user' %}" + + "\n {{- '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}\n {%- elif message.role == 'system' %}" + + "\n {{- '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}\n {%- elif message.role == 'assistant' and message.tool_calls is defined %}" + + "\n {{- '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}\n {%- if message.content is defined %}\n {{- message.content.strip() }}\n {%- endif %}\n {{- '\\nAction:\\n```json\\n[\\n' }}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '{\\n'|indent(4, first=True) }}\n {{- '\"tool_name\": \"'|indent(8, first=True) + tool_call.name + '\",\\n' }}\n {{- '\"parameters\": '|indent(8, first=True) }}\n {%- if tool_call.arguments is defined and tool_call.arguments|length > 0 %} \n {{- tool_call.arguments|tojson(indent=4)|indent(8) }}\n {{- '\\n' }}\n {%- else %}\n {{- '{}\\n' }}\n {%- endif %}\n {{- '}'|indent(4, first=True) }}\n {%- if not loop.last %}\n {{- ',\\n' }}\n {%- endif %}\n {%- endfor %}\n {{- \"\\n]```\\n\" }}\n {%- elif message.role == 'assistant' %}" + + "\n {{- '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}\n {%- elif message.role == 'tool' %}" + + "\n {{- '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>\\n' }}\n {{- message.content.strip() }}\n {{- '<|END_OF_TURN_TOKEN|>' }}\n {%- endif %}\n{%- endfor %}" + + "\n{{-'<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write \\'Action:\\' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user\\'s last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example:\n```json\n[\n {\n \"tool_name\": title of the tool in the specification,\n \"parameters\": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters\n }\n]```<|END_OF_TURN_TOKEN|>'}}\n{%- if add_generation_prompt %}" + + "\n {{- '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}\n{%- endif %}\n", + data: { + messages: [ + { role: "user", content: "Whats the biggest penguin in the world?" }, + { + role: "assistant", + tool_calls: [ + { + type: "function", + function: { name: "internet_search", arguments: { query: "biggest penguin species" } }, + }, + ], + }, + { role: "tool", content: "Tool results go here!" }, + ], + tools: [ + { + type: "function", + function: { + name: "internet_search", + description: "Returns a list of relevant document snippets for a textual query retrieved from the internet", + parameters: { + type: "object", + properties: { query: { type: "string", description: "Query to search the internet with" } }, + required: ["query"], + }, + }, + }, + { + type: "function", + function: { + name: "directly_answer", + description: + "Calls a standard (un-augmented) AI chatbot to generate a response given the conversation history", + parameters: { type: "object", properties: {} }, + }, + }, + ], + bos_token: "", + add_generation_prompt: true, + }, + target: + "" + + '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble\nThe instructions in this section override those in the task description and style guide sections. Don\'t answer questions that are harmful or immoral.\n\n# System Preamble\n## Basic Rules\nYou are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user\'s requests, you cite your sources in your answers, according to those instructions.\n\n# User Preamble\n## Task and Context\nYou help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user\'s needs as best you can, which will be wide-ranging.\n\n## Style Guide\nUnless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.\n\n## Available Tools\nHere is a list of tools that you have available to you:\n\n```python\ndef internet_search(query: str) -> List[Dict]:\n """Returns a list of relevant document snippets for a textual query retrieved from the internet\n\n Args:\n query (str): Query to search the internet with\n """\n pass\n```\n\n```python\ndef directly_answer() -> List[Dict]:\n """Calls a standard (un-augmented) AI chatbot to generate a response given the conversation history\n """\n pass\n```<|END_OF_TURN_TOKEN|>' + + "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>Whats the biggest penguin in the world?<|END_OF_TURN_TOKEN|>" + + '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>\nAction:\n```json\n[\n {\n "tool_name": "internet_search",\n "parameters": {\n "query": "biggest penguin species"\n }\n }\n]```\n' + + "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>\nTool results go here!<|END_OF_TURN_TOKEN|>" + + '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write \'Action:\' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user\'s last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example:\n```json\n[\n {\n "tool_name": title of the tool in the specification,\n "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters\n }\n]```<|END_OF_TURN_TOKEN|>' + + "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", + }, "CISCai/Mistral-7B-Instruct-v0.3-SOTA-GGUF": { chat_template: `{{ bos_token }}{% set ns = namespace(lastuser=-1, system=false, functions=false) %}{% if tools %}{% for message in messages %}{% if message['role'] == 'user' %}{% set ns.lastuser = loop.index0 %}{% elif message['role'] == 'system' %}{% set ns.system = message['content'] %}{% endif %}{% endfor %}{% set ns.functions = tools|selectattr('type','eq','function')|map(attribute='function')|list|tojson %}{% endif %}{% for message in messages %}{% if message['role'] == 'user' %}{% if loop.index0 == ns.lastuser and ns.functions %}{{ '[AVAILABLE_TOOLS] ' }}{{ ns.functions }}{{ '[/AVAILABLE_TOOLS]' }}{% endif %}{{ '[INST] ' }}{% if loop.index0 == ns.lastuser and ns.system %}{{ ns.system + ' ' }}{% endif %}{{ message['content'] }}{{ '[/INST]' }}{% elif message['role'] == 'tool' %}{{ '[TOOL_RESULTS] ' }}{{ dict(call_id=message['tool_call_id'], content=message['content'])|tojson }}{{ '[/TOOL_RESULTS]' }}{% elif message['role'] == 'assistant' %}{% if message['tool_calls'] %}{{ '[TOOL_CALLS] [' }}{% for call in message['tool_calls'] %}{% if call['type'] == 'function' %}{{ dict(id=call['id'], name=call['function']['name'], arguments=call['function']['arguments'])|tojson }}{% endif %}{% if not loop.last %}{{ ', ' }}{% endif %}{% endfor %}{{ ']' }}{% else %}{{ message['content'] }}{% endif %}{{ eos_token }}{% endif %}{% endfor %}`, data: { diff --git a/packages/jinja/test/templates.test.js b/packages/jinja/test/templates.test.js index 4259e68e3..74286ef25 100644 --- a/packages/jinja/test/templates.test.js +++ b/packages/jinja/test/templates.test.js @@ -9,6 +9,9 @@ const TEST_STRINGS = { NO_TEMPLATE: `Hello world!`, TEXT_NODES: `0{{ 'A' }}1{{ 'B' }}{{ 'C' }}2{{ 'D' }}3`, + // Boolean literals + BOOLEAN_LITERALS: `|{{ true }}|{{ false }}|{{ True }}|{{ False }}|`, + // Logical operators LOGICAL_AND: `{{ true and true }}{{ true and false }}{{ false and true }}{{ false and false }}`, LOGICAL_OR: `{{ true or true }}{{ true or false }}{{ false or true }}{{ false or false }}`, @@ -79,7 +82,9 @@ const TEST_STRINGS = { FILTER_OPERATOR_5: `{{ messages | selectattr('role', 'equalto', 'system') | length }}`, FILTER_OPERATOR_6: `|{{ obj | length }}|{{ (obj | items)[1:] | length }}|`, FILTER_OPERATOR_7: `|{{ obj | tojson }}|{{ "test" | tojson }}|{{ 1 | tojson }}|{{ true | tojson }}|{{ null | tojson }}|{{ [1,2,3] | tojson }}|`, - FILTER_OPERATOR_8: `{{ data | map(attribute='val') | list | tojson }}`, + FILTER_OPERATOR_8: `{{ obj | tojson(indent=2) }}`, + FILTER_OPERATOR_9: `{{ data | map(attribute='val') | list | tojson }}`, + FILTER_OPERATOR_10: `|{{ " 1 \n 2 \n 3 \n\n " | indent }}|{{ " 1 \n 2 \n 3 \n\n " | indent(2) }}|{{ " 1 \n 2 \n 3 \n\n " | indent(first=True) }}|{{ " 1 \n 2 \n 3 \n\n " | indent(blank=True) }}|{{ " 1 \n 2 \n 3 \n\n " | indent(4, first=True) }}|`, // Logical operators between non-Booleans BOOLEAN_NUMERICAL: `|{{ 1 and 2 }}|{{ 1 and 0 }}|{{ 0 and 1 }}|{{ 0 and 0 }}|{{ 1 or 2 }}|{{ 1 or 0 }}|{{ 0 or 1 }}|{{ 0 or 0 }}|{{ not 1 }}|{{ not 0 }}|`, @@ -160,6 +165,27 @@ const TEST_PARSED = { { value: "3", type: "Text" }, ], + // Boolean literals + BOOLEAN_LITERALS: [ + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "true", type: "BooleanLiteral" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "false", type: "BooleanLiteral" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "True", type: "BooleanLiteral" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: "False", type: "BooleanLiteral" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + ], + // Logical operators LOGICAL_AND: [ { value: "{{", type: "OpenExpression" }, @@ -1461,6 +1487,18 @@ const TEST_PARSED = { { value: "|", type: "Text" }, ], FILTER_OPERATOR_8: [ + { value: "{{", type: "OpenExpression" }, + { value: "obj", type: "Identifier" }, + { value: "|", type: "Pipe" }, + { value: "tojson", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "indent", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "2", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + ], + FILTER_OPERATOR_9: [ { value: "{{", type: "OpenExpression" }, { value: "data", type: "Identifier" }, { value: "|", type: "Pipe" }, @@ -1476,6 +1514,59 @@ const TEST_PARSED = { { value: "tojson", type: "Identifier" }, { value: "}}", type: "CloseExpression" }, ], + FILTER_OPERATOR_10: [ + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " 1 \n 2 \n 3 \n\n ", type: "StringLiteral" }, + { value: "|", type: "Pipe" }, + { value: "indent", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " 1 \n 2 \n 3 \n\n ", type: "StringLiteral" }, + { value: "|", type: "Pipe" }, + { value: "indent", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "2", type: "NumericLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " 1 \n 2 \n 3 \n\n ", type: "StringLiteral" }, + { value: "|", type: "Pipe" }, + { value: "indent", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "first", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "True", type: "BooleanLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " 1 \n 2 \n 3 \n\n ", type: "StringLiteral" }, + { value: "|", type: "Pipe" }, + { value: "indent", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "blank", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "True", type: "BooleanLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + { value: "{{", type: "OpenExpression" }, + { value: " 1 \n 2 \n 3 \n\n ", type: "StringLiteral" }, + { value: "|", type: "Pipe" }, + { value: "indent", type: "Identifier" }, + { value: "(", type: "OpenParen" }, + { value: "4", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "first", type: "Identifier" }, + { value: "=", type: "Equals" }, + { value: "True", type: "BooleanLiteral" }, + { value: ")", type: "CloseParen" }, + { value: "}}", type: "CloseExpression" }, + { value: "|", type: "Text" }, + ], // Logical operators between non-Booleans BOOLEAN_NUMERICAL: [ @@ -2541,6 +2632,9 @@ const TEST_CONTEXT = { NO_TEMPLATE: {}, TEXT_NODES: {}, + // Boolean literals + BOOLEAN_LITERALS: {}, + // Logical operators LOGICAL_AND: {}, LOGICAL_OR: {}, @@ -2679,8 +2773,25 @@ const TEST_CONTEXT = { }, }, FILTER_OPERATOR_8: { + obj: { + a: [1, 2, 3], + b: 1, + c: { + d: 2, + e: { + f: 3, + g: { + h: 4, + i: [1, 2, 3], + }, + }, + }, + }, + }, + FILTER_OPERATOR_9: { data: [{ val: 1 }, { val: 2 }, { val: 3 }], }, + FILTER_OPERATOR_10: {}, // Logical operators between non-Booleans BOOLEAN_NUMERICAL: {}, @@ -2760,6 +2871,9 @@ const EXPECTED_OUTPUTS = { NO_TEMPLATE: `Hello world!`, TEXT_NODES: `0A1BC2D3`, + // Boolean literals + BOOLEAN_LITERALS: `|true|false|true|false|`, + // Logical operators LOGICAL_AND: `truefalsefalsefalse`, LOGICAL_OR: `truetruetruefalse`, @@ -2830,7 +2944,9 @@ const EXPECTED_OUTPUTS = { FILTER_OPERATOR_5: `1`, FILTER_OPERATOR_6: `|3|2|`, FILTER_OPERATOR_7: `|{"string": "world", "number": 5, "boolean": true, "null": null, "array": [1, 2, 3], "object": {"key": "value"}, "special": "\\"',:[]{}#&*;=?/\\\\\`~@|!^%()-_+<>", "unicode": {"안녕?": "🤗"}}|"test"|1|true|null|[1, 2, 3]|`, - FILTER_OPERATOR_8: `[1, 2, 3]`, + FILTER_OPERATOR_8: `{\n "a": [\n 1,\n 2,\n 3\n ],\n "b": 1,\n "c": {\n "d": 2,\n "e": {\n "f": 3,\n "g": {\n "h": 4,\n "i": [\n 1,\n 2,\n 3\n ]\n }\n }\n }\n}`, + FILTER_OPERATOR_9: `[1, 2, 3]`, + FILTER_OPERATOR_10: `| 1 \n 2 \n 3 \n\n | 1 \n 2 \n 3 \n\n | 1 \n 2 \n 3 \n\n | 1 \n 2 \n 3 \n \n | 1 \n 2 \n 3 \n\n |`, // Logical operators between non-Booleans BOOLEAN_NUMERICAL: `|2|0|0|0|1|1|1|0|false|true|`, From bd51b6747612d0dcb1f4bce9f2c9cd55498516aa Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Mon, 17 Jun 2024 16:07:41 +0200 Subject: [PATCH 11/13] De-duplicate evaluating of positional/keyword args --- packages/jinja/src/runtime.ts | 77 ++++++++++++++++------------------- 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/packages/jinja/src/runtime.ts b/packages/jinja/src/runtime.ts index 24f08c7f4..a0d55952c 100644 --- a/packages/jinja/src/runtime.ts +++ b/packages/jinja/src/runtime.ts @@ -20,6 +20,7 @@ import type { ObjectLiteral, TupleLiteral, Macro, + Expression, } from "./ast"; import { slice, titleCase } from "./utils"; @@ -450,6 +451,28 @@ export class Interpreter { throw new SyntaxError(`Unknown operator "${node.operator.value}" between ${left.type} and ${right.type}`); } + private evaluateArguments( + args: Expression[], + environment: Environment + ): [AnyRuntimeValue[], Map] { + // Accumulate args and kwargs + const positionalArguments = []; + const keywordArguments = new Map(); + for (const argument of args) { + // TODO: Lazy evaluation of arguments + if (argument.type === "KeywordArgumentExpression") { + const kwarg = argument as KeywordArgumentExpression; + keywordArguments.set(kwarg.key.value, this.evaluate(kwarg.value, environment)); + } else { + if (keywordArguments.size > 0) { + throw new Error("Positional arguments must come before keyword arguments"); + } + positionalArguments.push(this.evaluate(argument, environment)); + } + } + return [positionalArguments, keywordArguments]; + } + /** * Evaluates expressions following the filter operation type. */ @@ -562,16 +585,11 @@ export class Interpreter { const filterName = (filter.callee as Identifier).value; if (filterName === "tojson") { - // Accumulate kwargs - const kwargs = new Map(); - for (const argument of filter.args) { - // TODO: Lazy evaluation of arguments - if (argument.type === "KeywordArgumentExpression") { - const kwarg = argument as KeywordArgumentExpression; - kwargs.set(kwarg.key.value, this.evaluate(kwarg.value, environment)); - } - } + const [, kwargs] = this.evaluateArguments(filter.args, environment); const indent = kwargs.get("indent") ?? new NullValue(); + if (!(indent instanceof NumericValue || indent instanceof NullValue)) { + throw new Error("If set, indent must be a number"); + } return new StringValue(toJSON(operand, indent.value)); } @@ -613,14 +631,8 @@ export class Interpreter { } case "map": { // Accumulate kwargs - const kwargs = new Map(); - for (const argument of filter.args) { - // TODO: Lazy evaluation of arguments - if (argument.type === "KeywordArgumentExpression") { - const kwarg = argument as KeywordArgumentExpression; - kwargs.set(kwarg.key.value, this.evaluate(kwarg.value, environment)); - } - } + const [, kwargs] = this.evaluateArguments(filter.args, environment); + if (kwargs.has("attribute")) { // Mapping on attributes const attr = kwargs.get("attribute"); @@ -650,22 +662,13 @@ export class Interpreter { // - width: Number of spaces, or a string, to indent by. // - first: Don't skip indenting the first line. // - blank: Don't skip indenting empty lines. - const args = []; - const kwargs = new Map(); - for (const argument of filter.args) { - // TODO: Lazy evaluation of arguments - if (argument.type === "KeywordArgumentExpression") { - const kwarg = argument as KeywordArgumentExpression; - kwargs.set(kwarg.key.value, this.evaluate(kwarg.value, environment)); - } else { - if (kwargs.size > 0) { - throw new Error("Positional arguments must come before keyword arguments"); - } - args.push(this.evaluate(argument, environment)); - } - } + + const [args, kwargs] = this.evaluateArguments(filter.args, environment); const width = args.at(0) ?? kwargs.get("width") ?? new NumericValue(4); + if (!(width instanceof NumericValue)) { + throw new Error("width must be a number"); + } const first = args.at(1) ?? kwargs.get("first") ?? new BooleanValue(false); const blank = args.at(2) ?? kwargs.get("blank") ?? new BooleanValue(false); @@ -744,16 +747,8 @@ export class Interpreter { private evaluateCallExpression(expr: CallExpression, environment: Environment): AnyRuntimeValue { // Accumulate all keyword arguments into a single object, which will be // used as the final argument in the call function. - const args: AnyRuntimeValue[] = []; - const kwargs = new Map(); - for (const argument of expr.args) { - if (argument.type === "KeywordArgumentExpression") { - const kwarg = argument as KeywordArgumentExpression; - kwargs.set(kwarg.key.value, this.evaluate(kwarg.value, environment)); - } else { - args.push(this.evaluate(argument, environment)); - } - } + const [args, kwargs] = this.evaluateArguments(expr.args, environment); + if (kwargs.size > 0) { args.push(new KeywordArgumentsValue(kwargs)); } From 9a62b439826e2cb4981db0debe91d944388c216f Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Tue, 18 Jun 2024 00:33:06 +0200 Subject: [PATCH 12/13] Add support for mistral v0.3 tool template --- packages/jinja/src/ast.ts | 22 +++++- packages/jinja/src/parser.ts | 39 ++++++++--- packages/jinja/src/runtime.ts | 96 ++++++++++++++++++++------- packages/jinja/test/e2e.test.js | 78 ++++++++++++++++++++++ packages/jinja/test/templates.test.js | 53 +++++++++++++++ 5 files changed, 254 insertions(+), 34 deletions(-) diff --git a/packages/jinja/src/ast.ts b/packages/jinja/src/ast.ts index c9d885bbc..0f7e949ce 100644 --- a/packages/jinja/src/ast.ts +++ b/packages/jinja/src/ast.ts @@ -30,13 +30,18 @@ export class If extends Statement { } } +/** + * Loop over each item in a sequence + * https://jinja.palletsprojects.com/en/3.0.x/templates/#for + */ export class For extends Statement { override type = "For"; constructor( public loopvar: Identifier | TupleLiteral, public iterable: Expression, - public body: Statement[] + public body: Statement[], + public defaultBlock: Statement[] // if no iteration took place ) { super(); } @@ -194,6 +199,21 @@ export class FilterExpression extends Expression { } } +/** + * An operation which filters a sequence of objects by applying a test to each object, + * and only selecting the objects with the test succeeding. + */ +export class SelectExpression extends Expression { + override type = "SelectExpression"; + + constructor( + public iterable: Expression, + public test: Expression + ) { + super(); + } +} + /** * An operation with two sides, separated by the "is" operator. */ diff --git a/packages/jinja/src/parser.ts b/packages/jinja/src/parser.ts index 6e5529655..37891c1b6 100644 --- a/packages/jinja/src/parser.ts +++ b/packages/jinja/src/parser.ts @@ -22,6 +22,7 @@ import { KeywordArgumentExpression, TupleLiteral, Macro, + SelectExpression, } from "./ast"; /** @@ -217,7 +218,7 @@ export function parse(tokens: Token[]): Program { function parseForStatement(): For { // e.g., `message` in `for message in messages` - const loopVariable = parseExpressionSequence(true); // should be an identifier + const loopVariable = parseExpressionSequence(true); // should be an identifier/tuple if (!(loopVariable instanceof Identifier || loopVariable instanceof TupleLiteral)) { throw new SyntaxError(`Expected identifier/tuple for the loop variable, got ${loopVariable.type} instead`); } @@ -232,28 +233,48 @@ export function parse(tokens: Token[]): Program { // Body of for loop const body: Statement[] = []; - // Keep going until we hit {% endfor - while (not(TOKEN_TYPES.OpenStatement, TOKEN_TYPES.EndFor)) { + // Keep going until we hit {% endfor or {% else + while (not(TOKEN_TYPES.OpenStatement, TOKEN_TYPES.EndFor) && not(TOKEN_TYPES.OpenStatement, TOKEN_TYPES.Else)) { body.push(parseAny()); } - return new For(loopVariable, iterable, body); + // (Optional) else block + const alternative: Statement[] = []; + if (is(TOKEN_TYPES.OpenStatement, TOKEN_TYPES.Else)) { + ++current; // consume {% + ++current; // consume else + expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token"); + + // keep going until we hit {% endfor + while (not(TOKEN_TYPES.OpenStatement, TOKEN_TYPES.EndFor)) { + alternative.push(parseAny()); + } + } + + return new For(loopVariable, iterable, body, alternative); } function parseExpression(): Statement { // Choose parse function with lowest precedence - return parseTernaryExpression(); + return parseIfExpression(); } - function parseTernaryExpression(): Statement { + function parseIfExpression(): Statement { const a = parseLogicalOrExpression(); if (is(TOKEN_TYPES.If)) { // Ternary expression ++current; // consume if const predicate = parseLogicalOrExpression(); - expect(TOKEN_TYPES.Else, "Expected else token"); - const b = parseLogicalOrExpression(); - return new If(predicate, [a], [b]); + + if (is(TOKEN_TYPES.Else)) { + // Ternary expression with else + ++current; // consume else + const b = parseLogicalOrExpression(); + return new If(predicate, [a], [b]); + } else { + // Select expression on iterable + return new SelectExpression(a, predicate); + } } return a; } diff --git a/packages/jinja/src/runtime.ts b/packages/jinja/src/runtime.ts index a0d55952c..7f2a1e5ea 100644 --- a/packages/jinja/src/runtime.ts +++ b/packages/jinja/src/runtime.ts @@ -21,6 +21,7 @@ import type { TupleLiteral, Macro, Expression, + SelectExpression, } from "./ast"; import { slice, titleCase } from "./utils"; @@ -256,6 +257,7 @@ export class Environment { ], ["false", (operand) => operand.type === "BooleanValue" && !(operand as BooleanValue).value], ["true", (operand) => operand.type === "BooleanValue" && (operand as BooleanValue).value], + ["string", (operand) => operand.type === "StringValue"], ["number", (operand) => operand.type === "NumericValue"], ["integer", (operand) => operand.type === "NumericValue" && Number.isInteger((operand as NumericValue).value)], ["iterable", (operand) => operand instanceof ArrayValue || operand instanceof StringValue], @@ -553,6 +555,8 @@ export class Interpreter { ) .join("\n") ); + case "string": + return operand; // no-op default: throw new Error(`Unknown StringValue filter: ${filter.value}`); } @@ -864,35 +868,29 @@ export class Interpreter { // Scope for the for loop const scope = new Environment(environment); - const iterable = this.evaluate(node.iterable, scope); + let test, iterable; + if (node.iterable.type === "SelectExpression") { + const select = node.iterable as SelectExpression; + iterable = this.evaluate(select.iterable, scope); + test = select.test; + } else { + iterable = this.evaluate(node.iterable, scope); + } + if (!(iterable instanceof ArrayValue)) { throw new Error(`Expected iterable type in for loop: got ${iterable.type}`); } - let result = ""; - + const items: Expression[] = []; + const scopeUpdateFunctions: ((scope: Environment) => void)[] = []; for (let i = 0; i < iterable.value.length; ++i) { - // Update the loop variable - // TODO: Only create object once, then update value? - const loop = new Map([ - ["index", new NumericValue(i + 1)], - ["index0", new NumericValue(i)], - ["revindex", new NumericValue(iterable.value.length - i)], - ["revindex0", new NumericValue(iterable.value.length - i - 1)], - ["first", new BooleanValue(i === 0)], - ["last", new BooleanValue(i === iterable.value.length - 1)], - ["length", new NumericValue(iterable.value.length)], - ["previtem", i > 0 ? iterable.value[i - 1] : new UndefinedValue()], - ["nextitem", i < iterable.value.length - 1 ? iterable.value[i + 1] : new UndefinedValue()], - ] as [string, AnyRuntimeValue][]); - - scope.setVariable("loop", new ObjectValue(loop)); + const loopScope = new Environment(scope); const current = iterable.value[i]; - // For this iteration, set the loop variable to the current element + let scopeUpdateFunction; if (node.loopvar.type === "Identifier") { - scope.setVariable((node.loopvar as Identifier).value, current); + scopeUpdateFunction = (scope: Environment) => scope.setVariable((node.loopvar as Identifier).value, current); } else if (node.loopvar.type === "TupleLiteral") { const loopvar = node.loopvar as TupleLiteral; if (current.type !== "ArrayValue") { @@ -904,17 +902,67 @@ export class Interpreter { if (loopvar.value.length !== c.value.length) { throw new Error(`Too ${loopvar.value.length > c.value.length ? "few" : "many"} items to unpack`); } - for (let j = 0; j < loopvar.value.length; ++j) { - if (loopvar.value[j].type !== "Identifier") { - throw new Error(`Cannot unpack non-identifier type: ${loopvar.value[j].type}`); + + scopeUpdateFunction = (scope: Environment) => { + for (let j = 0; j < loopvar.value.length; ++j) { + if (loopvar.value[j].type !== "Identifier") { + throw new Error(`Cannot unpack non-identifier type: ${loopvar.value[j].type}`); + } + scope.setVariable((loopvar.value[j] as Identifier).value, c.value[j]); } - scope.setVariable((loopvar.value[j] as Identifier).value, c.value[j]); + }; + } else { + throw new Error(`Invalid loop variable(s): ${node.loopvar.type}`); + } + + if (test) { + scopeUpdateFunction(loopScope); + + const testValue = this.evaluate(test, loopScope); + if (!testValue.__bool__().value) { + continue; } } + items.push(current); + scopeUpdateFunctions.push(scopeUpdateFunction); + } + + let result = ""; + + let noIteration = true; + for (let i = 0; i < items.length; ++i) { + // Update the loop variable + // TODO: Only create object once, then update value? + const loop = new Map([ + ["index", new NumericValue(i + 1)], + ["index0", new NumericValue(i)], + ["revindex", new NumericValue(items.length - i)], + ["revindex0", new NumericValue(items.length - i - 1)], + ["first", new BooleanValue(i === 0)], + ["last", new BooleanValue(i === items.length - 1)], + ["length", new NumericValue(items.length)], + ["previtem", i > 0 ? items[i - 1] : new UndefinedValue()], + ["nextitem", i < items.length - 1 ? items[i + 1] : new UndefinedValue()], + ] as [string, AnyRuntimeValue][]); + + scope.setVariable("loop", new ObjectValue(loop)); + + // Update scope for this iteration + scopeUpdateFunctions[i](scope); + // Evaluate the body of the for loop const evaluated = this.evaluateBlock(node.body, scope); result += evaluated.value; + + // At least one iteration took place + noIteration = false; + } + + // no iteration took place, so we render the default block + if (noIteration) { + const defaultEvaluated = this.evaluateBlock(node.defaultBlock, scope); + result += defaultEvaluated.value; } return new StringValue(result); diff --git a/packages/jinja/test/e2e.test.js b/packages/jinja/test/e2e.test.js index 3ee26ad04..312143836 100644 --- a/packages/jinja/test/e2e.test.js +++ b/packages/jinja/test/e2e.test.js @@ -509,6 +509,84 @@ const TEST_CUSTOM_TEMPLATES = Object.freeze({ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write \'Action:\' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user\'s last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example:\n```json\n[\n {\n "tool_name": title of the tool in the specification,\n "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters\n }\n]```<|END_OF_TURN_TOKEN|>' + "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", }, + "mistralai/Mistral-7B-Instruct-v0.3 (JSON Schema)": { + chat_template: + "{{- bos_token }}\n{%- set user_messages = messages | selectattr('role', 'equalto', 'user') | list %}\n{%- for message in messages %}\n {%- if message['role'] == 'user' %}\n {%- if tools and (message == user_messages[-1]) %}\n {{- ' [AVAILABLE_TOOLS] [' }}\n {%- for tool in tools %}\n\t\t{%- set tool = tool.function %}\n\t\t{{- '{\"type\": \"function\", \"function\": {' }}\n\t\t{%- for key, val in tool|items if key != \"return\" %}\n\t\t {%- if val is string %}\n\t\t\t{{- '\"' + key + '\": \"' + val + '\"' }}\n\t\t {%- else %}\n\t\t\t{{- '\"' + key + '\": ' + val|tojson }}\n\t\t {%- endif %}\n\t\t {%- if not loop.last %}\n\t\t\t{{- \", \" }}\n\t\t {%- endif %}\n\t\t{%- endfor %}\n\t\t{{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- ' [/AVAILABLE_TOOLS]' }}\n {%- endif %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- elif message['role'] == 'assistant' %}\n {%- if message.tool_calls is defined and message.tool_calls|length > 0 %}\n {{- ' [TOOL_CALLS] [' }}\n {%- for tool_call in message.tool_calls %}\n {{- {\"name\": tool_call.function.name, \"arguments\": tool_call.function.arguments, \"id\": tool_call.id}|tojson }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- '] ' }}\n {{- eos_token }}\n \t{%- elif message.content is defined %}\n\t {{- ' ' + message.content + ' ' + eos_token}}\n {%- endif %}\n {%- elif message['role'] == 'tool' %}\n {{- ' [TOOL_RESULTS] ' }}\n {{- '{\"call_id\": \"' + message.tool_call_id + '\", \"content\": ' + message.content|string + '}' }}\n {{- ' [/TOOL_RESULTS] ' }}\n {%- endif %}\n{%- endfor %}\n", + data: { + messages: [ + { + role: "system", + content: + "You are a bot that responds to weather queries. You should reply with the unit used in the queried location.", + }, + { role: "user", content: "Hey, what's the temperature in Paris right now?" }, + { + role: "assistant", + tool_calls: [ + { + id: "abcdef123", + type: "function", + function: { name: "get_current_temperature", arguments: { location: "Paris, France", unit: "celsius" } }, + }, + ], + }, + { role: "tool", tool_call_id: "abcdef123", name: "get_current_temperature", content: "22.0" }, + ], + tools: [ + { + type: "function", + function: { + name: "get_current_temperature", + description: "Get the current temperature at a location.", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: 'The location to get the temperature for, in the format "City, Country"', + }, + unit: { + type: "string", + enum: ["celsius", "fahrenheit"], + description: "The unit to return the temperature in.", + }, + }, + required: ["location", "unit"], + }, + return: { + type: "number", + description: "The current temperature at the specified location in the specified units, as a float.", + }, + }, + }, + { + type: "function", + function: { + name: "get_current_wind_speed", + description: "Get the current wind speed in km/h at a given location.", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: 'The location to get the temperature for, in the format "City, Country"', + }, + }, + required: ["location"], + }, + return: { + type: "number", + description: "The current wind speed at the given location in km/h, as a float.", + }, + }, + }, + ], + bos_token: "", + eos_token: "", + }, + target: + ' [AVAILABLE_TOOLS] [{"type": "function", "function": {"name": "get_current_temperature", "description": "Get the current temperature at a location.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temperature for, in the format \\"City, Country\\""}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit to return the temperature in."}}, "required": ["location", "unit"]}}}, {"type": "function", "function": {"name": "get_current_wind_speed", "description": "Get the current wind speed in km/h at a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temperature for, in the format \\"City, Country\\""}}, "required": ["location"]}}}] [/AVAILABLE_TOOLS] [INST] Hey, what\'s the temperature in Paris right now? [/INST] [TOOL_CALLS] [{"name": "get_current_temperature", "arguments": {"location": "Paris, France", "unit": "celsius"}, "id": "abcdef123"}] [TOOL_RESULTS] {"call_id": "abcdef123", "content": 22.0} [/TOOL_RESULTS] ', + }, "CISCai/Mistral-7B-Instruct-v0.3-SOTA-GGUF": { chat_template: `{{ bos_token }}{% set ns = namespace(lastuser=-1, system=false, functions=false) %}{% if tools %}{% for message in messages %}{% if message['role'] == 'user' %}{% set ns.lastuser = loop.index0 %}{% elif message['role'] == 'system' %}{% set ns.system = message['content'] %}{% endif %}{% endfor %}{% set ns.functions = tools|selectattr('type','eq','function')|map(attribute='function')|list|tojson %}{% endif %}{% for message in messages %}{% if message['role'] == 'user' %}{% if loop.index0 == ns.lastuser and ns.functions %}{{ '[AVAILABLE_TOOLS] ' }}{{ ns.functions }}{{ '[/AVAILABLE_TOOLS]' }}{% endif %}{{ '[INST] ' }}{% if loop.index0 == ns.lastuser and ns.system %}{{ ns.system + ' ' }}{% endif %}{{ message['content'] }}{{ '[/INST]' }}{% elif message['role'] == 'tool' %}{{ '[TOOL_RESULTS] ' }}{{ dict(call_id=message['tool_call_id'], content=message['content'])|tojson }}{{ '[/TOOL_RESULTS]' }}{% elif message['role'] == 'assistant' %}{% if message['tool_calls'] %}{{ '[TOOL_CALLS] [' }}{% for call in message['tool_calls'] %}{% if call['type'] == 'function' %}{{ dict(id=call['id'], name=call['function']['name'], arguments=call['function']['arguments'])|tojson }}{% endif %}{% if not loop.last %}{{ ', ' }}{% endif %}{% endfor %}{{ ']' }}{% else %}{{ message['content'] }}{% endif %}{{ eos_token }}{% endif %}{% endfor %}`, data: { diff --git a/packages/jinja/test/templates.test.js b/packages/jinja/test/templates.test.js index 74286ef25..4b44c9675 100644 --- a/packages/jinja/test/templates.test.js +++ b/packages/jinja/test/templates.test.js @@ -31,6 +31,8 @@ const TEST_STRINGS = { // For loops FOR_LOOP: `{% for message in messages %}{{ message['content'] }}{% endfor %}`, FOR_LOOP_UNPACKING: `|{% for x, y in [ [1, 2], [3, 4] ] %}|{{ x + ' ' + y }}|{% endfor %}|`, + FOR_LOOP_DEFAULT: `{% for x in [] %}{{ 'A' }}{% else %}{{'B'}}{% endfor %}`, + FOR_LOOP_SELECT: `{% for x in [1, 2, 3, 4] if x > 2 %}{{ x }}{% endfor %}`, // Set variables VARIABLES: `{% set x = 'Hello' %}{% set y = 'World' %}{{ x + ' ' + y }}`, @@ -595,6 +597,53 @@ const TEST_PARSED = { { value: "%}", type: "CloseStatement" }, { value: "|", type: "Text" }, ], + FOR_LOOP_DEFAULT: [ + { value: "{%", type: "OpenStatement" }, + { value: "for", type: "For" }, + { value: "x", type: "Identifier" }, + { value: "in", type: "In" }, + { value: "[", type: "OpenSquareBracket" }, + { value: "]", type: "CloseSquareBracket" }, + { value: "%}", type: "CloseStatement" }, + { value: "{{", type: "OpenExpression" }, + { value: "A", type: "StringLiteral" }, + { value: "}}", type: "CloseExpression" }, + { value: "{%", type: "OpenStatement" }, + { value: "else", type: "Else" }, + { value: "%}", type: "CloseStatement" }, + { value: "{{", type: "OpenExpression" }, + { value: "B", type: "StringLiteral" }, + { value: "}}", type: "CloseExpression" }, + { value: "{%", type: "OpenStatement" }, + { value: "endfor", type: "EndFor" }, + { value: "%}", type: "CloseStatement" }, + ], + FOR_LOOP_SELECT: [ + { value: "{%", type: "OpenStatement" }, + { value: "for", type: "For" }, + { value: "x", type: "Identifier" }, + { value: "in", type: "In" }, + { value: "[", type: "OpenSquareBracket" }, + { value: "1", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "2", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "3", type: "NumericLiteral" }, + { value: ",", type: "Comma" }, + { value: "4", type: "NumericLiteral" }, + { value: "]", type: "CloseSquareBracket" }, + { value: "if", type: "If" }, + { value: "x", type: "Identifier" }, + { value: ">", type: "ComparisonBinaryOperator" }, + { value: "2", type: "NumericLiteral" }, + { value: "%}", type: "CloseStatement" }, + { value: "{{", type: "OpenExpression" }, + { value: "x", type: "Identifier" }, + { value: "}}", type: "CloseExpression" }, + { value: "{%", type: "OpenStatement" }, + { value: "endfor", type: "EndFor" }, + { value: "%}", type: "CloseStatement" }, + ], // Set variables VARIABLES: [ @@ -2660,6 +2709,8 @@ const TEST_CONTEXT = { ], }, FOR_LOOP_UNPACKING: {}, + FOR_LOOP_DEFAULT: {}, + FOR_LOOP_SELECT: {}, // Set variables VARIABLES: {}, @@ -2893,6 +2944,8 @@ const EXPECTED_OUTPUTS = { // For loops FOR_LOOP: "ABC", FOR_LOOP_UNPACKING: "||1 2||3 4||", + FOR_LOOP_DEFAULT: "B", + FOR_LOOP_SELECT: "34", // Set variables VARIABLES: "Hello World", From fd8748ccd4b2711e278764b77c8e324ee2fb1777 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Tue, 18 Jun 2024 01:04:54 +0200 Subject: [PATCH 13/13] Add hermes tool template e2e unit test --- packages/jinja/test/e2e.test.js | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/packages/jinja/test/e2e.test.js b/packages/jinja/test/e2e.test.js index 312143836..fe588e330 100644 --- a/packages/jinja/test/e2e.test.js +++ b/packages/jinja/test/e2e.test.js @@ -597,6 +597,39 @@ const TEST_CUSTOM_TEMPLATES = Object.freeze({ }, target: `[AVAILABLE_TOOLS] [{"name": "get_current_weather", "description": "Get the current weather in a given location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}}, "required": ["location"]}}][/AVAILABLE_TOOLS][INST] What's the weather like in Oslo and Stockholm?[/INST]`, }, + "NousResearch/Hermes-2-Pro-Llama-3-8B (JSON Schema)": { + chat_template: + `{%- macro json_to_python_type(json_spec) %}\n{%- set basic_type_map = {\n "string": "str",\n "number": "float",\n "integer": "int",\n "boolean": "bool"\n} %}\n\n{%- if basic_type_map[json_spec.type] is defined %}\n {{- basic_type_map[json_spec.type] }}\n{%- elif json_spec.type == "array" %}\n {{- "list[" + json_to_python_type(json_spec|items) + "]"}}\n{%- elif json_spec.type == "object" %}\n {%- if json_spec.additionalProperties is defined %}\n {{- "dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']'}}\n {%- else %}\n {{- "dict" }}\n {%- endif %}\n{%- elif json_spec.type is iterable %}\n {{- "Union[" }}\n {%- for t in json_spec.type %}\n {{- json_to_python_type({"type": t}) }}\n {%- if not loop.last %}\n {{- "," }} \n {%- endif %}\n {%- endfor %}\n {{- "]" }}\n{%- else %}\n {{- "Any" }}\n{%- endif %}\n{%- endmacro %}\n\n\n` + + `{{- bos_token }}\n{{- "You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: " }}\n{%- for tool in tools %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {{- '{"type": "function", "function": ' }}\n {{- '{"name": ' + tool.name + '", ' }}\n {{- '"description": "' + tool.name + '(' }}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {{- param_name + ": " + json_to_python_type(param_fields) }}\n {%- if not loop.last %}\n {{- ", " }}\n {%- endif %}\n {%- endfor %}\n {{- ")" }}\n {%- if tool.return is defined %}\n {{- " -> " + json_to_python_type(tool.return) }}\n {%- endif %}\n {{- " - " + tool.description + "\\n\\n" }}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {%- if loop.first %}\n {{- " Args:\\n" }}\n {%- endif %}\n {{- " " + param_name + "(" + json_to_python_type(param_fields) + "): " + param_fields.description|trim }}\n {%- endfor %}\n {%- if tool.return is defined and tool.return.description is defined %}\n {{- "\\n Returns:\\n " + tool.return.description }}\n {%- endif %}\n {{- '"' }}\n {{- ', "parameters": ' }}\n {%- if tool.parameters.properties | length == 0 %}\n {{- "{}" }}\n {%- else %}\n {{- tool.parameters | tojson}}\n {%- endif %}\n {{- "}" }}\n {%- if not loop.last %}\n {{- "\\n" }}\n {%- endif %}\n{%- endfor %}\n{{- " " }}\n` + + `{{- 'Use the following pydantic model json schema for each tool call you will make: {"properties": {"arguments": {"title": "Arguments", "type": "object"}, "name": {"title": "Name", "type": "string"}}, "required": ["arguments", "name"], "title": "FunctionCall", "type": "object"}\n' }}\n{{- "For each function call return a json object with function name and arguments within XML tags as follows:\n" }}\n{{- "\n" }}\n{{- '{"arguments": , "name": }\n' }}\n{{- '<|im_end|>' }}\n{%- for message in messages %}\n {%- if message.role == "user" or message.role == "system" or (message.role == "assistant" and message.tool_calls is not defined) %}\n ` + + `{{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == "assistant" %}\n {{- '<|im_start|>' + message.role + '\\n\\n' }}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '{ ' }}\n {%- if tool_call.arguments is defined %}\n {{- '"arguments": ' }}\n {{- tool_call.arguments|tojson }}\n {{- ', '}}\n {%- endif %}\n {{- '"name": "' }}\n {{- tool_call.name }}\n {{- '"}' }}\n {{- '\\n ' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == "tool" %}\n {%- if not message.name is defined %}\n {{- raise_exception("Tool response dicts require a 'name' key indicating the name of the called function!") }}\n {%- endif %}\n {{- '<|im_start|>' + message.role + '\\n\\n' }}\n {{- '{"name": "' }}\n {{- message.name }}\n {{- '", "content": ' }}\n {{- message.content|tojson + '}' }}\n {{- '\\n <|im_end|>\\n' }} \n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n`, + data: { + messages: [{ role: "user", content: "Fetch the stock fundamentals data for Tesla (TSLA)" }], + tools: [ + { + type: "function", + function: { + name: "get_stock_fundamentals", + description: "Get fundamental data for a given stock symbol using yfinance API.", + parameters: { + type: "object", + properties: { symbol: { type: "string", description: "The stock symbol." } }, + required: ["symbol"], + }, + return: { + type: "object", + description: + "A dictionary containing fundamental data.\n\nKeys:\n - 'symbol': The stock symbol.\n - 'company_name': The long name of the company.\n - 'sector': The sector to which the company belongs.\n - 'industry': The industry to which the company belongs.\n - 'market_cap': The market capitalization of the company.\n - 'pe_ratio': The forward price-to-earnings ratio.\n - 'pb_ratio': The price-to-book ratio.\n - 'dividend_yield': The dividend yield.\n - 'eps': The trailing earnings per share.\n - 'beta': The beta value of the stock.\n - '52_week_high': The 52-week high price of the stock.\n - '52_week_low': The 52-week low price of the stock.", + }, + }, + }, + ], + bos_token: "<|begin_of_text|>", + eos_token: "<|im_end|>", + add_generation_prompt: true, + }, + target: `<|begin_of_text|>You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": get_stock_fundamentals", "description": "get_stock_fundamentals(symbol: str) -> dict - Get fundamental data for a given stock symbol using yfinance API.\n\n Args:\n symbol(str): The stock symbol.\n Returns:\n A dictionary containing fundamental data.\n\nKeys:\n - 'symbol': The stock symbol.\n - 'company_name': The long name of the company.\n - 'sector': The sector to which the company belongs.\n - 'industry': The industry to which the company belongs.\n - 'market_cap': The market capitalization of the company.\n - 'pe_ratio': The forward price-to-earnings ratio.\n - 'pb_ratio': The price-to-book ratio.\n - 'dividend_yield': The dividend yield.\n - 'eps': The trailing earnings per share.\n - 'beta': The beta value of the stock.\n - '52_week_high': The 52-week high price of the stock.\n - '52_week_low': The 52-week low price of the stock.", "parameters": {"type": "object", "properties": {"symbol": {"type": "string", "description": "The stock symbol."}}, "required": ["symbol"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"arguments": {"title": "Arguments", "type": "object"}, "name": {"title": "Name", "type": "string"}}, "required": ["arguments", "name"], "title": "FunctionCall", "type": "object"}\nFor each function call return a json object with function name and arguments within XML tags as follows:\n\n{"arguments": , "name": }\n<|im_end|><|im_start|>user\nFetch the stock fundamentals data for Tesla (TSLA)<|im_end|>\n<|im_start|>assistant\n`, + }, }); describe("End-to-end tests", () => {