mirror of
https://github.com/samiyev/puaros.git
synced 2025-12-27 15:00:41 +05:00
feat(ipuaro): add JSON tool call parsing and improve prompts
- Add JSON fallback parsing in ResponseParser for LLMs that prefer JSON - Add tool name aliases (get_functions -> get_lines, etc.) - Improve system prompt with clear tool usage guidelines - Add native Ollama tools support in OllamaClient - Add E2E tests for full workflow with real Ollama
This commit is contained in:
@@ -5,6 +5,35 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [0.30.2] - 2025-12-05 - JSON Tool Call Parsing & Improved Prompts
|
||||
|
||||
### Added
|
||||
|
||||
- **JSON Tool Call Fallback in ResponseParser**
|
||||
- LLM responses with JSON format `{"name": "tool", "arguments": {...}}` are now parsed
|
||||
- Fallback to JSON when XML format not found
|
||||
- Works with models like qwen2.5-coder that prefer JSON over XML
|
||||
|
||||
- **Tool Name Aliases**
|
||||
- `get_functions`, `read_file`, `read_lines` → `get_lines`
|
||||
- `list_files`, `get_files` → `get_structure`
|
||||
- `find_todos` → `get_todos`
|
||||
- And more common LLM typos/variations
|
||||
|
||||
### Changed
|
||||
|
||||
- **Improved System Prompt**
|
||||
- Added clear "When to Use Tools" / "Do NOT use tools" sections
|
||||
- More concise and directive instructions
|
||||
- Better examples for tool usage
|
||||
|
||||
### Technical Details
|
||||
|
||||
- Total tests: 1848 passed (+8 new tests for JSON parsing)
|
||||
- 0 ESLint errors, 3 warnings (pre-existing complexity)
|
||||
|
||||
---
|
||||
|
||||
## [0.30.1] - 2025-12-05 - Display Transitive Counts in Context
|
||||
|
||||
### Changed
|
||||
|
||||
@@ -18,6 +18,7 @@ import {
|
||||
buildInitialContext,
|
||||
type ProjectStructure,
|
||||
SYSTEM_PROMPT,
|
||||
TOOL_REMINDER,
|
||||
} from "../../infrastructure/llm/prompts.js"
|
||||
import { parseToolCalls } from "../../infrastructure/llm/ResponseParser.js"
|
||||
import type { IToolRegistry } from "../interfaces/IToolRegistry.js"
|
||||
@@ -277,6 +278,12 @@ export class HandleMessage {
|
||||
|
||||
messages.push(...session.history)
|
||||
|
||||
// Add tool reminder if last message is from user (first LLM call for this query)
|
||||
const lastMessage = session.history[session.history.length - 1]
|
||||
if (lastMessage?.role === "user") {
|
||||
messages.push(createSystemMessage(TOOL_REMINDER))
|
||||
}
|
||||
|
||||
return messages
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
import { type Message, Ollama } from "ollama"
|
||||
import { type Message, Ollama, type Tool } from "ollama"
|
||||
import type { ILLMClient, LLMResponse } from "../../domain/services/ILLMClient.js"
|
||||
import type { ChatMessage } from "../../domain/value-objects/ChatMessage.js"
|
||||
import { createToolCall, type ToolCall } from "../../domain/value-objects/ToolCall.js"
|
||||
import type { LLMConfig } from "../../shared/constants/config.js"
|
||||
import { IpuaroError } from "../../shared/errors/IpuaroError.js"
|
||||
import { estimateTokens } from "../../shared/utils/tokens.js"
|
||||
import { parseToolCalls } from "./ResponseParser.js"
|
||||
import { getOllamaNativeTools } from "./toolDefs.js"
|
||||
|
||||
/**
|
||||
* Ollama LLM client implementation.
|
||||
* Wraps the Ollama SDK for chat completions with tool support.
|
||||
* Supports both XML-based and native Ollama tool calling.
|
||||
*/
|
||||
export class OllamaClient implements ILLMClient {
|
||||
private readonly client: Ollama
|
||||
@@ -17,6 +20,7 @@ export class OllamaClient implements ILLMClient {
|
||||
private readonly contextWindow: number
|
||||
private readonly temperature: number
|
||||
private readonly timeout: number
|
||||
private readonly useNativeTools: boolean
|
||||
private abortController: AbortController | null = null
|
||||
|
||||
constructor(config: LLMConfig) {
|
||||
@@ -26,11 +30,12 @@ export class OllamaClient implements ILLMClient {
|
||||
this.contextWindow = config.contextWindow
|
||||
this.temperature = config.temperature
|
||||
this.timeout = config.timeout
|
||||
this.useNativeTools = config.useNativeTools ?? false
|
||||
}
|
||||
|
||||
/**
|
||||
* Send messages to LLM and get response.
|
||||
* Tool definitions should be included in the system prompt as XML format.
|
||||
* Supports both XML-based tool calling and native Ollama tools.
|
||||
*/
|
||||
async chat(messages: ChatMessage[]): Promise<LLMResponse> {
|
||||
const startTime = Date.now()
|
||||
@@ -39,26 +44,11 @@ export class OllamaClient implements ILLMClient {
|
||||
try {
|
||||
const ollamaMessages = this.convertMessages(messages)
|
||||
|
||||
const response = await this.client.chat({
|
||||
model: this.model,
|
||||
messages: ollamaMessages,
|
||||
options: {
|
||||
temperature: this.temperature,
|
||||
},
|
||||
stream: false,
|
||||
})
|
||||
|
||||
const timeMs = Date.now() - startTime
|
||||
const parsed = parseToolCalls(response.message.content)
|
||||
|
||||
return {
|
||||
content: parsed.content,
|
||||
toolCalls: parsed.toolCalls,
|
||||
tokens: response.eval_count ?? estimateTokens(response.message.content),
|
||||
timeMs,
|
||||
truncated: false,
|
||||
stopReason: this.determineStopReason(response, parsed.toolCalls),
|
||||
if (this.useNativeTools) {
|
||||
return await this.chatWithNativeTools(ollamaMessages, startTime)
|
||||
}
|
||||
|
||||
return await this.chatWithXMLTools(ollamaMessages, startTime)
|
||||
} catch (error) {
|
||||
if (error instanceof Error && error.name === "AbortError") {
|
||||
throw IpuaroError.llm("Request was aborted")
|
||||
@@ -69,6 +59,131 @@ export class OllamaClient implements ILLMClient {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Chat using XML-based tool calling (legacy mode).
 *
 * Sends the conversation without native tool definitions and extracts tool
 * calls from the text of the reply via `parseToolCalls`.
 *
 * @param ollamaMessages - Conversation already converted to Ollama's message shape.
 * @param startTime - Timestamp (ms) taken by the caller; used to compute `timeMs`.
 * @returns The reply with tool calls stripped from `content`. `truncated` is always `false`.
 */
private async chatWithXMLTools(
    ollamaMessages: Message[],
    startTime: number,
): Promise<LLMResponse> {
    const response = await this.client.chat({
        stream: false,
        model: this.model,
        messages: ollamaMessages,
        options: { temperature: this.temperature },
    })

    // Measure latency before any local parsing work.
    const elapsedMs = Date.now() - startTime
    const rawContent = response.message.content
    const { content, toolCalls } = parseToolCalls(rawContent)

    // Prefer the server-reported token count; estimate from the raw text otherwise.
    const tokens = response.eval_count ?? estimateTokens(rawContent)
    const stopReason = this.determineStopReason(response, toolCalls)

    return {
        content,
        toolCalls,
        tokens,
        timeMs: elapsedMs,
        truncated: false,
        stopReason,
    }
}
|
||||
|
||||
/**
 * Chat using native Ollama tool calling.
 *
 * Tool definitions from `getOllamaNativeTools()` are passed in the request.
 * If the reply carries no native tool calls but has text content, the text is
 * scanned for JSON-formatted tool calls as a fallback.
 *
 * @param ollamaMessages - Conversation already converted to Ollama's message shape.
 * @param startTime - Timestamp (ms) taken by the caller; used to compute `timeMs`.
 * @returns The reply. When any tool call is found, `content` is emptied and
 *          `stopReason` is `"tool_use"`; otherwise `stopReason` is `"end"`.
 */
private async chatWithNativeTools(
    ollamaMessages: Message[],
    startTime: number,
): Promise<LLMResponse> {
    const response = await this.client.chat({
        stream: false,
        model: this.model,
        messages: ollamaMessages,
        tools: getOllamaNativeTools() as Tool[],
        options: { temperature: this.temperature },
    })

    const elapsedMs = Date.now() - startTime
    const rawContent = response.message.content || ""
    const nativeCalls = this.parseNativeToolCalls(response.message.tool_calls)

    // Fallback: some models return tool calls as JSON in content
    const toolCalls =
        nativeCalls.length === 0 && rawContent
            ? this.parseToolCallsFromContent(rawContent)
            : nativeCalls

    const hasToolCalls = toolCalls.length > 0

    return {
        content: hasToolCalls ? "" : rawContent,
        toolCalls,
        tokens: response.eval_count ?? estimateTokens(rawContent),
        timeMs: elapsedMs,
        truncated: false,
        stopReason: hasToolCalls ? "tool_use" : "end",
    }
}
|
||||
|
||||
/**
 * Parse native Ollama tool calls into ToolCall format.
 *
 * @param nativeToolCalls - The `tool_calls` array of an Ollama reply, if any.
 * @returns One ToolCall per entry, in the same order; an empty array when the
 *          input is missing or empty. Ids have the form `native_<timestamp>_<index>`.
 */
private parseNativeToolCalls(
    nativeToolCalls?: { function: { name: string; arguments: Record<string, unknown> } }[],
): ToolCall[] {
    const converted: ToolCall[] = []

    for (const [index, nativeCall] of (nativeToolCalls ?? []).entries()) {
        const id = `native_${String(Date.now())}_${String(index)}`
        converted.push(
            createToolCall(id, nativeCall.function.name, nativeCall.function.arguments),
        )
    }

    return converted
}
|
||||
|
||||
/**
 * Parse tool calls from content (fallback for models that return JSON in content).
 * Supports format: {"name": "tool_name", "arguments": {...}}
 *
 * Candidate objects are located by brace balancing instead of a lazy regular
 * expression: a lazy `[\s\S]*?\}` stops at the first closing brace, which cuts
 * off the outer brace whenever `arguments` is itself an object and leaves text
 * that `JSON.parse` rejects.
 *
 * @param content - Raw text content of the model reply.
 * @returns Tool calls found in the text, in order of appearance. Objects that
 *          are not valid JSON, or lack a string `name` or an `arguments` key,
 *          are ignored.
 */
private parseToolCallsFromContent(content: string): ToolCall[] {
    const toolCalls: ToolCall[] = []
    let cursor = content.indexOf("{")

    while (cursor !== -1) {
        const end = this.findBalancedObjectEnd(content, cursor)
        const toolCall =
            end === -1
                ? null
                : this.tryCreateToolCallFromJson(
                      content.slice(cursor, end + 1),
                      toolCalls.length,
                  )

        if (toolCall && end !== -1) {
            toolCalls.push(toolCall)
            // Skip the whole consumed object so its nested `arguments` is not rescanned.
            cursor = content.indexOf("{", end + 1)
        } else {
            // Not a tool call at this position: retry from the next opening brace,
            // which also lets a tool call nested inside a wrapper object be found.
            cursor = content.indexOf("{", cursor + 1)
        }
    }

    return toolCalls
}

/**
 * Find the index of the `}` that closes the `{` at `start`, ignoring braces
 * that appear inside double-quoted JSON strings. Returns -1 when unbalanced.
 */
private findBalancedObjectEnd(text: string, start: number): number {
    let depth = 0
    let inString = false
    let escaped = false

    for (let i = start; i < text.length; i++) {
        const ch = text[i]

        if (inString) {
            if (escaped) {
                escaped = false
            } else if (ch === "\\") {
                escaped = true
            } else if (ch === '"') {
                inString = false
            }
            continue
        }

        if (ch === '"') {
            inString = true
        } else if (ch === "{") {
            depth++
        } else if (ch === "}") {
            depth--
            if (depth === 0) {
                return i
            }
        }
    }

    return -1
}

/**
 * Turn one JSON object string into a ToolCall, or return null when the text is
 * not valid JSON or does not have the `{"name": string, "arguments": ...}` shape.
 */
private tryCreateToolCallFromJson(json: string, index: number): ToolCall | null {
    let parsed: unknown
    try {
        parsed = JSON.parse(json)
    } catch {
        // Invalid JSON, skip
        return null
    }

    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
        return null
    }

    const candidate = parsed as { name?: unknown; arguments?: unknown }
    const toolName = candidate.name
    if (typeof toolName !== "string" || toolName === "" || !("arguments" in candidate)) {
        return null
    }

    // Only a plain object is usable as parameters; anything else (null, array,
    // string, ...) falls back to an empty parameter set.
    const rawArgs = candidate.arguments
    const params =
        typeof rawArgs === "object" && rawArgs !== null && !Array.isArray(rawArgs)
            ? (rawArgs as Record<string, unknown>)
            : {}

    return createToolCall(`json_${String(Date.now())}_${String(index)}`, toolName, params)
}
|
||||
|
||||
/**
|
||||
* Count tokens in text.
|
||||
* Uses estimation since Ollama doesn't provide a tokenizer endpoint.
|
||||
|
||||
@@ -58,9 +58,50 @@ const VALID_TOOL_NAMES = new Set([
|
||||
"run_tests",
|
||||
])
|
||||
|
||||
/**
 * Tool name aliases for common LLM typos/variations.
 * Maps incorrect names (lower-case keys) to correct tool names.
 */
const TOOL_ALIASES: Record<string, string> = {
    // get_lines aliases
    get_functions: "get_lines",
    read_file: "get_lines",
    read_lines: "get_lines",
    get_file: "get_lines",
    read: "get_lines",
    // get_function aliases
    getfunction: "get_function",
    // get_structure aliases
    list_files: "get_structure",
    get_files: "get_structure",
    list_structure: "get_structure",
    get_project_structure: "get_structure",
    // get_todos aliases
    find_todos: "get_todos",
    list_todos: "get_todos",
    // find_references aliases
    get_references: "find_references",
    // find_definition aliases
    get_definition: "find_definition",
    // edit_lines aliases
    edit_file: "edit_lines",
    modify_file: "edit_lines",
    update_file: "edit_lines",
}

/**
 * Normalize tool name using aliases.
 *
 * The alias lookup is case-insensitive. A name with no alias is returned
 * unchanged, with its original casing.
 *
 * @param name - Tool name as produced by the LLM.
 * @returns The canonical tool name for a known alias, otherwise `name` itself.
 */
function normalizeToolName(name: string): string {
    const lowerName = name.toLowerCase()

    // Only own keys count as aliases. A bare `TOOL_ALIASES[lowerName]` would also
    // resolve inherited members such as "constructor" or "__proto__" and return
    // a non-string value from a function typed to return string.
    if (!Object.prototype.hasOwnProperty.call(TOOL_ALIASES, lowerName)) {
        return name
    }

    return TOOL_ALIASES[lowerName] ?? name
}
|
||||
|
||||
/**
|
||||
* Parse tool calls from LLM response text.
|
||||
* Supports XML format: <tool_call name="get_lines"><path>src/index.ts</path></tool_call>
|
||||
* Supports both XML and JSON formats:
|
||||
* - XML: <tool_call name="get_lines"><path>src/index.ts</path></tool_call>
|
||||
* - JSON: {"name": "get_lines", "arguments": {"path": "src/index.ts"}}
|
||||
* Validates tool names and provides helpful error messages.
|
||||
*/
|
||||
export function parseToolCalls(response: string): ParsedResponse {
|
||||
@@ -68,14 +109,18 @@ export function parseToolCalls(response: string): ParsedResponse {
|
||||
const parseErrors: string[] = []
|
||||
let content = response
|
||||
|
||||
const matches = [...response.matchAll(TOOL_CALL_REGEX)]
|
||||
// First, try XML format
|
||||
const xmlMatches = [...response.matchAll(TOOL_CALL_REGEX)]
|
||||
|
||||
for (const match of matches) {
|
||||
const [fullMatch, toolName, paramsXml] = match
|
||||
for (const match of xmlMatches) {
|
||||
const [fullMatch, rawToolName, paramsXml] = match
|
||||
|
||||
// Normalize tool name (handle common LLM typos/variations)
|
||||
const toolName = normalizeToolName(rawToolName)
|
||||
|
||||
if (!VALID_TOOL_NAMES.has(toolName)) {
|
||||
parseErrors.push(
|
||||
`Unknown tool "${toolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
|
||||
`Unknown tool "${rawToolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
|
||||
)
|
||||
continue
|
||||
}
|
||||
@@ -91,7 +136,19 @@ export function parseToolCalls(response: string): ParsedResponse {
|
||||
content = content.replace(fullMatch, "")
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : String(error)
|
||||
parseErrors.push(`Failed to parse tool call "${toolName}": ${errorMsg}`)
|
||||
parseErrors.push(`Failed to parse tool call "${rawToolName}": ${errorMsg}`)
|
||||
}
|
||||
}
|
||||
|
||||
// If no XML tool calls found, try JSON format as fallback
|
||||
if (toolCalls.length === 0) {
|
||||
const jsonResult = parseJsonToolCalls(response)
|
||||
toolCalls.push(...jsonResult.toolCalls)
|
||||
parseErrors.push(...jsonResult.parseErrors)
|
||||
|
||||
// Remove JSON tool calls from content
|
||||
for (const jsonMatch of jsonResult.matchedStrings) {
|
||||
content = content.replace(jsonMatch, "")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,6 +162,59 @@ export function parseToolCalls(response: string): ParsedResponse {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * JSON tool call format pattern.
 * Matches: {"name": "tool_name", "arguments": {...}}
 *
 * Capture 1 is the tool name, capture 2 the arguments object text. The
 * arguments object may contain braces nested one level deep.
 */
const JSON_TOOL_CALL_REGEX =
    /\{\s*"name"\s*:\s*"([^"]+)"\s*,\s*"arguments"\s*:\s*(\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\})\s*\}/g

/**
 * Parse tool calls from JSON format in response.
 * This is a fallback for LLMs that prefer JSON over XML.
 *
 * @param response - Raw LLM response text.
 * @returns `toolCalls` for every match with a valid tool name and parseable
 *          arguments, `parseErrors` describing rejected matches, and
 *          `matchedStrings` holding the full text of every match (accepted or
 *          not) so the caller can remove them from the visible content.
 */
function parseJsonToolCalls(response: string): {
    toolCalls: ToolCall[]
    parseErrors: string[]
    matchedStrings: string[]
} {
    const toolCalls: ToolCall[] = []
    const parseErrors: string[] = []
    const matchedStrings: string[] = []

    for (const [fullMatch, rawToolName, argsJson] of response.matchAll(JSON_TOOL_CALL_REGEX)) {
        matchedStrings.push(fullMatch)

        // Normalize tool name
        const toolName = normalizeToolName(rawToolName)

        if (VALID_TOOL_NAMES.has(toolName)) {
            try {
                const args = JSON.parse(argsJson) as Record<string, unknown>
                const id = `json_${String(Date.now())}_${String(toolCalls.length)}`
                toolCalls.push(createToolCall(id, toolName, args))
            } catch (error) {
                const errorMsg = error instanceof Error ? error.message : String(error)
                parseErrors.push(`Failed to parse JSON tool call "${rawToolName}": ${errorMsg}`)
            }
        } else {
            // Report the name the model actually wrote, not the normalized one.
            parseErrors.push(
                `Unknown tool "${rawToolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
            )
        }
    }

    return { toolCalls, parseErrors, matchedStrings }
}
|
||||
|
||||
/**
|
||||
* Parse parameters from XML content.
|
||||
*/
|
||||
|
||||
@@ -25,99 +25,115 @@ export interface BuildContextOptions {
|
||||
/**
|
||||
* System prompt for the ipuaro AI agent.
|
||||
*/
|
||||
export const SYSTEM_PROMPT = `You are ipuaro, a local AI code assistant specialized in helping developers understand and modify their codebase. You operate within a single project directory and have access to powerful tools for reading, searching, analyzing, and editing code.
|
||||
export const SYSTEM_PROMPT = `You are ipuaro, a local AI code assistant with tools for reading, searching, analyzing, and editing code.
|
||||
|
||||
## Core Principles
|
||||
## When to Use Tools
|
||||
|
||||
1. **Lazy Loading**: You don't have the full code in context. Use tools to fetch exactly what you need.
|
||||
2. **Precision**: Always verify file paths and line numbers before making changes.
|
||||
3. **Safety**: Confirm destructive operations. Never execute dangerous commands.
|
||||
4. **Efficiency**: Minimize context usage. Request only necessary code sections.
|
||||
**Use tools** when the user asks about:
|
||||
- Code content (files, functions, classes)
|
||||
- Project structure
|
||||
- TODOs, complexity, dependencies
|
||||
- Git status, diffs, commits
|
||||
- Running commands or tests
|
||||
|
||||
## Tool Calling Format
|
||||
**Do NOT use tools** for:
|
||||
- Greetings ("Hello", "Hi", "Thanks")
|
||||
- General questions not about this codebase
|
||||
- Clarifying questions back to the user
|
||||
|
||||
When you need to use a tool, format your call as XML:
|
||||
## MANDATORY: Tools for Code Questions
|
||||
|
||||
<tool_call name="tool_name">
|
||||
<param_name>value</param_name>
|
||||
<another_param>value</another_param>
|
||||
</tool_call>
|
||||
**CRITICAL:** You have ZERO code in your context. To answer ANY question about code, you MUST first call a tool.
|
||||
|
||||
You can call multiple tools in one response. Always wait for tool results before making conclusions.
|
||||
|
||||
**Examples:**
|
||||
**WRONG:**
|
||||
User: "What's in src/index.ts?"
|
||||
Assistant: "The file likely contains..." ← WRONG! Call a tool!
|
||||
|
||||
**CORRECT:**
|
||||
User: "What's in src/index.ts?"
|
||||
<tool_call name="get_lines">
|
||||
<path>src/index.ts</path>
|
||||
<start>1</start>
|
||||
<end>50</end>
|
||||
<path>src/index.ts</path>
|
||||
</tool_call>
|
||||
|
||||
<tool_call name="edit_lines">
|
||||
<path>src/utils.ts</path>
|
||||
<start>10</start>
|
||||
<end>15</end>
|
||||
<content>const newCode = "hello";</content>
|
||||
## Tool Call Format
|
||||
|
||||
Output this XML format. Do NOT explain before calling - just output the XML:
|
||||
|
||||
<tool_call name="TOOL_NAME">
|
||||
<param1>value1</param1>
|
||||
<param2>value2</param2>
|
||||
</tool_call>
|
||||
|
||||
<tool_call name="find_references">
|
||||
<symbol>getUserById</symbol>
|
||||
## Example Interactions
|
||||
|
||||
**Example 1 - Reading a file:**
|
||||
User: "Show me the main function in src/app.ts"
|
||||
<tool_call name="get_function">
|
||||
<path>src/app.ts</path>
|
||||
<name>main</name>
|
||||
</tool_call>
|
||||
|
||||
**Example 2 - Finding TODOs:**
|
||||
User: "Are there any TODO comments?"
|
||||
<tool_call name="get_todos">
|
||||
</tool_call>
|
||||
|
||||
**Example 3 - Project structure:**
|
||||
User: "What files are in this project?"
|
||||
<tool_call name="get_structure">
|
||||
<path>.</path>
|
||||
</tool_call>
|
||||
|
||||
## Available Tools
|
||||
|
||||
### Reading Tools
|
||||
- \`get_lines(path, start?, end?)\`: Get specific lines from a file
|
||||
- \`get_function(path, name)\`: Get a function by name
|
||||
- \`get_class(path, name)\`: Get a class by name
|
||||
- \`get_structure(path?, depth?)\`: Get project directory structure
|
||||
### Reading
|
||||
- get_lines(path, start?, end?) - Read file lines
|
||||
- get_function(path, name) - Get function by name
|
||||
- get_class(path, name) - Get class by name
|
||||
- get_structure(path?, depth?) - List project files
|
||||
|
||||
### Editing Tools (require confirmation)
|
||||
- \`edit_lines(path, start, end, content)\`: Replace specific lines in a file
|
||||
- \`create_file(path, content)\`: Create a new file
|
||||
- \`delete_file(path)\`: Delete a file
|
||||
### Analysis
|
||||
- get_todos(path?, type?) - Find TODO/FIXME comments
|
||||
- get_dependencies(path) - What this file imports
|
||||
- get_dependents(path) - What imports this file
|
||||
- get_complexity(path?) - Code complexity metrics
|
||||
- find_references(symbol) - Find all usages of a symbol
|
||||
- find_definition(symbol) - Find where symbol is defined
|
||||
|
||||
### Search Tools
|
||||
- \`find_references(symbol, path?)\`: Find all usages of a symbol
|
||||
- \`find_definition(symbol)\`: Find where a symbol is defined
|
||||
### Editing (requires confirmation)
|
||||
- edit_lines(path, start, end, content) - Modify file lines
|
||||
- create_file(path, content) - Create new file
|
||||
- delete_file(path) - Delete a file
|
||||
|
||||
### Analysis Tools
|
||||
- \`get_dependencies(path)\`: Get files this file imports
|
||||
- \`get_dependents(path)\`: Get files that import this file
|
||||
- \`get_complexity(path?, limit?)\`: Get complexity metrics
|
||||
- \`get_todos(path?, type?)\`: Find TODO/FIXME comments
|
||||
### Git
|
||||
- git_status() - Repository status
|
||||
- git_diff(path?, staged?) - Show changes
|
||||
- git_commit(message, files?) - Create commit
|
||||
|
||||
### Git Tools
|
||||
- \`git_status()\`: Get repository status
|
||||
- \`git_diff(path?, staged?)\`: Get uncommitted changes
|
||||
- \`git_commit(message, files?)\`: Create a commit (requires confirmation)
|
||||
### Commands
|
||||
- run_command(command, timeout?) - Execute shell command
|
||||
- run_tests(path?, filter?) - Run test suite
|
||||
|
||||
### Run Tools
|
||||
- \`run_command(command, timeout?)\`: Execute a shell command (security checked)
|
||||
- \`run_tests(path?, filter?, watch?)\`: Run the test suite
|
||||
## Rules
|
||||
|
||||
## Response Guidelines
|
||||
1. **ALWAYS call a tool first** when asked about code - you cannot see any files
|
||||
2. **Output XML directly** - don't say "I will use..." just output the tool call
|
||||
3. **Wait for results** before making conclusions
|
||||
4. **Be concise** in your responses
|
||||
5. **Verify before editing** - always read code before modifying it
|
||||
6. **Stay safe** - never execute destructive commands without user confirmation`
|
||||
|
||||
1. **Be concise**: Don't repeat information already in context.
|
||||
2. **Show your work**: Explain what tools you're using and why.
|
||||
3. **Verify before editing**: Always read the target code before modifying it.
|
||||
4. **Handle errors gracefully**: If a tool fails, explain what went wrong and suggest alternatives.
|
||||
/**
|
||||
* Tool usage reminder - appended to messages to reinforce tool usage.
|
||||
* This is added as the last system message before LLM call.
|
||||
*/
|
||||
export const TOOL_REMINDER = `⚠️ REMINDER: To answer this question, you MUST use a tool first.
|
||||
Output the <tool_call> XML directly. Do NOT describe what you will do - just call the tool.
|
||||
|
||||
## Code Editing Rules
|
||||
|
||||
1. Always use \`get_lines\` or \`get_function\` before \`edit_lines\`.
|
||||
2. Provide exact line numbers for edits.
|
||||
3. For large changes, break into multiple small edits.
|
||||
4. After editing, suggest running tests if available.
|
||||
|
||||
## Safety Rules
|
||||
|
||||
1. Never execute commands that could harm the system.
|
||||
2. Never expose sensitive data (API keys, passwords).
|
||||
3. Always confirm file deletions and destructive git operations.
|
||||
4. Stay within the project directory.
|
||||
|
||||
When you need to perform an action, use the appropriate tool. Think step by step about what information you need and which tools will provide it most efficiently.`
|
||||
Example - if asked about a file, output:
|
||||
<tool_call name="get_lines">
|
||||
<path>the/file/path.ts</path>
|
||||
</tool_call>`
|
||||
|
||||
/**
|
||||
* Build initial context from project structure and AST metadata.
|
||||
|
||||
@@ -509,3 +509,87 @@ export function getToolsByCategory(category: string): ToolDef[] {
|
||||
return []
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Native Ollama Tools Format
|
||||
* =============================================================================
|
||||
*/
|
||||
|
||||
/**
 * Schema of a single parameter inside an Ollama tool definition.
 */
interface OllamaToolProperty {
    /** Type name of the parameter. */
    type: string
    /** Human-readable description of the parameter. */
    description: string
    /** Allowed values, when the parameter is restricted to a fixed set. */
    enum?: string[]
    /** Element schema, present for array parameters. */
    items?: { type: string }
}

/**
 * Parameter object schema of an Ollama tool.
 */
interface OllamaToolParameters {
    type: "object"
    /** Parameter schemas keyed by parameter name. */
    properties: Record<string, OllamaToolProperty>
    /** Names of the parameters that must be supplied. */
    required: string[]
}

/**
 * Function descriptor of an Ollama tool.
 */
interface OllamaToolFunction {
    name: string
    description: string
    parameters: OllamaToolParameters
}

/**
 * Ollama native tool definition format.
 */
export interface OllamaTool {
    type: "function"
    function: OllamaToolFunction
}
|
||||
|
||||
/**
 * Convert ToolDef to Ollama native format.
 *
 * Every parameter becomes an entry of `properties`. `enum` is copied when
 * present, array parameters get string items, and parameters flagged
 * `required` are listed in `required` in declaration order.
 *
 * @param tool - Internal tool definition.
 * @returns The equivalent Ollama function-tool definition.
 */
function convertToOllamaTool(tool: ToolDef): OllamaTool {
    const properties: Record<string, OllamaToolProperty> = {}

    for (const param of tool.parameters) {
        properties[param.name] = {
            type: param.type,
            description: param.description,
            ...(param.enum ? { enum: param.enum } : {}),
            // Array parameters are always declared with string items.
            ...(param.type === "array" ? { items: { type: "string" } } : {}),
        }
    }

    const required = tool.parameters
        .filter((param) => param.required)
        .map((param) => param.name)

    return {
        type: "function",
        function: {
            name: tool.name,
            description: tool.description,
            parameters: { type: "object", properties, required },
        },
    }
}
|
||||
|
||||
/**
 * All tools in Ollama native format.
 * Used when useNativeTools is enabled.
 * Built once at module load from ALL_TOOLS.
 */
export const OLLAMA_NATIVE_TOOLS: OllamaTool[] = ALL_TOOLS.map((tool) =>
    convertToOllamaTool(tool),
)

/**
 * Get native tool definitions for Ollama.
 *
 * @returns The shared OLLAMA_NATIVE_TOOLS array (not a copy).
 */
export function getOllamaNativeTools(): OllamaTool[] {
    return OLLAMA_NATIVE_TOOLS
}
|
||||
|
||||
@@ -20,6 +20,7 @@ export const LLMConfigSchema = z.object({
|
||||
temperature: z.number().min(0).max(2).default(0.1),
|
||||
host: z.string().default("http://localhost:11434"),
|
||||
timeout: z.number().int().positive().default(120_000),
|
||||
useNativeTools: z.boolean().default(false),
|
||||
})
|
||||
|
||||
/**
|
||||
|
||||
1506
packages/ipuaro/tests/e2e/full-workflow.test.ts
Normal file
1506
packages/ipuaro/tests/e2e/full-workflow.test.ts
Normal file
File diff suppressed because it is too large
Load Diff
351
packages/ipuaro/tests/e2e/test-helpers.ts
Normal file
351
packages/ipuaro/tests/e2e/test-helpers.ts
Normal file
@@ -0,0 +1,351 @@
|
||||
/**
|
||||
* E2E Test Helpers
|
||||
* Provides dependencies for testing the full flow with REAL LLM.
|
||||
*/
|
||||
|
||||
import { vi } from "vitest"
|
||||
import * as fs from "node:fs/promises"
|
||||
import * as path from "node:path"
|
||||
import * as os from "node:os"
|
||||
import type { IStorage, SymbolIndex, DepsGraph } from "../../src/domain/services/IStorage.js"
|
||||
import type { ISessionStorage, SessionListItem } from "../../src/domain/services/ISessionStorage.js"
|
||||
import type { FileData } from "../../src/domain/value-objects/FileData.js"
|
||||
import type { FileAST } from "../../src/domain/value-objects/FileAST.js"
|
||||
import type { FileMeta } from "../../src/domain/value-objects/FileMeta.js"
|
||||
import type { UndoEntry } from "../../src/domain/value-objects/UndoEntry.js"
|
||||
import { Session } from "../../src/domain/entities/Session.js"
|
||||
import { ToolRegistry } from "../../src/infrastructure/tools/registry.js"
|
||||
import { OllamaClient } from "../../src/infrastructure/llm/OllamaClient.js"
|
||||
import { registerAllTools } from "../../src/cli/commands/tools-setup.js"
|
||||
import type { LLMConfig } from "../../src/shared/constants/config.js"
|
||||
|
||||
/**
 * Default LLM config for tests.
 * Individual fields can be overridden through `createRealOllamaClient`.
 */
export const DEFAULT_TEST_LLM_CONFIG: LLMConfig = {
    // Model tag requested from the Ollama server.
    model: "qwen2.5-coder:14b-instruct-q4_K_M",
    contextWindow: 128_000,
    temperature: 0.1,
    // Ollama endpoint used by the E2E tests.
    host: "http://localhost:11434",
    timeout: 180_000,
    // E2E tests exercise the native tool-calling path by default.
    useNativeTools: true,
}
|
||||
|
||||
/**
 * In-memory storage implementation for testing.
 * Stores all data in Maps, no Redis required.
 *
 * Every method is wrapped in `vi.fn` so tests can inspect calls. The
 * `getAll*` methods return a copy of the underlying Map, while
 * `getSymbolIndex` and `getDepsGraph` return the stored value itself.
 */
export function createInMemoryStorage(): IStorage {
    const newDepsGraph = (): DepsGraph => ({ imports: new Map(), importedBy: new Map() })

    const fileStore = new Map<string, FileData>()
    const astStore = new Map<string, FileAST>()
    const metaStore = new Map<string, FileMeta>()
    const configStore = new Map<string, unknown>()
    let currentSymbolIndex: SymbolIndex = new Map()
    let currentDepsGraph: DepsGraph = newDepsGraph()
    let isOpen = false

    const storage: IStorage = {
        // --- files ---
        getFile: vi.fn(async (filePath: string) => {
            return fileStore.get(filePath) ?? null
        }),
        setFile: vi.fn(async (filePath: string, data: FileData) => {
            fileStore.set(filePath, data)
        }),
        deleteFile: vi.fn(async (filePath: string) => {
            fileStore.delete(filePath)
        }),
        getAllFiles: vi.fn(async () => {
            return new Map(fileStore)
        }),
        getFileCount: vi.fn(async () => {
            return fileStore.size
        }),

        // --- ASTs ---
        getAST: vi.fn(async (filePath: string) => {
            return astStore.get(filePath) ?? null
        }),
        setAST: vi.fn(async (filePath: string, ast: FileAST) => {
            astStore.set(filePath, ast)
        }),
        deleteAST: vi.fn(async (filePath: string) => {
            astStore.delete(filePath)
        }),
        getAllASTs: vi.fn(async () => {
            return new Map(astStore)
        }),

        // --- file metadata ---
        getMeta: vi.fn(async (filePath: string) => {
            return metaStore.get(filePath) ?? null
        }),
        setMeta: vi.fn(async (filePath: string, meta: FileMeta) => {
            metaStore.set(filePath, meta)
        }),
        deleteMeta: vi.fn(async (filePath: string) => {
            metaStore.delete(filePath)
        }),
        getAllMetas: vi.fn(async () => {
            return new Map(metaStore)
        }),

        // --- indexes ---
        getSymbolIndex: vi.fn(async () => {
            return currentSymbolIndex
        }),
        setSymbolIndex: vi.fn(async (index: SymbolIndex) => {
            currentSymbolIndex = index
        }),
        getDepsGraph: vi.fn(async () => {
            return currentDepsGraph
        }),
        setDepsGraph: vi.fn(async (graph: DepsGraph) => {
            currentDepsGraph = graph
        }),

        // --- project config ---
        getProjectConfig: vi.fn(async (key: string) => {
            return configStore.get(key) ?? null
        }),
        setProjectConfig: vi.fn(async (key: string, value: unknown) => {
            configStore.set(key, value)
        }),

        // --- lifecycle ---
        connect: vi.fn(async () => {
            isOpen = true
        }),
        disconnect: vi.fn(async () => {
            isOpen = false
        }),
        isConnected: vi.fn(() => {
            return isOpen
        }),
        // Resets all stored data; the connection flag is left as it is.
        clear: vi.fn(async () => {
            fileStore.clear()
            astStore.clear()
            metaStore.clear()
            currentSymbolIndex = new Map()
            currentDepsGraph = newDepsGraph()
            configStore.clear()
        }),
    }

    return storage
}
|
||||
|
||||
/**
 * In-memory session storage for testing.
 *
 * Sessions and undo stacks are kept in Maps keyed by session id. Every method
 * is wrapped in `vi.fn` so tests can inspect calls.
 */
export function createInMemorySessionStorage(): ISessionStorage {
    const sessionsById = new Map<string, Session>()
    const undoById = new Map<string, UndoEntry[]>()

    const storage: ISessionStorage = {
        saveSession: vi.fn(async (session: Session) => {
            sessionsById.set(session.id, session)
        }),
        loadSession: vi.fn(async (sessionId: string) => {
            return sessionsById.get(sessionId) ?? null
        }),
        // Deleting a session also drops its undo stack.
        deleteSession: vi.fn(async (sessionId: string) => {
            sessionsById.delete(sessionId)
            undoById.delete(sessionId)
        }),
        // Without a project name (or with an empty one) every session is listed.
        listSessions: vi.fn(async (projectName?: string): Promise<SessionListItem[]> => {
            return [...sessionsById.values()]
                .filter((session) => !projectName || session.projectName === projectName)
                .map((session) => ({
                    id: session.id,
                    projectName: session.projectName,
                    createdAt: session.createdAt,
                    lastActivityAt: session.lastActivityAt,
                    messageCount: session.history.length,
                }))
        }),
        // On equal lastActivityAt the session stored first wins.
        getLatestSession: vi.fn(async (projectName: string) => {
            let newest: Session | null = null
            for (const candidate of sessionsById.values()) {
                if (candidate.projectName !== projectName) {
                    continue
                }
                if (newest === null || candidate.lastActivityAt > newest.lastActivityAt) {
                    newest = candidate
                }
            }
            return newest
        }),
        sessionExists: vi.fn(async (sessionId: string) => {
            return sessionsById.has(sessionId)
        }),
        pushUndoEntry: vi.fn(async (sessionId: string, entry: UndoEntry) => {
            const existing = undoById.get(sessionId)
            if (existing) {
                existing.push(entry)
            } else {
                undoById.set(sessionId, [entry])
            }
        }),
        popUndoEntry: vi.fn(async (sessionId: string) => {
            return undoById.get(sessionId)?.pop() ?? null
        }),
        getUndoStack: vi.fn(async (sessionId: string) => {
            return undoById.get(sessionId) ?? []
        }),
        // No-op for unknown session ids.
        touchSession: vi.fn(async (sessionId: string) => {
            const session = sessionsById.get(sessionId)
            if (!session) {
                return
            }
            session.lastActivityAt = Date.now()
        }),
        clearAllSessions: vi.fn(async () => {
            sessionsById.clear()
            undoById.clear()
        }),
    }

    return storage
}
|
||||
|
||||
/**
 * Construct a real (non-mocked) `OllamaClient` for E2E tests.
 *
 * @param config - Optional overrides. They are spread after
 *   `DEFAULT_TEST_LLM_CONFIG`, so any key present here replaces the default.
 * @returns A client built from the merged configuration.
 */
export function createRealOllamaClient(config?: Partial<LLMConfig>): OllamaClient {
    const effectiveConfig = { ...DEFAULT_TEST_LLM_CONFIG, ...config }
    return new OllamaClient(effectiveConfig)
}
|
||||
|
||||
/**
 * Build a fresh `ToolRegistry` and populate it through `registerAllTools`.
 *
 * @returns The populated registry; each call creates a new instance.
 */
export function createRealToolRegistry(): ToolRegistry {
    const populated = new ToolRegistry()
    registerAllTools(populated)

    return populated
}
|
||||
|
||||
// Per-process counter appended to generated ids so that sessions created
// within the same millisecond still get distinct ids.
let testSessionSequence = 0

/**
 * Create a new test session with a unique id.
 *
 * The id has the form `test-<epoch ms>-<sequence>`. The timestamp alone is
 * not unique when several sessions are created back to back, and id-keyed
 * storage would then silently overwrite one session with another.
 *
 * @param projectName - Project the session belongs to; defaults to "test-project".
 * @returns A new `Session` for the given project.
 */
export function createTestSession(projectName = "test-project"): Session {
    testSessionSequence += 1
    return new Session(`test-${Date.now()}-${testSessionSequence}`, projectName)
}
|
||||
|
||||
/**
 * Create a temporary test project directory with sample files.
 *
 * A new directory prefixed `ipuaro-e2e-` is created under the OS temp folder
 * and filled with:
 * - `src/index.ts`: three small functions and a TODO comment
 * - `src/utils.ts`: a `sum` helper, a `Calculator` class and a FIXME comment
 * - `package.json`: minimal ESM package manifest
 * - `README.md`: short project description
 *
 * If any file cannot be written, the directory is removed again before the
 * error is rethrown, so failed runs do not leave fixtures behind.
 *
 * @returns Path of the created directory. The caller is responsible for
 *   deleting it (for example with `cleanupTestProject`).
 * @throws The original filesystem error when creating the project fails.
 */
export async function createTestProject(): Promise<string> {
    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "ipuaro-e2e-"))

    const indexSource = `/**
 * Main entry point
 */
export function main(): void {
    console.log("Hello, world!")
}

export function add(a: number, b: number): number {
    return a + b
}

export function multiply(a: number, b: number): number {
    return a * b
}

// TODO: Add more math functions
main()
`

    const utilsSource = `/**
 * Utility functions
 */
import { add } from "./index.js"

export function sum(numbers: number[]): number {
    return numbers.reduce((acc, n) => add(acc, n), 0)
}

export class Calculator {
    private result: number = 0

    add(n: number): this {
        this.result += n
        return this
    }

    subtract(n: number): this {
        this.result -= n
        return this
    }

    getResult(): number {
        return this.result
    }

    reset(): void {
        this.result = 0
    }
}

// FIXME: Handle edge cases for negative numbers
`

    const packageJson = JSON.stringify(
        {
            name: "test-project",
            version: "1.0.0",
            type: "module",
            scripts: {
                test: "echo 'Tests passed!'",
            },
        },
        null,
        4,
    )

    const readme = `# Test Project

A sample project for E2E testing.

## Features
- Basic math functions
- Calculator class
`

    try {
        await fs.mkdir(path.join(tempDir, "src"), { recursive: true })
        await fs.writeFile(path.join(tempDir, "src", "index.ts"), indexSource)
        await fs.writeFile(path.join(tempDir, "src", "utils.ts"), utilsSource)
        await fs.writeFile(path.join(tempDir, "package.json"), packageJson)
        await fs.writeFile(path.join(tempDir, "README.md"), readme)
    } catch (error) {
        // Best-effort cleanup of the half-built fixture; a failure here must
        // not mask the error that actually broke project creation.
        await fs.rm(tempDir, { recursive: true, force: true }).catch(() => undefined)
        throw error
    }

    return tempDir
}
|
||||
|
||||
/**
 * Delete a test project directory and everything inside it.
 *
 * Removal is recursive and forced, so a path that no longer exists is not
 * treated as an error.
 *
 * @param projectDir - Directory to remove.
 */
export async function cleanupTestProject(projectDir: string): Promise<void> {
    const removal = { recursive: true, force: true }
    await fs.rm(projectDir, removal)
}
|
||||
|
||||
/**
 * Everything an E2E test needs, bundled into one object
 * (as produced by `createE2ETestDependencies`).
 */
export interface E2ETestDependencies {
    /** Project data storage. */
    storage: IStorage
    /** Storage for sessions and their undo stacks. */
    sessionStorage: ISessionStorage
    /** Ollama LLM client. */
    llm: OllamaClient
    /** Registry of tools available to the test. */
    tools: ToolRegistry
    /** Session the test runs in. */
    session: Session
    /** Root directory of the project under test. */
    projectRoot: string
}
|
||||
|
||||
/**
 * Assemble the full dependency bundle for an E2E test.
 *
 * A temporary sample project is created on disk first; the storages are
 * in-memory, while the LLM client and tool registry are the real
 * implementations.
 *
 * @param llmConfig - Optional overrides forwarded to `createRealOllamaClient`.
 * @returns The bundle. `projectRoot` points at the temporary project, which
 *   the caller should remove when done (see `cleanupTestProject`).
 */
export async function createE2ETestDependencies(
    llmConfig?: Partial<LLMConfig>,
): Promise<E2ETestDependencies> {
    const projectRoot = await createTestProject()

    const storage = createInMemoryStorage()
    const sessionStorage = createInMemorySessionStorage()
    const llm = createRealOllamaClient(llmConfig)
    const tools = createRealToolRegistry()
    const session = createTestSession()

    return { storage, sessionStorage, llm, tools, session, projectRoot }
}
|
||||
|
||||
/**
 * Report whether Ollama is available, as determined by `isAvailable()` on a
 * client built with the default test configuration.
 *
 * @returns The result of the client's availability check.
 */
export async function isOllamaAvailable(): Promise<boolean> {
    return createRealOllamaClient().isAvailable()
}
|
||||
|
||||
/**
 * Report whether a given model is available, as determined by `hasModel()`
 * on a client built with the default test configuration.
 *
 * @param model - Model name to look for; defaults to
 *   "qwen2.5-coder:14b-instruct-q4_K_M".
 * @returns The result of the client's model check.
 */
export async function isModelAvailable(
    model = "qwen2.5-coder:14b-instruct-q4_K_M",
): Promise<boolean> {
    return createRealOllamaClient().hasModel(model)
}
|
||||
@@ -135,6 +135,108 @@ describe("ResponseParser", () => {
|
||||
expect(result.parseErrors[0]).toContain("unknown_tool")
|
||||
})
|
||||
|
||||
it("should normalize tool name aliases", () => {
    // Parses one XML tool call and checks it resolved to the canonical tool name.
    const parseAliased = (xml: string, canonicalName: string) => {
        const parsed = parseToolCalls(xml)
        expect(parsed.toolCalls).toHaveLength(1)
        expect(parsed.toolCalls[0].name).toBe(canonicalName)
        return parsed
    }

    // get_functions -> get_lines (common LLM typo); the only case that is
    // additionally checked for the absence of parse errors
    const fromGetFunctions = parseAliased(
        `<tool_call name="get_functions"><path>src/index.ts</path></tool_call>`,
        "get_lines",
    )
    expect(fromGetFunctions.hasParseErrors).toBe(false)

    // read_file -> get_lines
    parseAliased(`<tool_call name="read_file"><path>test.ts</path></tool_call>`, "get_lines")

    // find_todos -> get_todos
    parseAliased(`<tool_call name="find_todos"></tool_call>`, "get_todos")

    // list_files -> get_structure
    parseAliased(`<tool_call name="list_files"><path>.</path></tool_call>`, "get_structure")
})
|
||||
|
||||
// JSON format tests
it("should parse JSON format tool calls as fallback", () => {
    // A bare JSON object with no XML tool call anywhere in the response.
    const jsonOnly = `{"name": "get_lines", "arguments": {"path": "src/index.ts"}}`
    const { toolCalls, hasParseErrors } = parseToolCalls(jsonOnly)

    expect(toolCalls).toHaveLength(1)
    const [call] = toolCalls
    expect(call.name).toBe("get_lines")
    expect(call.params).toEqual({ path: "src/index.ts" })
    expect(hasParseErrors).toBe(false)
})
|
||||
|
||||
it("should parse JSON format with numeric arguments", () => {
    const withNumbers = `{"name": "get_lines", "arguments": {"path": "src/index.ts", "start": 1, "end": 50}}`
    const { toolCalls } = parseToolCalls(withNumbers)

    expect(toolCalls).toHaveLength(1)
    // Numeric arguments must come through as numbers, not strings.
    const expectedParams = { path: "src/index.ts", start: 1, end: 50 }
    expect(toolCalls[0].params).toEqual(expectedParams)
})
|
||||
|
||||
it("should parse JSON format with surrounding text", () => {
    // JSON tool call embedded between two lines of ordinary prose.
    const mixed = `I'll read the file for you:
{"name": "get_lines", "arguments": {"path": "src/index.ts"}}
Let me know if you need more.`

    const { toolCalls, content } = parseToolCalls(mixed)

    expect(toolCalls).toHaveLength(1)
    expect(toolCalls[0].name).toBe("get_lines")
    // The prose on both sides of the call is kept in the content.
    expect(content).toContain("I'll read the file for you:")
    expect(content).toContain("Let me know if you need more.")
})
|
||||
|
||||
it("should normalize tool name aliases in JSON format", () => {
    // read_file -> get_lines
    const aliased = `{"name": "read_file", "arguments": {"path": "test.ts"}}`
    const { toolCalls } = parseToolCalls(aliased)

    expect(toolCalls).toHaveLength(1)
    expect(toolCalls[0].name).toBe("get_lines")
})
|
||||
|
||||
it("should reject unknown tool names in JSON format", () => {
    const unknownTool = `{"name": "unknown_tool", "arguments": {"path": "test.ts"}}`
    const { toolCalls, hasParseErrors, parseErrors } = parseToolCalls(unknownTool)

    // No call is produced; the offending name is reported as a parse error.
    expect(toolCalls).toHaveLength(0)
    expect(hasParseErrors).toBe(true)
    expect(parseErrors[0]).toContain("unknown_tool")
})
|
||||
|
||||
it("should prefer XML over JSON when both present", () => {
    const xmlThenJson = `<tool_call name="get_lines"><path>xml.ts</path></tool_call>
{"name": "get_function", "arguments": {"path": "json.ts", "name": "foo"}}`

    const { toolCalls } = parseToolCalls(xmlThenJson)

    // Only the XML call is expected; the JSON object must not add a second one.
    expect(toolCalls).toHaveLength(1)
    const [xmlCall] = toolCalls
    expect(xmlCall.name).toBe("get_lines")
    expect(xmlCall.params.path).toBe("xml.ts")
})
|
||||
|
||||
it("should parse JSON with empty arguments", () => {
    const noArguments = `{"name": "git_status", "arguments": {}}`
    const { toolCalls } = parseToolCalls(noArguments)

    expect(toolCalls).toHaveLength(1)
    const [call] = toolCalls
    expect(call.name).toBe("git_status")
    expect(call.params).toEqual({})
})
|
||||
|
||||
it("should support CDATA for multiline content", () => {
|
||||
const response = `<tool_call name="edit_lines">
|
||||
<path>src/index.ts</path>
|
||||
|
||||
@@ -19,10 +19,16 @@ describe("prompts", () => {
|
||||
expect(SYSTEM_PROMPT.length).toBeGreaterThan(100)
|
||||
})
|
||||
|
||||
it("should contain core principles", () => {
|
||||
expect(SYSTEM_PROMPT).toContain("Lazy Loading")
|
||||
expect(SYSTEM_PROMPT).toContain("Precision")
|
||||
expect(SYSTEM_PROMPT).toContain("Safety")
|
||||
it("should contain mandatory tool usage instructions", () => {
    // Phrases the system prompt must include verbatim.
    const requiredPhrases = ["MANDATORY", "Tools for Code Questions", "ZERO code in your context"]

    for (const phrase of requiredPhrases) {
        expect(SYSTEM_PROMPT).toContain(phrase)
    }
})
|
||||
|
||||
it("should contain when to use and when not to use tools", () => {
    // Phrases the system prompt must include verbatim.
    const requiredPhrases = ["When to Use Tools", "Do NOT use tools", "Greetings"]

    for (const phrase of requiredPhrases) {
        expect(SYSTEM_PROMPT).toContain(phrase)
    }
})
|
||||
|
||||
it("should list available tools", () => {
|
||||
@@ -34,8 +40,9 @@ describe("prompts", () => {
|
||||
})
|
||||
|
||||
it("should include safety rules", () => {
|
||||
expect(SYSTEM_PROMPT).toContain("Safety Rules")
|
||||
expect(SYSTEM_PROMPT).toContain("Never execute commands that could harm")
|
||||
expect(SYSTEM_PROMPT).toContain("Stay safe")
|
||||
expect(SYSTEM_PROMPT).toContain("destructive commands")
|
||||
expect(SYSTEM_PROMPT).toContain("Verify before editing")
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user