Compare commits

..

3 Commits

Author SHA1 Message Date
imfozilbek
3e7762ec4e feat(ipuaro): add JSON tool call parsing and improve prompts
- Add JSON fallback parsing in ResponseParser for LLMs that prefer JSON
- Add tool name aliases (get_functions -> get_lines, etc.)
- Improve system prompt with clear tool usage guidelines
- Add native Ollama tools support in OllamaClient
- Add E2E tests for full workflow with real Ollama
2025-12-05 20:51:18 +05:00
imfozilbek
c82006bbda chore(ipuaro): release v0.30.1 2025-12-05 16:16:58 +05:00
imfozilbek
2e84472e49 feat(ipuaro): display transitive counts in High Impact Files table
- Change table header to include Direct and Transitive columns
- Sort by transitive count first, then by impact score
- Update tests for new table format
2025-12-05 16:16:22 +05:00
12 changed files with 2494 additions and 138 deletions

View File

@@ -5,6 +5,52 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.30.2] - 2025-12-05 - JSON Tool Call Parsing & Improved Prompts
### Added
- **JSON Tool Call Fallback in ResponseParser**
- LLM responses with JSON format `{"name": "tool", "arguments": {...}}` are now parsed
- Fallback to JSON when XML format not found
- Works with models like qwen2.5-coder that prefer JSON over XML
- **Tool Name Aliases**
- `get_functions`, `read_file`, `read_lines` → `get_lines`
- `list_files`, `get_files` → `get_structure`
- `find_todos` → `get_todos`
- And more common LLM typos/variations
### Changed
- **Improved System Prompt**
- Added clear "When to Use Tools" / "Do NOT use tools" sections
- More concise and directive instructions
- Better examples for tool usage
### Technical Details
- Total tests: 1848 passed (+8 new tests for JSON parsing)
- 0 ESLint errors, 3 warnings (pre-existing complexity)
---
## [0.30.1] - 2025-12-05 - Display Transitive Counts in Context
### Changed
- **High Impact Files table now includes transitive counts**
- Table header changed from `| File | Impact | Dependents |` to `| File | Impact | Direct | Transitive |`
- Shows both direct dependent count and transitive dependent count
- Sorting changed: now sorts by transitive count first, then by impact score
- Example: `| utils/validation | 67% | 12 | 24 |`
### Technical Details
- Total tests: 1839 passed
- 0 ESLint errors, 3 warnings (pre-existing complexity)
---
## [0.30.0] - 2025-12-05 - Transitive Dependencies Count
### Added

View File

@@ -1,6 +1,6 @@
{
"name": "@samiyev/ipuaro",
"version": "0.30.0",
"version": "0.30.1",
"description": "Local AI agent for codebase operations with infinite context feeling",
"author": "Fozilbek Samiyev <fozilbek.samiyev@gmail.com>",
"license": "MIT",

View File

@@ -18,6 +18,7 @@ import {
buildInitialContext,
type ProjectStructure,
SYSTEM_PROMPT,
TOOL_REMINDER,
} from "../../infrastructure/llm/prompts.js"
import { parseToolCalls } from "../../infrastructure/llm/ResponseParser.js"
import type { IToolRegistry } from "../interfaces/IToolRegistry.js"
@@ -277,6 +278,12 @@ export class HandleMessage {
messages.push(...session.history)
// Add tool reminder if last message is from user (first LLM call for this query)
const lastMessage = session.history[session.history.length - 1]
if (lastMessage?.role === "user") {
messages.push(createSystemMessage(TOOL_REMINDER))
}
return messages
}

View File

@@ -1,14 +1,17 @@
import { type Message, Ollama } from "ollama"
import { type Message, Ollama, type Tool } from "ollama"
import type { ILLMClient, LLMResponse } from "../../domain/services/ILLMClient.js"
import type { ChatMessage } from "../../domain/value-objects/ChatMessage.js"
import { createToolCall, type ToolCall } from "../../domain/value-objects/ToolCall.js"
import type { LLMConfig } from "../../shared/constants/config.js"
import { IpuaroError } from "../../shared/errors/IpuaroError.js"
import { estimateTokens } from "../../shared/utils/tokens.js"
import { parseToolCalls } from "./ResponseParser.js"
import { getOllamaNativeTools } from "./toolDefs.js"
/**
* Ollama LLM client implementation.
* Wraps the Ollama SDK for chat completions with tool support.
* Supports both XML-based and native Ollama tool calling.
*/
export class OllamaClient implements ILLMClient {
private readonly client: Ollama
@@ -17,6 +20,7 @@ export class OllamaClient implements ILLMClient {
private readonly contextWindow: number
private readonly temperature: number
private readonly timeout: number
private readonly useNativeTools: boolean
private abortController: AbortController | null = null
constructor(config: LLMConfig) {
@@ -26,11 +30,12 @@ export class OllamaClient implements ILLMClient {
this.contextWindow = config.contextWindow
this.temperature = config.temperature
this.timeout = config.timeout
this.useNativeTools = config.useNativeTools ?? false
}
/**
* Send messages to LLM and get response.
* Tool definitions should be included in the system prompt as XML format.
* Supports both XML-based tool calling and native Ollama tools.
*/
async chat(messages: ChatMessage[]): Promise<LLMResponse> {
const startTime = Date.now()
@@ -39,6 +44,28 @@ export class OllamaClient implements ILLMClient {
try {
const ollamaMessages = this.convertMessages(messages)
if (this.useNativeTools) {
return await this.chatWithNativeTools(ollamaMessages, startTime)
}
return await this.chatWithXMLTools(ollamaMessages, startTime)
} catch (error) {
if (error instanceof Error && error.name === "AbortError") {
throw IpuaroError.llm("Request was aborted")
}
throw this.handleError(error)
} finally {
this.abortController = null
}
}
/**
* Chat using XML-based tool calling (legacy mode).
*/
private async chatWithXMLTools(
ollamaMessages: Message[],
startTime: number,
): Promise<LLMResponse> {
const response = await this.client.chat({
model: this.model,
messages: ollamaMessages,
@@ -59,14 +86,102 @@ export class OllamaClient implements ILLMClient {
truncated: false,
stopReason: this.determineStopReason(response, parsed.toolCalls),
}
} catch (error) {
if (error instanceof Error && error.name === "AbortError") {
throw IpuaroError.llm("Request was aborted")
}
throw this.handleError(error)
} finally {
this.abortController = null
/**
 * Run one chat turn with Ollama's native tool-calling API.
 *
 * The tool definitions are sent with the request. Structured tool calls on the
 * reply are preferred; when the reply has none but does carry text, that text is
 * scanned for JSON-encoded tool calls instead. Whenever tool calls are found the
 * returned content is emptied.
 *
 * @param ollamaMessages - Conversation already converted to Ollama's message shape.
 * @param startTime - Timestamp (ms) taken when the request began; used for timeMs.
 * @returns The response with content, tool calls, token count and timing.
 */
private async chatWithNativeTools(
    ollamaMessages: Message[],
    startTime: number,
): Promise<LLMResponse> {
    const reply = await this.client.chat({
        model: this.model,
        messages: ollamaMessages,
        tools: getOllamaNativeTools() as Tool[],
        options: { temperature: this.temperature },
        stream: false,
    })
    const timeMs = Date.now() - startTime
    const rawContent = reply.message.content || ""

    const structuredCalls = this.parseNativeToolCalls(reply.message.tool_calls)
    // Fallback: some models return tool calls as JSON in content
    const toolCalls =
        structuredCalls.length === 0 && rawContent
            ? this.parseToolCallsFromContent(rawContent)
            : structuredCalls
    const hasToolCalls = toolCalls.length > 0

    return {
        content: hasToolCalls ? "" : rawContent,
        toolCalls,
        // Prefer the count reported by Ollama; estimate only when it is missing.
        tokens: reply.eval_count ?? estimateTokens(rawContent),
        timeMs,
        truncated: false,
        stopReason: hasToolCalls ? "tool_use" : "end",
    }
}
/**
 * Convert the tool calls Ollama attaches to a reply into ToolCall values.
 *
 * @param nativeToolCalls - Structured calls from the reply; may be absent or empty.
 * @returns One ToolCall per native call, with ids of the form
 *          `native_<timestamp>_<index>`; an empty array when there are none.
 */
private parseNativeToolCalls(
    nativeToolCalls?: { function: { name: string; arguments: Record<string, unknown> } }[],
): ToolCall[] {
    const converted: ToolCall[] = []

    for (const [position, call] of (nativeToolCalls ?? []).entries()) {
        const id = `native_${String(Date.now())}_${String(position)}`
        converted.push(createToolCall(id, call.function.name, call.function.arguments))
    }

    return converted
}
/**
 * Parse tool calls from content (fallback for models that return JSON in content).
 * Supports format: {"name": "tool_name", "arguments": {...}}
 *
 * Candidates are found by brace matching rather than a regular expression: a
 * lazy pattern stops at the first closing brace, which is the end of the
 * arguments object and not of the tool call, so the captured text is never
 * valid JSON. The scan ignores braces inside JSON string literals so values
 * containing code do not break the matching.
 *
 * An object counts as a tool call when it has a non-empty string `name` and an
 * `arguments` member that is an object (or null, treated as no arguments).
 * Objects that do not qualify are searched for nested candidates.
 *
 * @param content - Raw assistant message text.
 * @returns Tool calls found in the text, in order of appearance; empty when none.
 */
private parseToolCallsFromContent(content: string): ToolCall[] {
    const toolCalls: ToolCall[] = []
    let start = content.indexOf("{")

    while (start !== -1) {
        // Locate the brace that closes the object opened at `start`.
        let depth = 0
        let inString = false
        let escaped = false
        let end = -1

        for (let i = start; i < content.length; i++) {
            const ch = content[i]
            if (inString) {
                if (escaped) {
                    escaped = false
                } else if (ch === "\\") {
                    escaped = true
                } else if (ch === '"') {
                    inString = false
                }
                continue
            }
            if (ch === '"') {
                inString = true
            } else if (ch === "{") {
                depth++
            } else if (ch === "}") {
                depth--
                if (depth === 0) {
                    end = i
                    break
                }
            }
        }

        let consumed = false
        if (end !== -1) {
            try {
                const parsed: unknown = JSON.parse(content.slice(start, end + 1))
                if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
                    const candidate = parsed as Record<string, unknown>
                    const name = candidate.name
                    const args = candidate.arguments
                    const argsAreObject =
                        args === null || (typeof args === "object" && !Array.isArray(args))

                    if (typeof name === "string" && name.length > 0 && argsAreObject) {
                        toolCalls.push(
                            createToolCall(
                                `json_${String(Date.now())}_${String(toolCalls.length)}`,
                                name,
                                (args ?? {}) as Record<string, unknown>,
                            ),
                        )
                        consumed = true
                    }
                }
            } catch {
                // Balanced braces but not valid JSON (e.g. a code snippet) - skip it.
            }
        }

        // After a tool call continue past it; otherwise try the next opening
        // brace, which may start a nested or later candidate.
        start = content.indexOf("{", consumed ? end + 1 : start + 1)
    }

    return toolCalls
}
/**

View File

@@ -58,9 +58,50 @@ const VALID_TOOL_NAMES = new Set([
"run_tests",
])
/**
 * Tool name aliases for common LLM typos/variations.
 * Maps incorrect names (lower-case) to correct tool names.
 */
const TOOL_ALIASES: Record<string, string> = {
    // get_lines aliases
    get_functions: "get_lines",
    read_file: "get_lines",
    read_lines: "get_lines",
    get_file: "get_lines",
    read: "get_lines",
    // get_function aliases
    getfunction: "get_function",
    // get_structure aliases
    list_files: "get_structure",
    get_files: "get_structure",
    list_structure: "get_structure",
    get_project_structure: "get_structure",
    // get_todos aliases
    find_todos: "get_todos",
    list_todos: "get_todos",
    // find_references aliases
    get_references: "find_references",
    // find_definition aliases
    get_definition: "find_definition",
    // edit_lines aliases
    edit_file: "edit_lines",
    modify_file: "edit_lines",
    update_file: "edit_lines",
}

/**
 * Normalize tool name using aliases.
 *
 * The lookup is case-insensitive. Only the alias table's own entries are
 * consulted: a plain property read would also resolve keys inherited from
 * Object.prototype (for example "constructor" or "__proto__") and return a
 * non-string value.
 *
 * @param name - Tool name as written by the model.
 * @returns The canonical tool name when `name` is a known alias, otherwise
 *          `name` unchanged.
 */
function normalizeToolName(name: string): string {
    const lowerName = name.toLowerCase()
    return Object.prototype.hasOwnProperty.call(TOOL_ALIASES, lowerName)
        ? TOOL_ALIASES[lowerName]
        : name
}
/**
* Parse tool calls from LLM response text.
* Supports XML format: <tool_call name="get_lines"><path>src/index.ts</path></tool_call>
* Supports both XML and JSON formats:
* - XML: <tool_call name="get_lines"><path>src/index.ts</path></tool_call>
* - JSON: {"name": "get_lines", "arguments": {"path": "src/index.ts"}}
* Validates tool names and provides helpful error messages.
*/
export function parseToolCalls(response: string): ParsedResponse {
@@ -68,14 +109,18 @@ export function parseToolCalls(response: string): ParsedResponse {
const parseErrors: string[] = []
let content = response
const matches = [...response.matchAll(TOOL_CALL_REGEX)]
// First, try XML format
const xmlMatches = [...response.matchAll(TOOL_CALL_REGEX)]
for (const match of matches) {
const [fullMatch, toolName, paramsXml] = match
for (const match of xmlMatches) {
const [fullMatch, rawToolName, paramsXml] = match
// Normalize tool name (handle common LLM typos/variations)
const toolName = normalizeToolName(rawToolName)
if (!VALID_TOOL_NAMES.has(toolName)) {
parseErrors.push(
`Unknown tool "${toolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
`Unknown tool "${rawToolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
)
continue
}
@@ -91,7 +136,19 @@ export function parseToolCalls(response: string): ParsedResponse {
content = content.replace(fullMatch, "")
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error)
parseErrors.push(`Failed to parse tool call "${toolName}": ${errorMsg}`)
parseErrors.push(`Failed to parse tool call "${rawToolName}": ${errorMsg}`)
}
}
// If no XML tool calls found, try JSON format as fallback
if (toolCalls.length === 0) {
const jsonResult = parseJsonToolCalls(response)
toolCalls.push(...jsonResult.toolCalls)
parseErrors.push(...jsonResult.parseErrors)
// Remove JSON tool calls from content
for (const jsonMatch of jsonResult.matchedStrings) {
content = content.replace(jsonMatch, "")
}
}
@@ -105,6 +162,59 @@ export function parseToolCalls(response: string): ParsedResponse {
}
}
/**
 * JSON tool call format pattern.
 * Matches the head of {"name": "tool_name", "arguments": {...}} up to, but not
 * including, the opening brace of the arguments object. The arguments object
 * itself is delimited by findJsonObjectEnd, because a regular expression cannot
 * follow arbitrarily nested braces or skip braces inside string values.
 */
const JSON_TOOL_CALL_REGEX = /\{\s*"name"\s*:\s*"([^"]+)"\s*,\s*"arguments"\s*:\s*(?=\{)/g

/**
 * Find the index of the brace that closes the JSON object opening at `start`.
 * Braces inside JSON string literals are ignored.
 *
 * @param text - Text being scanned.
 * @param start - Index of the opening brace.
 * @returns Index of the matching closing brace, or -1 when the object is not closed.
 */
function findJsonObjectEnd(text: string, start: number): number {
    let depth = 0
    let inString = false
    let escaped = false

    for (let i = start; i < text.length; i++) {
        const ch = text[i]
        if (inString) {
            if (escaped) {
                escaped = false
            } else if (ch === "\\") {
                escaped = true
            } else if (ch === '"') {
                inString = false
            }
            continue
        }
        if (ch === '"') {
            inString = true
        } else if (ch === "{") {
            depth++
        } else if (ch === "}") {
            depth--
            if (depth === 0) {
                return i
            }
        }
    }

    return -1
}

/**
 * Parse tool calls from JSON format in response.
 * This is a fallback for LLMs that prefer JSON over XML.
 *
 * @param response - Raw LLM response text.
 * @returns The parsed tool calls, error messages for unknown tools or invalid
 *          argument JSON, and the exact substrings that were recognised as
 *          JSON tool calls.
 */
function parseJsonToolCalls(response: string): {
    toolCalls: ToolCall[]
    parseErrors: string[]
    matchedStrings: string[]
} {
    const toolCalls: ToolCall[] = []
    const parseErrors: string[] = []
    const matchedStrings: string[] = []

    // Private copy so the shared pattern's lastIndex is never left mid-scan.
    const scanner = new RegExp(JSON_TOOL_CALL_REGEX.source, "g")

    for (let head = scanner.exec(response); head !== null; head = scanner.exec(response)) {
        const rawToolName = head[1]
        const argsStart = head.index + head[0].length
        const argsEnd = findJsonObjectEnd(response, argsStart)
        if (argsEnd === -1) {
            continue
        }

        // Only whitespace may separate the arguments object from the closing brace.
        let closeIndex = argsEnd + 1
        while (closeIndex < response.length && /\s/.test(response[closeIndex])) {
            closeIndex++
        }
        if (response[closeIndex] !== "}") {
            continue
        }

        const fullMatch = response.slice(head.index, closeIndex + 1)
        const argsJson = response.slice(argsStart, argsEnd + 1)
        // Resume after this tool call so its arguments are not rescanned.
        scanner.lastIndex = closeIndex + 1
        matchedStrings.push(fullMatch)

        // Normalize tool name
        const toolName = normalizeToolName(rawToolName)

        if (!VALID_TOOL_NAMES.has(toolName)) {
            parseErrors.push(
                `Unknown tool "${rawToolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
            )
            continue
        }

        try {
            const args = JSON.parse(argsJson) as Record<string, unknown>
            const toolCall = createToolCall(
                `json_${String(Date.now())}_${String(toolCalls.length)}`,
                toolName,
                args,
            )
            toolCalls.push(toolCall)
        } catch (error) {
            const errorMsg = error instanceof Error ? error.message : String(error)
            parseErrors.push(`Failed to parse JSON tool call "${rawToolName}": ${errorMsg}`)
        }
    }

    return { toolCalls, parseErrors, matchedStrings }
}
/**
* Parse parameters from XML content.
*/

View File

@@ -25,99 +25,115 @@ export interface BuildContextOptions {
/**
* System prompt for the ipuaro AI agent.
*/
export const SYSTEM_PROMPT = `You are ipuaro, a local AI code assistant specialized in helping developers understand and modify their codebase. You operate within a single project directory and have access to powerful tools for reading, searching, analyzing, and editing code.
export const SYSTEM_PROMPT = `You are ipuaro, a local AI code assistant with tools for reading, searching, analyzing, and editing code.
## Core Principles
## When to Use Tools
1. **Lazy Loading**: You don't have the full code in context. Use tools to fetch exactly what you need.
2. **Precision**: Always verify file paths and line numbers before making changes.
3. **Safety**: Confirm destructive operations. Never execute dangerous commands.
4. **Efficiency**: Minimize context usage. Request only necessary code sections.
**Use tools** when the user asks about:
- Code content (files, functions, classes)
- Project structure
- TODOs, complexity, dependencies
- Git status, diffs, commits
- Running commands or tests
## Tool Calling Format
**Do NOT use tools** for:
- Greetings ("Hello", "Hi", "Thanks")
- General questions not about this codebase
- Clarifying questions back to the user
When you need to use a tool, format your call as XML:
## MANDATORY: Tools for Code Questions
<tool_call name="tool_name">
<param_name>value</param_name>
<another_param>value</another_param>
</tool_call>
**CRITICAL:** You have ZERO code in your context. To answer ANY question about code, you MUST first call a tool.
You can call multiple tools in one response. Always wait for tool results before making conclusions.
**Examples:**
**WRONG:**
User: "What's in src/index.ts?"
Assistant: "The file likely contains..." ← WRONG! Call a tool!
**CORRECT:**
User: "What's in src/index.ts?"
<tool_call name="get_lines">
<path>src/index.ts</path>
<start>1</start>
<end>50</end>
<path>src/index.ts</path>
</tool_call>
<tool_call name="edit_lines">
<path>src/utils.ts</path>
<start>10</start>
<end>15</end>
<content>const newCode = "hello";</content>
## Tool Call Format
Output this XML format. Do NOT explain before calling - just output the XML:
<tool_call name="TOOL_NAME">
<param1>value1</param1>
<param2>value2</param2>
</tool_call>
<tool_call name="find_references">
<symbol>getUserById</symbol>
## Example Interactions
**Example 1 - Reading a file:**
User: "Show me the main function in src/app.ts"
<tool_call name="get_function">
<path>src/app.ts</path>
<name>main</name>
</tool_call>
**Example 2 - Finding TODOs:**
User: "Are there any TODO comments?"
<tool_call name="get_todos">
</tool_call>
**Example 3 - Project structure:**
User: "What files are in this project?"
<tool_call name="get_structure">
<path>.</path>
</tool_call>
## Available Tools
### Reading Tools
- \`get_lines(path, start?, end?)\`: Get specific lines from a file
- \`get_function(path, name)\`: Get a function by name
- \`get_class(path, name)\`: Get a class by name
- \`get_structure(path?, depth?)\`: Get project directory structure
### Reading
- get_lines(path, start?, end?) - Read file lines
- get_function(path, name) - Get function by name
- get_class(path, name) - Get class by name
- get_structure(path?, depth?) - List project files
### Editing Tools (require confirmation)
- \`edit_lines(path, start, end, content)\`: Replace specific lines in a file
- \`create_file(path, content)\`: Create a new file
- \`delete_file(path)\`: Delete a file
### Analysis
- get_todos(path?, type?) - Find TODO/FIXME comments
- get_dependencies(path) - What this file imports
- get_dependents(path) - What imports this file
- get_complexity(path?) - Code complexity metrics
- find_references(symbol) - Find all usages of a symbol
- find_definition(symbol) - Find where symbol is defined
### Search Tools
- \`find_references(symbol, path?)\`: Find all usages of a symbol
- \`find_definition(symbol)\`: Find where a symbol is defined
### Editing (requires confirmation)
- edit_lines(path, start, end, content) - Modify file lines
- create_file(path, content) - Create new file
- delete_file(path) - Delete a file
### Analysis Tools
- \`get_dependencies(path)\`: Get files this file imports
- \`get_dependents(path)\`: Get files that import this file
- \`get_complexity(path?, limit?)\`: Get complexity metrics
- \`get_todos(path?, type?)\`: Find TODO/FIXME comments
### Git
- git_status() - Repository status
- git_diff(path?, staged?) - Show changes
- git_commit(message, files?) - Create commit
### Git Tools
- \`git_status()\`: Get repository status
- \`git_diff(path?, staged?)\`: Get uncommitted changes
- \`git_commit(message, files?)\`: Create a commit (requires confirmation)
### Commands
- run_command(command, timeout?) - Execute shell command
- run_tests(path?, filter?) - Run test suite
### Run Tools
- \`run_command(command, timeout?)\`: Execute a shell command (security checked)
- \`run_tests(path?, filter?, watch?)\`: Run the test suite
## Rules
## Response Guidelines
1. **ALWAYS call a tool first** when asked about code - you cannot see any files
2. **Output XML directly** - don't say "I will use..." just output the tool call
3. **Wait for results** before making conclusions
4. **Be concise** in your responses
5. **Verify before editing** - always read code before modifying it
6. **Stay safe** - never execute destructive commands without user confirmation`
1. **Be concise**: Don't repeat information already in context.
2. **Show your work**: Explain what tools you're using and why.
3. **Verify before editing**: Always read the target code before modifying it.
4. **Handle errors gracefully**: If a tool fails, explain what went wrong and suggest alternatives.
/**
* Tool usage reminder - appended to messages to reinforce tool usage.
* This is added as the last system message before LLM call.
*/
export const TOOL_REMINDER = `⚠️ REMINDER: To answer this question, you MUST use a tool first.
Output the <tool_call> XML directly. Do NOT describe what you will do - just call the tool.
## Code Editing Rules
1. Always use \`get_lines\` or \`get_function\` before \`edit_lines\`.
2. Provide exact line numbers for edits.
3. For large changes, break into multiple small edits.
4. After editing, suggest running tests if available.
## Safety Rules
1. Never execute commands that could harm the system.
2. Never expose sensitive data (API keys, passwords).
3. Always confirm file deletions and destructive git operations.
4. Stay within the project directory.
When you need to perform an action, use the appropriate tool. Think step by step about what information you need and which tools will provide it most efficiently.`
Example - if asked about a file, output:
<tool_call name="get_lines">
<path>the/file/path.ts</path>
</tool_call>`
/**
* Build initial context from project structure and AST metadata.
@@ -580,12 +596,13 @@ export function formatCircularDeps(cycles: string[][]): string | null {
/**
* Format high impact files table for display in context.
* Shows files with highest impact scores (most dependents).
* Includes both direct and transitive dependent counts.
*
* Format:
* ## High Impact Files
* | File | Impact | Dependents |
* |------|--------|------------|
* | src/utils/validation.ts | 67% | 12 files |
* | File | Impact | Direct | Transitive |
* |------|--------|--------|------------|
* | src/utils/validation.ts | 67% | 12 | 24 |
*
* @param metas - Map of file paths to their metadata
* @param limit - Maximum number of files to show (default: 10)
@@ -601,7 +618,12 @@ export function formatHighImpactFiles(
}
// Collect files with impact score >= minImpact
const impactFiles: { path: string; impact: number; dependents: number }[] = []
const impactFiles: {
path: string
impact: number
dependents: number
transitive: number
}[] = []
for (const [path, meta] of metas) {
if (meta.impactScore >= minImpact) {
@@ -609,6 +631,7 @@ export function formatHighImpactFiles(
path,
impact: meta.impactScore,
dependents: meta.dependents.length,
transitive: meta.transitiveDepCount,
})
}
}
@@ -617,8 +640,11 @@ export function formatHighImpactFiles(
return null
}
// Sort by impact score descending, then by path
// Sort by transitive count descending, then by impact, then by path
impactFiles.sort((a, b) => {
if (a.transitive !== b.transitive) {
return b.transitive - a.transitive
}
if (a.impact !== b.impact) {
return b.impact - a.impact
}
@@ -631,15 +657,16 @@ export function formatHighImpactFiles(
const lines: string[] = [
"## High Impact Files",
"",
"| File | Impact | Dependents |",
"|------|--------|------------|",
"| File | Impact | Direct | Transitive |",
"|------|--------|--------|------------|",
]
for (const file of topFiles) {
const shortPath = shortenPath(file.path)
const impact = `${String(file.impact)}%`
const dependents = file.dependents === 1 ? "1 file" : `${String(file.dependents)} files`
lines.push(`| ${shortPath} | ${impact} | ${dependents} |`)
const direct = String(file.dependents)
const transitive = String(file.transitive)
lines.push(`| ${shortPath} | ${impact} | ${direct} | ${transitive} |`)
}
return lines.join("\n")

View File

@@ -509,3 +509,87 @@ export function getToolsByCategory(category: string): ToolDef[] {
return []
}
}
/*
* =============================================================================
* Native Ollama Tools Format
* =============================================================================
*/
/**
 * Ollama native tool definition format.
 * One entry describes a single callable function and the shape of its arguments.
 */
export interface OllamaTool {
// Fixed discriminator; this interface only describes function tools.
type: "function"
function: {
// Tool name.
name: string
// Human-readable description of the tool.
description: string
// Description of the arguments object the tool accepts.
parameters: {
type: "object"
// Schema of each argument, keyed by argument name.
properties: Record<string, OllamaToolProperty>
// Names of the arguments that must be supplied.
required: string[]
}
}
}
/**
 * Schema of a single tool argument.
 */
interface OllamaToolProperty {
// Type name of the argument (e.g. "array").
type: string
// Human-readable description of the argument.
description: string
// Allowed values, when the argument is restricted to a fixed set.
enum?: string[]
// Element schema for array-typed arguments.
items?: { type: string }
}
/**
 * Translate an internal ToolDef into the structure Ollama expects for native tools.
 *
 * Every parameter becomes a property schema carrying its type and description,
 * plus `enum` when the parameter declares allowed values and `items` (strings)
 * when the parameter is an array. Parameters flagged as required are listed by
 * name, in declaration order.
 *
 * @param tool - Internal tool definition.
 * @returns The equivalent Ollama function-tool definition.
 */
function convertToOllamaTool(tool: ToolDef): OllamaTool {
    const properties: Record<string, OllamaToolProperty> = {}

    for (const { name, type, description, enum: allowedValues } of tool.parameters) {
        const schema: OllamaToolProperty = { type, description }
        if (allowedValues) {
            schema.enum = allowedValues
        }
        if (type === "array") {
            schema.items = { type: "string" }
        }
        properties[name] = schema
    }

    const required = tool.parameters
        .filter((param) => param.required)
        .map((param) => param.name)

    return {
        type: "function",
        function: {
            name: tool.name,
            description: tool.description,
            parameters: { type: "object", properties, required },
        },
    }
}
/**
 * Every tool from ALL_TOOLS, converted once at module load to the Ollama native format.
 * Used when useNativeTools is enabled.
 */
export const OLLAMA_NATIVE_TOOLS: OllamaTool[] = ALL_TOOLS.map((tool) =>
    convertToOllamaTool(tool),
)

/**
 * Accessor for the native Ollama tool definitions.
 *
 * @returns The shared OLLAMA_NATIVE_TOOLS array (not a copy).
 */
export function getOllamaNativeTools(): OllamaTool[] {
    return OLLAMA_NATIVE_TOOLS
}

View File

@@ -20,6 +20,7 @@ export const LLMConfigSchema = z.object({
temperature: z.number().min(0).max(2).default(0.1),
host: z.string().default("http://localhost:11434"),
timeout: z.number().int().positive().default(120_000),
useNativeTools: z.boolean().default(false),
})
/**

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,351 @@
/**
* E2E Test Helpers
* Provides dependencies for testing the full flow with REAL LLM.
*/
import { vi } from "vitest"
import * as fs from "node:fs/promises"
import * as path from "node:path"
import * as os from "node:os"
import type { IStorage, SymbolIndex, DepsGraph } from "../../src/domain/services/IStorage.js"
import type { ISessionStorage, SessionListItem } from "../../src/domain/services/ISessionStorage.js"
import type { FileData } from "../../src/domain/value-objects/FileData.js"
import type { FileAST } from "../../src/domain/value-objects/FileAST.js"
import type { FileMeta } from "../../src/domain/value-objects/FileMeta.js"
import type { UndoEntry } from "../../src/domain/value-objects/UndoEntry.js"
import { Session } from "../../src/domain/entities/Session.js"
import { ToolRegistry } from "../../src/infrastructure/tools/registry.js"
import { OllamaClient } from "../../src/infrastructure/llm/OllamaClient.js"
import { registerAllTools } from "../../src/cli/commands/tools-setup.js"
import type { LLMConfig } from "../../src/shared/constants/config.js"
/**
 * Default LLM config for tests.
 * Used as the base configuration by createRealOllamaClient; callers may
 * override individual fields.
 */
export const DEFAULT_TEST_LLM_CONFIG: LLMConfig = {
// Model tag requested from Ollama; isModelAvailable defaults to the same tag.
model: "qwen2.5-coder:14b-instruct-q4_K_M",
contextWindow: 128_000,
temperature: 0.1,
// Local Ollama endpoint.
host: "http://localhost:11434",
// Request timeout in milliseconds.
timeout: 180_000,
// E2E tests use the native Ollama tool-calling mode.
useNativeTools: true,
}
/**
 * Build an IStorage test double backed entirely by in-process Maps.
 * Every method is wrapped in vi.fn so tests can inspect calls; no Redis is needed.
 *
 * @returns A fresh storage instance that starts empty and disconnected.
 */
export function createInMemoryStorage(): IStorage {
    const emptyGraph = (): DepsGraph => ({ imports: new Map(), importedBy: new Map() })

    const fileStore = new Map<string, FileData>()
    const astStore = new Map<string, FileAST>()
    const metaStore = new Map<string, FileMeta>()
    const configStore = new Map<string, unknown>()
    let symbols: SymbolIndex = new Map()
    let graph: DepsGraph = emptyGraph()
    let online = false

    const storage: IStorage = {
        // Files
        getFile: vi.fn(async (filePath: string) => fileStore.get(filePath) ?? null),
        setFile: vi.fn(async (filePath: string, data: FileData) => {
            fileStore.set(filePath, data)
        }),
        deleteFile: vi.fn(async (filePath: string) => {
            fileStore.delete(filePath)
        }),
        // Snapshot copy, so callers cannot mutate the backing map.
        getAllFiles: vi.fn(async () => new Map(fileStore)),
        getFileCount: vi.fn(async () => fileStore.size),

        // ASTs
        getAST: vi.fn(async (filePath: string) => astStore.get(filePath) ?? null),
        setAST: vi.fn(async (filePath: string, ast: FileAST) => {
            astStore.set(filePath, ast)
        }),
        deleteAST: vi.fn(async (filePath: string) => {
            astStore.delete(filePath)
        }),
        getAllASTs: vi.fn(async () => new Map(astStore)),

        // File metadata
        getMeta: vi.fn(async (filePath: string) => metaStore.get(filePath) ?? null),
        setMeta: vi.fn(async (filePath: string, meta: FileMeta) => {
            metaStore.set(filePath, meta)
        }),
        deleteMeta: vi.fn(async (filePath: string) => {
            metaStore.delete(filePath)
        }),
        getAllMetas: vi.fn(async () => new Map(metaStore)),

        // Indexes
        getSymbolIndex: vi.fn(async () => symbols),
        setSymbolIndex: vi.fn(async (index: SymbolIndex) => {
            symbols = index
        }),
        getDepsGraph: vi.fn(async () => graph),
        setDepsGraph: vi.fn(async (next: DepsGraph) => {
            graph = next
        }),

        // Project configuration
        getProjectConfig: vi.fn(async (key: string) => configStore.get(key) ?? null),
        setProjectConfig: vi.fn(async (key: string, value: unknown) => {
            configStore.set(key, value)
        }),

        // Connection lifecycle (only a flag is tracked)
        connect: vi.fn(async () => {
            online = true
        }),
        disconnect: vi.fn(async () => {
            online = false
        }),
        isConnected: vi.fn(() => online),

        // Reset all stored data; the connection flag is left untouched.
        clear: vi.fn(async () => {
            fileStore.clear()
            astStore.clear()
            metaStore.clear()
            symbols = new Map()
            graph = emptyGraph()
            configStore.clear()
        }),
    }

    return storage
}
/**
 * Build an ISessionStorage test double that keeps sessions and undo stacks in memory.
 * Every method is wrapped in vi.fn so tests can inspect calls.
 *
 * @returns A fresh, empty session storage.
 */
export function createInMemorySessionStorage(): ISessionStorage {
    const sessionsById = new Map<string, Session>()
    const undoById = new Map<string, UndoEntry[]>()

    const sessionStorage: ISessionStorage = {
        saveSession: vi.fn(async (session: Session) => {
            sessionsById.set(session.id, session)
        }),
        loadSession: vi.fn(async (sessionId: string) => sessionsById.get(sessionId) ?? null),
        // Removing a session also discards its undo history.
        deleteSession: vi.fn(async (sessionId: string) => {
            sessionsById.delete(sessionId)
            undoById.delete(sessionId)
        }),
        // Without a project name every session is listed.
        listSessions: vi.fn(async (projectName?: string): Promise<SessionListItem[]> =>
            [...sessionsById.values()]
                .filter((session) => !projectName || session.projectName === projectName)
                .map((session) => ({
                    id: session.id,
                    projectName: session.projectName,
                    createdAt: session.createdAt,
                    lastActivityAt: session.lastActivityAt,
                    messageCount: session.history.length,
                })),
        ),
        // Most recently active session of the project; ties keep the earlier-saved one.
        getLatestSession: vi.fn(async (projectName: string) => {
            let newest: Session | null = null
            for (const candidate of sessionsById.values()) {
                if (candidate.projectName !== projectName) {
                    continue
                }
                if (newest === null || candidate.lastActivityAt > newest.lastActivityAt) {
                    newest = candidate
                }
            }
            return newest
        }),
        sessionExists: vi.fn(async (sessionId: string) => sessionsById.has(sessionId)),
        pushUndoEntry: vi.fn(async (sessionId: string, entry: UndoEntry) => {
            const existing = undoById.get(sessionId)
            if (existing) {
                existing.push(entry)
            } else {
                undoById.set(sessionId, [entry])
            }
        }),
        popUndoEntry: vi.fn(async (sessionId: string) => undoById.get(sessionId)?.pop() ?? null),
        getUndoStack: vi.fn(async (sessionId: string) => undoById.get(sessionId) ?? []),
        // Unknown session ids are ignored.
        touchSession: vi.fn(async (sessionId: string) => {
            const target = sessionsById.get(sessionId)
            if (target) {
                target.lastActivityAt = Date.now()
            }
        }),
        clearAllSessions: vi.fn(async () => {
            sessionsById.clear()
            undoById.clear()
        }),
    }

    return sessionStorage
}
/**
 * Construct a REAL OllamaClient for E2E tests.
 *
 * @param config - Optional overrides applied on top of DEFAULT_TEST_LLM_CONFIG.
 * @returns A client configured with the merged settings.
 */
export function createRealOllamaClient(config?: Partial<LLMConfig>): OllamaClient {
    const merged = { ...DEFAULT_TEST_LLM_CONFIG, ...config }
    return new OllamaClient(merged)
}
/**
 * Build a ToolRegistry populated through registerAllTools (all 18 tools).
 *
 * @returns The populated registry.
 */
export function createRealToolRegistry(): ToolRegistry {
    const toolRegistry = new ToolRegistry()
    registerAllTools(toolRegistry)

    return toolRegistry
}
/**
 * Create a new Session for tests, with an id of the form `test-<timestamp>`.
 *
 * @param projectName - Project the session belongs to (default: "test-project").
 * @returns The new session.
 */
export function createTestSession(projectName = "test-project"): Session {
    const sessionId = `test-${Date.now()}`
    return new Session(sessionId, projectName)
}
/**
 * Create a temporary test project directory with sample files.
 *
 * The directory is created under the OS temp directory with the prefix
 * "ipuaro-e2e-" and populated with src/index.ts, src/utils.ts, package.json
 * and README.md. It is not removed automatically; pass the returned path to
 * cleanupTestProject when done.
 *
 * @returns Path of the newly created project directory.
 */
export async function createTestProject(): Promise<string> {
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "ipuaro-e2e-"))
await fs.mkdir(path.join(tempDir, "src"), { recursive: true })
// src/index.ts: three exported functions plus a TODO comment.
await fs.writeFile(
path.join(tempDir, "src", "index.ts"),
`/**
* Main entry point
*/
export function main(): void {
console.log("Hello, world!")
}
export function add(a: number, b: number): number {
return a + b
}
export function multiply(a: number, b: number): number {
return a * b
}
// TODO: Add more math functions
main()
`,
)
// src/utils.ts: imports from index, defines a function and a class, has a FIXME comment.
await fs.writeFile(
path.join(tempDir, "src", "utils.ts"),
`/**
* Utility functions
*/
import { add } from "./index.js"
export function sum(numbers: number[]): number {
return numbers.reduce((acc, n) => add(acc, n), 0)
}
export class Calculator {
private result: number = 0
add(n: number): this {
this.result += n
return this
}
subtract(n: number): this {
this.result -= n
return this
}
getResult(): number {
return this.result
}
reset(): void {
this.result = 0
}
}
// FIXME: Handle edge cases for negative numbers
`,
)
// package.json: minimal ESM manifest with a test script that only echoes.
await fs.writeFile(
path.join(tempDir, "package.json"),
JSON.stringify(
{
name: "test-project",
version: "1.0.0",
type: "module",
scripts: {
test: "echo 'Tests passed!'",
},
},
null,
4,
),
)
// README.md: short project description.
await fs.writeFile(
path.join(tempDir, "README.md"),
`# Test Project
A sample project for E2E testing.
## Features
- Basic math functions
- Calculator class
`,
)
return tempDir
}
/**
 * Remove a test project directory and everything inside it.
 * A directory that no longer exists is not an error.
 *
 * @param projectDir - Path returned by createTestProject.
 */
export async function cleanupTestProject(projectDir: string): Promise<void> {
    const removalOptions = { recursive: true, force: true }
    await fs.rm(projectDir, removalOptions)
}
/**
 * All test dependencies bundled together.
 * This is the shape returned by createE2ETestDependencies.
 */
export interface E2ETestDependencies {
// In-memory file/AST/meta storage.
storage: IStorage
// In-memory session and undo storage.
sessionStorage: ISessionStorage
// Real Ollama client (not a mock).
llm: OllamaClient
// Registry with the tools registered.
tools: ToolRegistry
// Session used by the test.
session: Session
// Path of the temporary project directory on disk.
projectRoot: string
}
/**
 * Assemble everything an E2E test needs, using a REAL Ollama client.
 *
 * A temporary project is written to disk first; storage and session storage are
 * in-memory. The caller is responsible for removing `projectRoot` afterwards.
 *
 * @param llmConfig - Optional overrides for the default test LLM config.
 * @returns The bundled dependencies.
 */
export async function createE2ETestDependencies(
    llmConfig?: Partial<LLMConfig>,
): Promise<E2ETestDependencies> {
    const projectRoot = await createTestProject()

    const dependencies: E2ETestDependencies = {
        storage: createInMemoryStorage(),
        sessionStorage: createInMemorySessionStorage(),
        llm: createRealOllamaClient(llmConfig),
        tools: createRealToolRegistry(),
        session: createTestSession(),
        projectRoot,
    }

    return dependencies
}
/**
 * Report whether Ollama is available.
 *
 * Creates a client via `createRealOllamaClient()` without configuration
 * overrides and returns the result of its `isAvailable()` call.
 */
export async function isOllamaAvailable(): Promise<boolean> {
    return createRealOllamaClient().isAvailable()
}
/**
 * Report whether the required model is available.
 *
 * Creates a client via `createRealOllamaClient()` without configuration
 * overrides and returns the result of its `hasModel(model)` call.
 *
 * @param model Model name to look for; defaults to
 *              "qwen2.5-coder:14b-instruct-q4_K_M".
 */
export async function isModelAvailable(
    model = "qwen2.5-coder:14b-instruct-q4_K_M",
): Promise<boolean> {
    return createRealOllamaClient().hasModel(model)
}

View File

@@ -135,6 +135,108 @@ describe("ResponseParser", () => {
expect(result.parseErrors[0]).toContain("unknown_tool")
})
it("should normalize tool name aliases", () => {
    // Each case feeds an aliased tool name through the parser and names the
    // canonical tool it must come back as. Only the first case additionally
    // requires that no parse errors are reported.
    const aliasCases = [
        {
            // get_functions -> get_lines (common LLM typo)
            response: `<tool_call name="get_functions"><path>src/index.ts</path></tool_call>`,
            canonical: "get_lines",
            checkNoParseErrors: true,
        },
        {
            // read_file -> get_lines
            response: `<tool_call name="read_file"><path>test.ts</path></tool_call>`,
            canonical: "get_lines",
            checkNoParseErrors: false,
        },
        {
            // find_todos -> get_todos
            response: `<tool_call name="find_todos"></tool_call>`,
            canonical: "get_todos",
            checkNoParseErrors: false,
        },
        {
            // list_files -> get_structure
            response: `<tool_call name="list_files"><path>.</path></tool_call>`,
            canonical: "get_structure",
            checkNoParseErrors: false,
        },
    ]

    for (const { response, canonical, checkNoParseErrors } of aliasCases) {
        const parsed = parseToolCalls(response)
        expect(parsed.toolCalls).toHaveLength(1)
        expect(parsed.toolCalls[0].name).toBe(canonical)
        if (checkNoParseErrors) {
            expect(parsed.hasParseErrors).toBe(false)
        }
    }
})
// JSON format tests
it("should parse JSON format tool calls as fallback", () => {
    // A bare JSON object, with no XML tool_call wrapper anywhere in the response.
    const jsonOnly = `{"name": "get_lines", "arguments": {"path": "src/index.ts"}}`
    const { toolCalls, hasParseErrors } = parseToolCalls(jsonOnly)

    expect(toolCalls).toHaveLength(1)
    expect(toolCalls[0].name).toBe("get_lines")
    expect(toolCalls[0].params).toEqual({ path: "src/index.ts" })
    expect(hasParseErrors).toBe(false)
})
it("should parse JSON format with numeric arguments", () => {
    const parsed = parseToolCalls(
        `{"name": "get_lines", "arguments": {"path": "src/index.ts", "start": 1, "end": 50}}`,
    )

    // start/end are expected as numbers, exactly as written in the JSON.
    const expectedParams = { path: "src/index.ts", start: 1, end: 50 }

    expect(parsed.toolCalls).toHaveLength(1)
    expect(parsed.toolCalls[0].params).toEqual(expectedParams)
})
it("should parse JSON format with surrounding text", () => {
    // The JSON tool call sits between two lines of ordinary prose.
    const mixedResponse = `I'll read the file for you:
{"name": "get_lines", "arguments": {"path": "src/index.ts"}}
Let me know if you need more.`
    const { toolCalls, content } = parseToolCalls(mixedResponse)

    expect(toolCalls).toHaveLength(1)
    expect(toolCalls[0].name).toBe("get_lines")

    // The prose on both sides of the call must still be present in the content.
    expect(content).toContain("I'll read the file for you:")
    expect(content).toContain("Let me know if you need more.")
})
it("should normalize tool name aliases in JSON format", () => {
    // read_file -> get_lines, this time arriving as a JSON tool call
    const { toolCalls } = parseToolCalls(
        `{"name": "read_file", "arguments": {"path": "test.ts"}}`,
    )

    expect(toolCalls).toHaveLength(1)
    expect(toolCalls[0].name).toBe("get_lines")
})
it("should reject unknown tool names in JSON format", () => {
    const { toolCalls, hasParseErrors, parseErrors } = parseToolCalls(
        `{"name": "unknown_tool", "arguments": {"path": "test.ts"}}`,
    )

    // No call is produced; the first error must mention the offending name.
    expect(toolCalls).toHaveLength(0)
    expect(hasParseErrors).toBe(true)
    expect(parseErrors[0]).toContain("unknown_tool")
})
it("should prefer XML over JSON when both present", () => {
    // One XML call followed by one JSON call in the same response.
    const bothFormats = `<tool_call name="get_lines"><path>xml.ts</path></tool_call>
{"name": "get_function", "arguments": {"path": "json.ts", "name": "foo"}}`
    const { toolCalls } = parseToolCalls(bothFormats)

    // Only the XML call may be returned; the JSON one must be left unparsed.
    expect(toolCalls).toHaveLength(1)
    const xmlCall = toolCalls[0]
    expect(xmlCall.name).toBe("get_lines")
    expect(xmlCall.params.path).toBe("xml.ts")
})
it("should parse JSON with empty arguments", () => {
    // A tool call with an empty arguments object must yield empty params.
    const { toolCalls } = parseToolCalls(`{"name": "git_status", "arguments": {}}`)

    expect(toolCalls).toHaveLength(1)
    expect(toolCalls[0].name).toBe("git_status")
    expect(toolCalls[0].params).toEqual({})
})
it("should support CDATA for multiline content", () => {
const response = `<tool_call name="edit_lines">
<path>src/index.ts</path>

View File

@@ -19,10 +19,16 @@ describe("prompts", () => {
expect(SYSTEM_PROMPT.length).toBeGreaterThan(100)
})
it("should contain core principles", () => {
expect(SYSTEM_PROMPT).toContain("Lazy Loading")
expect(SYSTEM_PROMPT).toContain("Precision")
expect(SYSTEM_PROMPT).toContain("Safety")
it("should contain mandatory tool usage instructions", () => {
expect(SYSTEM_PROMPT).toContain("MANDATORY")
expect(SYSTEM_PROMPT).toContain("Tools for Code Questions")
expect(SYSTEM_PROMPT).toContain("ZERO code in your context")
})
it("should contain when to use and when not to use tools", () => {
expect(SYSTEM_PROMPT).toContain("When to Use Tools")
expect(SYSTEM_PROMPT).toContain("Do NOT use tools")
expect(SYSTEM_PROMPT).toContain("Greetings")
})
it("should list available tools", () => {
@@ -34,8 +40,9 @@ describe("prompts", () => {
})
it("should include safety rules", () => {
expect(SYSTEM_PROMPT).toContain("Safety Rules")
expect(SYSTEM_PROMPT).toContain("Never execute commands that could harm")
expect(SYSTEM_PROMPT).toContain("Stay safe")
expect(SYSTEM_PROMPT).toContain("destructive commands")
expect(SYSTEM_PROMPT).toContain("Verify before editing")
})
})
@@ -2418,6 +2425,8 @@ describe("prompts", () => {
isEntryPoint: false,
fileType: "source",
impactScore: 2,
transitiveDepCount: 0,
transitiveDepByCount: 0,
},
],
])
@@ -2427,7 +2436,7 @@ describe("prompts", () => {
expect(result).toBeNull()
})
it("should format file with high impact score", () => {
it("should format file with high impact score and transitive counts", () => {
const metas = new Map<string, FileMeta>([
[
"src/utils/validation.ts",
@@ -2452,6 +2461,8 @@ describe("prompts", () => {
isEntryPoint: false,
fileType: "source",
impactScore: 67,
transitiveDepCount: 24,
transitiveDepByCount: 0,
},
],
])
@@ -2459,11 +2470,11 @@ describe("prompts", () => {
const result = formatHighImpactFiles(metas)
expect(result).toContain("## High Impact Files")
expect(result).toContain("| File | Impact | Dependents |")
expect(result).toContain("| utils/validation | 67% | 12 files |")
expect(result).toContain("| File | Impact | Direct | Transitive |")
expect(result).toContain("| utils/validation | 67% | 12 | 24 |")
})
it("should sort by impact score descending", () => {
it("should sort by transitive count descending, then by impact", () => {
const metas = new Map<string, FileMeta>([
[
"src/low.ts",
@@ -2475,6 +2486,8 @@ describe("prompts", () => {
isEntryPoint: false,
fileType: "source",
impactScore: 10,
transitiveDepCount: 5,
transitiveDepByCount: 0,
},
],
[
@@ -2487,6 +2500,8 @@ describe("prompts", () => {
isEntryPoint: false,
fileType: "source",
impactScore: 50,
transitiveDepCount: 15,
transitiveDepByCount: 0,
},
],
])
@@ -2511,6 +2526,8 @@ describe("prompts", () => {
isEntryPoint: false,
fileType: "source",
impactScore: 10 + i,
transitiveDepCount: i,
transitiveDepByCount: 0,
})
}
@@ -2535,6 +2552,8 @@ describe("prompts", () => {
isEntryPoint: false,
fileType: "source",
impactScore: 30,
transitiveDepCount: 5,
transitiveDepByCount: 0,
},
],
[
@@ -2547,6 +2566,8 @@ describe("prompts", () => {
isEntryPoint: false,
fileType: "source",
impactScore: 5,
transitiveDepCount: 1,
transitiveDepByCount: 0,
},
],
])
@@ -2558,28 +2579,6 @@ describe("prompts", () => {
expect(result).not.toContain("low")
})
it("should show singular 'file' for 1 dependent", () => {
const metas = new Map<string, FileMeta>([
[
"src/single.ts",
{
complexity: { loc: 10, nesting: 1, cyclomaticComplexity: 1, score: 10 },
dependencies: [],
dependents: ["a.ts"],
isHub: false,
isEntryPoint: false,
fileType: "source",
impactScore: 10,
},
],
])
const result = formatHighImpactFiles(metas)
expect(result).toContain("1 file")
expect(result).not.toContain("1 files")
})
it("should shorten src/ prefix", () => {
const metas = new Map<string, FileMeta>([
[
@@ -2592,6 +2591,8 @@ describe("prompts", () => {
isEntryPoint: false,
fileType: "source",
impactScore: 20,
transitiveDepCount: 5,
transitiveDepByCount: 0,
},
],
])
@@ -2614,6 +2615,8 @@ describe("prompts", () => {
isEntryPoint: false,
fileType: "source",
impactScore: 20,
transitiveDepCount: 3,
transitiveDepByCount: 0,
},
],
])
@@ -2660,6 +2663,8 @@ describe("prompts", () => {
isEntryPoint: true,
fileType: "source",
impactScore: 20,
transitiveDepCount: 5,
transitiveDepByCount: 0,
},
],
])
@@ -2681,6 +2686,8 @@ describe("prompts", () => {
isEntryPoint: true,
fileType: "source",
impactScore: 20,
transitiveDepCount: 5,
transitiveDepByCount: 0,
},
],
])