feat(ipuaro): add JSON tool call parsing and improve prompts

- Add JSON fallback parsing in ResponseParser for LLMs that prefer JSON - Add tool name aliases (get_functions -> get_lines, etc.) - Improve system prompt with clear tool usage guidelines - Add native Ollama tools support in OllamaClient - Add E2E tests for full workflow with real Ollama
2025-12-27 15:00:41 +05:00 · 2025-12-05 20:51:18 +05:00
parent c82006bbda
commit 3e7762ec4e
11 changed files with 2430 additions and 102 deletions
--- a/packages/ipuaro/CHANGELOG.md
+++ b/packages/ipuaro/CHANGELOG.md
@@ -5,6 +5,35 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [0.30.2] - 2025-12-05 - JSON Tool Call Parsing & Improved Prompts
+
+### Added
+
+- **JSON Tool Call Fallback in ResponseParser**
+  - LLM responses with JSON format `{"name": "tool", "arguments": {...}}` are now parsed
+  - Fallback to JSON when XML format not found
+  - Works with models like qwen2.5-coder that prefer JSON over XML
+
+- **Tool Name Aliases**
+  - `get_functions`, `read_file`, `read_lines` → `get_lines`
+  - `list_files`, `get_files` → `get_structure`
+  - `find_todos` → `get_todos`
+  - And more common LLM typos/variations
+
+### Changed
+
+- **Improved System Prompt**
+  - Added clear "When to Use Tools" / "Do NOT use tools" sections
+  - More concise and directive instructions
+  - Better examples for tool usage
+
+### Technical Details
+
+- Total tests: 1848 passed (+8 new tests for JSON parsing)
+- 0 ESLint errors, 3 warnings (pre-existing complexity)
+
+---
+
 ## [0.30.1] - 2025-12-05 - Display Transitive Counts in Context

 ### Changed
--- a/packages/ipuaro/src/application/use-cases/HandleMessage.ts
+++ b/packages/ipuaro/src/application/use-cases/HandleMessage.ts
@@ -18,6 +18,7 @@ import {
    buildInitialContext,
    type ProjectStructure,
    SYSTEM_PROMPT,
+    TOOL_REMINDER,
 } from "../../infrastructure/llm/prompts.js"
 import { parseToolCalls } from "../../infrastructure/llm/ResponseParser.js"
 import type { IToolRegistry } from "../interfaces/IToolRegistry.js"
@@ -277,6 +278,12 @@ export class HandleMessage {

        messages.push(...session.history)

+        // Add tool reminder if last message is from user (first LLM call for this query)
+        const lastMessage = session.history[session.history.length - 1]
+        if (lastMessage?.role === "user") {
+            messages.push(createSystemMessage(TOOL_REMINDER))
+        }
+
        return messages
    }

--- a/packages/ipuaro/src/infrastructure/llm/OllamaClient.ts
+++ b/packages/ipuaro/src/infrastructure/llm/OllamaClient.ts
@@ -1,14 +1,17 @@
-import { type Message, Ollama } from "ollama"
+import { type Message, Ollama, type Tool } from "ollama"
 import type { ILLMClient, LLMResponse } from "../../domain/services/ILLMClient.js"
 import type { ChatMessage } from "../../domain/value-objects/ChatMessage.js"
+import { createToolCall, type ToolCall } from "../../domain/value-objects/ToolCall.js"
 import type { LLMConfig } from "../../shared/constants/config.js"
 import { IpuaroError } from "../../shared/errors/IpuaroError.js"
 import { estimateTokens } from "../../shared/utils/tokens.js"
 import { parseToolCalls } from "./ResponseParser.js"
+import { getOllamaNativeTools } from "./toolDefs.js"

 /**
 * Ollama LLM client implementation.
 * Wraps the Ollama SDK for chat completions with tool support.
+ * Supports both XML-based and native Ollama tool calling.
 */
 export class OllamaClient implements ILLMClient {
    private readonly client: Ollama
@@ -17,6 +20,7 @@ export class OllamaClient implements ILLMClient {
    private readonly contextWindow: number
    private readonly temperature: number
    private readonly timeout: number
+    private readonly useNativeTools: boolean
    private abortController: AbortController | null = null

    constructor(config: LLMConfig) {
@@ -26,11 +30,12 @@ export class OllamaClient implements ILLMClient {
        this.contextWindow = config.contextWindow
        this.temperature = config.temperature
        this.timeout = config.timeout
+        this.useNativeTools = config.useNativeTools ?? false
    }

    /**
     * Send messages to LLM and get response.
-     * Tool definitions should be included in the system prompt as XML format.
+     * Supports both XML-based tool calling and native Ollama tools.
     */
    async chat(messages: ChatMessage[]): Promise<LLMResponse> {
        const startTime = Date.now()
@@ -39,26 +44,11 @@ export class OllamaClient implements ILLMClient {
        try {
            const ollamaMessages = this.convertMessages(messages)

-            const response = await this.client.chat({
-                model: this.model,
-                messages: ollamaMessages,
-                options: {
-                    temperature: this.temperature,
-                },
-                stream: false,
-            })
-
-            const timeMs = Date.now() - startTime
-            const parsed = parseToolCalls(response.message.content)
-
-            return {
-                content: parsed.content,
-                toolCalls: parsed.toolCalls,
-                tokens: response.eval_count ?? estimateTokens(response.message.content),
-                timeMs,
-                truncated: false,
-                stopReason: this.determineStopReason(response, parsed.toolCalls),
+            if (this.useNativeTools) {
+                return await this.chatWithNativeTools(ollamaMessages, startTime)
            }
+
+            return await this.chatWithXMLTools(ollamaMessages, startTime)
        } catch (error) {
            if (error instanceof Error && error.name === "AbortError") {
                throw IpuaroError.llm("Request was aborted")
@@ -69,6 +59,131 @@ export class OllamaClient implements ILLMClient {
        }
    }

+    /**
+     * Chat using XML-based tool calling (legacy mode).
+     */
+    private async chatWithXMLTools(
+        ollamaMessages: Message[],
+        startTime: number,
+    ): Promise<LLMResponse> {
+        const response = await this.client.chat({
+            model: this.model,
+            messages: ollamaMessages,
+            options: {
+                temperature: this.temperature,
+            },
+            stream: false,
+        })
+
+        const timeMs = Date.now() - startTime
+        const parsed = parseToolCalls(response.message.content)
+
+        return {
+            content: parsed.content,
+            toolCalls: parsed.toolCalls,
+            tokens: response.eval_count ?? estimateTokens(response.message.content),
+            timeMs,
+            truncated: false,
+            stopReason: this.determineStopReason(response, parsed.toolCalls),
+        }
+    }
+
+    /**
+     * Chat using native Ollama tool calling.
+     */
+    private async chatWithNativeTools(
+        ollamaMessages: Message[],
+        startTime: number,
+    ): Promise<LLMResponse> {
+        const nativeTools = getOllamaNativeTools() as Tool[]
+
+        const response = await this.client.chat({
+            model: this.model,
+            messages: ollamaMessages,
+            tools: nativeTools,
+            options: {
+                temperature: this.temperature,
+            },
+            stream: false,
+        })
+
+        const timeMs = Date.now() - startTime
+        let toolCalls = this.parseNativeToolCalls(response.message.tool_calls)
+
+        // Fallback: some models return tool calls as JSON in content
+        if (toolCalls.length === 0 && response.message.content) {
+            toolCalls = this.parseToolCallsFromContent(response.message.content)
+        }
+
+        const content = toolCalls.length > 0 ? "" : response.message.content || ""
+
+        return {
+            content,
+            toolCalls,
+            tokens: response.eval_count ?? estimateTokens(response.message.content || ""),
+            timeMs,
+            truncated: false,
+            stopReason: toolCalls.length > 0 ? "tool_use" : "end",
+        }
+    }
+
+    /**
+     * Parse native Ollama tool calls into ToolCall format.
+     */
+    private parseNativeToolCalls(
+        nativeToolCalls?: { function: { name: string; arguments: Record<string, unknown> } }[],
+    ): ToolCall[] {
+        if (!nativeToolCalls || nativeToolCalls.length === 0) {
+            return []
+        }
+
+        return nativeToolCalls.map((tc, index) =>
+            createToolCall(
+                `native_${String(Date.now())}_${String(index)}`,
+                tc.function.name,
+                tc.function.arguments,
+            ),
+        )
+    }
+
+    /**
+     * Parse tool calls from content (fallback for models that return JSON in content).
+     * Supports format: {"name": "tool_name", "arguments": {...}}
+     */
+    private parseToolCallsFromContent(content: string): ToolCall[] {
+        const toolCalls: ToolCall[] = []
+
+        // Try to parse JSON objects from content
+        const jsonRegex = /\{[\s\S]*?"name"[\s\S]*?"arguments"[\s\S]*?\}/g
+        const matches = content.match(jsonRegex)
+
+        if (!matches) {
+            return toolCalls
+        }
+
+        for (const match of matches) {
+            try {
+                const parsed = JSON.parse(match) as {
+                    name?: string
+                    arguments?: Record<string, unknown>
+                }
+                if (parsed.name && typeof parsed.name === "string") {
+                    toolCalls.push(
+                        createToolCall(
+                            `json_${String(Date.now())}_${String(toolCalls.length)}`,
+                            parsed.name,
+                            parsed.arguments ?? {},
+                        ),
+                    )
+                }
+            } catch {
+                // Invalid JSON, skip
+            }
+        }
+
+        return toolCalls
+    }
+
    /**
     * Count tokens in text.
     * Uses estimation since Ollama doesn't provide a tokenizer endpoint.
--- a/packages/ipuaro/src/infrastructure/llm/ResponseParser.ts
+++ b/packages/ipuaro/src/infrastructure/llm/ResponseParser.ts
@@ -58,9 +58,50 @@ const VALID_TOOL_NAMES = new Set([
    "run_tests",
 ])

+/**
+ * Tool name aliases for common LLM typos/variations.
+ * Maps incorrect names to correct tool names.
+ */
+const TOOL_ALIASES: Record<string, string> = {
+    // get_lines aliases
+    get_functions: "get_lines",
+    read_file: "get_lines",
+    read_lines: "get_lines",
+    get_file: "get_lines",
+    read: "get_lines",
+    // get_function aliases
+    getfunction: "get_function",
+    // get_structure aliases
+    list_files: "get_structure",
+    get_files: "get_structure",
+    list_structure: "get_structure",
+    get_project_structure: "get_structure",
+    // get_todos aliases
+    find_todos: "get_todos",
+    list_todos: "get_todos",
+    // find_references aliases
+    get_references: "find_references",
+    // find_definition aliases
+    get_definition: "find_definition",
+    // edit_lines aliases
+    edit_file: "edit_lines",
+    modify_file: "edit_lines",
+    update_file: "edit_lines",
+}
+
+/**
+ * Normalize tool name using aliases.
+ */
+function normalizeToolName(name: string): string {
+    const lowerName = name.toLowerCase()
+    return TOOL_ALIASES[lowerName] ?? name
+}
+
 /**
 * Parse tool calls from LLM response text.
- * Supports XML format: <tool_call name="get_lines"><path>src/index.ts</path></tool_call>
+ * Supports both XML and JSON formats:
+ * - XML: <tool_call name="get_lines"><path>src/index.ts</path></tool_call>
+ * - JSON: {"name": "get_lines", "arguments": {"path": "src/index.ts"}}
 * Validates tool names and provides helpful error messages.
 */
 export function parseToolCalls(response: string): ParsedResponse {
@@ -68,14 +109,18 @@ export function parseToolCalls(response: string): ParsedResponse {
    const parseErrors: string[] = []
    let content = response

-    const matches = [...response.matchAll(TOOL_CALL_REGEX)]
+    // First, try XML format
+    const xmlMatches = [...response.matchAll(TOOL_CALL_REGEX)]

-    for (const match of matches) {
-        const [fullMatch, toolName, paramsXml] = match
+    for (const match of xmlMatches) {
+        const [fullMatch, rawToolName, paramsXml] = match
+
+        // Normalize tool name (handle common LLM typos/variations)
+        const toolName = normalizeToolName(rawToolName)

        if (!VALID_TOOL_NAMES.has(toolName)) {
            parseErrors.push(
-                `Unknown tool "${toolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
+                `Unknown tool "${rawToolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
            )
            continue
        }
@@ -91,7 +136,19 @@ export function parseToolCalls(response: string): ParsedResponse {
            content = content.replace(fullMatch, "")
        } catch (error) {
            const errorMsg = error instanceof Error ? error.message : String(error)
-            parseErrors.push(`Failed to parse tool call "${toolName}": ${errorMsg}`)
+            parseErrors.push(`Failed to parse tool call "${rawToolName}": ${errorMsg}`)
+        }
+    }
+
+    // If no XML tool calls found, try JSON format as fallback
+    if (toolCalls.length === 0) {
+        const jsonResult = parseJsonToolCalls(response)
+        toolCalls.push(...jsonResult.toolCalls)
+        parseErrors.push(...jsonResult.parseErrors)
+
+        // Remove JSON tool calls from content
+        for (const jsonMatch of jsonResult.matchedStrings) {
+            content = content.replace(jsonMatch, "")
        }
    }

@@ -105,6 +162,59 @@ export function parseToolCalls(response: string): ParsedResponse {
    }
 }

+/**
+ * JSON tool call format pattern.
+ * Matches: {"name": "tool_name", "arguments": {...}}
+ */
+const JSON_TOOL_CALL_REGEX =
+    /\{\s*"name"\s*:\s*"([^"]+)"\s*,\s*"arguments"\s*:\s*(\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\})\s*\}/g
+
+/**
+ * Parse tool calls from JSON format in response.
+ * This is a fallback for LLMs that prefer JSON over XML.
+ */
+function parseJsonToolCalls(response: string): {
+    toolCalls: ToolCall[]
+    parseErrors: string[]
+    matchedStrings: string[]
+} {
+    const toolCalls: ToolCall[] = []
+    const parseErrors: string[] = []
+    const matchedStrings: string[] = []
+
+    const matches = [...response.matchAll(JSON_TOOL_CALL_REGEX)]
+
+    for (const match of matches) {
+        const [fullMatch, rawToolName, argsJson] = match
+        matchedStrings.push(fullMatch)
+
+        // Normalize tool name
+        const toolName = normalizeToolName(rawToolName)
+
+        if (!VALID_TOOL_NAMES.has(toolName)) {
+            parseErrors.push(
+                `Unknown tool "${rawToolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
+            )
+            continue
+        }
+
+        try {
+            const args = JSON.parse(argsJson) as Record<string, unknown>
+            const toolCall = createToolCall(
+                `json_${String(Date.now())}_${String(toolCalls.length)}`,
+                toolName,
+                args,
+            )
+            toolCalls.push(toolCall)
+        } catch (error) {
+            const errorMsg = error instanceof Error ? error.message : String(error)
+            parseErrors.push(`Failed to parse JSON tool call "${rawToolName}": ${errorMsg}`)
+        }
+    }
+
+    return { toolCalls, parseErrors, matchedStrings }
+}
+
 /**
 * Parse parameters from XML content.
 */
--- a/packages/ipuaro/src/infrastructure/llm/prompts.ts
+++ b/packages/ipuaro/src/infrastructure/llm/prompts.ts
@@ -25,99 +25,115 @@ export interface BuildContextOptions {
 /**
 * System prompt for the ipuaro AI agent.
 */
-export const SYSTEM_PROMPT = `You are ipuaro, a local AI code assistant specialized in helping developers understand and modify their codebase. You operate within a single project directory and have access to powerful tools for reading, searching, analyzing, and editing code.
+export const SYSTEM_PROMPT = `You are ipuaro, a local AI code assistant with tools for reading, searching, analyzing, and editing code.

-## Core Principles
+## When to Use Tools

-1. **Lazy Loading**: You don't have the full code in context. Use tools to fetch exactly what you need.
-2. **Precision**: Always verify file paths and line numbers before making changes.
-3. **Safety**: Confirm destructive operations. Never execute dangerous commands.
-4. **Efficiency**: Minimize context usage. Request only necessary code sections.
+**Use tools** when the user asks about:
+- Code content (files, functions, classes)
+- Project structure
+- TODOs, complexity, dependencies
+- Git status, diffs, commits
+- Running commands or tests

-## Tool Calling Format
+**Do NOT use tools** for:
+- Greetings ("Hello", "Hi", "Thanks")
+- General questions not about this codebase
+- Clarifying questions back to the user

-When you need to use a tool, format your call as XML:
+## MANDATORY: Tools for Code Questions

-<tool_call name="tool_name">
-  <param_name>value</param_name>
-  <another_param>value</another_param>
-</tool_call>
+**CRITICAL:** You have ZERO code in your context. To answer ANY question about code, you MUST first call a tool.

-You can call multiple tools in one response. Always wait for tool results before making conclusions.
-
-**Examples:**
+**WRONG:**
+User: "What's in src/index.ts?"
+Assistant: "The file likely contains..." ← WRONG! Call a tool!

+**CORRECT:**
+User: "What's in src/index.ts?"
 <tool_call name="get_lines">
-  <path>src/index.ts</path>
-  <start>1</start>
-  <end>50</end>
+<path>src/index.ts</path>
 </tool_call>

-<tool_call name="edit_lines">
-  <path>src/utils.ts</path>
-  <start>10</start>
-  <end>15</end>
-  <content>const newCode = "hello";</content>
+## Tool Call Format
+
+Output this XML format. Do NOT explain before calling - just output the XML:
+
+<tool_call name="TOOL_NAME">
+<param1>value1</param1>
+<param2>value2</param2>
 </tool_call>

-<tool_call name="find_references">
-  <symbol>getUserById</symbol>
+## Example Interactions
+
+**Example 1 - Reading a file:**
+User: "Show me the main function in src/app.ts"
+<tool_call name="get_function">
+<path>src/app.ts</path>
+<name>main</name>
+</tool_call>
+
+**Example 2 - Finding TODOs:**
+User: "Are there any TODO comments?"
+<tool_call name="get_todos">
+</tool_call>
+
+**Example 3 - Project structure:**
+User: "What files are in this project?"
+<tool_call name="get_structure">
+<path>.</path>
 </tool_call>

 ## Available Tools

-### Reading Tools
- \`get_lines(path, start?, end?)\`: Get specific lines from a file
- \`get_function(path, name)\`: Get a function by name
- \`get_class(path, name)\`: Get a class by name
- \`get_structure(path?, depth?)\`: Get project directory structure
+### Reading
+- get_lines(path, start?, end?) - Read file lines
+- get_function(path, name) - Get function by name
+- get_class(path, name) - Get class by name
+- get_structure(path?, depth?) - List project files

-### Editing Tools (require confirmation)
- \`edit_lines(path, start, end, content)\`: Replace specific lines in a file
- \`create_file(path, content)\`: Create a new file
- \`delete_file(path)\`: Delete a file
+### Analysis
+- get_todos(path?, type?) - Find TODO/FIXME comments
+- get_dependencies(path) - What this file imports
+- get_dependents(path) - What imports this file
+- get_complexity(path?) - Code complexity metrics
+- find_references(symbol) - Find all usages of a symbol
+- find_definition(symbol) - Find where symbol is defined

-### Search Tools
- \`find_references(symbol, path?)\`: Find all usages of a symbol
- \`find_definition(symbol)\`: Find where a symbol is defined
+### Editing (requires confirmation)
+- edit_lines(path, start, end, content) - Modify file lines
+- create_file(path, content) - Create new file
+- delete_file(path) - Delete a file

-### Analysis Tools
- \`get_dependencies(path)\`: Get files this file imports
- \`get_dependents(path)\`: Get files that import this file
- \`get_complexity(path?, limit?)\`: Get complexity metrics
- \`get_todos(path?, type?)\`: Find TODO/FIXME comments
+### Git
+- git_status() - Repository status
+- git_diff(path?, staged?) - Show changes
+- git_commit(message, files?) - Create commit

-### Git Tools
- \`git_status()\`: Get repository status
- \`git_diff(path?, staged?)\`: Get uncommitted changes
- \`git_commit(message, files?)\`: Create a commit (requires confirmation)
+### Commands
+- run_command(command, timeout?) - Execute shell command
+- run_tests(path?, filter?) - Run test suite

-### Run Tools
- \`run_command(command, timeout?)\`: Execute a shell command (security checked)
- \`run_tests(path?, filter?, watch?)\`: Run the test suite
+## Rules

-## Response Guidelines
+1. **ALWAYS call a tool first** when asked about code - you cannot see any files
+2. **Output XML directly** - don't say "I will use..." just output the tool call
+3. **Wait for results** before making conclusions
+4. **Be concise** in your responses
+5. **Verify before editing** - always read code before modifying it
+6. **Stay safe** - never execute destructive commands without user confirmation`

-1. **Be concise**: Don't repeat information already in context.
-2. **Show your work**: Explain what tools you're using and why.
-3. **Verify before editing**: Always read the target code before modifying it.
-4. **Handle errors gracefully**: If a tool fails, explain what went wrong and suggest alternatives.
+/**
+ * Tool usage reminder - appended to messages to reinforce tool usage.
+ * This is added as the last system message before LLM call.
+ */
+export const TOOL_REMINDER = `⚠️ REMINDER: To answer this question, you MUST use a tool first.
+Output the <tool_call> XML directly. Do NOT describe what you will do - just call the tool.

-## Code Editing Rules
-
-1. Always use \`get_lines\` or \`get_function\` before \`edit_lines\`.
-2. Provide exact line numbers for edits.
-3. For large changes, break into multiple small edits.
-4. After editing, suggest running tests if available.
-
-## Safety Rules
-
-1. Never execute commands that could harm the system.
-2. Never expose sensitive data (API keys, passwords).
-3. Always confirm file deletions and destructive git operations.
-4. Stay within the project directory.
-
-When you need to perform an action, use the appropriate tool. Think step by step about what information you need and which tools will provide it most efficiently.`
+Example - if asked about a file, output:
+<tool_call name="get_lines">
+<path>the/file/path.ts</path>
+</tool_call>`

 /**
 * Build initial context from project structure and AST metadata.
--- a/packages/ipuaro/src/infrastructure/llm/toolDefs.ts
+++ b/packages/ipuaro/src/infrastructure/llm/toolDefs.ts
@@ -509,3 +509,87 @@ export function getToolsByCategory(category: string): ToolDef[] {
            return []
    }
 }
+
+/*
+ * =============================================================================
+ * Native Ollama Tools Format
+ * =============================================================================
+ */
+
+/**
+ * Ollama native tool definition format.
+ */
+export interface OllamaTool {
+    type: "function"
+    function: {
+        name: string
+        description: string
+        parameters: {
+            type: "object"
+            properties: Record<string, OllamaToolProperty>
+            required: string[]
+        }
+    }
+}
+
+interface OllamaToolProperty {
+    type: string
+    description: string
+    enum?: string[]
+    items?: { type: string }
+}
+
+/**
+ * Convert ToolDef to Ollama native format.
+ */
+function convertToOllamaTool(tool: ToolDef): OllamaTool {
+    const properties: Record<string, OllamaToolProperty> = {}
+    const required: string[] = []
+
+    for (const param of tool.parameters) {
+        const prop: OllamaToolProperty = {
+            type: param.type === "array" ? "array" : param.type,
+            description: param.description,
+        }
+
+        if (param.enum) {
+            prop.enum = param.enum
+        }
+
+        if (param.type === "array") {
+            prop.items = { type: "string" }
+        }
+
+        properties[param.name] = prop
+
+        if (param.required) {
+            required.push(param.name)
+        }
+    }
+
+    return {
+        type: "function",
+        function: {
+            name: tool.name,
+            description: tool.description,
+            parameters: {
+                type: "object",
+                properties,
+                required,
+            },
+        },
+    }
+}
+
+/**
+ * All tools in Ollama native format.
+ * Used when useNativeTools is enabled.
+ */
+export const OLLAMA_NATIVE_TOOLS: OllamaTool[] = ALL_TOOLS.map(convertToOllamaTool)
+
+/**
+ * Get native tool definitions for Ollama.
+ */
+export function getOllamaNativeTools(): OllamaTool[] {
+    return OLLAMA_NATIVE_TOOLS
+}
--- a/packages/ipuaro/src/shared/constants/config.ts
+++ b/packages/ipuaro/src/shared/constants/config.ts
@@ -20,6 +20,7 @@ export const LLMConfigSchema = z.object({
    temperature: z.number().min(0).max(2).default(0.1),
    host: z.string().default("http://localhost:11434"),
    timeout: z.number().int().positive().default(120_000),
+    useNativeTools: z.boolean().default(false),
 })

 /**
--- a/packages/ipuaro/tests/e2e/full-workflow.test.ts
+++ b/packages/ipuaro/tests/e2e/full-workflow.test.ts
--- a/packages/ipuaro/tests/e2e/test-helpers.ts
+++ b/packages/ipuaro/tests/e2e/test-helpers.ts
@@ -0,0 +1,351 @@
+/**
+ * E2E Test Helpers
+ * Provides dependencies for testing the full flow with REAL LLM.
+ */
+
+import { vi } from "vitest"
+import * as fs from "node:fs/promises"
+import * as path from "node:path"
+import * as os from "node:os"
+import type { IStorage, SymbolIndex, DepsGraph } from "../../src/domain/services/IStorage.js"
+import type { ISessionStorage, SessionListItem } from "../../src/domain/services/ISessionStorage.js"
+import type { FileData } from "../../src/domain/value-objects/FileData.js"
+import type { FileAST } from "../../src/domain/value-objects/FileAST.js"
+import type { FileMeta } from "../../src/domain/value-objects/FileMeta.js"
+import type { UndoEntry } from "../../src/domain/value-objects/UndoEntry.js"
+import { Session } from "../../src/domain/entities/Session.js"
+import { ToolRegistry } from "../../src/infrastructure/tools/registry.js"
+import { OllamaClient } from "../../src/infrastructure/llm/OllamaClient.js"
+import { registerAllTools } from "../../src/cli/commands/tools-setup.js"
+import type { LLMConfig } from "../../src/shared/constants/config.js"
+
+/**
+ * Default LLM config for tests.
+ */
+export const DEFAULT_TEST_LLM_CONFIG: LLMConfig = {
+    model: "qwen2.5-coder:14b-instruct-q4_K_M",
+    contextWindow: 128_000,
+    temperature: 0.1,
+    host: "http://localhost:11434",
+    timeout: 180_000,
+    useNativeTools: true,
+}
+
+/**
+ * In-memory storage implementation for testing.
+ * Stores all data in Maps, no Redis required.
+ */
+export function createInMemoryStorage(): IStorage {
+    const files = new Map<string, FileData>()
+    const asts = new Map<string, FileAST>()
+    const metas = new Map<string, FileMeta>()
+    let symbolIndex: SymbolIndex = new Map()
+    let depsGraph: DepsGraph = { imports: new Map(), importedBy: new Map() }
+    const projectConfig = new Map<string, unknown>()
+    let connected = false
+
+    return {
+        getFile: vi.fn(async (filePath: string) => files.get(filePath) ?? null),
+        setFile: vi.fn(async (filePath: string, data: FileData) => {
+            files.set(filePath, data)
+        }),
+        deleteFile: vi.fn(async (filePath: string) => {
+            files.delete(filePath)
+        }),
+        getAllFiles: vi.fn(async () => new Map(files)),
+        getFileCount: vi.fn(async () => files.size),
+
+        getAST: vi.fn(async (filePath: string) => asts.get(filePath) ?? null),
+        setAST: vi.fn(async (filePath: string, ast: FileAST) => {
+            asts.set(filePath, ast)
+        }),
+        deleteAST: vi.fn(async (filePath: string) => {
+            asts.delete(filePath)
+        }),
+        getAllASTs: vi.fn(async () => new Map(asts)),
+
+        getMeta: vi.fn(async (filePath: string) => metas.get(filePath) ?? null),
+        setMeta: vi.fn(async (filePath: string, meta: FileMeta) => {
+            metas.set(filePath, meta)
+        }),
+        deleteMeta: vi.fn(async (filePath: string) => {
+            metas.delete(filePath)
+        }),
+        getAllMetas: vi.fn(async () => new Map(metas)),
+
+        getSymbolIndex: vi.fn(async () => symbolIndex),
+        setSymbolIndex: vi.fn(async (index: SymbolIndex) => {
+            symbolIndex = index
+        }),
+        getDepsGraph: vi.fn(async () => depsGraph),
+        setDepsGraph: vi.fn(async (graph: DepsGraph) => {
+            depsGraph = graph
+        }),
+
+        getProjectConfig: vi.fn(async (key: string) => projectConfig.get(key) ?? null),
+        setProjectConfig: vi.fn(async (key: string, value: unknown) => {
+            projectConfig.set(key, value)
+        }),
+
+        connect: vi.fn(async () => {
+            connected = true
+        }),
+        disconnect: vi.fn(async () => {
+            connected = false
+        }),
+        isConnected: vi.fn(() => connected),
+        clear: vi.fn(async () => {
+            files.clear()
+            asts.clear()
+            metas.clear()
+            symbolIndex = new Map()
+            depsGraph = { imports: new Map(), importedBy: new Map() }
+            projectConfig.clear()
+        }),
+    }
+}
+
+/**
+ * In-memory session storage for testing.
+ */
+export function createInMemorySessionStorage(): ISessionStorage {
+    const sessions = new Map<string, Session>()
+    const undoStacks = new Map<string, UndoEntry[]>()
+
+    return {
+        saveSession: vi.fn(async (session: Session) => {
+            sessions.set(session.id, session)
+        }),
+        loadSession: vi.fn(async (sessionId: string) => sessions.get(sessionId) ?? null),
+        deleteSession: vi.fn(async (sessionId: string) => {
+            sessions.delete(sessionId)
+            undoStacks.delete(sessionId)
+        }),
+        listSessions: vi.fn(async (projectName?: string): Promise<SessionListItem[]> => {
+            const items: SessionListItem[] = []
+            for (const session of sessions.values()) {
+                if (!projectName || session.projectName === projectName) {
+                    items.push({
+                        id: session.id,
+                        projectName: session.projectName,
+                        createdAt: session.createdAt,
+                        lastActivityAt: session.lastActivityAt,
+                        messageCount: session.history.length,
+                    })
+                }
+            }
+            return items
+        }),
+        getLatestSession: vi.fn(async (projectName: string) => {
+            let latest: Session | null = null
+            for (const session of sessions.values()) {
+                if (session.projectName === projectName) {
+                    if (!latest || session.lastActivityAt > latest.lastActivityAt) {
+                        latest = session
+                    }
+                }
+            }
+            return latest
+        }),
+        sessionExists: vi.fn(async (sessionId: string) => sessions.has(sessionId)),
+        pushUndoEntry: vi.fn(async (sessionId: string, entry: UndoEntry) => {
+            const stack = undoStacks.get(sessionId) ?? []
+            stack.push(entry)
+            undoStacks.set(sessionId, stack)
+        }),
+        popUndoEntry: vi.fn(async (sessionId: string) => {
+            const stack = undoStacks.get(sessionId) ?? []
+            return stack.pop() ?? null
+        }),
+        getUndoStack: vi.fn(async (sessionId: string) => undoStacks.get(sessionId) ?? []),
+        touchSession: vi.fn(async (sessionId: string) => {
+            const session = sessions.get(sessionId)
+            if (session) {
+                session.lastActivityAt = Date.now()
+            }
+        }),
+        clearAllSessions: vi.fn(async () => {
+            sessions.clear()
+            undoStacks.clear()
+        }),
+    }
+}
+
+/**
+ * Create REAL Ollama client for E2E tests.
+ */
+export function createRealOllamaClient(config?: Partial<LLMConfig>): OllamaClient {
+    return new OllamaClient({
+        ...DEFAULT_TEST_LLM_CONFIG,
+        ...config,
+    })
+}
+
+/**
+ * Create a tool registry with all 18 tools registered.
+ */
+export function createRealToolRegistry(): ToolRegistry {
+    const registry = new ToolRegistry()
+    registerAllTools(registry)
+    return registry
+}
+
+/**
+ * Create a new test session.
+ */
+export function createTestSession(projectName = "test-project"): Session {
+    return new Session(`test-${Date.now()}`, projectName)
+}
+
+/**
+ * Create a temporary test project directory with sample files.
+ */
+export async function createTestProject(): Promise<string> {
+    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "ipuaro-e2e-"))
+
+    await fs.mkdir(path.join(tempDir, "src"), { recursive: true })
+
+    await fs.writeFile(
+        path.join(tempDir, "src", "index.ts"),
+        `/**
+ * Main entry point
+ */
+export function main(): void {
+    console.log("Hello, world!")
+}
+
+export function add(a: number, b: number): number {
+    return a + b
+}
+
+export function multiply(a: number, b: number): number {
+    return a * b
+}
+
+// TODO: Add more math functions
+main()
+`,
+    )
+
+    await fs.writeFile(
+        path.join(tempDir, "src", "utils.ts"),
+        `/**
+ * Utility functions
+ */
+import { add } from "./index.js"
+
+export function sum(numbers: number[]): number {
+    return numbers.reduce((acc, n) => add(acc, n), 0)
+}
+
+export class Calculator {
+    private result: number = 0
+
+    add(n: number): this {
+        this.result += n
+        return this
+    }
+
+    subtract(n: number): this {
+        this.result -= n
+        return this
+    }
+
+    getResult(): number {
+        return this.result
+    }
+
+    reset(): void {
+        this.result = 0
+    }
+}
+
+// FIXME: Handle edge cases for negative numbers
+`,
+    )
+
+    await fs.writeFile(
+        path.join(tempDir, "package.json"),
+        JSON.stringify(
+            {
+                name: "test-project",
+                version: "1.0.0",
+                type: "module",
+                scripts: {
+                    test: "echo 'Tests passed!'",
+                },
+            },
+            null,
+            4,
+        ),
+    )
+
+    await fs.writeFile(
+        path.join(tempDir, "README.md"),
+        `# Test Project
+
+A sample project for E2E testing.
+
+## Features
+- Basic math functions
+- Calculator class
+`,
+    )
+
+    return tempDir
+}
+
+/**
+ * Clean up test project directory.
+ */
+export async function cleanupTestProject(projectDir: string): Promise<void> {
+    await fs.rm(projectDir, { recursive: true, force: true })
+}
+
+/**
+ * All test dependencies bundled together.
+ */
+export interface E2ETestDependencies {
+    storage: IStorage
+    sessionStorage: ISessionStorage
+    llm: OllamaClient
+    tools: ToolRegistry
+    session: Session
+    projectRoot: string
+}
+
+/**
+ * Create all dependencies for E2E testing with REAL Ollama.
+ */
+export async function createE2ETestDependencies(
+    llmConfig?: Partial<LLMConfig>,
+): Promise<E2ETestDependencies> {
+    const projectRoot = await createTestProject()
+
+    return {
+        storage: createInMemoryStorage(),
+        sessionStorage: createInMemorySessionStorage(),
+        llm: createRealOllamaClient(llmConfig),
+        tools: createRealToolRegistry(),
+        session: createTestSession(),
+        projectRoot,
+    }
+}
+
+/**
+ * Check if Ollama is available.
+ */
+export async function isOllamaAvailable(): Promise<boolean> {
+    const client = createRealOllamaClient()
+    return client.isAvailable()
+}
+
+/**
+ * Check if required model is available.
+ */
+export async function isModelAvailable(
+    model = "qwen2.5-coder:14b-instruct-q4_K_M",
+): Promise<boolean> {
+    const client = createRealOllamaClient()
+    return client.hasModel(model)
+}
--- a/packages/ipuaro/tests/unit/infrastructure/llm/ResponseParser.test.ts
+++ b/packages/ipuaro/tests/unit/infrastructure/llm/ResponseParser.test.ts
@@ -135,6 +135,108 @@ describe("ResponseParser", () => {
            expect(result.parseErrors[0]).toContain("unknown_tool")
        })

+        it("should normalize tool name aliases", () => {
+            // get_functions -> get_lines (common LLM typo)
+            const response1 = `<tool_call name="get_functions"><path>src/index.ts</path></tool_call>`
+            const result1 = parseToolCalls(response1)
+            expect(result1.toolCalls).toHaveLength(1)
+            expect(result1.toolCalls[0].name).toBe("get_lines")
+            expect(result1.hasParseErrors).toBe(false)
+
+            // read_file -> get_lines
+            const response2 = `<tool_call name="read_file"><path>test.ts</path></tool_call>`
+            const result2 = parseToolCalls(response2)
+            expect(result2.toolCalls).toHaveLength(1)
+            expect(result2.toolCalls[0].name).toBe("get_lines")
+
+            // find_todos -> get_todos
+            const response3 = `<tool_call name="find_todos"></tool_call>`
+            const result3 = parseToolCalls(response3)
+            expect(result3.toolCalls).toHaveLength(1)
+            expect(result3.toolCalls[0].name).toBe("get_todos")
+
+            // list_files -> get_structure
+            const response4 = `<tool_call name="list_files"><path>.</path></tool_call>`
+            const result4 = parseToolCalls(response4)
+            expect(result4.toolCalls).toHaveLength(1)
+            expect(result4.toolCalls[0].name).toBe("get_structure")
+        })
+
+        // JSON format tests
+        it("should parse JSON format tool calls as fallback", () => {
+            const response = `{"name": "get_lines", "arguments": {"path": "src/index.ts"}}`
+            const result = parseToolCalls(response)
+
+            expect(result.toolCalls).toHaveLength(1)
+            expect(result.toolCalls[0].name).toBe("get_lines")
+            expect(result.toolCalls[0].params).toEqual({ path: "src/index.ts" })
+            expect(result.hasParseErrors).toBe(false)
+        })
+
+        it("should parse JSON format with numeric arguments", () => {
+            const response = `{"name": "get_lines", "arguments": {"path": "src/index.ts", "start": 1, "end": 50}}`
+            const result = parseToolCalls(response)
+
+            expect(result.toolCalls).toHaveLength(1)
+            expect(result.toolCalls[0].params).toEqual({
+                path: "src/index.ts",
+                start: 1,
+                end: 50,
+            })
+        })
+
+        it("should parse JSON format with surrounding text", () => {
+            const response = `I'll read the file for you:
+{"name": "get_lines", "arguments": {"path": "src/index.ts"}}
+Let me know if you need more.`
+
+            const result = parseToolCalls(response)
+
+            expect(result.toolCalls).toHaveLength(1)
+            expect(result.toolCalls[0].name).toBe("get_lines")
+            expect(result.content).toContain("I'll read the file for you:")
+            expect(result.content).toContain("Let me know if you need more.")
+        })
+
+        it("should normalize tool name aliases in JSON format", () => {
+            // read_file -> get_lines
+            const response = `{"name": "read_file", "arguments": {"path": "test.ts"}}`
+            const result = parseToolCalls(response)
+
+            expect(result.toolCalls).toHaveLength(1)
+            expect(result.toolCalls[0].name).toBe("get_lines")
+        })
+
+        it("should reject unknown tool names in JSON format", () => {
+            const response = `{"name": "unknown_tool", "arguments": {"path": "test.ts"}}`
+            const result = parseToolCalls(response)
+
+            expect(result.toolCalls).toHaveLength(0)
+            expect(result.hasParseErrors).toBe(true)
+            expect(result.parseErrors[0]).toContain("unknown_tool")
+        })
+
+        it("should prefer XML over JSON when both present", () => {
+            const response = `<tool_call name="get_lines"><path>xml.ts</path></tool_call>
+{"name": "get_function", "arguments": {"path": "json.ts", "name": "foo"}}`
+
+            const result = parseToolCalls(response)
+
+            // Should only parse XML since it was found first
+            expect(result.toolCalls).toHaveLength(1)
+            expect(result.toolCalls[0].name).toBe("get_lines")
+            expect(result.toolCalls[0].params.path).toBe("xml.ts")
+        })
+
+        it("should parse JSON with empty arguments", () => {
+            const response = `{"name": "git_status", "arguments": {}}`
+            const result = parseToolCalls(response)
+
+            expect(result.toolCalls).toHaveLength(1)
+            expect(result.toolCalls[0].name).toBe("git_status")
+            expect(result.toolCalls[0].params).toEqual({})
+        })
+
        it("should support CDATA for multiline content", () => {
            const response = `<tool_call name="edit_lines">
                <path>src/index.ts</path>
--- a/packages/ipuaro/tests/unit/infrastructure/llm/prompts.test.ts
+++ b/packages/ipuaro/tests/unit/infrastructure/llm/prompts.test.ts
@@ -19,10 +19,16 @@ describe("prompts", () => {
            expect(SYSTEM_PROMPT.length).toBeGreaterThan(100)
        })

-        it("should contain core principles", () => {
-            expect(SYSTEM_PROMPT).toContain("Lazy Loading")
-            expect(SYSTEM_PROMPT).toContain("Precision")
-            expect(SYSTEM_PROMPT).toContain("Safety")
+        it("should contain mandatory tool usage instructions", () => {
+            expect(SYSTEM_PROMPT).toContain("MANDATORY")
+            expect(SYSTEM_PROMPT).toContain("Tools for Code Questions")
+            expect(SYSTEM_PROMPT).toContain("ZERO code in your context")
+        })
+
+        it("should contain when to use and when not to use tools", () => {
+            expect(SYSTEM_PROMPT).toContain("When to Use Tools")
+            expect(SYSTEM_PROMPT).toContain("Do NOT use tools")
+            expect(SYSTEM_PROMPT).toContain("Greetings")
        })

        it("should list available tools", () => {
@@ -34,8 +40,9 @@ describe("prompts", () => {
        })

        it("should include safety rules", () => {
-            expect(SYSTEM_PROMPT).toContain("Safety Rules")
-            expect(SYSTEM_PROMPT).toContain("Never execute commands that could harm")
+            expect(SYSTEM_PROMPT).toContain("Stay safe")
+            expect(SYSTEM_PROMPT).toContain("destructive commands")
+            expect(SYSTEM_PROMPT).toContain("Verify before editing")
        })
    })