From 3e7762ec4ecf14f47cd2ee4a9ec6ed7736a82361 Mon Sep 17 00:00:00 2001
From: imfozilbek <fozilbek.samiyev@gmail.com>
Date: Fri, 5 Dec 2025 20:51:18 +0500
Subject: [PATCH] feat(ipuaro): add JSON tool call parsing and improve prompts

- Add JSON fallback parsing in ResponseParser for LLMs that prefer JSON
- Add tool name aliases (get_functions -> get_lines, etc.)
- Improve system prompt with clear tool usage guidelines
- Add native Ollama tools support in OllamaClient
- Add E2E tests for full workflow with real Ollama
---
 packages/ipuaro/CHANGELOG.md                  |   29 +
 .../application/use-cases/HandleMessage.ts    |    7 +
 .../src/infrastructure/llm/OllamaClient.ts    |  157 +-
 .../src/infrastructure/llm/ResponseParser.ts  |  122 +-
 .../ipuaro/src/infrastructure/llm/prompts.ts  |  154 +-
 .../ipuaro/src/infrastructure/llm/toolDefs.ts |   84 +
 .../ipuaro/src/shared/constants/config.ts     |    1 +
 .../ipuaro/tests/e2e/full-workflow.test.ts    | 1506 +++++++++++++++++
 packages/ipuaro/tests/e2e/test-helpers.ts     |  351 ++++
 .../infrastructure/llm/ResponseParser.test.ts |  102 ++
 .../unit/infrastructure/llm/prompts.test.ts   |   19 +-
 11 files changed, 2430 insertions(+), 102 deletions(-)
 create mode 100644 packages/ipuaro/tests/e2e/full-workflow.test.ts
 create mode 100644 packages/ipuaro/tests/e2e/test-helpers.ts

diff --git a/packages/ipuaro/CHANGELOG.md b/packages/ipuaro/CHANGELOG.md
index 4d1587b..82c1474 100644
--- a/packages/ipuaro/CHANGELOG.md
+++ b/packages/ipuaro/CHANGELOG.md
@@ -5,6 +5,35 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.30.2] - 2025-12-05 - JSON Tool Call Parsing & Improved Prompts
+
+### Added
+
+- **JSON Tool Call Fallback in ResponseParser**
+  - LLM responses with JSON format `{"name": "tool", "arguments": {...}}` are now parsed
+  - Falls back to JSON when no XML tool call is found
+  - Works with models like qwen2.5-coder that prefer JSON over XML
+
+- **Tool Name Aliases**
+  - `get_functions`, `read_file`, `read_lines` → `get_lines`
+  - `list_files`, `get_files` → `get_structure`
+  - `find_todos` → `get_todos`
+  - And more common LLM typos/variations
+
+### Changed
+
+- **Improved System Prompt**
+  - Added clear "When to Use Tools" / "Do NOT use tools" sections
+  - More concise and directive instructions
+  - Better examples for tool usage
+
+### Technical Details
+
+- Total tests: 1848 passed (+8 new tests for JSON parsing)
+- 0 ESLint errors, 3 warnings (pre-existing complexity)
+
+---
+
 ## [0.30.1] - 2025-12-05 - Display Transitive Counts in Context
 
 ### Changed
diff --git a/packages/ipuaro/src/application/use-cases/HandleMessage.ts b/packages/ipuaro/src/application/use-cases/HandleMessage.ts
index d515c57..5c25f02 100644
--- a/packages/ipuaro/src/application/use-cases/HandleMessage.ts
+++ b/packages/ipuaro/src/application/use-cases/HandleMessage.ts
@@ -18,6 +18,7 @@ import {
     buildInitialContext,
     type ProjectStructure,
     SYSTEM_PROMPT,
+    TOOL_REMINDER,
 } from "../../infrastructure/llm/prompts.js"
 import { parseToolCalls } from "../../infrastructure/llm/ResponseParser.js"
 import type { IToolRegistry } from "../interfaces/IToolRegistry.js"
@@ -277,6 +278,12 @@ export class HandleMessage {
 
         messages.push(...session.history)
 
+        // Add tool reminder if last message is from user (first LLM call for this query)
+        const lastMessage = session.history[session.history.length - 1]
+        if (lastMessage?.role === "user") {
+            messages.push(createSystemMessage(TOOL_REMINDER))
+        }
+
         return messages
     }
 
diff --git a/packages/ipuaro/src/infrastructure/llm/OllamaClient.ts b/packages/ipuaro/src/infrastructure/llm/OllamaClient.ts
index 4162eba..4f59c0b 100644
--- a/packages/ipuaro/src/infrastructure/llm/OllamaClient.ts
+++ b/packages/ipuaro/src/infrastructure/llm/OllamaClient.ts
@@ -1,14 +1,17 @@
-import { type Message, Ollama } from "ollama"
+import { type Message, Ollama, type Tool } from "ollama"
 import type { ILLMClient, LLMResponse } from "../../domain/services/ILLMClient.js"
 import type { ChatMessage } from "../../domain/value-objects/ChatMessage.js"
+import { createToolCall, type ToolCall } from "../../domain/value-objects/ToolCall.js"
 import type { LLMConfig } from "../../shared/constants/config.js"
 import { IpuaroError } from "../../shared/errors/IpuaroError.js"
 import { estimateTokens } from "../../shared/utils/tokens.js"
 import { parseToolCalls } from "./ResponseParser.js"
+import { getOllamaNativeTools } from "./toolDefs.js"
 
 /**
  * Ollama LLM client implementation.
  * Wraps the Ollama SDK for chat completions with tool support.
+ * Supports both XML-based and native Ollama tool calling.
  */
 export class OllamaClient implements ILLMClient {
     private readonly client: Ollama
@@ -17,6 +20,7 @@ export class OllamaClient implements ILLMClient {
     private readonly contextWindow: number
     private readonly temperature: number
     private readonly timeout: number
+    private readonly useNativeTools: boolean
     private abortController: AbortController | null = null
 
     constructor(config: LLMConfig) {
@@ -26,11 +30,12 @@ export class OllamaClient implements ILLMClient {
         this.contextWindow = config.contextWindow
         this.temperature = config.temperature
         this.timeout = config.timeout
+        this.useNativeTools = config.useNativeTools ?? false
     }
 
     /**
      * Send messages to LLM and get response.
-     * Tool definitions should be included in the system prompt as XML format.
+     * Supports both XML-based tool calling and native Ollama tools.
      */
     async chat(messages: ChatMessage[]): Promise<LLMResponse> {
         const startTime = Date.now()
@@ -39,26 +44,11 @@ export class OllamaClient implements ILLMClient {
         try {
             const ollamaMessages = this.convertMessages(messages)
 
-            const response = await this.client.chat({
-                model: this.model,
-                messages: ollamaMessages,
-                options: {
-                    temperature: this.temperature,
-                },
-                stream: false,
-            })
-
-            const timeMs = Date.now() - startTime
-            const parsed = parseToolCalls(response.message.content)
-
-            return {
-                content: parsed.content,
-                toolCalls: parsed.toolCalls,
-                tokens: response.eval_count ?? estimateTokens(response.message.content),
-                timeMs,
-                truncated: false,
-                stopReason: this.determineStopReason(response, parsed.toolCalls),
+            if (this.useNativeTools) {
+                return await this.chatWithNativeTools(ollamaMessages, startTime)
             }
+
+            return await this.chatWithXMLTools(ollamaMessages, startTime)
         } catch (error) {
             if (error instanceof Error && error.name === "AbortError") {
                 throw IpuaroError.llm("Request was aborted")
@@ -69,6 +59,131 @@ export class OllamaClient implements ILLMClient {
         }
     }
 
+    /**
+     * Legacy path: send one non-streaming request and extract tool calls from the reply text.
+     */
+    private async chatWithXMLTools(
+        ollamaMessages: Message[],
+        startTime: number,
+    ): Promise<LLMResponse> {
+        const reply = await this.client.chat({
+            model: this.model,
+            messages: ollamaMessages,
+            options: { temperature: this.temperature },
+            stream: false,
+        })
+
+        const elapsedMs = Date.now() - startTime
+        const rawText = reply.message.content
+        const { content, toolCalls } = parseToolCalls(rawText)
+
+        return {
+            content,
+            toolCalls,
+            // Prefer the eval_count reported by Ollama; estimate from the text when it is absent.
+            tokens: reply.eval_count ?? estimateTokens(rawText),
+            timeMs: elapsedMs,
+            truncated: false,
+            stopReason: this.determineStopReason(reply, toolCalls),
+        }
+    }
+
+    /**
+     * Native path: pass the tool schemas to Ollama and read structured tool calls from the reply.
+     */
+    private async chatWithNativeTools(
+        ollamaMessages: Message[],
+        startTime: number,
+    ): Promise<LLMResponse> {
+        const reply = await this.client.chat({
+            model: this.model,
+            messages: ollamaMessages,
+            tools: getOllamaNativeTools() as Tool[],
+            options: { temperature: this.temperature },
+            stream: false,
+        })
+
+        const elapsedMs = Date.now() - startTime
+        const replyText = reply.message.content || ""
+
+        // Structured tool_calls take priority; only when there are none is the reply text
+        // searched, since some models emit their tool calls as JSON in the content instead.
+        const structured = this.parseNativeToolCalls(reply.message.tool_calls)
+        const toolCalls =
+            structured.length === 0 && replyText
+                ? this.parseToolCallsFromContent(replyText)
+                : structured
+        const calledTool = toolCalls.length > 0
+
+        return {
+            // The reply text is dropped whenever at least one tool call was extracted.
+            content: calledTool ? "" : replyText,
+            toolCalls,
+            // Fall back to an estimate when Ollama reports no eval_count.
+            tokens: reply.eval_count ?? estimateTokens(replyText),
+            timeMs: elapsedMs,
+            truncated: false,
+            stopReason: calledTool ? "tool_use" : "end",
+        }
+    }
+
+    /**
+     * Convert Ollama's structured tool calls into ToolCall objects with generated ids.
+     */
+    private parseNativeToolCalls(
+        nativeToolCalls?: { function: { name: string; arguments: Record<string, unknown> } }[],
+    ): ToolCall[] {
+        const converted: ToolCall[] = []
+
+        // An absent list behaves like an empty one: the callback simply never runs.
+        nativeToolCalls?.forEach((call, position) => {
+            const { name, arguments: args } = call.function
+            // Id = "native_" + current timestamp + "_" + position of the call in the list.
+            const id = `native_${String(Date.now())}_${String(position)}`
+            converted.push(createToolCall(id, name, args))
+        })
+
+        return converted
+    }
+
+    /**
+     * Parse tool calls from content (fallback for models that return JSON in content).
+     * Supports format: {"name": "tool_name", "arguments": {...}}
+     */
+    private parseToolCallsFromContent(content: string): ToolCall[] {
+        const toolCalls: ToolCall[] = []
+        // Tokens are string literals and braces, so braces inside strings never affect the depth.
+        const tokens = /"(?:[^"\\]|\\.)*"|[{}]/g
+        let depth = 0
+        let start = 0
+        // Depth-matched scan: a lazy regex cannot do this, it would end at the first "}" and cut
+        // off the nested "arguments" object, so JSON.parse would fail on every real tool call.
+        for (let token = tokens.exec(content); token !== null; token = tokens.exec(content)) {
+            if (token[0] === "{") {
+                start = depth++ === 0 ? token.index : start
+            } else if (depth === 0) {
+                // Stray quote or "}" outside any object: resume right after its first character.
+                tokens.lastIndex = token.index + 1
+            } else if (token[0] === "}" && --depth === 0) {
+                try {
+                    const parsed = JSON.parse(content.slice(start, token.index + 1)) as {
+                        name?: unknown
+                        arguments?: Record<string, unknown> | null
+                    }
+                    if (typeof parsed.name === "string" && parsed.name && "arguments" in parsed) {
+                        const id = `json_${String(Date.now())}_${String(toolCalls.length)}`
+                        toolCalls.push(createToolCall(id, parsed.name, parsed.arguments ?? {}))
+                    }
+                } catch {
+                    // Not valid JSON as a whole: rescan from just inside it for a nested object.
+                    tokens.lastIndex = start + 1
+                }
+            }
+        }
+
+        return toolCalls
+    }
+
     /**
      * Count tokens in text.
      * Uses estimation since Ollama doesn't provide a tokenizer endpoint.
diff --git a/packages/ipuaro/src/infrastructure/llm/ResponseParser.ts b/packages/ipuaro/src/infrastructure/llm/ResponseParser.ts
index 5a1929f..992e412 100644
--- a/packages/ipuaro/src/infrastructure/llm/ResponseParser.ts
+++ b/packages/ipuaro/src/infrastructure/llm/ResponseParser.ts
@@ -58,9 +58,50 @@ const VALID_TOOL_NAMES = new Set([
     "run_tests",
 ])
 
+/**
+ * Aliases for tool names that LLMs tend to produce instead of the real ones.
+ * Keys are written in lowercase because normalizeToolName lowercases a name before lookup.
+ */
+const TOOL_ALIASES: Record<string, string> = {
+    // get_lines aliases
+    get_functions: "get_lines",
+    read_file: "get_lines",
+    read_lines: "get_lines",
+    get_file: "get_lines",
+    read: "get_lines",
+    // get_function aliases
+    getfunction: "get_function",
+    // get_structure aliases
+    list_files: "get_structure",
+    get_files: "get_structure",
+    list_structure: "get_structure",
+    get_project_structure: "get_structure",
+    // get_todos aliases
+    find_todos: "get_todos",
+    list_todos: "get_todos",
+    // find_references aliases
+    get_references: "find_references",
+    // find_definition aliases
+    get_definition: "find_definition",
+    // edit_lines aliases
+    edit_file: "edit_lines",
+    modify_file: "edit_lines",
+    update_file: "edit_lines",
+}
+
+/** Resolve a tool-name alias case-insensitively; names without an alias are returned unchanged. */
+function normalizeToolName(name: string): string {
+    const key = name.toLowerCase()
+    // Own entries only: inherited keys ("constructor", "toString") are not string aliases.
+    const isOwn = Object.prototype.hasOwnProperty.call(TOOL_ALIASES, key)
+    return (isOwn ? TOOL_ALIASES[key] : undefined) ?? name
+}
+
 /**
  * Parse tool calls from LLM response text.
- * Supports XML format: <tool_call name="get_lines"><path>src/index.ts</path></tool_call>
+ * Supports both XML and JSON formats:
+ * - XML: <tool_call name="get_lines"><path>src/index.ts</path></tool_call>
+ * - JSON: {"name": "get_lines", "arguments": {"path": "src/index.ts"}}
  * Validates tool names and provides helpful error messages.
  */
 export function parseToolCalls(response: string): ParsedResponse {
@@ -68,14 +109,18 @@ export function parseToolCalls(response: string): ParsedResponse {
     const parseErrors: string[] = []
     let content = response
 
-    const matches = [...response.matchAll(TOOL_CALL_REGEX)]
+    // First, try XML format
+    const xmlMatches = [...response.matchAll(TOOL_CALL_REGEX)]
 
-    for (const match of matches) {
-        const [fullMatch, toolName, paramsXml] = match
+    for (const match of xmlMatches) {
+        const [fullMatch, rawToolName, paramsXml] = match
+
+        // Normalize tool name (handle common LLM typos/variations)
+        const toolName = normalizeToolName(rawToolName)
 
         if (!VALID_TOOL_NAMES.has(toolName)) {
             parseErrors.push(
-                `Unknown tool "${toolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
+                `Unknown tool "${rawToolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
             )
             continue
         }
@@ -91,7 +136,19 @@ export function parseToolCalls(response: string): ParsedResponse {
             content = content.replace(fullMatch, "")
         } catch (error) {
             const errorMsg = error instanceof Error ? error.message : String(error)
-            parseErrors.push(`Failed to parse tool call "${toolName}": ${errorMsg}`)
+            parseErrors.push(`Failed to parse tool call "${rawToolName}": ${errorMsg}`)
+        }
+    }
+
+    // If no XML tool calls found, try JSON format as fallback
+    if (toolCalls.length === 0) {
+        const jsonResult = parseJsonToolCalls(response)
+        toolCalls.push(...jsonResult.toolCalls)
+        parseErrors.push(...jsonResult.parseErrors)
+
+        // Remove JSON tool calls from content
+        for (const jsonMatch of jsonResult.matchedStrings) {
+            content = content.replace(jsonMatch, "")
         }
     }
 
@@ -105,6 +162,59 @@ export function parseToolCalls(response: string): ParsedResponse {
     }
 }
 
+/**
+ * Matches {"name": "tool_name", "arguments": {...}} with "name" first and "arguments" second.
+ * Group 1 is the name; group 2 is the arguments object (at most one level of nested braces).
+ */
+const JSON_TOOL_CALL_REGEX =
+    /\{\s*"name"\s*:\s*"([^"]+)"\s*,\s*"arguments"\s*:\s*(\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\})\s*\}/g
+
+/**
+ * JSON fallback parser, used for responses that express tool calls as JSON instead of XML.
+ *
+ * Tool names are run through normalizeToolName and checked against VALID_TOOL_NAMES.
+ * Returns the accepted calls, the error messages for rejected ones, and every matched
+ * substring (rejected matches included).
+ */
+function parseJsonToolCalls(response: string): {
+    toolCalls: ToolCall[]
+    parseErrors: string[]
+    matchedStrings: string[]
+} {
+    const calls: ToolCall[] = []
+    const errors: string[] = []
+    const matched: string[] = []
+
+    for (const [whole, rawName, rawArgs] of response.matchAll(JSON_TOOL_CALL_REGEX)) {
+        // Recorded before validation, so rejected matches are reported to the caller too.
+        matched.push(whole)
+
+        // Error messages quote the name as the model wrote it, not the normalized one.
+        const canonicalName = normalizeToolName(rawName)
+
+        if (VALID_TOOL_NAMES.has(canonicalName)) {
+            try {
+                const parsedArgs = JSON.parse(rawArgs) as Record<string, unknown>
+                const id = `json_${String(Date.now())}_${String(calls.length)}`
+                // createToolCall stays inside the try so that its failures are reported as well.
+                calls.push(createToolCall(id, canonicalName, parsedArgs))
+            } catch (error) {
+                const reason = error instanceof Error ? error.message : String(error)
+                errors.push(`Failed to parse JSON tool call "${rawName}": ${reason}`)
+            }
+        } else {
+            const validNames = [...VALID_TOOL_NAMES].join(", ")
+            errors.push(`Unknown tool "${rawName}". Valid tools: ${validNames}`)
+        }
+    }
+
+    return {
+        toolCalls: calls,
+        parseErrors: errors,
+        matchedStrings: matched,
+    }
+}
+
 /**
  * Parse parameters from XML content.
  */
diff --git a/packages/ipuaro/src/infrastructure/llm/prompts.ts b/packages/ipuaro/src/infrastructure/llm/prompts.ts
index 9519ba8..1953dff 100644
--- a/packages/ipuaro/src/infrastructure/llm/prompts.ts
+++ b/packages/ipuaro/src/infrastructure/llm/prompts.ts
@@ -25,99 +25,115 @@ export interface BuildContextOptions {
 /**
  * System prompt for the ipuaro AI agent.
  */
-export const SYSTEM_PROMPT = `You are ipuaro, a local AI code assistant specialized in helping developers understand and modify their codebase. You operate within a single project directory and have access to powerful tools for reading, searching, analyzing, and editing code.
+export const SYSTEM_PROMPT = `You are ipuaro, a local AI code assistant with tools for reading, searching, analyzing, and editing code.
 
-## Core Principles
+## When to Use Tools
 
-1. **Lazy Loading**: You don't have the full code in context. Use tools to fetch exactly what you need.
-2. **Precision**: Always verify file paths and line numbers before making changes.
-3. **Safety**: Confirm destructive operations. Never execute dangerous commands.
-4. **Efficiency**: Minimize context usage. Request only necessary code sections.
+**Use tools** when the user asks about:
+- Code content (files, functions, classes)
+- Project structure
+- TODOs, complexity, dependencies
+- Git status, diffs, commits
+- Running commands or tests
 
-## Tool Calling Format
+**Do NOT use tools** for:
+- Greetings ("Hello", "Hi", "Thanks")
+- General questions not about this codebase
+- Clarifying questions back to the user
 
-When you need to use a tool, format your call as XML:
+## MANDATORY: Tools for Code Questions
 
-<tool_call name="tool_name">
-  <param_name>value</param_name>
-  <another_param>value</another_param>
-</tool_call>
+**CRITICAL:** You have ZERO code in your context. To answer ANY question about code, you MUST first call a tool.
 
-You can call multiple tools in one response. Always wait for tool results before making conclusions.
-
-**Examples:**
+**WRONG:**
+User: "What's in src/index.ts?"
+Assistant: "The file likely contains..." ← WRONG! Call a tool!
 
+**CORRECT:**
+User: "What's in src/index.ts?"
 <tool_call name="get_lines">
-  <path>src/index.ts</path>
-  <start>1</start>
-  <end>50</end>
+<path>src/index.ts</path>
 </tool_call>
 
-<tool_call name="edit_lines">
-  <path>src/utils.ts</path>
-  <start>10</start>
-  <end>15</end>
-  <content>const newCode = "hello";</content>
+## Tool Call Format
+
+Output this XML format. Do NOT explain before calling - just output the XML:
+
+<tool_call name="TOOL_NAME">
+<param1>value1</param1>
+<param2>value2</param2>
 </tool_call>
 
-<tool_call name="find_references">
-  <symbol>getUserById</symbol>
+## Example Interactions
+
+**Example 1 - Reading a file:**
+User: "Show me the main function in src/app.ts"
+<tool_call name="get_function">
+<path>src/app.ts</path>
+<name>main</name>
+</tool_call>
+
+**Example 2 - Finding TODOs:**
+User: "Are there any TODO comments?"
+<tool_call name="get_todos">
+</tool_call>
+
+**Example 3 - Project structure:**
+User: "What files are in this project?"
+<tool_call name="get_structure">
+<path>.</path>
 </tool_call>
 
 ## Available Tools
 
-### Reading Tools
-- \`get_lines(path, start?, end?)\`: Get specific lines from a file
-- \`get_function(path, name)\`: Get a function by name
-- \`get_class(path, name)\`: Get a class by name
-- \`get_structure(path?, depth?)\`: Get project directory structure
+### Reading
+- get_lines(path, start?, end?) - Read file lines
+- get_function(path, name) - Get function by name
+- get_class(path, name) - Get class by name
+- get_structure(path?, depth?) - List project files
 
-### Editing Tools (require confirmation)
-- \`edit_lines(path, start, end, content)\`: Replace specific lines in a file
-- \`create_file(path, content)\`: Create a new file
-- \`delete_file(path)\`: Delete a file
+### Analysis
+- get_todos(path?, type?) - Find TODO/FIXME comments
+- get_dependencies(path) - What this file imports
+- get_dependents(path) - What imports this file
+- get_complexity(path?) - Code complexity metrics
+- find_references(symbol) - Find all usages of a symbol
+- find_definition(symbol) - Find where symbol is defined
 
-### Search Tools
-- \`find_references(symbol, path?)\`: Find all usages of a symbol
-- \`find_definition(symbol)\`: Find where a symbol is defined
+### Editing (requires confirmation)
+- edit_lines(path, start, end, content) - Modify file lines
+- create_file(path, content) - Create new file
+- delete_file(path) - Delete a file
 
-### Analysis Tools
-- \`get_dependencies(path)\`: Get files this file imports
-- \`get_dependents(path)\`: Get files that import this file
-- \`get_complexity(path?, limit?)\`: Get complexity metrics
-- \`get_todos(path?, type?)\`: Find TODO/FIXME comments
+### Git
+- git_status() - Repository status
+- git_diff(path?, staged?) - Show changes
+- git_commit(message, files?) - Create commit
 
-### Git Tools
-- \`git_status()\`: Get repository status
-- \`git_diff(path?, staged?)\`: Get uncommitted changes
-- \`git_commit(message, files?)\`: Create a commit (requires confirmation)
+### Commands
+- run_command(command, timeout?) - Execute shell command
+- run_tests(path?, filter?) - Run test suite
 
-### Run Tools
-- \`run_command(command, timeout?)\`: Execute a shell command (security checked)
-- \`run_tests(path?, filter?, watch?)\`: Run the test suite
+## Rules
 
-## Response Guidelines
+1. **ALWAYS call a tool first** when asked about code - you cannot see any files
+2. **Output XML directly** - don't say "I will use..." just output the tool call
+3. **Wait for results** before making conclusions
+4. **Be concise** in your responses
+5. **Verify before editing** - always read code before modifying it
+6. **Stay safe** - never execute destructive commands without user confirmation`
 
-1. **Be concise**: Don't repeat information already in context.
-2. **Show your work**: Explain what tools you're using and why.
-3. **Verify before editing**: Always read the target code before modifying it.
-4. **Handle errors gracefully**: If a tool fails, explain what went wrong and suggest alternatives.
+/**
+ * Tool usage reminder that reinforces the "call a tool first" rule.
+ * HandleMessage appends it as a final system message when the last history entry is from the user.
+ */
+export const TOOL_REMINDER = `⚠️ REMINDER: To answer this question, you MUST use a tool first.
+Output the <tool_call> XML directly. Do NOT describe what you will do - just call the tool.
 
-## Code Editing Rules
-
-1. Always use \`get_lines\` or \`get_function\` before \`edit_lines\`.
-2. Provide exact line numbers for edits.
-3. For large changes, break into multiple small edits.
-4. After editing, suggest running tests if available.
-
-## Safety Rules
-
-1. Never execute commands that could harm the system.
-2. Never expose sensitive data (API keys, passwords).
-3. Always confirm file deletions and destructive git operations.
-4. Stay within the project directory.
-
-When you need to perform an action, use the appropriate tool. Think step by step about what information you need and which tools will provide it most efficiently.`
+Example - if asked about a file, output:
+<tool_call name="get_lines">
+<path>the/file/path.ts</path>
+</tool_call>`
 
 /**
  * Build initial context from project structure and AST metadata.
diff --git a/packages/ipuaro/src/infrastructure/llm/toolDefs.ts b/packages/ipuaro/src/infrastructure/llm/toolDefs.ts
index 7fe7701..b9dd0b7 100644
--- a/packages/ipuaro/src/infrastructure/llm/toolDefs.ts
+++ b/packages/ipuaro/src/infrastructure/llm/toolDefs.ts
@@ -509,3 +509,87 @@ export function getToolsByCategory(category: string): ToolDef[] {
             return []
     }
 }
+
+/*
+ * =============================================================================
+ * Native Ollama Tools Format
+ * =============================================================================
+ */
+
+/**
+ * One tool definition in Ollama's native format, as produced by convertToOllamaTool.
+ */
+export interface OllamaTool {
+    type: "function"
+    function: {
+        name: string
+        description: string
+        parameters: {
+            type: "object"
+            properties: Record<string, OllamaToolProperty> // keyed by parameter name
+            required: string[] // names of the parameters flagged as required
+        }
+    }
+}
+
+interface OllamaToolProperty {
+    type: string // parameter type name, copied from the ToolDef parameter
+    description: string
+    enum?: string[] // allowed values; present only when the ToolDef parameter has an enum
+    items?: { type: string } // element type; set only for array parameters
+}
+
+/**
+ * Build the Ollama function-tool schema for a single ToolDef.
+ *
+ * Each parameter becomes one entry in `properties`; the names of parameters flagged as
+ * required are additionally collected, in declaration order, into `required`.
+ */
+function convertToOllamaTool(tool: ToolDef): OllamaTool {
+    const properties: Record<string, OllamaToolProperty> = {}
+
+    // Entries are keyed by parameter name, so if a name were repeated only the
+    // last definition would survive.
+    for (const { name, type, description, enum: allowedValues } of tool.parameters) {
+        properties[name] = {
+            type,
+            description,
+            // `enum` is emitted only when the parameter declares allowed values.
+            ...(allowedValues ? { enum: allowedValues } : {}),
+            // Array parameters are always described as arrays of strings.
+            ...(type === "array" ? { items: { type: "string" } } : {}),
+        }
+    }
+
+    // Required names keep the order in which the parameters are declared.
+    const required = tool.parameters
+        .filter((param) => param.required)
+        .map((param) => param.name)
+
+    // Wrap everything in the `type: "function"` envelope declared by OllamaTool.
+    return {
+        type: "function",
+        function: {
+            name: tool.name,
+            description: tool.description,
+            parameters: {
+                type: "object",
+                properties,
+                required,
+            },
+        },
+    }
+}
+
+/**
+ * Every entry of ALL_TOOLS converted once, at module load, to Ollama's native format.
+ * Sent with chat requests when useNativeTools is enabled.
+ */
+export const OLLAMA_NATIVE_TOOLS: OllamaTool[] = ALL_TOOLS.map((tool) => convertToOllamaTool(tool))
+
+/**
+ * Accessor for OLLAMA_NATIVE_TOOLS; returns the shared array itself rather than a copy.
+ */
+export function getOllamaNativeTools(): OllamaTool[] {
+    return OLLAMA_NATIVE_TOOLS
+}
diff --git a/packages/ipuaro/src/shared/constants/config.ts b/packages/ipuaro/src/shared/constants/config.ts
index cdd0d3a..de21bc9 100644
--- a/packages/ipuaro/src/shared/constants/config.ts
+++ b/packages/ipuaro/src/shared/constants/config.ts
@@ -20,6 +20,7 @@ export const LLMConfigSchema = z.object({
     temperature: z.number().min(0).max(2).default(0.1),
     host: z.string().default("http://localhost:11434"),
     timeout: z.number().int().positive().default(120_000),
+    useNativeTools: z.boolean().default(false),
 })
 
 /**
diff --git a/packages/ipuaro/tests/e2e/full-workflow.test.ts b/packages/ipuaro/tests/e2e/full-workflow.test.ts
new file mode 100644
index 0000000..e1eb852
--- /dev/null
+++ b/packages/ipuaro/tests/e2e/full-workflow.test.ts
@@ -0,0 +1,1506 @@
+/**
+ * E2E Tests with REAL Ollama Integration
+ *
+ * These tests use the actual Ollama LLM to test the full workflow
+ * without the TUI layer.
+ *
+ * Requirements:
+ * - Ollama running at localhost:11434
+ * - qwen2.5-coder:14b-instruct model installed (with native tools support)
+ *
+ * Run: pnpm test:run tests/e2e/
+ */
+
+import { describe, it, expect, beforeAll, beforeEach, afterEach } from "vitest"
+import * as fs from "node:fs/promises"
+import * as path from "node:path"
+import { HandleMessage } from "../../src/application/use-cases/HandleMessage.js"
+import { ExecuteTool } from "../../src/application/use-cases/ExecuteTool.js"
+import { StartSession } from "../../src/application/use-cases/StartSession.js"
+import { UndoChange } from "../../src/application/use-cases/UndoChange.js"
+import { IndexProject } from "../../src/application/use-cases/IndexProject.js"
+import { ContextManager } from "../../src/application/use-cases/ContextManager.js"
+import type { HandleMessageEvents } from "../../src/application/use-cases/HandleMessage.js"
+import type { ChatMessage } from "../../src/domain/value-objects/ChatMessage.js"
+import type { ToolCall } from "../../src/domain/value-objects/ToolCall.js"
+import type { ToolResult } from "../../src/domain/value-objects/ToolResult.js"
+import type { ProjectStructure } from "../../src/infrastructure/llm/prompts.js"
+import { simpleGit } from "simple-git"
+import {
+    createE2ETestDependencies,
+    cleanupTestProject,
+    isOllamaAvailable,
+    isModelAvailable,
+    type E2ETestDependencies,
+} from "./test-helpers.js"
+
+describe("E2E: Full Workflow with Real Ollama", () => {
+    let deps: E2ETestDependencies
+    let ollamaAvailable: boolean
+    let modelAvailable: boolean
+
+    beforeAll(async () => {
+        ollamaAvailable = await isOllamaAvailable()
+        if (ollamaAvailable) {
+            modelAvailable = await isModelAvailable()
+        } else {
+            modelAvailable = false
+        }
+    })
+
+    beforeEach(async () => {
+        if (!ollamaAvailable || !modelAvailable) {
+            return
+        }
+        deps = await createE2ETestDependencies()
+    })
+
+    afterEach(async () => {
+        if (deps?.projectRoot) {
+            await cleanupTestProject(deps.projectRoot)
+        }
+    })
+
+    describe("Prerequisites", () => {
+        it("should have Ollama running", async () => {
+            expect(ollamaAvailable).toBe(true)
+        })
+
+        it("should have qwen2.5-coder:14b-instruct model", async () => {
+            if (!ollamaAvailable) {
+                console.warn("Skipping: Ollama not available")
+                return
+            }
+            expect(modelAvailable).toBe(true)
+        })
+
+        it("should have test project created", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const indexPath = path.join(deps.projectRoot, "src", "index.ts")
+            const content = await fs.readFile(indexPath, "utf-8")
+
+            expect(content).toContain("export function main")
+            expect(content).toContain("export function add")
+        })
+
+        it("should have all 18 tools registered", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            expect(deps.tools.size).toBe(18)
+
+            const toolNames = deps.tools.getNames()
+            expect(toolNames).toContain("get_lines")
+            expect(toolNames).toContain("get_function")
+            expect(toolNames).toContain("get_class")
+            expect(toolNames).toContain("get_structure")
+            expect(toolNames).toContain("edit_lines")
+            expect(toolNames).toContain("create_file")
+            expect(toolNames).toContain("delete_file")
+            expect(toolNames).toContain("find_references")
+            expect(toolNames).toContain("find_definition")
+            expect(toolNames).toContain("get_dependencies")
+            expect(toolNames).toContain("get_dependents")
+            expect(toolNames).toContain("get_complexity")
+            expect(toolNames).toContain("get_todos")
+            expect(toolNames).toContain("git_status")
+            expect(toolNames).toContain("git_diff")
+            expect(toolNames).toContain("git_commit")
+            expect(toolNames).toContain("run_command")
+            expect(toolNames).toContain("run_tests")
+        })
+    })
+
+    describe("HandleMessage with Real LLM", () => {
+        it("should process a simple question and get response", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const handleMessage = new HandleMessage(
+                deps.storage,
+                deps.sessionStorage,
+                deps.llm,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const messages: ChatMessage[] = []
+            const toolCalls: ToolCall[] = []
+            const toolResults: ToolResult[] = []
+
+            const userQuery = "Hello! Just say hi back."
+            console.log("\n" + "=".repeat(60))
+            console.log("[USER QUERY]:", userQuery)
+            console.log("=".repeat(60))
+
+            const events: HandleMessageEvents = {
+                onMessage: (msg) => {
+                    messages.push(msg)
+                    if (msg.role === "assistant") {
+                        console.log("\n[LLM RESPONSE]:", msg.content?.substring(0, 200) + "...")
+                    }
+                },
+                onToolCall: (call) => {
+                    toolCalls.push(call)
+                    console.log("[TOOL CALL]:", call.name, JSON.stringify(call.params))
+                },
+                onToolResult: (result) => {
+                    toolResults.push(result)
+                    console.log("[TOOL RESULT]:", result.success ? "✅ Success" : "❌ Error")
+                },
+                onConfirmation: async () => true,
+            }
+
+            handleMessage.setEvents(events)
+            handleMessage.setOptions({ autoApply: true, maxToolCalls: 10 })
+
+            await handleMessage.execute(deps.session, userQuery)
+
+            expect(messages.length).toBeGreaterThan(0)
+
+            const assistantMessages = messages.filter((m) => m.role === "assistant")
+            expect(assistantMessages.length).toBeGreaterThan(0)
+
+            expect(deps.session.history.length).toBeGreaterThan(0)
+        }, 120_000)
+
+        it("should use get_lines tool when asked to read a file", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const handleMessage = new HandleMessage(
+                deps.storage,
+                deps.sessionStorage,
+                deps.llm,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCalls: ToolCall[] = []
+            const toolResults: ToolResult[] = []
+
+            const userQuery = "Read the file src/index.ts and tell me what functions are defined there."
+            console.log("\n" + "=".repeat(60))
+            console.log("[USER QUERY]:", userQuery)
+            console.log("[PROJECT ROOT]:", deps.projectRoot)
+            console.log("=".repeat(60))
+
+            const events: HandleMessageEvents = {
+                onMessage: (msg) => {
+                    if (msg.role === "assistant") {
+                        console.log("\n[LLM RESPONSE]:", msg.content?.substring(0, 500))
+                    }
+                },
+                onToolCall: (call) => {
+                    console.log("\n🔧 [TOOL CALL]:", call.name)
+                    console.log("   Params:", JSON.stringify(call.params, null, 2))
+                    toolCalls.push(call)
+                },
+                onToolResult: (result) => {
+                    console.log("   [TOOL RESULT]:", result.success ? "✅ Success" : "❌ Error")
+                    if (result.data) {
+                        const dataStr = JSON.stringify(result.data)
+                        console.log("   Data:", dataStr.substring(0, 200) + (dataStr.length > 200 ? "..." : ""))
+                    }
+                    toolResults.push(result)
+                },
+                onConfirmation: async () => true,
+            }
+
+            handleMessage.setEvents(events)
+            handleMessage.setOptions({ autoApply: true, maxToolCalls: 5 })
+
+            await handleMessage.execute(deps.session, userQuery)
+
+            const assistantMessages = deps.session.history.filter((m) => m.role === "assistant")
+            expect(assistantMessages.length).toBeGreaterThan(0)
+
+            if (toolCalls.length > 0) {
+                console.log("\n✅ Tools used:", toolCalls.map((tc) => tc.name))
+            } else {
+                console.log("\n⚠️ LLM responded without using tools")
+            }
+        }, 180_000)
+
+        it("should use get_todos tool when asked to find TODOs", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const handleMessage = new HandleMessage(
+                deps.storage,
+                deps.sessionStorage,
+                deps.llm,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCalls: ToolCall[] = []
+
+            const events: HandleMessageEvents = {
+                onToolCall: (call) => {
+                    console.log(`Tool called: ${call.name}`)
+                    toolCalls.push(call)
+                },
+                onConfirmation: async () => true,
+            }
+
+            handleMessage.setEvents(events)
+            handleMessage.setOptions({ autoApply: true, maxToolCalls: 5 })
+
+            await handleMessage.execute(deps.session, "Find all TODO and FIXME comments in the project.")
+
+            const todoToolCalls = toolCalls.filter((tc) => tc.name === "get_todos")
+
+            if (todoToolCalls.length > 0) {
+                expect(todoToolCalls[0].name).toBe("get_todos")
+            } else {
+                console.log("LLM did not use get_todos tool, but used:", toolCalls.map((tc) => tc.name))
+            }
+
+            expect(deps.session.history.length).toBeGreaterThan(0)
+        }, 120_000)
+
+        it("should use get_structure tool when asked about project structure", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const handleMessage = new HandleMessage(
+                deps.storage,
+                deps.sessionStorage,
+                deps.llm,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCalls: ToolCall[] = []
+
+            const events: HandleMessageEvents = {
+                onToolCall: (call) => {
+                    console.log(`Tool called: ${call.name}`)
+                    toolCalls.push(call)
+                },
+                onConfirmation: async () => true,
+            }
+
+            handleMessage.setEvents(events)
+            handleMessage.setOptions({ autoApply: true, maxToolCalls: 5 })
+
+            await handleMessage.execute(deps.session, "Show me the project file structure.")
+
+            const structureToolCalls = toolCalls.filter((tc) => tc.name === "get_structure")
+
+            if (structureToolCalls.length > 0) {
+                expect(structureToolCalls[0].name).toBe("get_structure")
+            } else {
+                console.log("LLM used tools:", toolCalls.map((tc) => tc.name))
+            }
+
+            expect(deps.session.history.length).toBeGreaterThan(0)
+        }, 120_000)
+
+        it("should use get_class tool when asked about a class", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const handleMessage = new HandleMessage(
+                deps.storage,
+                deps.sessionStorage,
+                deps.llm,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            // Record every tool call the model makes during this turn.
+            const toolCalls: ToolCall[] = []
+            const events: HandleMessageEvents = {
+                onToolCall: (call) => {
+                    console.log(`Tool called: ${call.name}`)
+                    toolCalls.push(call)
+                },
+                onConfirmation: async () => true,
+            }
+
+            handleMessage.setEvents(events)
+            handleMessage.setOptions({ autoApply: true, maxToolCalls: 5 })
+
+            await handleMessage.execute(
+                deps.session,
+                "Show me the Calculator class from src/utils.ts.",
+            )
+
+            const classToolCalls = toolCalls.filter(
+                (tc) => tc.name === "get_class" || tc.name === "get_lines",
+            )
+            // Tool choice is model-dependent, so matching calls are logged, not asserted.
+            console.log("Class lookup tools used:", classToolCalls.map((tc) => tc.name))
+            expect(deps.session.history.length).toBeGreaterThan(0)
+        }, 120_000)
+
+        it("should use get_function tool when asked about a function", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const handleMessage = new HandleMessage(
+                deps.storage,
+                deps.sessionStorage,
+                deps.llm,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            // Record every tool call the model makes during this turn.
+            const toolCalls: ToolCall[] = []
+            const events: HandleMessageEvents = {
+                onToolCall: (call) => {
+                    console.log(`Tool called: ${call.name}`)
+                    toolCalls.push(call)
+                },
+                onConfirmation: async () => true,
+            }
+
+            handleMessage.setEvents(events)
+            handleMessage.setOptions({ autoApply: true, maxToolCalls: 5 })
+
+            await handleMessage.execute(
+                deps.session,
+                "Show me the 'add' function from src/index.ts.",
+            )
+
+            const functionToolCalls = toolCalls.filter(
+                (tc) => tc.name === "get_function" || tc.name === "get_lines",
+            )
+            // Tool choice is model-dependent, so matching calls are logged, not asserted.
+            console.log("Function lookup tools used:", functionToolCalls.map((tc) => tc.name))
+            expect(deps.session.history.length).toBeGreaterThan(0)
+        }, 120_000)
+    })
+
+    describe("ExecuteTool Direct Execution", () => {
+        it("should execute get_lines tool directly", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const executeTool = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCall: ToolCall = {
+                id: "test-1",
+                name: "get_lines",
+                params: {
+                    path: "src/index.ts",
+                    start: 1,
+                    end: 10,
+                },
+            }
+
+            const { result } = await executeTool.execute(toolCall, deps.session, {
+                autoApply: true,
+            })
+
+            expect(result.success).toBe(true)
+            expect(result.data).toBeDefined()
+        })
+
+        it("should execute get_structure tool directly", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const executeTool = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCall: ToolCall = {
+                id: "test-2",
+                name: "get_structure",
+                params: {
+                    path: ".",
+                    depth: 3,
+                },
+            }
+
+            const { result } = await executeTool.execute(toolCall, deps.session, {
+                autoApply: true,
+            })
+
+            expect(result.success).toBe(true)
+            expect(result.data).toBeDefined()
+        })
+
+        it("should execute get_todos tool directly", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            // get_todos uses storage.getAllFiles() - since storage is empty,
+            // it will return empty results. This is expected behavior.
+            // In a real scenario, the project would be indexed first.
+            const executeTool = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCall: ToolCall = {
+                id: "test-3",
+                name: "get_todos",
+                params: {},
+            }
+
+            const { result } = await executeTool.execute(toolCall, deps.session, {
+                autoApply: true,
+            })
+
+            // The tool succeeds but returns empty when no files are indexed
+            expect(result.success).toBe(true)
+            expect(result.data).toBeDefined()
+
+            if (result.data && typeof result.data === "object" && "todos" in result.data) {
+                const data = result.data as { totalTodos: number; todos: unknown[] }
+                // With empty storage, totalTodos will be 0
+                expect(data.totalTodos).toBeGreaterThanOrEqual(0)
+            }
+        })
+
+        it("should execute create_file tool with confirmation", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const executeTool = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCall: ToolCall = {
+                id: "test-4",
+                name: "create_file",
+                params: {
+                    path: "src/new-file.ts",
+                    content: "export const test = 42;\n",
+                },
+            }
+
+            const { result, undoEntryCreated } = await executeTool.execute(toolCall, deps.session, {
+                autoApply: true,
+            })
+
+            expect(result.success).toBe(true)
+            expect(undoEntryCreated).toBe(true)
+
+            const newFilePath = path.join(deps.projectRoot, "src", "new-file.ts")
+            const content = await fs.readFile(newFilePath, "utf-8")
+            expect(content).toBe("export const test = 42;\n")
+        })
+
+        it("should execute edit_lines tool with confirmation", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const executeTool = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCall: ToolCall = {
+                id: "test-5",
+                name: "edit_lines",
+                params: {
+                    path: "src/index.ts",
+                    start: 4,
+                    end: 4,
+                    content: '    console.log("Modified!");',
+                },
+            }
+
+            const { result, undoEntryCreated } = await executeTool.execute(toolCall, deps.session, {
+                autoApply: true,
+            })
+
+            expect(result.success).toBe(true)
+            expect(undoEntryCreated).toBe(true)
+
+            const modifiedContent = await fs.readFile(
+                path.join(deps.projectRoot, "src", "index.ts"),
+                "utf-8",
+            )
+            expect(modifiedContent).toContain("Modified!")
+        })
+
+        it("should execute delete_file tool with confirmation", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const tempFilePath = path.join(deps.projectRoot, "src", "to-delete.ts")
+            await fs.writeFile(tempFilePath, "// File to delete\n")
+
+            const executeTool = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCall: ToolCall = {
+                id: "test-6",
+                name: "delete_file",
+                params: {
+                    path: "src/to-delete.ts",
+                },
+            }
+
+            const { result, undoEntryCreated } = await executeTool.execute(toolCall, deps.session, {
+                autoApply: true,
+            })
+
+            expect(result.success).toBe(true)
+            expect(undoEntryCreated).toBe(true)
+
+            const exists = await fs
+                .access(tempFilePath)
+                .then(() => true)
+                .catch(() => false)
+            expect(exists).toBe(false)
+        })
+
+        it("should execute run_command tool", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const executeTool = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCall: ToolCall = {
+                id: "test-7",
+                name: "run_command",
+                params: {
+                    command: "echo 'Hello from E2E test'",
+                },
+            }
+
+            const { result } = await executeTool.execute(toolCall, deps.session, {
+                autoApply: true,
+            })
+
+            expect(result.success).toBe(true)
+            if (result.data && typeof result.data === "object" && "stdout" in result.data) {
+                expect(result.data.stdout).toContain("Hello from E2E test")
+            }
+        })
+    })
+
+    describe("Multi-turn Conversation", () => {
+        it("should maintain context across multiple messages", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const handleMessage = new HandleMessage(
+                deps.storage,
+                deps.sessionStorage,
+                deps.llm,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            handleMessage.setEvents({
+                onConfirmation: async () => true,
+            })
+            handleMessage.setOptions({ autoApply: true, maxToolCalls: 5 })
+
+            await handleMessage.execute(deps.session, "Read src/index.ts file.")
+
+            expect(deps.session.history.length).toBeGreaterThan(0)
+            const historyBeforeSecond = deps.session.history.length
+
+            await handleMessage.execute(deps.session, "Now what functions are in that file?")
+
+            expect(deps.session.history.length).toBeGreaterThan(historyBeforeSecond)
+        }, 180_000)
+    })
+
+    describe("HandleMessage with ProjectStructure", () => {
+        it("should use tools when project structure is provided", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const handleMessage = new HandleMessage(
+                deps.storage,
+                deps.sessionStorage,
+                deps.llm,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            // Set up project structure for context
+            const projectStructure: ProjectStructure = {
+                name: "test-project",
+                rootPath: deps.projectRoot,
+                files: ["src/index.ts", "src/utils.ts", "package.json", "README.md"],
+                directories: ["src"],
+            }
+
+            handleMessage.setProjectStructure(projectStructure)
+
+            const toolCalls: ToolCall[] = []
+
+            const events: HandleMessageEvents = {
+                onToolCall: (call) => {
+                    console.log(`[ProjectStructure test] Tool called: ${call.name}`)
+                    toolCalls.push(call)
+                },
+                onConfirmation: async () => true,
+            }
+
+            handleMessage.setEvents(events)
+            handleMessage.setOptions({ autoApply: true, maxToolCalls: 5 })
+
+            // Ask explicitly to use a tool
+            await handleMessage.execute(
+                deps.session,
+                "Use the get_structure tool to show me the project file structure.",
+            )
+
+            const assistantMessages = deps.session.history.filter((m) => m.role === "assistant")
+            expect(assistantMessages.length).toBeGreaterThan(0)
+
+            // Log what happened
+            if (toolCalls.length > 0) {
+                console.log("Tools used with ProjectStructure:", toolCalls.map((tc) => tc.name))
+            } else {
+                console.log(
+                    "LLM answered without tools - this is acceptable as tool usage is non-deterministic",
+                )
+            }
+        }, 120_000)
+    })
+
+    describe("Error Handling", () => {
+        it("should handle non-existent file gracefully", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const executeTool = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCall: ToolCall = {
+                id: "test-error-1",
+                name: "get_lines",
+                params: {
+                    path: "non-existent-file.ts",
+                    start: 1,
+                    end: 10,
+                },
+            }
+
+            const { result } = await executeTool.execute(toolCall, deps.session, {
+                autoApply: true,
+            })
+
+            expect(result.success).toBe(false)
+            expect(result.error).toBeDefined()
+        })
+
+        it("should handle invalid tool parameters gracefully", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const executeTool = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCall: ToolCall = {
+                id: "test-error-2",
+                name: "get_lines",
+                params: {},
+            }
+
+            const { result } = await executeTool.execute(toolCall, deps.session, {
+                autoApply: true,
+            })
+
+            expect(result.success).toBe(false)
+            expect(result.error).toBeDefined()
+        })
+
+        it("should handle unknown tool gracefully", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const executeTool = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCall: ToolCall = {
+                id: "test-error-3",
+                name: "unknown_tool",
+                params: {},
+            }
+
+            const { result } = await executeTool.execute(toolCall, deps.session, {
+                autoApply: true,
+            })
+
+            expect(result.success).toBe(false)
+            expect(result.error).toContain("Unknown tool")
+        })
+    })
+
+    describe("All 18 Tools - Direct Execution", () => {
+        // READ TOOLS
+        it("should execute get_function tool", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const executeTool = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCall: ToolCall = {
+                id: "tool-get_function",
+                name: "get_function",
+                params: {
+                    path: "src/index.ts",
+                    name: "add",
+                },
+            }
+
+            const { result } = await executeTool.execute(toolCall, deps.session, {
+                autoApply: true,
+            })
+
+            // May fail if tree-sitter can't parse, but tool should return defined result
+            expect(result).toBeDefined()
+            if (result.success) {
+                expect(result.data).toBeDefined()
+            } else {
+                console.log("get_function error:", result.error)
+            }
+        })
+
+        it("should execute get_class tool", async () => {
+            if (!ollamaAvailable || !modelAvailable) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const executeTool = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const toolCall: ToolCall = {
+                id: "tool-get_class",
+                name: "get_class",
+                params: {
+                    path: "src/utils.ts",
+                    name: "Calculator",
+                },
+            }
+
+            const { result } = await executeTool.execute(toolCall, deps.session, {
+                autoApply: true,
+            })
+
+            // May fail if tree-sitter can't parse, but tool should return defined result
+            expect(result).toBeDefined()
+            if (result.success) {
+                expect(result.data).toBeDefined()
+            } else {
+                console.log("get_class error:", result.error)
+            }
+        })
+
+        // SEARCH TOOLS (require indexed storage)
+        it("should execute find_references tool", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const request: ToolCall = {
+                id: "tool-find_references",
+                name: "find_references",
+                params: {
+                    symbol: "add",
+                },
+            }
+            const runner = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const outcome = await runner.execute(request, deps.session, {
+                autoApply: true,
+            })
+            const result = outcome.result
+
+            // Success is required; the match list may be empty when storage is not indexed
+            expect(result.success).toBe(true)
+        })
+
+        it("should execute find_definition tool", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const request: ToolCall = {
+                id: "tool-find_definition",
+                name: "find_definition",
+                params: {
+                    symbol: "Calculator",
+                },
+            }
+            const runner = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const outcome = await runner.execute(request, deps.session, {
+                autoApply: true,
+            })
+            const result = outcome.result
+
+            // Success is required; the match list may be empty when storage is not indexed
+            expect(result.success).toBe(true)
+        })
+
+        // ANALYSIS TOOLS
+        it("should execute get_dependencies tool", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const request: ToolCall = {
+                id: "tool-get_dependencies",
+                name: "get_dependencies",
+                params: {
+                    path: "src/utils.ts",
+                },
+            }
+            const runner = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const outcome = await runner.execute(request, deps.session, {
+                autoApply: true,
+            })
+            const result = outcome.result
+
+            // An error is acceptable when storage is not indexed; a result must still come back
+            expect(result).toBeDefined()
+            if (!result.success) {
+                console.log("get_dependencies error (expected without index):", result.error)
+            }
+        })
+
+        it("should execute get_dependents tool", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const request: ToolCall = {
+                id: "tool-get_dependents",
+                name: "get_dependents",
+                params: {
+                    path: "src/index.ts",
+                },
+            }
+            const runner = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const outcome = await runner.execute(request, deps.session, {
+                autoApply: true,
+            })
+            const result = outcome.result
+
+            // An error is acceptable when storage is not indexed; a result must still come back
+            expect(result).toBeDefined()
+            if (!result.success) {
+                console.log("get_dependents error (expected without index):", result.error)
+            }
+        })
+
+        it("should execute get_complexity tool", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const request: ToolCall = {
+                id: "tool-get_complexity",
+                name: "get_complexity",
+                params: {
+                    path: "src",
+                },
+            }
+            const runner = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const outcome = await runner.execute(request, deps.session, {
+                autoApply: true,
+            })
+            const result = outcome.result
+
+            // Success is required; the report may be empty when storage is not indexed
+            expect(result.success).toBe(true)
+        })
+
+        // GIT TOOLS
+        it("should execute git_status tool", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            // Turn the project directory into a git repository with a committer identity
+            const repo = simpleGit(deps.projectRoot)
+            await repo.init()
+            await repo.addConfig("user.email", "test@test.com")
+            await repo.addConfig("user.name", "Test User")
+
+            const request: ToolCall = {
+                id: "tool-git_status",
+                name: "git_status",
+                params: {},
+            }
+            const runner = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const outcome = await runner.execute(request, deps.session, {
+                autoApply: true,
+            })
+            const result = outcome.result
+
+            expect(result.success).toBe(true)
+            expect(result.data).toBeDefined()
+        })
+
+        it("should execute git_diff tool", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            // Create a repository and commit the current files as the baseline
+            const repo = simpleGit(deps.projectRoot)
+            await repo.init()
+            await repo.addConfig("user.email", "test@test.com")
+            await repo.addConfig("user.name", "Test User")
+            await repo.add(".")
+            await repo.commit("Initial commit")
+
+            // Append a line to src/index.ts so the working tree differs from that commit
+            const entryFile = path.join(deps.projectRoot, "src", "index.ts")
+            const original = await fs.readFile(entryFile, "utf-8")
+            await fs.writeFile(entryFile, `${original}\n// New line added\n`)
+
+            const request: ToolCall = {
+                id: "tool-git_diff",
+                name: "git_diff",
+                params: {},
+            }
+            const runner = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const outcome = await runner.execute(request, deps.session, {
+                autoApply: true,
+            })
+            const result = outcome.result
+
+            expect(result.success).toBe(true)
+        })
+
+        it("should execute git_commit tool with confirmation", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            // Create a repository with a committer identity
+            const repo = simpleGit(deps.projectRoot)
+            await repo.init()
+            await repo.addConfig("user.email", "test@test.com")
+            await repo.addConfig("user.name", "Test User")
+            // Stage every file before the commit tool runs
+            await repo.add(".")
+
+            const request: ToolCall = {
+                id: "tool-git_commit",
+                name: "git_commit",
+                params: {
+                    message: "Test commit from E2E",
+                },
+            }
+            const runner = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const outcome = await runner.execute(request, deps.session, {
+                autoApply: true,
+            })
+            const result = outcome.result
+
+            expect(result.success).toBe(true)
+        })
+
+        // RUN TOOLS
+        it("should execute run_tests tool", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const request: ToolCall = {
+                id: "tool-run_tests",
+                name: "run_tests",
+                params: {},
+            }
+            const runner = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const outcome = await runner.execute(request, deps.session, {
+                autoApply: true,
+            })
+            const result = outcome.result
+
+            // The test command itself may fail; only a returned result is required
+            expect(result).toBeDefined()
+        })
+    })
+
+    describe("Use Case: StartSession", () => {
+        it("should create a new session", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const useCase = new StartSession(deps.sessionStorage)
+
+            const { session, isNew } = await useCase.execute("e2e-test-project", {
+                forceNew: true,
+            })
+
+            expect(session).toBeDefined()
+            expect(isNew).toBe(true)
+            expect(session.projectName).toBe("e2e-test-project")
+        })
+
+        it("should load existing session", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const useCase = new StartSession(deps.sessionStorage)
+
+            // Force a brand-new session for the project
+            const created = await useCase.execute("e2e-test-project", { forceNew: true })
+            expect(created.isNew).toBe(true)
+
+            // Persist it explicitly through the session storage
+            await deps.sessionStorage.saveSession(created.session)
+
+            // Without forceNew the very same session is expected back
+            const resumed = await useCase.execute("e2e-test-project", { forceNew: false })
+            expect(resumed.isNew).toBe(false)
+            expect(resumed.session.id).toBe(created.session.id)
+        })
+    })
+
+    describe("Use Case: UndoChange", () => {
+        it("should create undo entry when creating file", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            // Create a brand-new file through the tool pipeline
+            const runner = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const createRequest: ToolCall = {
+                id: "undo-test-create",
+                name: "create_file",
+                params: {
+                    path: "src/undo-test.ts",
+                    content: "export const undoTest = true;\n",
+                },
+            }
+
+            const outcome = await runner.execute(createRequest, deps.session, {
+                autoApply: true,
+            })
+
+            expect(outcome.undoEntryCreated).toBe(true)
+
+            // The file must now be accessible on disk
+            const createdPath = path.join(deps.projectRoot, "src", "undo-test.ts")
+            const isPresent = await fs.access(createdPath).then(
+                () => true,
+                () => false,
+            )
+            expect(isPresent).toBe(true)
+
+            // The session's undo stack must be non-empty
+            const stack = await deps.sessionStorage.getUndoStack(deps.session.id)
+            expect(stack.length).toBeGreaterThan(0)
+        })
+
+        it("should create undo entry when editing file", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            // Replace line range 1-1 of src/index.ts through the tool pipeline
+            const runner = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const editRequest: ToolCall = {
+                id: "undo-test-edit",
+                name: "edit_lines",
+                params: {
+                    path: "src/index.ts",
+                    start: 1,
+                    end: 1,
+                    content: "// EDITED LINE",
+                },
+            }
+
+            const outcome = await runner.execute(editRequest, deps.session, {
+                autoApply: true,
+            })
+
+            expect(outcome.undoEntryCreated).toBe(true)
+
+            // The new text must be readable back from disk
+            const editedPath = path.join(deps.projectRoot, "src", "index.ts")
+            const editedText = await fs.readFile(editedPath, "utf-8")
+            expect(editedText).toContain("EDITED LINE")
+
+            // The session's undo stack must be non-empty
+            const stack = await deps.sessionStorage.getUndoStack(deps.session.id)
+            expect(stack.length).toBeGreaterThan(0)
+        })
+
+        it("should instantiate UndoChange use case", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const undo = new UndoChange(deps.sessionStorage, deps.projectRoot)
+            expect(undo).toBeDefined()
+
+            // Run it on the shared session (presumably nothing to undo here; confirm the setup)
+            const undoResult = await undo.execute(deps.session)
+            // NOTE(review): only definedness is asserted, not the expected `success: false`
+            expect(undoResult).toBeDefined()
+        })
+    })
+
+    describe("Use Case: IndexProject", () => {
+        it("should index project files", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const indexer = new IndexProject(deps.storage)
+
+            const summary = await indexer.execute(deps.projectRoot, {
+                ignorePatterns: ["node_modules", ".git"],
+                supportedExtensions: [".ts", ".tsx", ".js", ".jsx", ".json"],
+            })
+
+            expect(summary.filesScanned).toBeGreaterThan(0)
+            expect(summary.filesParsed).toBeGreaterThanOrEqual(0)
+        })
+
+        it("should populate storage after indexing", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const indexer = new IndexProject(deps.storage)
+
+            await indexer.execute(deps.projectRoot, {
+                ignorePatterns: ["node_modules", ".git"],
+                supportedExtensions: [".ts", ".tsx", ".js", ".jsx", ".json"],
+            })
+
+            // Both the count and the full listing must now be non-empty
+            const storedCount = await deps.storage.getFileCount()
+            expect(storedCount).toBeGreaterThan(0)
+
+            const storedFiles = await deps.storage.getAllFiles()
+            expect(storedFiles.size).toBeGreaterThan(0)
+        })
+
+        it("should find TODOs after indexing", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            // Index the project before querying it
+            const indexer = new IndexProject(deps.storage)
+            await indexer.execute(deps.projectRoot, {
+                ignorePatterns: ["node_modules", ".git"],
+                supportedExtensions: [".ts", ".tsx", ".js", ".jsx", ".json"],
+            })
+
+            // Ask get_todos for the markers present in the indexed files
+            const runner = new ExecuteTool(
+                deps.storage,
+                deps.sessionStorage,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const request: ToolCall = {
+                id: "todos-after-index",
+                name: "get_todos",
+                params: {},
+            }
+
+            const { result } = await runner.execute(request, deps.session, {
+                autoApply: true,
+            })
+
+            expect(result.success).toBe(true)
+            const payload = result.data
+            if (payload && typeof payload === "object" && "totalTodos" in payload) {
+                expect((payload as { totalTodos: number }).totalTodos).toBeGreaterThan(0)
+            }
+        })
+    })
+
+    describe("Use Case: ContextManager", () => {
+        it("should track token usage", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const manager = new ContextManager(128_000)
+
+            manager.addTokens(1000)
+            manager.addTokens(500)
+
+            // 1,500 tokens on a manager built with 128_000 must not call for compression
+            expect(manager.needsCompression()).toBe(false)
+        })
+
+        it("should sync from session", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const manager = new ContextManager(128_000)
+
+            // Seed the session's context state before syncing
+            deps.session.context.tokenUsage = 0.5
+            deps.session.context.filesInContext = ["src/index.ts"]
+
+            manager.syncFromSession(deps.session)
+
+            // NOTE(review): this inspects the session itself, not the manager's synced state
+            expect(deps.session.context.filesInContext).toContain("src/index.ts")
+        })
+
+        it("should update session context", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            const manager = new ContextManager(128_000)
+
+            manager.addTokens(50_000)
+            manager.updateSession(deps.session)
+
+            expect(deps.session.context.tokenUsage).toBeGreaterThan(0)
+        })
+
+        it("should detect when compression is needed", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            // A deliberately small first argument paired with a 0.8 auto-compress setting
+            const manager = new ContextManager(10_000, {
+                autoCompressAt: 0.8,
+            })
+
+            // 9,000 of 10,000 is above 0.8, so compression is expected to be requested
+            manager.addTokens(9000)
+
+            expect(manager.needsCompression()).toBe(true)
+        })
+    })
+
+    describe("Full Integration: Index + HandleMessage + Tools", () => {
+        it("should work end-to-end with indexed project", async () => {
+            if (!(ollamaAvailable && modelAvailable)) {
+                console.warn("Skipping: Ollama/model not available")
+                return
+            }
+
+            // 1) Populate storage by indexing the project
+            const indexer = new IndexProject(deps.storage)
+            await indexer.execute(deps.projectRoot, {
+                ignorePatterns: ["node_modules", ".git"],
+                supportedExtensions: [".ts", ".tsx", ".js", ".jsx", ".json"],
+            })
+
+            // 2) Wire HandleMessage to that storage, the LLM client and the tools
+            const conversation = new HandleMessage(
+                deps.storage,
+                deps.sessionStorage,
+                deps.llm,
+                deps.tools,
+                deps.projectRoot,
+            )
+
+            const observedCalls: ToolCall[] = []
+
+            conversation.setEvents({
+                onToolCall: (call) => observedCalls.push(call),
+                onConfirmation: async () => true,
+            })
+            conversation.setOptions({ autoApply: true, maxToolCalls: 10 })
+
+            // 3) Send a prompt that invites tool usage
+            await conversation.execute(
+                deps.session,
+                "What functions are defined in this project? Use tools to find out.",
+            )
+
+            // The exchange must have left entries in the session history
+            expect(deps.session.history.length).toBeGreaterThan(0)
+
+            // Tool usage is reported for diagnostics only; it is not asserted
+            if (observedCalls.length > 0) {
+                console.log("Full integration - tools used:", observedCalls.map((tc) => tc.name))
+            }
+        }, 180_000)
+    })
+})
diff --git a/packages/ipuaro/tests/e2e/test-helpers.ts b/packages/ipuaro/tests/e2e/test-helpers.ts
new file mode 100644
index 0000000..6750d69
--- /dev/null
+++ b/packages/ipuaro/tests/e2e/test-helpers.ts
@@ -0,0 +1,351 @@
+/**
+ * E2E Test Helpers
+ * Provides dependencies for testing the full flow with REAL LLM.
+ */
+
+import { vi } from "vitest"
+import * as fs from "node:fs/promises"
+import * as path from "node:path"
+import * as os from "node:os"
+import type { IStorage, SymbolIndex, DepsGraph } from "../../src/domain/services/IStorage.js"
+import type { ISessionStorage, SessionListItem } from "../../src/domain/services/ISessionStorage.js"
+import type { FileData } from "../../src/domain/value-objects/FileData.js"
+import type { FileAST } from "../../src/domain/value-objects/FileAST.js"
+import type { FileMeta } from "../../src/domain/value-objects/FileMeta.js"
+import type { UndoEntry } from "../../src/domain/value-objects/UndoEntry.js"
+import { Session } from "../../src/domain/entities/Session.js"
+import { ToolRegistry } from "../../src/infrastructure/tools/registry.js"
+import { OllamaClient } from "../../src/infrastructure/llm/OllamaClient.js"
+import { registerAllTools } from "../../src/cli/commands/tools-setup.js"
+import type { LLMConfig } from "../../src/shared/constants/config.js"
+
+/**
+ * Baseline LLM settings for tests; `createRealOllamaClient` spreads caller overrides on top.
+ */
+export const DEFAULT_TEST_LLM_CONFIG: LLMConfig = {
+    model: "qwen2.5-coder:14b-instruct-q4_K_M",
+    contextWindow: 128_000,
+    temperature: 0.1,
+    host: "http://localhost:11434", // local endpoint handed to OllamaClient
+    timeout: 180_000, // presumably milliseconds; confirm against LLMConfig
+    useNativeTools: true,
+}
+
+/**
+ * Build an IStorage backed purely by in-process Maps (no external store such as Redis).
+ * Every method is wrapped in `vi.fn`, so tests can also inspect how it was called.
+ */
+export function createInMemoryStorage(): IStorage {
+    const fileStore = new Map<string, FileData>()
+    const astStore = new Map<string, FileAST>()
+    const metaStore = new Map<string, FileMeta>()
+    const configStore = new Map<string, unknown>()
+    let symbols: SymbolIndex = new Map()
+    let graph: DepsGraph = { imports: new Map(), importedBy: new Map() }
+    let isOpen = false
+
+    return {
+        getFile: vi.fn(async (key: string) => fileStore.get(key) ?? null),
+        setFile: vi.fn(async (key: string, record: FileData) => {
+            fileStore.set(key, record)
+        }),
+        deleteFile: vi.fn(async (key: string) => {
+            fileStore.delete(key)
+        }),
+        getAllFiles: vi.fn(async () => new Map(fileStore)),
+        getFileCount: vi.fn(async () => fileStore.size),
+        // AST records, keyed by file path
+        getAST: vi.fn(async (key: string) => astStore.get(key) ?? null),
+        setAST: vi.fn(async (key: string, tree: FileAST) => {
+            astStore.set(key, tree)
+        }),
+        deleteAST: vi.fn(async (key: string) => {
+            astStore.delete(key)
+        }),
+        getAllASTs: vi.fn(async () => new Map(astStore)),
+        // Metadata records, keyed by file path
+        getMeta: vi.fn(async (key: string) => metaStore.get(key) ?? null),
+        setMeta: vi.fn(async (key: string, info: FileMeta) => {
+            metaStore.set(key, info)
+        }),
+        deleteMeta: vi.fn(async (key: string) => {
+            metaStore.delete(key)
+        }),
+        getAllMetas: vi.fn(async () => new Map(metaStore)),
+        // Symbol index and dependency graph are held (and returned) by reference
+        getSymbolIndex: vi.fn(async () => symbols),
+        setSymbolIndex: vi.fn(async (next: SymbolIndex) => {
+            symbols = next
+        }),
+        getDepsGraph: vi.fn(async () => graph),
+        setDepsGraph: vi.fn(async (next: DepsGraph) => {
+            graph = next
+        }),
+        // Free-form project configuration; missing keys read as null
+        getProjectConfig: vi.fn(async (name: string) => configStore.get(name) ?? null),
+        setProjectConfig: vi.fn(async (name: string, setting: unknown) => {
+            configStore.set(name, setting)
+        }),
+        // Connection state is only a flag; there is nothing to connect to
+        connect: vi.fn(async () => {
+            isOpen = true
+        }),
+        disconnect: vi.fn(async () => {
+            isOpen = false
+        }),
+        isConnected: vi.fn(() => isOpen),
+        clear: vi.fn(async () => {
+            fileStore.clear()
+            astStore.clear()
+            metaStore.clear()
+            symbols = new Map()
+            graph = { imports: new Map(), importedBy: new Map() }
+            configStore.clear()
+        }),
+    }
+}
+
+/**
+ * Build an ISessionStorage that keeps sessions and undo stacks in Maps; methods are `vi.fn` mocks.
+ */
+export function createInMemorySessionStorage(): ISessionStorage {
+    const sessionsById = new Map<string, Session>()
+    const undoBySession = new Map<string, UndoEntry[]>()
+
+    return {
+        saveSession: vi.fn(async (session: Session) => {
+            sessionsById.set(session.id, session)
+        }),
+        loadSession: vi.fn(async (sessionId: string) => sessionsById.get(sessionId) ?? null),
+        deleteSession: vi.fn(async (sessionId: string) => {
+            sessionsById.delete(sessionId)
+            undoBySession.delete(sessionId)
+        }),
+        listSessions: vi.fn(async (projectName?: string): Promise<SessionListItem[]> => {
+            // An omitted (or empty) project name matches every stored session
+            const wanted = [...sessionsById.values()].filter(
+                (candidate) => !projectName || candidate.projectName === projectName,
+            )
+            return wanted.map(
+                (candidate): SessionListItem => ({
+                    id: candidate.id,
+                    projectName: candidate.projectName,
+                    createdAt: candidate.createdAt,
+                    lastActivityAt: candidate.lastActivityAt,
+                    messageCount: candidate.history.length,
+                }),
+            )
+        }),
+        getLatestSession: vi.fn(async (projectName: string) => {
+            let newest: Session | null = null
+            for (const candidate of sessionsById.values()) {
+                const sameProject = candidate.projectName === projectName
+                const isNewer = !newest || candidate.lastActivityAt > newest.lastActivityAt
+                if (sameProject && isNewer) {
+                    newest = candidate
+                }
+            }
+            return newest
+        }),
+        sessionExists: vi.fn(async (sessionId: string) => sessionsById.has(sessionId)),
+        pushUndoEntry: vi.fn(async (sessionId: string, entry: UndoEntry) => {
+            const entries = undoBySession.get(sessionId) ?? []
+            entries.push(entry)
+            undoBySession.set(sessionId, entries)
+        }),
+        popUndoEntry: vi.fn(async (sessionId: string) => {
+            const entries = undoBySession.get(sessionId)
+            return entries?.pop() ?? null
+        }),
+        getUndoStack: vi.fn(async (sessionId: string) => undoBySession.get(sessionId) ?? []),
+        touchSession: vi.fn(async (sessionId: string) => {
+            const target = sessionsById.get(sessionId)
+            if (target) {
+                target.lastActivityAt = Date.now()
+            }
+        }),
+        clearAllSessions: vi.fn(async () => {
+            sessionsById.clear()
+            undoBySession.clear()
+        }),
+    }
+}
+
+/**
+ * Create a REAL Ollama client for E2E tests.
+ * @param config - optional fields that override the matching DEFAULT_TEST_LLM_CONFIG entries
+ */
+export function createRealOllamaClient(config?: Partial<LLMConfig>): OllamaClient {
+    // Later spreads win, so caller-supplied fields replace the defaults.
+    const settings = { ...DEFAULT_TEST_LLM_CONFIG, ...config }
+    return new OllamaClient(settings)
+}
+
+/**
+ * Create a ToolRegistry and fill it via `registerAllTools` (expected: 18 tools; TODO confirm).
+ */
+export function createRealToolRegistry(): ToolRegistry {
+    const toolRegistry = new ToolRegistry()
+    registerAllTools(toolRegistry)
+    return toolRegistry
+}
+
+/** Create a new test session for `projectName` (default "test-project") with a unique id. */
+export function createTestSession(projectName = "test-project"): Session {
+    // Date.now() alone collides when two sessions are created within the same millisecond.
+    const uniqueSuffix = Math.random().toString(36).slice(2, 10)
+    return new Session(`test-${Date.now()}-${uniqueSuffix}`, projectName)
+}
+
+/**
+ * Create an `ipuaro-e2e-*` temp directory with sample sources, package.json and README; resolves to its path.
+ */
+export async function createTestProject(): Promise<string> {
+    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "ipuaro-e2e-"))
+
+    await fs.mkdir(path.join(tempDir, "src"), { recursive: true })
+
+    await fs.writeFile(
+        path.join(tempDir, "src", "index.ts"), // exports main/add/multiply and carries a TODO marker
+        `/**
+ * Main entry point
+ */
+export function main(): void {
+    console.log("Hello, world!")
+}
+
+export function add(a: number, b: number): number {
+    return a + b
+}
+
+export function multiply(a: number, b: number): number {
+    return a * b
+}
+
+// TODO: Add more math functions
+main()
+`,
+    )
+
+    await fs.writeFile(
+        path.join(tempDir, "src", "utils.ts"), // imports add; exports sum and Calculator; carries a FIXME
+        `/**
+ * Utility functions
+ */
+import { add } from "./index.js"
+
+export function sum(numbers: number[]): number {
+    return numbers.reduce((acc, n) => add(acc, n), 0)
+}
+
+export class Calculator {
+    private result: number = 0
+
+    add(n: number): this {
+        this.result += n
+        return this
+    }
+
+    subtract(n: number): this {
+        this.result -= n
+        return this
+    }
+
+    getResult(): number {
+        return this.result
+    }
+
+    reset(): void {
+        this.result = 0
+    }
+}
+
+// FIXME: Handle edge cases for negative numbers
+`,
+    )
+
+    await fs.writeFile(
+        path.join(tempDir, "package.json"), // ESM package whose test script only echoes a message
+        JSON.stringify(
+            {
+                name: "test-project",
+                version: "1.0.0",
+                type: "module",
+                scripts: {
+                    test: "echo 'Tests passed!'",
+                },
+            },
+            null,
+            4,
+        ),
+    )
+
+    await fs.writeFile(
+        path.join(tempDir, "README.md"),
+        `# Test Project
+
+A sample project for E2E testing.
+
+## Features
+- Basic math functions
+- Calculator class
+`,
+    )
+
+    return tempDir // nothing here deletes it; cleanupTestProject takes this path
+}
+
+/** Remove a test project directory (such as one made by `createTestProject`) and its contents. */
+export async function cleanupTestProject(projectDir: string): Promise<void> {
+    // `force` keeps an already-missing directory from raising an error.
+    const removeOptions = { recursive: true, force: true }
+    await fs.rm(projectDir, removeOptions)
+}
+
+/**
+ * All test dependencies bundled together, as returned by `createE2ETestDependencies`.
+ */
+export interface E2ETestDependencies {
+    storage: IStorage // file / AST / meta / index store
+    sessionStorage: ISessionStorage // sessions and their undo stacks
+    llm: OllamaClient // LLM client
+    tools: ToolRegistry // registry of available tools
+    session: Session // session the tests operate on
+    projectRoot: string // path of the project directory under test
+}
+
+/**
+ * Create all dependencies for E2E testing with REAL Ollama: a fresh sample project,
+ * in-memory storages, the tool registry and a new session. `llmConfig` overrides client defaults.
+ */
+export async function createE2ETestDependencies(
+    llmConfig?: Partial<LLMConfig>,
+): Promise<E2ETestDependencies> {
+    // The sample project is created first; the remaining pieces are built synchronously.
+    const projectRoot = await createTestProject()
+    const storage = createInMemoryStorage()
+    const sessionStorage = createInMemorySessionStorage()
+    const llm = createRealOllamaClient(llmConfig)
+    const tools = createRealToolRegistry()
+    const session = createTestSession()
+
+    return { storage, sessionStorage, llm, tools, session, projectRoot }
+}
+
+/**
+ * Check if Ollama is available.
+ * Resolves to whatever `isAvailable()` reports for a client built from the default test config.
+ */
+export async function isOllamaAvailable(): Promise<boolean> {
+    return createRealOllamaClient().isAvailable()
+}
+
+/**
+ * Check if `model` (default: the model in DEFAULT_TEST_LLM_CONFIG) is reported by the client's `hasModel`.
+ */
+export async function isModelAvailable(
+    model = DEFAULT_TEST_LLM_CONFIG.model,
+): Promise<boolean> {
+    const client = createRealOllamaClient()
+    return client.hasModel(model)
+}
diff --git a/packages/ipuaro/tests/unit/infrastructure/llm/ResponseParser.test.ts b/packages/ipuaro/tests/unit/infrastructure/llm/ResponseParser.test.ts
index 43decf6..4e32b14 100644
--- a/packages/ipuaro/tests/unit/infrastructure/llm/ResponseParser.test.ts
+++ b/packages/ipuaro/tests/unit/infrastructure/llm/ResponseParser.test.ts
@@ -135,6 +135,108 @@ describe("ResponseParser", () => {
             expect(result.parseErrors[0]).toContain("unknown_tool")
         })
 
+        it("should normalize tool name aliases", () => {
+            // Parses one XML call and checks that it yields exactly one tool call
+            // carrying the expected canonical name; returns the parse result.
+            const parseOne = (xml: string, canonicalName: string) => {
+                const parsed = parseToolCalls(xml)
+                expect(parsed.toolCalls).toHaveLength(1)
+                expect(parsed.toolCalls[0].name).toBe(canonicalName)
+                return parsed
+            }
+
+            // get_functions -> get_lines (common LLM typo)
+            const first = parseOne(
+                `<tool_call name="get_functions"><path>src/index.ts</path></tool_call>`,
+                "get_lines",
+            )
+            expect(first.hasParseErrors).toBe(false)
+
+            // read_file -> get_lines
+            parseOne(`<tool_call name="read_file"><path>test.ts</path></tool_call>`, "get_lines")
+
+            // find_todos -> get_todos
+            parseOne(`<tool_call name="find_todos"></tool_call>`, "get_todos")
+
+            // list_files -> get_structure
+            parseOne(`<tool_call name="list_files"><path>.</path></tool_call>`, "get_structure")
+        })
+
+        // JSON format tests
+        it("should parse JSON format tool calls as fallback", () => {
+            // No <tool_call> XML here: the bare JSON object alone must yield the call.
+            const jsonOnly = `{"name": "get_lines", "arguments": {"path": "src/index.ts"}}`
+            const { toolCalls, hasParseErrors } = parseToolCalls(jsonOnly)
+            expect(toolCalls).toHaveLength(1)
+            expect(toolCalls[0].name).toBe("get_lines")
+            expect(toolCalls[0].params).toEqual({ path: "src/index.ts" })
+            expect(hasParseErrors).toBe(false)
+        })
+
+        it("should parse JSON format with numeric arguments", () => {
+            // start/end are JSON numbers and must come back as numbers, not strings.
+            const expectedParams = { path: "src/index.ts", start: 1, end: 50 }
+
+            const { toolCalls } = parseToolCalls(
+                `{"name": "get_lines", "arguments": {"path": "src/index.ts", "start": 1, "end": 50}}`,
+            )
+
+            expect(toolCalls).toHaveLength(1)
+            expect(toolCalls[0].params).toEqual(expectedParams)
+        })
+
+        it("should parse JSON format with surrounding text", () => {
+            // The prose before and after the JSON object must survive in `content`.
+            const mixedReply = `I'll read the file for you:
+{"name": "get_lines", "arguments": {"path": "src/index.ts"}}
+Let me know if you need more.`
+            const { toolCalls, content } = parseToolCalls(mixedReply)
+
+            expect(toolCalls).toHaveLength(1)
+            expect(toolCalls[0].name).toBe("get_lines")
+            expect(content).toContain("I'll read the file for you:")
+            expect(content).toContain("Let me know if you need more.")
+        })
+
+        it("should normalize tool name aliases in JSON format", () => {
+            // Alias under test: read_file -> get_lines
+            const aliasedCall = `{"name": "read_file", "arguments": {"path": "test.ts"}}`
+            const { toolCalls } = parseToolCalls(aliasedCall)
+
+            expect(toolCalls).toHaveLength(1)
+            expect(toolCalls[0].name).toBe("get_lines")
+        })
+
+        it("should reject unknown tool names in JSON format", () => {
+            const bogusCall = `{"name": "unknown_tool", "arguments": {"path": "test.ts"}}`
+            const { toolCalls, hasParseErrors, parseErrors } = parseToolCalls(bogusCall)
+
+            expect(toolCalls).toHaveLength(0)
+            expect(hasParseErrors).toBe(true)
+            expect(parseErrors[0]).toContain("unknown_tool")
+        })
+
+        it("should prefer XML over JSON when both present", () => {
+            const bothFormats = `<tool_call name="get_lines"><path>xml.ts</path></tool_call>
+{"name": "get_function", "arguments": {"path": "json.ts", "name": "foo"}}`
+            const { toolCalls } = parseToolCalls(bothFormats)
+
+            // JSON parsing is only a fallback, so the XML call must be the sole result.
+            expect(toolCalls).toHaveLength(1)
+            const [xmlCall] = toolCalls
+            expect(xmlCall.name).toBe("get_lines")
+            expect(xmlCall.params.path).toBe("xml.ts")
+        })
+
+        it("should parse JSON with empty arguments", () => {
+            // An empty "arguments" object is valid and maps to empty params.
+            const { toolCalls } = parseToolCalls(`{"name": "git_status", "arguments": {}}`)
+
+            expect(toolCalls).toHaveLength(1)
+            expect(toolCalls[0].name).toBe("git_status")
+            expect(toolCalls[0].params).toEqual({})
+        })
+
         it("should support CDATA for multiline content", () => {
             const response = `<tool_call name="edit_lines">
                 <path>src/index.ts</path>
diff --git a/packages/ipuaro/tests/unit/infrastructure/llm/prompts.test.ts b/packages/ipuaro/tests/unit/infrastructure/llm/prompts.test.ts
index 03c30b0..46b1a58 100644
--- a/packages/ipuaro/tests/unit/infrastructure/llm/prompts.test.ts
+++ b/packages/ipuaro/tests/unit/infrastructure/llm/prompts.test.ts
@@ -19,10 +19,16 @@ describe("prompts", () => {
             expect(SYSTEM_PROMPT.length).toBeGreaterThan(100)
         })
 
-        it("should contain core principles", () => {
-            expect(SYSTEM_PROMPT).toContain("Lazy Loading")
-            expect(SYSTEM_PROMPT).toContain("Precision")
-            expect(SYSTEM_PROMPT).toContain("Safety")
+        it("should contain mandatory tool usage instructions", () => {
+            // Each of these phrases must appear verbatim in the system prompt.
+            const required = ["MANDATORY", "Tools for Code Questions", "ZERO code in your context"]
+            required.forEach((phrase) => expect(SYSTEM_PROMPT).toContain(phrase))
+        })
+
+        it("should contain when to use and when not to use tools", () => {
+            // Each of these phrases must appear verbatim in the system prompt.
+            const required = ["When to Use Tools", "Do NOT use tools", "Greetings"]
+            required.forEach((phrase) => expect(SYSTEM_PROMPT).toContain(phrase))
         })
 
         it("should list available tools", () => {
@@ -34,8 +40,9 @@ describe("prompts", () => {
         })
 
         it("should include safety rules", () => {
-            expect(SYSTEM_PROMPT).toContain("Safety Rules")
-            expect(SYSTEM_PROMPT).toContain("Never execute commands that could harm")
+            // Each of these phrases must appear verbatim in the system prompt.
+            const required = ["Stay safe", "destructive commands", "Verify before editing"]
+            required.forEach((phrase) => expect(SYSTEM_PROMPT).toContain(phrase))
         })
     })