refactor(ipuaro): simplify LLM integration with pure XML tool format

Refactor OllamaClient to use the pure XML format for tool calls, as designed in CONCEPT.md. This removes the dual system (Ollama native tools + XML parser) in favor of a single source of truth (ResponseParser).

Changes:
- Remove tools parameter from ILLMClient.chat() interface
- Remove convertTools(), convertParameters(), extractToolCalls()
- Add XML format instructions to system prompt with examples
- Add CDATA support in ResponseParser for multiline content
- Add tool name validation with helpful error messages
- Move ToolDef/ToolParameter to shared/types/tool-definitions.ts

Benefits:
- Simplified architecture (single source of truth)
- CONCEPT.md compliance (pure XML as designed)
- Better validation (early detection of invalid tools)
- Reduced complexity (fewer format conversions)

Tests: 1444 passed (+4 new tests)
Coverage: 97.83% lines, 91.98% branches, 99.16% functions
2025-12-28 07:16:53 +05:00 · 2025-12-01 21:03:55 +05:00
parent 902d1db831
commit 0433ef102c
13 changed files with 290 additions and 212 deletions
--- a/packages/ipuaro/src/infrastructure/llm/OllamaClient.ts
+++ b/packages/ipuaro/src/infrastructure/llm/OllamaClient.ts
@@ -1,15 +1,10 @@
-import { type Message, Ollama, type Tool } from "ollama"
-import type {
-    ILLMClient,
-    LLMResponse,
-    ToolDef,
-    ToolParameter,
-} from "../../domain/services/ILLMClient.js"
+import { type Message, Ollama } from "ollama"
+import type { ILLMClient, LLMResponse } from "../../domain/services/ILLMClient.js"
 import type { ChatMessage } from "../../domain/value-objects/ChatMessage.js"
-import { createToolCall, type ToolCall } from "../../domain/value-objects/ToolCall.js"
 import type { LLMConfig } from "../../shared/constants/config.js"
 import { IpuaroError } from "../../shared/errors/IpuaroError.js"
 import { estimateTokens } from "../../shared/utils/tokens.js"
+import { parseToolCalls } from "./ResponseParser.js"

 /**
 * Ollama LLM client implementation.
@@ -35,19 +30,18 @@ export class OllamaClient implements ILLMClient {

    /**
     * Send messages to LLM and get response.
+     * Tool definitions should be included in the system prompt as XML format.
     */
-    async chat(messages: ChatMessage[], tools?: ToolDef[]): Promise<LLMResponse> {
+    async chat(messages: ChatMessage[]): Promise<LLMResponse> {
        const startTime = Date.now()
        this.abortController = new AbortController()

        try {
            const ollamaMessages = this.convertMessages(messages)
-            const ollamaTools = tools ? this.convertTools(tools) : undefined

            const response = await this.client.chat({
                model: this.model,
                messages: ollamaMessages,
-                tools: ollamaTools,
                options: {
                    temperature: this.temperature,
                },
@@ -55,15 +49,15 @@ export class OllamaClient implements ILLMClient {
            })

            const timeMs = Date.now() - startTime
-            const toolCalls = this.extractToolCalls(response.message)
+            const parsed = parseToolCalls(response.message.content)

            return {
-                content: response.message.content,
-                toolCalls,
+                content: parsed.content,
+                toolCalls: parsed.toolCalls,
                tokens: response.eval_count ?? estimateTokens(response.message.content),
                timeMs,
                truncated: false,
-                stopReason: this.determineStopReason(response, toolCalls),
+                stopReason: this.determineStopReason(response, parsed.toolCalls),
            }
        } catch (error) {
            if (error instanceof Error && error.name === "AbortError") {
@@ -205,69 +199,12 @@ export class OllamaClient implements ILLMClient {
        }
    }

-    /**
-     * Convert ToolDef array to Ollama Tool format.
-     */
-    private convertTools(tools: ToolDef[]): Tool[] {
-        return tools.map(
-            (tool): Tool => ({
-                type: "function",
-                function: {
-                    name: tool.name,
-                    description: tool.description,
-                    parameters: {
-                        type: "object",
-                        properties: this.convertParameters(tool.parameters),
-                        required: tool.parameters.filter((p) => p.required).map((p) => p.name),
-                    },
-                },
-            }),
-        )
-    }
-
-    /**
-     * Convert ToolParameter array to JSON Schema properties.
-     */
-    private convertParameters(
-        params: ToolParameter[],
-    ): Record<string, { type: string; description: string; enum?: string[] }> {
-        const properties: Record<string, { type: string; description: string; enum?: string[] }> =
-            {}
-
-        for (const param of params) {
-            properties[param.name] = {
-                type: param.type,
-                description: param.description,
-                ...(param.enum && { enum: param.enum }),
-            }
-        }
-
-        return properties
-    }
-
-    /**
-     * Extract tool calls from Ollama response message.
-     */
-    private extractToolCalls(message: Message): ToolCall[] {
-        if (!message.tool_calls || message.tool_calls.length === 0) {
-            return []
-        }
-
-        return message.tool_calls.map((tc, index) =>
-            createToolCall(
-                `call_${String(Date.now())}_${String(index)}`,
-                tc.function.name,
-                tc.function.arguments,
-            ),
-        )
-    }
-
    /**
     * Determine stop reason from response.
     */
    private determineStopReason(
        response: { done_reason?: string },
-        toolCalls: ToolCall[],
+        toolCalls: { name: string; params: Record<string, unknown> }[],
    ): "end" | "length" | "tool_use" {
        if (toolCalls.length > 0) {
            return "tool_use"
--- a/packages/ipuaro/src/infrastructure/llm/ResponseParser.ts
+++ b/packages/ipuaro/src/infrastructure/llm/ResponseParser.ts
@@ -27,9 +27,41 @@ const TOOL_CALL_REGEX = /<tool_call\s+name\s*=\s*"([^"]+)">([\s\S]*?)<\/tool_cal
 const PARAM_REGEX_NAMED = /<param\s+name\s*=\s*"([^"]+)">([\s\S]*?)<\/param>/gi
 const PARAM_REGEX_ELEMENT = /<([a-z_][a-z0-9_]*)>([\s\S]*?)<\/\1>/gi

+/**
+ * CDATA section pattern.
+ * Matches: <![CDATA[...]]>
+ */
+const CDATA_REGEX = /<!\[CDATA\[([\s\S]*?)\]\]>/g
+
+/**
+ * Valid tool names.
+ * Used for validation to catch typos or hallucinations.
+ */
+const VALID_TOOL_NAMES = new Set([
+    "get_lines",
+    "get_function",
+    "get_class",
+    "get_structure",
+    "edit_lines",
+    "create_file",
+    "delete_file",
+    "find_references",
+    "find_definition",
+    "get_dependencies",
+    "get_dependents",
+    "get_complexity",
+    "get_todos",
+    "git_status",
+    "git_diff",
+    "git_commit",
+    "run_command",
+    "run_tests",
+])
+
 /**
 * Parse tool calls from LLM response text.
 * Supports XML format: <tool_call name="get_lines"><path>src/index.ts</path></tool_call>
+ * Validates tool names and provides helpful error messages.
 */
 export function parseToolCalls(response: string): ParsedResponse {
    const toolCalls: ToolCall[] = []
@@ -41,6 +73,13 @@ export function parseToolCalls(response: string): ParsedResponse {
    for (const match of matches) {
        const [fullMatch, toolName, paramsXml] = match

+        if (!VALID_TOOL_NAMES.has(toolName)) {
+            parseErrors.push(
+                `Unknown tool "${toolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
+            )
+            continue
+        }
+
        try {
            const params = parseParameters(paramsXml)
            const toolCall = createToolCall(
@@ -91,10 +130,16 @@ function parseParameters(xml: string): Record<string, unknown> {

 /**
 * Parse a value string to appropriate type.
+ * Supports CDATA sections for multiline content.
 */
 function parseValue(value: string): unknown {
    const trimmed = value.trim()

+    const cdataMatches = [...trimmed.matchAll(CDATA_REGEX)]
+    if (cdataMatches.length > 0 && cdataMatches[0][1] !== undefined) {
+        return cdataMatches[0][1]
+    }
+
    if (trimmed === "true") {
        return true
    }
--- a/packages/ipuaro/src/infrastructure/llm/prompts.ts
+++ b/packages/ipuaro/src/infrastructure/llm/prompts.ts
@@ -23,37 +23,67 @@ export const SYSTEM_PROMPT = `You are ipuaro, a local AI code assistant speciali
 3. **Safety**: Confirm destructive operations. Never execute dangerous commands.
 4. **Efficiency**: Minimize context usage. Request only necessary code sections.

+## Tool Calling Format
+
+When you need to use a tool, format your call as XML:
+
+<tool_call name="tool_name">
+  <param_name>value</param_name>
+  <another_param>value</another_param>
+</tool_call>
+
+You can call multiple tools in one response. Always wait for tool results before making conclusions.
+
+**Examples:**
+
+<tool_call name="get_lines">
+  <path>src/index.ts</path>
+  <start>1</start>
+  <end>50</end>
+</tool_call>
+
+<tool_call name="edit_lines">
+  <path>src/utils.ts</path>
+  <start>10</start>
+  <end>15</end>
+  <content>const newCode = "hello";</content>
+</tool_call>
+
+<tool_call name="find_references">
+  <symbol>getUserById</symbol>
+</tool_call>
+
 ## Available Tools

 ### Reading Tools
- \`get_lines\`: Get specific lines from a file
- \`get_function\`: Get a function by name
- \`get_class\`: Get a class by name
- \`get_structure\`: Get project directory structure
+- \`get_lines(path, start?, end?)\`: Get specific lines from a file
+- \`get_function(path, name)\`: Get a function by name
+- \`get_class(path, name)\`: Get a class by name
+- \`get_structure(path?, depth?)\`: Get project directory structure

 ### Editing Tools (require confirmation)
- \`edit_lines\`: Replace specific lines in a file
- \`create_file\`: Create a new file
- \`delete_file\`: Delete a file
+- \`edit_lines(path, start, end, content)\`: Replace specific lines in a file
+- \`create_file(path, content)\`: Create a new file
+- \`delete_file(path)\`: Delete a file

 ### Search Tools
- \`find_references\`: Find all usages of a symbol
- \`find_definition\`: Find where a symbol is defined
+- \`find_references(symbol, path?)\`: Find all usages of a symbol
+- \`find_definition(symbol)\`: Find where a symbol is defined

 ### Analysis Tools
- \`get_dependencies\`: Get files this file imports
- \`get_dependents\`: Get files that import this file
- \`get_complexity\`: Get complexity metrics
- \`get_todos\`: Find TODO/FIXME comments
+- \`get_dependencies(path)\`: Get files this file imports
+- \`get_dependents(path)\`: Get files that import this file
+- \`get_complexity(path?, limit?)\`: Get complexity metrics
+- \`get_todos(path?, type?)\`: Find TODO/FIXME comments

 ### Git Tools
- \`git_status\`: Get repository status
- \`git_diff\`: Get uncommitted changes
- \`git_commit\`: Create a commit (requires confirmation)
+- \`git_status()\`: Get repository status
+- \`git_diff(path?, staged?)\`: Get uncommitted changes
+- \`git_commit(message, files?)\`: Create a commit (requires confirmation)

 ### Run Tools
- \`run_command\`: Execute a shell command (security checked)
- \`run_tests\`: Run the test suite
+- \`run_command(command, timeout?)\`: Execute a shell command (security checked)
+- \`run_tests(path?, filter?, watch?)\`: Run the test suite

 ## Response Guidelines

--- a/packages/ipuaro/src/infrastructure/llm/toolDefs.ts
+++ b/packages/ipuaro/src/infrastructure/llm/toolDefs.ts
@@ -1,4 +1,4 @@
-import type { ToolDef } from "../../domain/services/ILLMClient.js"
+import type { ToolDef } from "../../shared/types/tool-definitions.js"

 /**
 * Tool definitions for ipuaro LLM.