feat(ipuaro): add JSON tool call parsing and improve prompts

- Add JSON fallback parsing in ResponseParser for LLMs that prefer JSON
- Add tool name aliases (get_functions -> get_lines, etc.)
- Improve system prompt with clear tool usage guidelines
- Add native Ollama tools support in OllamaClient
- Add E2E tests for full workflow with real Ollama
2025-12-27 23:06:54 +05:00 · 2025-12-05 20:51:18 +05:00
parent c82006bbda
commit 3e7762ec4e
11 changed files with 2430 additions and 102 deletions
--- a/packages/ipuaro/tests/e2e/full-workflow.test.ts
+++ b/packages/ipuaro/tests/e2e/full-workflow.test.ts
--- a/packages/ipuaro/tests/e2e/test-helpers.ts
+++ b/packages/ipuaro/tests/e2e/test-helpers.ts
@@ -0,0 +1,351 @@
+/**
+ * E2E Test Helpers
+ * Provides dependencies for testing the full flow with a REAL LLM.
+ */
+
+import { vi } from "vitest"
+import * as fs from "node:fs/promises"
+import * as path from "node:path"
+import * as os from "node:os"
+import type { IStorage, SymbolIndex, DepsGraph } from "../../src/domain/services/IStorage.js"
+import type { ISessionStorage, SessionListItem } from "../../src/domain/services/ISessionStorage.js"
+import type { FileData } from "../../src/domain/value-objects/FileData.js"
+import type { FileAST } from "../../src/domain/value-objects/FileAST.js"
+import type { FileMeta } from "../../src/domain/value-objects/FileMeta.js"
+import type { UndoEntry } from "../../src/domain/value-objects/UndoEntry.js"
+import { Session } from "../../src/domain/entities/Session.js"
+import { ToolRegistry } from "../../src/infrastructure/tools/registry.js"
+import { OllamaClient } from "../../src/infrastructure/llm/OllamaClient.js"
+import { registerAllTools } from "../../src/cli/commands/tools-setup.js"
+import type { LLMConfig } from "../../src/shared/constants/config.js"
+
+/**
+ * Default LLM config for tests.
+ *
+ * createRealOllamaClient() uses this object as its base and spreads any
+ * caller-supplied overrides on top of it.
+ *
+ * - host points at localhost:11434, the port an Ollama server listens on
+ *   by default.
+ * - temperature is kept low, presumably so E2E runs are as repeatable as a
+ *   real model allows.
+ * - NOTE(review): timeout looks like milliseconds (180_000 = 3 minutes);
+ *   confirm against the LLMConfig definition.
+ */
+export const DEFAULT_TEST_LLM_CONFIG: LLMConfig = {
+    model: "qwen2.5-coder:14b-instruct-q4_K_M",
+    contextWindow: 128_000,
+    temperature: 0.1,
+    host: "http://localhost:11434",
+    timeout: 180_000,
+    useNativeTools: true,
+}
+
+/**
+ * Builds an IStorage implementation backed purely by in-process Maps, so the
+ * tests need no Redis instance.
+ *
+ * Every method is wrapped in vi.fn, which lets a test assert on how storage
+ * was called. The getAll* methods return a copy of the underlying Map, while
+ * getSymbolIndex and getDepsGraph return the currently stored object itself.
+ *
+ * @returns a fresh, empty, disconnected storage instance
+ */
+export function createInMemoryStorage(): IStorage {
+    const fileStore = new Map<string, FileData>()
+    const astStore = new Map<string, FileAST>()
+    const metaStore = new Map<string, FileMeta>()
+    const configStore = new Map<string, unknown>()
+
+    // Factory for the "nothing indexed yet" dependency graph.
+    const emptyDepsGraph = (): DepsGraph => ({ imports: new Map(), importedBy: new Map() })
+
+    let currentSymbols: SymbolIndex = new Map()
+    let currentDeps: DepsGraph = emptyDepsGraph()
+    let isOpen = false
+
+    const storage: IStorage = {
+        getFile: vi.fn(async (key: string) => fileStore.get(key) ?? null),
+        setFile: vi.fn(async (key: string, value: FileData) => {
+            fileStore.set(key, value)
+        }),
+        deleteFile: vi.fn(async (key: string) => {
+            fileStore.delete(key)
+        }),
+        getAllFiles: vi.fn(async () => new Map(fileStore)),
+        getFileCount: vi.fn(async () => fileStore.size),
+
+        getAST: vi.fn(async (key: string) => astStore.get(key) ?? null),
+        setAST: vi.fn(async (key: string, value: FileAST) => {
+            astStore.set(key, value)
+        }),
+        deleteAST: vi.fn(async (key: string) => {
+            astStore.delete(key)
+        }),
+        getAllASTs: vi.fn(async () => new Map(astStore)),
+
+        getMeta: vi.fn(async (key: string) => metaStore.get(key) ?? null),
+        setMeta: vi.fn(async (key: string, value: FileMeta) => {
+            metaStore.set(key, value)
+        }),
+        deleteMeta: vi.fn(async (key: string) => {
+            metaStore.delete(key)
+        }),
+        getAllMetas: vi.fn(async () => new Map(metaStore)),
+
+        getSymbolIndex: vi.fn(async () => currentSymbols),
+        setSymbolIndex: vi.fn(async (next: SymbolIndex) => {
+            currentSymbols = next
+        }),
+        getDepsGraph: vi.fn(async () => currentDeps),
+        setDepsGraph: vi.fn(async (next: DepsGraph) => {
+            currentDeps = next
+        }),
+
+        getProjectConfig: vi.fn(async (name: string) => configStore.get(name) ?? null),
+        setProjectConfig: vi.fn(async (name: string, setting: unknown) => {
+            configStore.set(name, setting)
+        }),
+
+        connect: vi.fn(async () => {
+            isOpen = true
+        }),
+        disconnect: vi.fn(async () => {
+            isOpen = false
+        }),
+        isConnected: vi.fn(() => isOpen),
+        // Wipes all stored data; the connection flag is deliberately left as-is.
+        clear: vi.fn(async () => {
+            for (const store of [fileStore, astStore, metaStore, configStore]) {
+                store.clear()
+            }
+            currentSymbols = new Map()
+            currentDeps = emptyDepsGraph()
+        }),
+    }
+
+    return storage
+}
+
+/**
+ * In-memory session storage for testing.
+ *
+ * Sessions and their undo stacks live in two Maps keyed by session id. Every
+ * method is wrapped in vi.fn so tests can inspect the calls made against it.
+ *
+ * Session objects are stored and returned by reference, so touchSession()
+ * mutates the very instance a caller previously saved.
+ *
+ * @returns a fresh, empty session storage
+ */
+export function createInMemorySessionStorage(): ISessionStorage {
+    const sessions = new Map<string, Session>()
+    const undoStacks = new Map<string, UndoEntry[]>()
+
+    return {
+        saveSession: vi.fn(async (session: Session) => {
+            sessions.set(session.id, session)
+        }),
+        loadSession: vi.fn(async (sessionId: string) => sessions.get(sessionId) ?? null),
+        // Removing a session also drops its undo history.
+        deleteSession: vi.fn(async (sessionId: string) => {
+            sessions.delete(sessionId)
+            undoStacks.delete(sessionId)
+        }),
+        // Without a projectName every session is listed; otherwise only exact matches.
+        listSessions: vi.fn(async (projectName?: string): Promise<SessionListItem[]> => {
+            const items: SessionListItem[] = []
+            for (const session of sessions.values()) {
+                if (!projectName || session.projectName === projectName) {
+                    items.push({
+                        id: session.id,
+                        projectName: session.projectName,
+                        createdAt: session.createdAt,
+                        lastActivityAt: session.lastActivityAt,
+                        messageCount: session.history.length,
+                    })
+                }
+            }
+            return items
+        }),
+        // Picks the session of the project with the greatest lastActivityAt, or null.
+        getLatestSession: vi.fn(async (projectName: string) => {
+            let latest: Session | null = null
+            for (const session of sessions.values()) {
+                if (session.projectName === projectName) {
+                    if (!latest || session.lastActivityAt > latest.lastActivityAt) {
+                        latest = session
+                    }
+                }
+            }
+            return latest
+        }),
+        sessionExists: vi.fn(async (sessionId: string) => sessions.has(sessionId)),
+        pushUndoEntry: vi.fn(async (sessionId: string, entry: UndoEntry) => {
+            const stack = undoStacks.get(sessionId) ?? []
+            stack.push(entry)
+            undoStacks.set(sessionId, stack)
+        }),
+        // Pops from the stored array in place; an unknown session yields null.
+        popUndoEntry: vi.fn(async (sessionId: string) => {
+            const stack = undoStacks.get(sessionId) ?? []
+            return stack.pop() ?? null
+        }),
+        // Returns a snapshot rather than the live array, so a caller that mutates
+        // the result cannot silently corrupt the stored undo stack.
+        getUndoStack: vi.fn(async (sessionId: string) => [...(undoStacks.get(sessionId) ?? [])]),
+        touchSession: vi.fn(async (sessionId: string) => {
+            const session = sessions.get(sessionId)
+            if (session) {
+                session.lastActivityAt = Date.now()
+            }
+        }),
+        clearAllSessions: vi.fn(async () => {
+            sessions.clear()
+            undoStacks.clear()
+        }),
+    }
+}
+
+/**
+ * Instantiates a real OllamaClient for E2E runs.
+ *
+ * @param config optional overrides; any field given here wins over the
+ *               matching field of DEFAULT_TEST_LLM_CONFIG
+ * @returns a client built from the merged configuration
+ */
+export function createRealOllamaClient(config?: Partial<LLMConfig>): OllamaClient {
+    const mergedConfig = { ...DEFAULT_TEST_LLM_CONFIG, ...config }
+    return new OllamaClient(mergedConfig)
+}
+
+/**
+ * Builds a ToolRegistry populated by registerAllTools(), i.e. with the same
+ * tool set the CLI registers.
+ *
+ * @returns the populated registry
+ */
+export function createRealToolRegistry(): ToolRegistry {
+    const tools = new ToolRegistry()
+    registerAllTools(tools)
+
+    return tools
+}
+
+/** Per-process counter that keeps test session ids unique within one millisecond. */
+let testSessionSequence = 0
+
+/**
+ * Create a new test session.
+ *
+ * The id is `test-<timestamp>-<sequence>`. A timestamp alone is not enough:
+ * two sessions created in the same millisecond would share an id and
+ * overwrite each other in any storage keyed by session id.
+ *
+ * @param projectName project the session belongs to (defaults to "test-project")
+ * @returns a new Session with a process-unique id
+ */
+export function createTestSession(projectName = "test-project"): Session {
+    testSessionSequence += 1
+    return new Session(`test-${Date.now()}-${testSessionSequence}`, projectName)
+}
+
+/**
+ * Create a temporary test project directory with sample files.
+ *
+ * Layout written under a fresh `ipuaro-e2e-*` directory in the OS temp dir:
+ * - src/index.ts  (three exported functions and a TODO comment)
+ * - src/utils.ts  (a function, a Calculator class and a FIXME comment)
+ * - package.json
+ * - README.md
+ *
+ * If any step after the directory is created fails, the directory is removed
+ * again before the error is rethrown, so a failed setup leaves nothing behind.
+ *
+ * @returns absolute path of the created project directory; pass it to
+ *          cleanupTestProject() when the test is done
+ * @throws whatever the underlying fs call rejected with
+ */
+export async function createTestProject(): Promise<string> {
+    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "ipuaro-e2e-"))
+
+    try {
+        await fs.mkdir(path.join(tempDir, "src"), { recursive: true })
+
+        await fs.writeFile(
+            path.join(tempDir, "src", "index.ts"),
+            `/**
+ * Main entry point
+ */
+export function main(): void {
+    console.log("Hello, world!")
+}
+
+export function add(a: number, b: number): number {
+    return a + b
+}
+
+export function multiply(a: number, b: number): number {
+    return a * b
+}
+
+// TODO: Add more math functions
+main()
+`,
+        )
+
+        await fs.writeFile(
+            path.join(tempDir, "src", "utils.ts"),
+            `/**
+ * Utility functions
+ */
+import { add } from "./index.js"
+
+export function sum(numbers: number[]): number {
+    return numbers.reduce((acc, n) => add(acc, n), 0)
+}
+
+export class Calculator {
+    private result: number = 0
+
+    add(n: number): this {
+        this.result += n
+        return this
+    }
+
+    subtract(n: number): this {
+        this.result -= n
+        return this
+    }
+
+    getResult(): number {
+        return this.result
+    }
+
+    reset(): void {
+        this.result = 0
+    }
+}
+
+// FIXME: Handle edge cases for negative numbers
+`,
+        )
+
+        await fs.writeFile(
+            path.join(tempDir, "package.json"),
+            JSON.stringify(
+                {
+                    name: "test-project",
+                    version: "1.0.0",
+                    type: "module",
+                    scripts: {
+                        test: "echo 'Tests passed!'",
+                    },
+                },
+                null,
+                4,
+            ),
+        )
+
+        await fs.writeFile(
+            path.join(tempDir, "README.md"),
+            `# Test Project
+
+A sample project for E2E testing.
+
+## Features
+- Basic math functions
+- Calculator class
+`,
+        )
+    } catch (error) {
+        // Best-effort cleanup: swallow a failing rm so the caller still sees
+        // the original setup error rather than the cleanup error.
+        await fs.rm(tempDir, { recursive: true, force: true }).catch(() => undefined)
+        throw error
+    }
+
+    return tempDir
+}
+
+/**
+ * Removes a test project directory together with everything inside it.
+ *
+ * The removal is forced, so a directory that no longer exists is not an error.
+ *
+ * @param projectDir path of the directory to delete
+ */
+export async function cleanupTestProject(projectDir: string): Promise<void> {
+    const removal = { recursive: true, force: true }
+    await fs.rm(projectDir, removal)
+}
+
+/**
+ * All test dependencies bundled together.
+ *
+ * This is the shape returned by createE2ETestDependencies(). Only `llm` and
+ * `projectRoot` touch the outside world; the two storages are in-memory.
+ */
+export interface E2ETestDependencies {
+    storage: IStorage // in-memory project storage
+    sessionStorage: ISessionStorage // in-memory session and undo storage
+    llm: OllamaClient // real client; needs a reachable Ollama server
+    tools: ToolRegistry // registry filled by registerAllTools()
+    session: Session // fresh session for the test
+    projectRoot: string // temp project directory; remove with cleanupTestProject()
+}
+
+/**
+ * Create all dependencies for E2E testing with REAL Ollama.
+ *
+ * The in-memory pieces are built first and the temporary project directory
+ * last. If one of the factories throws, no directory has been created yet,
+ * so a failed setup cannot leave an orphaned temp project on disk.
+ *
+ * @param llmConfig optional overrides applied on top of DEFAULT_TEST_LLM_CONFIG
+ * @returns the full dependency bundle; the caller owns `projectRoot` and
+ *          should remove it with cleanupTestProject()
+ */
+export async function createE2ETestDependencies(
+    llmConfig?: Partial<LLMConfig>,
+): Promise<E2ETestDependencies> {
+    const storage = createInMemoryStorage()
+    const sessionStorage = createInMemorySessionStorage()
+    const llm = createRealOllamaClient(llmConfig)
+    const tools = createRealToolRegistry()
+    const session = createTestSession()
+
+    // Only step with an on-disk side effect, so it runs after everything
+    // that could throw.
+    const projectRoot = await createTestProject()
+
+    return { storage, sessionStorage, llm, tools, session, projectRoot }
+}
+
+/**
+ * Reports whether Ollama can be reached, by asking a client built from the
+ * default test configuration.
+ *
+ * @returns the result of the client's own availability check
+ */
+export async function isOllamaAvailable(): Promise<boolean> {
+    return createRealOllamaClient().isAvailable()
+}
+
+/**
+ * Check if required model is available.
+ *
+ * The default comes from DEFAULT_TEST_LLM_CONFIG, so this check and the
+ * client used by the tests can never disagree about which model is needed.
+ *
+ * @param model model tag to look for (defaults to the test config's model)
+ * @returns the result of the client's hasModel() check for that tag
+ */
+export async function isModelAvailable(
+    model = DEFAULT_TEST_LLM_CONFIG.model,
+): Promise<boolean> {
+    const client = createRealOllamaClient()
+    return client.hasModel(model)
+}
--- a/packages/ipuaro/tests/unit/infrastructure/llm/ResponseParser.test.ts
+++ b/packages/ipuaro/tests/unit/infrastructure/llm/ResponseParser.test.ts
@@ -135,6 +135,108 @@ describe("ResponseParser", () => {
            expect(result.parseErrors[0]).toContain("unknown_tool")
        })

+        it("should normalize tool name aliases", () => {
+            // get_functions -> get_lines (common LLM typo)
+            const response1 = `<tool_call name="get_functions"><path>src/index.ts</path></tool_call>`
+            const result1 = parseToolCalls(response1)
+            expect(result1.toolCalls).toHaveLength(1)
+            expect(result1.toolCalls[0].name).toBe("get_lines")
+            expect(result1.hasParseErrors).toBe(false)
+
+            // read_file -> get_lines
+            const response2 = `<tool_call name="read_file"><path>test.ts</path></tool_call>`
+            const result2 = parseToolCalls(response2)
+            expect(result2.toolCalls).toHaveLength(1)
+            expect(result2.toolCalls[0].name).toBe("get_lines")
+
+            // find_todos -> get_todos
+            const response3 = `<tool_call name="find_todos"></tool_call>`
+            const result3 = parseToolCalls(response3)
+            expect(result3.toolCalls).toHaveLength(1)
+            expect(result3.toolCalls[0].name).toBe("get_todos")
+
+            // list_files -> get_structure
+            const response4 = `<tool_call name="list_files"><path>.</path></tool_call>`
+            const result4 = parseToolCalls(response4)
+            expect(result4.toolCalls).toHaveLength(1)
+            expect(result4.toolCalls[0].name).toBe("get_structure")
+        })
+
+        // JSON format tests
+        it("should parse JSON format tool calls as fallback", () => {
+            const response = `{"name": "get_lines", "arguments": {"path": "src/index.ts"}}`
+            const result = parseToolCalls(response)
+
+            expect(result.toolCalls).toHaveLength(1)
+            expect(result.toolCalls[0].name).toBe("get_lines")
+            expect(result.toolCalls[0].params).toEqual({ path: "src/index.ts" })
+            expect(result.hasParseErrors).toBe(false)
+        })
+
+        it("should parse JSON format with numeric arguments", () => {
+            const response = `{"name": "get_lines", "arguments": {"path": "src/index.ts", "start": 1, "end": 50}}`
+            const result = parseToolCalls(response)
+
+            expect(result.toolCalls).toHaveLength(1)
+            expect(result.toolCalls[0].params).toEqual({
+                path: "src/index.ts",
+                start: 1,
+                end: 50,
+            })
+        })
+
+        it("should parse JSON format with surrounding text", () => {
+            const response = `I'll read the file for you:
+{"name": "get_lines", "arguments": {"path": "src/index.ts"}}
+Let me know if you need more.`
+
+            const result = parseToolCalls(response)
+
+            expect(result.toolCalls).toHaveLength(1)
+            expect(result.toolCalls[0].name).toBe("get_lines")
+            expect(result.content).toContain("I'll read the file for you:")
+            expect(result.content).toContain("Let me know if you need more.")
+        })
+
+        it("should normalize tool name aliases in JSON format", () => {
+            // read_file -> get_lines
+            const response = `{"name": "read_file", "arguments": {"path": "test.ts"}}`
+            const result = parseToolCalls(response)
+
+            expect(result.toolCalls).toHaveLength(1)
+            expect(result.toolCalls[0].name).toBe("get_lines")
+        })
+
+        it("should reject unknown tool names in JSON format", () => {
+            const response = `{"name": "unknown_tool", "arguments": {"path": "test.ts"}}`
+            const result = parseToolCalls(response)
+
+            expect(result.toolCalls).toHaveLength(0)
+            expect(result.hasParseErrors).toBe(true)
+            expect(result.parseErrors[0]).toContain("unknown_tool")
+        })
+
+        it("should prefer XML over JSON when both present", () => {
+            const response = `<tool_call name="get_lines"><path>xml.ts</path></tool_call>
+{"name": "get_function", "arguments": {"path": "json.ts", "name": "foo"}}`
+
+            const result = parseToolCalls(response)
+
+            // Should only parse XML since it was found first
+            expect(result.toolCalls).toHaveLength(1)
+            expect(result.toolCalls[0].name).toBe("get_lines")
+            expect(result.toolCalls[0].params.path).toBe("xml.ts")
+        })
+
+        it("should parse JSON with empty arguments", () => {
+            const response = `{"name": "git_status", "arguments": {}}`
+            const result = parseToolCalls(response)
+
+            expect(result.toolCalls).toHaveLength(1)
+            expect(result.toolCalls[0].name).toBe("git_status")
+            expect(result.toolCalls[0].params).toEqual({})
+        })
+
        it("should support CDATA for multiline content", () => {
            const response = `<tool_call name="edit_lines">
                <path>src/index.ts</path>
--- a/packages/ipuaro/tests/unit/infrastructure/llm/prompts.test.ts
+++ b/packages/ipuaro/tests/unit/infrastructure/llm/prompts.test.ts
@@ -19,10 +19,16 @@ describe("prompts", () => {
            expect(SYSTEM_PROMPT.length).toBeGreaterThan(100)
        })

-        it("should contain core principles", () => {
-            expect(SYSTEM_PROMPT).toContain("Lazy Loading")
-            expect(SYSTEM_PROMPT).toContain("Precision")
-            expect(SYSTEM_PROMPT).toContain("Safety")
+        it("should contain mandatory tool usage instructions", () => {
+            expect(SYSTEM_PROMPT).toContain("MANDATORY")
+            expect(SYSTEM_PROMPT).toContain("Tools for Code Questions")
+            expect(SYSTEM_PROMPT).toContain("ZERO code in your context")
+        })
+
+        it("should contain when to use and when not to use tools", () => {
+            expect(SYSTEM_PROMPT).toContain("When to Use Tools")
+            expect(SYSTEM_PROMPT).toContain("Do NOT use tools")
+            expect(SYSTEM_PROMPT).toContain("Greetings")
        })

        it("should list available tools", () => {
@@ -34,8 +40,9 @@ describe("prompts", () => {
        })

        it("should include safety rules", () => {
-            expect(SYSTEM_PROMPT).toContain("Safety Rules")
-            expect(SYSTEM_PROMPT).toContain("Never execute commands that could harm")
+            expect(SYSTEM_PROMPT).toContain("Stay safe")
+            expect(SYSTEM_PROMPT).toContain("destructive commands")
+            expect(SYSTEM_PROMPT).toContain("Verify before editing")
        })
    })