mirror of
https://github.com/samiyev/puaros.git
synced 2025-12-27 23:06:54 +05:00
feat(ipuaro): add JSON tool call parsing and improve prompts
- Add JSON fallback parsing in ResponseParser for LLMs that prefer JSON
- Add tool name aliases (get_functions -> get_lines, etc.)
- Improve system prompt with clear tool usage guidelines
- Add native Ollama tools support in OllamaClient
- Add E2E tests for full workflow with real Ollama
This commit is contained in:
1506
packages/ipuaro/tests/e2e/full-workflow.test.ts
Normal file
1506
packages/ipuaro/tests/e2e/full-workflow.test.ts
Normal file
File diff suppressed because it is too large
Load Diff
351
packages/ipuaro/tests/e2e/test-helpers.ts
Normal file
351
packages/ipuaro/tests/e2e/test-helpers.ts
Normal file
@@ -0,0 +1,351 @@
|
||||
/**
|
||||
* E2E Test Helpers
|
||||
* Provides dependencies for testing the full flow with REAL LLM.
|
||||
*/
|
||||
|
||||
import { vi } from "vitest"
|
||||
import * as fs from "node:fs/promises"
|
||||
import * as path from "node:path"
|
||||
import * as os from "node:os"
|
||||
import type { IStorage, SymbolIndex, DepsGraph } from "../../src/domain/services/IStorage.js"
|
||||
import type { ISessionStorage, SessionListItem } from "../../src/domain/services/ISessionStorage.js"
|
||||
import type { FileData } from "../../src/domain/value-objects/FileData.js"
|
||||
import type { FileAST } from "../../src/domain/value-objects/FileAST.js"
|
||||
import type { FileMeta } from "../../src/domain/value-objects/FileMeta.js"
|
||||
import type { UndoEntry } from "../../src/domain/value-objects/UndoEntry.js"
|
||||
import { Session } from "../../src/domain/entities/Session.js"
|
||||
import { ToolRegistry } from "../../src/infrastructure/tools/registry.js"
|
||||
import { OllamaClient } from "../../src/infrastructure/llm/OllamaClient.js"
|
||||
import { registerAllTools } from "../../src/cli/commands/tools-setup.js"
|
||||
import type { LLMConfig } from "../../src/shared/constants/config.js"
|
||||
|
||||
/**
 * Baseline LLM settings used by the E2E helpers in this file.
 *
 * `createRealOllamaClient` spreads caller-supplied overrides on top of this
 * object, so every field here is only a default.
 */
export const DEFAULT_TEST_LLM_CONFIG: LLMConfig = {
    // Model tag requested from the Ollama server.
    model: "qwen2.5-coder:14b-instruct-q4_K_M",
    contextWindow: 128_000,
    temperature: 0.1,
    // Address of the Ollama server the tests talk to.
    host: "http://localhost:11434",
    // NOTE(review): presumably milliseconds (3 minutes) - confirm against LLMConfig.
    timeout: 180_000,
    useNativeTools: true,
}
|
||||
|
||||
/**
 * Builds an `IStorage` test double that lives entirely in process memory.
 *
 * Files, ASTs, metadata and project config each get their own `Map`; the symbol
 * index and dependency graph are held as single replaceable values. No Redis
 * server is required. Every method is wrapped in `vi.fn`, so tests can assert
 * on calls while still getting working storage behaviour.
 *
 * @returns A fresh, empty storage instance that starts out disconnected.
 */
export function createInMemoryStorage(): IStorage {
    const fileStore = new Map<string, FileData>()
    const astStore = new Map<string, FileAST>()
    const metaStore = new Map<string, FileMeta>()
    const configStore = new Map<string, unknown>()

    // Produces a brand-new empty graph; used for the initial state and by clear().
    const emptyDepsGraph = (): DepsGraph => ({ imports: new Map(), importedBy: new Map() })

    let currentSymbols: SymbolIndex = new Map()
    let currentDeps: DepsGraph = emptyDepsGraph()
    let isOpen = false

    const storage: IStorage = {
        // --- file contents ---
        getFile: vi.fn(async (filePath: string) => {
            const found = fileStore.get(filePath)
            return found ?? null
        }),
        setFile: vi.fn(async (filePath: string, data: FileData): Promise<void> => {
            fileStore.set(filePath, data)
        }),
        deleteFile: vi.fn(async (filePath: string): Promise<void> => {
            fileStore.delete(filePath)
        }),
        // A copy is handed out so callers cannot mutate the backing map.
        getAllFiles: vi.fn(async () => {
            return new Map(fileStore)
        }),
        getFileCount: vi.fn(async () => {
            return fileStore.size
        }),

        // --- parsed ASTs ---
        getAST: vi.fn(async (filePath: string) => {
            const found = astStore.get(filePath)
            return found ?? null
        }),
        setAST: vi.fn(async (filePath: string, ast: FileAST): Promise<void> => {
            astStore.set(filePath, ast)
        }),
        deleteAST: vi.fn(async (filePath: string): Promise<void> => {
            astStore.delete(filePath)
        }),
        getAllASTs: vi.fn(async () => {
            return new Map(astStore)
        }),

        // --- file metadata ---
        getMeta: vi.fn(async (filePath: string) => {
            const found = metaStore.get(filePath)
            return found ?? null
        }),
        setMeta: vi.fn(async (filePath: string, meta: FileMeta): Promise<void> => {
            metaStore.set(filePath, meta)
        }),
        deleteMeta: vi.fn(async (filePath: string): Promise<void> => {
            metaStore.delete(filePath)
        }),
        getAllMetas: vi.fn(async () => {
            return new Map(metaStore)
        }),

        // --- indexes (returned by reference, replaced wholesale on set) ---
        getSymbolIndex: vi.fn(async () => {
            return currentSymbols
        }),
        setSymbolIndex: vi.fn(async (index: SymbolIndex): Promise<void> => {
            currentSymbols = index
        }),
        getDepsGraph: vi.fn(async () => {
            return currentDeps
        }),
        setDepsGraph: vi.fn(async (graph: DepsGraph): Promise<void> => {
            currentDeps = graph
        }),

        // --- project config ---
        getProjectConfig: vi.fn(async (key: string) => {
            const value = configStore.get(key)
            return value ?? null
        }),
        setProjectConfig: vi.fn(async (key: string, value: unknown): Promise<void> => {
            configStore.set(key, value)
        }),

        // --- connection lifecycle (only a flag is tracked) ---
        connect: vi.fn(async (): Promise<void> => {
            isOpen = true
        }),
        disconnect: vi.fn(async (): Promise<void> => {
            isOpen = false
        }),
        isConnected: vi.fn(() => {
            return isOpen
        }),

        // Wipes every store; the connection flag is left untouched.
        clear: vi.fn(async (): Promise<void> => {
            fileStore.clear()
            astStore.clear()
            metaStore.clear()
            currentSymbols = new Map()
            currentDeps = emptyDepsGraph()
            configStore.clear()
        }),
    }

    return storage
}
|
||||
|
||||
/**
 * Builds an `ISessionStorage` test double backed by two in-memory maps:
 * one for sessions keyed by id, one for per-session undo stacks.
 *
 * Sessions are stored and returned by reference (no serialisation step), and
 * every method is wrapped in `vi.fn` so tests can inspect calls.
 *
 * @returns A fresh, empty session storage.
 */
export function createInMemorySessionStorage(): ISessionStorage {
    const sessionsById = new Map<string, Session>()
    const undoBySession = new Map<string, UndoEntry[]>()

    const sessionStorage: ISessionStorage = {
        saveSession: vi.fn(async (session: Session): Promise<void> => {
            sessionsById.set(session.id, session)
        }),

        loadSession: vi.fn(async (sessionId: string) => {
            const stored = sessionsById.get(sessionId)
            return stored ?? null
        }),

        // Removing a session also drops its undo history.
        deleteSession: vi.fn(async (sessionId: string): Promise<void> => {
            sessionsById.delete(sessionId)
            undoBySession.delete(sessionId)
        }),

        // An omitted (or empty) project name lists every session.
        listSessions: vi.fn(async (projectName?: string): Promise<SessionListItem[]> => {
            return [...sessionsById.values()]
                .filter((candidate) => !projectName || candidate.projectName === projectName)
                .map(
                    (candidate): SessionListItem => ({
                        id: candidate.id,
                        projectName: candidate.projectName,
                        createdAt: candidate.createdAt,
                        lastActivityAt: candidate.lastActivityAt,
                        messageCount: candidate.history.length,
                    }),
                )
        }),

        // Picks the session with the greatest lastActivityAt; on a tie the
        // earlier-inserted session wins because the comparison is strict.
        getLatestSession: vi.fn(async (projectName: string) => {
            return [...sessionsById.values()]
                .filter((candidate) => candidate.projectName === projectName)
                .reduce<Session | null>((newest, candidate) => {
                    if (newest === null || candidate.lastActivityAt > newest.lastActivityAt) {
                        return candidate
                    }
                    return newest
                }, null)
        }),

        sessionExists: vi.fn(async (sessionId: string) => {
            return sessionsById.has(sessionId)
        }),

        pushUndoEntry: vi.fn(async (sessionId: string, entry: UndoEntry): Promise<void> => {
            const existing = undoBySession.get(sessionId)
            if (existing) {
                existing.push(entry)
            } else {
                undoBySession.set(sessionId, [entry])
            }
        }),

        // Yields null when the session has no stack or the stack is empty.
        popUndoEntry: vi.fn(async (sessionId: string) => {
            return undoBySession.get(sessionId)?.pop() ?? null
        }),

        getUndoStack: vi.fn(async (sessionId: string) => {
            const existing = undoBySession.get(sessionId)
            return existing ?? []
        }),

        // Unknown session ids are ignored silently.
        touchSession: vi.fn(async (sessionId: string): Promise<void> => {
            const target = sessionsById.get(sessionId)
            if (!target) {
                return
            }
            target.lastActivityAt = Date.now()
        }),

        clearAllSessions: vi.fn(async (): Promise<void> => {
            sessionsById.clear()
            undoBySession.clear()
        }),
    }

    return sessionStorage
}
|
||||
|
||||
/**
 * Instantiates a real `OllamaClient` for E2E runs.
 *
 * @param config - Optional overrides. Fields given here are spread over
 *   `DEFAULT_TEST_LLM_CONFIG`, so they take precedence over the defaults.
 * @returns A client constructed with the merged settings.
 */
export function createRealOllamaClient(config?: Partial<LLMConfig>): OllamaClient {
    const effectiveConfig = { ...DEFAULT_TEST_LLM_CONFIG, ...config }
    return new OllamaClient(effectiveConfig)
}
|
||||
|
||||
/**
 * Builds a `ToolRegistry` and populates it through `registerAllTools`, the
 * same setup routine imported from the CLI's `tools-setup` module.
 *
 * @returns The populated registry.
 */
export function createRealToolRegistry(): ToolRegistry {
    const toolRegistry = new ToolRegistry()
    registerAllTools(toolRegistry)
    return toolRegistry
}
|
||||
|
||||
/**
 * Per-process sequence number appended to generated session ids.
 * `Date.now()` alone repeats when two sessions are created within the same
 * millisecond, which would make them share an id.
 */
let testSessionSequence = 0

/**
 * Creates a new `Session` with a unique, `test-`-prefixed id.
 *
 * The id has the form `test-<epoch ms>-<sequence>`; the sequence part
 * guarantees uniqueness within the current process even for back-to-back calls.
 *
 * @param projectName - Project the session belongs to. Defaults to `"test-project"`.
 * @returns The newly constructed session.
 */
export function createTestSession(projectName = "test-project"): Session {
    testSessionSequence += 1
    return new Session(`test-${Date.now()}-${testSessionSequence}`, projectName)
}
|
||||
|
||||
/**
 * Creates a throw-away sample project on disk for E2E runs.
 *
 * A fresh `ipuaro-e2e-*` directory is made inside the OS temp directory and
 * filled with:
 * - `src/index.ts` - `main`, `add`, `multiply` and a TODO comment
 * - `src/utils.ts` - imports `add`, defines `sum` and a `Calculator` class, has a FIXME comment
 * - `package.json` - minimal ESM manifest with a `test` script
 * - `README.md`    - short description
 *
 * If anything fails after the directory has been created, the directory is
 * removed again before the error is rethrown, so a failed setup does not leave
 * an orphaned temp directory behind.
 *
 * @returns Path of the created project root. Remove it with
 *   `cleanupTestProject` when the test is done.
 * @throws Whatever the underlying `fs` call rejected with.
 */
export async function createTestProject(): Promise<string> {
    const indexSource = `/**
 * Main entry point
 */
export function main(): void {
    console.log("Hello, world!")
}

export function add(a: number, b: number): number {
    return a + b
}

export function multiply(a: number, b: number): number {
    return a * b
}

// TODO: Add more math functions
main()
`

    const utilsSource = `/**
 * Utility functions
 */
import { add } from "./index.js"

export function sum(numbers: number[]): number {
    return numbers.reduce((acc, n) => add(acc, n), 0)
}

export class Calculator {
    private result: number = 0

    add(n: number): this {
        this.result += n
        return this
    }

    subtract(n: number): this {
        this.result -= n
        return this
    }

    getResult(): number {
        return this.result
    }

    reset(): void {
        this.result = 0
    }
}

// FIXME: Handle edge cases for negative numbers
`

    const packageManifest = JSON.stringify(
        {
            name: "test-project",
            version: "1.0.0",
            type: "module",
            scripts: {
                test: "echo 'Tests passed!'",
            },
        },
        null,
        4,
    )

    const readmeSource = `# Test Project

A sample project for E2E testing.

## Features
- Basic math functions
- Calculator class
`

    // Path segments (relative to the project root) paired with file contents,
    // in the order they are written.
    const fixtures: ReadonlyArray<readonly [readonly string[], string]> = [
        [["src", "index.ts"], indexSource],
        [["src", "utils.ts"], utilsSource],
        [["package.json"], packageManifest],
        [["README.md"], readmeSource],
    ]

    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "ipuaro-e2e-"))

    try {
        await fs.mkdir(path.join(tempDir, "src"), { recursive: true })

        // Written one after another so that, on failure, no write is still in
        // flight while the directory is being removed below.
        for (const [segments, contents] of fixtures) {
            await fs.writeFile(path.join(tempDir, ...segments), contents)
        }
    } catch (error) {
        // Best-effort cleanup; the original failure is what the caller needs to see.
        await fs.rm(tempDir, { recursive: true, force: true }).catch(() => undefined)
        throw error
    }

    return tempDir
}
|
||||
|
||||
/**
 * Deletes a test project directory together with everything inside it.
 *
 * The removal is recursive and uses `force`, so a path that no longer exists
 * is not reported as an error.
 *
 * @param projectDir - Directory to remove, typically the path returned by
 *   `createTestProject`.
 */
export async function cleanupTestProject(projectDir: string): Promise<void> {
    const removalOptions = { recursive: true, force: true }
    await fs.rm(projectDir, removalOptions)
}
|
||||
|
||||
/**
 * Everything an E2E test needs, bundled into one object.
 * Produced by `createE2ETestDependencies`.
 */
export interface E2ETestDependencies {
    /** In-memory replacement for the project storage. */
    storage: IStorage
    /** In-memory replacement for the session storage. */
    sessionStorage: ISessionStorage
    /** Real Ollama client; talks to an actual server. */
    llm: OllamaClient
    /** Registry populated with the application's tools. */
    tools: ToolRegistry
    /** Fresh session for the test. */
    session: Session
    /** Root directory of the temporary sample project on disk. */
    projectRoot: string
}
|
||||
|
||||
/**
 * Assembles every dependency an E2E test needs, using a REAL Ollama client.
 *
 * The in-memory pieces are constructed first and the on-disk sample project is
 * created last. If a constructor or tool registration throws, nothing has been
 * written to disk yet, so no temp directory is leaked.
 *
 * @param llmConfig - Optional overrides forwarded to `createRealOllamaClient`.
 * @returns The bundled dependencies. The caller is responsible for removing
 *   `projectRoot` (see `cleanupTestProject`).
 */
export async function createE2ETestDependencies(
    llmConfig?: Partial<LLMConfig>,
): Promise<E2ETestDependencies> {
    // Pure in-memory construction: may throw, but leaves nothing behind.
    const storage = createInMemoryStorage()
    const sessionStorage = createInMemorySessionStorage()
    const llm = createRealOllamaClient(llmConfig)
    const tools = createRealToolRegistry()
    const session = createTestSession()

    // Only touch the filesystem once everything else has succeeded.
    const projectRoot = await createTestProject()

    return { storage, sessionStorage, llm, tools, session, projectRoot }
}
|
||||
|
||||
/**
 * Reports whether Ollama is available, by asking a client built with the
 * default test configuration.
 *
 * @returns The result of `OllamaClient.isAvailable()`.
 */
export async function isOllamaAvailable(): Promise<boolean> {
    const available = await createRealOllamaClient().isAvailable()
    return available
}
|
||||
|
||||
/**
 * Reports whether the given model is available, by asking a client built with
 * the default test configuration.
 *
 * @param model - Model tag to look for. Defaults to the model configured in
 *   `DEFAULT_TEST_LLM_CONFIG`, so the check always matches the model the
 *   tests will actually use.
 * @returns The result of `OllamaClient.hasModel(model)`.
 */
export async function isModelAvailable(
    model: string = DEFAULT_TEST_LLM_CONFIG.model,
): Promise<boolean> {
    const client = createRealOllamaClient()
    return client.hasModel(model)
}
|
||||
@@ -135,6 +135,108 @@ describe("ResponseParser", () => {
|
||||
expect(result.parseErrors[0]).toContain("unknown_tool")
|
||||
})
|
||||
|
||||
it("should normalize tool name aliases", () => {
    // Each row: a response using an alias, and the canonical tool name the
    // parser is expected to report for it.
    const aliasCases = [
        {
            // get_functions -> get_lines (common LLM typo)
            response: `<tool_call name="get_functions"><path>src/index.ts</path></tool_call>`,
            canonical: "get_lines",
            expectNoParseErrors: true,
        },
        {
            // read_file -> get_lines
            response: `<tool_call name="read_file"><path>test.ts</path></tool_call>`,
            canonical: "get_lines",
            expectNoParseErrors: false,
        },
        {
            // find_todos -> get_todos
            response: `<tool_call name="find_todos"></tool_call>`,
            canonical: "get_todos",
            expectNoParseErrors: false,
        },
        {
            // list_files -> get_structure
            response: `<tool_call name="list_files"><path>.</path></tool_call>`,
            canonical: "get_structure",
            expectNoParseErrors: false,
        },
    ]

    for (const { response, canonical, expectNoParseErrors } of aliasCases) {
        const parsed = parseToolCalls(response)

        expect(parsed.toolCalls).toHaveLength(1)
        expect(parsed.toolCalls[0].name).toBe(canonical)
        // Only the first case additionally pins the error flag.
        if (expectNoParseErrors) {
            expect(parsed.hasParseErrors).toBe(false)
        }
    }
})
|
||||
|
||||
// JSON format tests
it("should parse JSON format tool calls as fallback", () => {
    // A bare JSON object, with no XML tool_call wrapper anywhere in the response.
    const jsonOnly = `{"name": "get_lines", "arguments": {"path": "src/index.ts"}}`

    const parsed = parseToolCalls(jsonOnly)
    const expectedParams = { path: "src/index.ts" }

    expect(parsed.toolCalls).toHaveLength(1)
    expect(parsed.toolCalls[0].name).toBe("get_lines")
    expect(parsed.toolCalls[0].params).toEqual(expectedParams)
    expect(parsed.hasParseErrors).toBe(false)
})
|
||||
|
||||
it("should parse JSON format with numeric arguments", () => {
    // Numeric JSON values must come through as numbers, not strings.
    const jsonWithNumbers = `{"name": "get_lines", "arguments": {"path": "src/index.ts", "start": 1, "end": 50}}`
    const expectedParams = { path: "src/index.ts", start: 1, end: 50 }

    const parsed = parseToolCalls(jsonWithNumbers)

    expect(parsed.toolCalls).toHaveLength(1)
    expect(parsed.toolCalls[0].params).toEqual(expectedParams)
})
|
||||
|
||||
it("should parse JSON format with surrounding text", () => {
    // The JSON call sits between two lines of ordinary prose; the call must be
    // extracted and the prose must survive in `content`.
    const leadIn = "I'll read the file for you:"
    const leadOut = "Let me know if you need more."
    const response = [
        leadIn,
        `{"name": "get_lines", "arguments": {"path": "src/index.ts"}}`,
        leadOut,
    ].join("\n")

    const parsed = parseToolCalls(response)

    expect(parsed.toolCalls).toHaveLength(1)
    expect(parsed.toolCalls[0].name).toBe("get_lines")
    expect(parsed.content).toContain(leadIn)
    expect(parsed.content).toContain(leadOut)
})
|
||||
|
||||
it("should normalize tool name aliases in JSON format", () => {
    // read_file -> get_lines, this time arriving as JSON instead of XML.
    const aliasedJson = `{"name": "read_file", "arguments": {"path": "test.ts"}}`

    const parsed = parseToolCalls(aliasedJson)

    expect(parsed.toolCalls).toHaveLength(1)
    expect(parsed.toolCalls[0].name).toBe("get_lines")
})
|
||||
|
||||
it("should reject unknown tool names in JSON format", () => {
    // A tool name that is neither registered nor an alias must yield no call
    // and a parse error that names the offending tool.
    const unknownToolJson = `{"name": "unknown_tool", "arguments": {"path": "test.ts"}}`

    const outcome = parseToolCalls(unknownToolJson)

    expect(outcome.toolCalls).toHaveLength(0)
    expect(outcome.hasParseErrors).toBe(true)
    expect(outcome.parseErrors[0]).toContain("unknown_tool")
})
|
||||
|
||||
it("should prefer XML over JSON when both present", () => {
    const xmlCall = `<tool_call name="get_lines"><path>xml.ts</path></tool_call>`
    const jsonCall = `{"name": "get_function", "arguments": {"path": "json.ts", "name": "foo"}}`
    const response = [xmlCall, jsonCall].join("\n")

    const parsed = parseToolCalls(response)

    // Should only parse XML since it was found first
    expect(parsed.toolCalls).toHaveLength(1)
    const onlyCall = parsed.toolCalls[0]
    expect(onlyCall.name).toBe("get_lines")
    expect(onlyCall.params.path).toBe("xml.ts")
})
|
||||
|
||||
it("should parse JSON with empty arguments", () => {
    // An empty "arguments" object is valid and maps to empty params.
    const noArgsJson = `{"name": "git_status", "arguments": {}}`

    const parsed = parseToolCalls(noArgsJson)

    expect(parsed.toolCalls).toHaveLength(1)
    expect(parsed.toolCalls[0].name).toBe("git_status")
    expect(parsed.toolCalls[0].params).toEqual({})
})
|
||||
|
||||
it("should support CDATA for multiline content", () => {
|
||||
const response = `<tool_call name="edit_lines">
|
||||
<path>src/index.ts</path>
|
||||
|
||||
@@ -19,10 +19,16 @@ describe("prompts", () => {
|
||||
expect(SYSTEM_PROMPT.length).toBeGreaterThan(100)
|
||||
})
|
||||
|
||||
it("should contain core principles", () => {
|
||||
expect(SYSTEM_PROMPT).toContain("Lazy Loading")
|
||||
expect(SYSTEM_PROMPT).toContain("Precision")
|
||||
expect(SYSTEM_PROMPT).toContain("Safety")
|
||||
it("should contain mandatory tool usage instructions", () => {
    // Phrases the system prompt must include verbatim.
    const requiredPhrases = ["MANDATORY", "Tools for Code Questions", "ZERO code in your context"]

    for (const phrase of requiredPhrases) {
        expect(SYSTEM_PROMPT).toContain(phrase)
    }
})
|
||||
|
||||
it("should contain when to use and when not to use tools", () => {
    // Phrases the system prompt must include verbatim.
    const requiredPhrases = ["When to Use Tools", "Do NOT use tools", "Greetings"]

    for (const phrase of requiredPhrases) {
        expect(SYSTEM_PROMPT).toContain(phrase)
    }
})
|
||||
|
||||
it("should list available tools", () => {
|
||||
@@ -34,8 +40,9 @@ describe("prompts", () => {
|
||||
})
|
||||
|
||||
it("should include safety rules", () => {
|
||||
expect(SYSTEM_PROMPT).toContain("Safety Rules")
|
||||
expect(SYSTEM_PROMPT).toContain("Never execute commands that could harm")
|
||||
expect(SYSTEM_PROMPT).toContain("Stay safe")
|
||||
expect(SYSTEM_PROMPT).toContain("destructive commands")
|
||||
expect(SYSTEM_PROMPT).toContain("Verify before editing")
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user