mirror of
https://github.com/samiyev/puaros.git
synced 2025-12-28 07:16:53 +05:00
Refactor OllamaClient to use pure XML format for tool calls as designed in CONCEPT.md. Removes dual system (Ollama native tools + XML parser) in favor of single source of truth (ResponseParser). Changes: - Remove tools parameter from ILLMClient.chat() interface - Remove convertTools(), convertParameters(), extractToolCalls() - Add XML format instructions to system prompt with examples - Add CDATA support in ResponseParser for multiline content - Add tool name validation with helpful error messages - Move ToolDef/ToolParameter to shared/types/tool-definitions.ts Benefits: - Simplified architecture (single source of truth) - CONCEPT.md compliance (pure XML as designed) - Better validation (early detection of invalid tools) - Reduced complexity (fewer format conversions) Tests: 1444 passed (+4 new tests) Coverage: 97.83% lines, 91.98% branches, 99.16% functions
266 lines
6.7 KiB
TypeScript
266 lines
6.7 KiB
TypeScript
import { createToolCall, type ToolCall } from "../../domain/value-objects/ToolCall.js"
|
|
|
|
/**
|
|
* Parsed response from LLM.
|
|
*/
|
|
export interface ParsedResponse {
|
|
/** Text content (excluding tool calls) */
|
|
content: string
|
|
/** Extracted tool calls */
|
|
toolCalls: ToolCall[]
|
|
/** Whether parsing encountered issues */
|
|
hasParseErrors: boolean
|
|
/** Parse error messages */
|
|
parseErrors: string[]
|
|
}
|
|
|
|
/**
|
|
* XML tool call tag pattern.
|
|
* Matches: <tool_call name="tool_name">...</tool_call>
|
|
*/
|
|
const TOOL_CALL_REGEX = /<tool_call\s+name\s*=\s*"([^"]+)">([\s\S]*?)<\/tool_call>/gi
|
|
|
|
/**
|
|
* XML parameter tag pattern.
|
|
* Matches: <param name="param_name">value</param> or <param_name>value</param_name>
|
|
*/
|
|
const PARAM_REGEX_NAMED = /<param\s+name\s*=\s*"([^"]+)">([\s\S]*?)<\/param>/gi
|
|
const PARAM_REGEX_ELEMENT = /<([a-z_][a-z0-9_]*)>([\s\S]*?)<\/\1>/gi
|
|
|
|
/**
|
|
* CDATA section pattern.
|
|
* Matches: <![CDATA[...]]>
|
|
*/
|
|
const CDATA_REGEX = /<!\[CDATA\[([\s\S]*?)\]\]>/g
|
|
|
|
/**
|
|
* Valid tool names.
|
|
* Used for validation to catch typos or hallucinations.
|
|
*/
|
|
const VALID_TOOL_NAMES = new Set([
|
|
"get_lines",
|
|
"get_function",
|
|
"get_class",
|
|
"get_structure",
|
|
"edit_lines",
|
|
"create_file",
|
|
"delete_file",
|
|
"find_references",
|
|
"find_definition",
|
|
"get_dependencies",
|
|
"get_dependents",
|
|
"get_complexity",
|
|
"get_todos",
|
|
"git_status",
|
|
"git_diff",
|
|
"git_commit",
|
|
"run_command",
|
|
"run_tests",
|
|
])
|
|
|
|
/**
|
|
* Parse tool calls from LLM response text.
|
|
* Supports XML format: <tool_call name="get_lines"><path>src/index.ts</path></tool_call>
|
|
* Validates tool names and provides helpful error messages.
|
|
*/
|
|
export function parseToolCalls(response: string): ParsedResponse {
|
|
const toolCalls: ToolCall[] = []
|
|
const parseErrors: string[] = []
|
|
let content = response
|
|
|
|
const matches = [...response.matchAll(TOOL_CALL_REGEX)]
|
|
|
|
for (const match of matches) {
|
|
const [fullMatch, toolName, paramsXml] = match
|
|
|
|
if (!VALID_TOOL_NAMES.has(toolName)) {
|
|
parseErrors.push(
|
|
`Unknown tool "${toolName}". Valid tools: ${[...VALID_TOOL_NAMES].join(", ")}`,
|
|
)
|
|
continue
|
|
}
|
|
|
|
try {
|
|
const params = parseParameters(paramsXml)
|
|
const toolCall = createToolCall(
|
|
`xml_${String(Date.now())}_${String(toolCalls.length)}`,
|
|
toolName,
|
|
params,
|
|
)
|
|
toolCalls.push(toolCall)
|
|
content = content.replace(fullMatch, "")
|
|
} catch (error) {
|
|
const errorMsg = error instanceof Error ? error.message : String(error)
|
|
parseErrors.push(`Failed to parse tool call "${toolName}": ${errorMsg}`)
|
|
}
|
|
}
|
|
|
|
content = content.trim()
|
|
|
|
return {
|
|
content,
|
|
toolCalls,
|
|
hasParseErrors: parseErrors.length > 0,
|
|
parseErrors,
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parse parameters from XML content.
|
|
*/
|
|
function parseParameters(xml: string): Record<string, unknown> {
|
|
const params: Record<string, unknown> = {}
|
|
|
|
const namedMatches = [...xml.matchAll(PARAM_REGEX_NAMED)]
|
|
for (const match of namedMatches) {
|
|
const [, name, value] = match
|
|
params[name] = parseValue(value)
|
|
}
|
|
|
|
if (namedMatches.length === 0) {
|
|
const elementMatches = [...xml.matchAll(PARAM_REGEX_ELEMENT)]
|
|
for (const match of elementMatches) {
|
|
const [, name, value] = match
|
|
params[name] = parseValue(value)
|
|
}
|
|
}
|
|
|
|
return params
|
|
}
|
|
|
|
/**
|
|
* Parse a value string to appropriate type.
|
|
* Supports CDATA sections for multiline content.
|
|
*/
|
|
function parseValue(value: string): unknown {
|
|
const trimmed = value.trim()
|
|
|
|
const cdataMatches = [...trimmed.matchAll(CDATA_REGEX)]
|
|
if (cdataMatches.length > 0 && cdataMatches[0][1] !== undefined) {
|
|
return cdataMatches[0][1]
|
|
}
|
|
|
|
if (trimmed === "true") {
|
|
return true
|
|
}
|
|
|
|
if (trimmed === "false") {
|
|
return false
|
|
}
|
|
|
|
if (trimmed === "null") {
|
|
return null
|
|
}
|
|
|
|
const num = Number(trimmed)
|
|
if (!isNaN(num) && trimmed !== "") {
|
|
return num
|
|
}
|
|
|
|
if (
|
|
(trimmed.startsWith("[") && trimmed.endsWith("]")) ||
|
|
(trimmed.startsWith("{") && trimmed.endsWith("}"))
|
|
) {
|
|
try {
|
|
return JSON.parse(trimmed)
|
|
} catch {
|
|
return trimmed
|
|
}
|
|
}
|
|
|
|
return trimmed
|
|
}
|
|
|
|
/**
|
|
* Format tool calls to XML for prompt injection.
|
|
* Useful when you need to show the LLM the expected format.
|
|
*/
|
|
export function formatToolCallsAsXml(toolCalls: ToolCall[]): string {
|
|
return toolCalls
|
|
.map((tc) => {
|
|
const params = Object.entries(tc.params)
|
|
.map(([key, value]) => ` <${key}>${formatValueForXml(value)}</${key}>`)
|
|
.join("\n")
|
|
return `<tool_call name="${tc.name}">\n${params}\n</tool_call>`
|
|
})
|
|
.join("\n\n")
|
|
}
|
|
|
|
/**
|
|
* Format a value for XML output.
|
|
*/
|
|
function formatValueForXml(value: unknown): string {
|
|
if (value === null || value === undefined) {
|
|
return ""
|
|
}
|
|
|
|
if (typeof value === "object") {
|
|
return JSON.stringify(value)
|
|
}
|
|
|
|
if (typeof value === "string") {
|
|
return value
|
|
}
|
|
|
|
if (typeof value === "number" || typeof value === "boolean") {
|
|
return String(value)
|
|
}
|
|
|
|
return JSON.stringify(value)
|
|
}
|
|
|
|
/**
|
|
* Extract thinking/reasoning from response.
|
|
* Matches content between <thinking>...</thinking> tags.
|
|
*/
|
|
export function extractThinking(response: string): { thinking: string; content: string } {
|
|
const thinkingRegex = /<thinking>([\s\S]*?)<\/thinking>/gi
|
|
const matches = [...response.matchAll(thinkingRegex)]
|
|
|
|
if (matches.length === 0) {
|
|
return { thinking: "", content: response }
|
|
}
|
|
|
|
let content = response
|
|
const thoughts: string[] = []
|
|
|
|
for (const match of matches) {
|
|
thoughts.push(match[1].trim())
|
|
content = content.replace(match[0], "")
|
|
}
|
|
|
|
return {
|
|
thinking: thoughts.join("\n\n"),
|
|
content: content.trim(),
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Check if response contains tool calls.
|
|
*/
|
|
export function hasToolCalls(response: string): boolean {
|
|
return TOOL_CALL_REGEX.test(response)
|
|
}
|
|
|
|
/**
|
|
* Validate tool call parameters against expected schema.
|
|
*/
|
|
export function validateToolCallParams(
|
|
toolName: string,
|
|
params: Record<string, unknown>,
|
|
requiredParams: string[],
|
|
): { valid: boolean; errors: string[] } {
|
|
const errors: string[] = []
|
|
|
|
for (const param of requiredParams) {
|
|
if (!(param in params) || params[param] === undefined || params[param] === null) {
|
|
errors.push(`Missing required parameter: ${param}`)
|
|
}
|
|
}
|
|
|
|
return {
|
|
valid: errors.length === 0,
|
|
errors,
|
|
}
|
|
}
|