Merge pull request #1658 from code-yeongyu/fix-1233
fix: detect completion tags in ralph/ULW loop (#1233)
This commit is contained in:
commit
32193dc10d
@ -511,6 +511,38 @@ describe("ralph-loop", () => {
|
|||||||
expect(messagesCalls[0].sessionID).toBe("session-123")
|
expect(messagesCalls[0].sessionID).toBe("session-123")
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test("should detect completion promise in reasoning part via session messages API", async () => {
|
||||||
|
//#given - active loop with assistant reasoning containing completion promise
|
||||||
|
mockSessionMessages = [
|
||||||
|
{ info: { role: "user" }, parts: [{ type: "text", text: "Build something" }] },
|
||||||
|
{
|
||||||
|
info: { role: "assistant" },
|
||||||
|
parts: [
|
||||||
|
{ type: "reasoning", text: "I am done now. <promise>REASONING_DONE</promise>" },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||||
|
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
|
||||||
|
})
|
||||||
|
hook.startLoop("session-123", "Build something", {
|
||||||
|
completionPromise: "REASONING_DONE",
|
||||||
|
})
|
||||||
|
|
||||||
|
//#when - session goes idle
|
||||||
|
await hook.event({
|
||||||
|
event: {
|
||||||
|
type: "session.idle",
|
||||||
|
properties: { sessionID: "session-123" },
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
//#then - loop completed via API detection, no continuation
|
||||||
|
expect(promptCalls.length).toBe(0)
|
||||||
|
expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
|
||||||
|
expect(hook.getState()).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
test("should handle multiple iterations correctly", async () => {
|
test("should handle multiple iterations correctly", async () => {
|
||||||
// given - active loop
|
// given - active loop
|
||||||
const hook = createRalphLoopHook(createMockPluginInput())
|
const hook = createRalphLoopHook(createMockPluginInput())
|
||||||
@ -596,13 +628,14 @@ describe("ralph-loop", () => {
|
|||||||
expect(promptCalls.length).toBe(1)
|
expect(promptCalls.length).toBe(1)
|
||||||
})
|
})
|
||||||
|
|
||||||
test("should only check LAST assistant message for completion", async () => {
|
test("should check last 3 assistant messages for completion", async () => {
|
||||||
// given - multiple assistant messages, only first has completion promise
|
// given - multiple assistant messages, promise in recent (not last) assistant message
|
||||||
mockSessionMessages = [
|
mockSessionMessages = [
|
||||||
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
|
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
|
||||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "I'll work on it. <promise>DONE</promise>" }] },
|
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Working on it." }] },
|
||||||
{ info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] },
|
{ info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] },
|
||||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Working on more features..." }] },
|
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Nearly there... <promise>DONE</promise>" }] },
|
||||||
|
{ info: { role: "assistant" }, parts: [{ type: "text", text: "(extra output after promise)" }] },
|
||||||
]
|
]
|
||||||
const hook = createRalphLoopHook(createMockPluginInput(), {
|
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||||
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
|
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
|
||||||
@ -614,35 +647,36 @@ describe("ralph-loop", () => {
|
|||||||
event: { type: "session.idle", properties: { sessionID: "session-123" } },
|
event: { type: "session.idle", properties: { sessionID: "session-123" } },
|
||||||
})
|
})
|
||||||
|
|
||||||
// then - loop should continue (last message has no completion promise)
|
// then - loop should complete (promise found within last 3 assistant messages)
|
||||||
expect(promptCalls.length).toBe(1)
|
|
||||||
expect(hook.getState()?.iteration).toBe(2)
|
|
||||||
})
|
|
||||||
|
|
||||||
test("should detect completion only in LAST assistant message", async () => {
|
|
||||||
// given - last assistant message has completion promise
|
|
||||||
mockSessionMessages = [
|
|
||||||
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
|
|
||||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Starting work..." }] },
|
|
||||||
{ info: { role: "user" }, parts: [{ type: "text", text: "Continue" }] },
|
|
||||||
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Task complete! <promise>DONE</promise>" }] },
|
|
||||||
]
|
|
||||||
const hook = createRalphLoopHook(createMockPluginInput(), {
|
|
||||||
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
|
|
||||||
})
|
|
||||||
hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
|
|
||||||
|
|
||||||
// when - session goes idle
|
|
||||||
await hook.event({
|
|
||||||
event: { type: "session.idle", properties: { sessionID: "session-123" } },
|
|
||||||
})
|
|
||||||
|
|
||||||
// then - loop should complete (last message has completion promise)
|
|
||||||
expect(promptCalls.length).toBe(0)
|
expect(promptCalls.length).toBe(0)
|
||||||
expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
|
expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
|
||||||
expect(hook.getState()).toBeNull()
|
expect(hook.getState()).toBeNull()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test("should NOT detect completion if promise is older than last 3 assistant messages", async () => {
|
||||||
|
// given - promise appears in an assistant message older than last 3
|
||||||
|
mockSessionMessages = [
|
||||||
|
{ info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] },
|
||||||
|
{ info: { role: "assistant" }, parts: [{ type: "text", text: "Promise early <promise>DONE</promise>" }] },
|
||||||
|
{ info: { role: "assistant" }, parts: [{ type: "text", text: "More work 1" }] },
|
||||||
|
{ info: { role: "assistant" }, parts: [{ type: "text", text: "More work 2" }] },
|
||||||
|
{ info: { role: "assistant" }, parts: [{ type: "text", text: "More work 3" }] },
|
||||||
|
]
|
||||||
|
const hook = createRalphLoopHook(createMockPluginInput(), {
|
||||||
|
getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"),
|
||||||
|
})
|
||||||
|
hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
|
||||||
|
|
||||||
|
// when - session goes idle
|
||||||
|
await hook.event({
|
||||||
|
event: { type: "session.idle", properties: { sessionID: "session-123" } },
|
||||||
|
})
|
||||||
|
|
||||||
|
// then - loop should continue (promise is older than last 3 assistant messages)
|
||||||
|
expect(promptCalls.length).toBe(1)
|
||||||
|
expect(hook.getState()?.iteration).toBe(2)
|
||||||
|
})
|
||||||
|
|
||||||
test("should allow starting new loop while previous loop is active (different session)", async () => {
|
test("should allow starting new loop while previous loop is active (different session)", async () => {
|
||||||
// given - active loop in session A
|
// given - active loop in session A
|
||||||
const hook = createRalphLoopHook(createMockPluginInput())
|
const hook = createRalphLoopHook(createMockPluginInput())
|
||||||
@ -928,7 +962,7 @@ Original task: Build something`
|
|||||||
const elapsed = Date.now() - startTime
|
const elapsed = Date.now() - startTime
|
||||||
|
|
||||||
// then - should complete quickly (not hang for 10s)
|
// then - should complete quickly (not hang for 10s)
|
||||||
expect(elapsed).toBeLessThan(2000)
|
expect(elapsed).toBeLessThan(6000)
|
||||||
// then - loop should continue (API error = no completion detected)
|
// then - loop should continue (API error = no completion detected)
|
||||||
expect(promptCalls.length).toBe(1)
|
expect(promptCalls.length).toBe(1)
|
||||||
expect(apiCallCount).toBeGreaterThan(0)
|
expect(apiCallCount).toBeGreaterThan(0)
|
||||||
|
|||||||
@ -67,7 +67,7 @@ export interface RalphLoopHook {
|
|||||||
getState: () => RalphLoopState | null
|
getState: () => RalphLoopState | null
|
||||||
}
|
}
|
||||||
|
|
||||||
const DEFAULT_API_TIMEOUT = 3000
|
const DEFAULT_API_TIMEOUT = 5000
|
||||||
|
|
||||||
export function createRalphLoopHook(
|
export function createRalphLoopHook(
|
||||||
ctx: PluginInput,
|
ctx: PluginInput,
|
||||||
@ -80,6 +80,23 @@ export function createRalphLoopHook(
|
|||||||
const apiTimeout = options?.apiTimeout ?? DEFAULT_API_TIMEOUT
|
const apiTimeout = options?.apiTimeout ?? DEFAULT_API_TIMEOUT
|
||||||
const checkSessionExists = options?.checkSessionExists
|
const checkSessionExists = options?.checkSessionExists
|
||||||
|
|
||||||
|
async function withTimeout<TData>(promise: Promise<TData>, timeoutMs: number): Promise<TData> {
|
||||||
|
let timeoutId: ReturnType<typeof setTimeout> | undefined
|
||||||
|
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||||
|
timeoutId = setTimeout(() => {
|
||||||
|
reject(new Error("API timeout"))
|
||||||
|
}, timeoutMs)
|
||||||
|
})
|
||||||
|
|
||||||
|
try {
|
||||||
|
return await Promise.race([promise, timeoutPromise])
|
||||||
|
} finally {
|
||||||
|
if (timeoutId !== undefined) {
|
||||||
|
clearTimeout(timeoutId)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
function getSessionState(sessionID: string): SessionState {
|
function getSessionState(sessionID: string): SessionState {
|
||||||
let state = sessions.get(sessionID)
|
let state = sessions.get(sessionID)
|
||||||
if (!state) {
|
if (!state) {
|
||||||
@ -126,34 +143,44 @@ export function createRalphLoopHook(
|
|||||||
promise: string
|
promise: string
|
||||||
): Promise<boolean> {
|
): Promise<boolean> {
|
||||||
try {
|
try {
|
||||||
const response = await Promise.race([
|
const response = await withTimeout(
|
||||||
ctx.client.session.messages({
|
ctx.client.session.messages({
|
||||||
path: { id: sessionID },
|
path: { id: sessionID },
|
||||||
query: { directory: ctx.directory },
|
query: { directory: ctx.directory },
|
||||||
}),
|
}),
|
||||||
new Promise<never>((_, reject) =>
|
apiTimeout
|
||||||
setTimeout(() => reject(new Error("API timeout")), apiTimeout)
|
)
|
||||||
),
|
|
||||||
])
|
|
||||||
|
|
||||||
const messages = (response as { data?: unknown[] }).data ?? []
|
const messages = (response as { data?: unknown[] }).data ?? []
|
||||||
if (!Array.isArray(messages)) return false
|
if (!Array.isArray(messages)) return false
|
||||||
|
|
||||||
const assistantMessages = (messages as OpenCodeSessionMessage[]).filter(
|
const assistantMessages = (messages as OpenCodeSessionMessage[]).filter((msg) => msg.info?.role === "assistant")
|
||||||
(msg) => msg.info?.role === "assistant"
|
if (assistantMessages.length === 0) return false
|
||||||
)
|
|
||||||
const lastAssistant = assistantMessages[assistantMessages.length - 1]
|
|
||||||
if (!lastAssistant?.parts) return false
|
|
||||||
|
|
||||||
const pattern = new RegExp(`<promise>\\s*${escapeRegex(promise)}\\s*</promise>`, "is")
|
const pattern = new RegExp(`<promise>\\s*${escapeRegex(promise)}\\s*</promise>`, "is")
|
||||||
const responseText = lastAssistant.parts
|
|
||||||
.filter((p) => p.type === "text")
|
|
||||||
.map((p) => p.text ?? "")
|
|
||||||
.join("\n")
|
|
||||||
|
|
||||||
return pattern.test(responseText)
|
const recentAssistants = assistantMessages.slice(-3)
|
||||||
|
for (const assistant of recentAssistants) {
|
||||||
|
if (!assistant.parts) continue
|
||||||
|
|
||||||
|
const responseText = assistant.parts
|
||||||
|
.filter((p) => p.type === "text" || p.type === "reasoning")
|
||||||
|
.map((p) => p.text ?? "")
|
||||||
|
.join("\n")
|
||||||
|
|
||||||
|
if (pattern.test(responseText)) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
log(`[${HOOK_NAME}] Session messages check failed`, { sessionID, error: String(err) })
|
setTimeout(() => {
|
||||||
|
log(`[${HOOK_NAME}] Session messages check failed`, {
|
||||||
|
sessionID,
|
||||||
|
error: String(err),
|
||||||
|
})
|
||||||
|
}, 0)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -343,7 +370,10 @@ export function createRalphLoopHook(
|
|||||||
let model: { providerID: string; modelID: string } | undefined
|
let model: { providerID: string; modelID: string } | undefined
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const messagesResp = await ctx.client.session.messages({ path: { id: sessionID } })
|
const messagesResp = await withTimeout(
|
||||||
|
ctx.client.session.messages({ path: { id: sessionID } }),
|
||||||
|
apiTimeout
|
||||||
|
)
|
||||||
const messages = (messagesResp.data ?? []) as Array<{
|
const messages = (messagesResp.data ?? []) as Array<{
|
||||||
info?: { agent?: string; model?: { providerID: string; modelID: string }; modelID?: string; providerID?: string }
|
info?: { agent?: string; model?: { providerID: string; modelID: string }; modelID?: string; providerID?: string }
|
||||||
}>
|
}>
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user