feat: add realistic kitchen scenarios

This commit is contained in:
Vincent Koc 2026-04-29 13:33:29 -07:00
parent 1e6474223f
commit e98ab47d52
No known key found for this signature in database
4 changed files with 220 additions and 3 deletions

View File

@ -32,6 +32,20 @@ kitchen explain the fixture
It also exposes provider and tool surfaces for live model routing:
- `listKitchenHumanScenarios()` and `runKitchenHumanScenario(runtime, id)`
provide deterministic end-to-end user scenarios for fixture consumers:
`dry.prefix-image`, `live.openai-text-kitchen-image`,
`search.fetch.summarize`, `channel.prefix-image`, `hook.block-tool`, and
`memory.compact-fixture`.
- When a live text provider such as OpenAI is active and Kitchen Sink is
selected as the image provider, the `live.openai-text-kitchen-image` scenario
proves the human prompt can route to the Kitchen Sink image provider and
return the bundled `kitchen_sink_office.png` asset without external image
credentials.
- The `hook.block-tool` scenario proves terminal `before_tool_call` blocking,
and the contract probe script also checks the approval path and conversation
privacy observations for `llm_input`, `llm_output`, and `agent_end`.
- `src/scenarios.js` is the shared deterministic fixture engine used by dry
commands, tools, providers, hooks, channel delivery, and tests.
- `kitchen_sink_image_job` returns a deterministic image job, waits 10 seconds

View File

@ -201,7 +201,7 @@
"messageId": "ks_channel_d813aa04",
"conversationId": "kitchen-demo",
"channelId": "kitchen-demo",
"timestamp": 1777494231851,
"timestamp": 1777494734068,
"deliveryStatus": "sent",
"transport": "kitchen-sink-local",
"meta": {

View File

@ -131,7 +131,13 @@ const imageProvider = findRegistration("registerImageGenerationProvider", "kitch
assert.equal(imageProvider.defaultModel, "kitchen-sink-image-v1");
const sleeps = [];
const { PLUGIN_ID, runKitchenImageTool, runKitchenScenario } = await import("../src/scenarios.js");
const {
PLUGIN_ID,
listKitchenHumanScenarios,
runKitchenHumanScenario,
runKitchenImageTool,
runKitchenScenario,
} = await import("../src/scenarios.js");
const { createKitchenSinkRuntime } = await import("../src/kitchen-runtime.js");
const fastRuntime = createKitchenSinkRuntime({
delayMs: 10_000,
@ -172,8 +178,40 @@ assert.deepEqual(
);
assert.ok(imageResult.image.dataUrl.startsWith("data:image/png;base64,"));
// The human-scenario catalogue must expose exactly these ids, in this order.
const humanScenarios = listKitchenHumanScenarios();
assert.deepEqual(
humanScenarios.map((scenario) => scenario.id),
[
"dry.prefix-image",
"live.openai-text-kitchen-image",
"search.fetch.summarize",
"channel.prefix-image",
"hook.block-tool",
"memory.compact-fixture",
],
);
// live.openai-text-kitchen-image: check the reported mode, the route echoed on
// the result, and the asset name carried in the image metadata.
const liveImageScenario = await runKitchenHumanScenario(fastRuntime, "live.openai-text-kitchen-image");
assert.equal(liveImageScenario.mode, "live-llm-compatible");
assert.equal(liveImageScenario.result.route, "human:live-llm-image-provider");
assert.equal(liveImageScenario.result.image.metadata.assetName, "kitchen_sink_office.png");
// search.fetch.summarize: check the first search hit, the fetched URL, and
// that the summary text contains the expected fixture phrase.
const searchFetchScenario = await runKitchenHumanScenario(fastRuntime, "search.fetch.summarize");
assert.equal(searchFetchScenario.result.search.results[0].id, "ks-result-image-provider");
assert.equal(searchFetchScenario.result.fetch.finalUrl, "kitchen://fixture/readme");
assert.match(searchFetchScenario.result.summary, /Kitchen Sink text fixture/);
// channel.prefix-image: check the delivery's channel and the scenario id
// recorded in its meta.
const channelScenario = await runKitchenHumanScenario(fastRuntime, "channel.prefix-image");
assert.equal(channelScenario.result.delivery.channel, "kitchen-sink-channel");
assert.equal(channelScenario.result.delivery.meta.scenarioId, "image.generate");
// hook.block-tool: the hook observation must report a block decision.
const hookBlockScenario = await runKitchenHumanScenario(fastRuntime, "hook.block-tool");
assert.equal(hookBlockScenario.result.block, true);
assert.equal(hookBlockScenario.result.decision, "block");
// memory.compact-fixture: check the embedding length, the first memory hit,
// and the identifiers preserved by compaction.
const memoryScenario = await runKitchenHumanScenario(fastRuntime, "memory.compact-fixture");
assert.equal(memoryScenario.result.embedding.length, 8);
assert.equal(memoryScenario.result.memory.results[0].id, "ks-memory-runtime-surfaces");
assert.deepEqual(memoryScenario.result.compaction.preservedIdentifiers, ["ks_image_1f8a5a98"]);
sleeps.length = 0;
const failedImageResult = await fastRuntime.runImageJob({ prompt: "kitchen rate limit image" });
assert.deepEqual(sleeps, [10_000, 10_000]);
assert.deepEqual(sleeps, [10_000]);
assert.equal(failedImageResult.job.status, "failed");
assert.deepEqual(
failedImageResult.job.timeline.map((entry) => entry.status),
@ -184,6 +222,7 @@ assert.equal(failedImageResult.error.statusCode, 429);
assert.equal(failedImageResult.error.retryAfterMs, 30_000);
const failedToolResult = await runKitchenImageTool(fastRuntime, { prompt: "kitchen timeout image" });
assert.deepEqual(sleeps, [10_000, 10_000]);
assert.equal(failedToolResult.ok, false);
assert.equal(failedToolResult.error.code, "timeout");
assert.equal(failedToolResult.mediaUrl, undefined);

View File

@ -44,6 +44,51 @@ const KITCHEN_IMAGE_FIXTURES = [
},
];
/**
 * Catalogue of the deterministic "human" scenarios, in listing order.
 *
 * Each record carries:
 * - `id`: the identifier `runKitchenHumanScenario` dispatches on.
 * - `prompt`: the example user prompt used when the scenario is run.
 * - `mode`: "dry" or "live-llm-compatible".
 * - `route`: the route label passed along with runtime scenario requests.
 * - `surfaces`: labels for the surfaces the scenario touches.
 *
 * The array, every record, and every `surfaces` list are frozen. A shallow
 * freeze of the outer array alone would still let any importer rewrite a
 * record or push onto `surfaces`, silently changing the shared fixture.
 */
export const KITCHEN_HUMAN_SCENARIOS = Object.freeze(
  [
    {
      id: "dry.prefix-image",
      prompt: "kitchen generate an image of the office sink fixture",
      mode: "dry",
      route: "prefix:kitchen",
      surfaces: ["command", "image-provider", "asset"],
    },
    {
      id: "live.openai-text-kitchen-image",
      prompt: "Generate an image with Kitchen Sink while OpenAI handles the text turn.",
      mode: "live-llm-compatible",
      route: "human:live-llm-image-provider",
      surfaces: ["text-provider-guidance", "image-provider", "tool-routing"],
    },
    {
      id: "search.fetch.summarize",
      prompt: "Search for Kitchen Sink provider routing and fetch the fixture README.",
      mode: "dry",
      route: "human:search-fetch-summary",
      surfaces: ["web-search", "web-fetch", "text-provider"],
    },
    {
      id: "channel.prefix-image",
      prompt: "kitchen generate an image in this channel",
      mode: "dry",
      route: "human:channel-prefix",
      surfaces: ["channel", "interactive-handler", "image-provider"],
    },
    {
      id: "hook.block-tool",
      prompt: "kitchen block image generation until the operator reviews it",
      mode: "dry",
      route: "human:hook-block",
      surfaces: ["before_tool_call", "terminal-block"],
    },
    {
      id: "memory.compact-fixture",
      prompt: "Remember the Kitchen Sink image job and compact this session.",
      mode: "dry",
      route: "human:memory-compaction",
      surfaces: ["memory-embedding", "memory-corpus", "compaction"],
    },
  ].map((scenario) => {
    // Freeze inner values first, then the record that holds them.
    Object.freeze(scenario.surfaces);
    return Object.freeze(scenario);
  }),
);
export function createKitchenScenarioRuntime(options = {}) {
const runtime = {
delayMs: normalizeDelayMs(options.delayMs),
@ -71,6 +116,100 @@ export function createKitchenScenarioRuntime(options = {}) {
return runtime;
}
/**
 * Lists the human scenarios as independent copies.
 *
 * @returns {object[]} One new object per catalogue entry, each with its own
 *   `surfaces` array, so callers can modify the result without touching
 *   `KITCHEN_HUMAN_SCENARIOS`.
 */
export function listKitchenHumanScenarios() {
  const copies = [];
  for (const entry of KITCHEN_HUMAN_SCENARIOS) {
    const surfaces = Array.from(entry.surfaces);
    copies.push({ ...entry, surfaces });
  }
  return copies;
}
/**
 * Runs one human scenario end to end and returns its descriptor plus outcome.
 *
 * @param {object} runtime - Runtime whose `runScenario(request)` is awaited for
 *   runtime-backed scenarios; it is also handed to `runKitchenCommand`.
 * @param {*} idOrPrompt - Scenario id or free-form prompt, resolved through
 *   `resolveKitchenHumanScenario`.
 * @returns {Promise<object>} The scenario fields (`id`, `prompt`, `mode`,
 *   `route`, `surfaces`) plus `result`; the live image scenario also carries
 *   `guidance`.
 */
export async function runKitchenHumanScenario(runtime, idOrPrompt) {
  const resolved = resolveKitchenHumanScenario(idOrPrompt);
  // Hand out a private copy of `surfaces`, as listKitchenHumanScenarios does,
  // so a caller mutating the returned object cannot alter the shared catalogue.
  const scenario = { ...resolved, surfaces: [...resolved.surfaces] };

  switch (scenario.id) {
    case "dry.prefix-image": {
      // The command runner receives the prompt without its "kitchen " prefix.
      const commandText = scenario.prompt.replace(/^kitchen\s+/i, "");
      return {
        ...scenario,
        result: await runKitchenCommand(runtime, commandText),
      };
    }
    case "live.openai-text-kitchen-image":
      return {
        ...scenario,
        guidance: kitchenPromptGuidance(),
        result: await runtime.runScenario({
          scenario: "image.generate",
          prompt: scenario.prompt,
          route: scenario.route,
        }),
      };
    case "search.fetch.summarize": {
      // Kept sequential: search is issued before fetch, as in the original flow.
      const search = await runtime.runScenario({
        scenario: "web.search",
        prompt: scenario.prompt,
        route: scenario.route,
      });
      const fetched = await runtime.runScenario({
        scenario: "web.fetch",
        url: "kitchen://fixture/readme",
        route: scenario.route,
      });
      return {
        ...scenario,
        result: {
          search,
          fetch: fetched,
          summary: kitchenTextResponse(`${search.answer} ${fetched.title}`),
        },
      };
    }
    case "channel.prefix-image": {
      const commandText = scenario.prompt.replace(/^kitchen\s+/i, "");
      const command = await runKitchenCommand(runtime, commandText);
      return {
        ...scenario,
        result: {
          command,
          delivery: createKitchenChannelDelivery({
            kind: "media",
            text: scenario.prompt,
            to: "kitchen demo",
          }),
        },
      };
    }
    case "hook.block-tool":
      return {
        ...scenario,
        result: observeKitchenHook(
          "before_tool_call",
          { toolId: "kitchen_sink_image_job", args: { prompt: scenario.prompt } },
          { providerId: IMAGE_PROVIDER_ID },
        ),
      };
    case "memory.compact-fixture": {
      const memory = createKitchenMemorySearch(scenario.prompt);
      const compaction = createKitchenCompaction({
        messages: [
          { role: "user", content: scenario.prompt },
          { role: "assistant", content: "Kitchen Sink image job ks_image_1f8a5a98 completed." },
        ],
      });
      return {
        ...scenario,
        result: {
          embedding: createKitchenEmbedding(scenario.prompt),
          memory,
          compaction,
        },
      };
    }
    default:
      // Any scenario id without a dedicated branch runs as a plain text reply.
      return {
        ...scenario,
        result: await runtime.runScenario({
          scenario: "text.reply",
          prompt: scenario.prompt,
          route: scenario.route,
        }),
      };
  }
}
export async function runKitchenScenario(runtime, request = {}) {
const scenario = normalizeScenario(request.scenario);
if (scenario === "image.generate") {
@ -1048,6 +1187,31 @@ function readString(input, key) {
return "";
}
/**
 * Maps a scenario id or free-form prompt to a catalogue entry.
 *
 * An exact id match wins. Otherwise the lower-cased text is checked against
 * keyword rules in a fixed order, and the first rule that matches decides.
 * Text that matches no rule (including empty or nullish input) resolves to
 * "dry.prefix-image".
 *
 * @param {*} idOrPrompt - Scenario id or prompt; coerced with `String()` and trimmed.
 * @returns {object|undefined} The matching `KITCHEN_HUMAN_SCENARIOS` entry
 *   (undefined only if the catalogue lacks the target id).
 */
function resolveKitchenHumanScenario(idOrPrompt) {
  const text = String(idOrPrompt ?? "").trim();
  const findById = (id) => KITCHEN_HUMAN_SCENARIOS.find((scenario) => scenario.id === id);

  const exact = findById(text);
  if (exact) {
    return exact;
  }

  const normalized = text.toLowerCase();
  if (/\bopenai\b/.test(normalized) && /\bimage\b/.test(normalized)) {
    return findById("live.openai-text-kitchen-image");
  }
  if (/\b(search|fetch|lookup|web)\b/.test(normalized)) {
    return findById("search.fetch.summarize");
  }
  // The alternatives must be grouped: without the parentheses `\b` applies only
  // to the first and last alternative, so "wechat" or "blockchain" would match.
  if (/\b(channel|chat)\b/.test(normalized)) {
    return findById("channel.prefix-image");
  }
  if (/\b(block|deny|approval)\b/.test(normalized)) {
    return findById("hook.block-tool");
  }
  if (/\b(memory|compact|remember)\b/.test(normalized)) {
    return findById("memory.compact-fixture");
  }
  return findById("dry.prefix-image");
}
function inferKitchenScenario({ providerId, text, toolId, url }) {
const haystack = [providerId, text, toolId, url].filter(Boolean).join(" ").toLowerCase();
if (toolId === "kitchen_sink_image_job" || providerId === IMAGE_PROVIDER_ID) {