feat: add realistic kitchen scenarios
This commit is contained in:
parent
1e6474223f
commit
e98ab47d52
14
README.md
14
README.md
@ -32,6 +32,20 @@ kitchen explain the fixture
|
||||
|
||||
It also exposes provider and tool surfaces for live model routing:
|
||||
|
||||
- `listKitchenHumanScenarios()` and `runKitchenHumanScenario(runtime, id)`
|
||||
provide deterministic end-to-end user scenarios for fixture consumers:
|
||||
`dry.prefix-image`, `live.openai-text-kitchen-image`,
|
||||
`search.fetch.summarize`, `channel.prefix-image`, `hook.block-tool`, and
|
||||
`memory.compact-fixture`.
|
||||
- When a live text provider such as OpenAI is active and Kitchen Sink is
|
||||
selected as the image provider, the `live.openai-text-kitchen-image` scenario
|
||||
proves the human prompt can route to the Kitchen Sink image provider and
|
||||
return the bundled `kitchen_sink_office.png` asset without external image
|
||||
credentials.
|
||||
- The `hook.block-tool` scenario proves terminal `before_tool_call` blocking,
|
||||
and the contract probe script also checks the approval path and conversation
|
||||
privacy observations for `llm_input`, `llm_output`, and `agent_end`.
|
||||
|
||||
- `src/scenarios.js` is the shared deterministic fixture engine used by dry
|
||||
commands, tools, providers, hooks, channel delivery, and tests.
|
||||
- `kitchen_sink_image_job` returns a deterministic image job, waits 10 seconds
|
||||
|
||||
@ -201,7 +201,7 @@
|
||||
"messageId": "ks_channel_d813aa04",
|
||||
"conversationId": "kitchen-demo",
|
||||
"channelId": "kitchen-demo",
|
||||
"timestamp": 1777494231851,
|
||||
"timestamp": 1777494734068,
|
||||
"deliveryStatus": "sent",
|
||||
"transport": "kitchen-sink-local",
|
||||
"meta": {
|
||||
|
||||
@ -131,7 +131,13 @@ const imageProvider = findRegistration("registerImageGenerationProvider", "kitch
|
||||
assert.equal(imageProvider.defaultModel, "kitchen-sink-image-v1");
|
||||
|
||||
const sleeps = [];
|
||||
const { PLUGIN_ID, runKitchenImageTool, runKitchenScenario } = await import("../src/scenarios.js");
|
||||
const {
|
||||
PLUGIN_ID,
|
||||
listKitchenHumanScenarios,
|
||||
runKitchenHumanScenario,
|
||||
runKitchenImageTool,
|
||||
runKitchenScenario,
|
||||
} = await import("../src/scenarios.js");
|
||||
const { createKitchenSinkRuntime } = await import("../src/kitchen-runtime.js");
|
||||
const fastRuntime = createKitchenSinkRuntime({
|
||||
delayMs: 10_000,
|
||||
@ -172,8 +178,40 @@ assert.deepEqual(
|
||||
);
|
||||
assert.ok(imageResult.image.dataUrl.startsWith("data:image/png;base64,"));
|
||||
|
||||
// Human scenarios: the listing must expose exactly these ids, in this order.
const humanScenarios = listKitchenHumanScenarios();
|
||||
assert.deepEqual(
|
||||
humanScenarios.map((scenario) => scenario.id),
|
||||
[
|
||||
"dry.prefix-image",
|
||||
"live.openai-text-kitchen-image",
|
||||
"search.fetch.summarize",
|
||||
"channel.prefix-image",
|
||||
"hook.block-tool",
|
||||
"memory.compact-fixture",
|
||||
],
|
||||
);
|
||||
// Live-compatible image scenario: checks the reported mode, the route echoed in
// the result, and the name of the returned image asset.
const liveImageScenario = await runKitchenHumanScenario(fastRuntime, "live.openai-text-kitchen-image");
|
||||
assert.equal(liveImageScenario.mode, "live-llm-compatible");
|
||||
assert.equal(liveImageScenario.result.route, "human:live-llm-image-provider");
|
||||
assert.equal(liveImageScenario.result.image.metadata.assetName, "kitchen_sink_office.png");
|
||||
// Search + fetch scenario: checks the first search hit, the fetched URL, and
// that the combined summary contains the fixture text marker.
const searchFetchScenario = await runKitchenHumanScenario(fastRuntime, "search.fetch.summarize");
|
||||
assert.equal(searchFetchScenario.result.search.results[0].id, "ks-result-image-provider");
|
||||
assert.equal(searchFetchScenario.result.fetch.finalUrl, "kitchen://fixture/readme");
|
||||
assert.match(searchFetchScenario.result.summary, /Kitchen Sink text fixture/);
|
||||
// Channel scenario: checks the delivery channel and the scenario id recorded
// in the delivery metadata.
const channelScenario = await runKitchenHumanScenario(fastRuntime, "channel.prefix-image");
|
||||
assert.equal(channelScenario.result.delivery.channel, "kitchen-sink-channel");
|
||||
assert.equal(channelScenario.result.delivery.meta.scenarioId, "image.generate");
|
||||
// Hook scenario: the before_tool_call observation must report a block decision.
const hookBlockScenario = await runKitchenHumanScenario(fastRuntime, "hook.block-tool");
|
||||
assert.equal(hookBlockScenario.result.block, true);
|
||||
assert.equal(hookBlockScenario.result.decision, "block");
|
||||
// Memory scenario: checks embedding length, the first memory hit, and that the
// image job id from the seeded assistant message survives compaction.
const memoryScenario = await runKitchenHumanScenario(fastRuntime, "memory.compact-fixture");
|
||||
assert.equal(memoryScenario.result.embedding.length, 8);
|
||||
assert.equal(memoryScenario.result.memory.results[0].id, "ks-memory-runtime-surfaces");
|
||||
assert.deepEqual(memoryScenario.result.compaction.preservedIdentifiers, ["ks_image_1f8a5a98"]);
|
||||
|
||||
sleeps.length = 0;
|
||||
const failedImageResult = await fastRuntime.runImageJob({ prompt: "kitchen rate limit image" });
|
||||
assert.deepEqual(sleeps, [10_000, 10_000]);
|
||||
assert.deepEqual(sleeps, [10_000]);
|
||||
assert.equal(failedImageResult.job.status, "failed");
|
||||
assert.deepEqual(
|
||||
failedImageResult.job.timeline.map((entry) => entry.status),
|
||||
@ -184,6 +222,7 @@ assert.equal(failedImageResult.error.statusCode, 429);
|
||||
assert.equal(failedImageResult.error.retryAfterMs, 30_000);
|
||||
|
||||
const failedToolResult = await runKitchenImageTool(fastRuntime, { prompt: "kitchen timeout image" });
|
||||
assert.deepEqual(sleeps, [10_000, 10_000]);
|
||||
assert.equal(failedToolResult.ok, false);
|
||||
assert.equal(failedToolResult.error.code, "timeout");
|
||||
assert.equal(failedToolResult.mediaUrl, undefined);
|
||||
|
||||
164
src/scenarios.js
164
src/scenarios.js
@ -44,6 +44,51 @@ const KITCHEN_IMAGE_FIXTURES = [
|
||||
},
|
||||
];
|
||||
|
||||
/**
 * Registry of the deterministic "human" end-to-end scenarios, in listing order.
 *
 * Each entry has:
 * - `id`: stable identifier; an exact match on it selects the scenario.
 * - `prompt`: the user text the scenario is run with.
 * - `mode`: `"dry"` or `"live-llm-compatible"`.
 * - `route`: route label forwarded with the scenario's runtime requests.
 * - `surfaces`: descriptive labels for the plugin surfaces involved.
 *
 * `Object.freeze` is shallow, so freezing only the outer array would leave
 * every entry (and its `surfaces` array) mutable through this export. Each
 * entry and each `surfaces` array is therefore frozen individually.
 */
export const KITCHEN_HUMAN_SCENARIOS = Object.freeze(
  [
    {
      id: "dry.prefix-image",
      prompt: "kitchen generate an image of the office sink fixture",
      mode: "dry",
      route: "prefix:kitchen",
      surfaces: ["command", "image-provider", "asset"],
    },
    {
      id: "live.openai-text-kitchen-image",
      prompt: "Generate an image with Kitchen Sink while OpenAI handles the text turn.",
      mode: "live-llm-compatible",
      route: "human:live-llm-image-provider",
      surfaces: ["text-provider-guidance", "image-provider", "tool-routing"],
    },
    {
      id: "search.fetch.summarize",
      prompt: "Search for Kitchen Sink provider routing and fetch the fixture README.",
      mode: "dry",
      route: "human:search-fetch-summary",
      surfaces: ["web-search", "web-fetch", "text-provider"],
    },
    {
      id: "channel.prefix-image",
      prompt: "kitchen generate an image in this channel",
      mode: "dry",
      route: "human:channel-prefix",
      surfaces: ["channel", "interactive-handler", "image-provider"],
    },
    {
      id: "hook.block-tool",
      prompt: "kitchen block image generation until the operator reviews it",
      mode: "dry",
      route: "human:hook-block",
      surfaces: ["before_tool_call", "terminal-block"],
    },
    {
      id: "memory.compact-fixture",
      prompt: "Remember the Kitchen Sink image job and compact this session.",
      mode: "dry",
      route: "human:memory-compaction",
      surfaces: ["memory-embedding", "memory-corpus", "compaction"],
    },
  ].map((scenario) =>
    Object.freeze({ ...scenario, surfaces: Object.freeze([...scenario.surfaces]) }),
  ),
);
|
||||
|
||||
export function createKitchenScenarioRuntime(options = {}) {
|
||||
const runtime = {
|
||||
delayMs: normalizeDelayMs(options.delayMs),
|
||||
@ -71,6 +116,100 @@ export function createKitchenScenarioRuntime(options = {}) {
|
||||
return runtime;
|
||||
}
|
||||
|
||||
/**
 * Returns a caller-owned snapshot of the human scenario registry.
 *
 * Every entry is shallow-copied and its `surfaces` array is duplicated, so the
 * caller can modify the returned objects without touching
 * `KITCHEN_HUMAN_SCENARIOS`.
 *
 * @returns {Array<object>} One copy per registry entry, in registry order.
 */
export function listKitchenHumanScenarios() {
  const snapshot = [];
  for (const entry of KITCHEN_HUMAN_SCENARIOS) {
    // Object.assign keeps the entry's key order; `surfaces` is replaced in place.
    snapshot.push(Object.assign({}, entry, { surfaces: Array.from(entry.surfaces) }));
  }
  return snapshot;
}
|
||||
|
||||
/**
 * Runs one deterministic human scenario end to end and returns its descriptor
 * together with the outcome.
 *
 * The scenario is chosen by `resolveKitchenHumanScenario(idOrPrompt)`. What is
 * executed depends on the resolved id:
 * - `dry.prefix-image`: `runKitchenCommand` with the prompt minus its leading
 *   `kitchen` prefix.
 * - `live.openai-text-kitchen-image`: `runtime.runScenario` for
 *   `image.generate`; the return value also carries `guidance` from
 *   `kitchenPromptGuidance()`.
 * - `search.fetch.summarize`: `web.search`, then `web.fetch` of
 *   `kitchen://fixture/readme`, then a text summary built from both.
 * - `channel.prefix-image`: the prefix command plus a media channel delivery.
 * - `hook.block-tool`: a `before_tool_call` hook observation for
 *   `kitchen_sink_image_job`.
 * - `memory.compact-fixture`: memory search, compaction, and embedding.
 * - anything else: `runtime.runScenario` for `text.reply`.
 *
 * @param {object} runtime - Scenario runtime; must provide an async
 *   `runScenario(request)` and is also handed to `runKitchenCommand`.
 * @param {*} idOrPrompt - Scenario id or free-form prompt text.
 * @returns {Promise<object>} The scenario's fields (`id`, `prompt`, `mode`,
 *   `route`, `surfaces`) plus `result` (and `guidance` for the live scenario).
 *   `surfaces` is a fresh array, so mutating it cannot alter the registry.
 */
export async function runKitchenHumanScenario(runtime, idOrPrompt) {
  const scenario = resolveKitchenHumanScenario(idOrPrompt);
  const { prompt, route } = scenario;
  // A plain `...scenario` would hand the registry's own `surfaces` array to the
  // caller; copy it, as listKitchenHumanScenarios() does.
  const descriptor = { ...scenario, surfaces: [...scenario.surfaces] };

  switch (scenario.id) {
    case "dry.prefix-image": {
      return {
        ...descriptor,
        result: await runKitchenCommand(runtime, prompt.replace(/^kitchen\s+/i, "")),
      };
    }

    case "live.openai-text-kitchen-image": {
      return {
        ...descriptor,
        guidance: kitchenPromptGuidance(),
        result: await runtime.runScenario({
          scenario: "image.generate",
          prompt,
          route,
        }),
      };
    }

    case "search.fetch.summarize": {
      // Kept sequential so the order of runtime calls stays deterministic.
      const search = await runtime.runScenario({
        scenario: "web.search",
        prompt,
        route,
      });
      const fetch = await runtime.runScenario({
        scenario: "web.fetch",
        url: "kitchen://fixture/readme",
        route,
      });
      return {
        ...descriptor,
        result: {
          search,
          fetch,
          summary: kitchenTextResponse(`${search.answer} ${fetch.title}`),
        },
      };
    }

    case "channel.prefix-image": {
      const command = await runKitchenCommand(runtime, prompt.replace(/^kitchen\s+/i, ""));
      return {
        ...descriptor,
        result: {
          command,
          delivery: createKitchenChannelDelivery({
            kind: "media",
            text: prompt,
            to: "kitchen demo",
          }),
        },
      };
    }

    case "hook.block-tool": {
      return {
        ...descriptor,
        result: observeKitchenHook(
          "before_tool_call",
          { toolId: "kitchen_sink_image_job", args: { prompt } },
          { providerId: IMAGE_PROVIDER_ID },
        ),
      };
    }

    case "memory.compact-fixture": {
      const memory = createKitchenMemorySearch(prompt);
      const compaction = createKitchenCompaction({
        messages: [
          { role: "user", content: prompt },
          { role: "assistant", content: "Kitchen Sink image job ks_image_1f8a5a98 completed." },
        ],
      });
      return {
        ...descriptor,
        result: {
          embedding: createKitchenEmbedding(prompt),
          memory,
          compaction,
        },
      };
    }

    default: {
      // Fallback for a resolved scenario without a dedicated branch above.
      return {
        ...descriptor,
        result: await runtime.runScenario({
          scenario: "text.reply",
          prompt,
          route,
        }),
      };
    }
  }
}
|
||||
|
||||
export async function runKitchenScenario(runtime, request = {}) {
|
||||
const scenario = normalizeScenario(request.scenario);
|
||||
if (scenario === "image.generate") {
|
||||
@ -1048,6 +1187,31 @@ function readString(input, key) {
|
||||
return "";
|
||||
}
|
||||
|
||||
/**
 * Maps a scenario id or free-form prompt to an entry of
 * `KITCHEN_HUMAN_SCENARIOS`.
 *
 * Resolution order:
 * 1. The trimmed input equals a scenario id exactly.
 * 2. The lower-cased input mentions both `openai` and `image`.
 * 3. The first keyword group below that matches a whole word.
 * 4. Otherwise `dry.prefix-image`.
 *
 * @param {*} idOrPrompt - Scenario id or prompt; `null`/`undefined` count as "".
 * @returns {object|undefined} The registry entry itself (not a copy);
 *   `undefined` only if the target id is missing from the registry.
 */
function resolveKitchenHumanScenario(idOrPrompt) {
  const text = String(idOrPrompt ?? "").trim();
  const findById = (id) => KITCHEN_HUMAN_SCENARIOS.find((scenario) => scenario.id === id);

  const exact = findById(text);
  if (exact) {
    return exact;
  }

  const normalized = text.toLowerCase();
  if (/\bopenai\b/.test(normalized) && /\bimage\b/.test(normalized)) {
    return findById("live.openai-text-kitchen-image");
  }

  // Alternatives are grouped so both `\b` anchors apply to every keyword.
  // Ungrouped, `/\bchannel|chat\b/` means `\bchannel` OR `chat\b`, which also
  // matched e.g. "snapchat", "blockchain", and "disapproval".
  const keywordRoutes = [
    [/\b(search|fetch|lookup|web)\b/, "search.fetch.summarize"],
    [/\b(channel|chat)\b/, "channel.prefix-image"],
    [/\b(block|deny|approval)\b/, "hook.block-tool"],
    [/\b(memory|compact|remember)\b/, "memory.compact-fixture"],
  ];
  const matched = keywordRoutes.find(([pattern]) => pattern.test(normalized));
  return findById(matched ? matched[1] : "dry.prefix-image");
}
|
||||
|
||||
function inferKitchenScenario({ providerId, text, toolId, url }) {
|
||||
const haystack = [providerId, text, toolId, url].filter(Boolean).join(" ").toLowerCase();
|
||||
if (toolId === "kitchen_sink_image_job" || providerId === IMAGE_PROVIDER_ID) {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user