From 46c2adf53880ffca973b982d8b5c1619b4e93eb6 Mon Sep 17 00:00:00 2001 From: "openclaw-docs-sync[bot]" Date: Mon, 27 Apr 2026 06:11:47 +0000 Subject: [PATCH] chore(sync): mirror docs from openclaw/openclaw@ca67762b8887a4c97cefe7d434b6d3b539d47242 --- .openclaw-sync/source.json | 4 ++-- docs/gateway/config-tools.md | 6 ++++++ docs/providers/ollama.md | 38 ++++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/.openclaw-sync/source.json b/.openclaw-sync/source.json index 3049f9b8c..1f2209aa8 100644 --- a/.openclaw-sync/source.json +++ b/.openclaw-sync/source.json @@ -1,5 +1,5 @@ { "repository": "openclaw/openclaw", - "sha": "2a6fab9d22a40b2fcd33a1c69f4dd69d1d83be66", - "syncedAt": "2026-04-27T05:58:51.720Z" + "sha": "ca67762b8887a4c97cefe7d434b6d3b539d47242", + "syncedAt": "2026-04-27T06:10:35.741Z" } diff --git a/docs/gateway/config-tools.md b/docs/gateway/config-tools.md index 8504d83ad..f0149cc73 100644 --- a/docs/gateway/config-tools.md +++ b/docs/gateway/config-tools.md @@ -215,6 +215,11 @@ Configures inbound media understanding (image/audio/video): { type: "cli", command: "whisper", args: ["--model", "base", "{{MediaPath}}"] }, ], }, + image: { + enabled: true, + timeoutSeconds: 180, + models: [{ provider: "ollama", model: "gemma4:26b", timeoutSeconds: 300 }], + }, video: { enabled: true, maxBytes: 52428800, @@ -242,6 +247,7 @@ Configures inbound media understanding (image/audio/video): - `capabilities`: optional list (`image`, `audio`, `video`). Defaults: `openai`/`anthropic`/`minimax` → image, `google` → image+audio+video, `groq` → audio. - `prompt`, `maxChars`, `maxBytes`, `timeoutSeconds`, `language`: per-entry overrides. + - `tools.media.image.timeoutSeconds` and matching image model `timeoutSeconds` entries also apply when the agent calls the explicit `image` tool. - Failures fall back to the next entry. Provider auth follows standard order: `auth-profiles.json` → env vars → `models.providers.*.apiKey`. diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md index ee5a81c35..693c56129 100644 --- a/docs/providers/ollama.md +++ b/docs/providers/ollama.md @@ -241,6 +241,44 @@ To make Ollama the default image-understanding model for inbound media, configur } ``` +Slow local vision models can need a longer image-understanding timeout than cloud models. They can also crash or stop when Ollama tries to allocate the full advertised vision context on constrained hardware. Set a capability timeout, and cap `num_ctx` on the model entry when you only need a normal image-description turn: + +```json5 +{ + models: { + providers: { + ollama: { + models: [ + { + id: "qwen2.5vl:7b", + name: "qwen2.5vl:7b", + input: ["text", "image"], + params: { num_ctx: 2048, keep_alive: "1m" }, + }, + ], + }, + }, + }, + tools: { + media: { + image: { + timeoutSeconds: 180, + models: [{ provider: "ollama", model: "qwen2.5vl:7b", timeoutSeconds: 300 }], + }, + }, + }, +} +``` + +This timeout applies to inbound image understanding and to the explicit `image` tool the agent can call during a turn. Provider-level `models.providers.ollama.timeoutSeconds` still controls the underlying Ollama HTTP request guard for normal model calls. + +Live-verify the explicit image tool against local Ollama with: + +```bash +OPENCLAW_LIVE_TEST=1 OPENCLAW_LIVE_OLLAMA_IMAGE=1 \ + pnpm test:live -- src/agents/tools/image-tool.ollama.live.test.ts +``` + If you define `models.providers.ollama.models` manually, mark vision models with image input support: ```json5