diff --git a/records/openclaw-openclaw/items/79555.md b/records/openclaw-openclaw/items/79555.md index 6dfa5d36d3..d39159da29 100644 --- a/records/openclaw-openclaw/items/79555.md +++ b/records/openclaw-openclaw/items/79555.md @@ -1,5 +1,5 @@ --- -review_comment_synced_at: 2026-05-08T23:14:47.342Z +review_comment_synced_at: 2026-05-08T23:31:10.094Z number: 79555 repository: openclaw/openclaw type: pull_request @@ -7,12 +7,12 @@ title: "fix(xai): context-aware thinking profile for reasoning-capable models" url: https://github.com/openclaw/openclaw/pull/79555 state_at_review: open item_created_at: 2026-05-08T23:07:25Z -item_updated_at: 2026-05-08T23:08:09Z +item_updated_at: 2026-05-08T23:24:12Z author: colinmcintosh author_association: NONE labels: ["size: S","triage: needs-real-behavior-proof"] -reviewed_at: 2026-05-08T23:14:05.250Z -main_sha: ff4d5541a2ad104e0fc9c7c52b452ab4d5d8e17f +reviewed_at: 2026-05-08T23:30:31.929Z +main_sha: b6423fbd323cc1ecfc8ef452b7d862a5ec0685f8 pull_head_sha: 3191edb404585b856fb1e6af89ada027990ca12f latest_release: v2026.5.7 latest_release_sha: eeef4864494f859838fec1586bedbab1f8fa5702 @@ -31,19 +31,19 @@ review_model: gpt-5.5 review_reasoning_effort: high review_sandbox: danger-full-access review_service_tier: default -review_prompt_chars: 48268 +review_prompt_chars: 58997 review_static_prompt_chars: 33412 -review_context_chars: 13608 +review_context_chars: 24337 review_schema_chars: 14081 review_additional_prompt_chars: 0 -review_context_elapsed_ms: 3789 -review_codex_elapsed_ms: 197482 +review_context_elapsed_ms: 3028 +review_codex_elapsed_ms: 197517 review_mode: propose review_status: complete local_checkout_access: verified -item_snapshot_hash: 845ebb8ea37bda1ff9d4ef054e4e2f33cb26209e6e83c39424e003e9205952ca +item_snapshot_hash: d766843d8f834fb5d10e176a2cbf0c072cae6d4ec4791692c5dcc177b369af3e close_comment_sha256: none -review_comment_sha256: 5307937b79d910fa3fbd671140bcd2465de3c8f05338a25346588e9cf624db92 +review_comment_sha256: b369f921981b898c9dfe6bdb8c0a3ee00c97985917edd144d09183f32fb29ff8 review_comment_id: 4410507930 review_comment_url: https://github.com/openclaw/openclaw/pull/79555#issuecomment-4410507930 decision: keep_open @@ -52,21 +52,21 @@ confidence: high action_taken: kept_open work_candidate: manual_review work_confidence: high -work_priority: high +work_priority: medium work_status: manual_review -work_reason_sha256: 6eddad7b274b20b7690060db533b11dfd9f2313c44d7bc66636b4c32161cb22c +work_reason_sha256: 2026923246ad87e708ea1d002a73b8a161b4ce8bf052a24ebf5469f8542bbc55 work_prompt_sha256: none -work_cluster_refs: ["https://github.com/openclaw/openclaw/pull/79555","https://github.com/openclaw/openclaw/issues/79210"] +work_cluster_refs: ["https://github.com/openclaw/openclaw/issues/79210","https://github.com/openclaw/openclaw/pull/79555"] work_validation: ["pnpm test extensions/xai/stream.test.ts extensions/xai/index.test.ts extensions/xai/provider-policy-api.test.ts","pnpm test src/auto-reply/thinking.test.ts src/agents/openai-transport-stream.test.ts","OPENCLAW_LIVE_TEST=1 OPENCLAW_LIVE_TEST_QUIET=1 pnpm test:live -- extensions/xai/xai.live.test.ts"] -work_likely_files: ["extensions/xai/provider-policy-api.ts","extensions/xai/index.ts","extensions/xai/stream.ts","extensions/xai/runtime-model-compat.ts","extensions/xai/stream.test.ts","extensions/xai/xai.live.test.ts","CHANGELOG.md"] +work_likely_files: ["extensions/xai/provider-policy-api.ts","extensions/xai/index.ts","extensions/xai/runtime-model-compat.ts","extensions/xai/stream.ts","extensions/xai/stream.test.ts","extensions/xai/index.test.ts","extensions/xai/provider-policy-api.test.ts"] item_category: feature reproduction_status: source_reproducible reproduction_confidence: high requires_new_feature: true requires_new_config_option: false requires_product_decision: false -real_behavior_proof_status: mock_only -real_behavior_proof_evidence_kind: none +real_behavior_proof_status: insufficient +real_behavior_proof_evidence_kind: live_output real_behavior_proof_needs_contributor_action: true --- @@ -84,9 +84,9 @@ Labels: size: S, triage: needs-real-behavior-proof Created at: May 8, 2026, 23:07 UTC -Updated at: May 8, 2026, 23:08 UTC +Updated at: May 8, 2026, 23:24 UTC -Reviewed against: [ff4d5541a2ad](https://github.com/openclaw/openclaw/commit/ff4d5541a2ad104e0fc9c7c52b452ab4d5d8e17f) +Reviewed against: [b6423fbd323c](https://github.com/openclaw/openclaw/commit/b6423fbd323cc1ecfc8ef452b7d862a5ec0685f8) Codex review: model gpt-5.5, reasoning high @@ -104,23 +104,23 @@ Action taken: kept_open ## Summary -Keep open: this PR exposes xAI thinking levels, but current main still strips every xAI reasoning control before dispatch, so it does not actually implement the linked Grok reasoning-effort feature. +Keep open: this PR is an incomplete fix for the linked Grok reasoning-effort request because it advertises selectable xAI thinking levels while the current xAI transport still strips those controls before dispatch, and the PR proof does not show real runtime behavior. ## What This Changes -This PR replaces the xAI off-only thinking profile with a context-aware resolver and adds provider-policy unit tests. +The PR changes the xAI provider policy to advertise off/low/medium/high thinking levels for reasoning-capable xAI models, wires that resolver into the plugin entry, and adds provider-policy tests. ## Best Possible Solution -Update the xAI provider profile, runtime compat, and payload wrapper together so only supported Grok routes expose and forward the documented reasoning effort, while keeping unsupported xAI routes off-only. +Update the xAI provider profile, runtime compat mapping, and stream payload wrapper together so supported Grok routes expose and forward the documented reasoning effort while unsupported xAI routes remain off-only. ## Reproduction Assessment -Yes, source inspection gives a high-confidence path: this PR advertises `low/medium/high`, but current xAI stream code still deletes all reasoning-effort fields before dispatch. I did not see after-fix real xAI runtime proof in the PR. +Yes. Source inspection gives a high-confidence path: this PR advertises xAI low/medium/high, but current main still deletes all xAI reasoning-effort fields and maps xAI thinking levels to null before dispatch. ## Solution Assessment -No. The maintainable fix needs both the provider-owned thinking profile and the xAI transport/compat path updated, with regression coverage and real xAI proof; changing only the profile leaves the requested behavior ineffective. +No. The policy-only change is not the best fix because it exposes controls without making them affect runtime requests; the maintainable fix needs provider policy and transport support changed together with regression and real xAI proof. ## Review Findings @@ -131,14 +131,14 @@ Overall confidence: 0.92 Full review comments: - **[P2] Forward xAI reasoning controls before exposing levels:** `extensions/xai/index.ts:204` - - body: This now advertises `low`, `medium`, and `high` for reasoning-capable xAI models, but the xAI stream wrapper still deletes `reasoning`, `reasoningEffort`, and `reasoning_effort`, and runtime compat maps every xAI thinking level to `null`. Users would see/select these levels while the request sent to xAI remains unchanged, so the linked Grok reasoning-effort feature is not actually fixed. + - body: This advertises low, medium, and high for reasoning-capable xAI models, but the xAI stream wrapper still deletes reasoning, reasoningEffort, and reasoning_effort, and runtime compat maps every xAI thinking level to null. Users can select these levels while requests sent to xAI remain unchanged, so the linked Grok reasoning-effort behavior is not fixed. - confidence: 0.93 ## Security Review Status: cleared -Summary: The diff only changes xAI provider policy code and unit tests; I found no concrete security or supply-chain regression. +Summary: Cleared: the diff only changes xAI provider policy wiring and tests, with no new dependency, workflow, secret, package, or code-execution surface. Concerns: @@ -146,13 +146,13 @@ Concerns: ## Real Behavior Proof -Status: mock_only +Status: insufficient -Evidence kind: none +Evidence kind: live_output Needs contributor action: true -Summary: The PR body only provides unit-test proof; it needs after-fix real xAI runtime evidence such as redacted terminal output, logs, linked artifacts, or diagnostic screenshots/recordings before merge. +Summary: The PR body provides standalone resolver output and explicitly says no live Gateway/xAI run was tested; redacted terminal output, logs, artifacts, or diagnostic screenshots/recordings of the real runtime path are still needed, and updating the PR body should trigger re-review or a maintainer can comment @clawsweeper re-review. ## Work Candidate @@ -160,26 +160,26 @@ Candidate: manual_review Confidence: high -Priority: high +Priority: medium Status: manual_review -Reason: Needs contributor and maintainer follow-up: the missing real-behavior proof cannot be supplied by an automated repair lane, and the code fix must be validated against the xAI runtime path. +Reason: Needs contributor real-behavior proof and maintainer follow-up on whether to broaden this PR to the transport path or replace it with a complete xAI reasoning-effort fix. Cluster refs: -- https://github.com/openclaw/openclaw/pull/79555 - https://github.com/openclaw/openclaw/issues/79210 +- https://github.com/openclaw/openclaw/pull/79555 Likely files: - extensions/xai/provider-policy-api.ts - extensions/xai/index.ts -- extensions/xai/stream.ts - extensions/xai/runtime-model-compat.ts +- extensions/xai/stream.ts - extensions/xai/stream.test.ts -- extensions/xai/xai.live.test.ts -- CHANGELOG.md +- extensions/xai/index.test.ts +- extensions/xai/provider-policy-api.test.ts Validation: @@ -189,50 +189,50 @@ Validation: ## Evidence -- **PR diff advertises xAI thinking levels:** The branch wires `resolveThinkingProfile(ctx)` into the xAI provider and returns `off/low/medium/high` when the catalog marks the model as reasoning-capable. +- **PR diff advertises xAI thinking levels:** The branch replaces the off-only xAI profile hook with resolveThinkingProfile(ctx), which can return off/low/medium/high for reasoning-capable models. - file: [extensions/xai/index.ts:204](https://github.com/openclaw/openclaw/blob/3191edb404585b856fb1e6af89ada027990ca12f/extensions/xai/index.ts#L204) - - command: `curl -fsSL https://github.com/openclaw/openclaw/pull/79555.diff | sed -n '1,240p'` + - command: `curl -fsSL https://api.github.com/repos/openclaw/openclaw/pulls/79555/files` - sha: [3191edb40458](https://github.com/openclaw/openclaw/commit/3191edb404585b856fb1e6af89ada027990ca12f) -- **xAI stream still strips reasoning controls:** Current main deletes `reasoning`, `reasoningEffort`, and `reasoning_effort` from xAI payloads before the request is sent. - - file: [extensions/xai/stream.ts:173](https://github.com/openclaw/openclaw/blob/ff4d5541a2ad104e0fc9c7c52b452ab4d5d8e17f/extensions/xai/stream.ts#L173) - - command: `nl -ba extensions/xai/stream.ts | sed -n '130,200p'` - - sha: [ff4d5541a2ad](https://github.com/openclaw/openclaw/commit/ff4d5541a2ad104e0fc9c7c52b452ab4d5d8e17f) -- **Runtime compat disables xAI thinking levels:** Current main maps every xAI thinking level to `null`, preserving the intentional off-only transport behavior. - - file: [extensions/xai/runtime-model-compat.ts:10](https://github.com/openclaw/openclaw/blob/ff4d5541a2ad104e0fc9c7c52b452ab4d5d8e17f/extensions/xai/runtime-model-compat.ts#L10) - - command: `nl -ba extensions/xai/runtime-model-compat.ts | sed -n '1,80p'` - - sha: [ff4d5541a2ad](https://github.com/openclaw/openclaw/commit/ff4d5541a2ad104e0fc9c7c52b452ab4d5d8e17f) -- **Changelog explains current behavior:** The active changelog says xAI reasoning controls were stopped and the xAI thinking profile was clamped to off after live `Invalid reasoning effort` failures. - - file: [CHANGELOG.md:326](https://github.com/openclaw/openclaw/blob/ff4d5541a2ad104e0fc9c7c52b452ab4d5d8e17f/CHANGELOG.md#L326) - - command: `nl -ba CHANGELOG.md | sed -n '318,332p'` - - sha: [ff4d5541a2ad](https://github.com/openclaw/openclaw/commit/ff4d5541a2ad104e0fc9c7c52b452ab4d5d8e17f) -- **Linked request remains broader than the patch:** The linked feature request asks to let users set Grok 4.3 `reasoning_effort`; this PR only changes the advertised profile and unit tests, not the outgoing payload behavior. - - command: `curl -fsSL https://api.github.com/repos/openclaw/openclaw/issues/79210 | sed -n '1,180p'` -- **Real behavior proof is test-only:** The PR body lists `pnpm test:extension xai` as proof; no live xAI request, runtime log, terminal output, screenshot with diagnostics, or artifact shows the changed behavior reaching xAI. - - command: `curl -fsSL https://api.github.com/repos/openclaw/openclaw/pulls/79555 | sed -n '1,220p'` +- **xAI stream still strips reasoning controls:** Current main deletes reasoning, reasoningEffort, and reasoning_effort from outgoing xAI payloads before the request reaches the provider. + - file: [extensions/xai/stream.ts:173](https://github.com/openclaw/openclaw/blob/b6423fbd323cc1ecfc8ef452b7d862a5ec0685f8/extensions/xai/stream.ts#L173) + - command: `nl -ba extensions/xai/stream.ts | sed -n '140,205p'` + - sha: [b6423fbd323c](https://github.com/openclaw/openclaw/commit/b6423fbd323cc1ecfc8ef452b7d862a5ec0685f8) +- **Runtime compat disables xAI thinking levels:** Current main maps every xAI thinking level to null, preserving the off-only transport behavior even if the UI/profile advertises more levels. + - file: [extensions/xai/runtime-model-compat.ts:10](https://github.com/openclaw/openclaw/blob/b6423fbd323cc1ecfc8ef452b7d862a5ec0685f8/extensions/xai/runtime-model-compat.ts#L10) + - command: `nl -ba extensions/xai/runtime-model-compat.ts | sed -n '1,160p'` + - sha: [b6423fbd323c](https://github.com/openclaw/openclaw/commit/b6423fbd323cc1ecfc8ef452b7d862a5ec0685f8) +- **Changelog records intentional current behavior:** The active changelog says OpenClaw stopped sending xAI reasoning controls and clamped the xAI thinking profile to off after live invalid-effort failures. + - file: [CHANGELOG.md:326](https://github.com/openclaw/openclaw/blob/b6423fbd323cc1ecfc8ef452b7d862a5ec0685f8/CHANGELOG.md#L326) + - command: `nl -ba CHANGELOG.md | sed -n '300,345p'` + - sha: [b6423fbd323c](https://github.com/openclaw/openclaw/commit/b6423fbd323cc1ecfc8ef452b7d862a5ec0685f8) +- **Contributor acknowledged transport gap:** The author confirmed in discussion that the stream wrapper deletes reasoning fields and runtime compat maps levels to null, leaving this PR intentionally policy-only. + - command: `curl -fsSL https://api.github.com/repos/openclaw/openclaw/issues/79555/comments` +- **Real behavior proof is insufficient:** The PR body provides standalone resolver output and explicitly says no live Gateway runtime with an xAI API key was tested. + - command: `curl -fsSL https://api.github.com/repos/openclaw/openclaw/pulls/79555` - sha: [3191edb40458](https://github.com/openclaw/openclaw/commit/3191edb404585b856fb1e6af89ada027990ca12f) ## Likely Related People -- **steipete:** recent maintainer and current behavior introducer - - reason: Git blame and file history point the current bundled xAI plugin, off-only thinking profile, payload stripping, and catalog/runtime compat surface to the current-main xAI plugin import commit. +- **steipete:** recent maintainer and current xAI behavior owner + - reason: GitHub path history shows recent xAI commits that clamp/omit reasoning efforts and maintain the xAI plugin surface, including commits affecting provider policy, runtime compat, and adjacent xAI provider wiring. - confidence: high - - commits: 1c8e58b4ff8e40dd09411f6053197e3cd9cbac90 - - files: extensions/xai/index.ts, extensions/xai/provider-policy-api.ts, extensions/xai/stream.ts, extensions/xai/runtime-model-compat.ts, extensions/xai/model-definitions.ts + - commits: e9987ffc3aa0928486ce541952ce47713f6cad66, ea26a9dba066344ac53fc190e4a0ed5ec6234f03, 6f6b8fc4650c6b94d0b6e85a0ebe826f01c1185c, a8907d80ddaced42d13808803574fcd26d1679a0 + - files: extensions/xai/provider-policy-api.ts, extensions/xai/runtime-model-compat.ts, extensions/xai/index.ts, extensions/xai/stream.ts - **huntharo:** recent adjacent owner - - reason: Recent xAI plugin migration and code-execution wiring commits touched the same provider/plugin boundary that owns xAI runtime behavior. + - reason: GitHub path history shows xAI Responses-default and unsupported reasoning-parameter commits in the same transport/runtime area, with Peter merging those changes into main. - confidence: medium - - commits: b7ab0ddb55990dc5ba6e2e9ee72a46672ac3f445, 216796f1e3e2f5d06a01047bbe8e688869e8b972, 2765fdc2ddd56cda9068749093b2d4d8c1eaa9a4 - - files: extensions/xai/index.ts, extensions/xai/code-execution.ts, extensions/xai/provider-models.ts -- **George Pickett:** original feature contributor - - reason: The older history shows the original xAI Grok provider support landing under this area, so they are relevant background for provider behavior but not the current patch owner. + - commits: fd748171b8019d109f11a14669b0681fdd128a5c, 5ed8ee6832d1157f7b22b6da0401976f81b2d1a1, 0bd0097557d6fe53bb444fcb9cc6e87d501676ef + - files: extensions/xai/stream.ts, extensions/xai/index.ts +- **vincentkoc:** provider runtime refactor owner + - reason: Recent provider-hook and xAI SDK import work touched the shared plugin/provider boundary that this PR uses, though not the specific xAI reasoning policy decision. - confidence: low - - commits: db31c0ccca922704185061aac89123c52970ea64 - - files: extensions/xai, src + - commits: 20d14745cfd1f3e908e7b098f3b9970aa1c31e9c, 9c42e6424d2505f99484b1ac4b69ed2b4e0d7201 + - files: src/plugins/provider-thinking.ts, extensions/xai/stream.ts ## Risks / Open Questions -- The patch may re-expose UI/session controls that silently have no effect because xAI payload shaping still removes the transport fields. -- A correct change needs live or otherwise real xAI runtime proof to avoid reintroducing the recently documented `Invalid reasoning effort` failures. +- Users could see and select xAI low/medium/high thinking levels while outgoing requests remain unchanged. +- A correct transport change needs real xAI runtime proof to avoid reintroducing the recently documented invalid-effort failures. ## Close Comment @@ -240,19 +240,19 @@ _No close comment posted._ ## GitHub Snapshot -- comments: 2 -- timeline events: 7 +- comments: 4 +- timeline events: 9 - related items: 1 - PR files: 3 - PR commits: 1 ## Review Telemetry -- prompt chars: 48268 +- prompt chars: 58997 - static prompt chars: 33412 -- context chars: 13608 +- context chars: 24337 - schema chars: 14081 - additional prompt chars: 0 -- context collection ms: 3789 -- Codex review ms: 197482 +- context collection ms: 3028 +- Codex review ms: 197517 \ No newline at end of file