diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0abe8dd..88c2539 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,8 @@ jobs: - name: Run Tests (Unit Tests Only) run: | if [[ "${{ runner.os }}" == "Linux" ]]; then - swift test --filter TachikomaTests --skip "OpenAIAudioProviderTests" --skip "ProviderEndToEndTests" + # Several test suites mutate process-wide env/profile state. + swift test --no-parallel --filter TachikomaTests --skip "OpenAIAudioProviderTests" --skip "ProviderEndToEndTests" else swift test --filter TachikomaTests fi diff --git a/.github/workflows/cross-platform.yml b/.github/workflows/cross-platform.yml index 3987af7..8e80fda 100644 --- a/.github/workflows/cross-platform.yml +++ b/.github/workflows/cross-platform.yml @@ -48,7 +48,8 @@ jobs: export OPENAI_API_KEY="${OPENAI_API_KEY:-test-key}" export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-test-key}" SKIP_FLAGS="--skip ProviderEndToEndTests" - swift test $SKIP_FLAGS + # Several test suites mutate process-wide env/profile state. + swift test --no-parallel $SKIP_FLAGS test-linux-ubuntu-24: runs-on: ubuntu-24.04 @@ -76,7 +77,8 @@ jobs: export OPENAI_API_KEY="${OPENAI_API_KEY:-test-key}" export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-test-key}" SKIP_FLAGS="--skip ProviderEndToEndTests" - swift test $SKIP_FLAGS + # Several test suites mutate process-wide env/profile state. + swift test --no-parallel $SKIP_FLAGS # Optional: Build release artifacts build-release: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7271f66..87c3ceb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -105,7 +105,8 @@ jobs: echo "OPENAI_API_KEY missing; skipping OpenAIAudioProviderTests" SKIP_FLAGS="$SKIP_FLAGS --skip OpenAIAudioProviderTests" fi - swift test --verbose $SKIP_FLAGS + # Several test suites mutate process-wide env/profile state. + swift test --no-parallel --verbose $SKIP_FLAGS env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} diff --git a/.swiftlint.yml b/.swiftlint.yml index ca59682..ab8c91b 100644 --- a/.swiftlint.yml +++ b/.swiftlint.yml @@ -107,8 +107,8 @@ disabled_rules: # Rule configurations file_length: - warning: 1000 - error: 2000 + warning: 2000 + error: 2500 ignore_comment_only_lines: true function_parameter_count: @@ -137,8 +137,8 @@ trailing_comma: mandatory_comma: true type_body_length: - warning: 800 - error: 1200 + warning: 1800 + error: 2200 type_name: min_length: diff --git a/CHANGELOG.md b/CHANGELOG.md index 792052a..0e31b0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,8 @@ All notable changes to the Tachikoma project will be documented in this file. - Added explicit LM Studio model shortcuts such as `lmstudio` and `lmstudio/openai/gpt-oss-120b` so local provider selections no longer fall through to Ollama custom IDs. ### Changed -- Refreshed the first-class model catalog to current provider IDs: OpenAI GPT-5.5/5.4, Claude Opus 4.7/Sonnet 4.6/Haiku 4.5, Gemini 3.1, Mistral latest aliases, Groq current production IDs, and xAI Grok 4.3/4.20. +- Refreshed the first-class model catalog to current provider IDs: OpenAI GPT-5.5/5.4, Claude Fable 5/Opus 4.8/Opus 4.7/Sonnet 4.6/Haiku 4.5, Gemini 3.1, Mistral latest aliases, Groq current production IDs, and xAI Grok 4.3/4.20. +- Added explicit `claude-fable-5` support with 1M context, 128K max output, signed-thinking replay, refusal handling, and non-streaming generation; `LanguageModel.default` remains `claude-opus-4-8`, while `LanguageModel.defaultStreaming` now uses streaming-safe `gpt-5.5`. - Removed stale direct model support for retired or non-canonical IDs including GPT-5.1/5.2/pseudo-thinking models, deprecated Claude Sonnet/Opus 4 snapshots, Grok 2/3/4-fast rows, old Groq Llama/Mixtral/Gemma aliases, stale Mistral aliases, and invalid LM Studio `current`. ### Fixed diff --git a/README.md b/README.md index e2afdb9..52dd039 100644 --- a/README.md +++ b/README.md @@ -100,8 +100,8 @@ print(result.text) ## Models Common picks: -- Anthropic: `claude-opus-4-5` (`LanguageModel.default`) -- OpenAI: `gpt-5.5` (flagship), `gpt-5.4` / `gpt-5.4-mini` / `gpt-5.4-nano`, `gpt-5` +- Anthropic: `claude-opus-4-8` (`LanguageModel.default`, non-streaming), `claude-fable-5` (explicit opt-in) +- OpenAI: `gpt-5.5` (`LanguageModel.defaultStreaming`), `gpt-5.4` / `gpt-5.4-mini` / `gpt-5.4-nano`, `gpt-5` - Google: `gemini-3.1-pro-preview`, `gemini-3-flash` - Grok: `grok-4.3` - Local: `ollama/llama3.3` diff --git a/Sources/Tachikoma/Core/AnthropicMessageConversion.swift b/Sources/Tachikoma/Core/AnthropicMessageConversion.swift index 76aea38..ee03907 100644 --- a/Sources/Tachikoma/Core/AnthropicMessageConversion.swift +++ b/Sources/Tachikoma/Core/AnthropicMessageConversion.swift @@ -1,18 +1,56 @@ import Foundation +struct AnthropicReasoningReplayTarget { + let provider: String + let modelId: String + let endpointIdentity: String? + let allowsLegacyUnknown: Bool + + func matches(_ customData: [String: String]) -> Bool { + guard customData["tachikoma.reasoning.provider"] == self.provider else { + return false + } + guard customData["tachikoma.reasoning.model"] == self.modelId else { + return false + } + return customData["tachikoma.reasoning.base_url"] == self.endpointIdentity + } +} + enum AnthropicMessageConversion { static func convertMessagesToAnthropic( _ messages: [ModelMessage], thinkingEnabled: Bool, + reasoningTarget: AnthropicReasoningReplayTarget? = nil, ) throws -> (String?, [AnthropicMessage]) { var systemMessage: String? var anthropicMessages: [AnthropicMessage] = [] - var pendingSignedThinking: (text: String, signature: String, type: String)? + var pendingThinkingBlocks: [(text: String, signature: String?, type: String)] = [] let thinkingSignatureKey = "anthropic.thinking.signature" let thinkingTypeKey = "anthropic.thinking.type" + func appendThinkingBlocks( + _ pendingBlocks: [(text: String, signature: String?, type: String)], + to content: inout [AnthropicContent], + ) { + for pending in pendingBlocks { + if pending.type == "redacted_thinking" { + content.append(.redactedThinking(.init( + type: "redacted_thinking", + data: pending.text, + ))) + } else if let signature = pending.signature { + content.append(.thinking(.init( + type: "thinking", + thinking: pending.text, + signature: signature, + ))) + } + } + } + for message in messages { switch message.role { case .system: @@ -48,29 +86,32 @@ enum AnthropicMessageConversion { }.joined() let signature = message.metadata?.customData?[thinkingSignatureKey] let type = message.metadata?.customData?[thinkingTypeKey] ?? "thinking" - if let signature, !signature.isEmpty { - pendingSignedThinking = (text: text, signature: signature, type: type) + let customData = message.metadata?.customData ?? [:] + if + customData["tachikoma.reasoning.provider"] != nil || + customData["tachikoma.reasoning.model"] != nil || + customData["tachikoma.reasoning.base_url"] != nil || + customData["anthropic.thinking.model"] != nil + { + guard reasoningTarget?.matches(customData) == true else { + continue + } + } else if reasoningTarget?.allowsLegacyUnknown != true { + continue + } + if type == "redacted_thinking" { + pendingThinkingBlocks.append((text: text, signature: nil, type: type)) + } else if let signature, !signature.isEmpty { + pendingThinkingBlocks.append((text: text, signature: signature, type: type)) } continue } var content: [AnthropicContent] = [] - if thinkingEnabled, let pending = pendingSignedThinking { - if pending.type == "redacted_thinking" { - content.append(.redactedThinking(.init( - type: "redacted_thinking", - redactedThinking: pending.text, - signature: pending.signature, - ))) - } else { - content.append(.thinking(.init( - type: "thinking", - thinking: pending.text, - signature: pending.signature, - ))) - } - pendingSignedThinking = nil + if thinkingEnabled, !pendingThinkingBlocks.isEmpty { + appendThinkingBlocks(pendingThinkingBlocks, to: &content) + pendingThinkingBlocks.removeAll() } // Process each content part @@ -139,21 +180,12 @@ enum AnthropicMessageConversion { } } - if thinkingEnabled, let pending = pendingSignedThinking { - let thinkingContent: AnthropicContent = if pending.type == "redacted_thinking" { - .redactedThinking(.init( - type: "redacted_thinking", - redactedThinking: pending.text, - signature: pending.signature, - )) - } else { - .thinking(.init( - type: "thinking", - thinking: pending.text, - signature: pending.signature, - )) + if thinkingEnabled, !pendingThinkingBlocks.isEmpty { + var content: [AnthropicContent] = [] + appendThinkingBlocks(pendingThinkingBlocks, to: &content) + if !content.isEmpty { + anthropicMessages.append(AnthropicMessage(role: "assistant", content: content)) } - anthropicMessages.append(AnthropicMessage(role: "assistant", content: [thinkingContent])) } return (systemMessage, anthropicMessages) diff --git a/Sources/Tachikoma/Core/BaseProviders.swift b/Sources/Tachikoma/Core/BaseProviders.swift index d7ccca5..876bbf6 100644 --- a/Sources/Tachikoma/Core/BaseProviders.swift +++ b/Sources/Tachikoma/Core/BaseProviders.swift @@ -1,3 +1,12 @@ +#if canImport(CryptoKit) +import CryptoKit + +private typealias ReasoningEndpointHasher = CryptoKit.SHA256 +#else +import Crypto + +private typealias ReasoningEndpointHasher = Crypto.SHA256 +#endif import Foundation #if canImport(FoundationNetworking) import FoundationNetworking @@ -17,22 +26,35 @@ public final class AnthropicProvider: ModelProvider { private let auth: TKAuthValue private let betaHeader: String private let additionalHeaders: [String: String] + private let reasoningProvider: String + private let reasoningModelId: String + private let reasoningBaseURL: String? + private let urlSession: URLSession private static let requiredBetaFlags: [String] = [ "interleaved-thinking-2025-05-14", "fine-grained-tool-streaming-2025-05-14", ] - public init( model: LanguageModel.Anthropic, configuration: TachikomaConfiguration, additionalHeaders: [String: String] = [:], authOverride: TKAuthValue? = nil, + reasoningProvider: String = "anthropic", + reasoningModelId: String? = nil, + reasoningBaseURL: String? = nil, + urlSession: URLSession = .shared, ) throws { self.model = model self.modelId = model.modelId self.baseURL = configuration.getBaseURL(for: .anthropic) ?? "https://api.anthropic.com" self.additionalHeaders = additionalHeaders + self.reasoningProvider = reasoningProvider + self.reasoningModelId = reasoningModelId ?? model.modelId + self.reasoningBaseURL = ReasoningEndpointIdentity.canonical( + reasoningBaseURL ?? (reasoningProvider == "anthropic" ? self.baseURL : nil), + ) + self.urlSession = urlSession if let authOverride { self.auth = authOverride @@ -57,16 +79,18 @@ public final class AnthropicProvider: ModelProvider { throw TachikomaError.authenticationFailed("ANTHROPIC_API_KEY not found") } - self.betaHeader = Self.mergedBetaHeader(configuration: configuration, auth: self.auth) + self.betaHeader = Self.mergedBetaHeader(configuration: configuration, auth: self.auth, model: model) + let isFable = Self.isFable(model: model) + let supportsSafeStreaming = !Self.hasStreamingRefusalRisk(model: model) self.capabilities = ModelCapabilities( supportsVision: model.supportsVision, supportsTools: model.supportsTools, - supportsStreaming: true, + supportsStreaming: supportsSafeStreaming, supportsAudioInput: model.supportsAudioInput, supportsAudioOutput: model.supportsAudioOutput, - contextLength: model.contextLength, - maxOutputTokens: 4096, + contextLength: isFable ? 1_000_000 : model.contextLength, + maxOutputTokens: isFable ? 128_000 : model.maxOutputTokens, ) } @@ -94,6 +118,16 @@ public final class AnthropicProvider: ModelProvider { } private static func mergedBetaHeader(configuration: TachikomaConfiguration, auth: TKAuthValue) -> String { + self.mergedBetaHeader(configuration: configuration, auth: auth, model: nil) + } + + private static func mergedBetaHeader( + configuration: TachikomaConfiguration, + auth: TKAuthValue, + model: LanguageModel.Anthropic?, + ) + -> String + { var existing: String? if case let .bearer(_, betaHeader) = auth { existing = betaHeader @@ -103,6 +137,14 @@ public final class AnthropicProvider: ModelProvider { existing = configuration.credentialValue(for: "ANTHROPIC_BETA_HEADER") } + if let model, Self.isFable(model: model) { + return existing? + .split(separator: ",") + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } + .joined(separator: ",") ?? "" + } + return Self.mergedBetaHeader(existing: existing) } @@ -117,9 +159,13 @@ public final class AnthropicProvider: ModelProvider { case .disabled: return nil case .adaptive: + if Self.isFable(model: model) { return nil } guard self.usesAdaptiveThinking(model: model) else { return nil } return AnthropicThinking(type: "adaptive", budgetTokens: nil) case let .enabled(budgetTokens): + if Self.isFable(model: model) { + return nil + } if case .opus48 = model { return AnthropicThinking(type: "adaptive", budgetTokens: nil) } @@ -148,6 +194,7 @@ public final class AnthropicProvider: ModelProvider { } private func usesAdaptiveThinking(model: LanguageModel.Anthropic) -> Bool { + if Self.isFable(model: model) { return true } if case .opus48 = model { return true } if case .opus47 = model { return true } if case .sonnet46 = model { return true } @@ -155,11 +202,12 @@ public final class AnthropicProvider: ModelProvider { } private func supportsEffort(model: LanguageModel.Anthropic) -> Bool { + if Self.isFable(model: model) { return true } switch model { case .opus48, .opus47, .opus45, .sonnet46: - true + return true default: - false + return false } } @@ -202,6 +250,19 @@ public final class AnthropicProvider: ModelProvider { } let validatedSettings = request.settings.validated(for: .anthropic(self.model)) + if + Self.isFable(model: self.model), + case .disabled = validatedSettings.providerOptions.anthropic?.thinking + { + throw TachikomaError.invalidConfiguration( + "Claude Fable 5 always uses adaptive thinking; disabled thinking is not supported", + ) + } + if Self.isFable(model: self.model), request.messages.last?.role == .assistant { + throw TachikomaError.invalidConfiguration( + "Claude Fable 5 does not support assistant prefill requests", + ) + } let requestedThinking = self.anthropicThinking( from: validatedSettings.providerOptions.anthropic?.thinking, model: self.model, @@ -214,11 +275,19 @@ public final class AnthropicProvider: ModelProvider { var thinking: AnthropicThinking? let systemMessage: String? let messages: [AnthropicMessage] + let preserveSignedThinking = requestedThinking != nil || self.requiresSignedThinkingReplay(model: self.model) + let reasoningTarget = AnthropicReasoningReplayTarget( + provider: self.reasoningProvider, + modelId: self.reasoningModelId, + endpointIdentity: self.reasoningBaseURL, + allowsLegacyUnknown: !Self.isFable(model: self.model), + ) do { thinking = requestedThinking (systemMessage, messages) = try AnthropicMessageConversion.convertMessagesToAnthropic( request.messages, - thinkingEnabled: requestedThinking != nil, + thinkingEnabled: preserveSignedThinking, + reasoningTarget: reasoningTarget, ) } catch { // If we can't provide signed thinking blocks for a cached/history session, fall back to non-thinking mode. @@ -227,14 +296,20 @@ public final class AnthropicProvider: ModelProvider { (systemMessage, messages) = try AnthropicMessageConversion.convertMessagesToAnthropic( request.messages, thinkingEnabled: false, + reasoningTarget: reasoningTarget, ) } else { throw error } } + let maxTokens = validatedSettings.maxTokens ?? self.defaultMaxTokens(for: self.model) + if !stream, Self.requiresExtendedNonStreamingTimeout(model: self.model, maxTokens: maxTokens) { + urlRequest.timeoutInterval = 1800 + } + let anthropicRequest = try AnthropicMessageRequest( model: modelId, - maxTokens: validatedSettings.maxTokens ?? 1024, + maxTokens: maxTokens, temperature: thinking == nil ? validatedSettings.temperature : nil, system: systemMessage, messages: messages, @@ -270,7 +345,7 @@ public final class AnthropicProvider: ModelProvider { } } - let (data, response) = try await URLSession.shared.data(for: urlRequest) + let (data, response) = try await self.urlSession.data(for: urlRequest) guard let httpResponse = response as? HTTPURLResponse else { throw TachikomaError.networkError(NSError(domain: "Invalid response", code: 0)) @@ -302,7 +377,7 @@ public final class AnthropicProvider: ModelProvider { switch content { case let .text(textContent): textContent.text - case .toolUse: + case .thinking, .redactedThinking, .toolUse: nil } }.joined() @@ -312,19 +387,63 @@ public final class AnthropicProvider: ModelProvider { outputTokens: anthropicResponse.usage.outputTokens, ) - let finishReason: FinishReason? = switch anthropicResponse.stopReason { - case "end_turn": .stop - case "max_tokens": .length - case "tool_use": .toolCalls - case "stop_sequence": .stop - default: .other + let finishReason = Self.mapFinishReason(anthropicResponse.stopReason) + if finishReason == .contentFilter { + let fallbackRefusalText = if let category = anthropicResponse.stopDetails?.category { + "Request refused by Anthropic content filter (\(category))" + } else { + "Request refused by Anthropic content filter" + } + let refusalText = anthropicResponse.stopDetails?.explanation ?? fallbackRefusalText + return ProviderResponse( + text: refusalText, + usage: usage, + finishReason: finishReason, + toolCalls: nil, + reasoning: [], + assistantMessages: [], + isBillable: usage.outputTokens > 0, + ) } - // Convert tool calls if present - let toolCalls = anthropicResponse.content.compactMap { content -> AgentToolCall? in + var reasoning: [ProviderReasoningBlock] = [] + var toolCalls: [AgentToolCall] = [] + var assistantMessages: [ModelMessage] = [] + + for content in anthropicResponse.content { switch content { - case .text: - return nil + case let .text(textContent): + if !textContent.text.isEmpty { + assistantMessages.append(.assistant(textContent.text)) + } + case let .thinking(thinking): + let block = ProviderReasoningBlock( + text: thinking.thinking, + signature: thinking.signature, + type: thinking.type, + ) + reasoning.append(block) + assistantMessages.append(ModelMessage( + role: .assistant, + content: [.text(thinking.thinking)], + channel: .thinking, + metadata: .init(customData: self.reasoningMetadata( + type: thinking.type, + signature: thinking.signature, + )), + )) + case let .redactedThinking(thinking): + let block = ProviderReasoningBlock( + text: thinking.data, + type: thinking.type, + ) + reasoning.append(block) + assistantMessages.append(ModelMessage( + role: .assistant, + content: [.text(thinking.data)], + channel: .thinking, + metadata: .init(customData: self.reasoningMetadata(type: thinking.type)), + )) case let .toolUse(toolUse): // Convert input to AnyAgentToolValue dictionary var arguments: [String: AnyAgentToolValue] = [:] @@ -340,11 +459,13 @@ public final class AnthropicProvider: ModelProvider { } } - return AgentToolCall( + let toolCall = AgentToolCall( id: toolUse.id, name: toolUse.name, arguments: arguments, ) + toolCalls.append(toolCall) + assistantMessages.append(ModelMessage(role: .assistant, content: [.toolCall(toolCall)])) } } @@ -353,9 +474,61 @@ public final class AnthropicProvider: ModelProvider { usage: usage, finishReason: finishReason, toolCalls: toolCalls.isEmpty ? nil : toolCalls, + reasoning: reasoning, + assistantMessages: assistantMessages, ) } + private func reasoningMetadata(type: String, signature: String? = nil) -> [String: String] { + var metadata = [ + "anthropic.thinking.model": self.reasoningModelId, + "anthropic.thinking.type": type, + "tachikoma.reasoning.provider": self.reasoningProvider, + "tachikoma.reasoning.model": self.reasoningModelId, + ] + if let signature, !signature.isEmpty { + metadata["anthropic.thinking.signature"] = signature + } + if let reasoningBaseURL { + metadata["tachikoma.reasoning.base_url"] = reasoningBaseURL + } + return metadata + } + + private func requiresSignedThinkingReplay(model: LanguageModel.Anthropic) -> Bool { + Self.isFable(model: model) + } + + private func defaultMaxTokens(for model: LanguageModel.Anthropic) -> Int { + if Self.isFable(model: model) { return min(128_000, 16384) } + return 1024 + } + + private static func isFable(model: LanguageModel.Anthropic) -> Bool { + LanguageModel.Anthropic.isFable(modelId: model.modelId) + } + + private static func hasStreamingRefusalRisk(model: LanguageModel.Anthropic) -> Bool { + LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: model.modelId) + } + + private static func requiresExtendedNonStreamingTimeout(model: LanguageModel.Anthropic, maxTokens: Int) -> Bool { + self.isFable(model: model) || maxTokens >= 64000 + } + + static func mapFinishReason(_ stopReason: String?) -> FinishReason? { + switch stopReason { + case "end_turn": .stop + case "max_tokens": .length + case "tool_use": .toolCalls + case "stop_sequence": .stop + case "model_context_window_exceeded": .length + case "refusal": .contentFilter + case nil: nil + default: .other + } + } + private func applyAuth(to request: inout URLRequest, secret: String) { switch self.auth { case .apiKey: @@ -363,10 +536,19 @@ public final class AnthropicProvider: ModelProvider { case .bearer: request.setValue("Bearer " + secret, forHTTPHeaderField: "Authorization") } - request.setValue(self.betaHeader, forHTTPHeaderField: "anthropic-beta") + if !self.betaHeader.isEmpty { + request.setValue(self.betaHeader, forHTTPHeaderField: "anthropic-beta") + } } public func streamText(request: ProviderRequest) async throws -> AsyncThrowingStream { + guard !Self.hasStreamingRefusalRisk(model: self.model) else { + let message = "\(self.model.modelId) streaming is disabled because Anthropic refusals require rollback-aware handling" + throw TachikomaError.invalidConfiguration( + "\(message); use generateText instead", + ) + } + let urlRequest = try self.makeURLRequest(for: request, stream: true) // Debug logging only when explicitly enabled @@ -417,7 +599,7 @@ public final class AnthropicProvider: ModelProvider { (Data, URLResponse), Error, >) in - URLSession.shared.dataTask(with: urlRequest) { data, response, error in + self.urlSession.dataTask(with: urlRequest) { data, response, error in if let error { continuation.resume(throwing: error) } else if let data, let response { @@ -445,7 +627,7 @@ public final class AnthropicProvider: ModelProvider { let lines = String(data: data, encoding: .utf8)?.components(separatedBy: "\n") ?? [] #else // macOS/iOS: Use streaming API - let (bytes, response) = try await URLSession.shared.bytes(for: urlRequest) + let (bytes, response) = try await self.urlSession.bytes(for: urlRequest) guard let httpResponse = response as? HTTPURLResponse else { throw TachikomaError.networkError(NSError(domain: "Invalid response", code: 0)) @@ -469,6 +651,7 @@ public final class AnthropicProvider: ModelProvider { var currentReasoningSignature: String? var currentReasoningType: String? var reasoningSignatureEmitted = false + var finishReason: FinishReason? do { for try await line in bytes.lines { @@ -502,7 +685,7 @@ public final class AnthropicProvider: ModelProvider { currentReasoningType = nil reasoningSignatureEmitted = false } - continuation.yield(TextStreamDelta.done()) + continuation.yield(.done(finishReason: finishReason)) break } @@ -533,6 +716,12 @@ public final class AnthropicProvider: ModelProvider { currentReasoningSignature = nil currentReasoningType = block.type reasoningSignatureEmitted = false + if block.type == "redacted_thinking", let data = block.data { + continuation.yield(TextStreamDelta.reasoning( + data, + type: "redacted_thinking", + )) + } continue } } @@ -637,7 +826,9 @@ public final class AnthropicProvider: ModelProvider { case "message_delta": // Message-level updates (usage, etc.) - // Usage is typically included in the done event, not separately + if let stopReason = event.delta?.stopReason { + finishReason = Self.mapFinishReason(stopReason) + } continue case "message_stop": @@ -657,7 +848,7 @@ public final class AnthropicProvider: ModelProvider { currentReasoningType = nil reasoningSignatureEmitted = false } - continuation.yield(TextStreamDelta.done()) + continuation.yield(.done(finishReason: finishReason)) default: // Unknown event type, skip @@ -694,6 +885,7 @@ public final class AnthropicProvider: ModelProvider { var currentReasoningSignature: String? var currentReasoningType: String? var reasoningSignatureEmitted = false + var finishReason: FinishReason? do { for line in lines { @@ -720,7 +912,7 @@ public final class AnthropicProvider: ModelProvider { type: currentReasoningType, )) } - continuation.yield(TextStreamDelta.done()) + continuation.yield(.done(finishReason: finishReason)) break } @@ -739,6 +931,12 @@ public final class AnthropicProvider: ModelProvider { currentReasoningSignature = nil currentReasoningType = block.type reasoningSignatureEmitted = false + if block.type == "redacted_thinking", let data = block.data { + continuation.yield(TextStreamDelta.reasoning( + data, + type: "redacted_thinking", + )) + } } case "content_block_delta": if let delta = event.delta { @@ -761,6 +959,10 @@ public final class AnthropicProvider: ModelProvider { accumulatedReasoning += thinking } } + case "message_delta": + if let stopReason = event.delta?.stopReason { + finishReason = Self.mapFinishReason(stopReason) + } case "message_stop": if !accumulatedText.isEmpty { continuation.yield(TextStreamDelta.text(accumulatedText)) @@ -772,7 +974,7 @@ public final class AnthropicProvider: ModelProvider { type: currentReasoningType, )) } - continuation.yield(TextStreamDelta.done()) + continuation.yield(.done(finishReason: finishReason)) default: continue } @@ -836,6 +1038,37 @@ public final class AnthropicProvider: ModelProvider { } } +@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) +enum ReasoningEndpointIdentity { + static func canonical(_ rawValue: String?) -> String? { + guard + let trimmed = rawValue?.trimmingCharacters(in: .whitespacesAndNewlines), + !trimmed.isEmpty, + var components = URLComponents(string: trimmed), + let scheme = components.scheme?.lowercased(), + let host = components.host?.lowercased() else + { + return nil + } + + components.scheme = scheme + components.host = host + components.user = nil + components.password = nil + components.fragment = nil + while components.path.count > 1, components.path.hasSuffix("/") { + components.path.removeLast() + } + + guard let value = components.string else { return nil } + guard let data = value.data(using: .utf8) else { return nil } + let digest = ReasoningEndpointHasher.hash(data: data) + .map { String(format: "%02x", $0) } + .joined() + return "sha256:\(digest)" + } +} + /// Provider for Ollama models @available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) public final class OllamaProvider: ModelProvider { diff --git a/Sources/Tachikoma/Core/Generation.swift b/Sources/Tachikoma/Core/Generation.swift index da63b6c..837bdd3 100644 --- a/Sources/Tachikoma/Core/Generation.swift +++ b/Sources/Tachikoma/Core/Generation.swift @@ -35,10 +35,11 @@ public func generateText( var currentMessages = messages var allSteps: [GenerationStep] = [] var totalUsage = Usage(inputTokens: 0, outputTokens: 0) + var finalResponseStartIndex = messages.count for stepIndex in 0.. { continuation in Task { do { let totalInputTokens = 0 var totalOutputTokens = 0 + var bufferedDeltas: [TextStreamDelta] = [] + var bufferedVisibleText = "" + var didReceiveTerminal = false + var didTriggerLocalStop = false - for try await delta in capturedStream { - continuation.yield(delta) - + func track(_ delta: TextStreamDelta) { // Track tokens as they come in (approximate) if case .textDelta = delta.type, let content = delta.content { // Rough approximation: ~4 characters per token totalOutputTokens += max(1, content.count / 4) } + } + + func yieldAndTrack(_ delta: TextStreamDelta) { + track(delta) + continuation.yield(delta) + } + + if let capturedStopCondition { + await capturedStopCondition.reset() + } + + for try await delta in capturedStream { + if buffersUntilDone, delta.type != .done { + if !didTriggerLocalStop { + bufferedDeltas.append(delta) + track(delta) + if + let capturedStopCondition, + case .textDelta = delta.type, + let content = delta.content + { + bufferedVisibleText += content + didTriggerLocalStop = await capturedStopCondition.shouldStop( + text: bufferedVisibleText, + delta: content, + ) + } + } + continue + } + + if case .done = delta.type { + didReceiveTerminal = true + if buffersUntilDone { + if delta.finishReason == .contentFilter { + bufferedDeltas.removeAll() + yieldAndTrack(delta) + } else { + for bufferedDelta in bufferedDeltas { + continuation.yield(bufferedDelta) + } + bufferedDeltas.removeAll() + if didTriggerLocalStop { + yieldAndTrack(TextStreamDelta.done(usage: delta.usage, finishReason: .stop)) + } else { + yieldAndTrack(delta) + } + } + } else { + yieldAndTrack(delta) + } + } else { + yieldAndTrack(delta) + } if case .done = delta.type { // Record final usage (this is approximate for streaming) @@ -348,6 +433,10 @@ public func streamText( } } + if buffersUntilDone, !didReceiveTerminal, !bufferedDeltas.isEmpty { + throw TachikomaError.apiError("Stream ended before provider completion status was received") + } + continuation.finish() } catch { if shouldEndSession { @@ -392,7 +481,7 @@ public func generateObject( let provider = try resolvedConfiguration.makeProvider(for: model) let request = ProviderRequest( - messages: messages, + messages: messages.sanitizedForProvider(model, configuration: resolvedConfiguration), tools: nil, settings: settings, outputFormat: .json, @@ -406,6 +495,10 @@ public func generateObject( try await provider.generateText(request: request) } + if response.finishReason == .contentFilter { + throw TachikomaError.apiError("Response was blocked by the provider content filter") + } + // Parse the JSON response into the expected type guard let jsonData = response.text.data(using: .utf8) else { throw TachikomaError.invalidInput("Response text is not valid UTF-8") @@ -446,11 +539,14 @@ public func streamObject( -> StreamObjectResult { let resolvedConfiguration = TachikomaConfiguration.resolve(configuration) + guard model.supportsStreaming else { + throw TachikomaError.invalidConfiguration("\(model.modelId) does not support streaming") + } let provider = try resolvedConfiguration.makeProvider(for: model) // Create request with JSON output format let request = ProviderRequest( - messages: messages, + messages: messages.sanitizedForProvider(model, configuration: resolvedConfiguration), tools: nil, settings: settings, outputFormat: .json, @@ -458,6 +554,7 @@ public func streamObject( // Get the text stream from the provider let stream = try await provider.streamText(request: request) + let buffersUntilDone = model.buffersObjectStreamUntilDone(settings: settings) // Create a new stream that attempts to parse partial JSON objects let objectStream = AsyncThrowingStream, Error> { continuation in @@ -466,6 +563,37 @@ public func streamObject( var accumulatedText = "" var lastValidObject: T? var hasStarted = false + var bufferedStartDelta: ObjectStreamDelta? + var didFinishObject = false + + func publishCompleteObject(allowLastValidObjectFallback: Bool) throws { + if buffersUntilDone, let bufferedStartDelta { + continuation.yield(bufferedStartDelta) + } + if + let jsonData = accumulatedText.data(using: .utf8), + let finalObject = try? JSONDecoder().decode(T.self, from: jsonData) + { + continuation.yield(ObjectStreamDelta( + type: .complete, + object: finalObject, + rawText: accumulatedText, + )) + } else if allowLastValidObjectFallback, let lastValidObject { + // If we have a last valid object, use it as complete + continuation.yield(ObjectStreamDelta( + type: .complete, + object: lastValidObject, + rawText: accumulatedText, + )) + } else { + throw TachikomaError.invalidInput( + "Failed to parse complete object from stream", + ) + } + continuation.yield(ObjectStreamDelta(type: .done)) + didFinishObject = true + } for try await delta in stream { if case .textDelta = delta.type, let content = delta.content { @@ -474,7 +602,16 @@ public func streamObject( // Signal stream start if !hasStarted { hasStarted = true - continuation.yield(ObjectStreamDelta(type: .start)) + let startDelta = ObjectStreamDelta(type: .start) + if buffersUntilDone { + bufferedStartDelta = startDelta + } else { + continuation.yield(startDelta) + } + } + + if buffersUntilDone { + continue } // Attempt to parse the accumulated JSON @@ -482,44 +619,51 @@ public func streamObject( // Try to parse as complete object if let object = try? JSONDecoder().decode(T.self, from: jsonData) { lastValidObject = object - continuation.yield(ObjectStreamDelta( + let objectDelta = ObjectStreamDelta( type: .partial, object: object, rawText: accumulatedText, - )) + ) + continuation.yield(objectDelta) } else if let partialObject = attemptPartialParse(T.self, from: accumulatedText) { // Attempt to parse as partial object lastValidObject = partialObject - continuation.yield(ObjectStreamDelta( + let objectDelta = ObjectStreamDelta( type: .partial, object: partialObject, rawText: accumulatedText, - )) + ) + continuation.yield(objectDelta) } } } else if case .done = delta.type { - // Final parse attempt - if - let jsonData = accumulatedText.data(using: .utf8), - let finalObject = try? JSONDecoder().decode(T.self, from: jsonData) - { - continuation.yield(ObjectStreamDelta( - type: .complete, - object: finalObject, - rawText: accumulatedText, - )) - } else if let lastValidObject { - // If we have a last valid object, use it as complete - continuation.yield(ObjectStreamDelta( - type: .complete, - object: lastValidObject, - rawText: accumulatedText, - )) - } else { - throw TachikomaError.invalidInput( - "Failed to parse complete object from stream", - ) + if delta.finishReason == .contentFilter { + throw TachikomaError.apiError("Response was blocked by the provider content filter") } + try publishCompleteObject(allowLastValidObjectFallback: delta.finishReason == .stop || delta + .finishReason == nil) + } + } + + if !didFinishObject, hasStarted { + if buffersUntilDone { + throw TachikomaError.apiError("Stream ended before provider completion status was received") + } else if + let jsonData = accumulatedText.data(using: .utf8), + let finalObject = try? JSONDecoder().decode(T.self, from: jsonData) + { + continuation.yield(ObjectStreamDelta( + type: .complete, + object: finalObject, + rawText: accumulatedText, + )) + continuation.yield(ObjectStreamDelta(type: .done)) + } else if let lastValidObject { + continuation.yield(ObjectStreamDelta( + type: .complete, + object: lastValidObject, + rawText: accumulatedText, + )) continuation.yield(ObjectStreamDelta(type: .done)) } } @@ -599,6 +743,463 @@ private func fixPartialJSON(_ json: String) -> String { return fixed } +extension LanguageModel { + fileprivate func buffersTextStreamUntilDone(settings: GenerationSettings) -> Bool { + self.hasAnthropicStreamingRefusalRisk || + settings.streamBuffering == .untilTerminal || + (settings.stopConditions != nil && self.canEmitTerminalContentFilterAfterText) + } + + fileprivate func buffersObjectStreamUntilDone(settings: GenerationSettings) -> Bool { + settings.streamBuffering == .untilTerminal || + self.hasAnthropicStreamingRefusalRisk + } + + private var hasAnthropicStreamingRefusalRisk: Bool { + switch self { + case let .anthropic(model): + return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: model.modelId) + case let .anthropicCompatible(modelId, _): + return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId) + case let .openRouter(modelId), let .together(modelId): + return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId) + case let .openaiCompatible(modelId, _): + return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId) + case let .custom(provider): + if + let parsed = ProviderParser.parse(provider.modelId), + CustomProviderRegistry.shared.get(parsed.provider)?.kind == .anthropic + { + return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: parsed.model) + } + return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: provider.modelId) + default: + return false + } + } + + private var canEmitTerminalContentFilterAfterText: Bool { + switch self { + case .openai, + .openaiCompatible, + .openRouter, + .together, + .replicate, + .google, + .mistral, + .groq, + .grok, + .azureOpenAI: + return true + case let .custom(provider): + guard + let parsed = ProviderParser.parse(provider.modelId), + let registeredProvider = CustomProviderRegistry.shared.get(parsed.provider) else + { + return false + } + switch registeredProvider.kind { + case .openai: + return true + case .anthropic: + return false + } + default: + return false + } + } +} + +private struct ReasoningReplayTarget { + let provider: String + let modelId: String + let baseURL: String? + let allowsLegacyUnknown: Bool + + func matches(_ customData: [String: String]) -> Bool { + guard customData["tachikoma.reasoning.provider"] == self.provider else { + return false + } + guard customData["tachikoma.reasoning.model"] == self.modelId else { + return false + } + return customData["tachikoma.reasoning.base_url"] == self.endpointIdentity + } + + var endpointIdentity: String? { + ReasoningEndpointIdentity.canonical(self.baseURL) + } +} + +extension [ModelMessage] { + fileprivate func replacingGeneratedAssistantText(after prefixCount: Int, with text: String) -> [ModelMessage] { + guard self.indices.contains(prefixCount) else { + return self + } + + var messages = self + var cursor = text.startIndex + for messageIndex in prefixCount.. [ModelMessage] + { + if let target = model.anthropicThinkingReplayTarget(configuration: configuration) { + var sanitized: [ModelMessage] = [] + for message in self { + if message.isSyntheticReasoningBoundary { + if sanitized.last?.channel == .thinking { + sanitized.append(message) + } + continue + } + guard message.channel == .thinking else { + sanitized.append(message) + continue + } + guard !message.hasOpenRouterReasoningReplayMetadata else { + continue + } + guard let producerModel = message.metadata?.customData?["anthropic.thinking.model"] else { + if + target.allowsLegacyUnknown, + message.metadata?.customData?["anthropic.thinking.type"] != nil + { + sanitized.append(message) + } + continue + } + let customData = message.metadata?.customData ?? [:] + if producerModel == target.modelId, target.matches(customData) { + sanitized.append(message) + } + } + return sanitized + } + + if let target = model.openRouterReasoningReplayTarget(configuration: configuration) { + var sanitized: [ModelMessage] = [] + for message in self { + if message.isSyntheticReasoningBoundary { + if sanitized.last?.channel == .thinking { + sanitized.append(message) + } + continue + } + guard message.channel == .thinking else { + sanitized.append(message) + continue + } + guard message.hasOpenRouterReasoningReplayMetadata else { + continue + } + if target.matches(message.metadata?.customData ?? [:]) { + sanitized.append(message) + } + } + return sanitized + } + + return self.filter { !$0.isSyntheticReasoningBoundary && $0.channel != .thinking } + } +} + +extension ModelMessage { + private var hasAnthropicThinkingReplayMetadata: Bool { + guard let customData = metadata?.customData else { return false } + return customData["anthropic.thinking.model"] != nil || + customData["anthropic.thinking.type"] != nil || + customData["anthropic.thinking.signature"] != nil + } + + fileprivate var hasOpenRouterReasoningReplayMetadata: Bool { + guard let customData = metadata?.customData else { return false } + return customData["openrouter.reasoning_details"] != nil || + customData["openrouter.reasoning"] != nil + } + + private var hasProviderReasoningReplayMetadata: Bool { + self.hasAnthropicThinkingReplayMetadata || self.hasOpenRouterReasoningReplayMetadata + } + + fileprivate var isSyntheticReasoningBoundary: Bool { + metadata?.customData?["tachikoma.internal.boundary"] == "reasoning_only" + } +} + +extension [ModelMessage] { + fileprivate func sanitizedForToolContext() -> [ModelMessage] { + self.filter { $0.channel != .thinking && !$0.isSyntheticReasoningBoundary } + } + + fileprivate func containsAssistantText(_ text: String) -> Bool { + guard !text.isEmpty else { return true } + let assistantTexts = self.flatMap { message -> [String] in + guard message.role == .assistant, message.channel != .thinking else { + return [] + } + return message.content.compactMap { part in + if case let .text(value) = part { + return value + } + return nil + } + } + return assistantTexts.contains(text) || assistantTexts.joined() == text + } + + fileprivate func containsReasoningBlock(_ reasoning: ProviderReasoningBlock) -> Bool { + self.contains { message in + message.role == .assistant && message.channel == .thinking && message.content.contains { part in + guard case let .text(value) = part else { return false } + if let signature = reasoning.signature, !signature.isEmpty { + return message.metadata?.customData?["anthropic.thinking.signature"] == signature || + message.metadata?.customData?["tachikoma.reasoning.signature"] == signature + } + return value == reasoning.text + } + } + } + + fileprivate func containsToolCall(id: String) -> Bool { + self.contains { message in + message.role == .assistant && message.content.contains { part in + if case let .toolCall(toolCall) = part { + return toolCall.id == id + } + return false + } + } + } +} + +extension LanguageModel { + fileprivate func responseHistoryMessages( + nativeMessages: [ModelMessage], + text: String, + reasoning: [ProviderReasoningBlock], + toolCalls: [AgentToolCall], + configuration: TachikomaConfiguration, + ) + -> [ModelMessage] + { + var history = nativeMessages + + for reasoningBlock in reasoning where !history.containsReasoningBlock(reasoningBlock) { + history.append(ModelMessage( + role: .assistant, + content: [.text(reasoningBlock.text)], + channel: .thinking, + metadata: .init(customData: self.anthropicThinkingMetadata( + for: reasoningBlock, + configuration: configuration, + )), + )) + } + + let missingToolCalls = toolCalls.filter { !history.containsToolCall(id: $0.id) } + let isMissingText = !history.containsAssistantText(text) + let needsFallbackBoundary = nativeMessages.isEmpty && text.isEmpty && missingToolCalls.isEmpty + + guard isMissingText || !missingToolCalls.isEmpty || needsFallbackBoundary else { + return history + } + + var fallbackContent: [ModelMessage.ContentPart] = [] + if isMissingText || needsFallbackBoundary { + fallbackContent.append(.text(text)) + } + fallbackContent.append(contentsOf: missingToolCalls.map { .toolCall($0) }) + history.append(ModelMessage(role: .assistant, content: fallbackContent)) + return history + } + + fileprivate func anthropicThinkingReplayTarget(configuration: TachikomaConfiguration) -> ReasoningReplayTarget? { + switch self { + case let .anthropic(model): + return ReasoningReplayTarget( + provider: "anthropic", + modelId: model.modelId, + baseURL: configuration.getBaseURL(for: .anthropic) ?? Provider.anthropic.defaultBaseURL, + allowsLegacyUnknown: !LanguageModel.Anthropic.isFable(modelId: model.modelId), + ) + case let .anthropicCompatible(modelId, baseURL): + return ReasoningReplayTarget( + provider: "anthropic-compatible", + modelId: modelId, + baseURL: baseURL, + allowsLegacyUnknown: !LanguageModel.Anthropic.isFable(modelId: modelId), + ) + case let .minimax(model): + return ReasoningReplayTarget( + provider: "minimax", + modelId: model.modelId, + baseURL: configuration.getBaseURL(for: .minimax) ?? Provider.minimax.defaultBaseURL, + allowsLegacyUnknown: true, + ) + case let .minimaxCN(model): + return ReasoningReplayTarget( + provider: "minimax-cn", + modelId: model.modelId, + baseURL: configuration.getBaseURL(for: .minimaxCN) ?? Provider.minimaxCN.defaultBaseURL, + allowsLegacyUnknown: true, + ) + case let .custom(provider): + if let directAnthropicProvider = provider as? AnthropicProvider { + return ReasoningReplayTarget( + provider: "anthropic", + modelId: directAnthropicProvider.modelId, + baseURL: directAnthropicProvider.baseURL ?? Provider.anthropic.defaultBaseURL, + allowsLegacyUnknown: !LanguageModel.Anthropic.isFable(modelId: directAnthropicProvider.modelId), + ) + } + if let compatibleProvider = provider as? AnthropicCompatibleProvider { + return ReasoningReplayTarget( + provider: "anthropic-compatible", + modelId: compatibleProvider.modelId, + baseURL: compatibleProvider.baseURL, + allowsLegacyUnknown: !LanguageModel.Anthropic.isFable(modelId: compatibleProvider.modelId), + ) + } + guard + let parsed = ProviderParser.parse(provider.modelId), + let registeredProvider = CustomProviderRegistry.shared.get(parsed.provider), + registeredProvider.kind == .anthropic else + { + return provider.modelId.contains("claude") || provider.modelId.contains("anthropic") + ? ReasoningReplayTarget( + provider: "custom-anthropic", + modelId: provider.modelId, + baseURL: provider.baseURL, + allowsLegacyUnknown: !LanguageModel.Anthropic.isFable(modelId: provider.modelId), + ) + : nil + } + return ReasoningReplayTarget( + provider: "custom-anthropic", + modelId: parsed.model, + baseURL: registeredProvider.baseURL, + allowsLegacyUnknown: !LanguageModel.Anthropic.isFable(modelId: parsed.model), + ) + default: + return nil + } + } + + fileprivate func openRouterReasoningReplayTarget(configuration: TachikomaConfiguration) -> ReasoningReplayTarget? { + switch self { + case let .openRouter(modelId): + ReasoningReplayTarget( + provider: "openrouter", + modelId: modelId, + baseURL: configuration.getBaseURL(for: .custom("openrouter")) ?? "https://openrouter.ai/api/v1", + allowsLegacyUnknown: false, + ) + default: + nil + } + } + + private func anthropicThinkingMetadata( + for reasoning: ProviderReasoningBlock, + configuration: TachikomaConfiguration, + ) + -> [String: String] + { + if + let rawJSON = reasoning.rawJSON, + let target = self.openRouterReasoningReplayTarget(configuration: configuration) + { + var metadata = [ + "openrouter.reasoning_details": rawJSON, + "tachikoma.reasoning.type": reasoning.type, + "tachikoma.reasoning.provider": target.provider, + "tachikoma.reasoning.model": target.modelId, + ] + if let endpointIdentity = target.endpointIdentity { + metadata["tachikoma.reasoning.base_url"] = endpointIdentity + } + return metadata + } + if + reasoning.type == "openrouter_reasoning", + let target = self.openRouterReasoningReplayTarget(configuration: configuration) + { + var metadata = [ + "openrouter.reasoning": reasoning.text, + "tachikoma.reasoning.type": reasoning.type, + "tachikoma.reasoning.provider": target.provider, + "tachikoma.reasoning.model": target.modelId, + ] + if let endpointIdentity = target.endpointIdentity { + metadata["tachikoma.reasoning.base_url"] = endpointIdentity + } + return metadata + } + + guard let target = self.anthropicThinkingReplayTarget(configuration: configuration) else { + var customData = ["tachikoma.reasoning.type": reasoning.type] + if let signature = reasoning.signature, !signature.isEmpty { + customData["tachikoma.reasoning.signature"] = signature + } + return customData + } + + var customData = [ + "anthropic.thinking.type": reasoning.type, + "anthropic.thinking.model": target.modelId, + "tachikoma.reasoning.provider": target.provider, + "tachikoma.reasoning.model": target.modelId, + ] + if let endpointIdentity = target.endpointIdentity { + customData["tachikoma.reasoning.base_url"] = endpointIdentity + } + if let signature = reasoning.signature, !signature.isEmpty { + customData["anthropic.thinking.signature"] = signature + } + return customData + } +} + // MARK: - Convenience Functions /// Simple text generation from a prompt (convenience wrapper) - with Model enum @@ -746,7 +1347,7 @@ public func analyze( @available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) public func stream( _ prompt: String, - using model: LanguageModel = .default, + using model: LanguageModel = .defaultStreaming, system: String? = nil, maxTokens: Int? = nil, temperature: Double? = nil, diff --git a/Sources/Tachikoma/Core/ModelCapabilities.swift b/Sources/Tachikoma/Core/ModelCapabilities.swift index 00a91a8..9a97454 100644 --- a/Sources/Tachikoma/Core/ModelCapabilities.swift +++ b/Sources/Tachikoma/Core/ModelCapabilities.swift @@ -259,7 +259,7 @@ public final class ModelCapabilityRegistry: @unchecked Sendable { ), ) - // Opus 4.7+ maps requested thinking to Anthropic's adaptive thinking request shape. + // Fable 5 and Opus 4.7+ map requested thinking to Anthropic's adaptive thinking request shape. let claudeAdaptiveThinkingCapabilities = ModelParameterCapabilities( supportsTemperature: false, supportsTopP: false, @@ -270,6 +270,7 @@ public final class ModelCapabilityRegistry: @unchecked Sendable { ), excludedParameters: ["temperature", "topP", "topK"], ) + self.capabilities["anthropic:claude-fable-5"] = claudeAdaptiveThinkingCapabilities self.capabilities["anthropic:claude-opus-4-8"] = claudeAdaptiveThinkingCapabilities self.capabilities["anthropic:claude-opus-4-7"] = claudeAdaptiveThinkingCapabilities self.capabilities["anthropic:claude-opus-4-5"] = claude4Capabilities @@ -344,6 +345,19 @@ public final class ModelCapabilityRegistry: @unchecked Sendable { return registered } + if self.isAnthropicFableCompatible(model) { + return ModelParameterCapabilities( + supportsTemperature: false, + supportsTopP: false, + supportsTopK: false, + supportedProviderOptions: .init( + supportsThinking: true, + supportsCacheControl: true, + ), + excludedParameters: ["temperature", "topP", "topK"], + ) + } + // Return provider-based defaults switch model { case .openai: @@ -382,6 +396,30 @@ public final class ModelCapabilityRegistry: @unchecked Sendable { return ModelParameterCapabilities() } } + + private func isAnthropicFableCompatible(_ model: LanguageModel) -> Bool { + switch model { + case let .anthropic(anthropic): + return LanguageModel.Anthropic.isFable(modelId: anthropic.modelId) + case let .anthropicCompatible(modelId, _): + return LanguageModel.Anthropic.isFable(modelId: modelId) + case let .openRouter(modelId), + let .openaiCompatible(modelId, _), + let .together(modelId): + return LanguageModel.Anthropic.isFable(modelId: modelId) + case let .custom(provider): + guard + let parsed = ProviderParser.parse(provider.modelId), + LanguageModel.Anthropic.isFable(modelId: parsed.model), + CustomProviderRegistry.shared.get(parsed.provider)?.kind == .anthropic else + { + return false + } + return true + default: + return false + } + } } // MARK: - GenerationSettings Extension @@ -441,6 +479,7 @@ extension GenerationSettings { stopConditions: stopConditions, seed: seed, providerOptions: adjustedProviderOptions, + streamBuffering: self.streamBuffering, ) } diff --git a/Sources/Tachikoma/Core/OpenAICompatibleHelper.swift b/Sources/Tachikoma/Core/OpenAICompatibleHelper.swift index 92ce507..3e798a3 100644 --- a/Sources/Tachikoma/Core/OpenAICompatibleHelper.swift +++ b/Sources/Tachikoma/Core/OpenAICompatibleHelper.swift @@ -36,14 +36,24 @@ struct OpenAICompatibleHelper { } // Extract stop sequences from stop conditions - let stopSequences = Self.extractStopSequences(from: request.settings.stopConditions) + let settings = Self.validatedSettings( + request.settings, + providerName: providerName, + modelId: modelId, + baseURL: baseURL, + ) + let stopSequences = Self.extractStopSequences(from: settings.stopConditions) // Convert request to OpenAI-compatible format let openAIRequest = try OpenAIChatRequest( model: modelId, - messages: convertMessages(request.messages), - temperature: request.settings.temperature, - maxTokens: request.settings.maxTokens, + messages: convertMessages( + request.messages, + replayOpenRouterReasoningForModel: providerName == "OpenRouter" ? modelId : nil, + replayOpenRouterReasoningForBaseURL: providerName == "OpenRouter" ? baseURL : nil, + ), + temperature: settings.temperature, + maxTokens: settings.maxTokens, tools: request.tools?.compactMap { try self.convertTool($0) }, stream: false, stop: stopSequences.isEmpty ? nil : stopSequences, @@ -100,14 +110,9 @@ struct OpenAICompatibleHelper { let usage = openAIResponse.usage.map { Usage(inputTokens: $0.promptTokens ?? 0, outputTokens: $0.completionTokens ?? 0) } + let reasoning = Self.reasoningBlocks(from: choice.message) - let finishReason: FinishReason? = switch choice.finishReason { - case "stop": .stop - case "length": .length - case "tool_calls": .toolCalls - case "content_filter": .contentFilter - default: .other - } + let finishReason = Self.mapFinishReason(choice.finishReason) // Convert tool calls if present let toolCalls = choice.message.toolCalls?.compactMap { openAIToolCall -> AgentToolCall? in @@ -142,6 +147,7 @@ struct OpenAICompatibleHelper { usage: usage, finishReason: finishReason, toolCalls: toolCalls, + reasoning: reasoning, ) } @@ -173,14 +179,27 @@ struct OpenAICompatibleHelper { } // Extract stop sequences from stop conditions - let stopSequences = Self.extractStopSequences(from: request.settings.stopConditions) + guard !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId) else { + throw TachikomaError.invalidConfiguration("\(modelId) does not support streaming") + } + let settings = Self.validatedSettings( + request.settings, + providerName: providerName, + modelId: modelId, + baseURL: baseURL, + ) + let stopSequences = Self.extractStopSequences(from: settings.stopConditions) // Convert request to OpenAI-compatible format let openAIRequest = try OpenAIChatRequest( model: modelId, - messages: convertMessages(request.messages), - temperature: request.settings.temperature, - maxTokens: request.settings.maxTokens, + messages: convertMessages( + request.messages, + replayOpenRouterReasoningForModel: providerName == "OpenRouter" ? modelId : nil, + replayOpenRouterReasoningForBaseURL: providerName == "OpenRouter" ? baseURL : nil, + ), + temperature: settings.temperature, + maxTokens: settings.maxTokens, tools: request.tools?.compactMap { try self.convertTool($0) }, stream: true, stop: stopSequences.isEmpty ? nil : stopSequences, @@ -346,8 +365,10 @@ struct OpenAICompatibleHelper { } } - if choice.finishReason != nil { - continuation.yield(TextStreamDelta.done()) + if let finishReason = choice.finishReason { + continuation.yield(TextStreamDelta.done( + finishReason: Self.mapFinishReason(finishReason), + )) break } } @@ -427,9 +448,10 @@ struct OpenAICompatibleHelper { } if let finishReason = choice.finishReason { - if finishReason == "stop" || finishReason == "tool_calls" { - continuation.yield(TextStreamDelta.done()) - } + continuation.yield(TextStreamDelta.done( + finishReason: Self.mapFinishReason(finishReason), + )) + break } } } catch { @@ -456,6 +478,39 @@ struct OpenAICompatibleHelper { // MARK: - Helper Methods + private static func mapFinishReason(_ reason: String?) -> FinishReason? { + switch reason { + case "stop": .stop + case "length": .length + case "tool_calls": .toolCalls + case "content_filter": .contentFilter + case nil: nil + default: .other + } + } + + private static func validatedSettings( + _ settings: GenerationSettings, + providerName: String, + modelId: String, + baseURL: String, + ) + -> GenerationSettings + { + settings.validated(for: self.languageModel(providerName: providerName, modelId: modelId, baseURL: baseURL)) + } + + private static func languageModel(providerName: String, modelId: String, baseURL: String) -> LanguageModel { + switch providerName.lowercased() { + case "openrouter": + .openRouter(modelId: modelId) + case "together": + .together(modelId: modelId) + default: + .openaiCompatible(modelId: modelId, baseURL: baseURL) + } + } + /// Extract native stop sequences from stop conditions private static func extractStopSequences(from stopCondition: (any StopCondition)?) -> [String] { // Extract native stop sequences from stop conditions @@ -512,18 +567,55 @@ struct OpenAICompatibleHelper { } } - private static func convertMessages(_ messages: [ModelMessage]) throws -> [OpenAIChatMessage] { - messages.map { message in + private static func convertMessages( + _ messages: [ModelMessage], + replayOpenRouterReasoningForModel modelId: String?, + replayOpenRouterReasoningForBaseURL baseURL: String?, + ) throws + -> [OpenAIChatMessage] + { + var converted: [OpenAIChatMessage] = [] + var pendingReasoningDetails: [JSONValue] = [] + var pendingReasoningText: [String] = [] + let endpointIdentity = ReasoningEndpointIdentity.canonical(baseURL) + + for message in messages { + if + message.channel == .thinking, + let customData = message.metadata?.customData, + customData["tachikoma.reasoning.provider"] == "openrouter", + customData["tachikoma.reasoning.model"] == modelId, + customData["tachikoma.reasoning.base_url"] == endpointIdentity, + let rawReasoningDetails = customData["openrouter.reasoning_details"] + { + pendingReasoningDetails.append(contentsOf: Self.decodeReasoningDetails(rawReasoningDetails)) + continue + } + if + message.channel == .thinking, + let customData = message.metadata?.customData, + customData["tachikoma.reasoning.provider"] == "openrouter", + customData["tachikoma.reasoning.model"] == modelId, + customData["tachikoma.reasoning.base_url"] == endpointIdentity, + let reasoning = customData["openrouter.reasoning"] + { + pendingReasoningText.append(reasoning) + continue + } + if message.channel == .thinking { + continue + } + switch message.role { case .system: - return OpenAIChatMessage(role: "system", content: message.content.compactMap { part in + converted.append(OpenAIChatMessage(role: "system", content: message.content.compactMap { part in if case let .text(text) = part { return text } return nil - }.joined()) + }.joined())) case .user: if message.content.count == 1, case let .text(text) = message.content.first! { // Simple text message - return OpenAIChatMessage(role: "user", content: text) + converted.append(OpenAIChatMessage(role: "user", content: text)) } else { // Multi-modal message let content = message.content.compactMap { contentPart -> OpenAIChatMessageContent? in @@ -540,7 +632,7 @@ struct OpenAICompatibleHelper { return nil // Skip tool calls and results in user messages } } - return OpenAIChatMessage(role: "user", content: content) + converted.append(OpenAIChatMessage(role: "user", content: content)) } case .assistant: // Check if this assistant message contains tool calls @@ -571,15 +663,25 @@ struct OpenAICompatibleHelper { // If we have tool calls, create a message with tool calls if !toolCalls.isEmpty { - return OpenAIChatMessage( + converted.append(OpenAIChatMessage( role: "assistant", content: textContent.isEmpty ? nil : textContent, toolCalls: toolCalls, - ) + reasoning: pendingReasoningText.isEmpty ? nil : pendingReasoningText.joined(separator: "\n"), + reasoningDetails: pendingReasoningDetails.isEmpty ? nil : pendingReasoningDetails, + )) } else { // Regular text message - return OpenAIChatMessage(role: "assistant", content: textContent) + converted.append(OpenAIChatMessage( + role: "assistant", + content: textContent, + toolCalls: nil, + reasoning: pendingReasoningText.isEmpty ? nil : pendingReasoningText.joined(separator: "\n"), + reasoningDetails: pendingReasoningDetails.isEmpty ? nil : pendingReasoningDetails, + )) } + pendingReasoningText.removeAll() + pendingReasoningDetails.removeAll() case .tool: // Extract tool call ID and result content from tool result var toolCallId: String? @@ -598,9 +700,44 @@ struct OpenAICompatibleHelper { } } - return OpenAIChatMessage(role: "tool", content: resultContent, toolCallId: toolCallId) + converted.append(OpenAIChatMessage(role: "tool", content: resultContent, toolCallId: toolCallId)) } } + + return converted + } + + private static func reasoningBlocks(from message: OpenAIChatResponse.Message) -> [ProviderReasoningBlock] { + var blocks: [ProviderReasoningBlock] = [] + if let details = message.reasoningDetails, !details.isEmpty { + blocks.append(ProviderReasoningBlock( + text: message.reasoning ?? "", + type: "openrouter_reasoning_details", + rawJSON: Self.encodeReasoningDetails(details), + )) + } else if let reasoning = message.reasoning, !reasoning.isEmpty { + blocks.append(ProviderReasoningBlock( + text: reasoning, + type: "openrouter_reasoning", + rawJSON: nil, + )) + } + return blocks + } + + private static func encodeReasoningDetails(_ details: [JSONValue]) -> String? { + guard let data = try? JSONEncoder().encode(details) else { return nil } + return String(data: data, encoding: .utf8) + } + + private static func decodeReasoningDetails(_ rawJSON: String) -> [JSONValue] { + guard + let data = rawJSON.data(using: .utf8), + let details = try? JSONDecoder().decode([JSONValue].self, from: data) else + { + return [] + } + return details } private static func convertTool(_ tool: AgentTool) throws -> OpenAITool { diff --git a/Sources/Tachikoma/Core/StopConditions.swift b/Sources/Tachikoma/Core/StopConditions.swift index 9e9b232..09fcf6b 100644 --- a/Sources/Tachikoma/Core/StopConditions.swift +++ b/Sources/Tachikoma/Core/StopConditions.swift @@ -13,6 +13,17 @@ public protocol StopCondition: Sendable { func reset() async } +@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) +protocol StableCacheKeyStopCondition { + var stableCacheKey: String? { get } +} + +@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) +private func compositeStableCacheKey(kind: String, children: [String]) -> String { + let encodedChildren = children.map { "\($0.utf8.count):\($0)" }.joined() + return "\(kind):[\(encodedChildren)]" +} + // MARK: - Built-in Stop Conditions /// Stop when a specific string is encountered @@ -39,6 +50,13 @@ public struct StringStopCondition: StopCondition { public func reset() async {} } +@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) +extension StringStopCondition: StableCacheKeyStopCondition { + var stableCacheKey: String? { + "string:\(self.caseSensitive):\(self.stopString)" + } +} + /// Stop when a regex pattern is matched @available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) public struct RegexStopCondition: StopCondition { @@ -82,6 +100,13 @@ public struct RegexStopCondition: StopCondition { } } +@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) +extension RegexStopCondition: StableCacheKeyStopCondition { + var stableCacheKey: String? { + "regex:\(self.pattern)" + } +} + /// Stop after a certain number of tokens @available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) public actor TokenCountStopCondition: StopCondition { @@ -182,6 +207,15 @@ public struct AnyStopCondition: StopCondition { } } +@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) +extension AnyStopCondition: StableCacheKeyStopCondition { + var stableCacheKey: String? { + let keys = self.conditions.compactMap { ($0 as? StableCacheKeyStopCondition)?.stableCacheKey } + guard keys.count == self.conditions.count else { return nil } + return compositeStableCacheKey(kind: "any", children: keys) + } +} + /// Stop when all conditions are met @available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) public struct AllStopCondition: StopCondition { @@ -211,6 +245,15 @@ public struct AllStopCondition: StopCondition { } } +@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) +extension AllStopCondition: StableCacheKeyStopCondition { + var stableCacheKey: String? { + let keys = self.conditions.compactMap { ($0 as? StableCacheKeyStopCondition)?.stableCacheKey } + guard keys.count == self.conditions.count else { return nil } + return compositeStableCacheKey(kind: "all", children: keys) + } +} + // MARK: - Stateful Stop Conditions /// Stop when a pattern appears consecutively N times @@ -386,6 +429,13 @@ public struct NeverStopCondition: StopCondition { public func reset() async {} } +@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) +extension NeverStopCondition: StableCacheKeyStopCondition { + var stableCacheKey: String? { + "never" + } +} + // MARK: - Integration with Generation Functions @available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) @@ -431,9 +481,8 @@ extension AsyncThrowingStream where Element == TextStreamDelta { // Check stop condition if await condition.shouldStop(text: accumulatedText, delta: content) { - // Yield the current delta then stop continuation.yield(delta) - continuation.yield(TextStreamDelta.done()) + continuation.yield(TextStreamDelta.done(finishReason: .stop)) continuation.finish() return } diff --git a/Sources/Tachikoma/Core/Types.swift b/Sources/Tachikoma/Core/Types.swift index 7d16a88..eb96c8c 100644 --- a/Sources/Tachikoma/Core/Types.swift +++ b/Sources/Tachikoma/Core/Types.swift @@ -326,6 +326,11 @@ public enum ImageInput: Sendable { /// Settings for text generation @available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) public struct GenerationSettings: Sendable { + public enum StreamBufferingMode: String, Sendable, Codable { + case incremental + case untilTerminal + } + public let maxTokens: Int? public let temperature: Double? public let topP: Double? @@ -337,6 +342,7 @@ public struct GenerationSettings: Sendable { public let stopConditions: (any StopCondition)? public let seed: Int? public let providerOptions: ProviderOptions + public let streamBuffering: StreamBufferingMode public init( maxTokens: Int? = nil, @@ -350,6 +356,7 @@ public struct GenerationSettings: Sendable { stopConditions: (any StopCondition)? = nil, seed: Int? = nil, providerOptions: ProviderOptions = .init(), + streamBuffering: StreamBufferingMode = .incremental, ) { self.maxTokens = maxTokens self.temperature = temperature @@ -362,9 +369,27 @@ public struct GenerationSettings: Sendable { self.stopConditions = stopConditions self.seed = seed self.providerOptions = providerOptions + self.streamBuffering = streamBuffering } public static let `default` = GenerationSettings() + + public func withStreamBuffering(_ mode: StreamBufferingMode) -> GenerationSettings { + GenerationSettings( + maxTokens: self.maxTokens, + temperature: self.temperature, + topP: self.topP, + topK: self.topK, + frequencyPenalty: self.frequencyPenalty, + presencePenalty: self.presencePenalty, + stopSequences: self.stopSequences, + reasoningEffort: self.reasoningEffort, + stopConditions: self.stopConditions, + seed: self.seed, + providerOptions: self.providerOptions, + streamBuffering: mode, + ) + } } /// Manual Codable conformance excluding non-codable stopConditions @@ -380,6 +405,7 @@ extension GenerationSettings: Codable { case reasoningEffort case seed case providerOptions + case streamBuffering } public init(from decoder: Decoder) throws { @@ -394,6 +420,8 @@ extension GenerationSettings: Codable { self.reasoningEffort = try container.decodeIfPresent(ReasoningEffort.self, forKey: .reasoningEffort) self.seed = try container.decodeIfPresent(Int.self, forKey: .seed) self.providerOptions = try container.decodeIfPresent(ProviderOptions.self, forKey: .providerOptions) ?? .init() + self.streamBuffering = try container + .decodeIfPresent(StreamBufferingMode.self, forKey: .streamBuffering) ?? .incremental self.stopConditions = nil // Can't decode function types } @@ -409,6 +437,7 @@ extension GenerationSettings: Codable { try container.encodeIfPresent(self.reasoningEffort, forKey: .reasoningEffort) try container.encodeIfPresent(self.seed, forKey: .seed) try container.encode(self.providerOptions, forKey: .providerOptions) + try container.encode(self.streamBuffering, forKey: .streamBuffering) // Don't encode stopConditions since it can't be serialized } } diff --git a/Sources/Tachikoma/Core/UIIntegration.swift b/Sources/Tachikoma/Core/UIIntegration.swift index 34c7ac1..0354820 100644 --- a/Sources/Tachikoma/Core/UIIntegration.swift +++ b/Sources/Tachikoma/Core/UIIntegration.swift @@ -132,7 +132,9 @@ extension [ModelMessage] { /// Convert model messages to UI messages for display public func toUIMessages() -> [UIMessage] { // Convert model messages to UI messages for display - map { modelMessage in + compactMap { modelMessage in + guard !modelMessage.isProviderNativeReasoningBlock else { return nil } + guard !modelMessage.isSyntheticReasoningBoundary else { return nil } var content = "" var attachments: [UIAttachment] = [] var toolCalls: [AgentToolCall] = [] @@ -179,6 +181,20 @@ extension [ModelMessage] { } } +extension ModelMessage { + fileprivate var isProviderNativeReasoningBlock: Bool { + guard channel == .thinking, let customData = metadata?.customData else { return false } + return customData["anthropic.thinking.model"] != nil || + customData["anthropic.thinking.type"] != nil || + customData["anthropic.thinking.signature"] != nil || + customData["tachikoma.reasoning.provider"] != nil + } + + fileprivate var isSyntheticReasoningBoundary: Bool { + metadata?.customData?["tachikoma.internal.boundary"] == "reasoning_only" + } +} + // MARK: - Streaming Extensions @available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) diff --git a/Sources/Tachikoma/Models/Model.swift b/Sources/Tachikoma/Models/Model.swift index 4dcb49c..2569377 100644 --- a/Sources/Tachikoma/Models/Model.swift +++ b/Sources/Tachikoma/Models/Model.swift @@ -1,7 +1,5 @@ import Foundation -// swiftlint:disable file_length - // MARK: - Modern Language Model System /// Language model selection following AI SDK patterns @@ -156,7 +154,8 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable { } public enum Anthropic: Sendable, Hashable, CaseIterable { - // Claude 4.x / 4.5+ Series + // Claude 5 / 4.x Series + case fable5 case opus48 case opus47 case opus45 @@ -170,6 +169,7 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable { public static var allCases: [Anthropic] { [ + .fable5, .opus48, .opus47, .opus45, @@ -183,6 +183,7 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable { public var modelId: String { switch self { case let .custom(id): id + case .fable5: "claude-fable-5" case .opus48: "claude-opus-4-8" case .opus47: "claude-opus-4-7" case .opus45: "claude-opus-4-5" @@ -195,7 +196,7 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable { public var supportsVision: Bool { switch self { - case .opus48, .opus47, .opus45, .opus4, .sonnet46, .sonnet45, .haiku45: + case .fable5, .opus48, .opus47, .opus45, .opus4, .sonnet46, .sonnet45, .haiku45: true case .custom: true // Assume custom models support vision } @@ -217,12 +218,82 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable { public var contextLength: Int { switch self { - case .opus48, .opus47, .sonnet46: 1_000_000 + case .fable5, .opus48, .opus47, .sonnet46: 1_000_000 case .haiku45: 200_000 case .opus45, .opus4, .sonnet45: 500_000 - case .custom: 200_000 // Default assumption + case let .custom(id): + Self.isFable(modelId: id) ? 1_000_000 : 200_000 // Default assumption } } + + public var maxOutputTokens: Int { + switch self { + case .fable5, .opus48, .opus47: 128_000 + case .sonnet46, .haiku45: 64000 + case let .custom(id): + Self.isFable(modelId: id) ? 128_000 : 8192 + case .opus45, .opus4, .sonnet45: 4096 + } + } + + public var supportsStreaming: Bool { + !Self.hasStreamingRefusalRisk(modelId: self.modelId) + } + + public static func isFable(modelId: String) -> Bool { + let normalized = modelId.lowercased() + let pathSegments = normalized + .components(separatedBy: CharacterSet(charactersIn: "/:@")) + .filter { !$0.isEmpty } + let dotSegments = pathSegments.flatMap { $0.components(separatedBy: ".") } + .filter { !$0.isEmpty } + let segments = pathSegments + dotSegments + let canonicalSegments: Set = [ + "claude-fable-5", + "fable-5", + "fable5", + "fable", + ] + return normalized == Self.fable5.modelId || segments.contains { segment in + if canonicalSegments.contains(segment) { return true } + let compactSegment = segment + .replacingOccurrences(of: "-", with: "") + .replacingOccurrences(of: "_", with: "") + .replacingOccurrences(of: ".", with: "") + return compactSegment == "claudefable5" || compactSegment == "fable5" + } + } + + public static func isOpus48(modelId: String) -> Bool { + let normalized = modelId.lowercased() + let compactExact = normalized + .replacingOccurrences(of: "-", with: "") + .replacingOccurrences(of: "_", with: "") + .replacingOccurrences(of: ".", with: "") + let pathSegments = normalized + .components(separatedBy: CharacterSet(charactersIn: "/:@")) + .filter { !$0.isEmpty } + let dotSegments = pathSegments.flatMap { $0.components(separatedBy: ".") } + .filter { !$0.isEmpty } + let segments = pathSegments + dotSegments + let canonicalSegments: Set = [ + "claude-opus-4-8", + "opus-4-8", + "opus48", + ] + return normalized == Self.opus48.modelId || segments.contains { segment in + if canonicalSegments.contains(segment) { return true } + let compactSegment = segment + .replacingOccurrences(of: "-", with: "") + .replacingOccurrences(of: "_", with: "") + .replacingOccurrences(of: ".", with: "") + return compactSegment == "claudeopus48" || compactSegment == "opus48" + } || compactExact == "claudeopus48" || compactExact == "opus48" + } + + public static func hasStreamingRefusalRisk(modelId: String) -> Bool { + self.isFable(modelId: modelId) || self.isOpus48(modelId: modelId) + } } public enum Google: String, Sendable, Hashable, CaseIterable { @@ -783,8 +854,40 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable { } public var supportsStreaming: Bool { - // All models support streaming by default - true + if case let .anthropic(model) = self { + return model.supportsStreaming + } + if case let .anthropicCompatible(modelId, _) = self { + return !Anthropic.hasStreamingRefusalRisk(modelId: modelId) + } + if case let .openRouter(modelId) = self, modelId.lowercased().hasPrefix("anthropic/") { + return !Anthropic.hasStreamingRefusalRisk(modelId: modelId) + } + if case let .together(modelId) = self, modelId.lowercased().hasPrefix("anthropic/") { + return !Anthropic.hasStreamingRefusalRisk(modelId: modelId) + } + if case let .openaiCompatible(modelId, _) = self { + let normalized = modelId.lowercased() + guard + normalized.contains("claude") || + normalized.hasPrefix("anthropic/") || + normalized.hasPrefix("anthropic.") else + { + return true + } + return !Anthropic.hasStreamingRefusalRisk(modelId: modelId) + } + if + case let .custom(provider) = self, + let parsed = ProviderParser.parse(provider.modelId), + CustomProviderRegistry.shared.get(parsed.provider)?.kind == .anthropic + { + return !Anthropic.hasStreamingRefusalRisk(modelId: parsed.model) + } + if case let .custom(provider) = self { + return provider.capabilities.supportsStreaming + } + return true } public var providerName: String { @@ -829,10 +932,11 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable { // MARK: - Default Model public static let `default`: LanguageModel = .anthropic(.opus48) + public static let defaultStreaming: LanguageModel = .openai(.gpt55) // MARK: - Convenience Static Properties - /// Default Claude model (opus48) + /// Default Claude model (Opus 4.8) public static let claude: LanguageModel = .anthropic(.opus48) /// Default Grok model (Grok 4.3) @@ -967,10 +1071,14 @@ extension LanguageModel { model.contextLength case .azureOpenAI: 128_000 // conservative default matching OpenAI tier - case .openRouter, .together, .replicate: - 128_000 // Common default - case .openaiCompatible, .anthropicCompatible: + case let .openRouter(modelId), let .together(modelId): + Anthropic.isFable(modelId: modelId) ? 1_000_000 : 128_000 + case .replicate: 128_000 // Common default + case let .openaiCompatible(modelId, _): + Anthropic.isFable(modelId: modelId) ? 1_000_000 : 128_000 + case let .anthropicCompatible(modelId, _): + Anthropic.isFable(modelId: modelId) ? 1_000_000 : 128_000 case let .custom(provider): provider.capabilities.contextLength } @@ -1224,6 +1332,13 @@ extension LanguageModel { // MARK: Anthropic models + func matchesExactAlias(_ aliases: Set, compactAliases: Set = []) -> Bool { + aliases.contains(normalized) || + aliases.contains(dashed) || + aliases.contains(dotted) || + compactAliases.contains(compact) + } + if dotted.contains("claude-3") || compact.contains("claude3") { return nil } @@ -1237,12 +1352,31 @@ extension LanguageModel { } if - dotted.contains("claude-opus-4-8") || - dotted.contains("claude-opus-4.8") || - compact.contains("claudeopus48") || - dotted.contains("opus-4-8") || - dotted.contains("opus-4.8") || - compact.contains("opus48") + matchesExactAlias( + [ + "claude-fable-5", + "fable-5", + "fable.5", + "fable5", + "fable", + ], + compactAliases: ["claudefable5", "fable5"], + ) + { + return .anthropic(.fable5) + } + + if + matchesExactAlias( + [ + "claude-opus-4-8", + "claude-opus-4.8", + "opus-4-8", + "opus-4.8", + "opus48", + ], + compactAliases: ["claudeopus48", "opus48"], + ) { return .anthropic(.opus48) } @@ -1563,8 +1697,8 @@ extension LanguageModel { } private static func looksAnthropic(_ normalized: String) -> Bool { - normalized.contains("claude") || normalized.contains("opus") || normalized.contains("sonnet") || - normalized.contains("haiku") || normalized == "anthropic" + normalized.contains("claude") || normalized.contains("fable") || normalized.contains("opus") || + normalized.contains("sonnet") || normalized.contains("haiku") || normalized == "anthropic" } private static func looksGoogle(_ normalized: String) -> Bool { @@ -1660,5 +1794,3 @@ extension LanguageModel { } } } - -// swiftlint:enable file_length diff --git a/Sources/Tachikoma/Models/ModelProvider.swift b/Sources/Tachikoma/Models/ModelProvider.swift index 717cbca..0741977 100644 --- a/Sources/Tachikoma/Models/ModelProvider.swift +++ b/Sources/Tachikoma/Models/ModelProvider.swift @@ -84,6 +84,27 @@ public struct ProviderResponse: Sendable { public let usage: Usage? public let finishReason: FinishReason? public let toolCalls: [AgentToolCall]? + public let reasoning: [ProviderReasoningBlock] + public let assistantMessages: [ModelMessage] + public let isBillable: Bool + + public init( + text: String, + usage: Usage? = nil, + finishReason: FinishReason? = nil, + toolCalls: [AgentToolCall]? = nil, + reasoning: [ProviderReasoningBlock] = [], + assistantMessages: [ModelMessage] = [], + isBillable: Bool = true, + ) { + self.text = text + self.usage = usage + self.finishReason = finishReason + self.toolCalls = toolCalls + self.reasoning = reasoning + self.assistantMessages = assistantMessages + self.isBillable = isBillable + } public init( text: String, @@ -91,9 +112,30 @@ public struct ProviderResponse: Sendable { finishReason: FinishReason? = nil, toolCalls: [AgentToolCall]? = nil, ) { - self.text = text - self.usage = usage - self.finishReason = finishReason - self.toolCalls = toolCalls + self.init( + text: text, + usage: usage, + finishReason: finishReason, + toolCalls: toolCalls, + reasoning: [], + assistantMessages: [], + isBillable: true, + ) + } +} + +/// Provider-native signed reasoning block that must be replayed in later requests. +@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) +public struct ProviderReasoningBlock: Sendable, Equatable { + public let text: String + public let signature: String? + public let type: String + public let rawJSON: String? + + public init(text: String, signature: String? = nil, type: String = "thinking", rawJSON: String? = nil) { + self.text = text + self.signature = signature + self.type = type + self.rawJSON = rawJSON } } diff --git a/Sources/Tachikoma/Models/ModelSelection.swift b/Sources/Tachikoma/Models/ModelSelection.swift index 60a3a22..2b7522d 100644 --- a/Sources/Tachikoma/Models/ModelSelection.swift +++ b/Sources/Tachikoma/Models/ModelSelection.swift @@ -162,6 +162,8 @@ public struct ModelSelector { private static func parseAnthropicModel(_ input: String) -> Model.Anthropic? { switch input { // Direct matches + case "claude-fable-5", "claude-fable-5-latest", "fable-5", "fable.5", "fable5", "fable": + return .fable5 case "claude-opus-4-8", "claude-opus-4.8", "opus-4-8", "opus-4.8", "opus48", "claude-opus-4-8-latest": return .opus48 @@ -183,7 +185,7 @@ public struct ModelSelector { case "claude-haiku", "haiku": return .haiku45 case "anthropic": - return .opus48 // Default Anthropic model + return .opus48 default: // Check if it's a Claude model ID if self.isUnsupportedLegacyAnthropicModel(input) { @@ -518,6 +520,7 @@ public func getAllAvailableModels() -> String { output += "\nShortcuts:\n" output += " • claude, claude-opus, opus → claude-opus-4-8\n" + output += " • fable → claude-fable-5\n" output += " • gpt → gpt-5.5\n" output += " • gemini → gemini-3.5-flash\n" output += " • minimax → MiniMax-M2.7\n" @@ -526,7 +529,7 @@ public func getAllAvailableModels() -> String { output += " • llama, llama3 → llama3.3\n" output += "\nCustom Models:\n" - output += " • OpenRouter: anthropic/claude-opus-4-8\n" + output += " • OpenRouter: anthropic/claude-fable-5\n" output += " • Custom OpenAI: custom-gpt-model\n" output += " • Local Ollama: any-model:tag\n" diff --git a/Sources/Tachikoma/Providers/Anthropic/AnthropicTypes.swift b/Sources/Tachikoma/Providers/Anthropic/AnthropicTypes.swift index 6f497e6..0aca8e9 100644 --- a/Sources/Tachikoma/Providers/Anthropic/AnthropicTypes.swift +++ b/Sources/Tachikoma/Providers/Anthropic/AnthropicTypes.swift @@ -139,13 +139,11 @@ enum AnthropicContent: Codable { struct RedactedThinkingContent: Codable { let type: String - let redactedThinking: String - let signature: String + let data: String enum CodingKeys: String, CodingKey { case type - case redactedThinking = "redacted_thinking" - case signature + case data } } @@ -402,12 +400,14 @@ struct AnthropicMessageResponse: Codable { let model: String let stopReason: String? let stopSequence: String? + let stopDetails: StopDetails? let usage: AnthropicUsage enum CodingKeys: String, CodingKey { case id, type, role, content, model, usage case stopReason = "stop_reason" case stopSequence = "stop_sequence" + case stopDetails = "stop_details" } init(from decoder: Decoder) throws { @@ -419,12 +419,20 @@ struct AnthropicMessageResponse: Codable { self.model = try container.decode(String.self, forKey: .model) self.stopReason = try container.decodeIfPresent(String.self, forKey: .stopReason) self.stopSequence = try container.decodeIfPresent(String.self, forKey: .stopSequence) + self.stopDetails = try container.decodeIfPresent(StopDetails.self, forKey: .stopDetails) self.usage = try container.decode(AnthropicUsage.self, forKey: .usage) } + + struct StopDetails: Codable { + let category: String? + let explanation: String? + } } enum AnthropicResponseContent: Codable { case text(TextContent) + case thinking(ThinkingContent) + case redactedThinking(RedactedThinkingContent) case toolUse(ToolUseContent) struct TextContent: Codable { @@ -461,6 +469,22 @@ enum AnthropicResponseContent: Codable { } } + struct ThinkingContent: Codable { + let type: String + let thinking: String + let signature: String + } + + struct RedactedThinkingContent: Codable { + let type: String + let data: String + + enum CodingKeys: String, CodingKey { + case type + case data + } + } + struct ToolUseContent: Codable { let type: String let id: String @@ -540,6 +564,10 @@ enum AnthropicResponseContent: Codable { switch type { case "text": self = try .text(TextContent(from: decoder)) + case "thinking": + self = try .thinking(ThinkingContent(from: decoder)) + case "redacted_thinking": + self = try .redactedThinking(RedactedThinkingContent(from: decoder)) case "tool_use": self = try .toolUse(ToolUseContent(from: decoder)) default: @@ -558,6 +586,10 @@ enum AnthropicResponseContent: Codable { switch self { case let .text(content): try content.encode(to: encoder) + case let .thinking(content): + try content.encode(to: encoder) + case let .redactedThinking(content): + try content.encode(to: encoder) case let .toolUse(content): try content.encode(to: encoder) } @@ -567,6 +599,8 @@ enum AnthropicResponseContent: Codable { case type case text case thinking + case redactedThinking = "redacted_thinking" + case signature } } @@ -578,6 +612,17 @@ struct AnthropicUsage: Codable { case inputTokens = "input_tokens" case outputTokens = "output_tokens" } + + init(inputTokens: Int, outputTokens: Int) { + self.inputTokens = inputTokens + self.outputTokens = outputTokens + } + + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + self.inputTokens = try container.decodeIfPresent(Int.self, forKey: .inputTokens) ?? 0 + self.outputTokens = try container.decodeIfPresent(Int.self, forKey: .outputTokens) ?? 0 + } } // MARK: - Streaming Types @@ -612,12 +657,11 @@ struct AnthropicStreamContentBlock: Codable { let text: String? let input: Any? let thinking: String? - let redactedThinking: String? + let data: String? let signature: String? enum CodingKeys: String, CodingKey { - case type, id, name, text, input, thinking, signature - case redactedThinking = "redacted_thinking" + case type, id, name, text, input, thinking, data, signature } init(from decoder: Decoder) throws { @@ -627,7 +671,7 @@ struct AnthropicStreamContentBlock: Codable { self.name = try? container.decode(String.self, forKey: .name) self.text = try? container.decode(String.self, forKey: .text) self.thinking = try? container.decode(String.self, forKey: .thinking) - self.redactedThinking = try? container.decode(String.self, forKey: .redactedThinking) + self.data = try? container.decode(String.self, forKey: .data) self.signature = try? container.decode(String.self, forKey: .signature) // Decode input as generic JSON if present @@ -653,7 +697,7 @@ struct AnthropicStreamContentBlock: Codable { try container.encodeIfPresent(self.name, forKey: .name) try container.encodeIfPresent(self.text, forKey: .text) try container.encodeIfPresent(self.thinking, forKey: .thinking) - try container.encodeIfPresent(self.redactedThinking, forKey: .redactedThinking) + try container.encodeIfPresent(self.data, forKey: .data) try container.encodeIfPresent(self.signature, forKey: .signature) if let input { let data = try JSONSerialization.data(withJSONObject: input) @@ -677,6 +721,17 @@ struct AnthropicStreamDelta: Codable { case stopReason = "stop_reason" case stopSequence = "stop_sequence" } + + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + self.type = try container.decodeIfPresent(String.self, forKey: .type) ?? "" + self.text = try container.decodeIfPresent(String.self, forKey: .text) + self.thinking = try container.decodeIfPresent(String.self, forKey: .thinking) + self.signature = try container.decodeIfPresent(String.self, forKey: .signature) + self.partialJson = try container.decodeIfPresent(String.self, forKey: .partialJson) + self.stopReason = try container.decodeIfPresent(String.self, forKey: .stopReason) + self.stopSequence = try container.decodeIfPresent(String.self, forKey: .stopSequence) + } } struct AnthropicErrorResponse: Codable { diff --git a/Sources/Tachikoma/Providers/Compatible/AnthropicCompatibleProvider.swift b/Sources/Tachikoma/Providers/Compatible/AnthropicCompatibleProvider.swift index 0770121..3990b26 100644 --- a/Sources/Tachikoma/Providers/Compatible/AnthropicCompatibleProvider.swift +++ b/Sources/Tachikoma/Providers/Compatible/AnthropicCompatibleProvider.swift @@ -10,6 +10,8 @@ public final class AnthropicCompatibleProvider: ModelProvider { public let capabilities: ModelCapabilities private let configuration: TachikomaConfiguration private let auth: TKAuthValue? + private let reasoningProvider: String + private let reasoningBaseURL: String? public init( modelId: String, @@ -19,11 +21,16 @@ public final class AnthropicCompatibleProvider: ModelProvider { additionalHeaders: [String: String] = [:], auth: TKAuthValue? = nil, capabilities: ModelCapabilities? = nil, + reasoningProvider: String = "anthropic-compatible", + reasoningBaseURL: String? = nil, + includeReasoningBaseURL: Bool = true, ) throws { self.modelId = modelId self.baseURL = baseURL self.configuration = configuration self.additionalHeaders = additionalHeaders + self.reasoningProvider = reasoningProvider + self.reasoningBaseURL = includeReasoningBaseURL ? (reasoningBaseURL ?? baseURL) : nil // Try explicit provider key, then configuration, then common environment variable patterns. if let key = apiKey { @@ -51,12 +58,24 @@ public final class AnthropicCompatibleProvider: ModelProvider { self.auth = nil } - self.capabilities = capabilities ?? ModelCapabilities( + let isFable = LanguageModel.Anthropic.isFable(modelId: modelId) + let supportsSafeStreaming = !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId) + let baseCapabilities = capabilities ?? ModelCapabilities( supportsVision: true, supportsTools: true, - supportsStreaming: true, - contextLength: 200_000, - maxOutputTokens: 8192, + supportsStreaming: supportsSafeStreaming, + contextLength: isFable ? 1_000_000 : 200_000, + maxOutputTokens: isFable ? 128_000 : 8192, + ) + self.capabilities = supportsSafeStreaming ? baseCapabilities : ModelCapabilities( + supportsVision: baseCapabilities.supportsVision, + supportsTools: baseCapabilities.supportsTools, + supportsStreaming: false, + supportsAudioInput: baseCapabilities.supportsAudioInput, + supportsAudioOutput: baseCapabilities.supportsAudioOutput, + contextLength: baseCapabilities.contextLength, + maxOutputTokens: baseCapabilities.maxOutputTokens, + costPerToken: baseCapabilities.costPerToken, ) } @@ -89,6 +108,9 @@ public final class AnthropicCompatibleProvider: ModelProvider { configuration: compatConfig, additionalHeaders: self.additionalHeaders, authOverride: self.auth, + reasoningProvider: self.reasoningProvider, + reasoningModelId: self.modelId, + reasoningBaseURL: self.reasoningBaseURL, ) } } diff --git a/Sources/Tachikoma/Providers/Compatible/OpenAICompatibleProvider.swift b/Sources/Tachikoma/Providers/Compatible/OpenAICompatibleProvider.swift index 81890d8..e88ef03 100644 --- a/Sources/Tachikoma/Providers/Compatible/OpenAICompatibleProvider.swift +++ b/Sources/Tachikoma/Providers/Compatible/OpenAICompatibleProvider.swift @@ -40,12 +40,13 @@ public final class OpenAICompatibleProvider: ModelProvider { self.apiKey = nil // Some compatible APIs don't require keys } + let isFable = LanguageModel.Anthropic.isFable(modelId: modelId) self.capabilities = ModelCapabilities( supportsVision: false, supportsTools: true, - supportsStreaming: true, - contextLength: 128_000, - maxOutputTokens: 4096, + supportsStreaming: !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId), + contextLength: isFable ? 1_000_000 : 128_000, + maxOutputTokens: isFable ? 128_000 : 4096, ) } @@ -63,8 +64,12 @@ public final class OpenAICompatibleProvider: ModelProvider { } public func streamText(request: ProviderRequest) async throws -> AsyncThrowingStream { + guard !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: self.modelId) else { + throw TachikomaError.invalidConfiguration("\(self.modelId) does not support streaming") + } + // Use OpenAI-compatible streaming implementation - try await OpenAICompatibleHelper.streamText( + return try await OpenAICompatibleHelper.streamText( request: request, modelId: self.modelId, baseURL: self.baseURL!, diff --git a/Sources/Tachikoma/Providers/OpenAI/OpenAIResponsesProvider.swift b/Sources/Tachikoma/Providers/OpenAI/OpenAIResponsesProvider.swift index 6ba8848..e40271e 100644 --- a/Sources/Tachikoma/Providers/OpenAI/OpenAIResponsesProvider.swift +++ b/Sources/Tachikoma/Providers/OpenAI/OpenAIResponsesProvider.swift @@ -213,6 +213,21 @@ public final class OpenAIResponsesProvider: ModelProvider { // Parse the entire response for Linux let responseText = String(data: data, encoding: .utf8) ?? "" let lines = responseText.components(separatedBy: "\n") + var streamState = ResponsesStreamState() + for line in lines { + if + try Self.processResponsesStreamLine( + line, + model: self.model, + state: &streamState, + continuation: continuation, + ) + { + return + } + } + continuation.finish() + return #else // macOS/iOS: Use streaming API let (bytes, response) = try await self.session.bytes(for: finalURLRequest) @@ -242,157 +257,18 @@ public final class OpenAIResponsesProvider: ModelProvider { throw TachikomaError.apiError("Failed to start streaming: \(errorMessage)") } - var previousContent = "" // Track previously sent content for GPT-5 preambles - struct PartialToolCall { - var id: String - var name: String? - var arguments: String - } - var pendingToolCalls: [String: PartialToolCall] = [:] + var streamState = ResponsesStreamState() for try await line in bytes.lines { - // Handle SSE format - if line.hasPrefix("data: ") { - let jsonString = String(line.dropFirst(6)) - - if ProcessInfo.processInfo.environment["DEBUG_TACHIKOMA_STREAM"] != nil { - Self.debugLog("raw stream: \(jsonString)") - } - - if jsonString == "[DONE]" { - continuation.finish() - return - } - - if let data = jsonString.data(using: .utf8) { - // Responses API event streams use typed event payloads. - if Self.usesResponsesEventStream(self.model) { - if - let event = try? JSONSerialization.jsonObject(with: data) as? [String: Any], - let eventType = event["type"] as? String - { - if ProcessInfo.processInfo.environment["DEBUG_TACHIKOMA"] != nil { - Self.debugLog("event: \(eventType) payload: \(event)") - } - - switch eventType { - case "response.output_text.delta": - if let delta = event["delta"] as? String, !delta.isEmpty { - continuation.yield(TextStreamDelta.text(delta)) - } - - case "response.output_item.added": - if - let item = event["item"] as? [String: Any], - let itemType = item["type"] as? String, - itemType == "function_call" - { - let identifier = (item["id"] as? String) ?? - (item["call_id"] as? String) ?? UUID().uuidString - var partial = pendingToolCalls[identifier] ?? PartialToolCall( - id: identifier, - name: nil, - arguments: "", - ) - if let name = item["name"] as? String { - partial.name = name - } - pendingToolCalls[identifier] = partial - } - - case "response.function_call_arguments.delta": - if - let itemId = event["item_id"] as? String, - let delta = event["delta"] as? String - { - var partial = pendingToolCalls[itemId] ?? PartialToolCall( - id: itemId, - name: nil, - arguments: "", - ) - partial.arguments.append(delta) - pendingToolCalls[itemId] = partial - } - - case "response.function_call_arguments.done": - if - let itemId = event["item_id"] as? String, - let arguments = event["arguments"] as? String - { - var partial = pendingToolCalls[itemId] ?? PartialToolCall( - id: itemId, - name: nil, - arguments: "", - ) - partial.arguments = arguments - pendingToolCalls[itemId] = partial - - if - let name = partial.name, - let toolCall = Self.makeToolCall( - id: itemId, - name: name, - argumentsJSON: arguments, - ) - { - continuation.yield(.tool(toolCall)) - pendingToolCalls.removeValue(forKey: itemId) - } - } - - case "response.completed": - continuation.finish() - return - - default: - break - } - } - } else { - // Try alternate Responses API delta format. - do { - let chunk = try JSONDecoder().decode( - OpenAIResponsesStreamChunk.self, - from: data, - ) - - // Convert to TextStreamDelta - if - let choice = chunk.choices.first, - let content = choice.delta.content, - !content.isEmpty - { - // Handle accumulated content for models with preambles - if content.hasPrefix(previousContent), !previousContent.isEmpty { - // This is accumulated content, extract just the delta - let delta = String(content.dropFirst(previousContent.count)) - if !delta.isEmpty { - continuation.yield(TextStreamDelta.text(delta)) - previousContent = content // Update the accumulated content - } - } else { - // This is a true delta or the first chunk - continuation.yield(TextStreamDelta.text(content)) - previousContent += content // Accumulate for comparison - } - } - - // Check for finish - if - let choice = chunk.choices.first, - choice.finishReason != nil - { - continuation.finish() - return - } - } catch { - // Ignore parsing errors for incomplete chunks - } - } - } - } else if line.hasPrefix("event: ") { - // Track event types for GPT-5 streaming (but we handle them in data lines) - // This helps us understand the stream structure + if + try Self.processResponsesStreamLine( + line, + model: self.model, + state: &streamState, + continuation: continuation, + ) + { + return } } @@ -405,6 +281,201 @@ public final class OpenAIResponsesProvider: ModelProvider { } } + private struct ResponsesStreamState { + struct PartialToolCall { + var id: String + var name: String? + var arguments: String + } + + var previousContent = "" + var pendingToolCalls: [String: PartialToolCall] = [:] + var didYieldToolCall = false + var didReceiveRefusal = false + } + + private static func processResponsesStreamLine( + _ line: String, + model: LanguageModel.OpenAI, + state: inout ResponsesStreamState, + continuation: AsyncThrowingStream.Continuation, + ) throws + -> Bool + { + guard line.hasPrefix("data: ") else { + return false + } + + let jsonString = String(line.dropFirst(6)) + if ProcessInfo.processInfo.environment["DEBUG_TACHIKOMA_STREAM"] != nil { + Self.debugLog("raw stream: \(jsonString)") + } + + if jsonString == "[DONE]" { + continuation.finish() + return true + } + + guard let data = jsonString.data(using: .utf8) else { + return false + } + + if Self.usesResponsesEventStream(model) { + guard + let event = try? JSONSerialization.jsonObject(with: data) as? [String: Any], + let eventType = event["type"] as? String else + { + return false + } + + if ProcessInfo.processInfo.environment["DEBUG_TACHIKOMA"] != nil { + Self.debugLog("event: \(eventType) payload: \(event)") + } + + switch eventType { + case "response.output_text.delta": + if let delta = event["delta"] as? String, !delta.isEmpty { + continuation.yield(TextStreamDelta.text(delta)) + } + + case "response.output_item.added": + if + let item = event["item"] as? [String: Any], + let itemType = item["type"] as? String, + itemType == "function_call" + { + let identifier = (item["id"] as? String) ?? + (item["call_id"] as? String) ?? UUID().uuidString + var partial = state.pendingToolCalls[identifier] ?? ResponsesStreamState.PartialToolCall( + id: identifier, + name: nil, + arguments: "", + ) + if let name = item["name"] as? String { + partial.name = name + } + state.pendingToolCalls[identifier] = partial + } + + case "response.function_call_arguments.delta": + if + let itemId = event["item_id"] as? String, + let delta = event["delta"] as? String + { + var partial = state.pendingToolCalls[itemId] ?? ResponsesStreamState.PartialToolCall( + id: itemId, + name: nil, + arguments: "", + ) + partial.arguments.append(delta) + state.pendingToolCalls[itemId] = partial + } + + case "response.function_call_arguments.done": + if + let itemId = event["item_id"] as? String, + let arguments = event["arguments"] as? String + { + var partial = state.pendingToolCalls[itemId] ?? ResponsesStreamState.PartialToolCall( + id: itemId, + name: nil, + arguments: "", + ) + partial.arguments = arguments + state.pendingToolCalls[itemId] = partial + + if + let name = partial.name, + let toolCall = Self.makeToolCall(id: itemId, name: name, argumentsJSON: arguments) + { + continuation.yield(.tool(toolCall)) + state.didYieldToolCall = true + state.pendingToolCalls.removeValue(forKey: itemId) + } + } + + case "response.refusal.delta", + "response.refusal.done": + state.didReceiveRefusal = true + + case "response.completed": + let finishReason: FinishReason = state.didReceiveRefusal + ? .contentFilter + : (state.didYieldToolCall ? .toolCalls : .stop) + continuation.yield(.done(finishReason: finishReason)) + continuation.finish() + return true + + case "response.incomplete": + let finishReason = Self.finishReasonForIncompleteResponseEvent(event) + continuation.yield(.done(finishReason: finishReason)) + continuation.finish() + return true + + case "response.failed", + "error": + throw TachikomaError.apiError(Self.errorMessageForResponseStreamEvent(event)) + + default: + break + } + return false + } + + do { + let chunk = try JSONDecoder().decode(OpenAIResponsesStreamChunk.self, from: data) + if + let choice = chunk.choices.first, + let content = choice.delta.content, + !content.isEmpty + { + if content.hasPrefix(state.previousContent), !state.previousContent.isEmpty { + let delta = String(content.dropFirst(state.previousContent.count)) + if !delta.isEmpty { + continuation.yield(TextStreamDelta.text(delta)) + state.previousContent = content + } + } else { + continuation.yield(TextStreamDelta.text(content)) + state.previousContent += content + } + } + + if let choice = chunk.choices.first, let finishReason = choice.finishReason { + continuation.yield(.done(finishReason: Self.finishReasonForChatStream(finishReason))) + continuation.finish() + return true + } + } catch { + // Ignore parsing errors for incomplete chunks. + } + + return false + } + + private static func finishReasonForChatStream(_ reason: String) -> FinishReason { + switch reason { + case "stop": .stop + case "length": .length + case "tool_calls": .toolCalls + case "content_filter": .contentFilter + default: .other + } + } + + private static func errorMessageForResponseStreamEvent(_ event: [String: Any]) -> String { + let eventType = event["type"] as? String ?? "error" + let errorPayload = (event["error"] as? [String: Any]) ?? + (event["response"] as? [String: Any]).flatMap { $0["error"] as? [String: Any] } + if let message = errorPayload?["message"] as? String, !message.isEmpty { + return "OpenAI Responses API stream \(eventType): \(message)" + } + if let message = event["message"] as? String, !message.isEmpty { + return "OpenAI Responses API stream \(eventType): \(message)" + } + return "OpenAI Responses API stream \(eventType)" + } + private func authHeader() -> (String, String, String) { switch self.auth { case let .apiKey(key): @@ -558,6 +629,29 @@ public final class OpenAIResponsesProvider: ModelProvider { } } + private static func finishReasonForIncompleteResponseEvent(_ event: [String: Any]) -> FinishReason { + guard + let response = event["response"] as? [String: Any], + let incompleteDetails = response["incomplete_details"] as? [String: Any], + let reason = incompleteDetails["reason"] as? String else + { + return .other + } + + return Self.finishReasonForIncompleteReason(reason) + } + + private static func finishReasonForIncompleteReason(_ reason: String?) -> FinishReason { + switch reason { + case "content_filter": + .contentFilter + case "max_output_tokens": + .length + default: + .other + } + } + private func makeMessageEntry(role: String, message: ModelMessage) -> ResponsesMessage? { let parts = self.convertContentParts(for: message) guard !parts.isEmpty else { return nil } @@ -756,14 +850,15 @@ public final class OpenAIResponsesProvider: ModelProvider { static func convertToProviderResponse(_ response: OpenAIResponsesResponse) throws -> ProviderResponse { // Handle GPT-5 output arrays and alternate choices arrays. - let text: String - let toolCalls: [AgentToolCall]? - let finishReason: FinishReason? + var text: String + var toolCalls: [AgentToolCall]? + var finishReason: FinishReason? if let outputs = response.output { // GPT-5 format with output array var collectedText = "" var collectedToolCalls: [AgentToolCall] = [] + var didCollectRefusal = false for output in outputs { if output.type == "message" { @@ -774,6 +869,10 @@ public final class OpenAIResponsesProvider: ModelProvider { if let textSegment = chunk.text { collectedText.append(textSegment) } + case "refusal": + if chunk.refusal != nil || chunk.text != nil { + didCollectRefusal = true + } case "tool_call": if let toolCall = chunk.toolCall, @@ -795,11 +894,21 @@ public final class OpenAIResponsesProvider: ModelProvider { } text = collectedText - toolCalls = collectedToolCalls.isEmpty ? nil : collectedToolCalls - if let toolCalls, !toolCalls.isEmpty { - finishReason = .toolCalls + let incompleteFinishReason = response.status == "incomplete" + ? Self.finishReasonForIncompleteReason(response.incompleteDetails?.reason) + : nil + if incompleteFinishReason == .contentFilter || didCollectRefusal { + text = "" + toolCalls = nil + finishReason = .contentFilter } else { - finishReason = .stop + toolCalls = collectedToolCalls.isEmpty ? nil : collectedToolCalls + } + if finishReason == nil, let toolCalls, !toolCalls.isEmpty { + finishReason = .toolCalls + } + if finishReason == nil { + finishReason = incompleteFinishReason ?? .stop } } else if let choices = response.choices, let choice = choices.first { // Alternate format with choices array. @@ -814,11 +923,17 @@ public final class OpenAIResponsesProvider: ModelProvider { case "stop": finishReason = .stop case "length": finishReason = .length case "tool_calls": finishReason = .toolCalls + case "content_filter": finishReason = .contentFilter default: finishReason = .stop } } else { finishReason = nil } + if finishReason == .contentFilter || choice.message.refusal != nil { + text = "" + toolCalls = nil + finishReason = .contentFilter + } } else { throw TachikomaError.apiError("No output or choices in response") } diff --git a/Sources/Tachikoma/Providers/OpenAI/OpenAIResponsesTypes.swift b/Sources/Tachikoma/Providers/OpenAI/OpenAIResponsesTypes.swift index c12388e..5ec8420 100644 --- a/Sources/Tachikoma/Providers/OpenAI/OpenAIResponsesTypes.swift +++ b/Sources/Tachikoma/Providers/OpenAI/OpenAIResponsesTypes.swift @@ -484,11 +484,17 @@ struct OpenAIResponsesResponse: Codable { let choices: [ResponsesChoice]? // Alternate responses can use choices array let usage: ResponsesUsage? let metadata: ResponsesMetadata? + let incompleteDetails: IncompleteDetails? enum CodingKeys: String, CodingKey { case id, object, status, model, output, choices, usage, metadata case createdAt = "created_at" case created + case incompleteDetails = "incomplete_details" + } + + struct IncompleteDetails: Codable { + let reason: String? } /// GPT-5 output format @@ -513,11 +519,25 @@ struct OpenAIResponsesResponse: Codable { struct OutputContent: Codable { let type: String let text: String? + let refusal: String? let toolCall: ResponsesToolCall? + init( + type: String, + text: String? = nil, + refusal: String? = nil, + toolCall: ResponsesToolCall? = nil, + ) { + self.type = type + self.text = text + self.refusal = refusal + self.toolCall = toolCall + } + enum CodingKeys: String, CodingKey { case type case text + case refusal case toolCall = "tool_call" } } diff --git a/Sources/Tachikoma/Providers/OpenAI/OpenAITypes.swift b/Sources/Tachikoma/Providers/OpenAI/OpenAITypes.swift index d576c97..0dab123 100644 --- a/Sources/Tachikoma/Providers/OpenAI/OpenAITypes.swift +++ b/Sources/Tachikoma/Providers/OpenAI/OpenAITypes.swift @@ -77,11 +77,14 @@ struct OpenAIChatMessage: Codable { let content: Either? let toolCallId: String? let toolCalls: [AgentToolCall]? + let reasoning: String? + let reasoningDetails: [JSONValue]? enum CodingKeys: String, CodingKey { - case role, content + case role, content, reasoning case toolCallId = "tool_call_id" case toolCalls = "tool_calls" + case reasoningDetails = "reasoning_details" } struct AgentToolCall: Codable { @@ -100,6 +103,8 @@ struct OpenAIChatMessage: Codable { self.content = .left(content) self.toolCallId = toolCallId self.toolCalls = nil + self.reasoning = nil + self.reasoningDetails = nil } init(role: String, content: [OpenAIChatMessageContent], toolCallId: String? = nil) { @@ -107,13 +112,23 @@ struct OpenAIChatMessage: Codable { self.content = .right(content) self.toolCallId = toolCallId self.toolCalls = nil + self.reasoning = nil + self.reasoningDetails = nil } - init(role: String, content: String? = nil, toolCalls: [AgentToolCall]?) { + init( + role: String, + content: String? = nil, + toolCalls: [AgentToolCall]?, + reasoning: String? = nil, + reasoningDetails: [JSONValue]? = nil, + ) { self.role = role self.content = content.map { .left($0) } self.toolCallId = nil self.toolCalls = toolCalls + self.reasoning = reasoning + self.reasoningDetails = reasoningDetails } } @@ -246,10 +261,13 @@ struct OpenAIChatResponse: Codable { let role: String let content: String? let toolCalls: [AgentToolCall]? + let reasoning: String? + let reasoningDetails: [JSONValue]? enum CodingKeys: String, CodingKey { - case role, content + case role, content, reasoning case toolCalls = "tool_calls" + case reasoningDetails = "reasoning_details" } } diff --git a/Sources/Tachikoma/Providers/OpenRouter/OpenRouterProvider.swift b/Sources/Tachikoma/Providers/OpenRouter/OpenRouterProvider.swift index 4611edd..6e2398c 100644 --- a/Sources/Tachikoma/Providers/OpenRouter/OpenRouterProvider.swift +++ b/Sources/Tachikoma/Providers/OpenRouter/OpenRouterProvider.swift @@ -33,12 +33,13 @@ public final class OpenRouterProvider: ModelProvider { throw TachikomaError.authenticationFailed("OPENROUTER_API_KEY not found") } + let isFable = LanguageModel.Anthropic.isFable(modelId: modelId) self.capabilities = ModelCapabilities( supportsVision: true, supportsTools: true, - supportsStreaming: true, - contextLength: 128_000, - maxOutputTokens: 4096, + supportsStreaming: !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId), + contextLength: isFable ? 1_000_000 : 128_000, + maxOutputTokens: isFable ? 128_000 : 4096, ) self.defaultHeaders = [ @@ -67,6 +68,9 @@ public final class OpenRouterProvider: ModelProvider { guard let baseURL, let apiKey else { throw TachikomaError.invalidConfiguration("OpenRouter provider missing base URL or API key") } + guard !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: self.modelId) else { + throw TachikomaError.invalidConfiguration("\(self.modelId) does not support streaming") + } return try await OpenAICompatibleHelper.streamText( request: request, diff --git a/Sources/Tachikoma/Providers/ProviderFactory.swift b/Sources/Tachikoma/Providers/ProviderFactory.swift index cac14e7..d567890 100644 --- a/Sources/Tachikoma/Providers/ProviderFactory.swift +++ b/Sources/Tachikoma/Providers/ProviderFactory.swift @@ -125,6 +125,8 @@ public struct ProviderFactory { configuration: configuration, apiKey: custom.apiKey, additionalHeaders: custom.headers, + reasoningProvider: "custom-anthropic", + reasoningBaseURL: custom.baseURL, ) } } @@ -141,10 +143,10 @@ public struct ProviderFactory { ) throws -> any ModelProvider { - try AnthropicCompatibleProvider( + let baseURL = configuration.getBaseURL(for: provider) ?? provider.defaultBaseURL ?? "https://api.minimax.io/anthropic" + return try AnthropicCompatibleProvider( modelId: model.modelId, - baseURL: configuration.getBaseURL(for: provider) ?? provider - .defaultBaseURL ?? "https://api.minimax.io/anthropic", + baseURL: baseURL, configuration: configuration, apiKey: apiKey, // MiniMax's Anthropic-compatible setup uses Claude Code-style Authorization auth, not Anthropic x-api-key. @@ -156,6 +158,8 @@ public struct ProviderFactory { contextLength: model.contextLength, maxOutputTokens: 8192, ), + reasoningProvider: provider == .minimaxCN ? "minimax-cn" : "minimax", + reasoningBaseURL: baseURL, ) } } diff --git a/Sources/Tachikoma/Providers/ProviderParser.swift b/Sources/Tachikoma/Providers/ProviderParser.swift index 69d7367..8f418fc 100644 --- a/Sources/Tachikoma/Providers/ProviderParser.swift +++ b/Sources/Tachikoma/Providers/ProviderParser.swift @@ -8,7 +8,7 @@ public enum ProviderParser { /// The provider name (e.g., "openai", "anthropic", "ollama") public let provider: String - /// The model name (e.g., "gpt-5.5", "claude-opus-4-7", "llava:latest") + /// The model name (e.g., "gpt-5.5", "claude-fable-5", "llava:latest") public let model: String /// The full string representation (e.g., "openai/gpt-5.5") @@ -44,7 +44,7 @@ public enum ProviderParser { } /// Parse a comma-separated list of providers - /// - Parameter providersString: String like "openai/gpt-5.5,anthropic/claude-opus-4-7,ollama/llava:latest" + /// - Parameter providersString: String like "openai/gpt-5.5,anthropic/claude-fable-5,ollama/llava:latest" /// - Returns: Array of parsed configurations public static func parseList(_ providersString: String) -> [ProviderConfig] { // Parse a comma-separated list of providers @@ -54,7 +54,7 @@ public enum ProviderParser { } /// Get the first provider from a comma-separated list - /// - Parameter providersString: String like "openai/gpt-5.5,anthropic/claude-opus-4-7" + /// - Parameter providersString: String like "openai/gpt-5.5,anthropic/claude-fable-5" /// - Returns: First parsed configuration or nil if none valid public static func parseFirst(_ providersString: String) -> ProviderConfig? { // Get the first provider from a comma-separated list @@ -270,8 +270,10 @@ public enum ProviderParser { } return switch normalized { + case "claude-fable-5", "claude-fable-5-latest", "fable-5", "fable.5", "fable5", "fable": + .anthropic(.fable5) case "claude-opus-4-8", "claude-opus-4.8", "claude-opus-4-8-latest", "opus-4-8", "opus-4.8", - "opus48": + "opus48", "claude", "claude-latest", "claude_latest", "claudelatest", "claude-default", "claude_default": .anthropic(.opus48) case "claude-opus-4-7", "claude-opus-4.7", "claude-opus-4-7-latest", "opus-4-7", "opus-4.7", "opus47": .anthropic(.opus47) diff --git a/Sources/Tachikoma/Providers/Together/TogetherProvider.swift b/Sources/Tachikoma/Providers/Together/TogetherProvider.swift index a4236d5..361508a 100644 --- a/Sources/Tachikoma/Providers/Together/TogetherProvider.swift +++ b/Sources/Tachikoma/Providers/Together/TogetherProvider.swift @@ -27,12 +27,13 @@ public final class TogetherProvider: ModelProvider { throw TachikomaError.authenticationFailed("TOGETHER_API_KEY not found") } + let isFable = LanguageModel.Anthropic.isFable(modelId: modelId) self.capabilities = ModelCapabilities( supportsVision: true, supportsTools: true, - supportsStreaming: true, - contextLength: 128_000, - maxOutputTokens: 4096, + supportsStreaming: !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId), + contextLength: isFable ? 1_000_000 : 128_000, + maxOutputTokens: isFable ? 128_000 : 4096, ) } @@ -55,6 +56,9 @@ public final class TogetherProvider: ModelProvider { guard let baseURL, let apiKey else { throw TachikomaError.invalidConfiguration("Together provider missing base URL or API key") } + guard !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: self.modelId) else { + throw TachikomaError.invalidConfiguration("\(self.modelId) does not support streaming") + } return try await OpenAICompatibleHelper.streamText( request: request, diff --git a/Sources/Tachikoma/Utilities/ResponseCache.swift b/Sources/Tachikoma/Utilities/ResponseCache.swift index 89573ef..fc9366b 100644 --- a/Sources/Tachikoma/Utilities/ResponseCache.swift +++ b/Sources/Tachikoma/Utilities/ResponseCache.swift @@ -6,18 +6,44 @@ import UIKit // MARK: - Cache Key /// Hashable key for cache entries +@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) +public struct CacheProviderIdentity: Hashable, Sendable { + public let providerKind: String + public let modelId: String + public let endpointIdentity: String? + + public init(providerKind: String, modelId: String, baseURL: String?) { + self.providerKind = providerKind + self.modelId = modelId + self.endpointIdentity = ReasoningEndpointIdentity.canonical(baseURL) + } +} + @available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) struct CacheKey: Hashable { let hash: String let model: String? // Store model ID for invalidation + let isCacheable: Bool - init(from request: ProviderRequest, model: String? = nil) { - self.model = model + init(from request: ProviderRequest, providerIdentity: CacheProviderIdentity? = nil, model: String? = nil) { + self.model = providerIdentity?.modelId ?? model // Create a unique hash from the request var hasher = Hasher() + if let providerIdentity { + hasher.combine(providerIdentity.providerKind) + hasher.combine(providerIdentity.modelId) + hasher.combine(providerIdentity.endpointIdentity) + } // Combine message content for message in request.messages { hasher.combine(message.role.rawValue) + hasher.combine(message.channel?.rawValue) + hasher.combine(message.metadata?.conversationId) + hasher.combine(message.metadata?.turnId) + for key in message.metadata?.customData?.keys.sorted() ?? [] { + hasher.combine(key) + hasher.combine(message.metadata?.customData?[key]) + } for part in message.content { switch part { case let .text(text): @@ -41,8 +67,32 @@ struct CacheKey: Hashable { hasher.combine(request.settings.temperature) hasher.combine(request.settings.maxTokens) hasher.combine(request.settings.topP) + hasher.combine(request.settings.topK) + hasher.combine(request.settings.frequencyPenalty) + hasher.combine(request.settings.presencePenalty) + hasher.combine(request.settings.stopSequences) + hasher.combine(request.settings.reasoningEffort?.rawValue) + hasher.combine(request.settings.seed) + if let stopConditions = request.settings.stopConditions { + guard let cacheKey = (stopConditions as? StableCacheKeyStopCondition)?.stableCacheKey else { + self.hash = "" + self.isCacheable = false + return + } + hasher.combine(cacheKey) + } + if let providerOptionsData = try? Self.providerOptionsEncoder.encode(request.settings.providerOptions) { + hasher.combine(providerOptionsData) + } self.hash = String(hasher.finalize()) + self.isCacheable = true } + + private static let providerOptionsEncoder: JSONEncoder = { + let encoder = JSONEncoder() + encoder.outputFormatting = [.sortedKeys] + return encoder + }() } // MARK: - Response Cache @@ -75,11 +125,16 @@ public actor ResponseCache { public func get( for request: ProviderRequest, ttlOverride: TimeInterval? = nil, + providerIdentity: CacheProviderIdentity? = nil, ) -> ProviderResponse? { // Get cached response with TTL validation - let key = CacheKey(from: request) + let key = CacheKey(from: request, providerIdentity: providerIdentity) + guard key.isCacheable else { + self.statistics.recordMiss() + return nil + } guard let entry = cache[key] else { self.statistics.recordMiss() @@ -109,9 +164,13 @@ public actor ResponseCache { for request: ProviderRequest, ttl: TimeInterval? = nil, priority: CachePriority = .normal, + providerIdentity: CacheProviderIdentity? = nil, ) { // Store response with custom TTL and priority - let key = CacheKey(from: request) + let key = CacheKey(from: request, providerIdentity: providerIdentity) + guard key.isCacheable else { + return + } // Check memory limit if self.shouldEvictForMemory() { @@ -457,9 +516,33 @@ final class CacheEntry: @unchecked Sendable { // Rough estimation based on response content let textSize = self.response.text.utf8.count let toolCallsSize = (response.toolCalls?.count ?? 0) * 100 // Estimate 100 bytes per tool call + let reasoningSize = self.response.reasoning.reduce(0) { total, block in + total + block.text.utf8.count + + (block.signature?.utf8.count ?? 0) + + (block.rawJSON?.utf8.count ?? 0) + + block.type.utf8.count + } + let assistantMessageSize = self.response.assistantMessages.reduce(0) { total, message in + let contentSize = message.content.reduce(0) { contentTotal, part in + switch part { + case let .text(text): + contentTotal + text.utf8.count + case let .image(image): + contentTotal + image.mimeType.utf8.count + image.data.utf8.count + case let .toolCall(call): + contentTotal + call.id.utf8.count + call.name.utf8.count + 100 + case let .toolResult(result): + contentTotal + result.toolCallId.utf8.count + 100 + } + } + let metadataSize = (message.metadata?.customData ?? [:]).reduce(0) { metadataTotal, pair in + metadataTotal + pair.key.utf8.count + pair.value.utf8.count + } + return total + contentSize + metadataSize + (message.channel?.rawValue.utf8.count ?? 0) + } let usageSize = 50 // Fixed overhead for usage data - return textSize + toolCallsSize + usageSize + 100 // 100 bytes overhead + return textSize + toolCallsSize + reasoningSize + assistantMessageSize + usageSize + 100 } } @@ -546,6 +629,7 @@ extension ResponseCache { public struct CacheAwareProvider: ModelProvider { let provider: Base let cache: ResponseCache + private let providerIdentity: CacheProviderIdentity public var modelId: String { self.provider.modelId @@ -563,11 +647,21 @@ public struct CacheAwareProvider: ModelProvider { self.provider.capabilities } + init(provider: Base, cache: ResponseCache) { + self.provider = provider + self.cache = cache + self.providerIdentity = CacheProviderIdentity( + providerKind: String(reflecting: Base.self), + modelId: provider.modelId, + baseURL: provider.baseURL, + ) + } + public func generateText(request: ProviderRequest) async throws -> ProviderResponse { // Check cache with smart TTL based on request type let ttl = self.determineTTL(for: request) - if let cached = await cache.get(for: request, ttlOverride: ttl) { + if let cached = await cache.get(for: request, ttlOverride: ttl, providerIdentity: self.providerIdentity) { return cached } @@ -575,7 +669,13 @@ public struct CacheAwareProvider: ModelProvider { let response = try await provider.generateText(request: request) let priority = self.determinePriority(for: request) - await self.cache.store(response, for: request, ttl: ttl, priority: priority) + await self.cache.store( + response, + for: request, + ttl: ttl, + priority: priority, + providerIdentity: self.providerIdentity, + ) return response } diff --git a/Sources/Tachikoma/Utilities/UsageTracking.swift b/Sources/Tachikoma/Utilities/UsageTracking.swift index 8f18529..00f3b28 100644 --- a/Sources/Tachikoma/Utilities/UsageTracking.swift +++ b/Sources/Tachikoma/Utilities/UsageTracking.swift @@ -543,6 +543,7 @@ public struct ModelCostCalculator: Sendable { // Anthropic Pricing (as of 2026) case let .anthropic(anthropicModel): switch anthropicModel { + case .fable5: (10.00, 50.00) case .opus48: (5.00, 25.00) case .opus47: (5.00, 25.00) case .opus45: (5.00, 25.00) @@ -550,7 +551,8 @@ public struct ModelCostCalculator: Sendable { case .sonnet46: (3.00, 15.00) case .sonnet45: (4.00, 18.00) case .haiku45: (1.20, 6.00) - case .custom: (3.00, 15.00) // Default estimate + case let .custom(id): + id.lowercased().contains("claude-fable-5") ? (10.00, 50.00) : (3.00, 15.00) } // Google Pricing (standard tier, as of 2026) case let .google(googleModel): diff --git a/Sources/TachikomaAgent/Agent.swift b/Sources/TachikomaAgent/Agent.swift index 6f3e550..ec43784 100644 --- a/Sources/TachikomaAgent/Agent.swift +++ b/Sources/TachikomaAgent/Agent.swift @@ -26,32 +26,43 @@ public final class Agent: @unchecked Sendable { public private(set) var tools: [AgentTool] /// Language model used by this agent - public var model: LanguageModel + public var model: LanguageModel { + didSet { + self.usesImplicitDefaultModel = false + } + } /// Generation settings for the agent public var settings: GenerationSettings + /// Provider configuration for generation and streaming + private let configuration: TachikomaConfiguration + /// The context instance passed to tool executions private let context: Context /// Current conversation history public private(set) var conversation: Conversation + private var usesImplicitDefaultModel: Bool public init( name: String, instructions: String, - model: LanguageModel = .default, + model: LanguageModel? = nil, tools: [AgentTool] = [], settings: GenerationSettings = .default, + configuration: TachikomaConfiguration = .current, context: Context, ) { self.name = name self.instructions = instructions - self.model = model + self.usesImplicitDefaultModel = model == nil + self.model = model ?? .default self.tools = tools self.settings = settings + self.configuration = configuration self.context = context - self.conversation = Conversation() + self.conversation = Conversation(configuration: configuration) // Add system message with instructions self.conversation.addSystemMessage(instructions) @@ -71,66 +82,133 @@ public final class Agent: @unchecked Sendable { /// Execute a single message with the agent public func execute(_ message: String) async throws -> AgentResponse { - // Add user message to conversation - self.conversation.addUserMessage(message) + let conversation = self.conversation + let model = self.model + let tools = self.tools + let settings = self.settings - // Generate response using the conversation - let result = try await generateText( - model: model, - messages: conversation.getModelMessages(), - tools: self.tools.isEmpty ? nil : self.tools, - settings: self.settings, - maxSteps: 5, // Allow multi-step tool execution - ) + return try await conversation.withContinuationLock { + conversation.addUserMessage(message) + let conversationMessages = conversation.messages + let modelMessages = conversationMessages.map { $0.toModelMessage() } + let snapshotIDs = conversationMessages.map(\.id) + let anchorID = conversationMessages.last?.id + let result = try await generateText( + model: model, + messages: modelMessages, + tools: tools.isEmpty ? nil : tools, + settings: settings, + maxSteps: 5, // Allow multi-step tool execution + configuration: self.configuration, + ) - // Add assistant response to conversation - self.conversation.addAssistantMessage(result.text) - - // Add any tool calls and results to conversation - for step in result.steps { - if !step.toolCalls.isEmpty { - for _ in step.toolCalls { - // Tool calls are already added by generateText + let didMerge: Bool + if result.finishReason == .contentFilter { + didMerge = conversation.mergeContentFilterResult( + result.messages, + originalMessages: modelMessages, + afterMessageID: anchorID, + validatingSnapshotIDs: snapshotIDs, + ) + } else if let anchorID { + let generatedMessages = Array(result.messages.dropFirst(modelMessages.count)) + let didMerge = conversation.appendGeneratedMessages( + generatedMessages, + afterMessageID: anchorID, + validatingSnapshotIDs: snapshotIDs, + ) + guard didMerge else { + throw TachikomaError.invalidConfiguration( + "Conversation changed during generation; refusing to merge response", + ) } + return AgentResponse( + text: result.text, + usage: result.usage, + finishReason: result.finishReason ?? .other, + steps: result.steps, + conversationLength: conversation.messages.count, + ) + } else { + didMerge = conversation.messages.isEmpty } - if !step.toolResults.isEmpty { - for _ in step.toolResults { - // Tool results are already added by generateText - } + + guard didMerge else { + throw TachikomaError.invalidConfiguration( + "Conversation changed during generation; refusing to merge response", + ) } + + return AgentResponse( + text: result.text, + usage: result.usage, + finishReason: result.finishReason ?? .other, + steps: result.steps, + conversationLength: conversation.messages.count, + ) } - - return AgentResponse( - text: result.text, - usage: result.usage, - finishReason: result.finishReason ?? .other, - steps: result.steps, - conversationLength: self.conversation.messages.count, - ) } /// Stream a response from the agent public func stream(_ message: String) async throws -> AsyncThrowingStream { + let streamingModel = if self.usesImplicitDefaultModel { + LanguageModel.defaultStreaming + } else if self.model.supportsStreaming { + self.model + } else { + self.model + } + guard streamingModel.supportsStreaming else { + throw TachikomaError.invalidConfiguration("\(self.model.modelId) does not support streaming") + } + + let conversation = self.conversation + try await conversation.acquireContinuationLock() + let gateRelease = AsyncReleaseOnce { + await conversation.releaseContinuationLock() + } + // Add user message to conversation - self.conversation.addUserMessage(message) + conversation.addUserMessage(message) + let conversationMessages = conversation.messages + let modelMessages = conversationMessages.map { $0.toModelMessage() } + let snapshotIDs = conversationMessages.map(\.id) + let buffersUntilDone = self.settings.streamBuffering == .untilTerminal // Stream response - let streamResult = try await streamText( - model: model, - messages: conversation.getModelMessages(), - tools: self.tools.isEmpty ? nil : self.tools, - settings: self.settings, - maxSteps: 5, - ) + let streamResult: StreamTextResult + do { + streamResult = try await streamText( + model: streamingModel, + messages: modelMessages, + tools: self.tools.isEmpty ? nil : self.tools, + settings: self.settings, + maxSteps: 5, + configuration: self.configuration, + ) + } catch { + gateRelease.release() + throw error + } // Track final message in conversation (this is approximate for streaming) return AsyncThrowingStream { continuation in - Task { + let producer = Task { + defer { + gateRelease.release() + } do { var assistantText = "" + var bufferedDeltas: [TextStreamDelta] = [] + var didReceiveTerminal = false for try await delta in streamResult.stream { - continuation.yield(delta) + try Task.checkCancellation() + if buffersUntilDone { + bufferedDeltas.append(delta) + } else { + continuation.yield(delta) + } // Collect assistant text if case .textDelta = delta.type, let content = delta.content { @@ -138,25 +216,60 @@ public final class Agent: @unchecked Sendable { } if case .done = delta.type { + didReceiveTerminal = true + guard delta.finishReason != .contentFilter else { + let didRollback = conversation.replaceModelMessages( + modelMessages.droppingLastUserTurn(), + validatingSnapshotIDs: snapshotIDs, + ) + guard didRollback else { + throw TachikomaError.invalidConfiguration( + "Conversation changed during streaming; refusing to merge response", + ) + } + assistantText = "" + bufferedDeltas.removeAll() + if buffersUntilDone { + continuation.yield(delta) + } + continue + } + if buffersUntilDone { + for bufferedDelta in bufferedDeltas { + continuation.yield(bufferedDelta) + } + bufferedDeltas.removeAll() + } // Add final assistant message to conversation if !assistantText.isEmpty { - self.conversation.addAssistantMessage(assistantText) + conversation.addAssistantMessage(assistantText) + assistantText = "" } } } + if buffersUntilDone, !didReceiveTerminal, !bufferedDeltas.isEmpty { + throw TachikomaError.apiError("Stream ended before provider completion status was received") + } + if !buffersUntilDone, !assistantText.isEmpty { + try Task.checkCancellation() + conversation.addAssistantMessage(assistantText) + } continuation.finish() } catch { continuation.finish(throwing: error) } } + continuation.onTermination = { @Sendable _ in + producer.cancel() + } } } /// Reset the agent's conversation history public func resetConversation() { // Reset the agent's conversation history - self.conversation = Conversation() + self.conversation = Conversation(configuration: self.configuration) self.conversation.addSystemMessage(self.instructions) } @@ -169,7 +282,7 @@ public final class Agent: @unchecked Sendable { public func updateInstructions(_ newInstructions: String) { // Create new conversation with updated instructions let oldMessages = self.conversation.getModelMessages().filter { $0.role != .system } - self.conversation = Conversation() + self.conversation = Conversation(configuration: self.configuration) self.conversation.addSystemMessage(newInstructions) // Re-add non-system messages @@ -179,6 +292,30 @@ public final class Agent: @unchecked Sendable { } } +final class AsyncReleaseOnce: @unchecked Sendable { + private let lock = NSLock() + private var didRelease = false + private let operation: @Sendable () async -> Void + + init(operation: @escaping @Sendable () async -> Void) { + self.operation = operation + } + + func release() { + self.lock.lock() + guard !self.didRelease else { + self.lock.unlock() + return + } + self.didRelease = true + self.lock.unlock() + + Task { + await self.operation() + } + } +} + /// Response from an agent execution @available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) public struct AgentResponse: Sendable { @@ -537,3 +674,33 @@ public enum SessionStatus: String, Codable, Sendable, CaseIterable { case failed case cancelled } + +extension LanguageModel { + var requiresTerminalRefusalBuffering: Bool { + switch self { + case let .anthropic(model): + return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: model.modelId) + case let .anthropicCompatible(modelId, _): + return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId) + case let .openRouter(modelId), let .together(modelId): + return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId) + case let .openaiCompatible(modelId, _): + return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId) + case let .custom(provider): + guard + let parsed = ProviderParser.parse(provider.modelId), + let registeredProvider = CustomProviderRegistry.shared.get(parsed.provider) else + { + return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: provider.modelId) + } + switch registeredProvider.kind { + case .openai: + return false + case .anthropic: + return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: parsed.model) + } + default: + return false + } + } +} diff --git a/Sources/TachikomaAgent/Conversation.swift b/Sources/TachikomaAgent/Conversation.swift index 7830902..e1b8579 100644 --- a/Sources/TachikomaAgent/Conversation.swift +++ b/Sources/TachikomaAgent/Conversation.swift @@ -3,10 +3,66 @@ import Tachikoma // MARK: - Conversation Management +private actor ContinuationGate { + private struct Waiter { + let id: UUID + let continuation: CheckedContinuation + } + + private var isLocked = false + private var waiters: [Waiter] = [] + + func acquire() async throws { + try Task.checkCancellation() + + if !self.isLocked { + self.isLocked = true + return + } + + let id = UUID() + let acquired = await withTaskCancellationHandler { + await withCheckedContinuation { continuation in + self.waiters.append(Waiter(id: id, continuation: continuation)) + } + } onCancel: { + Task { await self.cancelWaiter(id) } + } + + guard acquired else { + throw CancellationError() + } + + if Task.isCancelled { + self.release() + throw CancellationError() + } + } + + private func cancelWaiter(_ id: UUID) { + guard let index = self.waiters.firstIndex(where: { $0.id == id }) else { + return + } + + let waiter = self.waiters.remove(at: index) + waiter.continuation.resume(returning: false) + } + + func release() { + if self.waiters.isEmpty { + self.isLocked = false + } else { + let waiter = self.waiters.removeFirst() + waiter.continuation.resume(returning: true) + } + } +} + /// A conversation with an AI model @available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) public final class Conversation: @unchecked Sendable { private let lock = NSLock() + private let continuationGate = ContinuationGate() private var _messages: [ConversationMessage] = [] /// The configuration used by this conversation @@ -59,7 +115,6 @@ public final class Conversation: @unchecked Sendable { /// Get messages as ModelMessage array for API compatibility public func getModelMessages() -> [ModelMessage] { - // Get messages as ModelMessage array for API compatibility self.messages.map { $0.toModelMessage() } } @@ -72,31 +127,193 @@ public final class Conversation: @unchecked Sendable { self.lock.unlock() } - /// Continue the conversation with a model - public func continueConversation(using model: Model? = nil, tools _: [AgentTool]? = nil) async throws -> String { - // Convert conversation messages to model messages - let modelMessages = self.messages.map { conversationMessage in - ModelMessage( - id: conversationMessage.id, - role: ModelMessage.Role(rawValue: conversationMessage.role.rawValue) ?? .user, - content: [.text(conversationMessage.content)], - timestamp: conversationMessage.timestamp, + /// Replace the conversation with lossless ModelMessage history. + public func replaceModelMessages(_ modelMessages: [ModelMessage]) { + self.lock.lock() + self._messages = modelMessages.map { ConversationMessage.from($0) } + self.lock.unlock() + } + + /// Replace the conversation only if the original snapshot is still current. + public func replaceModelMessages( + _ modelMessages: [ModelMessage], + validatingSnapshotIDs snapshotIDs: [String], + ) + -> Bool + { + self.lock.lock() + defer { self.lock.unlock() } + + guard self._messages.count >= snapshotIDs.count else { + return false + } + + let currentPrefixIDs = self._messages.prefix(snapshotIDs.count).map(\.id) + guard currentPrefixIDs == snapshotIDs else { + return false + } + + let laterMessages = self._messages.dropFirst(snapshotIDs.count) + self._messages = modelMessages.map { ConversationMessage.from($0) } + laterMessages + return true + } + + /// Replace the original snapshot with generated history while preserving later appends. + public func mergeGeneratedMessages(_ modelMessages: [ModelMessage], replacingPrefixCount prefixCount: Int) { + self.lock.lock() + let laterMessages = self._messages.dropFirst(min(prefixCount, self._messages.count)) + self._messages = modelMessages.map { ConversationMessage.from($0) } + laterMessages + self.lock.unlock() + } + + /// Insert generated response messages after the snapshot anchor while preserving concurrent appends. + public func appendGeneratedMessages(_ modelMessages: [ModelMessage], afterMessageID messageID: String) { + guard !modelMessages.isEmpty else { return } + + self.lock.lock() + let conversationMessages = modelMessages.map { ConversationMessage.from($0) } + if let index = self._messages.firstIndex(where: { $0.id == messageID }) { + self._messages.insert(contentsOf: conversationMessages, at: self._messages.index(after: index)) + } else { + self._messages.append(contentsOf: conversationMessages) + } + self.lock.unlock() + } + + /// Insert generated response messages only if the snapshot prefix is still current. + public func appendGeneratedMessages( + _ modelMessages: [ModelMessage], + afterMessageID messageID: String, + validatingSnapshotIDs snapshotIDs: [String], + ) + -> Bool + { + guard !modelMessages.isEmpty else { return true } + + self.lock.lock() + defer { self.lock.unlock() } + + guard self._messages.count >= snapshotIDs.count else { + return false + } + + let currentPrefixIDs = self._messages.prefix(snapshotIDs.count).map(\.id) + guard currentPrefixIDs == snapshotIDs else { + return false + } + + let conversationMessages = modelMessages.map { ConversationMessage.from($0) } + if let index = self._messages.firstIndex(where: { $0.id == messageID }) { + self._messages.insert(contentsOf: conversationMessages, at: self._messages.index(after: index)) + } else { + self._messages.append(contentsOf: conversationMessages) + } + return true + } + + /// Merge a refused generation without losing completed tool steps. + public func mergeContentFilterResult( + _ resultMessages: [ModelMessage], + originalMessages: [ModelMessage], + afterMessageID _: String?, + validatingSnapshotIDs snapshotIDs: [String], + ) + -> Bool + { + let generatedMessages = Array(resultMessages.dropFirst(originalMessages.count)) + if !generatedMessages.isEmpty { + return self.replaceModelMessages( + originalMessages + generatedMessages, + validatingSnapshotIDs: snapshotIDs, ) } - // Generate response using the core API - let response = try await generateText( - model: model ?? .default, - messages: modelMessages, - tools: [], - settings: .default, - configuration: configuration, + return self.replaceModelMessages( + originalMessages.droppingLastUserTurn(), + validatingSnapshotIDs: snapshotIDs, ) + } - // Add the response to the conversation - self.addAssistantMessage(response.text) + public func removeMessage(id: String) { + self.lock.lock() + self._messages.removeAll { $0.id == id } + self.lock.unlock() + } - return response.text + public func withContinuationLock(_ operation: () async throws -> T) async throws -> T { + try await self.acquireContinuationLock() + do { + let result = try await operation() + await self.releaseContinuationLock() + return result + } catch { + await self.releaseContinuationLock() + throw error + } + } + + public func acquireContinuationLock() async throws { + try await self.continuationGate.acquire() + } + + public func releaseContinuationLock() async { + await self.continuationGate.release() + } + + /// Continue the conversation with a model + public func continueConversation( + using model: Model? = nil, + tools: [AgentTool]? = nil, + maxSteps: Int = 5, + ) async throws + -> String + { + try await self.withContinuationLock { + let conversationMessages = self.messages + let modelMessages = conversationMessages.map { $0.toModelMessage() } + let snapshotIDs = conversationMessages.map(\.id) + let anchorID = conversationMessages.last?.id + + // Generate response using the core API + let response = try await generateText( + model: model ?? .default, + messages: modelMessages, + tools: tools, + settings: .default, + maxSteps: maxSteps, + configuration: configuration, + ) + + let didMerge: Bool + if response.finishReason == .contentFilter { + didMerge = self.mergeContentFilterResult( + response.messages, + originalMessages: modelMessages, + afterMessageID: anchorID, + validatingSnapshotIDs: snapshotIDs, + ) + } else if let anchorID { + let generatedMessages = Array(response.messages.dropFirst(modelMessages.count)) + didMerge = self.appendGeneratedMessages( + generatedMessages, + afterMessageID: anchorID, + validatingSnapshotIDs: snapshotIDs, + ) + } else if self.messages.isEmpty { + self.replaceModelMessages(response.messages) + didMerge = true + } else { + didMerge = false + } + + guard didMerge else { + throw TachikomaError.invalidConfiguration( + "Conversation changed during generation; refusing to merge response", + ) + } + + return response.text + } } /// Continue the conversation with a model, streaming the response @@ -106,43 +323,90 @@ public final class Conversation: @unchecked Sendable { ) async throws -> AsyncThrowingStream { - // Convert conversation messages to model messages - let modelMessages = self.messages.map { conversationMessage in - ModelMessage( - id: conversationMessage.id, - role: ModelMessage.Role(rawValue: conversationMessage.role.rawValue) ?? .user, - content: [.text(conversationMessage.content)], - timestamp: conversationMessage.timestamp, - ) + try await self.acquireContinuationLock() + let gateRelease = AsyncReleaseOnce { + await self.releaseContinuationLock() } + let conversationMessages = self.messages + let modelMessages = conversationMessages.map { $0.toModelMessage() } + let snapshotIDs = conversationMessages.map(\.id) + let resolvedModel = model ?? .defaultStreaming + let streamSettings = GenerationSettings.default + let buffersUntilDone = streamSettings.streamBuffering == .untilTerminal || + resolvedModel.requiresTerminalRefusalBuffering // Generate response using the core API - let responseStream = try await streamText( - model: model ?? .default, - messages: modelMessages, - tools: tools ?? [], // Use provided tools or empty array - settings: .default, - configuration: configuration, - ) + let responseStream: StreamTextResult + do { + responseStream = try await streamText( + model: resolvedModel, + messages: modelMessages, + tools: tools ?? [], // Use provided tools or empty array + settings: streamSettings, + configuration: self.configuration, + ) + } catch { + gateRelease.release() + throw error + } // Create a new stream to process the response and update the conversation return AsyncThrowingStream { continuation in - Task { + let producer = Task { + defer { + gateRelease.release() + } var fullResponse = "" + var isContentFiltered = false + var bufferedText: [String] = [] + var didApproveBufferedResponse = !buffersUntilDone + var didReceiveTerminal = false do { for try await delta in responseStream.stream { + try Task.checkCancellation() switch delta.type { case .textDelta: if let text = delta.content { - continuation.yield(text) + if buffersUntilDone { + bufferedText.append(text) + } else { + continuation.yield(text) + } fullResponse += text } + case .done where delta.finishReason == .contentFilter: + didReceiveTerminal = true + isContentFiltered = true + let didRollback = self.replaceModelMessages( + modelMessages.droppingLastUserTurn(), + validatingSnapshotIDs: snapshotIDs, + ) + guard didRollback else { + throw TachikomaError.invalidConfiguration( + "Conversation changed during streaming; refusing to merge response", + ) + } + fullResponse = "" + bufferedText.removeAll() + case .done: + didReceiveTerminal = true + if buffersUntilDone { + for text in bufferedText { + continuation.yield(text) + } + didApproveBufferedResponse = true + bufferedText.removeAll() + } default: break } } + if buffersUntilDone, !didReceiveTerminal, !bufferedText.isEmpty { + throw TachikomaError.apiError("Stream ended before provider completion status was received") + } // Add the full response to the conversation - if !fullResponse.isEmpty { + if !isContentFiltered, !fullResponse.isEmpty, didApproveBufferedResponse { + try Task.checkCancellation() self.addAssistantMessage(fullResponse) } continuation.finish() @@ -150,10 +414,20 @@ public final class Conversation: @unchecked Sendable { continuation.finish(throwing: error) } } + continuation.onTermination = { @Sendable _ in + producer.cancel() + } } } } +extension [ModelMessage] { + func droppingLastUserTurn() -> [ModelMessage] { + guard self.last?.role == .user else { return self } + return Array(self.dropLast()) + } +} + /// A message in a conversation @available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) public struct ConversationMessage: Sendable, Codable, Equatable { @@ -161,6 +435,9 @@ public struct ConversationMessage: Sendable, Codable, Equatable { public let role: Role public let content: String public let timestamp: Date + public let contentParts: [ModelMessage.ContentPart]? + public let channel: ResponseChannel? + public let metadata: MessageMetadata? public enum Role: String, Sendable, Codable, CaseIterable { case system @@ -169,11 +446,22 @@ public struct ConversationMessage: Sendable, Codable, Equatable { case tool } - public init(id: String = UUID().uuidString, role: Role, content: String, timestamp: Date = Date()) { + public init( + id: String = UUID().uuidString, + role: Role, + content: String, + timestamp: Date = Date(), + contentParts: [ModelMessage.ContentPart]? = nil, + channel: ResponseChannel? = nil, + metadata: MessageMetadata? = nil, + ) { self.id = id self.role = role self.content = content self.timestamp = timestamp + self.contentParts = contentParts + self.channel = channel + self.metadata = metadata } /// Convert to ModelMessage for API compatibility @@ -189,8 +477,10 @@ public struct ConversationMessage: Sendable, Codable, Equatable { return ModelMessage( id: self.id, role: modelRole, - content: [.text(self.content)], + content: self.contentParts ?? [.text(self.content)], timestamp: self.timestamp, + channel: self.channel, + metadata: self.metadata, ) } @@ -219,6 +509,9 @@ public struct ConversationMessage: Sendable, Codable, Equatable { role: role, content: textContent, timestamp: modelMessage.timestamp, + contentParts: modelMessage.content, + channel: modelMessage.channel, + metadata: modelMessage.metadata, ) } } diff --git a/Tests/TachikomaMCPTests/MCPClientTests.swift b/Tests/TachikomaMCPTests/MCPClientTests.swift index adf86dd..519ec30 100644 --- a/Tests/TachikomaMCPTests/MCPClientTests.swift +++ b/Tests/TachikomaMCPTests/MCPClientTests.swift @@ -191,7 +191,7 @@ struct MCPClientTests { let imageData = Data("test".utf8) let imageResponse = ToolResponse.image(data: imageData, mimeType: "image/png") #expect(imageResponse.content.count == 1) - if case .image(data: let data, mimeType: let mimeType, annotations: _, _meta: _) = imageResponse.content.first { + if case let .image(data: data, mimeType: mimeType, annotations: _, _meta: _) = imageResponse.content.first { #expect(data == imageData.base64EncodedString()) #expect(mimeType == "image/png") } else { diff --git a/Tests/TachikomaMCPTests/MCPToolAdapterTests.swift b/Tests/TachikomaMCPTests/MCPToolAdapterTests.swift index d603fd7..59e54ac 100644 --- a/Tests/TachikomaMCPTests/MCPToolAdapterTests.swift +++ b/Tests/TachikomaMCPTests/MCPToolAdapterTests.swift @@ -130,7 +130,7 @@ struct MCPToolAdapterTests { #expect(response.isError == false) #expect(response.content.count == 1) - if case .image(data: let data, mimeType: let mimeType, annotations: _, _meta: _) = response.content[0] { + if case let .image(data: data, mimeType: mimeType, annotations: _, _meta: _) = response.content[0] { #expect(data == imageData.base64EncodedString()) #expect(mimeType == "image/jpeg") } else { diff --git a/Tests/TachikomaTests/Core/CustomProviderRegistryTests.swift b/Tests/TachikomaTests/Core/CustomProviderRegistryTests.swift index e687d55..0aeb722 100644 --- a/Tests/TachikomaTests/Core/CustomProviderRegistryTests.swift +++ b/Tests/TachikomaTests/Core/CustomProviderRegistryTests.swift @@ -120,6 +120,30 @@ struct CustomProviderRegistryTests { let compatibleClaude = try #require(resolvedClaude as? AnthropicCompatibleProvider) #expect(compatibleClaude.apiKey == "claude-provider-key") + let fableModel = LanguageModel.custom( + provider: DynamicCustomProvider(modelId: "claude-proxy/claude-fable-5"), + ) + #expect(fableModel.supportsStreaming == false) + let resolvedFable = try ProviderFactory.createProvider( + for: fableModel, + configuration: TachikomaConfiguration(loadFromEnvironment: false), + ) + let compatibleFable = try #require(resolvedFable as? AnthropicCompatibleProvider) + #expect(compatibleFable.capabilities.supportsStreaming == false) + + let directFableProvider = DynamicCustomProvider( + modelId: "claude-fable-5", + capabilities: ModelCapabilities(supportsStreaming: false), + ) + let directFableModel = LanguageModel.custom(provider: directFableProvider) + #expect(directFableModel.supportsStreaming == false) + + let unrelatedFableNamedProvider = DynamicCustomProvider( + modelId: "local-claude-fable-5-benchmark", + capabilities: ModelCapabilities(supportsStreaming: true), + ) + #expect(LanguageModel.custom(provider: unrelatedFableNamedProvider).supportsStreaming == true) + #expect(CustomProviderRegistry.shared.get("missing") == nil) } @@ -175,10 +199,11 @@ private final class DynamicCustomProvider: ModelProvider { let modelId: String let baseURL: String? = nil let apiKey: String? = nil - let capabilities = ModelCapabilities() + let capabilities: ModelCapabilities - init(modelId: String) { + init(modelId: String, capabilities: ModelCapabilities = ModelCapabilities()) { self.modelId = modelId + self.capabilities = capabilities } func generateText(request _: ProviderRequest) async throws -> ProviderResponse { diff --git a/Tests/TachikomaTests/Core/GenerationTests.swift b/Tests/TachikomaTests/Core/GenerationTests.swift index e96bc9b..ec8763c 100644 --- a/Tests/TachikomaTests/Core/GenerationTests.swift +++ b/Tests/TachikomaTests/Core/GenerationTests.swift @@ -105,7 +105,7 @@ struct GenerationTests { try await TestHelpers.withTestConfiguration(apiKeys: ["anthropic": "test-key"]) { config in let stream = try await stream( "Write a haiku", - using: .anthropic(.sonnet46), + using: .anthropic(.custom("claude-3-5-sonnet-20241022")), system: "You are a poet", configuration: config, ) @@ -132,6 +132,243 @@ struct GenerationTests { } } + @Test + func `StreamText rejects refusal-prone aggregator models`() async throws { + await #expect(throws: TachikomaError.self) { + _ = try await streamText( + model: .openRouter(modelId: "anthropic/claude-fable-5"), + messages: [.user("hi")], + configuration: TachikomaConfiguration(loadFromEnvironment: false), + ) + } + + await #expect(throws: TachikomaError.self) { + _ = try await streamText( + model: .openaiCompatible( + modelId: "anthropic/claude-fable-5", + baseURL: "https://example.test", + ), + messages: [.user("hi")], + configuration: TachikomaConfiguration(loadFromEnvironment: false), + ) + } + } + + @Test + func `StreamObject rejects unsupported streaming models`() async throws { + struct Payload: Codable, Sendable { + let ok: Bool + } + + await #expect(throws: TachikomaError.self) { + _ = try await streamObject( + model: .openRouter(modelId: "anthropic/claude-fable-5"), + messages: [.user("hi")], + schema: Payload.self, + configuration: TachikomaConfiguration(loadFromEnvironment: false), + ) + } + } + + @Test + func `StreamObject explicit terminal buffering suppresses content filter partial output`() async throws { + struct Payload: Codable, Sendable { + let ok: Bool + } + + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider( + response: ProviderResponse(text: "", finishReason: .stop), + streamDeltas: [ + .text(#"{"ok":true}"#), + .done(finishReason: .contentFilter), + ], + ) + } + + let result = try await streamObject( + model: .openai(.gpt55), + messages: [.user("hi")], + schema: Payload.self, + settings: GenerationSettings(streamBuffering: .untilTerminal), + configuration: config, + ) + + var publishedDeltas = 0 + do { + for try await _ in result.objectStream { + publishedDeltas += 1 + } + Issue.record("Expected content filter error") + } catch let error as TachikomaError { + guard case let .apiError(message) = error else { + Issue.record("Expected apiError, got \(error)") + return + } + #expect(message.contains("content filter")) + #expect(publishedDeltas == 0) + } + } + + @Test + func `StreamObject completes direct custom stream without terminal status`() async throws { + struct Payload: Codable, Sendable { + let ok: Bool + } + + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider( + response: ProviderResponse(text: "", finishReason: .stop), + streamDeltas: [.text(#"{"ok":true}"#)], + ) + } + + let result = try await streamObject( + model: .custom(provider: StaticProvider( + response: ProviderResponse(text: ""), + capabilities: ModelCapabilities(supportsStreaming: true), + streamDeltas: [], + )), + messages: [.user("hi")], + schema: Payload.self, + settings: GenerationSettings(stopConditions: StringStopCondition("ok")), + configuration: config, + ) + + var completed = false + for try await delta in result.objectStream { + if delta.type == .complete { + #expect(delta.object?.ok == true) + completed = true + } + } + #expect(completed) + } + + @Test + func `StreamObject stays incremental by default when terminal content filter arrives`() async throws { + struct Payload: Codable, Sendable { + let ok: Bool + } + + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider( + response: ProviderResponse(text: "", finishReason: .stop), + streamDeltas: [ + .text(#"{"ok":true}"#), + .done(finishReason: .contentFilter), + ], + ) + } + + let result = try await streamObject( + model: .openaiCompatible(modelId: "json-stream", baseURL: "https://example.test"), + messages: [.user("hi")], + schema: Payload.self, + configuration: config, + ) + + var publishedDeltas = 0 + do { + for try await _ in result.objectStream { + publishedDeltas += 1 + } + Issue.record("Expected content filter error") + } catch let error as TachikomaError { + guard case let .apiError(message) = error else { + Issue.record("Expected apiError, got \(error)") + return + } + #expect(message.contains("content filter")) + #expect(publishedDeltas > 0) + } + } + + @Test + func `StreamObject honors explicit terminal buffering for custom providers`() async throws { + struct Payload: Codable, Sendable { + let ok: Bool + } + + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider( + response: ProviderResponse(text: "", finishReason: .stop), + streamDeltas: [ + .text(#"{"ok":true}"#), + .done(finishReason: .contentFilter), + ], + ) + } + + let result = try await streamObject( + model: .custom(provider: StaticProvider( + response: ProviderResponse(text: ""), + capabilities: ModelCapabilities(supportsStreaming: true), + streamDeltas: [], + )), + messages: [.user("hi")], + schema: Payload.self, + settings: GenerationSettings(streamBuffering: .untilTerminal), + configuration: config, + ) + + var publishedDeltas = 0 + do { + for try await _ in result.objectStream { + publishedDeltas += 1 + } + Issue.record("Expected content filter error") + } catch let error as TachikomaError { + guard case let .apiError(message) = error else { + Issue.record("Expected apiError, got \(error)") + return + } + #expect(message.contains("content filter")) + #expect(publishedDeltas == 0) + } + } + + @Test + func `StreamObject rejects length-truncated object stream`() async throws { + struct Payload: Codable, Sendable { + let ok: Bool + } + + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider( + response: ProviderResponse(text: "", finishReason: .stop), + streamDeltas: [ + .text(#"{"ok":true}"#), + .text(#", "unfinished":"#), + .done(finishReason: .length), + ], + ) + } + + let result = try await streamObject( + model: .openai(.gpt55), + messages: [.user("hi")], + schema: Payload.self, + configuration: config, + ) + + do { + for try await _ in result.objectStream {} + Issue.record("Expected truncated stream error") + } catch let error as TachikomaError { + guard case let .invalidInput(message) = error else { + Issue.record("Expected invalidInput, got \(error)") + return + } + #expect(message.contains("complete object")) + } + } + // MARK: - Image Analysis Tests @Test @@ -259,6 +496,1364 @@ struct GenerationTests { } } + @Test + func `GenerateText preserves ordered assistant messages from provider`() async throws { + let call1 = AgentToolCall(id: "call-1", name: "first_tool", arguments: [:]) + let call2 = AgentToolCall(id: "call-2", name: "second_tool", arguments: [:]) + let thinking1 = ModelMessage( + role: .assistant, + content: [.text("thinking-1")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.signature": "sig-1", + "anthropic.thinking.type": "thinking", + ]), + ) + let thinking2 = ModelMessage( + role: .assistant, + content: [.text("thinking-2")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.signature": "sig-2", + "anthropic.thinking.type": "thinking", + ]), + ) + let providerResponse = ProviderResponse( + text: "", + usage: nil, + finishReason: .toolCalls, + toolCalls: [call1, call2], + assistantMessages: [ + thinking1, + ModelMessage(role: .assistant, content: [.toolCall(call1)]), + thinking2, + ModelMessage(role: .assistant, content: [.toolCall(call2)]), + ], + ) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in StaticProvider(response: providerResponse) } + + let firstTool = createTool(name: "first_tool", description: "First", parameters: [], required: []) { _ in + AnyAgentToolValue(string: "first") + } + let secondTool = createTool(name: "second_tool", description: "Second", parameters: [], required: []) { _ in + AnyAgentToolValue(string: "second") + } + + let result = try await generateText( + model: .anthropic(.fable5), + messages: [.user("go")], + tools: [firstTool, secondTool], + maxSteps: 1, + configuration: config, + ) + + #expect(result.messages[1] == thinking1) + if case let .toolCall(firstCall) = result.messages[2].content.first { + #expect(firstCall.id == "call-1") + } else { + Issue.record("Expected first tool call") + } + #expect(result.messages[3] == thinking2) + if case let .toolCall(secondCall) = result.messages[4].content.first { + #expect(secondCall.id == "call-2") + } else { + Issue.record("Expected second tool call") + } + } + + @Test + func `GenerateText merges fallback fields into partial assistant messages`() async throws { + let call = AgentToolCall(id: "call-1", name: "inspect_context", arguments: [:]) + let thinking = ModelMessage( + role: .assistant, + content: [.text("thinking-only")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + ]), + ) + let providerResponse = ProviderResponse( + text: "visible text", + finishReason: .toolCalls, + toolCalls: [call], + assistantMessages: [thinking], + ) + let seenContext = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in StaticProvider(response: providerResponse) } + let tool = AgentTool( + name: "inspect_context", + description: "Inspect context", + parameters: AgentToolParameters(properties: [:], required: []), + ) { _, context in + seenContext.messages = context.messages + return AnyAgentToolValue(string: "ok") + } + + let result = try await generateText( + model: .anthropic(.fable5), + messages: [.user("go")], + tools: [tool], + maxSteps: 1, + configuration: config, + ) + + #expect(result.messages[1] == thinking) + let fallbackMessage = try #require(result.messages.first { message in + message.role == .assistant && message.content.contains { part in + if case let .toolCall(toolCall) = part { + return toolCall.id == "call-1" + } + return false + } + }) + #expect(fallbackMessage.content.contains(.text("visible text"))) + + let contextMessages = try #require(seenContext.messages) + #expect(contextMessages.contains { message in + message.role == .assistant && message.content.contains { part in + if case let .toolCall(toolCall) = part { + return toolCall.id == "call-1" + } + return false + } + }) + } + + @Test + func `GenerateText does not duplicate concatenated native assistant text`() async throws { + let providerResponse = ProviderResponse( + text: "part onepart two", + finishReason: .stop, + assistantMessages: [ + ModelMessage(role: .assistant, content: [.text("part one")]), + ModelMessage(role: .assistant, content: [.text("part two")]), + ], + ) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in StaticProvider(response: providerResponse) } + + let result = try await generateText( + model: .anthropic(.fable5), + messages: [.user("go")], + configuration: config, + ) + + let assistantTexts = result.messages.flatMap { message -> [String] in + guard message.role == .assistant, message.channel != .thinking else { return [] } + return message.content.compactMap { part in + if case let .text(value) = part { + return value + } + return nil + } + } + #expect(assistantTexts == ["part one", "part two"]) + } + + @Test + func `GenerateText preserves empty successful assistant turn`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "", finishReason: .stop)) + } + + let result = try await generateText( + model: .anthropic(.fable5), + messages: [.user("go")], + configuration: config, + ) + + #expect(result.messages.count == 2) + #expect(result.messages[1].role == .assistant) + #expect(result.messages[1].content == [.text("")]) + } + + @Test + func `GenerateText hides Anthropic thinking messages from tool execution context`() async throws { + let call = AgentToolCall(id: "call-1", name: "inspect_context", arguments: [:]) + let thinking = ModelMessage( + role: .assistant, + content: [.text("private-thinking")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + ]), + ) + let providerResponse = ProviderResponse( + text: "", + finishReason: .toolCalls, + toolCalls: [call], + assistantMessages: [ + thinking, + ModelMessage(role: .assistant, content: [.toolCall(call)]), + ], + ) + let seenContext = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in StaticProvider(response: providerResponse) } + let tool = AgentTool( + name: "inspect_context", + description: "Inspect context", + parameters: AgentToolParameters(properties: [:], required: []), + ) { _, context in + seenContext.messages = context.messages + return AnyAgentToolValue(string: "ok") + } + + let result = try await generateText( + model: .anthropic(.fable5), + messages: [.user("go")], + tools: [tool], + maxSteps: 1, + configuration: config, + ) + + #expect(result.messages.contains { $0.channel == .thinking }) + let contextMessages = try #require(seenContext.messages) + #expect(contextMessages.allSatisfy { message in + message.metadata?.customData?["anthropic.thinking.signature"] == nil + }) + #expect(contextMessages.contains { message in + message.content.contains { content in + if case let .toolCall(toolCall) = content { + return toolCall.id == "call-1" + } + return false + } + }) + } + + @Test + func `GenerateText strips provider-neutral thinking messages from tool execution context`() async throws { + let call = AgentToolCall(id: "call-1", name: "inspect_context", arguments: [:]) + let thinking = ModelMessage( + role: .assistant, + content: [.text("visible-thinking")], + channel: .thinking, + ) + let providerResponse = ProviderResponse( + text: "", + finishReason: .toolCalls, + toolCalls: [call], + assistantMessages: [ + thinking, + ModelMessage(role: .assistant, content: [.toolCall(call)]), + ], + ) + let seenContext = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in StaticProvider(response: providerResponse) } + let tool = AgentTool( + name: "inspect_context", + description: "Inspect context", + parameters: AgentToolParameters(properties: [:], required: []), + ) { _, context in + seenContext.messages = context.messages + return AnyAgentToolValue(string: "ok") + } + + _ = try await generateText( + model: .openai(.gpt55), + messages: [.user("go")], + tools: [tool], + maxSteps: 1, + configuration: config, + ) + + let contextMessages = try #require(seenContext.messages) + #expect(contextMessages.allSatisfy { $0.channel != .thinking }) + } + + @Test + func `GenerateText skips usage tracking for non-billable refusal`() async throws { + let providerResponse = ProviderResponse( + text: "", + usage: Usage(inputTokens: 123, outputTokens: 0), + finishReason: .contentFilter, + isBillable: false, + ) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in StaticProvider(response: providerResponse) } + let sessionId = "non-billable-\(UUID().uuidString)" + _ = UsageTracker.shared.startSession(sessionId) + defer { _ = UsageTracker.shared.endSession(sessionId) } + + let result = try await generateText( + model: .anthropic(.fable5), + messages: [.user("blocked")], + configuration: config, + sessionId: sessionId, + ) + + #expect(result.finishReason == .contentFilter) + #expect(result.usage?.inputTokens == 123) + #expect(UsageTracker.shared.getSession(sessionId)?.operations.isEmpty == true) + } + + @Test + func `GenerateText preserves content filter finish reason across client stop conditions`() async throws { + let providerResponse = ProviderResponse( + text: "Refused STOP by policy", + usage: Usage(inputTokens: 10, outputTokens: 0), + finishReason: .contentFilter, + isBillable: false, + ) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in StaticProvider(response: providerResponse) } + + let result = try await generateText( + model: .anthropic(.fable5), + messages: [.user("blocked")], + settings: GenerationSettings(stopConditions: StringStopCondition("STOP")), + configuration: config, + ) + + #expect(result.text.isEmpty) + #expect(result.finishReason == .contentFilter) + } + + @Test + func `GenerateText persists client stop truncation across generated message history`() async throws { + let providerResponse = ProviderResponse( + text: "safe STOPleak", + usage: Usage(inputTokens: 10, outputTokens: 3), + finishReason: .stop, + assistantMessages: [ + ModelMessage(role: .assistant, content: [.text("safe STOP")]), + ModelMessage(role: .assistant, content: [.text("leak")]), + ], + ) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in StaticProvider(response: providerResponse) } + + let result = try await generateText( + model: .anthropic(.fable5), + messages: [.user("continue")], + settings: GenerationSettings(stopConditions: StringStopCondition("STOP")), + configuration: config, + ) + + #expect(result.text == "safe ") + let generatedText = result.messages + .dropFirst() + .flatMap { message in + message.content.compactMap { part in + if case let .text(text) = part { + return text + } + return nil + } + } + .joined() + #expect(generatedText == "safe ") + #expect(!generatedText.contains("STOP")) + #expect(!generatedText.contains("leak")) + } + + @Test + func `GenerateText truncates only final step after tool history`() async throws { + let call = AgentToolCall(id: "call-1", name: "inspect_context", arguments: [:]) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + SequenceProvider(responses: [ + ProviderResponse( + text: "Checking...", + finishReason: .toolCalls, + toolCalls: [call], + assistantMessages: [ + ModelMessage(role: .assistant, content: [.text("Checking..."), .toolCall(call)]), + ], + ), + ProviderResponse( + text: "answer STOP leak", + finishReason: .stop, + assistantMessages: [ + ModelMessage(role: .assistant, content: [.text("answer STOP leak")]), + ], + ), + ]) + } + let tool = AgentTool( + name: "inspect_context", + description: "Inspect context", + parameters: AgentToolParameters(properties: [:], required: []), + ) { _, _ in + AnyAgentToolValue(string: "ok") + } + + let result = try await generateText( + model: .anthropic(.fable5), + messages: [.user("continue")], + tools: [tool], + settings: GenerationSettings(stopConditions: StringStopCondition("STOP")), + maxSteps: 2, + configuration: config, + ) + + #expect(result.text == "answer ") + let assistantTexts = result.messages + .filter { $0.role == .assistant && $0.channel != .thinking } + .flatMap { message in + message.content.compactMap { part in + if case let .text(text) = part { + return text + } + return nil + } + } + #expect(assistantTexts.contains("Checking...")) + #expect(assistantTexts.last == "answer ") + #expect(!assistantTexts.joined().contains("STOP")) + #expect(!assistantTexts.joined().contains("leak")) + } + + @Test + func `GenerateText tracks billable refusal with generated output`() async throws { + let providerResponse = ProviderResponse( + text: "", + usage: Usage(inputTokens: 123, outputTokens: 4), + finishReason: .contentFilter, + isBillable: true, + ) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in StaticProvider(response: providerResponse) } + let sessionId = "billable-\(UUID().uuidString)" + _ = UsageTracker.shared.startSession(sessionId) + defer { _ = UsageTracker.shared.endSession(sessionId) } + + _ = try await generateText( + model: .anthropic(.fable5), + messages: [.user("blocked late")], + configuration: config, + sessionId: sessionId, + ) + + let operation = try #require(UsageTracker.shared.getSession(sessionId)?.operations.first) + #expect(operation.usage.inputTokens == 123) + #expect(operation.usage.outputTokens == 4) + #expect((operation.usage.cost?.total ?? 0) > 0) + } + + @Test + func `GenerateText strips Anthropic thinking before non-Anthropic providers`() async throws { + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "ok", finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + ]), + ) + + _ = try await generateText( + model: .openai(.gpt55), + messages: [.user("hi"), thinking, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 2) + #expect(messages.allSatisfy { $0.channel != .thinking }) + } + + @Test + func `GenerateText strips Anthropic thinking from other Claude models`() async throws { + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "ok", finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = try ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-fable-5", + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + "tachikoma.reasoning.provider": "anthropic", + "tachikoma.reasoning.model": "claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://api.anthropic.com")), + ]), + ) + + _ = try await generateText( + model: .anthropic(.opus48), + messages: [.user("hi"), thinking, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 2) + #expect(messages.allSatisfy { $0.channel != .thinking }) + } + + @Test + func `GenerateText strips unknown Anthropic thinking before Fable`() async throws { + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "ok", finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + ]), + ) + + _ = try await generateText( + model: .anthropic(.fable5), + messages: [.user("hi"), thinking, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 2) + #expect(messages.allSatisfy { $0.channel != .thinking }) + } + + @Test + func `GenerateText strips unknown Anthropic thinking before custom Fable id`() async throws { + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "ok", finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + ]), + ) + + _ = try await generateText( + model: .anthropic(.custom("claude-fable-5")), + messages: [.user("hi"), thinking, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 2) + #expect(messages.allSatisfy { $0.channel != .thinking }) + } + + @Test + func `GenerateText preserves legacy unknown Anthropic thinking for non-Fable Claude`() async throws { + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "ok", finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + ]), + ) + + _ = try await generateText( + model: .anthropic(.opus48), + messages: [.user("hi"), thinking, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 3) + #expect(messages[1].channel == .thinking) + } + + @Test + func `GenerateText keeps Anthropic thinking for same Claude model`() async throws { + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "ok", finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = try ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-fable-5", + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + "tachikoma.reasoning.provider": "anthropic", + "tachikoma.reasoning.model": "claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://api.anthropic.com")), + ]), + ) + + _ = try await generateText( + model: .anthropic(.fable5), + messages: [.user("hi"), thinking, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 3) + #expect(messages[1].channel == .thinking) + } + + @Test + func `GenerateText preserves direct custom Anthropic thinking for same model`() async throws { + let seenMessages = MessageBox() + let provider = StaticProvider( + modelId: "claude-fable-5", + response: ProviderResponse(text: "ok", finishReason: .stop), + ) { request in + seenMessages.messages = request.messages + } + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in provider } + let thinking = ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-fable-5", + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + "tachikoma.reasoning.provider": "custom-anthropic", + "tachikoma.reasoning.model": "claude-fable-5", + ]), + ) + + _ = try await generateText( + model: .custom(provider: provider), + messages: [.user("hi"), thinking, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 3) + #expect(messages[1].channel == .thinking) + } + + @Test + func `GenerateText preserves direct AnthropicProvider thinking for same custom model`() async throws { + let seenMessages = MessageBox() + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let directProvider = try AnthropicProvider(model: .fable5, configuration: config) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "ok", finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = try ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-fable-5", + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + "tachikoma.reasoning.provider": "anthropic", + "tachikoma.reasoning.model": "claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://api.anthropic.com")), + ]), + ) + + _ = try await generateText( + model: .custom(provider: directProvider), + messages: [.user("hi"), thinking, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 3) + #expect(messages[1].channel == .thinking) + } + + @Test + func `GenerateText preserves Anthropic-compatible thinking for same model`() async throws { + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "ok", finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = try ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-proxy-model", + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + "tachikoma.reasoning.provider": "anthropic-compatible", + "tachikoma.reasoning.model": "claude-proxy-model", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://example.test")), + ]), + ) + + _ = try await generateText( + model: .anthropicCompatible(modelId: "claude-proxy-model", baseURL: "https://example.test"), + messages: [.user("hi"), thinking, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 3) + #expect(messages[1].channel == .thinking) + } + + @Test + func `GenerateText tags fallback reasoning for Anthropic-compatible Fable`() async throws { + let providerResponse = ProviderResponse( + text: "ok", + finishReason: .stop, + reasoning: [ProviderReasoningBlock(text: "private", signature: "sig")], + ) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in StaticProvider(response: providerResponse) } + + let result = try await generateText( + model: .anthropicCompatible(modelId: "claude-fable-5", baseURL: "https://example.test"), + messages: [.user("hi")], + configuration: config, + ) + + let thinking = try #require(result.messages.first { $0.channel == .thinking }) + #expect(thinking.metadata?.customData?["anthropic.thinking.model"] == "claude-fable-5") + #expect(thinking.metadata?.customData?["anthropic.thinking.signature"] == "sig") + #expect(thinking.metadata?.customData?["tachikoma.reasoning.provider"] == "anthropic-compatible") + #expect(thinking.metadata?.customData?["tachikoma.reasoning.base_url"] == ReasoningEndpointIdentity + .canonical("https://example.test")) + } + + @Test + func `GenerateText tags fallback reasoning for direct custom Fable`() async throws { + let provider = StaticProvider( + modelId: "claude-fable-5", + response: ProviderResponse( + text: "ok", + finishReason: .stop, + reasoning: [ProviderReasoningBlock(text: "private", signature: "sig")], + ), + ) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in provider } + + let result = try await generateText( + model: .custom(provider: provider), + messages: [.user("hi")], + configuration: config, + ) + + let thinking = try #require(result.messages.first { $0.channel == .thinking }) + #expect(thinking.metadata?.customData?["anthropic.thinking.model"] == "claude-fable-5") + #expect(thinking.metadata?.customData?["anthropic.thinking.signature"] == "sig") + } + + @Test + func `GenerateText keeps fallback reasoning provider-neutral without Anthropic target`() async throws { + let providerResponse = ProviderResponse( + text: "ok", + finishReason: .stop, + reasoning: [ProviderReasoningBlock(text: "visible reasoning", signature: "sig")], + ) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in StaticProvider(response: providerResponse) } + + let result = try await generateText( + model: .openai(.gpt55), + messages: [.user("hi")], + configuration: config, + ) + + let thinking = try #require(result.messages.first { $0.channel == .thinking }) + #expect(thinking.metadata?.customData?["anthropic.thinking.type"] == nil) + #expect(thinking.metadata?.customData?["anthropic.thinking.signature"] == nil) + #expect(thinking.metadata?.customData?["tachikoma.reasoning.type"] == "thinking") + #expect(thinking.metadata?.customData?["tachikoma.reasoning.signature"] == "sig") + #expect(result.messages.toUIMessages().contains { $0.content == "visible reasoning" }) + } + + @Test + func `StreamText strips provider-neutral thinking before provider replay`() async throws { + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "ok", finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = ModelMessage( + role: .assistant, + content: [.text("visible reasoning")], + channel: .thinking, + metadata: .init(customData: [ + "tachikoma.reasoning.type": "thinking", + "tachikoma.reasoning.signature": "sig", + ]), + ) + + _ = try await streamText( + model: .openai(.gpt55), + messages: [.user("hi"), thinking, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 2) + #expect(messages.allSatisfy { $0.channel != .thinking }) + } + + @Test + func `StreamText strips Anthropic thinking before non-Anthropic providers`() async throws { + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "ok", finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: ["anthropic.thinking.signature": "sig"]), + ) + + _ = try await streamText( + model: .openai(.gpt55), + messages: [.user("hi"), thinking, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 2) + #expect(messages.allSatisfy { $0.channel != .thinking }) + } + + @Test + func `StreamText stop conditions preserve terminal content filter over local stop`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider( + response: ProviderResponse(text: "", finishReason: .stop), + capabilities: ModelCapabilities(supportsStreaming: true), + streamDeltas: [ + .text("blocked"), + .done(finishReason: .contentFilter), + ], + ) + } + + let result = try await streamText( + model: .openaiCompatible(modelId: "compatible-model", baseURL: "https://example.test"), + messages: [.user("blocked")], + settings: GenerationSettings(stopConditions: StringStopCondition("blocked")), + configuration: config, + ) + + var deltas: [TextStreamDelta] = [] + for try await delta in result.stream { + deltas.append(delta) + } + + #expect(!deltas.contains { $0.type == .textDelta && $0.content == "blocked" }) + #expect(deltas.contains { $0.type == .done && $0.finishReason == .contentFilter }) + } + + @Test + func `StreamText stays incremental by default when terminal content filter arrives`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider( + response: ProviderResponse(text: "", finishReason: .stop), + capabilities: ModelCapabilities(supportsStreaming: true), + streamDeltas: [ + .text("blocked"), + .done(finishReason: .contentFilter), + ], + ) + } + + let result = try await streamText( + model: .openaiCompatible(modelId: "compatible-model", baseURL: "https://example.test"), + messages: [.user("blocked")], + configuration: config, + ) + + var deltas: [TextStreamDelta] = [] + for try await delta in result.stream { + deltas.append(delta) + } + + #expect(deltas.contains { $0.type == .textDelta && $0.content == "blocked" }) + #expect(deltas.contains { $0.type == .done && $0.finishReason == .contentFilter }) + } + + @Test + func `StreamText explicit terminal buffering suppresses late refused text`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider( + response: ProviderResponse(text: "", finishReason: .stop), + capabilities: ModelCapabilities(supportsStreaming: true), + streamDeltas: [ + .text("blocked"), + .done(finishReason: .contentFilter), + ], + ) + } + + let result = try await streamText( + model: .openaiCompatible(modelId: "compatible-model", baseURL: "https://example.test"), + messages: [.user("blocked")], + settings: GenerationSettings(streamBuffering: .untilTerminal), + configuration: config, + ) + + var deltas: [TextStreamDelta] = [] + for try await delta in result.stream { + deltas.append(delta) + } + + #expect(!deltas.contains { $0.type == .textDelta && $0.content == "blocked" }) + #expect(deltas.contains { $0.type == .done && $0.finishReason == .contentFilter }) + } + + @Test + func `StreamText counts suppressed buffered refusal tokens`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider( + response: ProviderResponse(text: "", finishReason: .stop), + capabilities: ModelCapabilities(supportsStreaming: true), + streamDeltas: [ + .text("billable refused output"), + .done(finishReason: .contentFilter), + ], + ) + } + let sessionId = "buffered-refusal-\(UUID().uuidString)" + _ = UsageTracker.shared.startSession(sessionId) + defer { _ = UsageTracker.shared.endSession(sessionId) } + + let result = try await streamText( + model: .openaiCompatible(modelId: "compatible-model", baseURL: "https://example.test"), + messages: [.user("blocked")], + settings: GenerationSettings(streamBuffering: .untilTerminal), + configuration: config, + sessionId: sessionId, + ) + + for try await _ in result.stream {} + + let operation = try #require(UsageTracker.shared.getSession(sessionId)?.operations.last) + #expect(operation.usage.outputTokens > 0) + } + + @Test + func `StreamText stop conditions ignore reasoning deltas`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider( + response: ProviderResponse(text: "", finishReason: .stop), + capabilities: ModelCapabilities(supportsStreaming: true), + streamDeltas: [ + .reasoning("hidden STOP"), + .text("visible"), + .done(finishReason: .stop), + ], + ) + } + + let result = try await streamText( + model: .openaiCompatible(modelId: "compatible-model", baseURL: "https://example.test"), + messages: [.user("hi")], + settings: GenerationSettings(stopConditions: StringStopCondition("STOP")), + configuration: config, + ) + + var deltas: [TextStreamDelta] = [] + for try await delta in result.stream { + deltas.append(delta) + } + + #expect(deltas.contains { $0.type == .reasoning && $0.content == "hidden STOP" }) + #expect(deltas.contains { $0.type == .textDelta && $0.content == "visible" }) + #expect(deltas.contains { $0.type == .done && $0.finishReason == .stop }) + } + + @Test + func `StreamText stop conditions can finish before provider terminal status`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + let provider = StaticProvider( + response: ProviderResponse(text: "", finishReason: .stop), + capabilities: ModelCapabilities(supportsStreaming: true), + streamDeltas: [ + .text("partial"), + ], + ) + config.setProviderFactoryOverride { _, _ in provider } + + let result = try await streamText( + model: .custom(provider: provider), + messages: [.user("hi")], + settings: GenerationSettings(stopConditions: StringStopCondition("partial")), + configuration: config, + ) + + var deltas: [TextStreamDelta] = [] + for try await delta in result.stream { + deltas.append(delta) + } + #expect(deltas.contains { $0.type == .textDelta && $0.content == "partial" }) + #expect(deltas.contains { $0.type == .done && $0.finishReason == .stop }) + } + + @Test + func `GenerateObject strips Anthropic thinking before non-Anthropic providers`() async throws { + struct Payload: Codable, Sendable, Equatable { + let ok: Bool + } + + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: #"{"ok":true}"#, finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: ["anthropic.thinking.signature": "sig"]), + ) + + let result = try await generateObject( + model: .openai(.gpt55), + messages: [.user("hi"), thinking, .assistant("visible")], + schema: Payload.self, + configuration: config, + ) + + #expect(result.object == Payload(ok: true)) + let messages = try #require(seenMessages.messages) + #expect(messages.count == 2) + #expect(messages.allSatisfy { $0.channel != .thinking }) + } + + @Test + func `GenerateObject strips provider-neutral thinking before non-Anthropic providers`() async throws { + struct Payload: Codable, Sendable, Equatable { + let ok: Bool + } + + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: #"{"ok":true}"#, finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = ModelMessage( + role: .assistant, + content: [.text("neutral reasoning")], + channel: .thinking, + ) + + let result = try await generateObject( + model: .openai(.gpt55), + messages: [.user("hi"), thinking, .assistant("visible")], + schema: Payload.self, + configuration: config, + ) + + #expect(result.object == Payload(ok: true)) + let messages = try #require(seenMessages.messages) + #expect(messages.count == 2) + #expect(messages.allSatisfy { $0.channel != .thinking }) + } + + @Test + func `GenerateObject surfaces content filter before JSON parsing`() async throws { + struct Payload: Codable, Sendable, Equatable { + let ok: Bool + } + + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "", finishReason: .contentFilter)) + } + + do { + _ = try await generateObject( + model: .anthropic(.fable5), + messages: [.user("blocked")], + schema: Payload.self, + configuration: config, + ) + Issue.record("Expected content filter error") + } catch let error as TachikomaError { + guard case let .apiError(message) = error else { + Issue.record("Expected apiError, got \(error)") + return + } + #expect(message.contains("content filter")) + } + } + + @Test + func `GenerateText preserves reasoning-only assistant boundary`() async throws { + let thinking = ModelMessage( + role: .assistant, + content: [.text("thinking-only")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + ]), + ) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse( + text: "", + finishReason: .length, + assistantMessages: [thinking], + )) + } + + let result = try await generateText( + model: .anthropic(.fable5), + messages: [.user("think")], + configuration: config, + ) + + #expect(result.messages.count == 3) + #expect(result.messages[1] == thinking) + #expect(result.messages[2].role == .assistant) + #expect(result.messages[2].content == [.text("")]) + #expect(result.messages[2].metadata?.customData?["tachikoma.internal.boundary"] == "reasoning_only") + } + + @Test + func `GenerateObject strips Anthropic reasoning boundary before non-Anthropic providers`() async throws { + struct Payload: Codable, Sendable, Equatable { + let ok: Bool + } + + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: #"{"ok":true}"#, finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = try ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-fable-5", + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + "tachikoma.reasoning.provider": "anthropic", + "tachikoma.reasoning.model": "claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://api.anthropic.com")), + ]), + ) + let boundary = ModelMessage( + role: .assistant, + content: [.text("")], + metadata: .init(customData: ["tachikoma.internal.boundary": "reasoning_only"]), + ) + + let result = try await generateObject( + model: .openai(.gpt55), + messages: [.user("hi"), thinking, boundary, .assistant("visible")], + schema: Payload.self, + configuration: config, + ) + + #expect(result.object == Payload(ok: true)) + let messages = try #require(seenMessages.messages) + #expect(messages.count == 2) + #expect(messages.allSatisfy { $0.metadata?.customData?["tachikoma.internal.boundary"] == nil }) + } + + @Test + func `GenerateText keeps matching Anthropic reasoning boundary for replay`() async throws { + let seenMessages = MessageBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "ok", finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = try ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-fable-5", + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + "tachikoma.reasoning.provider": "anthropic", + "tachikoma.reasoning.model": "claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://api.anthropic.com")), + ]), + ) + let boundary = ModelMessage( + role: .assistant, + content: [.text("")], + metadata: .init(customData: ["tachikoma.internal.boundary": "reasoning_only"]), + ) + + _ = try await generateText( + model: .anthropic(.fable5), + messages: [.user("hi"), thinking, boundary, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 4) + if messages.count >= 3 { + #expect(messages[1].channel == .thinking) + #expect(messages[2].metadata?.customData?["tachikoma.internal.boundary"] == "reasoning_only") + } + } + + @Test + func `GenerateText strips Anthropic thinking from different configured endpoint`() async throws { + let seenMessages = MessageBox() + let config = TachikomaConfiguration( + apiKeys: [:], + baseURLs: ["anthropic": "https://user:secret@proxy.example.test?token=secret#frag"], + ) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse(text: "ok", finishReason: .stop)) { request in + seenMessages.messages = request.messages + } + } + let thinking = try ModelMessage( + role: .assistant, + content: [.text("private")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-fable-5", + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + "tachikoma.reasoning.provider": "anthropic", + "tachikoma.reasoning.model": "claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://api.anthropic.com")), + ]), + ) + + _ = try await generateText( + model: .anthropic(.fable5), + messages: [.user("hi"), thinking, .assistant("visible")], + configuration: config, + ) + + let messages = try #require(seenMessages.messages) + #expect(messages.count == 2) + #expect(messages.allSatisfy { $0.channel != .thinking }) + } + + @Test + func `GenerateText preserves content filter user turn`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse( + text: "Refused by policy", + usage: Usage(inputTokens: 1, outputTokens: 0), + finishReason: .contentFilter, + )) + } + + let result = try await generateText( + model: .anthropic(.fable5), + messages: [.user("blocked")], + configuration: config, + ) + + #expect(result.text.isEmpty) + #expect(result.messages.count == 1) + #expect(result.messages.first?.role == .user) + #expect(result.messages.first?.content == [.text("blocked")]) + } + + @Test + func `GenerateText tags OpenRouter reasoning with configured endpoint`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setBaseURL("https://user:secret@proxy.example.test/api/v1?token=secret#frag", for: .custom("openrouter")) + config.setProviderFactoryOverride { _, _ in + StaticProvider(response: ProviderResponse( + text: "", + finishReason: .toolCalls, + toolCalls: [AgentToolCall(id: "call-1", name: "lookup", arguments: [:])], + reasoning: [ + ProviderReasoningBlock( + text: "", + type: "openrouter_reasoning_details", + rawJSON: #"[{"type":"reasoning.encrypted","data":"sealed"}]"#, + ), + ], + )) + } + + let result = try await generateText( + model: .openRouter(modelId: "anthropic/claude-fable-5"), + messages: [.user("hi")], + configuration: config, + ) + + let thinking = try #require(result.messages.first { $0.channel == .thinking }) + #expect(thinking.metadata?.customData?["tachikoma.reasoning.provider"] == "openrouter") + #expect(thinking.metadata?.customData?["tachikoma.reasoning.model"] == "anthropic/claude-fable-5") + let endpointIdentity = try #require(thinking.metadata?.customData?["tachikoma.reasoning.base_url"]) + #expect(endpointIdentity == ReasoningEndpointIdentity + .canonical("https://proxy.example.test/api/v1?token=secret")) + #expect(endpointIdentity.contains("secret") == false) + #expect(endpointIdentity.contains("token") == false) + #expect(thinking.metadata?.customData?["openrouter.reasoning_details"]?.contains("sealed") == true) + } + // MARK: - Image Input Type Tests @Test @@ -306,3 +1901,95 @@ struct GenerationTests { } } } + +@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) +private struct StaticProvider: ModelProvider { + let modelId: String + let response: ProviderResponse + let capabilities: ModelCapabilities + let onGenerate: (@Sendable (ProviderRequest) -> Void)? + let streamDeltas: [TextStreamDelta] + + init( + modelId: String = "static-provider", + response: ProviderResponse, + capabilities: ModelCapabilities = ModelCapabilities(), + streamDeltas: [TextStreamDelta] = [], + onGenerate: (@Sendable (ProviderRequest) -> Void)? = nil, + ) { + self.modelId = modelId + self.response = response + self.capabilities = capabilities + self.streamDeltas = streamDeltas + self.onGenerate = onGenerate + } + + var baseURL: String? { + nil + } + + var apiKey: String? { + nil + } + + func generateText(request: ProviderRequest) async throws -> ProviderResponse { + self.onGenerate?(request) + return self.response + } + + func streamText(request: ProviderRequest) async throws -> AsyncThrowingStream { + self.onGenerate?(request) + return AsyncThrowingStream { continuation in + for delta in self.streamDeltas { + continuation.yield(delta) + } + continuation.finish() + } + } +} + +@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *) +private struct SequenceProvider: ModelProvider { + let modelId = "sequence-provider" + let baseURL: String? = nil + let apiKey: String? = nil + let capabilities = ModelCapabilities() + private let queue: ResponseQueue + + init(responses: [ProviderResponse]) { + self.queue = ResponseQueue(responses: responses) + } + + func generateText(request _: ProviderRequest) async throws -> ProviderResponse { + self.queue.next() + } + + func streamText(request _: ProviderRequest) async throws -> AsyncThrowingStream { + AsyncThrowingStream { continuation in + continuation.finish() + } + } +} + +private final class ResponseQueue: @unchecked Sendable { + private let lock = NSLock() + private var responses: [ProviderResponse] + + init(responses: [ProviderResponse]) { + self.responses = responses + } + + func next() -> ProviderResponse { + self.lock.lock() + defer { self.lock.unlock() } + + if self.responses.count > 1 { + return self.responses.removeFirst() + } + return self.responses[0] + } +} + +private final class MessageBox: @unchecked Sendable { + var messages: [ModelMessage]? +} diff --git a/Tests/TachikomaTests/Core/LanguageModelCoverageTests.swift b/Tests/TachikomaTests/Core/LanguageModelCoverageTests.swift index 44a47b0..f868850 100644 --- a/Tests/TachikomaTests/Core/LanguageModelCoverageTests.swift +++ b/Tests/TachikomaTests/Core/LanguageModelCoverageTests.swift @@ -76,7 +76,7 @@ struct LanguageModelCoverageTests { func `LanguageModel top level switches`() { let baseModels: [LanguageModel] = [ .openai(.gpt55), - .anthropic(.opus48), + .anthropic(.fable5), .google(.gemini35Flash), .mistral(.medium35), .groq(.llama3370b), diff --git a/Tests/TachikomaTests/Core/MinimalModernAPITests.swift b/Tests/TachikomaTests/Core/MinimalModernAPITests.swift index 0f2b6bf..311416f 100644 --- a/Tests/TachikomaTests/Core/MinimalModernAPITests.swift +++ b/Tests/TachikomaTests/Core/MinimalModernAPITests.swift @@ -42,6 +42,124 @@ struct MinimalModernAPITests { } } + @Test + func `Streaming default value`() { + #expect(Model.default.supportsStreaming == false) + #expect(Model.defaultStreaming == .openai(.gpt55)) + #expect(Model.defaultStreaming.supportsStreaming == true) + } + + @Test + func `Agent default model preserves execution default`() { + let agent = Agent(name: "test", instructions: "test", context: ()) + + #expect(agent.model == .default) + } + + @Test + func `Agent stream uses streaming fallback for execution default`() async throws { + let seenModel = MinimalModelBox() + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { model, _ in + seenModel.model = model + return MinimalStreamingProvider(deltas: [ + .text("ok"), + .done(finishReason: .stop), + ]) + } + let agent = Agent(name: "test", instructions: "test", configuration: config, context: ()) + + let stream = try await agent.stream("hi") + var received = "" + for try await delta in stream where delta.type == .textDelta { + received += delta.content ?? "" + } + + #expect(agent.model == .default) + #expect(seenModel.model == .openai(.gpt55)) + #expect(!received.isEmpty) + } + + @Test + func `Agent stream rejects explicit execution default`() async throws { + let agent = Agent(name: "test", instructions: "test", model: .default, context: ()) + + await #expect(throws: TachikomaError.self) { + _ = try await agent.stream("hi") + } + } + + @Test + func `Agent stream rejects nonstreaming model after mutation`() async throws { + let agent = Agent(name: "test", instructions: "test", context: ()) + agent.model = .anthropic(.fable5) + + await #expect(throws: TachikomaError.self) { + _ = try await agent.stream("hi") + } + } + + @Test + func `Agent stream flushes buffered text on natural completion`() async throws { + let provider = MinimalStreamingProvider(deltas: [ + .text("ok"), + ]) + let agent = Agent( + name: "test", + instructions: "test", + model: .custom(provider: provider), + context: (), + ) + + let stream = try await agent.stream("hi") + var received = "" + for try await delta in stream where delta.type == .textDelta { + received += delta.content ?? "" + } + + #expect(received == "ok") + } + + @Test + func `Agent stream flushes buffered compatible text when done has no finish reason`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStreamingProvider(deltas: [ + .text("ok"), + .done(), + ]) + } + let agent = Agent( + name: "test", + instructions: "test", + model: .openaiCompatible(modelId: "gpt-compatible", baseURL: "https://example.test"), + configuration: config, + context: (), + ) + + let stream = try await agent.stream("hi") + var received = "" + for try await delta in stream where delta.type == .textDelta { + received += delta.content ?? "" + } + + #expect(received == "ok") + #expect(agent.conversation.messages.map(\.content) == ["test", "hi", "ok"]) + } + + @Test + func `Agent conversation uses agent configuration`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStaticProvider(response: ProviderResponse(text: "configured", finishReason: .stop)) + } + let agent = Agent(name: "test", instructions: "test", configuration: config, context: ()) + + let text = try await agent.conversation.continueConversation(using: .openai(.gpt55)) + + #expect(text == "configured") + } + // MARK: - Tool System Tests @Test @@ -126,4 +244,757 @@ extension MinimalModernAPITests { #expect(message.role == .user) #expect(message.content == "Test") } + + @Test + func `Conversation preserves signed thinking messages`() { + let conversation = Conversation() + let signedThinking = ModelMessage( + role: .assistant, + content: [.text("private reasoning")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + ]), + ) + + conversation.replaceModelMessages([.user("hi"), signedThinking, .assistant("hello")]) + + let messages = conversation.getModelMessages() + #expect(messages.count == 3) + #expect(messages[1] == signedThinking) + #expect(conversation.messages[1].content == "private reasoning") + } + + @Test + func `Conversation merge preserves messages appended after snapshot`() { + let conversation = Conversation() + conversation.addUserMessage("original") + let snapshotCount = conversation.messages.count + conversation.addUserMessage("concurrent") + + conversation.mergeGeneratedMessages( + [.user("original"), .assistant("generated")], + replacingPrefixCount: snapshotCount, + ) + + let messages = conversation.getModelMessages() + #expect(messages.map(\.role) == [.user, .assistant, .user]) + if case let .text(text) = messages[2].content.first { + #expect(text == "concurrent") + } else { + Issue.record("Expected preserved concurrent user message") + } + } + + @Test + func `Conversation refusal rollback preserves messages appended after snapshot`() { + let conversation = Conversation() + conversation.addUserMessage("blocked") + let snapshotIDs = conversation.messages.map(\.id) + conversation.addUserMessage("concurrent") + + let didReplace = conversation.replaceModelMessages([], validatingSnapshotIDs: snapshotIDs) + + #expect(didReplace == true) + #expect(conversation.messages.map(\.content) == ["concurrent"]) + } + + @Test + func `Conversation lock removes cancelled waiters`() async throws { + let conversation = Conversation() + let probe = ConversationLockProbe() + + let first = Task { + try await conversation.withContinuationLock { + await probe.markFirstStarted() + await probe.waitForRelease() + } + } + + await probe.waitUntilFirstStarted() + + let second = Task { + try await conversation.withContinuationLock { + await probe.markSecondRan() + } + } + + try await Task.sleep(nanoseconds: 10_000_000) + second.cancel() + + do { + try await second.value + Issue.record("Expected queued waiter to be cancelled") + } catch is CancellationError { + // Expected + } + + await probe.releaseFirst() + try await first.value + + try await conversation.withContinuationLock { + await probe.markThirdRan() + } + + #expect(await probe.secondRan == false) + #expect(await probe.thirdRan == true) + } + + @Test + func `Conversation append generated messages preserves concurrent appends`() { + let conversation = Conversation() + conversation.addUserMessage("original") + let anchorID = conversation.messages[0].id + conversation.addUserMessage("concurrent") + + conversation.appendGeneratedMessages([.assistant("generated")], afterMessageID: anchorID) + + #expect(conversation.messages.map(\.content) == ["original", "generated", "concurrent"]) + } + + @Test + func `Conversation continue persists generated message from empty history`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStaticProvider(response: ProviderResponse(text: "hello", finishReason: .stop)) + } + let conversation = Conversation(configuration: config) + + let text = try await conversation.continueConversation(using: .anthropic(.opus48)) + + #expect(text == "hello") + #expect(conversation.messages.map(\.content) == ["hello"]) + } + + @Test + func `Conversation continue rolls back refused trailing user turn`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStaticProvider(response: ProviderResponse(text: "Refused by policy", finishReason: .contentFilter)) + } + let conversation = Conversation(configuration: config) + conversation.addUserMessage("blocked") + + let text = try await conversation.continueConversation(using: .anthropic(.fable5)) + + #expect(text.isEmpty) + #expect(conversation.messages.isEmpty) + } + + @Test + func `Conversation continue preserves completed tool history after late refusal`() async throws { + let provider = MinimalSequenceProvider(responses: [ + ProviderResponse( + text: "", + finishReason: .toolCalls, + toolCalls: [AgentToolCall(id: "call-1", name: "side_effect", arguments: [:])], + ), + ProviderResponse(text: "Refused by policy", finishReason: .contentFilter), + ]) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in provider } + let conversation = Conversation(configuration: config) + conversation.addUserMessage("do it") + + let text = try await conversation.continueConversation( + using: .anthropic(.fable5), + tools: [sideEffectTool], + maxSteps: 2, + ) + + #expect(text.isEmpty) + let messages = conversation.getModelMessages() + #expect(messages.map(\.role) == [.user, .assistant, .tool]) + #expect(messages[0].content == [.text("do it")]) + #expect(messages[1].content.contains { part in + if case let .toolCall(toolCall) = part { + return toolCall.id == "call-1" + } + return false + }) + #expect(messages[2].content.contains { part in + if case let .toolResult(toolResult) = part { + return toolResult.toolCallId == "call-1" + } + return false + }) + } + + @Test + func `Agent stream rejects non-streaming model before mutating conversation`() async throws { + let agent = Agent( + name: "test", + instructions: "test", + model: .anthropic(.fable5), + context: (), + ) + + await #expect(throws: TachikomaError.self) { + _ = try await agent.stream("hi") + } + + #expect(agent.conversation.messages.map(\.content) == ["test"]) + } + + @Test + func `Conversation streaming rolls back refused trailing user turn`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStreamingProvider(deltas: [ + .text("partial"), + .done(finishReason: .contentFilter), + ]) + } + let conversation = Conversation(configuration: config) + conversation.addUserMessage("blocked") + + let stream = try await conversation.continueConversationStreaming(using: .openai(.gpt55)) + var received = "" + for try await chunk in stream { + received += chunk + } + + #expect(received == "partial") + #expect(conversation.messages.isEmpty) + } + + @Test + func `Conversation streaming flushes buffered text on natural completion`() async throws { + let provider = MinimalStreamingProvider(deltas: [ + .text("ok"), + ]) + let conversation = Conversation(configuration: TachikomaConfiguration(loadFromEnvironment: false)) + + let stream = try await conversation.continueConversationStreaming(using: .custom(provider: provider)) + var received = "" + for try await chunk in stream { + received += chunk + } + + #expect(received == "ok") + #expect(conversation.messages.map(\.content) == ["ok"]) + } + + @Test + func `Conversation streaming flushes buffered compatible text when done has no finish reason`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStreamingProvider(deltas: [ + .text("ok"), + .done(), + ]) + } + let conversation = Conversation(configuration: config) + + let stream = try await conversation.continueConversationStreaming( + using: .openaiCompatible(modelId: "gpt-compatible", baseURL: "https://example.test"), + ) + var received = "" + for try await chunk in stream { + received += chunk + } + + #expect(received == "ok") + #expect(conversation.messages.map(\.content) == ["ok"]) + } + + @Test + func `Conversation streaming flushes compatible text when stream ends without done`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStreamingProvider(deltas: [ + .text("partial"), + ]) + } + let conversation = Conversation(configuration: config) + + let stream = try await conversation.continueConversationStreaming( + using: .openaiCompatible(modelId: "gpt-compatible", baseURL: "https://example.test"), + ) + + var received = "" + for try await chunk in stream { + received += chunk + } + + #expect(received == "partial") + #expect(conversation.messages.map(\.content) == ["partial"]) + } +} + +@Suite(.serialized) +private struct AgentRefusalTests { + @Test + func `Agent execute rolls back refused user turn`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStaticProvider(response: ProviderResponse(text: "Refused by policy", finishReason: .contentFilter)) + } + + let agent = Agent( + name: "test", + instructions: "test", + model: .anthropic(.fable5), + configuration: config, + context: (), + ) + + let response = try await agent.execute("blocked") + + #expect(response.text.isEmpty) + #expect(response.finishReason == .contentFilter) + #expect(agent.conversation.messages.map(\.content) == ["test"]) + } + + @Test + func `Agent stream stays incremental by default when terminal content filter arrives`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStreamingProvider(deltas: [ + .text("partial"), + .done(finishReason: .contentFilter), + ]) + } + + let agent = Agent( + name: "test", + instructions: "test", + model: .openai(.gpt55), + configuration: config, + context: (), + ) + + let stream = try await agent.stream("blocked") + var received: [TextStreamDelta] = [] + for try await delta in stream { + received.append(delta) + } + + #expect(received.contains { $0.type == .textDelta && $0.content == "partial" }) + #expect(received.contains { $0.type == .done && $0.finishReason == .contentFilter }) + #expect(agent.conversation.messages.map(\.content) == ["test"]) + } + + @Test + func `Agent stream explicit terminal buffering errors when stream ends without done`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStreamingProvider(deltas: [ + .text("partial"), + ]) + } + + let agent = Agent( + name: "test", + instructions: "test", + model: .openaiCompatible(modelId: "gpt-compatible", baseURL: "https://example.test"), + settings: GenerationSettings(streamBuffering: .untilTerminal), + configuration: config, + context: (), + ) + + let stream = try await agent.stream("hi") + do { + for try await _ in stream {} + Issue.record("Expected missing terminal status error") + } catch let error as TachikomaError { + guard case let .apiError(message) = error else { + Issue.record("Expected apiError, got \(error)") + return + } + #expect(message.contains("completion status")) + } + + #expect(!agent.conversation.messages.map(\.content).contains("partial")) + } + + @Test + func `Agent stream explicit terminal buffering suppresses Azure OpenAI refusals`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStreamingProvider(deltas: [ + .text("partial"), + .done(finishReason: .contentFilter), + ]) + } + + let agent = Agent( + name: "test", + instructions: "test", + model: .azureOpenAI(deployment: "gpt-compatible", endpoint: "https://example.openai.azure.com"), + settings: GenerationSettings(streamBuffering: .untilTerminal), + configuration: config, + context: (), + ) + + let stream = try await agent.stream("blocked") + var received: [TextStreamDelta] = [] + for try await delta in stream { + received.append(delta) + } + + #expect(!received.contains { $0.type == .textDelta && $0.content == "partial" }) + #expect(received.contains { $0.type == .done && $0.finishReason == .contentFilter }) + #expect(agent.conversation.messages.map(\.content) == ["test"]) + } + + @Test + func `Agent stream explicit terminal buffering suppresses Google refusals`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStreamingProvider(deltas: [ + .text("partial"), + .done(finishReason: .contentFilter), + ]) + } + + let agent = Agent( + name: "test", + instructions: "test", + model: .google(.gemini25Flash), + settings: GenerationSettings(streamBuffering: .untilTerminal), + configuration: config, + context: (), + ) + + let stream = try await agent.stream("blocked") + var received: [TextStreamDelta] = [] + for try await delta in stream { + received.append(delta) + } + + #expect(!received.contains { $0.type == .textDelta && $0.content == "partial" }) + #expect(received.contains { $0.type == .done && $0.finishReason == .contentFilter }) + #expect(agent.conversation.messages.map(\.content) == ["test"]) + } + + @Test + func `Agent stream explicit terminal buffering suppresses registered custom OpenAI refusals`() async throws { + try await self.withRegisteredCustomProvider( + """ + { + "customProviders": { + "proxy": { + "type": "openai", + "options": { "baseURL": "https://example.test/v1" } + } + } + } + """, + ) { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + MinimalStreamingProvider( + modelId: "proxy/gpt-compatible", + deltas: [ + .text("partial"), + .done(finishReason: .contentFilter), + ], + ) + } + + let agent = Agent( + name: "test", + instructions: "test", + model: .custom(provider: MinimalStreamingProvider(modelId: "proxy/gpt-compatible", deltas: [])), + settings: GenerationSettings(streamBuffering: .untilTerminal), + configuration: config, + context: (), + ) + + let stream = try await agent.stream("blocked") + var received: [TextStreamDelta] = [] + for try await delta in stream { + received.append(delta) + } + + #expect(!received.contains { $0.type == .textDelta && $0.content == "partial" }) + #expect(received.contains { $0.type == .done && $0.finishReason == .contentFilter }) + #expect(agent.conversation.messages.map(\.content) == ["test"]) + } + } + + @Test + func `Agent stream releases continuation gate when consumer stops early`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in + StallingStreamingProvider() + } + let agent = Agent( + name: "test", + instructions: "test", + model: .custom(provider: StallingStreamingProvider()), + configuration: config, + context: (), + ) + + do { + let stream = try await agent.stream("first") + var iterator = stream.makeAsyncIterator() + let firstDelta = try await iterator.next() + #expect(firstDelta?.type == .textDelta) + #expect(firstDelta?.content == "partial") + } + try await Task.sleep(nanoseconds: 10_000_000) + + let response = try await withTimeout(0.2) { + try await agent.execute("second") + } + #expect(response.text == "after") + } + + @Test + func `Agent execute preserves completed tool history after late refusal`() async throws { + let provider = MinimalSequenceProvider(responses: [ + ProviderResponse( + text: "", + finishReason: .toolCalls, + toolCalls: [AgentToolCall(id: "call-1", name: "side_effect", arguments: [:])], + ), + ProviderResponse(text: "Refused by policy", finishReason: .contentFilter), + ]) + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setProviderFactoryOverride { _, _ in provider } + + let agent = Agent( + name: "test", + instructions: "test", + model: .anthropic(.fable5), + tools: [sideEffectTool], + configuration: config, + context: (), + ) + + let response = try await agent.execute("do it") + + #expect(response.text.isEmpty) + #expect(response.finishReason == .contentFilter) + let messages = agent.conversation.getModelMessages() + #expect(messages.map(\.role) == [.system, .user, .assistant, .tool]) + #expect(messages[1].content == [.text("do it")]) + #expect(messages[2].content.contains { part in + if case let .toolCall(toolCall) = part { + return toolCall.id == "call-1" + } + return false + }) + #expect(messages[3].content.contains { part in + if case let .toolResult(toolResult) = part { + return toolResult.toolCallId == "call-1" + } + return false + }) + } + + private func withRegisteredCustomProvider( + _ configJSON: String, + operation: () async throws -> Void, + ) async throws { + let originalProfile = TachikomaConfiguration.profileDirectoryName + let tempProfile = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString) + let emptyProfile = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString) + + try FileManager.default.createDirectory(at: tempProfile, withIntermediateDirectories: true) + try FileManager.default.createDirectory(at: emptyProfile, withIntermediateDirectories: true) + try configJSON.write(to: tempProfile.appendingPathComponent("config.json"), atomically: true, encoding: .utf8) + try #"{"customProviders":{}}"#.write( + to: emptyProfile.appendingPathComponent("config.json"), + atomically: true, + encoding: .utf8, + ) + + TachikomaConfiguration.profileDirectoryName = tempProfile.path + CustomProviderRegistry.shared.loadFromProfile() + + do { + try await operation() + TachikomaConfiguration.profileDirectoryName = emptyProfile.path + CustomProviderRegistry.shared.loadFromProfile() + TachikomaConfiguration.profileDirectoryName = originalProfile + } catch { + TachikomaConfiguration.profileDirectoryName = emptyProfile.path + CustomProviderRegistry.shared.loadFromProfile() + TachikomaConfiguration.profileDirectoryName = originalProfile + throw error + } + } +} + +private struct StallingStreamingProvider: ModelProvider { + let modelId = "stalling-streaming" + let baseURL: String? = nil + let apiKey: String? = nil + let capabilities = ModelCapabilities(supportsStreaming: true) + + func generateText(request _: ProviderRequest) async throws -> ProviderResponse { + ProviderResponse(text: "after") + } + + func streamText(request _: ProviderRequest) async throws -> AsyncThrowingStream { + AsyncThrowingStream { continuation in + continuation.yield(.text("partial")) + } + } +} + +private actor ConversationLockProbe { + var secondRan = false + var thirdRan = false + private var firstStarted = false + private var firstStartedWaiters: [CheckedContinuation] = [] + private var releaseWaiters: [CheckedContinuation] = [] + + func markFirstStarted() { + self.firstStarted = true + let waiters = self.firstStartedWaiters + self.firstStartedWaiters.removeAll() + for waiter in waiters { + waiter.resume() + } + } + + func waitUntilFirstStarted() async { + if self.firstStarted { + return + } + + await withCheckedContinuation { continuation in + self.firstStartedWaiters.append(continuation) + } + } + + func waitForRelease() async { + await withCheckedContinuation { continuation in + self.releaseWaiters.append(continuation) + } + } + + func releaseFirst() { + let waiters = self.releaseWaiters + self.releaseWaiters.removeAll() + for waiter in waiters { + waiter.resume() + } + } + + func markSecondRan() { + self.secondRan = true + } + + func markThirdRan() { + self.thirdRan = true + } +} + +private final class MinimalModelBox: @unchecked Sendable { + private let lock = NSLock() + private var _model: LanguageModel? + + var model: LanguageModel? { + get { + self.lock.lock() + defer { self.lock.unlock() } + return self._model + } + set { + self.lock.lock() + self._model = newValue + self.lock.unlock() + } + } +} + +private struct MinimalStaticProvider: ModelProvider { + let modelId = "minimal-static" + let baseURL: String? = nil + let apiKey: String? = nil + let capabilities = ModelCapabilities() + let response: ProviderResponse + + func generateText(request _: ProviderRequest) async throws -> ProviderResponse { + self.response + } + + func streamText(request _: ProviderRequest) async throws -> AsyncThrowingStream { + AsyncThrowingStream { continuation in + continuation.finish() + } + } +} + +private struct MinimalStreamingProvider: ModelProvider { + let modelId: String + let baseURL: String? = nil + let apiKey: String? = nil + let capabilities = ModelCapabilities(supportsStreaming: true) + let deltas: [TextStreamDelta] + + init(modelId: String = "minimal-streaming", deltas: [TextStreamDelta]) { + self.modelId = modelId + self.deltas = deltas + } + + func generateText(request _: ProviderRequest) async throws -> ProviderResponse { + ProviderResponse(text: "") + } + + func streamText(request _: ProviderRequest) async throws -> AsyncThrowingStream { + AsyncThrowingStream { continuation in + for delta in self.deltas { + continuation.yield(delta) + } + continuation.finish() + } + } +} + +private struct MinimalSequenceProvider: ModelProvider { + let modelId = "minimal-sequence" + let baseURL: String? = nil + let apiKey: String? = nil + let capabilities = ModelCapabilities() + private let queue: MinimalResponseQueue + + init(responses: [ProviderResponse]) { + self.queue = MinimalResponseQueue(responses: responses) + } + + func generateText(request _: ProviderRequest) async throws -> ProviderResponse { + self.queue.next() + } + + func streamText(request _: ProviderRequest) async throws -> AsyncThrowingStream { + AsyncThrowingStream { continuation in + continuation.finish() + } + } +} + +private final class MinimalResponseQueue: @unchecked Sendable { + private let lock = NSLock() + private var responses: [ProviderResponse] + + init(responses: [ProviderResponse]) { + self.responses = responses + } + + func next() -> ProviderResponse { + self.lock.lock() + defer { self.lock.unlock() } + + if self.responses.count > 1 { + return self.responses.removeFirst() + } + return self.responses[0] + } +} + +private let sideEffectTool = Tachikoma.createTool( + name: "side_effect", + description: "Records an external action", + parameters: [], + required: [], +) { _ in + AnyAgentToolValue(string: "done") } diff --git a/Tests/TachikomaTests/Core/ModelCapabilitiesTests.swift b/Tests/TachikomaTests/Core/ModelCapabilitiesTests.swift index 7900982..fd41f76 100644 --- a/Tests/TachikomaTests/Core/ModelCapabilitiesTests.swift +++ b/Tests/TachikomaTests/Core/ModelCapabilitiesTests.swift @@ -63,6 +63,7 @@ enum ModelCapabilitiesTests { @Test func `Claude models support thinking`() { let models: [LanguageModel] = [ + .anthropic(.fable5), .anthropic(.opus47), .anthropic(.opus4), .anthropic(.sonnet46), @@ -79,8 +80,8 @@ enum ModelCapabilitiesTests { } @Test - func `Claude Opus 4_7 and 4_8 advertise adaptive thinking without sampling options`() { - for model in [LanguageModel.anthropic(.opus47), .anthropic(.opus48)] { + func `Claude Fable 5 and Opus 4_7 plus 4_8 advertise adaptive thinking without sampling options`() { + for model in [LanguageModel.anthropic(.fable5), .anthropic(.opus47), .anthropic(.opus48)] { let capabilities = ModelCapabilityRegistry.shared.capabilities(for: model) #expect(!capabilities.supportsTemperature) @@ -196,6 +197,19 @@ enum ModelCapabilitiesTests { #expect(validated.providerOptions.openai?.previousResponseId == "test-123") // Kept } + @Test + func `Validate settings preserves stream buffering mode`() { + let settings = GenerationSettings( + temperature: 0.7, + streamBuffering: .untilTerminal, + ) + + let validated = settings.validated(for: .openai(.gpt55)) + + #expect(validated.temperature == nil) + #expect(validated.streamBuffering == .untilTerminal) + } + @Test func `Validate settings for GPT-5 strips unsupported options`() { let settings = GenerationSettings( @@ -290,6 +304,49 @@ enum ModelCapabilitiesTests { #expect(validated.providerOptions.anthropic?.thinking != nil) #expect(validated.providerOptions.anthropic?.cacheControl == .persistent) } + + @Test + func `Validate Anthropic-compatible Fable strips unsupported sampling`() { + let settings = GenerationSettings( + temperature: 0.7, + topP: 0.9, + topK: 40, + providerOptions: .init( + anthropic: .init(thinking: .adaptive), + ), + ) + + let validated = settings.validated(for: LanguageModel.anthropicCompatible( + modelId: "claude-fable-5", + baseURL: "https://example.test", + )) + + #expect(validated.temperature == nil) + #expect(validated.topP == nil) + #expect(validated.topK == nil) + #expect(validated.providerOptions.anthropic?.thinking != nil) + } + + @Test + func `Validate direct custom Fable strips unsupported sampling`() { + let settings = GenerationSettings( + temperature: 0.7, + topP: 0.9, + topK: 40, + providerOptions: .init( + anthropic: .init(thinking: .adaptive), + ), + ) + + let validated = settings.validated( + for: LanguageModel.anthropic(.custom("anthropic.claude-fable-5")), + ) + + #expect(validated.temperature == nil) + #expect(validated.topP == nil) + #expect(validated.topK == nil) + #expect(validated.providerOptions.anthropic?.thinking != nil) + } } struct CustomModelTests { diff --git a/Tests/TachikomaTests/Core/ModelParsingTests.swift b/Tests/TachikomaTests/Core/ModelParsingTests.swift index e88d108..54e6a4c 100644 --- a/Tests/TachikomaTests/Core/ModelParsingTests.swift +++ b/Tests/TachikomaTests/Core/ModelParsingTests.swift @@ -43,10 +43,19 @@ struct ModelParsingTests { } } + @Test + func `parse Claude Fable 5 model id`() throws { + #expect(LanguageModel.parse(from: "claude-fable-5") == .anthropic(.fable5)) + #expect(LanguageModel.parse(from: "fable") == .anthropic(.fable5)) + #expect(try ModelSelector.parseModel("fable5") == .anthropic(.fable5)) + #expect(LanguageModel.parse(from: "my-fable5-7b") == nil) + } + @Test func `parse Claude Opus 4.8 model id`() { let parsed = LanguageModel.parse(from: "claude-opus-4-8") #expect(parsed == .anthropic(.opus48)) + #expect(LanguageModel.parse(from: "my-opus48-distill") == nil) } @Test @@ -56,9 +65,10 @@ struct ModelParsingTests { } @Test - func `parse shorthand Claude alias`() { + func `parse shorthand Claude alias`() throws { let parsed = LanguageModel.parse(from: "claude") #expect(parsed == .anthropic(.opus48)) + #expect(try ModelSelector.parseModel("anthropic") == .anthropic(.opus48)) } @Test @@ -75,6 +85,7 @@ struct ModelParsingTests { @Test func `parse provider qualified latest hosted models`() throws { + #expect(LanguageModel.parse(from: "anthropic/claude-fable-5") == .anthropic(.fable5)) #expect(LanguageModel.parse(from: "anthropic/claude-opus-4-8") == .anthropic(.opus48)) #expect(LanguageModel.parse(from: "google/gemini-3.5-flash") == .google(.gemini35Flash)) #expect(LanguageModel.parse(from: "xai/grok-4.3-latest") == .grok(.grok43)) diff --git a/Tests/TachikomaTests/Core/OpenAICompatibleHelperTests.swift b/Tests/TachikomaTests/Core/OpenAICompatibleHelperTests.swift index 87ee5e8..6887c38 100644 --- a/Tests/TachikomaTests/Core/OpenAICompatibleHelperTests.swift +++ b/Tests/TachikomaTests/Core/OpenAICompatibleHelperTests.swift @@ -121,6 +121,49 @@ struct OpenAICompatibleHelperTests { #expect(deltas == "Hello world") } + @Test + func `streamText maps content filter finish reasons`() async throws { + let request = ProviderRequest( + messages: [ModelMessage(role: .user, content: [.text("blocked")])], + ) + + let deltas = try await withMockedSession { urlRequest in + let sse = """ + data: {\"id\":\"chunk_1\",\"choices\":[{\"delta\":{\"content\":\"partial\"},\"index\":0,\"finish_reason\":null}]} + + data: {\"id\":\"chunk_2\",\"choices\":[{\"delta\":{},\"index\":0,\"finish_reason\":\"content_filter\"}]} + + data: [DONE] + + """.utf8Data() + let response = HTTPURLResponse( + url: urlRequest.url!, + statusCode: 200, + httpVersion: nil, + headerFields: ["Content-Type": "text/event-stream"], + )! + return (response, sse) + } operation: { session in + let stream = try await OpenAICompatibleHelper.streamText( + request: request, + modelId: "compatible-model", + baseURL: "https://mock.compatible", + apiKey: "sk-test", + providerName: "TestProvider", + session: session, + ) + + var deltas: [TextStreamDelta] = [] + for try await delta in stream { + deltas.append(delta) + } + return deltas + } + + #expect(deltas.contains { $0.type == .textDelta && $0.content == "partial" }) + #expect(deltas.contains { $0.type == .done && $0.finishReason == .contentFilter }) + } + @Test func `OpenAI-compatible provider forwards configured headers`() async throws { let request = ProviderRequest( @@ -149,6 +192,266 @@ struct OpenAICompatibleHelperTests { } } + @Test + func `generateText decodes OpenRouter reasoning details`() async throws { + let response = try await withMockedSession { urlRequest in + let reasoningDetails: [[String: String]] = [["type": "reasoning.encrypted", "data": "sealed"]] + let toolCall: [String: Any] = [ + "id": "call-1", + "type": "function", + "function": ["name": "lookup", "arguments": "{}"], + ] + let toolCalls = [toolCall] + let choice: [String: Any] = [ + "index": 0, + "message": [ + "role": "assistant", + "content": NSNull(), + "reasoning_details": reasoningDetails, + "tool_calls": toolCalls, + ], + "finish_reason": "tool_calls", + ] + let payload: [String: Any] = [ + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1_700_000_000, + "model": "anthropic/claude-fable-5", + "choices": [choice], + ] + return try self.jsonResponse(for: urlRequest, data: JSONSerialization.data(withJSONObject: payload)) + } operation: { session in + try await OpenAICompatibleHelper.generateText( + request: ProviderRequest(messages: [.user("hi")]), + modelId: "anthropic/claude-fable-5", + baseURL: "https://mock.compatible", + apiKey: "sk-test", + providerName: "OpenRouter", + session: session, + ) + } + + let reasoning = try #require(response.reasoning.first) + #expect(reasoning.type == "openrouter_reasoning_details") + #expect(reasoning.rawJSON?.contains("reasoning.encrypted") == true) + #expect(response.toolCalls?.first?.id == "call-1") + } + + @Test + func `generateText strips unsupported Fable sampling for OpenRouter route`() async throws { + let capture = CapturedRequest() + let request = ProviderRequest( + messages: [ModelMessage(role: .user, content: [.text("ping")])], + settings: GenerationSettings(maxTokens: 128, temperature: 0.7), + ) + + _ = try await self.withMockedSession { urlRequest in + capture.body = self.bodyData(from: urlRequest) + return self.jsonResponse(for: urlRequest, data: Self.chatCompletionPayload(text: "pong")) + } operation: { session in + try await OpenAICompatibleHelper.generateText( + request: request, + modelId: "anthropic/claude-fable-5", + baseURL: "https://mock.compatible", + apiKey: "sk-test", + providerName: "OpenRouter", + session: session, + ) + } + + let bodyJSON = try #require(capture.body).jsonObject() + #expect(bodyJSON["temperature"] == nil) + #expect(bodyJSON["max_tokens"] as? Int == 128) + } + + @Test + func `generateText replays OpenRouter reasoning details on assistant tool messages`() async throws { + let capture = CapturedRequest() + let rawReasoning = #"[{"type":"reasoning.encrypted","data":"sealed"}]"# + let call = AgentToolCall(id: "call-1", name: "lookup", arguments: [:]) + let request = try ProviderRequest(messages: [ + .user("hi"), + ModelMessage( + role: .assistant, + content: [.text("")], + channel: .thinking, + metadata: .init(customData: [ + "openrouter.reasoning_details": rawReasoning, + "tachikoma.reasoning.provider": "openrouter", + "tachikoma.reasoning.model": "anthropic/claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://mock.compatible")), + ]), + ), + ModelMessage(role: .assistant, content: [.toolCall(call)]), + ModelMessage( + role: .tool, + content: [.toolResult(.success(toolCallId: "call-1", result: AnyAgentToolValue(string: "ok")))], + ), + ]) + + _ = try await self.withMockedSession { urlRequest in + capture.body = self.bodyData(from: urlRequest) + return self.jsonResponse(for: urlRequest, data: Self.chatCompletionPayload(text: "done")) + } operation: { session in + try await OpenAICompatibleHelper.generateText( + request: request, + modelId: "anthropic/claude-fable-5", + baseURL: "https://mock.compatible", + apiKey: "sk-test", + providerName: "OpenRouter", + session: session, + ) + } + + let bodyJSON = try #require(capture.body).jsonObject() + let messages = try #require(bodyJSON["messages"] as? [[String: Any]]) + let assistant = try #require(messages.first { $0["role"] as? String == "assistant" }) + let details = try #require(assistant["reasoning_details"] as? [[String: Any]]) + #expect(details.first?["type"] as? String == "reasoning.encrypted") + #expect(details.first?["data"] as? String == "sealed") + #expect(assistant["tool_calls"] != nil) + } + + @Test + func `generateText replays OpenRouter reasoning details on reasoning-only assistant boundary`() async throws { + let capture = CapturedRequest() + let rawReasoning = #"[{"type":"reasoning.encrypted","data":"sealed"}]"# + let request = try ProviderRequest(messages: [ + .user("first"), + ModelMessage( + role: .assistant, + content: [.text("")], + channel: .thinking, + metadata: .init(customData: [ + "openrouter.reasoning_details": rawReasoning, + "tachikoma.reasoning.provider": "openrouter", + "tachikoma.reasoning.model": "anthropic/claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://mock.compatible")), + ]), + ), + ModelMessage( + role: .assistant, + content: [.text("")], + metadata: .init(customData: ["tachikoma.internal.boundary": "reasoning_only"]), + ), + .user("next"), + ]) + + _ = try await self.withMockedSession { urlRequest in + capture.body = self.bodyData(from: urlRequest) + return self.jsonResponse(for: urlRequest, data: Self.chatCompletionPayload(text: "done")) + } operation: { session in + try await OpenAICompatibleHelper.generateText( + request: request, + modelId: "anthropic/claude-fable-5", + baseURL: "https://mock.compatible", + apiKey: "sk-test", + providerName: "OpenRouter", + session: session, + ) + } + + let bodyJSON = try #require(capture.body).jsonObject() + let messages = try #require(bodyJSON["messages"] as? [[String: Any]]) + let assistantIndex = try #require(messages.firstIndex { $0["role"] as? String == "assistant" }) + let assistant = messages[assistantIndex] + let details = try #require(assistant["reasoning_details"] as? [[String: Any]]) + #expect(details.first?["data"] as? String == "sealed") + let nextMessage = try #require(messages.indices + .contains(assistantIndex + 1) ? messages[assistantIndex + 1] : nil) + #expect(nextMessage["role"] as? String == "user") + } + + @Test + func `generateText does not replay OpenRouter reasoning from another endpoint`() async throws { + let capture = CapturedRequest() + let rawReasoning = #"[{"type":"reasoning.encrypted","data":"sealed"}]"# + let call = AgentToolCall(id: "call-1", name: "lookup", arguments: [:]) + let request = try ProviderRequest(messages: [ + .user("hi"), + ModelMessage( + role: .assistant, + content: [.text("")], + channel: .thinking, + metadata: .init(customData: [ + "openrouter.reasoning_details": rawReasoning, + "tachikoma.reasoning.provider": "openrouter", + "tachikoma.reasoning.model": "anthropic/claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://other.example.test")), + ]), + ), + ModelMessage(role: .assistant, content: [.toolCall(call)]), + ModelMessage( + role: .tool, + content: [.toolResult(.success(toolCallId: "call-1", result: AnyAgentToolValue(string: "ok")))], + ), + ]) + + _ = try await self.withMockedSession { urlRequest in + capture.body = self.bodyData(from: urlRequest) + return self.jsonResponse(for: urlRequest, data: Self.chatCompletionPayload(text: "done")) + } operation: { session in + try await OpenAICompatibleHelper.generateText( + request: request, + modelId: "anthropic/claude-fable-5", + baseURL: "https://mock.compatible", + apiKey: "sk-test", + providerName: "OpenRouter", + session: session, + ) + } + + let bodyJSON = try #require(capture.body).jsonObject() + let messages = try #require(bodyJSON["messages"] as? [[String: Any]]) + let assistantMessages = messages.filter { $0["role"] as? String == "assistant" } + #expect(assistantMessages.allSatisfy { $0["reasoning_details"] == nil }) + } + + @Test + func `generateText drops unmatched OpenRouter reasoning instead of serializing it as text`() async throws { + let capture = CapturedRequest() + let request = try ProviderRequest(messages: [ + .user("hi"), + ModelMessage( + role: .assistant, + content: [.text("private reasoning")], + channel: .thinking, + metadata: .init(customData: [ + "openrouter.reasoning": "private reasoning", + "tachikoma.reasoning.provider": "openrouter", + "tachikoma.reasoning.model": "other-model", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://mock.compatible")), + ]), + ), + .assistant("visible"), + ]) + + _ = try await self.withMockedSession { urlRequest in + capture.body = self.bodyData(from: urlRequest) + return self.jsonResponse(for: urlRequest, data: Self.chatCompletionPayload(text: "done")) + } operation: { session in + try await OpenAICompatibleHelper.generateText( + request: request, + modelId: "anthropic/claude-fable-5", + baseURL: "https://mock.compatible", + apiKey: "sk-test", + providerName: "OpenRouter", + session: session, + ) + } + + let bodyJSON = try #require(capture.body).jsonObject() + let messages = try #require(bodyJSON["messages"] as? [[String: Any]]) + let assistantMessages = messages.filter { $0["role"] as? String == "assistant" } + #expect(assistantMessages.count == 1) + #expect(assistantMessages.first?["content"] as? String == "visible") + #expect(try String(data: #require(capture.body), encoding: .utf8)?.contains("private reasoning") == false) + } + @Test func `non-200 responses surface TachikomaError.apiError`() async { await self.withMockedSession { urlRequest in diff --git a/Tests/TachikomaTests/Core/StopConditionsTests.swift b/Tests/TachikomaTests/Core/StopConditionsTests.swift index 5f197b9..a3c5cdb 100644 --- a/Tests/TachikomaTests/Core/StopConditionsTests.swift +++ b/Tests/TachikomaTests/Core/StopConditionsTests.swift @@ -180,6 +180,30 @@ struct StopConditionsTests { #expect(!collectedText.contains("ignored")) } + @Test + func `Stop conditions finish immediately after local match`() async throws { + let stream = AsyncThrowingStream { continuation in + Task { + continuation.yield(TextStreamDelta(type: .textDelta, content: "STOP")) + try? await Task.sleep(nanoseconds: 2_000_000_000) + continuation.yield(TextStreamDelta(type: .textDelta, content: "late")) + continuation.yield(TextStreamDelta(type: .done, finishReason: .length)) + continuation.finish() + } + } + + let start = Date() + var received: [TextStreamDelta] = [] + for try await delta in stream.stopWhen(StringStopCondition("STOP")) { + received.append(delta) + } + + #expect(Date().timeIntervalSince(start) < 0.5) + #expect(received.map(\.content).compactMap(\.self) == ["STOP"]) + #expect(received.last?.type == .done) + #expect(received.last?.finishReason == .stop) + } + // MARK: - Builder Pattern Tests @Test diff --git a/Tests/TachikomaTests/HarmonyFeatures/ResponseCacheTests.swift b/Tests/TachikomaTests/HarmonyFeatures/ResponseCacheTests.swift index 6bc87ad..37c14fb 100644 --- a/Tests/TachikomaTests/HarmonyFeatures/ResponseCacheTests.swift +++ b/Tests/TachikomaTests/HarmonyFeatures/ResponseCacheTests.swift @@ -59,6 +59,67 @@ struct ResponseCacheTests { #expect(cached?.finishReason == .stop) } + @Test + func `ResponseCache keys include reasoning metadata`() async { + let cache = ResponseCache() + let response = ProviderResponse(text: "cached", usage: nil, finishReason: .stop) + + func request(signature: String) -> ProviderRequest { + ProviderRequest( + messages: [ + .user("Hello"), + ModelMessage( + role: .assistant, + content: [.text("thinking")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.signature": signature, + "anthropic.thinking.type": "thinking", + ]), + ), + .assistant("Hi"), + ], + tools: nil, + settings: .default, + ) + } + + await cache.store(response, for: request(signature: "sig-a")) + + #expect(await cache.get(for: request(signature: "sig-a"))?.text == "cached") + #expect(await cache.get(for: request(signature: "sig-b")) == nil) + } + + @Test + func `CacheEntry size includes reasoning and assistant messages`() { + let small = CacheEntry(response: ProviderResponse(text: "ok")) + let largePayload = String(repeating: "x", count: 4096) + let large = CacheEntry(response: ProviderResponse( + text: "ok", + reasoning: [ + ProviderReasoningBlock(text: largePayload, signature: largePayload, type: "thinking"), + ProviderReasoningBlock( + text: "", + type: "openrouter_reasoning_details", + rawJSON: largePayload, + ), + ], + assistantMessages: [ + ModelMessage( + role: .assistant, + content: [.text(largePayload)], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-fable-5", + "anthropic.thinking.signature": largePayload, + ]), + ), + ], + )) + + #expect(large.estimatedMemorySize() > small.estimatedMemorySize() + 12000) + } + @Test func `ResponseCache cache miss`() async { let cache = ResponseCache() @@ -267,6 +328,99 @@ struct ResponseCacheTests { #expect(key1.hash != key3.hash) } + @Test + func `CacheKey includes reasoning effort and Anthropic thinking options`() { + let messages = [ModelMessage.user("Hello")] + let lowEffort = ProviderRequest( + messages: messages, + settings: GenerationSettings( + reasoningEffort: .low, + providerOptions: .init(anthropic: .init(thinking: .adaptive)), + ), + ) + let highEffort = ProviderRequest( + messages: messages, + settings: GenerationSettings( + reasoningEffort: .high, + providerOptions: .init(anthropic: .init(thinking: .adaptive)), + ), + ) + let disabledThinking = ProviderRequest( + messages: messages, + settings: GenerationSettings( + reasoningEffort: .low, + providerOptions: .init(anthropic: .init(thinking: .disabled)), + ), + ) + + #expect(CacheKey(from: lowEffort).hash != CacheKey(from: highEffort).hash) + #expect(CacheKey(from: lowEffort).hash != CacheKey(from: disabledThinking).hash) + } + + @Test + func `CacheKey includes string stop condition values`() { + let endRequest = ProviderRequest( + messages: [ModelMessage.user("Hello")], + settings: GenerationSettings(stopConditions: StringStopCondition("END")), + ) + let stopRequest = ProviderRequest( + messages: [ModelMessage.user("Hello")], + settings: GenerationSettings(stopConditions: StringStopCondition("STOP")), + ) + + #expect(CacheKey(from: endRequest).hash != CacheKey(from: stopRequest).hash) + } + + @Test + func `CacheKey encodes composite stop conditions without delimiter collisions`() async { + let cache = ResponseCache() + let splitRequest = ProviderRequest( + messages: [ModelMessage.user("Hello")], + settings: GenerationSettings(stopConditions: AnyStopCondition( + StringStopCondition("a"), + StringStopCondition("b"), + )), + ) + let joinedRequest = ProviderRequest( + messages: [ModelMessage.user("Hello")], + settings: GenerationSettings(stopConditions: AnyStopCondition( + StringStopCondition("a,string:true:b"), + )), + ) + + #expect(CacheKey(from: splitRequest).hash != CacheKey(from: joinedRequest).hash) + + await cache.store(ProviderResponse(text: "split", finishReason: .stop), for: splitRequest) + let joinedCached = await cache.get(for: joinedRequest) + + #expect(joinedCached == nil) + } + + @Test + func `CacheKey marks custom stop conditions uncacheable`() { + let request = ProviderRequest( + messages: [ModelMessage.user("Hello")], + settings: GenerationSettings(stopConditions: PredicateStopCondition { _, _ in false }), + ) + + let key = CacheKey(from: request) + #expect(key.isCacheable == false) + } + + @Test + func `ResponseCache skips custom stop condition entries`() async { + let cache = ResponseCache() + let request = ProviderRequest( + messages: [ModelMessage.user("Hello")], + settings: GenerationSettings(stopConditions: PredicateStopCondition { _, _ in false }), + ) + + await cache.store(ProviderResponse(text: "cached", finishReason: .stop), for: request) + let cached = await cache.get(for: request) + + #expect(cached == nil) + } + @Test func `CacheKey includes tools in hash`() { let tool1 = AgentTool( @@ -362,6 +516,38 @@ struct ResponseCacheTests { #expect(callCount.value == 1) // Provider not called again } + @Test + func `CachedProvider keys include provider endpoint identity`() async throws { + let cache = ResponseCache() + let callCountA = Box(value: 0) + let callCountB = Box(value: 0) + var providerA = ResponseCacheMockProvider( + model: .openaiCompatible(modelId: "shared-model", baseURL: "https://gateway.test/v1?tenant=a"), + response: ProviderResponse(text: "tenant-a", usage: nil, finishReason: .stop), + mockModelId: "shared-model", + mockBaseURL: "https://gateway.test/v1?tenant=a", + ) + var providerB = ResponseCacheMockProvider( + model: .openaiCompatible(modelId: "shared-model", baseURL: "https://gateway.test/v1?tenant=b"), + response: ProviderResponse(text: "tenant-b", usage: nil, finishReason: .stop), + mockModelId: "shared-model", + mockBaseURL: "https://gateway.test/v1?tenant=b", + ) + providerA.onGenerateText = { _ in callCountA.value += 1 } + providerB.onGenerateText = { _ in callCountB.value += 1 } + + let cachedA = await cache.wrapProvider(providerA) + let cachedB = await cache.wrapProvider(providerB) + let request = ProviderRequest(messages: [ModelMessage.user("Test")], tools: nil, settings: .default) + + #expect(try await cachedA.generateText(request: request).text == "tenant-a") + #expect(try await cachedB.generateText(request: request).text == "tenant-b") + #expect(try await cachedA.generateText(request: request).text == "tenant-a") + #expect(try await cachedB.generateText(request: request).text == "tenant-b") + #expect(callCountA.value == 1) + #expect(callCountB.value == 1) + } + @Test func `CachedProvider doesn't cache streaming`() async throws { let cache = ResponseCache() @@ -397,15 +583,17 @@ struct ResponseCacheTests { private struct ResponseCacheMockProvider: ModelProvider { let model: LanguageModel let response: ProviderResponse + let mockModelId: String + let mockBaseURL: String? var onGenerateText: (@Sendable (ProviderRequest) -> Void)? var onStreamText: (@Sendable (ProviderRequest) -> Void)? var modelId: String { - "mock-model" + self.mockModelId } var baseURL: String? { - nil + self.mockBaseURL } var apiKey: String? { @@ -419,11 +607,15 @@ private struct ResponseCacheMockProvider: ModelProvider { init( model: LanguageModel, response: ProviderResponse, + mockModelId: String = "mock-model", + mockBaseURL: String? = nil, onGenerateText: (@Sendable (ProviderRequest) -> Void)? = nil, onStreamText: (@Sendable (ProviderRequest) -> Void)? = nil, ) { self.model = model self.response = response + self.mockModelId = mockModelId + self.mockBaseURL = mockBaseURL self.onGenerateText = onGenerateText self.onStreamText = onStreamText } diff --git a/Tests/TachikomaTests/Providers/AnthropicInterleavedDefaultsTests.swift b/Tests/TachikomaTests/Providers/AnthropicInterleavedDefaultsTests.swift index 1d5c95d..4fb271f 100644 --- a/Tests/TachikomaTests/Providers/AnthropicInterleavedDefaultsTests.swift +++ b/Tests/TachikomaTests/Providers/AnthropicInterleavedDefaultsTests.swift @@ -1,4 +1,7 @@ import Foundation +#if canImport(FoundationNetworking) +import FoundationNetworking +#endif import Testing @testable import Tachikoma @@ -22,6 +25,19 @@ struct AnthropicInterleavedDefaultsTests { #expect(parts.contains("fine-grained-tool-streaming-2025-05-14")) } + @Test + func `Endpoint identity includes routing query without exposing raw values`() { + let tenantA = ReasoningEndpointIdentity.canonical("https://gateway.test/v1?tenant=a") + let tenantB = ReasoningEndpointIdentity.canonical("https://gateway.test/v1?tenant=b") + + #expect(tenantA != tenantB) + #expect(tenantA?.hasPrefix("sha256:") == true) + #expect(tenantA?.contains("tenant") == false) + #expect(tenantA?.contains("gateway") == false) + #expect(ReasoningEndpointIdentity.canonical("https://gateway.test/v1/?tenant=a") == tenantA) + #expect(ReasoningEndpointIdentity.canonical("https://user:secret@gateway.test/v1?tenant=a#frag") == tenantA) + } + @Test func `Provider request includes beta header and thinking payload`() throws { let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) @@ -126,6 +142,163 @@ struct AnthropicInterleavedDefaultsTests { #expect(json["max_tokens"] as? Int == 64) } + @Test + func `Fable 5 request omits thinking config and uses effort output config`() throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let provider = try AnthropicProvider(model: .fable5, configuration: config) + + let settings = GenerationSettings( + maxTokens: 128_000, + temperature: 0.7, + topP: 0.9, + topK: 40, + reasoningEffort: .high, + providerOptions: .init(anthropic: .init(thinking: .adaptive)), + ) + + let request = ProviderRequest( + messages: [.user("hi")], + settings: settings, + ) + + let urlRequest = try provider.makeURLRequest(for: request, stream: false) + let body = try #require(urlRequest.httpBody) + let json = try #require(try JSONSerialization.jsonObject(with: body) as? [String: Any]) + + #expect(json["model"] as? String == "claude-fable-5") + #expect(json["temperature"] == nil) + #expect(json["top_p"] == nil) + #expect(json["top_k"] == nil) + #expect(json["thinking"] == nil) + let outputConfig = try #require(json["output_config"] as? [String: Any]) + #expect(outputConfig["effort"] as? String == "high") + #expect(json["max_tokens"] as? Int == 128_000) + #expect(urlRequest.value(forHTTPHeaderField: "anthropic-beta") == nil) + } + + @Test + func `Fable 5 request uses model-aware default output budget`() throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let provider = try AnthropicProvider(model: .fable5, configuration: config) + + let request = ProviderRequest(messages: [.user("hi")]) + let urlRequest = try provider.makeURLRequest(for: request, stream: false) + let body = try #require(urlRequest.httpBody) + let json = try #require(try JSONSerialization.jsonObject(with: body) as? [String: Any]) + + #expect(json["max_tokens"] as? Int == 16384) + #expect(urlRequest.timeoutInterval == 1800) + } + + @Test + func `Fable 5 long output requests extend non-streaming timeout`() throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let provider = try AnthropicProvider(model: .fable5, configuration: config) + + let urlRequest = try provider.makeURLRequest( + for: ProviderRequest( + messages: [.user("long")], + settings: GenerationSettings(maxTokens: 128_000), + ), + stream: false, + ) + + #expect(urlRequest.timeoutInterval == 1800) + } + + @Test + func `Opus long output requests extend non-streaming timeout`() throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + + for model in [LanguageModel.Anthropic.opus47, .opus48] { + let provider = try AnthropicProvider(model: model, configuration: config) + let urlRequest = try provider.makeURLRequest( + for: ProviderRequest( + messages: [.user("long")], + settings: GenerationSettings(maxTokens: 128_000), + ), + stream: false, + ) + + #expect(urlRequest.timeoutInterval == 1800) + } + } + + @Test + func `Custom Fable model id uses Fable request defaults`() throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let provider = try AnthropicProvider(model: .custom("claude-fable-5"), configuration: config) + + let request = ProviderRequest(messages: [.user("hi")]) + let urlRequest = try provider.makeURLRequest(for: request, stream: false) + let body = try #require(urlRequest.httpBody) + let json = try #require(try JSONSerialization.jsonObject(with: body) as? [String: Any]) + + #expect(provider.capabilities.supportsStreaming == false) + #expect(provider.capabilities.contextLength == 1_000_000) + #expect(provider.capabilities.maxOutputTokens == 128_000) + #expect(LanguageModel.anthropic(.custom("claude-fable-5")).supportsStreaming == false) + #expect(LanguageModel.anthropic(.custom("claude-fable-5")).contextLength == 1_000_000) + #expect(LanguageModel.Anthropic.custom("claude-fable-5").maxOutputTokens == 128_000) + #expect(json["model"] as? String == "claude-fable-5") + #expect(json["thinking"] == nil) + #expect(json["max_tokens"] as? Int == 16384) + } + + @Test + func `Qualified custom Fable model id uses Fable request defaults`() throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let provider = try AnthropicProvider(model: .custom("anthropic.claude-fable-5"), configuration: config) + + let request = ProviderRequest(messages: [.user("hi")]) + let urlRequest = try provider.makeURLRequest(for: request, stream: false) + let body = try #require(urlRequest.httpBody) + let json = try #require(try JSONSerialization.jsonObject(with: body) as? [String: Any]) + + #expect(provider.capabilities.supportsStreaming == false) + #expect(provider.capabilities.contextLength == 1_000_000) + #expect(provider.capabilities.maxOutputTokens == 128_000) + #expect(LanguageModel.anthropic(.custom("anthropic.claude-fable-5")).contextLength == 1_000_000) + #expect(LanguageModel.Anthropic.custom("anthropic.claude-fable-5").maxOutputTokens == 128_000) + #expect(json["model"] as? String == "anthropic.claude-fable-5") + #expect(json["thinking"] == nil) + #expect(json["max_tokens"] as? Int == 16384) + } + + @Test + func `Fable 5 rejects disabled thinking mode`() throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let provider = try AnthropicProvider(model: .fable5, configuration: config) + let settings = GenerationSettings( + maxTokens: 64, + providerOptions: .init(anthropic: .init(thinking: .disabled)), + ) + + #expect(throws: TachikomaError.self) { + _ = try provider.makeURLRequest( + for: ProviderRequest(messages: [.user("hi")], settings: settings), + stream: false, + ) + } + } + + @Test + func `Custom Fable model id rejects disabled thinking mode`() throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let provider = try AnthropicProvider(model: .custom("claude-fable-5"), configuration: config) + let settings = GenerationSettings( + maxTokens: 64, + providerOptions: .init(anthropic: .init(thinking: .disabled)), + ) + + #expect(throws: TachikomaError.self) { + _ = try provider.makeURLRequest( + for: ProviderRequest(messages: [.user("hi")], settings: settings), + stream: false, + ) + } + } + @Test func `Opus reasoning effort is kept when thinking is disabled`() throws { let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) @@ -300,6 +473,29 @@ struct AnthropicInterleavedDefaultsTests { #expect(delta.signature == "sig") } + @Test + func `Stream delta decodes message_delta stop reason without delta type`() throws { + let data = try #require( + "{\"stop_reason\":\"refusal\",\"stop_sequence\":null}".data(using: .utf8), + ) + let delta = try JSONDecoder().decode(AnthropicStreamDelta.self, from: data) + #expect(delta.type.isEmpty) + #expect(delta.stopReason == "refusal") + } + + @Test + func `Stream event decodes partial usage with stop reason`() throws { + let data = try #require( + #"{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":42}}"# + .data(using: .utf8), + ) + let event = try JSONDecoder().decode(AnthropicStreamEvent.self, from: data) + + #expect(event.delta?.stopReason == "end_turn") + #expect(event.usage?.inputTokens == 0) + #expect(event.usage?.outputTokens == 42) + } + @Test func `Signed thinking blocks are preserved for assistant messages`() throws { let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) @@ -338,7 +534,125 @@ struct AnthropicInterleavedDefaultsTests { } @Test - func `Redacted thinking blocks preserve signature without text`() throws { + func `Fable 5 preserves signed thinking history while omitting request thinking field`() throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let provider = try AnthropicProvider(model: .fable5, configuration: config) + let signedThinking = try ModelMessage( + role: .assistant, + content: [.text("fable thinking")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-fable-5", + "anthropic.thinking.signature": "sig-fable", + "anthropic.thinking.type": "thinking", + "tachikoma.reasoning.provider": "anthropic", + "tachikoma.reasoning.model": "claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://api.anthropic.com")), + ]), + ) + + let request = ProviderRequest( + messages: [.user("hi"), signedThinking, .assistant("hello"), .user("continue")], + settings: GenerationSettings(maxTokens: 64), + ) + + let urlRequest = try provider.makeURLRequest(for: request, stream: false) + let body = try #require(urlRequest.httpBody) + let json = try #require(try JSONSerialization.jsonObject(with: body) as? [String: Any]) + let messages = try #require(json["messages"] as? [[String: Any]]) + let assistant = try #require(messages[1]["content"] as? [[String: Any]]) + + #expect(json["thinking"] == nil) + #expect(assistant.first?["type"] as? String == "thinking") + #expect(assistant.first?["thinking"] as? String == "fable thinking") + #expect(assistant.first?["signature"] as? String == "sig-fable") + } + + @Test + func `Fable 5 drops mismatched signed thinking history in direct provider requests`() throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let provider = try AnthropicProvider(model: .fable5, configuration: config) + let signedThinking = try ModelMessage( + role: .assistant, + content: [.text("foreign thinking")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-fable-5", + "anthropic.thinking.signature": "sig-foreign", + "anthropic.thinking.type": "thinking", + "tachikoma.reasoning.provider": "anthropic", + "tachikoma.reasoning.model": "claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://other.example.test")), + ]), + ) + + let request = ProviderRequest( + messages: [.user("hi"), signedThinking, .assistant("hello"), .user("continue")], + settings: GenerationSettings(maxTokens: 64), + ) + + let urlRequest = try provider.makeURLRequest(for: request, stream: false) + let body = try #require(urlRequest.httpBody) + let json = try #require(try JSONSerialization.jsonObject(with: body) as? [String: Any]) + let messages = try #require(json["messages"] as? [[String: Any]]) + let assistant = try #require(messages[1]["content"] as? [[String: Any]]) + + #expect(assistant.count == 1) + #expect(assistant.first?["type"] as? String == "text") + #expect(assistant.first?["text"] as? String == "hello") + #expect(String(data: body, encoding: .utf8)?.contains("foreign thinking") == false) + } + + @Test + func `Fable 5 rejects assistant prefill requests`() throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let provider = try AnthropicProvider(model: .fable5, configuration: config) + + #expect(throws: TachikomaError.self) { + _ = try provider.makeURLRequest( + for: ProviderRequest(messages: [.user("hi"), .assistant("prefill")]), + stream: false, + ) + } + } + + @Test + func `Anthropic refusal stop reason maps to content filter`() { + #expect(AnthropicProvider.mapFinishReason("refusal") == .contentFilter) + #expect(AnthropicProvider.mapFinishReason("model_context_window_exceeded") == .length) + } + + @Test + func `Anthropic refusal response decodes stop details explanation`() throws { + let data = """ + { + "id": "msg_test", + "type": "message", + "role": "assistant", + "content": [], + "model": "claude-fable-5", + "stop_reason": "refusal", + "stop_details": { + "category": "cyber", + "explanation": "I cannot help with that request." + }, + "usage": { + "input_tokens": 10, + "output_tokens": 0 + } + } + """.data(using: .utf8)! + + let response = try JSONDecoder().decode(AnthropicMessageResponse.self, from: data) + + #expect(response.stopDetails?.category == "cyber") + #expect(response.stopDetails?.explanation == "I cannot help with that request.") + } + + @Test + func `Redacted thinking blocks preserve opaque data`() throws { let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) let provider = try AnthropicProvider(model: .opus45, configuration: config) @@ -349,10 +663,9 @@ struct AnthropicInterleavedDefaultsTests { let redacted = ModelMessage( role: .assistant, - content: [.text("")], + content: [.text("opaque-redacted-data")], channel: .thinking, metadata: .init(customData: [ - "anthropic.thinking.signature": "sig-redacted", "anthropic.thinking.type": "redacted_thinking", ]), ) @@ -369,8 +682,232 @@ struct AnthropicInterleavedDefaultsTests { let assistant = try #require(messages[1]["content"] as? [[String: Any]]) #expect(assistant.first?["type"] as? String == "redacted_thinking") - #expect((assistant.first?["redacted_thinking"] as? String)?.isEmpty == true) - #expect(assistant.first?["signature"] as? String == "sig-redacted") + #expect(assistant.first?["data"] as? String == "opaque-redacted-data") + #expect(assistant.first?["signature"] == nil) + } + + @Test + func `Redacted thinking response decodes opaque data`() throws { + let data = try #require( + """ + {"type":"redacted_thinking","data":"opaque-redacted-data"} + """.data(using: .utf8), + ) + + let content = try JSONDecoder().decode(AnthropicResponseContent.self, from: data) + guard case let .redactedThinking(redacted) = content else { + Issue.record("Expected redacted thinking content") + return + } + #expect(redacted.data == "opaque-redacted-data") + } + + @Test + func `Consecutive thinking blocks are preserved in order`() throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let provider = try AnthropicProvider(model: .fable5, configuration: config) + + let signedThinking = try ModelMessage( + role: .assistant, + content: [.text("signed")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-fable-5", + "anthropic.thinking.signature": "sig", + "anthropic.thinking.type": "thinking", + "tachikoma.reasoning.provider": "anthropic", + "tachikoma.reasoning.model": "claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://api.anthropic.com")), + ]), + ) + let redactedThinking = try ModelMessage( + role: .assistant, + content: [.text("opaque")], + channel: .thinking, + metadata: .init(customData: [ + "anthropic.thinking.model": "claude-fable-5", + "anthropic.thinking.type": "redacted_thinking", + "tachikoma.reasoning.provider": "anthropic", + "tachikoma.reasoning.model": "claude-fable-5", + "tachikoma.reasoning.base_url": #require(ReasoningEndpointIdentity + .canonical("https://api.anthropic.com")), + ]), + ) + + let request = ProviderRequest( + messages: [.user("hi"), signedThinking, redactedThinking, .assistant("hello"), .user("continue")], + settings: GenerationSettings(maxTokens: 64), + ) + + let urlRequest = try provider.makeURLRequest(for: request, stream: false) + let body = try #require(urlRequest.httpBody) + let json = try #require(try JSONSerialization.jsonObject(with: body) as? [String: Any]) + let messages = try #require(json["messages"] as? [[String: Any]]) + let assistant = try #require(messages[1]["content"] as? [[String: Any]]) + + #expect(assistant.count == 3) + #expect(assistant[0]["type"] as? String == "thinking") + #expect(assistant[0]["thinking"] as? String == "signed") + #expect(assistant[0]["signature"] as? String == "sig") + #expect(assistant[1]["type"] as? String == "redacted_thinking") + #expect(assistant[1]["data"] as? String == "opaque") + #expect(assistant[2]["type"] as? String == "text") + #expect(assistant[2]["text"] as? String == "hello") + } + + @Test + func `Current Anthropic models expose documented output caps`() { + #expect(LanguageModel.Anthropic.fable5.maxOutputTokens == 128_000) + #expect(LanguageModel.Anthropic.opus47.maxOutputTokens == 128_000) + #expect(LanguageModel.Anthropic.opus48.maxOutputTokens == 128_000) + #expect(LanguageModel.Anthropic.sonnet46.maxOutputTokens == 64000) + #expect(LanguageModel.Anthropic.haiku45.maxOutputTokens == 64000) + } + + @Test + func `Fable and Opus 4_8 streaming are disabled until rollback is supported`() async throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]) + let provider = try AnthropicProvider(model: .fable5, configuration: config) + let opusProvider = try AnthropicProvider(model: .opus48, configuration: config) + + #expect(provider.capabilities.supportsStreaming == false) + #expect(LanguageModel.anthropic(.fable5).supportsStreaming == false) + #expect(opusProvider.capabilities.supportsStreaming == false) + #expect(LanguageModel.anthropic(.opus47).supportsStreaming == true) + #expect(LanguageModel.anthropic(.opus48).supportsStreaming == false) + #expect(LanguageModel.anthropic(.sonnet46).supportsStreaming == true) + #expect(LanguageModel.anthropic(.sonnet45).supportsStreaming == true) + #expect(LanguageModel.anthropic(.haiku45).supportsStreaming == true) + await #expect(throws: TachikomaError.self) { + _ = try await provider.streamText(request: ProviderRequest(messages: [.user("hi")])) + } + await #expect(throws: TachikomaError.self) { + _ = try await opusProvider.streamText(request: ProviderRequest(messages: [.user("hi")])) + } + } + + @Test + func `Opus 4_8 detection avoids substring false positives`() { + #expect(LanguageModel.Anthropic.isOpus48(modelId: "claude-opus-4-8") == true) + #expect(LanguageModel.Anthropic.isOpus48(modelId: "anthropic/claude-opus-4.8") == true) + #expect(LanguageModel.Anthropic.isOpus48(modelId: "my-opus48-distill") == false) + #expect(LanguageModel.Anthropic.isOpus48(modelId: "opus480") == false) + } + + @Test + func `Fable detection avoids substring false positives`() { + #expect(LanguageModel.Anthropic.isFable(modelId: "claude-fable-5") == true) + #expect(LanguageModel.Anthropic.isFable(modelId: "anthropic/claude-fable-5") == true) + #expect(LanguageModel.Anthropic.isFable(modelId: "vendor/claude-fable-50") == false) + #expect(LanguageModel.Anthropic.isFable(modelId: "my-claude-fable-5-distill") == false) + } + + @Test + func `Anthropic-compatible provider tags native thinking with wrapper identity`() async throws { + let sessionConfig = URLSessionConfiguration.ephemeral + sessionConfig.protocolClasses = [AnthropicIdentityURLProtocol.self] + let provider = try AnthropicProvider( + model: .custom("claude-fable-5"), + configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]), + reasoningProvider: "anthropic-compatible", + reasoningModelId: "claude-fable-5", + reasoningBaseURL: "https://user:secret@example.test/path?token=secret#frag", + urlSession: URLSession(configuration: sessionConfig), + ) + + let response = try await provider.generateText(request: ProviderRequest(messages: [.user("hi")])) + let thinking = try #require(response.assistantMessages.first { $0.channel == .thinking }) + #expect(thinking.metadata?.customData?["tachikoma.reasoning.provider"] == "anthropic-compatible") + #expect(thinking.metadata?.customData?["tachikoma.reasoning.model"] == "claude-fable-5") + let endpointIdentity = thinking.metadata?.customData?["tachikoma.reasoning.base_url"] + #expect(endpointIdentity == ReasoningEndpointIdentity.canonical("https://example.test/path?token=secret")) + #expect(endpointIdentity?.hasPrefix("sha256:") == true) + #expect(endpointIdentity?.contains("path") == false) + #expect(endpointIdentity?.contains("secret") == false) + #expect(endpointIdentity?.contains("token") == false) + #expect(thinking.metadata?.customData?["anthropic.thinking.signature"] == "sig") + } + + @Test + func `Compatible refusal-prone Anthropic streaming and capabilities are disabled`() async throws { + let config = TachikomaConfiguration(apiKeys: ["anthropic_compatible": "test-key"]) + let provider = try AnthropicCompatibleProvider( + modelId: "claude-fable-5", + baseURL: "https://example.test", + configuration: config, + ) + let opusProvider = try AnthropicCompatibleProvider( + modelId: "claude-opus-4-8", + baseURL: "https://example.test", + configuration: config, + ) + let overriddenProvider = try AnthropicCompatibleProvider( + modelId: "claude-fable-5", + baseURL: "https://example.test", + configuration: config, + capabilities: ModelCapabilities(supportsStreaming: true), + ) + + #expect(provider.capabilities.supportsStreaming == false) + #expect(opusProvider.capabilities.supportsStreaming == false) + #expect(overriddenProvider.capabilities.supportsStreaming == false) + #expect(provider.capabilities.contextLength == 1_000_000) + #expect(provider.capabilities.maxOutputTokens == 128_000) + #expect(LanguageModel.anthropicCompatible( + modelId: "claude-fable-5", + baseURL: "https://example.test", + ).supportsStreaming == false) + #expect(LanguageModel.anthropicCompatible( + modelId: "claude-opus-4-8", + baseURL: "https://example.test", + ).supportsStreaming == false) + #expect(LanguageModel.openaiCompatible( + modelId: "claude-fable-5", + baseURL: "https://example.test", + ).supportsStreaming == false) + #expect(LanguageModel.anthropicCompatible( + modelId: "claude-fable-5", + baseURL: "https://example.test", + ).contextLength == 1_000_000) + #expect(LanguageModel.anthropicCompatible( + modelId: "anthropic.claude-fable-5", + baseURL: "https://example.test", + ).contextLength == 1_000_000) + let openAICompatibleProvider = try OpenAICompatibleProvider( + modelId: "claude-fable-5", + baseURL: "https://example.test", + configuration: TachikomaConfiguration(apiKeys: ["openai_compatible": "test-key"]), + ) + let openRouterProvider = try OpenRouterProvider( + modelId: "anthropic/claude-fable-5", + configuration: TachikomaConfiguration(apiKeys: ["openrouter": "test-key"]), + ) + let togetherProvider = try TogetherProvider( + modelId: "anthropic/claude-fable-5", + configuration: TachikomaConfiguration(apiKeys: ["together": "test-key"]), + ) + #expect(openAICompatibleProvider.capabilities.supportsStreaming == false) + #expect(openRouterProvider.capabilities.supportsStreaming == false) + #expect(togetherProvider.capabilities.supportsStreaming == false) + #expect(openAICompatibleProvider.capabilities.contextLength == 1_000_000) + #expect(openAICompatibleProvider.capabilities.maxOutputTokens == 128_000) + #expect(openRouterProvider.capabilities.contextLength == 1_000_000) + #expect(openRouterProvider.capabilities.maxOutputTokens == 128_000) + #expect(togetherProvider.capabilities.contextLength == 1_000_000) + #expect(togetherProvider.capabilities.maxOutputTokens == 128_000) + await #expect(throws: TachikomaError.self) { + _ = try await provider.streamText(request: ProviderRequest(messages: [.user("hi")])) + } + await #expect(throws: TachikomaError.self) { + _ = try await openAICompatibleProvider.streamText(request: ProviderRequest(messages: [.user("hi")])) + } + await #expect(throws: TachikomaError.self) { + _ = try await openRouterProvider.streamText(request: ProviderRequest(messages: [.user("hi")])) + } + await #expect(throws: TachikomaError.self) { + _ = try await togetherProvider.streamText(request: ProviderRequest(messages: [.user("hi")])) + } } @Test @@ -400,3 +937,49 @@ struct AnthropicInterleavedDefaultsTests { #expect(assistant.first?["type"] as? String == "text") } } + +private final class AnthropicIdentityURLProtocol: URLProtocol { + override class func canInit(with _: URLRequest) -> Bool { + true + } + + override class func canonicalRequest(for request: URLRequest) -> URLRequest { + request + } + + override func startLoading() { + guard + let url = self.request.url, + let response = HTTPURLResponse( + url: url, + statusCode: 200, + httpVersion: nil, + headerFields: ["Content-Type": "application/json"], + ) else + { + self.client?.urlProtocol(self, didFailWithError: TachikomaError.invalidInput("Missing mock response")) + return + } + + let body = """ + { + "id": "msg_test", + "type": "message", + "role": "assistant", + "model": "claude-fable-5", + "content": [ + {"type": "thinking", "thinking": "private", "signature": "sig"}, + {"type": "text", "text": "ok"} + ], + "stop_reason": "end_turn", + "usage": {"input_tokens": 1, "output_tokens": 2} + } + """.data(using: .utf8) ?? Data() + + self.client?.urlProtocol(self, didReceive: response, cacheStoragePolicy: .notAllowed) + self.client?.urlProtocol(self, didLoad: body) + self.client?.urlProtocolDidFinishLoading(self) + } + + override func stopLoading() {} +} diff --git a/Tests/TachikomaTests/Providers/Integration/ProviderSystemTests.swift b/Tests/TachikomaTests/Providers/Integration/ProviderSystemTests.swift index 22dd0b9..79cbda3 100644 --- a/Tests/TachikomaTests/Providers/Integration/ProviderSystemTests.swift +++ b/Tests/TachikomaTests/Providers/Integration/ProviderSystemTests.swift @@ -22,13 +22,15 @@ struct ProviderSystemTests { @Test func `Provider Factory - Anthropic Provider Creation`() async throws { try await TestHelpers.withTestConfiguration(apiKeys: ["anthropic": "test-key"]) { config in - let model = Model.anthropic(.opus47) + let model = Model.anthropic(.fable5) let provider = try ProviderFactory.createProvider(for: model, configuration: config) - #expect(provider.modelId == "claude-opus-4-7") + #expect(provider.modelId == "claude-fable-5") #expect(provider.capabilities.supportsVision == true) #expect(provider.capabilities.supportsTools == true) - #expect(provider.capabilities.supportsStreaming == true) + #expect(provider.capabilities.supportsStreaming == false) + #expect(provider.capabilities.contextLength == 1_000_000) + #expect(provider.capabilities.maxOutputTokens == 128_000) } } @@ -138,6 +140,7 @@ struct ProviderSystemTests { #expect(Model.openai(.gpt5Mini).supportsVision == true) #expect(Model.openai(.custom("text-only-openai")).supportsVision == false) + #expect(Model.anthropic(.fable5).supportsVision == true) #expect(Model.anthropic(.opus4).supportsVision == true) #expect(Model.anthropic(.sonnet46).supportsVision == true) @@ -153,6 +156,7 @@ struct ProviderSystemTests { #expect(Model.openai(.gpt55).supportsTools == true) #expect(Model.openai(.gpt55).supportsTools == true) + #expect(Model.anthropic(.fable5).supportsTools == true) #expect(Model.anthropic(.opus4).supportsTools == true) #expect(Model.anthropic(.sonnet46).supportsTools == true) @@ -167,6 +171,19 @@ struct ProviderSystemTests { func `Model Capabilities - Streaming Support`() { #expect(Model.openai(.gpt55).supportsStreaming == true) #expect(Model.anthropic(.opus4).supportsStreaming == true) + #expect(Model.anthropic(.opus47).supportsStreaming == true) + #expect(Model.anthropic(.opus48).supportsStreaming == false) + #expect(Model.anthropic(.fable5).supportsStreaming == false) + #expect(Model.openRouter(modelId: "anthropic/claude-fable-5").supportsStreaming == false) + #expect(Model.openRouter(modelId: "anthropic/claude-opus-4-8").supportsStreaming == false) + #expect(Model.openaiCompatible( + modelId: "anthropic/claude-fable-5", + baseURL: "https://example.test", + ).supportsStreaming == false) + #expect(Model.openaiCompatible( + modelId: "sonnet4-local", + baseURL: "https://example.test", + ).supportsStreaming == true) #expect(Model.grok(.grok43).supportsStreaming == true) #expect(Model.ollama(.llama33).supportsStreaming == true) } diff --git a/Tests/TachikomaTests/Providers/OpenAIResponsesProviderTests.swift b/Tests/TachikomaTests/Providers/OpenAIResponsesProviderTests.swift index 4ee3e98..97142b0 100644 --- a/Tests/TachikomaTests/Providers/OpenAIResponsesProviderTests.swift +++ b/Tests/TachikomaTests/Providers/OpenAIResponsesProviderTests.swift @@ -204,6 +204,7 @@ struct OpenAIResponsesProviderTests { choices: nil, usage: nil, metadata: nil, + incompleteDetails: nil, ) let providerResponse = try OpenAIResponsesProvider.convertToProviderResponse(response) @@ -216,6 +217,159 @@ struct OpenAIResponsesProviderTests { #expect(providerResponse.finishReason == .toolCalls) } + @Test + func `GPT-5 incomplete content filter response maps finish reason`() throws { + let output = OpenAIResponsesResponse.ResponsesOutput( + id: "out_1", + type: "message", + status: "incomplete", + content: [ + .init(type: "output_text", text: "blocked partial", toolCall: nil), + ], + role: "assistant", + toolCall: nil, + ) + + let response = try JSONDecoder().decode(OpenAIResponsesResponse.self, from: #require(""" + { + "id": "resp_1", + "object": "response", + "created_at": 0, + "status": "incomplete", + "model": "gpt-5", + "output": [ + { + "id": "out_1", + "type": "message", + "status": "incomplete", + "role": "assistant", + "content": [ + { "type": "output_text", "text": "blocked partial" } + ] + } + ], + "incomplete_details": { "reason": "content_filter" } + } + """.data(using: .utf8))) + + let providerResponse = try OpenAIResponsesProvider.convertToProviderResponse(response) + + #expect(output.status == "incomplete") + #expect(providerResponse.text.isEmpty) + #expect(providerResponse.finishReason == .contentFilter) + } + + @Test + func `GPT-5 incomplete content filter discards parsed tool calls`() throws { + let toolCall = OpenAIResponsesResponse.ResponsesToolCall( + id: "call_1", + type: "function", + function: .init(name: "see", arguments: "{\"mode\":\"screen\"}"), + ) + let output = OpenAIResponsesResponse.ResponsesOutput( + id: "out_1", + type: "message", + status: "incomplete", + content: [ + .init(type: "output_text", text: "blocked partial", toolCall: nil), + .init(type: "tool_call", text: nil, toolCall: toolCall), + ], + role: "assistant", + toolCall: nil, + ) + let response = OpenAIResponsesResponse( + id: "resp_1", + object: "response", + createdAt: 0, + created: nil, + status: "incomplete", + model: "gpt-5", + output: [output], + choices: nil, + usage: nil, + metadata: nil, + incompleteDetails: .init(reason: "content_filter"), + ) + + let providerResponse = try OpenAIResponsesProvider.convertToProviderResponse(response) + + #expect(providerResponse.text.isEmpty) + #expect(providerResponse.toolCalls == nil) + #expect(providerResponse.finishReason == .contentFilter) + } + + @Test + func `GPT-5 completed refusal output maps to content filter`() throws { + let output = OpenAIResponsesResponse.ResponsesOutput( + id: "out_1", + type: "message", + status: "completed", + content: [ + .init(type: "refusal", refusal: "I cannot help with that."), + ], + role: "assistant", + toolCall: nil, + ) + let response = OpenAIResponsesResponse( + id: "resp_1", + object: "response", + createdAt: 0, + created: nil, + status: "completed", + model: "gpt-5", + output: [output], + choices: nil, + usage: nil, + metadata: nil, + incompleteDetails: nil, + ) + + let providerResponse = try OpenAIResponsesProvider.convertToProviderResponse(response) + + #expect(providerResponse.text.isEmpty) + #expect(providerResponse.toolCalls == nil) + #expect(providerResponse.finishReason == .contentFilter) + } + + @Test + func `Alternate choices content filter suppresses text and tool calls`() throws { + let response = try JSONDecoder().decode(OpenAIResponsesResponse.self, from: #require(""" + { + "id": "chatcmpl_1", + "object": "chat.completion", + "created": 0, + "model": "gpt-5", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "blocked partial", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "see", + "arguments": "{\\"mode\\":\\"screen\\"}" + } + } + ] + }, + "finish_reason": "content_filter", + "logprobs": null + } + ] + } + """.data(using: .utf8))) + + let providerResponse = try OpenAIResponsesProvider.convertToProviderResponse(response) + + #expect(providerResponse.text.isEmpty) + #expect(providerResponse.toolCalls == nil) + #expect(providerResponse.finishReason == .contentFilter) + } + @Test func `Responses provider hits /v1/responses and encodes body`() async throws { let config = TachikomaConfiguration(loadFromEnvironment: false) @@ -528,18 +682,198 @@ struct OpenAIResponsesProviderTests { let stream = try await provider.streamText(request: self.sampleRequest) var collected = "" + var receivedDone = false for try await delta in stream { switch delta.type { case .textDelta: collected.append(delta.content ?? "") case .done: - break + receivedDone = true case .toolCall, .toolResult, .reasoning: break } } #expect(collected == "Hello world") + #expect(receivedDone) + } + } + + @Test + func `Responses provider marks completed tool streams as tool calls`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setAPIKey("live-openai", for: .openai) + + try await self.withMockedSession { request in + #expect(request.url?.path == "/v1/responses") + let payload = Self.responsesStreamPayload(chunks: [ + Self.streamEventJSON([ + "type": "response.output_item.added", + "item": [ + "id": "item_1", + "type": "function_call", + "name": "lookup", + ], + ]), + Self.streamEventJSON([ + "type": "response.function_call_arguments.done", + "item_id": "item_1", + "arguments": #"{"query":"weather"}"#, + ]), + Self.streamEventJSON(["type": "response.completed"]), + ]) + return NetworkMocking.streamResponse(for: request, data: payload) + } operation: { session in + let provider = try OpenAIResponsesProvider(model: .gpt55, configuration: config, session: session) + let stream = try await provider.streamText(request: self.sampleRequest) + + var sawToolCall = false + var finishReason: FinishReason? + for try await delta in stream { + if delta.type == .toolCall { + sawToolCall = true + } + if delta.type == .done { + finishReason = delta.finishReason + } + } + + #expect(sawToolCall) + #expect(finishReason == .toolCalls) + } + } + + @Test + func `Responses provider maps incomplete content filter stream finish reason`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setAPIKey("live-openai", for: .openai) + + try await self.withMockedSession { request in + #expect(request.url?.path == "/v1/responses") + let payload = Self.responsesStreamPayload(chunks: [ + Self.streamChunkJSON(content: "partial", finishReason: nil), + Self.streamEventJSON([ + "type": "response.incomplete", + "response": [ + "incomplete_details": ["reason": "content_filter"], + ], + ]), + ]) + return NetworkMocking.streamResponse(for: request, data: payload) + } operation: { session in + let provider = try OpenAIResponsesProvider(model: .gpt55, configuration: config, session: session) + let stream = try await provider.streamText(request: self.sampleRequest) + + var collected = "" + var finishReason: FinishReason? + for try await delta in stream { + if case .textDelta = delta.type { + collected.append(delta.content ?? "") + } + if delta.type == .done { + finishReason = delta.finishReason + } + } + + #expect(collected == "partial") + #expect(finishReason == .contentFilter) + } + } + + @Test + func `Responses provider maps refusal stream events to content filter`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setAPIKey("live-openai", for: .openai) + + try await self.withMockedSession { request in + #expect(request.url?.path == "/v1/responses") + let payload = Self.responsesStreamPayload(chunks: [ + Self.streamEventJSON([ + "type": "response.refusal.delta", + "delta": "no", + ]), + Self.streamEventJSON(["type": "response.refusal.done"]), + Self.streamEventJSON(["type": "response.completed"]), + ]) + return NetworkMocking.streamResponse(for: request, data: payload) + } operation: { session in + let provider = try OpenAIResponsesProvider(model: .gpt55, configuration: config, session: session) + let stream = try await provider.streamText(request: self.sampleRequest) + + var finishReason: FinishReason? + for try await delta in stream where delta.type == .done { + finishReason = delta.finishReason + } + + #expect(finishReason == .contentFilter) + } + } + + @Test + func `Responses provider throws on failed stream event`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setAPIKey("live-openai", for: .openai) + + try await self.withMockedSession { request in + #expect(request.url?.path == "/v1/responses") + let payload = Self.responsesStreamPayload(chunks: [ + Self.streamEventJSON([ + "type": "response.failed", + "response": [ + "error": [ + "message": "stream failed after partial output", + ], + ], + ]), + ]) + return NetworkMocking.streamResponse(for: request, data: payload) + } operation: { session in + let provider = try OpenAIResponsesProvider(model: .gpt55, configuration: config, session: session) + let stream = try await provider.streamText(request: self.sampleRequest) + + do { + for try await _ in stream {} + Issue.record("Expected stream failure") + } catch let error as TachikomaError { + guard case let .apiError(message) = error else { + Issue.record("Expected apiError, got \(error)") + return + } + #expect(message.contains("response.failed")) + #expect(message.contains("stream failed after partial output")) + } + } + } + + @Test + func `Responses provider throws on error stream event`() async throws { + let config = TachikomaConfiguration(loadFromEnvironment: false) + config.setAPIKey("live-openai", for: .openai) + + try await self.withMockedSession { request in + #expect(request.url?.path == "/v1/responses") + let payload = Self.responsesStreamPayload(chunks: [ + Self.streamEventJSON([ + "type": "error", + "message": "top-level stream error", + ]), + ]) + return NetworkMocking.streamResponse(for: request, data: payload) + } operation: { session in + let provider = try OpenAIResponsesProvider(model: .gpt55, configuration: config, session: session) + let stream = try await provider.streamText(request: self.sampleRequest) + + do { + for try await _ in stream {} + Issue.record("Expected stream failure") + } catch let error as TachikomaError { + guard case let .apiError(message) = error else { + Issue.record("Expected apiError, got \(error)") + return + } + #expect(message.contains("error")) + #expect(message.contains("top-level stream error")) + } } } @@ -657,6 +991,11 @@ struct OpenAIResponsesProviderTests { return String(data: data, encoding: .utf8)! } + private static func streamEventJSON(_ event: [String: Any]) -> String { + let data = try! JSONSerialization.data(withJSONObject: event) + return String(data: data, encoding: .utf8)! + } + private func withMockedSession( handler: @Sendable @escaping (URLRequest) throws -> (HTTPURLResponse, Data), operation: (URLSession) async throws -> T, diff --git a/Tests/TachikomaTests/Providers/ProviderEndToEndTests.swift b/Tests/TachikomaTests/Providers/ProviderEndToEndTests.swift index a419023..ce64a39 100644 --- a/Tests/TachikomaTests/Providers/ProviderEndToEndTests.swift +++ b/Tests/TachikomaTests/Providers/ProviderEndToEndTests.swift @@ -449,6 +449,37 @@ struct ProviderEndToEndTests { } } + @Test + func `MiniMax reasoning metadata is bound to configured endpoint`() async throws { + let baseURL = "https://minimax-proxy.test/anthropic?tenant=a" + try await NetworkMocking.withMockedNetwork { request in + #expect(request.url?.host == "minimax-proxy.test") + #expect(request.value(forHTTPHeaderField: "Authorization") == "Bearer live-minimax") + return NetworkMocking.jsonResponse( + for: request, + data: Self.anthropicPayloadWithThinking( + text: "MiniMax ok", + thinking: "native-thought", + signature: "sig-mm", + ), + ) + } operation: { + let config = Self.makeConfiguration { config in + config.setAPIKey("live-minimax", for: .minimax) + config.setBaseURL(baseURL, for: .minimax) + } + let provider = try ProviderFactory.createProvider(for: .minimax(.m27), configuration: config) + let response = try await provider.generateText(request: Self.basicRequest) + let thinkingMessage = try #require(response.assistantMessages.first { $0.channel == .thinking }) + let metadata = try #require(thinkingMessage.metadata?.customData) + + #expect(metadata["tachikoma.reasoning.provider"] == "minimax") + #expect(metadata["tachikoma.reasoning.model"] == "MiniMax-M2.7") + #expect(metadata["anthropic.thinking.signature"] == "sig-mm") + #expect(metadata["tachikoma.reasoning.base_url"] == ReasoningEndpointIdentity.canonical(baseURL)) + } + } + @Test func `MiniMax China provider uses China endpoint and bearer auth`() async throws { try await NetworkMocking.withMockedNetwork { request in @@ -591,6 +622,25 @@ struct ProviderEndToEndTests { return try! JSONSerialization.data(withJSONObject: dict) } + private static func anthropicPayloadWithThinking(text: String, thinking: String, signature: String) -> Data { + let dict: [String: Any] = [ + "id": "msg_1", + "type": "message", + "role": "assistant", + "content": [ + ["type": "thinking", "thinking": thinking, "signature": signature], + ["type": "text", "text": text], + ], + "model": "MiniMax-M2.7", + "stop_reason": "end_turn", + "usage": [ + "input_tokens": 12, + "output_tokens": 6, + ], + ] + return try! JSONSerialization.data(withJSONObject: dict) + } + private static func googleStreamPayload(text: String) -> Data { let json: [String: Any] = [ "candidates": [ diff --git a/Tests/TachikomaTests/UIIntegrationTests.swift b/Tests/TachikomaTests/UIIntegrationTests.swift index b11eef7..c9019c6 100644 --- a/Tests/TachikomaTests/UIIntegrationTests.swift +++ b/Tests/TachikomaTests/UIIntegrationTests.swift @@ -79,6 +79,70 @@ struct UIIntegrationTests { #expect(uiMessages[0].toolCalls?.count == 1) } + @Test + func `Thinking ModelMessages are hidden from UI messages`() { + let thinking = ModelMessage( + role: .assistant, + content: [.text("private reasoning")], + channel: .thinking, + metadata: .init(customData: ["anthropic.thinking.signature": "sig"]), + ) + let visible = ModelMessage(role: .assistant, content: [.text("Visible answer")]) + + let uiMessages = [thinking, visible].toUIMessages() + + #expect(uiMessages.count == 1) + #expect(uiMessages[0].content == "Visible answer") + } + + @Test + func `Provider-neutral thinking ModelMessages remain visible in UI messages`() { + let thinking = ModelMessage( + role: .assistant, + content: [.text("visible reasoning")], + channel: .thinking, + ) + + let uiMessages = [thinking].toUIMessages() + + #expect(uiMessages.count == 1) + #expect(uiMessages[0].content == "visible reasoning") + } + + @Test + func `Provider-native reasoning ModelMessages are hidden from UI messages`() { + let reasoning = ModelMessage( + role: .assistant, + content: [.text("openrouter reasoning")], + channel: .thinking, + metadata: .init(customData: [ + "tachikoma.reasoning.provider": "openrouter", + "tachikoma.reasoning.model": "anthropic/claude-fable-5", + ]), + ) + let visible = ModelMessage(role: .assistant, content: [.text("Visible answer")]) + + let uiMessages = [reasoning, visible].toUIMessages() + + #expect(uiMessages.count == 1) + #expect(uiMessages[0].content == "Visible answer") + } + + @Test + func `Synthetic reasoning boundaries are hidden from UI messages`() { + let boundary = ModelMessage( + role: .assistant, + content: [.text("")], + metadata: .init(customData: ["tachikoma.internal.boundary": "reasoning_only"]), + ) + let visible = ModelMessage(role: .assistant, content: [.text("Visible answer")]) + + let uiMessages = [boundary, visible].toUIMessages() + + #expect(uiMessages.count == 1) + #expect(uiMessages[0].content == "Visible answer") + } + @Test func `StreamTextResult to UI Message Stream`() async { // Create a mock stream diff --git a/Tests/TachikomaTests/Utilities/UsageTrackingTests.swift b/Tests/TachikomaTests/Utilities/UsageTrackingTests.swift index 2ddaf51..20a2273 100644 --- a/Tests/TachikomaTests/Utilities/UsageTrackingTests.swift +++ b/Tests/TachikomaTests/Utilities/UsageTrackingTests.swift @@ -123,6 +123,16 @@ struct UsageTrackingTests { #expect(gpt5MiniCost.total == 5.00) // Test Anthropic pricing + let claudeFableCost = calculator.calculateCost(for: .anthropic(.fable5), usage: usage) + #expect(claudeFableCost.input == 10.00) + #expect(claudeFableCost.output == 50.00) + #expect(claudeFableCost.total == 60.00) + + let customClaudeFableCost = calculator.calculateCost(for: .anthropic(.custom("claude-fable-5")), usage: usage) + #expect(customClaudeFableCost.input == 10.00) + #expect(customClaudeFableCost.output == 50.00) + #expect(customClaudeFableCost.total == 60.00) + let claudeOpusCost = calculator.calculateCost(for: .anthropic(.opus48), usage: usage) #expect(claudeOpusCost.input == 5.00) #expect(claudeOpusCost.output == 25.00) diff --git a/docs/models.md b/docs/models.md index cac804f..b636bd1 100644 --- a/docs/models.md +++ b/docs/models.md @@ -4,7 +4,8 @@ Tachikoma ships with a built-in model catalog (`CaseIterable` enums) plus suppor ## Default -- `LanguageModel.default`: `claude-opus-4-7` +- `LanguageModel.default`: `claude-opus-4-8` +- `LanguageModel.defaultStreaming`: `gpt-5.5` ## OpenAI (`LanguageModel.OpenAI`) @@ -17,6 +18,8 @@ Notes: ## Anthropic (`LanguageModel.Anthropic`) +- `claude-fable-5` (1M context, 128K max output, non-streaming, explicit opt-in) +- `claude-opus-4-8` (1M context, 128K max output, non-streaming until refusal rollback is streaming-safe) - `claude-opus-4-7` - `claude-opus-4-5` - `claude-opus-4-1-20250805`