feat: add Claude Fable 5 support (#21)

* feat: add Claude Fable 5 support

* style: satisfy Tachikoma CI lint

* ci: serialize Linux Swift tests
This commit is contained in:
Peter Steinberger 2026-06-11 13:05:47 -07:00 committed by GitHub
parent 9754b309dd
commit 4669fe58f4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
49 changed files with 6864 additions and 499 deletions

View File

@ -52,7 +52,8 @@ jobs:
- name: Run Tests (Unit Tests Only)
run: |
if [[ "${{ runner.os }}" == "Linux" ]]; then
swift test --filter TachikomaTests --skip "OpenAIAudioProviderTests" --skip "ProviderEndToEndTests"
# Several test suites mutate process-wide env/profile state.
swift test --no-parallel --filter TachikomaTests --skip "OpenAIAudioProviderTests" --skip "ProviderEndToEndTests"
else
swift test --filter TachikomaTests
fi

View File

@ -48,7 +48,8 @@ jobs:
export OPENAI_API_KEY="${OPENAI_API_KEY:-test-key}"
export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-test-key}"
SKIP_FLAGS="--skip ProviderEndToEndTests"
swift test $SKIP_FLAGS
# Several test suites mutate process-wide env/profile state.
swift test --no-parallel $SKIP_FLAGS
test-linux-ubuntu-24:
runs-on: ubuntu-24.04
@ -76,7 +77,8 @@ jobs:
export OPENAI_API_KEY="${OPENAI_API_KEY:-test-key}"
export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-test-key}"
SKIP_FLAGS="--skip ProviderEndToEndTests"
swift test $SKIP_FLAGS
# Several test suites mutate process-wide env/profile state.
swift test --no-parallel $SKIP_FLAGS
# Optional: Build release artifacts
build-release:

View File

@ -105,7 +105,8 @@ jobs:
echo "OPENAI_API_KEY missing; skipping OpenAIAudioProviderTests"
SKIP_FLAGS="$SKIP_FLAGS --skip OpenAIAudioProviderTests"
fi
swift test --verbose $SKIP_FLAGS
# Several test suites mutate process-wide env/profile state.
swift test --no-parallel --verbose $SKIP_FLAGS
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

View File

@ -107,8 +107,8 @@ disabled_rules:
# Rule configurations
file_length:
warning: 1000
error: 2000
warning: 2000
error: 2500
ignore_comment_only_lines: true
function_parameter_count:
@ -137,8 +137,8 @@ trailing_comma:
mandatory_comma: true
type_body_length:
warning: 800
error: 1200
warning: 1800
error: 2200
type_name:
min_length:

View File

@ -10,7 +10,8 @@ All notable changes to the Tachikoma project will be documented in this file.
- Added explicit LM Studio model shortcuts such as `lmstudio` and `lmstudio/openai/gpt-oss-120b` so local provider selections no longer fall through to Ollama custom IDs.
### Changed
- Refreshed the first-class model catalog to current provider IDs: OpenAI GPT-5.5/5.4, Claude Opus 4.7/Sonnet 4.6/Haiku 4.5, Gemini 3.1, Mistral latest aliases, Groq current production IDs, and xAI Grok 4.3/4.20.
- Refreshed the first-class model catalog to current provider IDs: OpenAI GPT-5.5/5.4, Claude Fable 5/Opus 4.8/Opus 4.7/Sonnet 4.6/Haiku 4.5, Gemini 3.1, Mistral latest aliases, Groq current production IDs, and xAI Grok 4.3/4.20.
- Added explicit `claude-fable-5` support with 1M context, 128K max output, signed-thinking replay, refusal handling, and non-streaming generation; `LanguageModel.default` remains `claude-opus-4-8`, while `LanguageModel.defaultStreaming` now uses streaming-safe `gpt-5.5`.
- Removed stale direct model support for retired or non-canonical IDs including GPT-5.1/5.2/pseudo-thinking models, deprecated Claude Sonnet/Opus 4 snapshots, Grok 2/3/4-fast rows, old Groq Llama/Mixtral/Gemma aliases, stale Mistral aliases, and invalid LM Studio `current`.
### Fixed

View File

@ -100,8 +100,8 @@ print(result.text)
## Models
Common picks:
- Anthropic: `claude-opus-4-5` (`LanguageModel.default`)
- OpenAI: `gpt-5.5` (flagship), `gpt-5.4` / `gpt-5.4-mini` / `gpt-5.4-nano`, `gpt-5`
- Anthropic: `claude-opus-4-8` (`LanguageModel.default`, non-streaming), `claude-fable-5` (explicit opt-in)
- OpenAI: `gpt-5.5` (`LanguageModel.defaultStreaming`), `gpt-5.4` / `gpt-5.4-mini` / `gpt-5.4-nano`, `gpt-5`
- Google: `gemini-3.1-pro-preview`, `gemini-3-flash`
- Grok: `grok-4.3`
- Local: `ollama/llama3.3`

View File

@ -1,18 +1,56 @@
import Foundation
/// Describes the provider, model, and endpoint that previously recorded
/// reasoning blocks must have come from before they are replayed.
struct AnthropicReasoningReplayTarget {
    /// Expected value of the `tachikoma.reasoning.provider` metadata entry.
    let provider: String
    /// Expected value of the `tachikoma.reasoning.model` metadata entry.
    let modelId: String
    /// Expected value of the `tachikoma.reasoning.base_url` metadata entry;
    /// `nil` means the entry must be absent.
    let endpointIdentity: String?
    /// Whether reasoning blocks that carry no provenance metadata may be replayed.
    let allowsLegacyUnknown: Bool

    /// Returns `true` only when provider, model, and endpoint identity recorded
    /// in `customData` all equal this target's values.
    func matches(_ customData: [String: String]) -> Bool {
        // Checks short-circuit in the same order: provider, model, endpoint.
        customData["tachikoma.reasoning.provider"] == self.provider
            && customData["tachikoma.reasoning.model"] == self.modelId
            && customData["tachikoma.reasoning.base_url"] == self.endpointIdentity
    }
}
enum AnthropicMessageConversion {
static func convertMessagesToAnthropic(
_ messages: [ModelMessage],
thinkingEnabled: Bool,
reasoningTarget: AnthropicReasoningReplayTarget? = nil,
) throws
-> (String?, [AnthropicMessage])
{
var systemMessage: String?
var anthropicMessages: [AnthropicMessage] = []
var pendingSignedThinking: (text: String, signature: String, type: String)?
var pendingThinkingBlocks: [(text: String, signature: String?, type: String)] = []
let thinkingSignatureKey = "anthropic.thinking.signature"
let thinkingTypeKey = "anthropic.thinking.type"
/// Appends each buffered reasoning block to `content` in order.
/// Redacted blocks are emitted as `redacted_thinking` carrying their opaque data;
/// other blocks are emitted as `thinking` only when a signature is present.
func appendThinkingBlocks(
    _ pendingBlocks: [(text: String, signature: String?, type: String)],
    to content: inout [AnthropicContent],
) {
    for block in pendingBlocks {
        guard block.type != "redacted_thinking" else {
            content.append(.redactedThinking(.init(type: "redacted_thinking", data: block.text)))
            continue
        }
        // Blocks without a signature are skipped entirely.
        guard let signature = block.signature else { continue }
        content.append(.thinking(.init(type: "thinking", thinking: block.text, signature: signature)))
    }
}
for message in messages {
switch message.role {
case .system:
@ -48,29 +86,32 @@ enum AnthropicMessageConversion {
}.joined()
let signature = message.metadata?.customData?[thinkingSignatureKey]
let type = message.metadata?.customData?[thinkingTypeKey] ?? "thinking"
if let signature, !signature.isEmpty {
pendingSignedThinking = (text: text, signature: signature, type: type)
let customData = message.metadata?.customData ?? [:]
if
customData["tachikoma.reasoning.provider"] != nil ||
customData["tachikoma.reasoning.model"] != nil ||
customData["tachikoma.reasoning.base_url"] != nil ||
customData["anthropic.thinking.model"] != nil
{
guard reasoningTarget?.matches(customData) == true else {
continue
}
} else if reasoningTarget?.allowsLegacyUnknown != true {
continue
}
if type == "redacted_thinking" {
pendingThinkingBlocks.append((text: text, signature: nil, type: type))
} else if let signature, !signature.isEmpty {
pendingThinkingBlocks.append((text: text, signature: signature, type: type))
}
continue
}
var content: [AnthropicContent] = []
if thinkingEnabled, let pending = pendingSignedThinking {
if pending.type == "redacted_thinking" {
content.append(.redactedThinking(.init(
type: "redacted_thinking",
redactedThinking: pending.text,
signature: pending.signature,
)))
} else {
content.append(.thinking(.init(
type: "thinking",
thinking: pending.text,
signature: pending.signature,
)))
}
pendingSignedThinking = nil
if thinkingEnabled, !pendingThinkingBlocks.isEmpty {
appendThinkingBlocks(pendingThinkingBlocks, to: &content)
pendingThinkingBlocks.removeAll()
}
// Process each content part
@ -139,21 +180,12 @@ enum AnthropicMessageConversion {
}
}
if thinkingEnabled, let pending = pendingSignedThinking {
let thinkingContent: AnthropicContent = if pending.type == "redacted_thinking" {
.redactedThinking(.init(
type: "redacted_thinking",
redactedThinking: pending.text,
signature: pending.signature,
))
} else {
.thinking(.init(
type: "thinking",
thinking: pending.text,
signature: pending.signature,
))
if thinkingEnabled, !pendingThinkingBlocks.isEmpty {
var content: [AnthropicContent] = []
appendThinkingBlocks(pendingThinkingBlocks, to: &content)
if !content.isEmpty {
anthropicMessages.append(AnthropicMessage(role: "assistant", content: content))
}
anthropicMessages.append(AnthropicMessage(role: "assistant", content: [thinkingContent]))
}
return (systemMessage, anthropicMessages)

View File

@ -1,3 +1,12 @@
#if canImport(CryptoKit)
import CryptoKit
private typealias ReasoningEndpointHasher = CryptoKit.SHA256
#else
import Crypto
private typealias ReasoningEndpointHasher = Crypto.SHA256
#endif
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
@ -17,22 +26,35 @@ public final class AnthropicProvider: ModelProvider {
private let auth: TKAuthValue
private let betaHeader: String
private let additionalHeaders: [String: String]
private let reasoningProvider: String
private let reasoningModelId: String
private let reasoningBaseURL: String?
private let urlSession: URLSession
private static let requiredBetaFlags: [String] = [
"interleaved-thinking-2025-05-14",
"fine-grained-tool-streaming-2025-05-14",
]
public init(
model: LanguageModel.Anthropic,
configuration: TachikomaConfiguration,
additionalHeaders: [String: String] = [:],
authOverride: TKAuthValue? = nil,
reasoningProvider: String = "anthropic",
reasoningModelId: String? = nil,
reasoningBaseURL: String? = nil,
urlSession: URLSession = .shared,
) throws {
self.model = model
self.modelId = model.modelId
self.baseURL = configuration.getBaseURL(for: .anthropic) ?? "https://api.anthropic.com"
self.additionalHeaders = additionalHeaders
self.reasoningProvider = reasoningProvider
self.reasoningModelId = reasoningModelId ?? model.modelId
self.reasoningBaseURL = ReasoningEndpointIdentity.canonical(
reasoningBaseURL ?? (reasoningProvider == "anthropic" ? self.baseURL : nil),
)
self.urlSession = urlSession
if let authOverride {
self.auth = authOverride
@ -57,16 +79,18 @@ public final class AnthropicProvider: ModelProvider {
throw TachikomaError.authenticationFailed("ANTHROPIC_API_KEY not found")
}
self.betaHeader = Self.mergedBetaHeader(configuration: configuration, auth: self.auth)
self.betaHeader = Self.mergedBetaHeader(configuration: configuration, auth: self.auth, model: model)
let isFable = Self.isFable(model: model)
let supportsSafeStreaming = !Self.hasStreamingRefusalRisk(model: model)
self.capabilities = ModelCapabilities(
supportsVision: model.supportsVision,
supportsTools: model.supportsTools,
supportsStreaming: true,
supportsStreaming: supportsSafeStreaming,
supportsAudioInput: model.supportsAudioInput,
supportsAudioOutput: model.supportsAudioOutput,
contextLength: model.contextLength,
maxOutputTokens: 4096,
contextLength: isFable ? 1_000_000 : model.contextLength,
maxOutputTokens: isFable ? 128_000 : model.maxOutputTokens,
)
}
@ -94,6 +118,16 @@ public final class AnthropicProvider: ModelProvider {
}
/// Convenience overload that builds the beta header without a model,
/// forwarding to the model-aware variant with `model: nil`.
private static func mergedBetaHeader(configuration: TachikomaConfiguration, auth: TKAuthValue) -> String {
    return self.mergedBetaHeader(configuration: configuration, auth: auth, model: nil)
}
private static func mergedBetaHeader(
configuration: TachikomaConfiguration,
auth: TKAuthValue,
model: LanguageModel.Anthropic?,
)
-> String
{
var existing: String?
if case let .bearer(_, betaHeader) = auth {
existing = betaHeader
@ -103,6 +137,14 @@ public final class AnthropicProvider: ModelProvider {
existing = configuration.credentialValue(for: "ANTHROPIC_BETA_HEADER")
}
if let model, Self.isFable(model: model) {
return existing?
.split(separator: ",")
.map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
.filter { !$0.isEmpty }
.joined(separator: ",") ?? ""
}
return Self.mergedBetaHeader(existing: existing)
}
@ -117,9 +159,13 @@ public final class AnthropicProvider: ModelProvider {
case .disabled:
return nil
case .adaptive:
if Self.isFable(model: model) { return nil }
guard self.usesAdaptiveThinking(model: model) else { return nil }
return AnthropicThinking(type: "adaptive", budgetTokens: nil)
case let .enabled(budgetTokens):
if Self.isFable(model: model) {
return nil
}
if case .opus48 = model {
return AnthropicThinking(type: "adaptive", budgetTokens: nil)
}
@ -148,6 +194,7 @@ public final class AnthropicProvider: ModelProvider {
}
private func usesAdaptiveThinking(model: LanguageModel.Anthropic) -> Bool {
if Self.isFable(model: model) { return true }
if case .opus48 = model { return true }
if case .opus47 = model { return true }
if case .sonnet46 = model { return true }
@ -155,11 +202,12 @@ public final class AnthropicProvider: ModelProvider {
}
private func supportsEffort(model: LanguageModel.Anthropic) -> Bool {
if Self.isFable(model: model) { return true }
switch model {
case .opus48, .opus47, .opus45, .sonnet46:
true
return true
default:
false
return false
}
}
@ -202,6 +250,19 @@ public final class AnthropicProvider: ModelProvider {
}
let validatedSettings = request.settings.validated(for: .anthropic(self.model))
if
Self.isFable(model: self.model),
case .disabled = validatedSettings.providerOptions.anthropic?.thinking
{
throw TachikomaError.invalidConfiguration(
"Claude Fable 5 always uses adaptive thinking; disabled thinking is not supported",
)
}
if Self.isFable(model: self.model), request.messages.last?.role == .assistant {
throw TachikomaError.invalidConfiguration(
"Claude Fable 5 does not support assistant prefill requests",
)
}
let requestedThinking = self.anthropicThinking(
from: validatedSettings.providerOptions.anthropic?.thinking,
model: self.model,
@ -214,11 +275,19 @@ public final class AnthropicProvider: ModelProvider {
var thinking: AnthropicThinking?
let systemMessage: String?
let messages: [AnthropicMessage]
let preserveSignedThinking = requestedThinking != nil || self.requiresSignedThinkingReplay(model: self.model)
let reasoningTarget = AnthropicReasoningReplayTarget(
provider: self.reasoningProvider,
modelId: self.reasoningModelId,
endpointIdentity: self.reasoningBaseURL,
allowsLegacyUnknown: !Self.isFable(model: self.model),
)
do {
thinking = requestedThinking
(systemMessage, messages) = try AnthropicMessageConversion.convertMessagesToAnthropic(
request.messages,
thinkingEnabled: requestedThinking != nil,
thinkingEnabled: preserveSignedThinking,
reasoningTarget: reasoningTarget,
)
} catch {
// If we can't provide signed thinking blocks for a cached/history session, fall back to non-thinking mode.
@ -227,14 +296,20 @@ public final class AnthropicProvider: ModelProvider {
(systemMessage, messages) = try AnthropicMessageConversion.convertMessagesToAnthropic(
request.messages,
thinkingEnabled: false,
reasoningTarget: reasoningTarget,
)
} else {
throw error
}
}
let maxTokens = validatedSettings.maxTokens ?? self.defaultMaxTokens(for: self.model)
if !stream, Self.requiresExtendedNonStreamingTimeout(model: self.model, maxTokens: maxTokens) {
urlRequest.timeoutInterval = 1800
}
let anthropicRequest = try AnthropicMessageRequest(
model: modelId,
maxTokens: validatedSettings.maxTokens ?? 1024,
maxTokens: maxTokens,
temperature: thinking == nil ? validatedSettings.temperature : nil,
system: systemMessage,
messages: messages,
@ -270,7 +345,7 @@ public final class AnthropicProvider: ModelProvider {
}
}
let (data, response) = try await URLSession.shared.data(for: urlRequest)
let (data, response) = try await self.urlSession.data(for: urlRequest)
guard let httpResponse = response as? HTTPURLResponse else {
throw TachikomaError.networkError(NSError(domain: "Invalid response", code: 0))
@ -302,7 +377,7 @@ public final class AnthropicProvider: ModelProvider {
switch content {
case let .text(textContent):
textContent.text
case .toolUse:
case .thinking, .redactedThinking, .toolUse:
nil
}
}.joined()
@ -312,19 +387,63 @@ public final class AnthropicProvider: ModelProvider {
outputTokens: anthropicResponse.usage.outputTokens,
)
let finishReason: FinishReason? = switch anthropicResponse.stopReason {
case "end_turn": .stop
case "max_tokens": .length
case "tool_use": .toolCalls
case "stop_sequence": .stop
default: .other
let finishReason = Self.mapFinishReason(anthropicResponse.stopReason)
if finishReason == .contentFilter {
let fallbackRefusalText = if let category = anthropicResponse.stopDetails?.category {
"Request refused by Anthropic content filter (\(category))"
} else {
"Request refused by Anthropic content filter"
}
let refusalText = anthropicResponse.stopDetails?.explanation ?? fallbackRefusalText
return ProviderResponse(
text: refusalText,
usage: usage,
finishReason: finishReason,
toolCalls: nil,
reasoning: [],
assistantMessages: [],
isBillable: usage.outputTokens > 0,
)
}
// Convert tool calls if present
let toolCalls = anthropicResponse.content.compactMap { content -> AgentToolCall? in
var reasoning: [ProviderReasoningBlock] = []
var toolCalls: [AgentToolCall] = []
var assistantMessages: [ModelMessage] = []
for content in anthropicResponse.content {
switch content {
case .text:
return nil
case let .text(textContent):
if !textContent.text.isEmpty {
assistantMessages.append(.assistant(textContent.text))
}
case let .thinking(thinking):
let block = ProviderReasoningBlock(
text: thinking.thinking,
signature: thinking.signature,
type: thinking.type,
)
reasoning.append(block)
assistantMessages.append(ModelMessage(
role: .assistant,
content: [.text(thinking.thinking)],
channel: .thinking,
metadata: .init(customData: self.reasoningMetadata(
type: thinking.type,
signature: thinking.signature,
)),
))
case let .redactedThinking(thinking):
let block = ProviderReasoningBlock(
text: thinking.data,
type: thinking.type,
)
reasoning.append(block)
assistantMessages.append(ModelMessage(
role: .assistant,
content: [.text(thinking.data)],
channel: .thinking,
metadata: .init(customData: self.reasoningMetadata(type: thinking.type)),
))
case let .toolUse(toolUse):
// Convert input to AnyAgentToolValue dictionary
var arguments: [String: AnyAgentToolValue] = [:]
@ -340,11 +459,13 @@ public final class AnthropicProvider: ModelProvider {
}
}
return AgentToolCall(
let toolCall = AgentToolCall(
id: toolUse.id,
name: toolUse.name,
arguments: arguments,
)
toolCalls.append(toolCall)
assistantMessages.append(ModelMessage(role: .assistant, content: [.toolCall(toolCall)]))
}
}
@ -353,9 +474,61 @@ public final class AnthropicProvider: ModelProvider {
usage: usage,
finishReason: finishReason,
toolCalls: toolCalls.isEmpty ? nil : toolCalls,
reasoning: reasoning,
assistantMessages: assistantMessages,
)
}
/// Builds the provenance metadata attached to a reasoning message.
/// - Parameters:
///   - type: Value stored under `anthropic.thinking.type`.
///   - signature: Stored under `anthropic.thinking.signature` when non-empty.
/// - Returns: Metadata recording the reasoning provider, model, and, when
///   available, the endpoint identity.
private func reasoningMetadata(type: String, signature: String? = nil) -> [String: String] {
    var entries: [String: String] = [:]
    entries["anthropic.thinking.model"] = self.reasoningModelId
    entries["anthropic.thinking.type"] = type
    entries["tachikoma.reasoning.provider"] = self.reasoningProvider
    entries["tachikoma.reasoning.model"] = self.reasoningModelId

    // Optional entries are only written when they carry a value.
    if let signature, !signature.isEmpty {
        entries["anthropic.thinking.signature"] = signature
    }
    if let endpoint = self.reasoningBaseURL {
        entries["tachikoma.reasoning.base_url"] = endpoint
    }
    return entries
}
/// Whether signed thinking blocks must be preserved in the request even when
/// no thinking configuration was requested. Currently true for Fable models only.
private func requiresSignedThinkingReplay(model: LanguageModel.Anthropic) -> Bool {
    return Self.isFable(model: model)
}
/// Default `max_tokens` used when the request settings do not specify one.
/// Fable models default to 16384; every other model defaults to 1024.
private func defaultMaxTokens(for model: LanguageModel.Anthropic) -> Int {
    Self.isFable(model: model) ? 16384 : 1024
}
/// Whether `model` is a Fable model, as decided by
/// `LanguageModel.Anthropic.isFable(modelId:)` on its model identifier.
private static func isFable(model: LanguageModel.Anthropic) -> Bool {
    let identifier = model.modelId
    return LanguageModel.Anthropic.isFable(modelId: identifier)
}
/// Whether streaming should be disabled for `model`, as decided by
/// `LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId:)`.
private static func hasStreamingRefusalRisk(model: LanguageModel.Anthropic) -> Bool {
    let identifier = model.modelId
    return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: identifier)
}
/// Whether a non-streaming request needs the extended timeout:
/// always for Fable models, otherwise when `maxTokens` is at least 64000.
private static func requiresExtendedNonStreamingTimeout(model: LanguageModel.Anthropic, maxTokens: Int) -> Bool {
    if self.isFable(model: model) {
        return true
    }
    return maxTokens >= 64000
}
/// Translates an Anthropic `stop_reason` string into a `FinishReason`.
/// - Parameter stopReason: The raw stop reason, or `nil` when none was reported.
/// - Returns: `nil` for a missing stop reason, `.other` for unrecognized values.
static func mapFinishReason(_ stopReason: String?) -> FinishReason? {
    guard let stopReason else { return nil }
    switch stopReason {
    case "end_turn", "stop_sequence":
        return .stop
    case "max_tokens", "model_context_window_exceeded":
        return .length
    case "tool_use":
        return .toolCalls
    case "refusal":
        return .contentFilter
    default:
        return .other
    }
}
private func applyAuth(to request: inout URLRequest, secret: String) {
switch self.auth {
case .apiKey:
@ -363,10 +536,19 @@ public final class AnthropicProvider: ModelProvider {
case .bearer:
request.setValue("Bearer " + secret, forHTTPHeaderField: "Authorization")
}
request.setValue(self.betaHeader, forHTTPHeaderField: "anthropic-beta")
if !self.betaHeader.isEmpty {
request.setValue(self.betaHeader, forHTTPHeaderField: "anthropic-beta")
}
}
public func streamText(request: ProviderRequest) async throws -> AsyncThrowingStream<TextStreamDelta, Error> {
guard !Self.hasStreamingRefusalRisk(model: self.model) else {
let message = "\(self.model.modelId) streaming is disabled because Anthropic refusals require rollback-aware handling"
throw TachikomaError.invalidConfiguration(
"\(message); use generateText instead",
)
}
let urlRequest = try self.makeURLRequest(for: request, stream: true)
// Debug logging only when explicitly enabled
@ -417,7 +599,7 @@ public final class AnthropicProvider: ModelProvider {
(Data, URLResponse),
Error,
>) in
URLSession.shared.dataTask(with: urlRequest) { data, response, error in
self.urlSession.dataTask(with: urlRequest) { data, response, error in
if let error {
continuation.resume(throwing: error)
} else if let data, let response {
@ -445,7 +627,7 @@ public final class AnthropicProvider: ModelProvider {
let lines = String(data: data, encoding: .utf8)?.components(separatedBy: "\n") ?? []
#else
// macOS/iOS: Use streaming API
let (bytes, response) = try await URLSession.shared.bytes(for: urlRequest)
let (bytes, response) = try await self.urlSession.bytes(for: urlRequest)
guard let httpResponse = response as? HTTPURLResponse else {
throw TachikomaError.networkError(NSError(domain: "Invalid response", code: 0))
@ -469,6 +651,7 @@ public final class AnthropicProvider: ModelProvider {
var currentReasoningSignature: String?
var currentReasoningType: String?
var reasoningSignatureEmitted = false
var finishReason: FinishReason?
do {
for try await line in bytes.lines {
@ -502,7 +685,7 @@ public final class AnthropicProvider: ModelProvider {
currentReasoningType = nil
reasoningSignatureEmitted = false
}
continuation.yield(TextStreamDelta.done())
continuation.yield(.done(finishReason: finishReason))
break
}
@ -533,6 +716,12 @@ public final class AnthropicProvider: ModelProvider {
currentReasoningSignature = nil
currentReasoningType = block.type
reasoningSignatureEmitted = false
if block.type == "redacted_thinking", let data = block.data {
continuation.yield(TextStreamDelta.reasoning(
data,
type: "redacted_thinking",
))
}
continue
}
}
@ -637,7 +826,9 @@ public final class AnthropicProvider: ModelProvider {
case "message_delta":
// Message-level updates (usage, etc.)
// Usage is typically included in the done event, not separately
if let stopReason = event.delta?.stopReason {
finishReason = Self.mapFinishReason(stopReason)
}
continue
case "message_stop":
@ -657,7 +848,7 @@ public final class AnthropicProvider: ModelProvider {
currentReasoningType = nil
reasoningSignatureEmitted = false
}
continuation.yield(TextStreamDelta.done())
continuation.yield(.done(finishReason: finishReason))
default:
// Unknown event type, skip
@ -694,6 +885,7 @@ public final class AnthropicProvider: ModelProvider {
var currentReasoningSignature: String?
var currentReasoningType: String?
var reasoningSignatureEmitted = false
var finishReason: FinishReason?
do {
for line in lines {
@ -720,7 +912,7 @@ public final class AnthropicProvider: ModelProvider {
type: currentReasoningType,
))
}
continuation.yield(TextStreamDelta.done())
continuation.yield(.done(finishReason: finishReason))
break
}
@ -739,6 +931,12 @@ public final class AnthropicProvider: ModelProvider {
currentReasoningSignature = nil
currentReasoningType = block.type
reasoningSignatureEmitted = false
if block.type == "redacted_thinking", let data = block.data {
continuation.yield(TextStreamDelta.reasoning(
data,
type: "redacted_thinking",
))
}
}
case "content_block_delta":
if let delta = event.delta {
@ -761,6 +959,10 @@ public final class AnthropicProvider: ModelProvider {
accumulatedReasoning += thinking
}
}
case "message_delta":
if let stopReason = event.delta?.stopReason {
finishReason = Self.mapFinishReason(stopReason)
}
case "message_stop":
if !accumulatedText.isEmpty {
continuation.yield(TextStreamDelta.text(accumulatedText))
@ -772,7 +974,7 @@ public final class AnthropicProvider: ModelProvider {
type: currentReasoningType,
))
}
continuation.yield(TextStreamDelta.done())
continuation.yield(.done(finishReason: finishReason))
default:
continue
}
@ -836,6 +1038,37 @@ public final class AnthropicProvider: ModelProvider {
}
}
/// Derives a stable, non-reversible identity for a provider base URL so that
/// reasoning metadata can record which endpoint produced it.
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
enum ReasoningEndpointIdentity {
    /// Canonicalizes `rawValue` and returns `"sha256:<hex digest>"`.
    ///
    /// Canonicalization lowercases scheme and host, drops user, password, and
    /// fragment, and removes every trailing slash from the path. A bare root
    /// path (`https://host/`) is reduced to the empty path, so it yields the
    /// same identity as `https://host`.
    ///
    /// - Parameter rawValue: The base URL string; surrounding whitespace is ignored.
    /// - Returns: The hashed identity, or `nil` when the input is missing, blank,
    ///   unparseable, or lacks a scheme or host.
    static func canonical(_ rawValue: String?) -> String? {
        guard
            let trimmed = rawValue?.trimmingCharacters(in: .whitespacesAndNewlines),
            !trimmed.isEmpty,
            var components = URLComponents(string: trimmed),
            let scheme = components.scheme?.lowercased(),
            let host = components.host?.lowercased()
        else {
            return nil
        }

        components.scheme = scheme
        components.host = host
        components.user = nil
        components.password = nil
        components.fragment = nil

        // Strip all trailing slashes, including a lone root "/", so equivalent
        // endpoints with and without the slash share one identity.
        while components.path.hasSuffix("/") {
            components.path.removeLast()
        }

        guard let value = components.string else { return nil }
        let digest = ReasoningEndpointHasher.hash(data: Data(value.utf8))
            .map { String(format: "%02x", $0) }
            .joined()
        return "sha256:\(digest)"
    }
}
/// Provider for Ollama models
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public final class OllamaProvider: ModelProvider {

View File

@ -35,10 +35,11 @@ public func generateText(
var currentMessages = messages
var allSteps: [GenerationStep] = []
var totalUsage = Usage(inputTokens: 0, outputTokens: 0)
var finalResponseStartIndex = messages.count
for stepIndex in 0..<maxSteps {
let request = ProviderRequest(
messages: currentMessages,
messages: currentMessages.sanitizedForProvider(model, configuration: resolvedConfiguration),
tools: tools,
settings: settings,
)
@ -51,8 +52,23 @@ public func generateText(
try await provider.generateText(request: request)
}
// Track usage with proper session management
if let usage = response.usage {
let isContentFiltered = response.finishReason == .contentFilter
let responseText = isContentFiltered ? "" : response.text
let responseToolCalls = isContentFiltered ? [] : (response.toolCalls ?? [])
let responseReasoning = isContentFiltered ? [] : response.reasoning
let responseAssistantMessages = isContentFiltered ? [] : response.assistantMessages
let responseMessageStartIndex = currentMessages.count
finalResponseStartIndex = responseMessageStartIndex
let responseHistoryMessages = model.responseHistoryMessages(
nativeMessages: responseAssistantMessages,
text: responseText,
reasoning: responseReasoning,
toolCalls: responseToolCalls,
configuration: resolvedConfiguration,
)
// Track billable usage with proper session management.
if response.isBillable, let usage = response.usage {
let actualSessionId = sessionId ?? "generation-\(UUID().uuidString)"
// Start session if not already started
@ -86,8 +102,8 @@ public func generateText(
// Create step record
let step = GenerationStep(
stepIndex: stepIndex,
text: response.text,
toolCalls: response.toolCalls ?? [],
text: responseText,
toolCalls: responseToolCalls,
toolResults: [],
usage: response.usage,
finishReason: response.finishReason,
@ -95,18 +111,26 @@ public func generateText(
allSteps.append(step)
// Add assistant message
var assistantContent: [ModelMessage.ContentPart] = [.text(response.text)]
if isContentFiltered {
break
}
if !responseHistoryMessages.isEmpty {
currentMessages.append(contentsOf: responseHistoryMessages)
if responseHistoryMessages.allSatisfy({ $0.channel == .thinking }) {
currentMessages.append(ModelMessage(
role: .assistant,
content: [.text("")],
metadata: .init(customData: ["tachikoma.internal.boundary": "reasoning_only"]),
))
}
}
// Handle tool calls
if let toolCalls = response.toolCalls, !toolCalls.isEmpty {
// Add tool calls to assistant message
assistantContent.append(contentsOf: toolCalls.map { .toolCall($0) })
currentMessages.append(ModelMessage(role: .assistant, content: assistantContent))
if !responseToolCalls.isEmpty {
// Execute tools
var toolResults: [AgentToolResult] = []
for toolCall in toolCalls {
for toolCall in responseToolCalls {
if let tool = tools?.first(where: { $0.name == toolCall.name }) {
do {
// Debug: Log tool call details in verbose mode
@ -124,7 +148,7 @@ public func generateText(
// Create execution context with full conversation and model info
let context = ToolExecutionContext(
messages: currentMessages,
messages: currentMessages.sanitizedForToolContext(),
model: model,
settings: settings,
sessionId: sessionId ?? "generation-\(UUID().uuidString)",
@ -161,8 +185,8 @@ public func generateText(
// Update step with tool results
allSteps[stepIndex] = GenerationStep(
stepIndex: stepIndex,
text: response.text,
toolCalls: toolCalls,
text: responseText,
toolCalls: responseToolCalls,
toolResults: toolResults,
usage: response.usage,
finishReason: response.finishReason,
@ -174,17 +198,17 @@ public func generateText(
}
} else {
// No tool calls, we're done
currentMessages.append(ModelMessage(role: .assistant, content: assistantContent))
break
}
}
// Extract final text from last step
var finalText = allSteps.last?.text ?? ""
let originalFinalText = finalText
var finalFinishReason = allSteps.last?.finishReason ?? .other
// Apply stop conditions if configured
if let stopCondition = settings.stopConditions {
if finalFinishReason != .contentFilter, let stopCondition = settings.stopConditions {
// Check if we should stop and truncate the text
if await stopCondition.shouldStop(text: finalText, delta: nil) {
// Truncate text based on the type of stop condition
@ -217,13 +241,16 @@ public func generateText(
}
}
}
let finalMessages = finalText == originalFinalText
? currentMessages
: currentMessages.replacingGeneratedAssistantText(after: finalResponseStartIndex, with: finalText)
return GenerateTextResult(
text: finalText,
usage: totalUsage,
finishReason: finalFinishReason,
steps: allSteps,
messages: currentMessages,
messages: finalMessages,
)
}
@ -254,6 +281,9 @@ public func streamText(
{
// Debug logging only when explicitly enabled via environment variable or verbose flag
let resolvedConfiguration = TachikomaConfiguration.resolve(configuration)
guard model.supportsStreaming else {
throw TachikomaError.invalidConfiguration("\(model.modelId) does not support streaming")
}
let debugEnabled = ProcessInfo.processInfo.environment["DEBUG_TACHIKOMA"] != nil ||
resolvedConfiguration.verbose
if debugEnabled {
@ -275,7 +305,7 @@ public func streamText(
}
let request = ProviderRequest(
messages: messages,
messages: messages.sanitizedForProvider(model, configuration: resolvedConfiguration),
tools: tools,
settings: settings,
)
@ -296,12 +326,6 @@ public func streamText(
stream = try await provider.streamText(request: request)
}
// Apply stop conditions if configured
if let stopCondition = settings.stopConditions {
// Wrap the stream with stop condition checking
stream = stream.stopWhen(stopCondition)
}
// Use provided session or create a new one for tracking streaming usage
let actualSessionId = sessionId ?? "streaming-\(UUID().uuidString)"
if sessionId == nil {
@ -311,23 +335,84 @@ public func streamText(
// Wrap the stream to track usage when it completes
let capturedModel = model
let capturedSessionId = actualSessionId
let capturedStream = stream
let shouldEndSession = sessionId == nil
let buffersUntilDone = model.buffersTextStreamUntilDone(settings: settings)
if !buffersUntilDone, let stopCondition = settings.stopConditions {
stream = stream.stopWhen(stopCondition)
}
let capturedStream = stream
let capturedStopCondition = buffersUntilDone ? settings.stopConditions : nil
let trackedStream = AsyncThrowingStream<TextStreamDelta, Error> { continuation in
Task {
do {
let totalInputTokens = 0
var totalOutputTokens = 0
var bufferedDeltas: [TextStreamDelta] = []
var bufferedVisibleText = ""
var didReceiveTerminal = false
var didTriggerLocalStop = false
for try await delta in capturedStream {
continuation.yield(delta)
func track(_ delta: TextStreamDelta) {
// Track tokens as they come in (approximate)
if case .textDelta = delta.type, let content = delta.content {
// Rough approximation: ~4 characters per token
totalOutputTokens += max(1, content.count / 4)
}
}
func yieldAndTrack(_ delta: TextStreamDelta) {
track(delta)
continuation.yield(delta)
}
if let capturedStopCondition {
await capturedStopCondition.reset()
}
for try await delta in capturedStream {
if buffersUntilDone, delta.type != .done {
if !didTriggerLocalStop {
bufferedDeltas.append(delta)
track(delta)
if
let capturedStopCondition,
case .textDelta = delta.type,
let content = delta.content
{
bufferedVisibleText += content
didTriggerLocalStop = await capturedStopCondition.shouldStop(
text: bufferedVisibleText,
delta: content,
)
}
}
continue
}
if case .done = delta.type {
didReceiveTerminal = true
if buffersUntilDone {
if delta.finishReason == .contentFilter {
bufferedDeltas.removeAll()
yieldAndTrack(delta)
} else {
for bufferedDelta in bufferedDeltas {
continuation.yield(bufferedDelta)
}
bufferedDeltas.removeAll()
if didTriggerLocalStop {
yieldAndTrack(TextStreamDelta.done(usage: delta.usage, finishReason: .stop))
} else {
yieldAndTrack(delta)
}
}
} else {
yieldAndTrack(delta)
}
} else {
yieldAndTrack(delta)
}
if case .done = delta.type {
// Record final usage (this is approximate for streaming)
@ -348,6 +433,10 @@ public func streamText(
}
}
if buffersUntilDone, !didReceiveTerminal, !bufferedDeltas.isEmpty {
throw TachikomaError.apiError("Stream ended before provider completion status was received")
}
continuation.finish()
} catch {
if shouldEndSession {
@ -392,7 +481,7 @@ public func generateObject<T: Codable & Sendable>(
let provider = try resolvedConfiguration.makeProvider(for: model)
let request = ProviderRequest(
messages: messages,
messages: messages.sanitizedForProvider(model, configuration: resolvedConfiguration),
tools: nil,
settings: settings,
outputFormat: .json,
@ -406,6 +495,10 @@ public func generateObject<T: Codable & Sendable>(
try await provider.generateText(request: request)
}
if response.finishReason == .contentFilter {
throw TachikomaError.apiError("Response was blocked by the provider content filter")
}
// Parse the JSON response into the expected type
guard let jsonData = response.text.data(using: .utf8) else {
throw TachikomaError.invalidInput("Response text is not valid UTF-8")
@ -446,11 +539,14 @@ public func streamObject<T: Codable & Sendable>(
-> StreamObjectResult<T>
{
let resolvedConfiguration = TachikomaConfiguration.resolve(configuration)
guard model.supportsStreaming else {
throw TachikomaError.invalidConfiguration("\(model.modelId) does not support streaming")
}
let provider = try resolvedConfiguration.makeProvider(for: model)
// Create request with JSON output format
let request = ProviderRequest(
messages: messages,
messages: messages.sanitizedForProvider(model, configuration: resolvedConfiguration),
tools: nil,
settings: settings,
outputFormat: .json,
@ -458,6 +554,7 @@ public func streamObject<T: Codable & Sendable>(
// Get the text stream from the provider
let stream = try await provider.streamText(request: request)
let buffersUntilDone = model.buffersObjectStreamUntilDone(settings: settings)
// Create a new stream that attempts to parse partial JSON objects
let objectStream = AsyncThrowingStream<ObjectStreamDelta<T>, Error> { continuation in
@ -466,6 +563,37 @@ public func streamObject<T: Codable & Sendable>(
var accumulatedText = ""
var lastValidObject: T?
var hasStarted = false
var bufferedStartDelta: ObjectStreamDelta<T>?
var didFinishObject = false
/// Emits the terminal `.complete` and `.done` deltas for the accumulated JSON text.
///
/// - Parameter allowLastValidObjectFallback: When `true`, the most recent successfully parsed
///   object may stand in for the final object if `accumulatedText` does not decode as `T`.
/// - Throws: `TachikomaError.invalidInput` when neither the accumulated text nor an allowed
///   fallback yields an object.
func publishCompleteObject(allowLastValidObjectFallback: Bool) throws {
    // A start delta that was held back while buffering is released before anything else.
    if buffersUntilDone, let bufferedStartDelta {
        continuation.yield(bufferedStartDelta)
    }

    // Prefer a strict decode of everything received; fall back only when permitted.
    let decodedObject = accumulatedText.data(using: .utf8)
        .flatMap { try? JSONDecoder().decode(T.self, from: $0) }
    let fallbackObject = allowLastValidObjectFallback ? lastValidObject : nil

    guard let completedObject = decodedObject ?? fallbackObject else {
        throw TachikomaError.invalidInput(
            "Failed to parse complete object from stream",
        )
    }

    continuation.yield(ObjectStreamDelta(
        type: .complete,
        object: completedObject,
        rawText: accumulatedText,
    ))
    continuation.yield(ObjectStreamDelta(type: .done))
    didFinishObject = true
}
for try await delta in stream {
if case .textDelta = delta.type, let content = delta.content {
@ -474,7 +602,16 @@ public func streamObject<T: Codable & Sendable>(
// Signal stream start
if !hasStarted {
hasStarted = true
continuation.yield(ObjectStreamDelta(type: .start))
let startDelta = ObjectStreamDelta<T>(type: .start)
if buffersUntilDone {
bufferedStartDelta = startDelta
} else {
continuation.yield(startDelta)
}
}
if buffersUntilDone {
continue
}
// Attempt to parse the accumulated JSON
@ -482,44 +619,51 @@ public func streamObject<T: Codable & Sendable>(
// Try to parse as complete object
if let object = try? JSONDecoder().decode(T.self, from: jsonData) {
lastValidObject = object
continuation.yield(ObjectStreamDelta(
let objectDelta = ObjectStreamDelta(
type: .partial,
object: object,
rawText: accumulatedText,
))
)
continuation.yield(objectDelta)
} else if let partialObject = attemptPartialParse(T.self, from: accumulatedText) {
// Attempt to parse as partial object
lastValidObject = partialObject
continuation.yield(ObjectStreamDelta(
let objectDelta = ObjectStreamDelta(
type: .partial,
object: partialObject,
rawText: accumulatedText,
))
)
continuation.yield(objectDelta)
}
}
} else if case .done = delta.type {
// Final parse attempt
if
let jsonData = accumulatedText.data(using: .utf8),
let finalObject = try? JSONDecoder().decode(T.self, from: jsonData)
{
continuation.yield(ObjectStreamDelta(
type: .complete,
object: finalObject,
rawText: accumulatedText,
))
} else if let lastValidObject {
// If we have a last valid object, use it as complete
continuation.yield(ObjectStreamDelta(
type: .complete,
object: lastValidObject,
rawText: accumulatedText,
))
} else {
throw TachikomaError.invalidInput(
"Failed to parse complete object from stream",
)
if delta.finishReason == .contentFilter {
throw TachikomaError.apiError("Response was blocked by the provider content filter")
}
try publishCompleteObject(allowLastValidObjectFallback: delta.finishReason == .stop || delta
.finishReason == nil)
}
}
if !didFinishObject, hasStarted {
if buffersUntilDone {
throw TachikomaError.apiError("Stream ended before provider completion status was received")
} else if
let jsonData = accumulatedText.data(using: .utf8),
let finalObject = try? JSONDecoder().decode(T.self, from: jsonData)
{
continuation.yield(ObjectStreamDelta(
type: .complete,
object: finalObject,
rawText: accumulatedText,
))
continuation.yield(ObjectStreamDelta(type: .done))
} else if let lastValidObject {
continuation.yield(ObjectStreamDelta(
type: .complete,
object: lastValidObject,
rawText: accumulatedText,
))
continuation.yield(ObjectStreamDelta(type: .done))
}
}
@ -599,6 +743,463 @@ private func fixPartialJSON(_ json: String) -> String {
return fixed
}
extension LanguageModel {
    /// Whether `streamText` should hold text deltas back until the terminal delta arrives.
    ///
    /// True when the model has Anthropic streaming-refusal risk, when the caller requested
    /// `.untilTerminal` buffering, or when stop conditions are set on a model that
    /// `canEmitTerminalContentFilterAfterText`.
    fileprivate func buffersTextStreamUntilDone(settings: GenerationSettings) -> Bool {
        if self.hasAnthropicStreamingRefusalRisk || settings.streamBuffering == .untilTerminal {
            return true
        }
        return settings.stopConditions != nil && self.canEmitTerminalContentFilterAfterText
    }

    /// Whether `streamObject` should hold object deltas back until the terminal delta arrives.
    fileprivate func buffersObjectStreamUntilDone(settings: GenerationSettings) -> Bool {
        if settings.streamBuffering == .untilTerminal {
            return true
        }
        return self.hasAnthropicStreamingRefusalRisk
    }

    /// Resolves the model identifier this case carries and asks
    /// `LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId:)` about it.
    ///
    /// Cases not listed here never report the risk.
    private var hasAnthropicStreamingRefusalRisk: Bool {
        let candidateModelId: String
        switch self {
        case let .anthropic(model):
            candidateModelId = model.modelId
        case let .anthropicCompatible(modelId, _):
            candidateModelId = modelId
        case let .openRouter(modelId), let .together(modelId), let .openaiCompatible(modelId, _):
            candidateModelId = modelId
        case let .custom(provider):
            // For a registered Anthropic-kind custom provider, test the parsed model part;
            // otherwise test the provider's model id as-is.
            if
                let parsed = ProviderParser.parse(provider.modelId),
                CustomProviderRegistry.shared.get(parsed.provider)?.kind == .anthropic
            {
                candidateModelId = parsed.model
            } else {
                candidateModelId = provider.modelId
            }
        default:
            return false
        }
        return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: candidateModelId)
    }

    /// Whether this model is treated as able to end a stream with a content-filter finish
    /// reason after text was already produced.
    private var canEmitTerminalContentFilterAfterText: Bool {
        switch self {
        case .openai,
             .openaiCompatible,
             .openRouter,
             .together,
             .replicate,
             .google,
             .mistral,
             .groq,
             .grok,
             .azureOpenAI:
            return true
        case let .custom(provider):
            // Only custom providers that are registered can be classified by kind.
            if
                let parsed = ProviderParser.parse(provider.modelId),
                let registeredProvider = CustomProviderRegistry.shared.get(parsed.provider)
            {
                switch registeredProvider.kind {
                case .openai:
                    return true
                case .anthropic:
                    return false
                }
            }
            return false
        default:
            return false
        }
    }
}
/// Identifies the provider, model, and endpoint that stored reasoning metadata is compared against.
private struct ReasoningReplayTarget {
    let provider: String
    let modelId: String
    let baseURL: String?
    /// Consulted by `sanitizedForProvider` for thinking messages that carry no
    /// `anthropic.thinking.model` entry.
    let allowsLegacyUnknown: Bool

    /// `baseURL` passed through `ReasoningEndpointIdentity.canonical`.
    var endpointIdentity: String? {
        ReasoningEndpointIdentity.canonical(self.baseURL)
    }

    /// Returns `true` when the `tachikoma.reasoning.*` provider, model, and base URL entries in
    /// `customData` all equal this target's values.
    func matches(_ customData: [String: String]) -> Bool {
        customData["tachikoma.reasoning.provider"] == self.provider
            && customData["tachikoma.reasoning.model"] == self.modelId
            && customData["tachikoma.reasoning.base_url"] == self.endpointIdentity
    }
}
extension [ModelMessage] {
    /// Rewrites the text parts of assistant messages at or after `prefixCount` so that, read in
    /// order, they spell out `text`.
    ///
    /// Each original text part receives at most as many characters as it previously held.
    /// Text parts encountered after `text` is exhausted are dropped, and non-text parts are kept
    /// unchanged. Thinking-channel and non-assistant messages are left untouched.
    ///
    /// - Returns: The receiver unchanged when `prefixCount` is not a valid index.
    fileprivate func replacingGeneratedAssistantText(after prefixCount: Int, with text: String) -> [ModelMessage] {
        guard self.indices.contains(prefixCount) else {
            return self
        }

        var rewritten = self
        var consumed = text.startIndex

        for position in prefixCount..<rewritten.endIndex {
            let original = rewritten[position]
            if original.role != .assistant || original.channel == .thinking {
                continue
            }

            var parts: [ModelMessage.ContentPart] = []
            for part in original.content {
                guard case let .text(originalText) = part else {
                    parts.append(part)
                    continue
                }
                // Nothing left to hand out: drop this text part entirely.
                if consumed >= text.endIndex {
                    continue
                }
                // Advance by the part's original length, clamped to the end of `text`.
                let sliceEnd = text.index(
                    consumed,
                    offsetBy: originalText.count,
                    limitedBy: text.endIndex,
                ) ?? text.endIndex
                parts.append(.text(String(text[consumed..<sliceEnd])))
                consumed = sliceEnd
            }

            rewritten[position] = ModelMessage(
                id: original.id,
                role: original.role,
                content: parts,
                timestamp: original.timestamp,
                channel: original.channel,
                metadata: original.metadata,
            )
        }
        return rewritten
    }
}
extension [ModelMessage] {
    /// Returns the messages to send to `model`, keeping thinking-channel messages only when the
    /// model has a replay target and the message's metadata matches it.
    ///
    /// - With an Anthropic thinking replay target: a thinking message is kept when it has no
    ///   OpenRouter replay metadata and either names the target model and matches the target, or
    ///   has no producer model while the target allows legacy messages that carry a thinking type.
    /// - With an OpenRouter replay target: a thinking message is kept when it has OpenRouter
    ///   replay metadata that matches the target.
    /// - Otherwise: all thinking messages and synthetic reasoning boundaries are removed.
    ///
    /// In the first two cases a synthetic reasoning boundary is kept only when the message kept
    /// immediately before it is a thinking message.
    fileprivate func sanitizedForProvider(
        _ model: LanguageModel,
        configuration: TachikomaConfiguration,
    )
        -> [ModelMessage]
    {
        // Shared walk: boundaries and non-thinking messages are handled uniformly; the predicate
        // decides the fate of thinking messages only.
        func rebuilt(keepingThinkingWhere shouldReplay: (ModelMessage) -> Bool) -> [ModelMessage] {
            var kept: [ModelMessage] = []
            for message in self {
                if message.isSyntheticReasoningBoundary {
                    if kept.last?.channel == .thinking {
                        kept.append(message)
                    }
                } else if message.channel != .thinking || shouldReplay(message) {
                    kept.append(message)
                }
            }
            return kept
        }

        if let target = model.anthropicThinkingReplayTarget(configuration: configuration) {
            return rebuilt { message in
                if message.hasOpenRouterReasoningReplayMetadata {
                    return false
                }
                let customData = message.metadata?.customData ?? [:]
                guard let producerModel = customData["anthropic.thinking.model"] else {
                    return target.allowsLegacyUnknown && customData["anthropic.thinking.type"] != nil
                }
                return producerModel == target.modelId && target.matches(customData)
            }
        }

        if let target = model.openRouterReasoningReplayTarget(configuration: configuration) {
            return rebuilt { message in
                message.hasOpenRouterReasoningReplayMetadata &&
                    target.matches(message.metadata?.customData ?? [:])
            }
        }

        return self.filter { message in
            !message.isSyntheticReasoningBoundary && message.channel != .thinking
        }
    }
}
extension ModelMessage {
    /// `true` when the custom metadata contains any `anthropic.thinking.*` model, type, or
    /// signature entry.
    private var hasAnthropicThinkingReplayMetadata: Bool {
        guard let customData = metadata?.customData else { return false }
        let keys = [
            "anthropic.thinking.model",
            "anthropic.thinking.type",
            "anthropic.thinking.signature",
        ]
        return keys.contains { customData[$0] != nil }
    }

    /// `true` when the custom metadata contains `openrouter.reasoning_details` or
    /// `openrouter.reasoning`.
    fileprivate var hasOpenRouterReasoningReplayMetadata: Bool {
        guard let customData = metadata?.customData else { return false }
        let keys = ["openrouter.reasoning_details", "openrouter.reasoning"]
        return keys.contains { customData[$0] != nil }
    }

    /// `true` when either the Anthropic or the OpenRouter replay metadata is present.
    private var hasProviderReasoningReplayMetadata: Bool {
        if self.hasAnthropicThinkingReplayMetadata {
            return true
        }
        return self.hasOpenRouterReasoningReplayMetadata
    }

    /// `true` when the message is tagged `tachikoma.internal.boundary == "reasoning_only"`.
    fileprivate var isSyntheticReasoningBoundary: Bool {
        let marker = metadata?.customData?["tachikoma.internal.boundary"]
        return marker == "reasoning_only"
    }
}
extension [ModelMessage] {
    /// Drops thinking-channel messages and synthetic reasoning boundaries.
    fileprivate func sanitizedForToolContext() -> [ModelMessage] {
        self.filter { message in
            message.channel != .thinking && !message.isSyntheticReasoningBoundary
        }
    }

    /// Whether `text` already appears among the non-thinking assistant text parts, either as a
    /// single part or as the concatenation of all of them. Empty `text` counts as present.
    fileprivate func containsAssistantText(_ text: String) -> Bool {
        if text.isEmpty {
            return true
        }
        var fragments: [String] = []
        for message in self where message.role == .assistant && message.channel != .thinking {
            for case let .text(value) in message.content {
                fragments.append(value)
            }
        }
        return fragments.contains(text) || fragments.joined() == text
    }

    /// Whether an assistant thinking message with a text part already represents `reasoning`.
    ///
    /// When `reasoning` has a non-empty signature, the message's stored signature
    /// (`anthropic.thinking.signature` or `tachikoma.reasoning.signature`) is compared;
    /// otherwise the text itself is compared.
    fileprivate func containsReasoningBlock(_ reasoning: ProviderReasoningBlock) -> Bool {
        self.contains { message in
            guard message.role == .assistant, message.channel == .thinking else {
                return false
            }
            return message.content.contains { part in
                guard case let .text(value) = part else { return false }
                guard let signature = reasoning.signature, !signature.isEmpty else {
                    return value == reasoning.text
                }
                let customData = message.metadata?.customData
                return customData?["anthropic.thinking.signature"] == signature ||
                    customData?["tachikoma.reasoning.signature"] == signature
            }
        }
    }

    /// Whether any assistant message contains a tool call with the given `id`.
    fileprivate func containsToolCall(id: String) -> Bool {
        self.contains { message in
            guard message.role == .assistant else { return false }
            return message.content.contains { part in
                guard case let .toolCall(toolCall) = part else { return false }
                return toolCall.id == id
            }
        }
    }
}
extension LanguageModel {
/// Builds the history messages for a response, starting from the provider's native messages
/// and adding whatever reasoning, text, or tool calls they do not already contain.
///
/// - Parameters:
///   - nativeMessages: Messages used as the starting history.
///   - text: Response text; appended in a fallback assistant message when not already present.
///   - reasoning: Reasoning blocks; each missing one is appended as a thinking-channel message.
///   - toolCalls: Tool calls; missing ones are appended to the fallback assistant message.
///   - configuration: Used to derive the metadata attached to appended reasoning messages.
/// - Returns: The completed history.
fileprivate func responseHistoryMessages(
    nativeMessages: [ModelMessage],
    text: String,
    reasoning: [ProviderReasoningBlock],
    toolCalls: [AgentToolCall],
    configuration: TachikomaConfiguration,
)
    -> [ModelMessage]
{
    var history = nativeMessages

    for block in reasoning {
        if history.containsReasoningBlock(block) {
            continue
        }
        let blockMetadata = self.anthropicThinkingMetadata(
            for: block,
            configuration: configuration,
        )
        history.append(ModelMessage(
            role: .assistant,
            content: [.text(block.text)],
            channel: .thinking,
            metadata: .init(customData: blockMetadata),
        ))
    }

    let absentToolCalls = toolCalls.filter { !history.containsToolCall(id: $0.id) }
    let textIsAbsent = !history.containsAssistantText(text)
    // With no native messages, no text, and no tool calls to add, an empty-text assistant
    // message is still appended.
    let needsFallbackBoundary = nativeMessages.isEmpty && text.isEmpty && absentToolCalls.isEmpty

    if !textIsAbsent, absentToolCalls.isEmpty, !needsFallbackBoundary {
        return history
    }

    let textParts: [ModelMessage.ContentPart] = (textIsAbsent || needsFallbackBoundary) ? [.text(text)] : []
    let toolCallParts = absentToolCalls.map { ModelMessage.ContentPart.toolCall($0) }
    history.append(ModelMessage(role: .assistant, content: textParts + toolCallParts))
    return history
}
/// Describes the provider, model, and endpoint that thinking messages must match to be
/// replayed to this model, or `nil` when the model has no such target.
///
/// Targets for Anthropic-style models allow legacy (unlabelled) thinking messages unless
/// `LanguageModel.Anthropic.isFable` reports the model id as Fable; MiniMax targets always
/// allow them.
fileprivate func anthropicThinkingReplayTarget(configuration: TachikomaConfiguration) -> ReasoningReplayTarget? {
    // Builds a target whose legacy allowance is derived from the Fable check.
    func fableAwareTarget(provider: String, modelId: String, baseURL: String?) -> ReasoningReplayTarget {
        ReasoningReplayTarget(
            provider: provider,
            modelId: modelId,
            baseURL: baseURL,
            allowsLegacyUnknown: !LanguageModel.Anthropic.isFable(modelId: modelId),
        )
    }

    switch self {
    case let .anthropic(model):
        return fableAwareTarget(
            provider: "anthropic",
            modelId: model.modelId,
            baseURL: configuration.getBaseURL(for: .anthropic) ?? Provider.anthropic.defaultBaseURL,
        )

    case let .anthropicCompatible(modelId, baseURL):
        return fableAwareTarget(provider: "anthropic-compatible", modelId: modelId, baseURL: baseURL)

    case let .minimax(model):
        return ReasoningReplayTarget(
            provider: "minimax",
            modelId: model.modelId,
            baseURL: configuration.getBaseURL(for: .minimax) ?? Provider.minimax.defaultBaseURL,
            allowsLegacyUnknown: true,
        )

    case let .minimaxCN(model):
        return ReasoningReplayTarget(
            provider: "minimax-cn",
            modelId: model.modelId,
            baseURL: configuration.getBaseURL(for: .minimaxCN) ?? Provider.minimaxCN.defaultBaseURL,
            allowsLegacyUnknown: true,
        )

    case let .custom(provider):
        // Concrete provider types are recognised first.
        if let directAnthropicProvider = provider as? AnthropicProvider {
            return fableAwareTarget(
                provider: "anthropic",
                modelId: directAnthropicProvider.modelId,
                baseURL: directAnthropicProvider.baseURL ?? Provider.anthropic.defaultBaseURL,
            )
        }
        if let compatibleProvider = provider as? AnthropicCompatibleProvider {
            return fableAwareTarget(
                provider: "anthropic-compatible",
                modelId: compatibleProvider.modelId,
                baseURL: compatibleProvider.baseURL,
            )
        }

        // Next, a registered custom provider of Anthropic kind.
        if
            let parsed = ProviderParser.parse(provider.modelId),
            let registeredProvider = CustomProviderRegistry.shared.get(parsed.provider),
            registeredProvider.kind == .anthropic
        {
            return fableAwareTarget(
                provider: "custom-anthropic",
                modelId: parsed.model,
                baseURL: registeredProvider.baseURL,
            )
        }

        // Last resort: match on the model id's spelling.
        let looksAnthropic = provider.modelId.contains("claude") || provider.modelId.contains("anthropic")
        guard looksAnthropic else {
            return nil
        }
        return fableAwareTarget(
            provider: "custom-anthropic",
            modelId: provider.modelId,
            baseURL: provider.baseURL,
        )

    default:
        return nil
    }
}
/// Describes the OpenRouter replay target for this model, or `nil` for any non-OpenRouter case.
///
/// The base URL comes from the configuration's `openrouter` custom entry and falls back to
/// `https://openrouter.ai/api/v1`.
fileprivate func openRouterReasoningReplayTarget(configuration: TachikomaConfiguration) -> ReasoningReplayTarget? {
    guard case let .openRouter(modelId) = self else {
        return nil
    }
    let baseURL = configuration.getBaseURL(for: .custom("openrouter")) ?? "https://openrouter.ai/api/v1"
    return ReasoningReplayTarget(
        provider: "openrouter",
        modelId: modelId,
        baseURL: baseURL,
        allowsLegacyUnknown: false,
    )
}
/// Builds the custom metadata stored on a thinking message created from `reasoning`.
///
/// The shape depends on the model:
/// - OpenRouter target with raw JSON: `openrouter.reasoning_details` plus `tachikoma.reasoning.*`.
/// - OpenRouter target with type `openrouter_reasoning`: `openrouter.reasoning` plus
///   `tachikoma.reasoning.*`.
/// - Anthropic thinking target: `anthropic.thinking.*` plus `tachikoma.reasoning.*`.
/// - No target: only `tachikoma.reasoning.type` and, when non-empty, the signature.
private func anthropicThinkingMetadata(
    for reasoning: ProviderReasoningBlock,
    configuration: TachikomaConfiguration,
)
    -> [String: String]
{
    // Both OpenRouter shapes differ only in the payload key and value.
    func openRouterMetadata(
        payloadKey: String,
        payload: String,
        target: ReasoningReplayTarget,
    )
        -> [String: String]
    {
        var metadata = [
            payloadKey: payload,
            "tachikoma.reasoning.type": reasoning.type,
            "tachikoma.reasoning.provider": target.provider,
            "tachikoma.reasoning.model": target.modelId,
        ]
        if let endpointIdentity = target.endpointIdentity {
            metadata["tachikoma.reasoning.base_url"] = endpointIdentity
        }
        return metadata
    }

    if
        let rawJSON = reasoning.rawJSON,
        let target = self.openRouterReasoningReplayTarget(configuration: configuration)
    {
        return openRouterMetadata(
            payloadKey: "openrouter.reasoning_details",
            payload: rawJSON,
            target: target,
        )
    }

    if
        reasoning.type == "openrouter_reasoning",
        let target = self.openRouterReasoningReplayTarget(configuration: configuration)
    {
        return openRouterMetadata(
            payloadKey: "openrouter.reasoning",
            payload: reasoning.text,
            target: target,
        )
    }

    var customData: [String: String]
    let signatureKey: String
    if let target = self.anthropicThinkingReplayTarget(configuration: configuration) {
        customData = [
            "anthropic.thinking.type": reasoning.type,
            "anthropic.thinking.model": target.modelId,
            "tachikoma.reasoning.provider": target.provider,
            "tachikoma.reasoning.model": target.modelId,
        ]
        if let endpointIdentity = target.endpointIdentity {
            customData["tachikoma.reasoning.base_url"] = endpointIdentity
        }
        signatureKey = "anthropic.thinking.signature"
    } else {
        customData = ["tachikoma.reasoning.type": reasoning.type]
        signatureKey = "tachikoma.reasoning.signature"
    }

    // Empty signatures are never stored.
    if let signature = reasoning.signature, !signature.isEmpty {
        customData[signatureKey] = signature
    }
    return customData
}
}
// MARK: - Convenience Functions
/// Simple text generation from a prompt (convenience wrapper) - with Model enum
@ -746,7 +1347,7 @@ public func analyze(
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public func stream(
_ prompt: String,
using model: LanguageModel = .default,
using model: LanguageModel = .defaultStreaming,
system: String? = nil,
maxTokens: Int? = nil,
temperature: Double? = nil,

View File

@ -259,7 +259,7 @@ public final class ModelCapabilityRegistry: @unchecked Sendable {
),
)
// Opus 4.7+ maps requested thinking to Anthropic's adaptive thinking request shape.
// Fable 5 and Opus 4.7+ map requested thinking to Anthropic's adaptive thinking request shape.
let claudeAdaptiveThinkingCapabilities = ModelParameterCapabilities(
supportsTemperature: false,
supportsTopP: false,
@ -270,6 +270,7 @@ public final class ModelCapabilityRegistry: @unchecked Sendable {
),
excludedParameters: ["temperature", "topP", "topK"],
)
self.capabilities["anthropic:claude-fable-5"] = claudeAdaptiveThinkingCapabilities
self.capabilities["anthropic:claude-opus-4-8"] = claudeAdaptiveThinkingCapabilities
self.capabilities["anthropic:claude-opus-4-7"] = claudeAdaptiveThinkingCapabilities
self.capabilities["anthropic:claude-opus-4-5"] = claude4Capabilities
@ -344,6 +345,19 @@ public final class ModelCapabilityRegistry: @unchecked Sendable {
return registered
}
if self.isAnthropicFableCompatible(model) {
return ModelParameterCapabilities(
supportsTemperature: false,
supportsTopP: false,
supportsTopK: false,
supportedProviderOptions: .init(
supportsThinking: true,
supportsCacheControl: true,
),
excludedParameters: ["temperature", "topP", "topK"],
)
}
// Return provider-based defaults
switch model {
case .openai:
@ -382,6 +396,30 @@ public final class ModelCapabilityRegistry: @unchecked Sendable {
return ModelParameterCapabilities()
}
}
/// Whether `model` carries a model id that `LanguageModel.Anthropic.isFable` accepts.
///
/// Custom providers additionally have to parse into provider and model parts and be
/// registered with Anthropic kind; all cases not listed return `false`.
private func isAnthropicFableCompatible(_ model: LanguageModel) -> Bool {
    let candidateModelId: String
    switch model {
    case let .anthropic(anthropic):
        candidateModelId = anthropic.modelId
    case let .anthropicCompatible(modelId, _):
        candidateModelId = modelId
    case let .openRouter(modelId),
         let .openaiCompatible(modelId, _),
         let .together(modelId):
        candidateModelId = modelId
    case let .custom(provider):
        guard let parsed = ProviderParser.parse(provider.modelId) else {
            return false
        }
        return LanguageModel.Anthropic.isFable(modelId: parsed.model) &&
            CustomProviderRegistry.shared.get(parsed.provider)?.kind == .anthropic
    default:
        return false
    }
    return LanguageModel.Anthropic.isFable(modelId: candidateModelId)
}
}
// MARK: - GenerationSettings Extension
@ -441,6 +479,7 @@ extension GenerationSettings {
stopConditions: stopConditions,
seed: seed,
providerOptions: adjustedProviderOptions,
streamBuffering: self.streamBuffering,
)
}

View File

@ -36,14 +36,24 @@ struct OpenAICompatibleHelper {
}
// Extract stop sequences from stop conditions
let stopSequences = Self.extractStopSequences(from: request.settings.stopConditions)
let settings = Self.validatedSettings(
request.settings,
providerName: providerName,
modelId: modelId,
baseURL: baseURL,
)
let stopSequences = Self.extractStopSequences(from: settings.stopConditions)
// Convert request to OpenAI-compatible format
let openAIRequest = try OpenAIChatRequest(
model: modelId,
messages: convertMessages(request.messages),
temperature: request.settings.temperature,
maxTokens: request.settings.maxTokens,
messages: convertMessages(
request.messages,
replayOpenRouterReasoningForModel: providerName == "OpenRouter" ? modelId : nil,
replayOpenRouterReasoningForBaseURL: providerName == "OpenRouter" ? baseURL : nil,
),
temperature: settings.temperature,
maxTokens: settings.maxTokens,
tools: request.tools?.compactMap { try self.convertTool($0) },
stream: false,
stop: stopSequences.isEmpty ? nil : stopSequences,
@ -100,14 +110,9 @@ struct OpenAICompatibleHelper {
let usage = openAIResponse.usage.map {
Usage(inputTokens: $0.promptTokens ?? 0, outputTokens: $0.completionTokens ?? 0)
}
let reasoning = Self.reasoningBlocks(from: choice.message)
let finishReason: FinishReason? = switch choice.finishReason {
case "stop": .stop
case "length": .length
case "tool_calls": .toolCalls
case "content_filter": .contentFilter
default: .other
}
let finishReason = Self.mapFinishReason(choice.finishReason)
// Convert tool calls if present
let toolCalls = choice.message.toolCalls?.compactMap { openAIToolCall -> AgentToolCall? in
@ -142,6 +147,7 @@ struct OpenAICompatibleHelper {
usage: usage,
finishReason: finishReason,
toolCalls: toolCalls,
reasoning: reasoning,
)
}
@ -173,14 +179,27 @@ struct OpenAICompatibleHelper {
}
// Extract stop sequences from stop conditions
let stopSequences = Self.extractStopSequences(from: request.settings.stopConditions)
guard !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId) else {
throw TachikomaError.invalidConfiguration("\(modelId) does not support streaming")
}
let settings = Self.validatedSettings(
request.settings,
providerName: providerName,
modelId: modelId,
baseURL: baseURL,
)
let stopSequences = Self.extractStopSequences(from: settings.stopConditions)
// Convert request to OpenAI-compatible format
let openAIRequest = try OpenAIChatRequest(
model: modelId,
messages: convertMessages(request.messages),
temperature: request.settings.temperature,
maxTokens: request.settings.maxTokens,
messages: convertMessages(
request.messages,
replayOpenRouterReasoningForModel: providerName == "OpenRouter" ? modelId : nil,
replayOpenRouterReasoningForBaseURL: providerName == "OpenRouter" ? baseURL : nil,
),
temperature: settings.temperature,
maxTokens: settings.maxTokens,
tools: request.tools?.compactMap { try self.convertTool($0) },
stream: true,
stop: stopSequences.isEmpty ? nil : stopSequences,
@ -346,8 +365,10 @@ struct OpenAICompatibleHelper {
}
}
if choice.finishReason != nil {
continuation.yield(TextStreamDelta.done())
if let finishReason = choice.finishReason {
continuation.yield(TextStreamDelta.done(
finishReason: Self.mapFinishReason(finishReason),
))
break
}
}
@ -427,9 +448,10 @@ struct OpenAICompatibleHelper {
}
if let finishReason = choice.finishReason {
if finishReason == "stop" || finishReason == "tool_calls" {
continuation.yield(TextStreamDelta.done())
}
continuation.yield(TextStreamDelta.done(
finishReason: Self.mapFinishReason(finishReason),
))
break
}
}
} catch {
@ -456,6 +478,39 @@ struct OpenAICompatibleHelper {
// MARK: - Helper Methods
/// Translates an OpenAI-style finish reason string into a `FinishReason`.
///
/// - Returns: `nil` for a missing reason and `.other` for any unrecognised string.
private static func mapFinishReason(_ reason: String?) -> FinishReason? {
    guard let reason else {
        return nil
    }
    switch reason {
    case "stop":
        return .stop
    case "length":
        return .length
    case "tool_calls":
        return .toolCalls
    case "content_filter":
        return .contentFilter
    default:
        return .other
    }
}
/// Validates `settings` against the `LanguageModel` derived from the provider name, model id,
/// and base URL.
private static func validatedSettings(
    _ settings: GenerationSettings,
    providerName: String,
    modelId: String,
    baseURL: String,
)
    -> GenerationSettings
{
    let model = Self.languageModel(providerName: providerName, modelId: modelId, baseURL: baseURL)
    return settings.validated(for: model)
}
/// Maps a provider display name (compared case-insensitively) to a `LanguageModel` case.
///
/// `openrouter` and `together` get their dedicated cases; every other name becomes
/// `.openaiCompatible` with the given base URL.
private static func languageModel(providerName: String, modelId: String, baseURL: String) -> LanguageModel {
    let normalizedName = providerName.lowercased()
    if normalizedName == "openrouter" {
        return .openRouter(modelId: modelId)
    }
    if normalizedName == "together" {
        return .together(modelId: modelId)
    }
    return .openaiCompatible(modelId: modelId, baseURL: baseURL)
}
/// Extract native stop sequences from stop conditions
private static func extractStopSequences(from stopCondition: (any StopCondition)?) -> [String] {
// Extract native stop sequences from stop conditions
@ -512,18 +567,55 @@ struct OpenAICompatibleHelper {
}
}
private static func convertMessages(_ messages: [ModelMessage]) throws -> [OpenAIChatMessage] {
messages.map { message in
private static func convertMessages(
_ messages: [ModelMessage],
replayOpenRouterReasoningForModel modelId: String?,
replayOpenRouterReasoningForBaseURL baseURL: String?,
) throws
-> [OpenAIChatMessage]
{
var converted: [OpenAIChatMessage] = []
var pendingReasoningDetails: [JSONValue] = []
var pendingReasoningText: [String] = []
let endpointIdentity = ReasoningEndpointIdentity.canonical(baseURL)
for message in messages {
if
message.channel == .thinking,
let customData = message.metadata?.customData,
customData["tachikoma.reasoning.provider"] == "openrouter",
customData["tachikoma.reasoning.model"] == modelId,
customData["tachikoma.reasoning.base_url"] == endpointIdentity,
let rawReasoningDetails = customData["openrouter.reasoning_details"]
{
pendingReasoningDetails.append(contentsOf: Self.decodeReasoningDetails(rawReasoningDetails))
continue
}
if
message.channel == .thinking,
let customData = message.metadata?.customData,
customData["tachikoma.reasoning.provider"] == "openrouter",
customData["tachikoma.reasoning.model"] == modelId,
customData["tachikoma.reasoning.base_url"] == endpointIdentity,
let reasoning = customData["openrouter.reasoning"]
{
pendingReasoningText.append(reasoning)
continue
}
if message.channel == .thinking {
continue
}
switch message.role {
case .system:
return OpenAIChatMessage(role: "system", content: message.content.compactMap { part in
converted.append(OpenAIChatMessage(role: "system", content: message.content.compactMap { part in
if case let .text(text) = part { return text }
return nil
}.joined())
}.joined()))
case .user:
if message.content.count == 1, case let .text(text) = message.content.first! {
// Simple text message
return OpenAIChatMessage(role: "user", content: text)
converted.append(OpenAIChatMessage(role: "user", content: text))
} else {
// Multi-modal message
let content = message.content.compactMap { contentPart -> OpenAIChatMessageContent? in
@ -540,7 +632,7 @@ struct OpenAICompatibleHelper {
return nil // Skip tool calls and results in user messages
}
}
return OpenAIChatMessage(role: "user", content: content)
converted.append(OpenAIChatMessage(role: "user", content: content))
}
case .assistant:
// Check if this assistant message contains tool calls
@ -571,15 +663,25 @@ struct OpenAICompatibleHelper {
// If we have tool calls, create a message with tool calls
if !toolCalls.isEmpty {
return OpenAIChatMessage(
converted.append(OpenAIChatMessage(
role: "assistant",
content: textContent.isEmpty ? nil : textContent,
toolCalls: toolCalls,
)
reasoning: pendingReasoningText.isEmpty ? nil : pendingReasoningText.joined(separator: "\n"),
reasoningDetails: pendingReasoningDetails.isEmpty ? nil : pendingReasoningDetails,
))
} else {
// Regular text message
return OpenAIChatMessage(role: "assistant", content: textContent)
converted.append(OpenAIChatMessage(
role: "assistant",
content: textContent,
toolCalls: nil,
reasoning: pendingReasoningText.isEmpty ? nil : pendingReasoningText.joined(separator: "\n"),
reasoningDetails: pendingReasoningDetails.isEmpty ? nil : pendingReasoningDetails,
))
}
pendingReasoningText.removeAll()
pendingReasoningDetails.removeAll()
case .tool:
// Extract tool call ID and result content from tool result
var toolCallId: String?
@ -598,9 +700,44 @@ struct OpenAICompatibleHelper {
}
}
return OpenAIChatMessage(role: "tool", content: resultContent, toolCallId: toolCallId)
converted.append(OpenAIChatMessage(role: "tool", content: resultContent, toolCallId: toolCallId))
}
}
return converted
}
/// Extracts at most one reasoning block from a chat response message.
///
/// Non-empty `reasoningDetails` take precedence and are carried as encoded JSON alongside the
/// plain reasoning text (or an empty string). Otherwise non-empty `reasoning` text is used.
private static func reasoningBlocks(from message: OpenAIChatResponse.Message) -> [ProviderReasoningBlock] {
    if let details = message.reasoningDetails, !details.isEmpty {
        let detailedBlock = ProviderReasoningBlock(
            text: message.reasoning ?? "",
            type: "openrouter_reasoning_details",
            rawJSON: Self.encodeReasoningDetails(details),
        )
        return [detailedBlock]
    }

    guard let reasoning = message.reasoning, !reasoning.isEmpty else {
        return []
    }
    let plainBlock = ProviderReasoningBlock(
        text: reasoning,
        type: "openrouter_reasoning",
        rawJSON: nil,
    )
    return [plainBlock]
}
/// Serialises reasoning details to a JSON string, or `nil` when encoding fails.
private static func encodeReasoningDetails(_ details: [JSONValue]) -> String? {
    let encoded = try? JSONEncoder().encode(details)
    return encoded.flatMap { String(data: $0, encoding: .utf8) }
}
/// Parses a JSON string into reasoning details, yielding an empty array when the string
/// cannot be decoded as `[JSONValue]`.
private static func decodeReasoningDetails(_ rawJSON: String) -> [JSONValue] {
    let decoded = rawJSON.data(using: .utf8).flatMap { data in
        try? JSONDecoder().decode([JSONValue].self, from: data)
    }
    return decoded ?? []
}
private static func convertTool(_ tool: AgentTool) throws -> OpenAITool {

View File

@ -13,6 +13,17 @@ public protocol StopCondition: Sendable {
func reset() async
}
/// Internal capability for stop conditions that can describe themselves with a
/// deterministic string key.
///
/// Conformers in this file return `nil` when no stable key can be produced, for
/// example a composite condition whose children do not all provide a key.
/// NOTE(review): the consumer of this key is not visible in this section —
/// presumably a cache lookup, going by the name; confirm against the caller.
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
protocol StableCacheKeyStopCondition {
/// Stable key for this condition, or `nil` when one cannot be derived.
var stableCacheKey: String? { get }
}
/// Builds the stable key for a composite stop condition.
///
/// Each child key is written as `<utf8 byte count>:<key>` and the entries are
/// concatenated without a separator, so child keys that themselves contain `:`
/// or brackets remain unambiguous.
///
/// - Parameters:
///   - kind: Label identifying the composite (for example `"any"` or `"all"`).
///   - children: Stable keys of the child conditions, in order.
/// - Returns: A key of the form `kind:[<len>:<child>...]`.
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
private func compositeStableCacheKey(kind: String, children: [String]) -> String {
    var payload = ""
    for child in children {
        // Length prefix counts UTF-8 bytes, not characters.
        payload += "\(child.utf8.count):\(child)"
    }
    return "\(kind):[\(payload)]"
}
// MARK: - Built-in Stop Conditions
/// Stop when a specific string is encountered
@ -39,6 +50,13 @@ public struct StringStopCondition: StopCondition {
public func reset() async {}
}
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
extension StringStopCondition: StableCacheKeyStopCondition {
    /// Stable key made of the `string` tag, the case-sensitivity flag, and the
    /// stop string, joined with `:`.
    var stableCacheKey: String? {
        let components = ["string", "\(self.caseSensitive)", "\(self.stopString)"]
        return components.joined(separator: ":")
    }
}
/// Stop when a regex pattern is matched
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public struct RegexStopCondition: StopCondition {
@ -82,6 +100,13 @@ public struct RegexStopCondition: StopCondition {
}
}
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
extension RegexStopCondition: StableCacheKeyStopCondition {
    /// Stable key made of the `regex` tag followed by the condition's pattern.
    var stableCacheKey: String? {
        let tag = "regex:"
        return tag + "\(self.pattern)"
    }
}
/// Stop after a certain number of tokens
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public actor TokenCountStopCondition: StopCondition {
@ -182,6 +207,15 @@ public struct AnyStopCondition: StopCondition {
}
}
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
extension AnyStopCondition: StableCacheKeyStopCondition {
    /// Composite key over all child conditions, or `nil` as soon as one child
    /// either lacks stable-key support or reports no key.
    var stableCacheKey: String? {
        var childKeys: [String] = []
        for condition in self.conditions {
            guard let key = (condition as? StableCacheKeyStopCondition)?.stableCacheKey else {
                // One unkeyable child makes the whole composite unkeyable.
                return nil
            }
            childKeys.append(key)
        }
        return compositeStableCacheKey(kind: "any", children: childKeys)
    }
}
/// Stop when all conditions are met
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public struct AllStopCondition: StopCondition {
@ -211,6 +245,15 @@ public struct AllStopCondition: StopCondition {
}
}
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
extension AllStopCondition: StableCacheKeyStopCondition {
    /// Composite key over all child conditions, or `nil` as soon as one child
    /// either lacks stable-key support or reports no key.
    var stableCacheKey: String? {
        var childKeys: [String] = []
        for condition in self.conditions {
            guard let key = (condition as? StableCacheKeyStopCondition)?.stableCacheKey else {
                // One unkeyable child makes the whole composite unkeyable.
                return nil
            }
            childKeys.append(key)
        }
        return compositeStableCacheKey(kind: "all", children: childKeys)
    }
}
// MARK: - Stateful Stop Conditions
/// Stop when a pattern appears consecutively N times
@ -386,6 +429,13 @@ public struct NeverStopCondition: StopCondition {
public func reset() async {}
}
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
extension NeverStopCondition: StableCacheKeyStopCondition {
    /// Constant key: the condition has no configuration to distinguish instances.
    var stableCacheKey: String? {
        let key = "never"
        return key
    }
}
// MARK: - Integration with Generation Functions
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
@ -431,9 +481,8 @@ extension AsyncThrowingStream where Element == TextStreamDelta {
// Check stop condition
if await condition.shouldStop(text: accumulatedText, delta: content) {
// Yield the current delta then stop
continuation.yield(delta)
continuation.yield(TextStreamDelta.done())
continuation.yield(TextStreamDelta.done(finishReason: .stop))
continuation.finish()
return
}

View File

@ -326,6 +326,11 @@ public enum ImageInput: Sendable {
/// Settings for text generation
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public struct GenerationSettings: Sendable {
/// Selects how streamed output is buffered.
///
/// NOTE(review): the code that consumes this mode is not visible in this
/// section; the case descriptions below are inferred from the names — confirm.
public enum StreamBufferingMode: String, Sendable, Codable {
/// Presumably forwards deltas as they arrive. This is the default used by the
/// `GenerationSettings` initializer and when the key is absent while decoding.
case incremental
/// Presumably holds output back until the stream reaches a terminal event.
case untilTerminal
}
public let maxTokens: Int?
public let temperature: Double?
public let topP: Double?
@ -337,6 +342,7 @@ public struct GenerationSettings: Sendable {
public let stopConditions: (any StopCondition)?
public let seed: Int?
public let providerOptions: ProviderOptions
public let streamBuffering: StreamBufferingMode
public init(
maxTokens: Int? = nil,
@ -350,6 +356,7 @@ public struct GenerationSettings: Sendable {
stopConditions: (any StopCondition)? = nil,
seed: Int? = nil,
providerOptions: ProviderOptions = .init(),
streamBuffering: StreamBufferingMode = .incremental,
) {
self.maxTokens = maxTokens
self.temperature = temperature
@ -362,9 +369,27 @@ public struct GenerationSettings: Sendable {
self.stopConditions = stopConditions
self.seed = seed
self.providerOptions = providerOptions
self.streamBuffering = streamBuffering
}
public static let `default` = GenerationSettings()
/// Returns a copy of these settings with a different stream buffering mode.
///
/// - Parameter mode: The buffering mode the returned settings should carry.
/// - Returns: New settings built from the fields passed below, with
///   `streamBuffering` replaced by `mode`.
public func withStreamBuffering(_ mode: StreamBufferingMode) -> GenerationSettings {
    let updated = GenerationSettings(
        maxTokens: self.maxTokens,
        temperature: self.temperature,
        topP: self.topP,
        topK: self.topK,
        frequencyPenalty: self.frequencyPenalty,
        presencePenalty: self.presencePenalty,
        stopSequences: self.stopSequences,
        reasoningEffort: self.reasoningEffort,
        stopConditions: self.stopConditions,
        seed: self.seed,
        providerOptions: self.providerOptions,
        streamBuffering: mode,
    )
    return updated
}
}
/// Manual Codable conformance excluding non-codable stopConditions
@ -380,6 +405,7 @@ extension GenerationSettings: Codable {
case reasoningEffort
case seed
case providerOptions
case streamBuffering
}
public init(from decoder: Decoder) throws {
@ -394,6 +420,8 @@ extension GenerationSettings: Codable {
self.reasoningEffort = try container.decodeIfPresent(ReasoningEffort.self, forKey: .reasoningEffort)
self.seed = try container.decodeIfPresent(Int.self, forKey: .seed)
self.providerOptions = try container.decodeIfPresent(ProviderOptions.self, forKey: .providerOptions) ?? .init()
self.streamBuffering = try container
.decodeIfPresent(StreamBufferingMode.self, forKey: .streamBuffering) ?? .incremental
self.stopConditions = nil // Can't decode function types
}
@ -409,6 +437,7 @@ extension GenerationSettings: Codable {
try container.encodeIfPresent(self.reasoningEffort, forKey: .reasoningEffort)
try container.encodeIfPresent(self.seed, forKey: .seed)
try container.encode(self.providerOptions, forKey: .providerOptions)
try container.encode(self.streamBuffering, forKey: .streamBuffering)
// Don't encode stopConditions since it can't be serialized
}
}

View File

@ -132,7 +132,9 @@ extension [ModelMessage] {
/// Convert model messages to UI messages for display
public func toUIMessages() -> [UIMessage] {
// Convert model messages to UI messages for display
map { modelMessage in
compactMap { modelMessage in
guard !modelMessage.isProviderNativeReasoningBlock else { return nil }
guard !modelMessage.isSyntheticReasoningBoundary else { return nil }
var content = ""
var attachments: [UIAttachment] = []
var toolCalls: [AgentToolCall] = []
@ -179,6 +181,20 @@ extension [ModelMessage] {
}
}
extension ModelMessage {
    /// Whether this is a thinking-channel message whose custom metadata carries
    /// any of the provider reasoning marker keys.
    fileprivate var isProviderNativeReasoningBlock: Bool {
        guard channel == .thinking else { return false }
        guard let customData = metadata?.customData else { return false }

        // Presence of any one marker is enough; the stored values are not inspected.
        let markerKeys = [
            "anthropic.thinking.model",
            "anthropic.thinking.type",
            "anthropic.thinking.signature",
            "tachikoma.reasoning.provider",
        ]
        return markerKeys.contains { customData[$0] != nil }
    }

    /// Whether the message is tagged as an internal `reasoning_only` boundary.
    fileprivate var isSyntheticReasoningBoundary: Bool {
        guard let boundary = metadata?.customData?["tachikoma.internal.boundary"] else {
            return false
        }
        return boundary == "reasoning_only"
    }
}
// MARK: - Streaming Extensions
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)

View File

@ -1,7 +1,5 @@
import Foundation
// swiftlint:disable file_length
// MARK: - Modern Language Model System
/// Language model selection following AI SDK patterns
@ -156,7 +154,8 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable {
}
public enum Anthropic: Sendable, Hashable, CaseIterable {
// Claude 4.x / 4.5+ Series
// Claude 5 / 4.x Series
case fable5
case opus48
case opus47
case opus45
@ -170,6 +169,7 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable {
public static var allCases: [Anthropic] {
[
.fable5,
.opus48,
.opus47,
.opus45,
@ -183,6 +183,7 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable {
public var modelId: String {
switch self {
case let .custom(id): id
case .fable5: "claude-fable-5"
case .opus48: "claude-opus-4-8"
case .opus47: "claude-opus-4-7"
case .opus45: "claude-opus-4-5"
@ -195,7 +196,7 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable {
public var supportsVision: Bool {
switch self {
case .opus48, .opus47, .opus45, .opus4, .sonnet46, .sonnet45, .haiku45:
case .fable5, .opus48, .opus47, .opus45, .opus4, .sonnet46, .sonnet45, .haiku45:
true
case .custom: true // Assume custom models support vision
}
@ -217,12 +218,82 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable {
public var contextLength: Int {
switch self {
case .opus48, .opus47, .sonnet46: 1_000_000
case .fable5, .opus48, .opus47, .sonnet46: 1_000_000
case .haiku45: 200_000
case .opus45, .opus4, .sonnet45: 500_000
case .custom: 200_000 // Default assumption
case let .custom(id):
Self.isFable(modelId: id) ? 1_000_000 : 200_000 // Default assumption
}
}
/// Maximum number of output tokens assumed for this Anthropic model.
public var maxOutputTokens: Int {
    switch self {
    case .fable5, .opus48, .opus47:
        return 128_000
    case .sonnet46, .haiku45:
        return 64_000
    case .opus45, .opus4, .sonnet45:
        return 4096
    case let .custom(id):
        // Custom IDs recognized as Fable get the Fable limit; anything else
        // falls back to 8192.
        return Self.isFable(modelId: id) ? 128_000 : 8192
    }
}
/// Whether streaming is offered for this model; `false` when the model ID is
/// flagged by `hasStreamingRefusalRisk(modelId:)`.
public var supportsStreaming: Bool {
    let isRisky = Self.hasStreamingRefusalRisk(modelId: self.modelId)
    return !isRisky
}
/// Reports whether a model identifier names Claude Fable 5.
///
/// The identifier is lowercased and split on `/`, `:` and `@`; each piece, and
/// each `.`-separated part of a piece, is compared against the known aliases
/// both verbatim and with `-`, `_` and `.` stripped.
///
/// - Parameter modelId: Model identifier, optionally carrying a provider prefix
///   or suffix separated by `/`, `:` or `@`.
/// - Returns: `true` when the whole identifier or any segment matches a Fable alias.
public static func isFable(modelId: String) -> Bool {
    let lowered = modelId.lowercased()
    if lowered == Self.fable5.modelId {
        return true
    }

    let exactAliases: Set<String> = ["claude-fable-5", "fable-5", "fable5", "fable"]
    let compactAliases: Set<String> = ["claudefable5", "fable5"]

    // A segment matches verbatim, or after removing `-`, `_` and `.`.
    func matchesAlias(_ segment: String) -> Bool {
        if exactAliases.contains(segment) {
            return true
        }
        let compact = ["-", "_", "."].reduce(segment) { partial, symbol in
            partial.replacingOccurrences(of: symbol, with: "")
        }
        return compactAliases.contains(compact)
    }

    let pathSegments = lowered.components(separatedBy: CharacterSet(charactersIn: "/:@"))
    for pathSegment in pathSegments where !pathSegment.isEmpty {
        if matchesAlias(pathSegment) {
            return true
        }
        // Also try the dot-separated parts of the segment.
        for dotSegment in pathSegment.components(separatedBy: ".") where !dotSegment.isEmpty {
            if matchesAlias(dotSegment) {
                return true
            }
        }
    }
    return false
}
/// Reports whether a model identifier names Claude Opus 4.8.
///
/// The identifier is lowercased and split on `/`, `:` and `@`; each piece, and
/// each `.`-separated part of a piece, is compared against the known aliases
/// both verbatim and with `-`, `_` and `.` stripped. The whole identifier with
/// that punctuation stripped is checked as well.
///
/// - Parameter modelId: Model identifier, optionally carrying a provider prefix
///   or suffix separated by `/`, `:` or `@`.
/// - Returns: `true` when the identifier or any segment matches an Opus 4.8 alias.
public static func isOpus48(modelId: String) -> Bool {
    let lowered = modelId.lowercased()
    if lowered == Self.opus48.modelId {
        return true
    }

    let exactAliases: Set<String> = ["claude-opus-4-8", "opus-4-8", "opus48"]
    let compactAliases: Set<String> = ["claudeopus48", "opus48"]

    // Removes `-`, `_` and `.` so differently punctuated spellings compare equal.
    func compacted(_ text: String) -> String {
        ["-", "_", "."].reduce(text) { partial, symbol in
            partial.replacingOccurrences(of: symbol, with: "")
        }
    }

    func matchesAlias(_ segment: String) -> Bool {
        exactAliases.contains(segment) || compactAliases.contains(compacted(segment))
    }

    let pathSegments = lowered.components(separatedBy: CharacterSet(charactersIn: "/:@"))
    for pathSegment in pathSegments where !pathSegment.isEmpty {
        if matchesAlias(pathSegment) {
            return true
        }
        // Also try the dot-separated parts of the segment.
        for dotSegment in pathSegment.components(separatedBy: ".") where !dotSegment.isEmpty {
            if matchesAlias(dotSegment) {
                return true
            }
        }
    }

    // Last resort: the entire identifier with punctuation stripped.
    return compactAliases.contains(compacted(lowered))
}
/// Reports whether a model identifier belongs to a model for which streaming is
/// disabled (currently identifiers recognized as Fable 5 or Opus 4.8).
///
/// - Parameter modelId: The model identifier to classify.
/// - Returns: `true` when `isFable` or `isOpus48` recognizes the identifier.
public static func hasStreamingRefusalRisk(modelId: String) -> Bool {
    if self.isFable(modelId: modelId) {
        return true
    }
    return self.isOpus48(modelId: modelId)
}
}
public enum Google: String, Sendable, Hashable, CaseIterable {
@ -783,8 +854,40 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable {
}
public var supportsStreaming: Bool {
// All models support streaming by default
true
if case let .anthropic(model) = self {
return model.supportsStreaming
}
if case let .anthropicCompatible(modelId, _) = self {
return !Anthropic.hasStreamingRefusalRisk(modelId: modelId)
}
if case let .openRouter(modelId) = self, modelId.lowercased().hasPrefix("anthropic/") {
return !Anthropic.hasStreamingRefusalRisk(modelId: modelId)
}
if case let .together(modelId) = self, modelId.lowercased().hasPrefix("anthropic/") {
return !Anthropic.hasStreamingRefusalRisk(modelId: modelId)
}
if case let .openaiCompatible(modelId, _) = self {
let normalized = modelId.lowercased()
guard
normalized.contains("claude") ||
normalized.hasPrefix("anthropic/") ||
normalized.hasPrefix("anthropic.") else
{
return true
}
return !Anthropic.hasStreamingRefusalRisk(modelId: modelId)
}
if
case let .custom(provider) = self,
let parsed = ProviderParser.parse(provider.modelId),
CustomProviderRegistry.shared.get(parsed.provider)?.kind == .anthropic
{
return !Anthropic.hasStreamingRefusalRisk(modelId: parsed.model)
}
if case let .custom(provider) = self {
return provider.capabilities.supportsStreaming
}
return true
}
public var providerName: String {
@ -829,10 +932,11 @@ public enum LanguageModel: Sendable, CustomStringConvertible, Hashable {
// MARK: - Default Model
public static let `default`: LanguageModel = .anthropic(.opus48)
public static let defaultStreaming: LanguageModel = .openai(.gpt55)
// MARK: - Convenience Static Properties
/// Default Claude model (opus48)
/// Default Claude model (Opus 4.8)
public static let claude: LanguageModel = .anthropic(.opus48)
/// Default Grok model (Grok 4.3)
@ -967,10 +1071,14 @@ extension LanguageModel {
model.contextLength
case .azureOpenAI:
128_000 // conservative default matching OpenAI tier
case .openRouter, .together, .replicate:
128_000 // Common default
case .openaiCompatible, .anthropicCompatible:
case let .openRouter(modelId), let .together(modelId):
Anthropic.isFable(modelId: modelId) ? 1_000_000 : 128_000
case .replicate:
128_000 // Common default
case let .openaiCompatible(modelId, _):
Anthropic.isFable(modelId: modelId) ? 1_000_000 : 128_000
case let .anthropicCompatible(modelId, _):
Anthropic.isFable(modelId: modelId) ? 1_000_000 : 128_000
case let .custom(provider):
provider.capabilities.contextLength
}
@ -1224,6 +1332,13 @@ extension LanguageModel {
// MARK: Anthropic models
/// Checks the captured spellings of the input against whole-string aliases.
///
/// - Parameters:
///   - aliases: Aliases compared against the `normalized`, `dashed` and `dotted` spellings.
///   - compactAliases: Aliases compared against the `compact` spelling.
/// - Returns: `true` when any spelling equals one of its aliases exactly.
func matchesExactAlias(_ aliases: Set<String>, compactAliases: Set<String> = []) -> Bool {
    let spellings = [normalized, dashed, dotted]
    if spellings.contains(where: { aliases.contains($0) }) {
        return true
    }
    return compactAliases.contains(compact)
}
if dotted.contains("claude-3") || compact.contains("claude3") {
return nil
}
@ -1237,12 +1352,31 @@ extension LanguageModel {
}
if
dotted.contains("claude-opus-4-8") ||
dotted.contains("claude-opus-4.8") ||
compact.contains("claudeopus48") ||
dotted.contains("opus-4-8") ||
dotted.contains("opus-4.8") ||
compact.contains("opus48")
matchesExactAlias(
[
"claude-fable-5",
"fable-5",
"fable.5",
"fable5",
"fable",
],
compactAliases: ["claudefable5", "fable5"],
)
{
return .anthropic(.fable5)
}
if
matchesExactAlias(
[
"claude-opus-4-8",
"claude-opus-4.8",
"opus-4-8",
"opus-4.8",
"opus48",
],
compactAliases: ["claudeopus48", "opus48"],
)
{
return .anthropic(.opus48)
}
@ -1563,8 +1697,8 @@ extension LanguageModel {
}
private static func looksAnthropic(_ normalized: String) -> Bool {
normalized.contains("claude") || normalized.contains("opus") || normalized.contains("sonnet") ||
normalized.contains("haiku") || normalized == "anthropic"
normalized.contains("claude") || normalized.contains("fable") || normalized.contains("opus") ||
normalized.contains("sonnet") || normalized.contains("haiku") || normalized == "anthropic"
}
private static func looksGoogle(_ normalized: String) -> Bool {
@ -1660,5 +1794,3 @@ extension LanguageModel {
}
}
}
// swiftlint:enable file_length

View File

@ -84,6 +84,27 @@ public struct ProviderResponse: Sendable {
public let usage: Usage?
public let finishReason: FinishReason?
public let toolCalls: [AgentToolCall]?
public let reasoning: [ProviderReasoningBlock]
public let assistantMessages: [ModelMessage]
public let isBillable: Bool
/// Creates a provider response with every field specified explicitly.
///
/// - Parameters:
///   - text: Response text.
///   - usage: Token usage, when available. Defaults to `nil`.
///   - finishReason: Why generation finished, when known. Defaults to `nil`.
///   - toolCalls: Tool calls carried by the response. Defaults to `nil`.
///   - reasoning: Provider reasoning blocks attached to the response. Defaults to empty.
///   - assistantMessages: Assistant messages attached to the response. Defaults to empty.
///     NOTE(review): how callers populate and replay these is not visible here.
///   - isBillable: Billing flag stored on the response. Defaults to `true`.
///     NOTE(review): the exact billing semantics are not visible in this section.
public init(
text: String,
usage: Usage? = nil,
finishReason: FinishReason? = nil,
toolCalls: [AgentToolCall]? = nil,
reasoning: [ProviderReasoningBlock] = [],
assistantMessages: [ModelMessage] = [],
isBillable: Bool = true,
) {
self.text = text
self.usage = usage
self.finishReason = finishReason
self.toolCalls = toolCalls
self.reasoning = reasoning
self.assistantMessages = assistantMessages
self.isBillable = isBillable
}
public init(
text: String,
@ -91,9 +112,30 @@ public struct ProviderResponse: Sendable {
finishReason: FinishReason? = nil,
toolCalls: [AgentToolCall]? = nil,
) {
self.text = text
self.usage = usage
self.finishReason = finishReason
self.toolCalls = toolCalls
self.init(
text: text,
usage: usage,
finishReason: finishReason,
toolCalls: toolCalls,
reasoning: [],
assistantMessages: [],
isBillable: true,
)
}
}
/// Provider-native signed reasoning block that must be replayed in later requests.
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public struct ProviderReasoningBlock: Sendable, Equatable {
/// Reasoning text; may be empty when the block only carries raw provider data.
public let text: String
/// Signature accompanying the block, if the provider supplied one.
public let signature: String?
/// Block type tag; `"thinking"` unless the creator specifies another.
public let type: String
/// Raw JSON carried with the block, if any.
/// NOTE(review): presumably kept so the provider payload can be replayed verbatim — confirm.
public let rawJSON: String?
/// Creates a reasoning block.
///
/// - Parameters:
///   - text: Reasoning text.
///   - signature: Optional signature. Defaults to `nil`.
///   - type: Block type tag. Defaults to `"thinking"`.
///   - rawJSON: Optional raw JSON payload. Defaults to `nil`.
public init(text: String, signature: String? = nil, type: String = "thinking", rawJSON: String? = nil) {
self.text = text
self.signature = signature
self.type = type
self.rawJSON = rawJSON
}
}

View File

@ -162,6 +162,8 @@ public struct ModelSelector {
private static func parseAnthropicModel(_ input: String) -> Model.Anthropic? {
switch input {
// Direct matches
case "claude-fable-5", "claude-fable-5-latest", "fable-5", "fable.5", "fable5", "fable":
return .fable5
case "claude-opus-4-8", "claude-opus-4.8", "opus-4-8", "opus-4.8", "opus48",
"claude-opus-4-8-latest":
return .opus48
@ -183,7 +185,7 @@ public struct ModelSelector {
case "claude-haiku", "haiku":
return .haiku45
case "anthropic":
return .opus48 // Default Anthropic model
return .opus48
default:
// Check if it's a Claude model ID
if self.isUnsupportedLegacyAnthropicModel(input) {
@ -518,6 +520,7 @@ public func getAllAvailableModels() -> String {
output += "\nShortcuts:\n"
output += " • claude, claude-opus, opus → claude-opus-4-8\n"
output += " • fable → claude-fable-5\n"
output += " • gpt → gpt-5.5\n"
output += " • gemini → gemini-3.5-flash\n"
output += " • minimax → MiniMax-M2.7\n"
@ -526,7 +529,7 @@ public func getAllAvailableModels() -> String {
output += " • llama, llama3 → llama3.3\n"
output += "\nCustom Models:\n"
output += " • OpenRouter: anthropic/claude-opus-4-8\n"
output += " • OpenRouter: anthropic/claude-fable-5\n"
output += " • Custom OpenAI: custom-gpt-model\n"
output += " • Local Ollama: any-model:tag\n"

View File

@ -139,13 +139,11 @@ enum AnthropicContent: Codable {
struct RedactedThinkingContent: Codable {
let type: String
let redactedThinking: String
let signature: String
let data: String
enum CodingKeys: String, CodingKey {
case type
case redactedThinking = "redacted_thinking"
case signature
case data
}
}
@ -402,12 +400,14 @@ struct AnthropicMessageResponse: Codable {
let model: String
let stopReason: String?
let stopSequence: String?
let stopDetails: StopDetails?
let usage: AnthropicUsage
enum CodingKeys: String, CodingKey {
case id, type, role, content, model, usage
case stopReason = "stop_reason"
case stopSequence = "stop_sequence"
case stopDetails = "stop_details"
}
init(from decoder: Decoder) throws {
@ -419,12 +419,20 @@ struct AnthropicMessageResponse: Codable {
self.model = try container.decode(String.self, forKey: .model)
self.stopReason = try container.decodeIfPresent(String.self, forKey: .stopReason)
self.stopSequence = try container.decodeIfPresent(String.self, forKey: .stopSequence)
self.stopDetails = try container.decodeIfPresent(StopDetails.self, forKey: .stopDetails)
self.usage = try container.decode(AnthropicUsage.self, forKey: .usage)
}
/// Optional details decoded from the response's `stop_details` field.
/// Both members are optional, so a partially populated object still decodes.
struct StopDetails: Codable {
/// NOTE(review): presumably a classification of why generation stopped — confirm against the API.
let category: String?
/// NOTE(review): presumably human-readable text accompanying `category` — confirm.
let explanation: String?
}
}
enum AnthropicResponseContent: Codable {
case text(TextContent)
case thinking(ThinkingContent)
case redactedThinking(RedactedThinkingContent)
case toolUse(ToolUseContent)
struct TextContent: Codable {
@ -461,6 +469,22 @@ enum AnthropicResponseContent: Codable {
}
}
/// Payload decoded for response content blocks whose `type` is `"thinking"`.
struct ThinkingContent: Codable {
/// Block type discriminator as received.
let type: String
/// Thinking text carried by the block.
let thinking: String
/// Signature sent with the thinking text; non-optional, so decoding fails when it is missing.
let signature: String
}
/// Payload decoded for response content blocks whose `type` is `"redacted_thinking"`.
///
/// Both properties use their own names as JSON keys, so the compiler-synthesized
/// coding keys (`type`, `data`) are relied on instead of spelling them out.
struct RedactedThinkingContent: Codable {
    /// Block type discriminator as received.
    let type: String
    /// Payload string carried by the redacted block.
    let data: String
}
struct ToolUseContent: Codable {
let type: String
let id: String
@ -540,6 +564,10 @@ enum AnthropicResponseContent: Codable {
switch type {
case "text":
self = try .text(TextContent(from: decoder))
case "thinking":
self = try .thinking(ThinkingContent(from: decoder))
case "redacted_thinking":
self = try .redactedThinking(RedactedThinkingContent(from: decoder))
case "tool_use":
self = try .toolUse(ToolUseContent(from: decoder))
default:
@ -558,6 +586,10 @@ enum AnthropicResponseContent: Codable {
switch self {
case let .text(content):
try content.encode(to: encoder)
case let .thinking(content):
try content.encode(to: encoder)
case let .redactedThinking(content):
try content.encode(to: encoder)
case let .toolUse(content):
try content.encode(to: encoder)
}
@ -567,6 +599,8 @@ enum AnthropicResponseContent: Codable {
case type
case text
case thinking
case redactedThinking = "redacted_thinking"
case signature
}
}
@ -578,6 +612,17 @@ struct AnthropicUsage: Codable {
case inputTokens = "input_tokens"
case outputTokens = "output_tokens"
}
/// Creates a usage record from explicit token counts.
///
/// - Parameters:
///   - inputTokens: Number of input tokens.
///   - outputTokens: Number of output tokens.
init(inputTokens: Int, outputTokens: Int) {
self.inputTokens = inputTokens
self.outputTokens = outputTokens
}
/// Decodes usage leniently: a missing token count is read as `0`.
///
/// - Parameter decoder: Decoder positioned at the usage object.
/// - Throws: When the keyed container cannot be obtained or a present value is not an `Int`.
init(from decoder: Decoder) throws {
    let values = try decoder.container(keyedBy: CodingKeys.self)
    let decodedInput = try values.decodeIfPresent(Int.self, forKey: .inputTokens)
    self.inputTokens = decodedInput ?? 0
    let decodedOutput = try values.decodeIfPresent(Int.self, forKey: .outputTokens)
    self.outputTokens = decodedOutput ?? 0
}
}
// MARK: - Streaming Types
@ -612,12 +657,11 @@ struct AnthropicStreamContentBlock: Codable {
let text: String?
let input: Any?
let thinking: String?
let redactedThinking: String?
let data: String?
let signature: String?
enum CodingKeys: String, CodingKey {
case type, id, name, text, input, thinking, signature
case redactedThinking = "redacted_thinking"
case type, id, name, text, input, thinking, data, signature
}
init(from decoder: Decoder) throws {
@ -627,7 +671,7 @@ struct AnthropicStreamContentBlock: Codable {
self.name = try? container.decode(String.self, forKey: .name)
self.text = try? container.decode(String.self, forKey: .text)
self.thinking = try? container.decode(String.self, forKey: .thinking)
self.redactedThinking = try? container.decode(String.self, forKey: .redactedThinking)
self.data = try? container.decode(String.self, forKey: .data)
self.signature = try? container.decode(String.self, forKey: .signature)
// Decode input as generic JSON if present
@ -653,7 +697,7 @@ struct AnthropicStreamContentBlock: Codable {
try container.encodeIfPresent(self.name, forKey: .name)
try container.encodeIfPresent(self.text, forKey: .text)
try container.encodeIfPresent(self.thinking, forKey: .thinking)
try container.encodeIfPresent(self.redactedThinking, forKey: .redactedThinking)
try container.encodeIfPresent(self.data, forKey: .data)
try container.encodeIfPresent(self.signature, forKey: .signature)
if let input {
let data = try JSONSerialization.data(withJSONObject: input)
@ -677,6 +721,17 @@ struct AnthropicStreamDelta: Codable {
case stopReason = "stop_reason"
case stopSequence = "stop_sequence"
}
/// Decodes a stream delta leniently: every field is optional on the wire, and a
/// missing `type` is stored as an empty string.
///
/// - Parameter decoder: Decoder positioned at the delta object.
/// - Throws: When the keyed container cannot be obtained or a present value is not a `String`.
init(from decoder: Decoder) throws {
    let fields = try decoder.container(keyedBy: CodingKeys.self)

    // `type` is the only non-optional property, so it gets a fallback.
    let decodedType = try fields.decodeIfPresent(String.self, forKey: .type)
    self.type = decodedType ?? ""

    self.text = try fields.decodeIfPresent(String.self, forKey: .text)
    self.thinking = try fields.decodeIfPresent(String.self, forKey: .thinking)
    self.signature = try fields.decodeIfPresent(String.self, forKey: .signature)
    self.partialJson = try fields.decodeIfPresent(String.self, forKey: .partialJson)
    self.stopReason = try fields.decodeIfPresent(String.self, forKey: .stopReason)
    self.stopSequence = try fields.decodeIfPresent(String.self, forKey: .stopSequence)
}
}
struct AnthropicErrorResponse: Codable {

View File

@ -10,6 +10,8 @@ public final class AnthropicCompatibleProvider: ModelProvider {
public let capabilities: ModelCapabilities
private let configuration: TachikomaConfiguration
private let auth: TKAuthValue?
private let reasoningProvider: String
private let reasoningBaseURL: String?
public init(
modelId: String,
@ -19,11 +21,16 @@ public final class AnthropicCompatibleProvider: ModelProvider {
additionalHeaders: [String: String] = [:],
auth: TKAuthValue? = nil,
capabilities: ModelCapabilities? = nil,
reasoningProvider: String = "anthropic-compatible",
reasoningBaseURL: String? = nil,
includeReasoningBaseURL: Bool = true,
) throws {
self.modelId = modelId
self.baseURL = baseURL
self.configuration = configuration
self.additionalHeaders = additionalHeaders
self.reasoningProvider = reasoningProvider
self.reasoningBaseURL = includeReasoningBaseURL ? (reasoningBaseURL ?? baseURL) : nil
// Try explicit provider key, then configuration, then common environment variable patterns.
if let key = apiKey {
@ -51,12 +58,24 @@ public final class AnthropicCompatibleProvider: ModelProvider {
self.auth = nil
}
self.capabilities = capabilities ?? ModelCapabilities(
let isFable = LanguageModel.Anthropic.isFable(modelId: modelId)
let supportsSafeStreaming = !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId)
let baseCapabilities = capabilities ?? ModelCapabilities(
supportsVision: true,
supportsTools: true,
supportsStreaming: true,
contextLength: 200_000,
maxOutputTokens: 8192,
supportsStreaming: supportsSafeStreaming,
contextLength: isFable ? 1_000_000 : 200_000,
maxOutputTokens: isFable ? 128_000 : 8192,
)
self.capabilities = supportsSafeStreaming ? baseCapabilities : ModelCapabilities(
supportsVision: baseCapabilities.supportsVision,
supportsTools: baseCapabilities.supportsTools,
supportsStreaming: false,
supportsAudioInput: baseCapabilities.supportsAudioInput,
supportsAudioOutput: baseCapabilities.supportsAudioOutput,
contextLength: baseCapabilities.contextLength,
maxOutputTokens: baseCapabilities.maxOutputTokens,
costPerToken: baseCapabilities.costPerToken,
)
}
@ -89,6 +108,9 @@ public final class AnthropicCompatibleProvider: ModelProvider {
configuration: compatConfig,
additionalHeaders: self.additionalHeaders,
authOverride: self.auth,
reasoningProvider: self.reasoningProvider,
reasoningModelId: self.modelId,
reasoningBaseURL: self.reasoningBaseURL,
)
}
}

View File

@ -40,12 +40,13 @@ public final class OpenAICompatibleProvider: ModelProvider {
self.apiKey = nil // Some compatible APIs don't require keys
}
let isFable = LanguageModel.Anthropic.isFable(modelId: modelId)
self.capabilities = ModelCapabilities(
supportsVision: false,
supportsTools: true,
supportsStreaming: true,
contextLength: 128_000,
maxOutputTokens: 4096,
supportsStreaming: !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId),
contextLength: isFable ? 1_000_000 : 128_000,
maxOutputTokens: isFable ? 128_000 : 4096,
)
}
@ -63,8 +64,12 @@ public final class OpenAICompatibleProvider: ModelProvider {
}
public func streamText(request: ProviderRequest) async throws -> AsyncThrowingStream<TextStreamDelta, Error> {
guard !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: self.modelId) else {
throw TachikomaError.invalidConfiguration("\(self.modelId) does not support streaming")
}
// Use OpenAI-compatible streaming implementation
try await OpenAICompatibleHelper.streamText(
return try await OpenAICompatibleHelper.streamText(
request: request,
modelId: self.modelId,
baseURL: self.baseURL!,

View File

@ -213,6 +213,21 @@ public final class OpenAIResponsesProvider: ModelProvider {
// Parse the entire response for Linux
let responseText = String(data: data, encoding: .utf8) ?? ""
let lines = responseText.components(separatedBy: "\n")
var streamState = ResponsesStreamState()
for line in lines {
if
try Self.processResponsesStreamLine(
line,
model: self.model,
state: &streamState,
continuation: continuation,
)
{
return
}
}
continuation.finish()
return
#else
// macOS/iOS: Use streaming API
let (bytes, response) = try await self.session.bytes(for: finalURLRequest)
@ -242,157 +257,18 @@ public final class OpenAIResponsesProvider: ModelProvider {
throw TachikomaError.apiError("Failed to start streaming: \(errorMessage)")
}
var previousContent = "" // Track previously sent content for GPT-5 preambles
struct PartialToolCall {
var id: String
var name: String?
var arguments: String
}
var pendingToolCalls: [String: PartialToolCall] = [:]
var streamState = ResponsesStreamState()
for try await line in bytes.lines {
// Handle SSE format
if line.hasPrefix("data: ") {
let jsonString = String(line.dropFirst(6))
if ProcessInfo.processInfo.environment["DEBUG_TACHIKOMA_STREAM"] != nil {
Self.debugLog("raw stream: \(jsonString)")
}
if jsonString == "[DONE]" {
continuation.finish()
return
}
if let data = jsonString.data(using: .utf8) {
// Responses API event streams use typed event payloads.
if Self.usesResponsesEventStream(self.model) {
if
let event = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
let eventType = event["type"] as? String
{
if ProcessInfo.processInfo.environment["DEBUG_TACHIKOMA"] != nil {
Self.debugLog("event: \(eventType) payload: \(event)")
}
switch eventType {
case "response.output_text.delta":
if let delta = event["delta"] as? String, !delta.isEmpty {
continuation.yield(TextStreamDelta.text(delta))
}
case "response.output_item.added":
if
let item = event["item"] as? [String: Any],
let itemType = item["type"] as? String,
itemType == "function_call"
{
let identifier = (item["id"] as? String) ??
(item["call_id"] as? String) ?? UUID().uuidString
var partial = pendingToolCalls[identifier] ?? PartialToolCall(
id: identifier,
name: nil,
arguments: "",
)
if let name = item["name"] as? String {
partial.name = name
}
pendingToolCalls[identifier] = partial
}
case "response.function_call_arguments.delta":
if
let itemId = event["item_id"] as? String,
let delta = event["delta"] as? String
{
var partial = pendingToolCalls[itemId] ?? PartialToolCall(
id: itemId,
name: nil,
arguments: "",
)
partial.arguments.append(delta)
pendingToolCalls[itemId] = partial
}
case "response.function_call_arguments.done":
if
let itemId = event["item_id"] as? String,
let arguments = event["arguments"] as? String
{
var partial = pendingToolCalls[itemId] ?? PartialToolCall(
id: itemId,
name: nil,
arguments: "",
)
partial.arguments = arguments
pendingToolCalls[itemId] = partial
if
let name = partial.name,
let toolCall = Self.makeToolCall(
id: itemId,
name: name,
argumentsJSON: arguments,
)
{
continuation.yield(.tool(toolCall))
pendingToolCalls.removeValue(forKey: itemId)
}
}
case "response.completed":
continuation.finish()
return
default:
break
}
}
} else {
// Try alternate Responses API delta format.
do {
let chunk = try JSONDecoder().decode(
OpenAIResponsesStreamChunk.self,
from: data,
)
// Convert to TextStreamDelta
if
let choice = chunk.choices.first,
let content = choice.delta.content,
!content.isEmpty
{
// Handle accumulated content for models with preambles
if content.hasPrefix(previousContent), !previousContent.isEmpty {
// This is accumulated content, extract just the delta
let delta = String(content.dropFirst(previousContent.count))
if !delta.isEmpty {
continuation.yield(TextStreamDelta.text(delta))
previousContent = content // Update the accumulated content
}
} else {
// This is a true delta or the first chunk
continuation.yield(TextStreamDelta.text(content))
previousContent += content // Accumulate for comparison
}
}
// Check for finish
if
let choice = chunk.choices.first,
choice.finishReason != nil
{
continuation.finish()
return
}
} catch {
// Ignore parsing errors for incomplete chunks
}
}
}
} else if line.hasPrefix("event: ") {
// Track event types for GPT-5 streaming (but we handle them in data lines)
// This helps us understand the stream structure
if
try Self.processResponsesStreamLine(
line,
model: self.model,
state: &streamState,
continuation: continuation,
)
{
return
}
}
@ -405,6 +281,201 @@ public final class OpenAIResponsesProvider: ModelProvider {
}
}
/// Mutable parsing state carried across lines while a Responses stream is processed.
/// It is passed `inout` to `processResponsesStreamLine` once per line.
private struct ResponsesStreamState {
/// Function call whose name and arguments are assembled from successive stream events.
struct PartialToolCall {
var id: String
// The name is optional because it may be filled in after the entry is created.
var name: String?
var arguments: String
}
/// Content seen so far; starts empty.
/// NOTE(review): presumably used to turn accumulated chunks into deltas — the code that reads it lies past this section; confirm.
var previousContent = ""
/// Tool calls still being assembled, keyed by item identifier.
var pendingToolCalls: [String: PartialToolCall] = [:]
// NOTE(review): the code that sets and reads the two flags below lies outside this section.
var didYieldToolCall = false
var didReceiveRefusal = false
}
/// Processes one line of a streamed Responses API body and forwards any
/// resulting deltas to `continuation`.
///
/// Lines that do not start with `data: ` are ignored. A `[DONE]` payload finishes
/// the stream. When `usesResponsesEventStream(model)` is true the payload is read
/// as a typed event (`type` field); otherwise it is decoded as an
/// `OpenAIResponsesStreamChunk`.
///
/// - Parameters:
///   - line: A single line from the streamed body.
///   - model: Selects between the typed-event path and the chunk path.
///   - state: Accumulated text, pending tool calls and refusal/tool-call flags; mutated in place.
///   - continuation: Receives text, tool and done deltas.
/// - Returns: `true` when this call finished `continuation` (the caller should stop
///   reading); `false` when more lines may follow.
/// - Throws: `TachikomaError.apiError` for `response.failed` and `error` events.
private static func processResponsesStreamLine(
_ line: String,
model: LanguageModel.OpenAI,
state: inout ResponsesStreamState,
continuation: AsyncThrowingStream<TextStreamDelta, Error>.Continuation,
) throws
-> Bool
{
guard line.hasPrefix("data: ") else {
return false
}
// Strip the 6-character "data: " prefix to get the payload.
let jsonString = String(line.dropFirst(6))
if ProcessInfo.processInfo.environment["DEBUG_TACHIKOMA_STREAM"] != nil {
Self.debugLog("raw stream: \(jsonString)")
}
// "[DONE]" is the end-of-stream sentinel; no `.done` delta is yielded here.
if jsonString == "[DONE]" {
continuation.finish()
return true
}
guard let data = jsonString.data(using: .utf8) else {
return false
}
if Self.usesResponsesEventStream(model) {
// Payloads that are not a JSON object with a string `type` are skipped silently.
guard
let event = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
let eventType = event["type"] as? String else
{
return false
}
if ProcessInfo.processInfo.environment["DEBUG_TACHIKOMA"] != nil {
Self.debugLog("event: \(eventType) payload: \(event)")
}
switch eventType {
case "response.output_text.delta":
// Empty deltas are dropped rather than forwarded.
if let delta = event["delta"] as? String, !delta.isEmpty {
continuation.yield(TextStreamDelta.text(delta))
}
case "response.output_item.added":
// Only function_call items are tracked. The key is the item's `id`,
// then `call_id`, then a freshly generated UUID.
if
let item = event["item"] as? [String: Any],
let itemType = item["type"] as? String,
itemType == "function_call"
{
let identifier = (item["id"] as? String) ??
(item["call_id"] as? String) ?? UUID().uuidString
var partial = state.pendingToolCalls[identifier] ?? ResponsesStreamState.PartialToolCall(
id: identifier,
name: nil,
arguments: "",
)
if let name = item["name"] as? String {
partial.name = name
}
state.pendingToolCalls[identifier] = partial
}
case "response.function_call_arguments.delta":
// Append the argument fragment, creating the entry if no `added` event was seen.
if
let itemId = event["item_id"] as? String,
let delta = event["delta"] as? String
{
var partial = state.pendingToolCalls[itemId] ?? ResponsesStreamState.PartialToolCall(
id: itemId,
name: nil,
arguments: "",
)
partial.arguments.append(delta)
state.pendingToolCalls[itemId] = partial
}
case "response.function_call_arguments.done":
// The final `arguments` string replaces whatever was accumulated from deltas.
if
let itemId = event["item_id"] as? String,
let arguments = event["arguments"] as? String
{
var partial = state.pendingToolCalls[itemId] ?? ResponsesStreamState.PartialToolCall(
id: itemId,
name: nil,
arguments: "",
)
partial.arguments = arguments
state.pendingToolCalls[itemId] = partial
// The call is yielded only when its name is known and makeToolCall succeeds;
// otherwise the entry stays in pendingToolCalls.
if
let name = partial.name,
let toolCall = Self.makeToolCall(id: itemId, name: name, argumentsJSON: arguments)
{
continuation.yield(.tool(toolCall))
state.didYieldToolCall = true
state.pendingToolCalls.removeValue(forKey: itemId)
}
}
case "response.refusal.delta",
"response.refusal.done":
// Refusal text itself is not forwarded; only the fact is recorded.
state.didReceiveRefusal = true
case "response.completed":
// Finish-reason precedence: refusal, then tool calls, then plain stop.
let finishReason: FinishReason = state.didReceiveRefusal
? .contentFilter
: (state.didYieldToolCall ? .toolCalls : .stop)
continuation.yield(.done(finishReason: finishReason))
continuation.finish()
return true
case "response.incomplete":
let finishReason = Self.finishReasonForIncompleteResponseEvent(event)
continuation.yield(.done(finishReason: finishReason))
continuation.finish()
return true
case "response.failed",
"error":
throw TachikomaError.apiError(Self.errorMessageForResponseStreamEvent(event))
default:
// Other event types are ignored.
break
}
return false
}
// Chunk-based path (choices/delta shape).
do {
let chunk = try JSONDecoder().decode(OpenAIResponsesStreamChunk.self, from: data)
if
let choice = chunk.choices.first,
let content = choice.delta.content,
!content.isEmpty
{
// Content that starts with everything seen so far is treated as a cumulative
// snapshot: only the new suffix is yielded. Anything else is treated as a delta.
if content.hasPrefix(state.previousContent), !state.previousContent.isEmpty {
let delta = String(content.dropFirst(state.previousContent.count))
if !delta.isEmpty {
continuation.yield(TextStreamDelta.text(delta))
state.previousContent = content
}
} else {
continuation.yield(TextStreamDelta.text(content))
state.previousContent += content
}
}
if let choice = chunk.choices.first, let finishReason = choice.finishReason {
continuation.yield(.done(finishReason: Self.finishReasonForChatStream(finishReason)))
continuation.finish()
return true
}
} catch {
// Ignore parsing errors for incomplete chunks.
}
return false
}
/// Translates a chunk-level `finish_reason` string into a `FinishReason`.
///
/// - Parameter reason: The raw finish reason taken from the stream chunk.
/// - Returns: The matching case, or `.other` for any string not listed here.
private static func finishReasonForChatStream(_ reason: String) -> FinishReason {
    let knownReasons: [String: FinishReason] = [
        "stop": .stop,
        "length": .length,
        "tool_calls": .toolCalls,
        "content_filter": .contentFilter,
    ]
    return knownReasons[reason] ?? .other
}
/// Builds the error text reported for a failed or errored stream event.
///
/// The message is looked up, in order, in the event's `error` object, the
/// `error` object nested under `response`, and the event's own `message`
/// field. Empty strings are skipped.
///
/// - Parameter event: The decoded event payload.
/// - Returns: `"OpenAI Responses API stream <type>"`, followed by `": <message>"`
///   when a non-empty message was found. `<type>` defaults to `error`.
private static func errorMessageForResponseStreamEvent(_ event: [String: Any]) -> String {
    let kind = (event["type"] as? String) ?? "error"
    let prefix = "OpenAI Responses API stream \(kind)"

    // Prefer a top-level error object; otherwise look inside `response`.
    let nestedError: [String: Any]?
    if let direct = event["error"] as? [String: Any] {
        nestedError = direct
    } else if let response = event["response"] as? [String: Any] {
        nestedError = response["error"] as? [String: Any]
    } else {
        nestedError = nil
    }

    let candidates: [String?] = [
        nestedError?["message"] as? String,
        event["message"] as? String,
    ]
    for case let message? in candidates where !message.isEmpty {
        return "\(prefix): \(message)"
    }
    return prefix
}
private func authHeader() -> (String, String, String) {
switch self.auth {
case let .apiKey(key):
@ -558,6 +629,29 @@ public final class OpenAIResponsesProvider: ModelProvider {
}
}
/// Derives the finish reason for a `response.incomplete` stream event.
///
/// - Parameter event: The decoded event payload.
/// - Returns: The reason mapped from `response.incomplete_details.reason`, or
///   `.other` when any part of that path is missing or not a string.
private static func finishReasonForIncompleteResponseEvent(_ event: [String: Any]) -> FinishReason {
    let response = event["response"] as? [String: Any]
    let details = response?["incomplete_details"] as? [String: Any]
    if let reason = details?["reason"] as? String {
        return Self.finishReasonForIncompleteReason(reason)
    }
    return .other
}
/// Maps an `incomplete_details.reason` string onto a `FinishReason`.
///
/// - Parameter reason: The raw reason, or `nil` when none was reported.
/// - Returns: `.contentFilter` for `content_filter`, `.length` for
///   `max_output_tokens`, and `.other` for anything else including `nil`.
private static func finishReasonForIncompleteReason(_ reason: String?) -> FinishReason {
    if reason == "content_filter" {
        return .contentFilter
    }
    if reason == "max_output_tokens" {
        return .length
    }
    return .other
}
private func makeMessageEntry(role: String, message: ModelMessage) -> ResponsesMessage? {
let parts = self.convertContentParts(for: message)
guard !parts.isEmpty else { return nil }
@ -756,14 +850,15 @@ public final class OpenAIResponsesProvider: ModelProvider {
static func convertToProviderResponse(_ response: OpenAIResponsesResponse) throws -> ProviderResponse {
// Handle GPT-5 output arrays and alternate choices arrays.
let text: String
let toolCalls: [AgentToolCall]?
let finishReason: FinishReason?
var text: String
var toolCalls: [AgentToolCall]?
var finishReason: FinishReason?
if let outputs = response.output {
// GPT-5 format with output array
var collectedText = ""
var collectedToolCalls: [AgentToolCall] = []
var didCollectRefusal = false
for output in outputs {
if output.type == "message" {
@ -774,6 +869,10 @@ public final class OpenAIResponsesProvider: ModelProvider {
if let textSegment = chunk.text {
collectedText.append(textSegment)
}
case "refusal":
if chunk.refusal != nil || chunk.text != nil {
didCollectRefusal = true
}
case "tool_call":
if
let toolCall = chunk.toolCall,
@ -795,11 +894,21 @@ public final class OpenAIResponsesProvider: ModelProvider {
}
text = collectedText
toolCalls = collectedToolCalls.isEmpty ? nil : collectedToolCalls
if let toolCalls, !toolCalls.isEmpty {
finishReason = .toolCalls
let incompleteFinishReason = response.status == "incomplete"
? Self.finishReasonForIncompleteReason(response.incompleteDetails?.reason)
: nil
if incompleteFinishReason == .contentFilter || didCollectRefusal {
text = ""
toolCalls = nil
finishReason = .contentFilter
} else {
finishReason = .stop
toolCalls = collectedToolCalls.isEmpty ? nil : collectedToolCalls
}
if finishReason == nil, let toolCalls, !toolCalls.isEmpty {
finishReason = .toolCalls
}
if finishReason == nil {
finishReason = incompleteFinishReason ?? .stop
}
} else if let choices = response.choices, let choice = choices.first {
// Alternate format with choices array.
@ -814,11 +923,17 @@ public final class OpenAIResponsesProvider: ModelProvider {
case "stop": finishReason = .stop
case "length": finishReason = .length
case "tool_calls": finishReason = .toolCalls
case "content_filter": finishReason = .contentFilter
default: finishReason = .stop
}
} else {
finishReason = nil
}
if finishReason == .contentFilter || choice.message.refusal != nil {
text = ""
toolCalls = nil
finishReason = .contentFilter
}
} else {
throw TachikomaError.apiError("No output or choices in response")
}

View File

@ -484,11 +484,17 @@ struct OpenAIResponsesResponse: Codable {
let choices: [ResponsesChoice]? // Alternate responses can use choices array
let usage: ResponsesUsage?
let metadata: ResponsesMetadata?
let incompleteDetails: IncompleteDetails?
enum CodingKeys: String, CodingKey {
case id, object, status, model, output, choices, usage, metadata
case createdAt = "created_at"
case created
case incompleteDetails = "incomplete_details"
}
/// Payload decoded from the response's `incomplete_details` key.
struct IncompleteDetails: Codable {
/// Raw reason string; `nil` when the key is absent.
let reason: String?
}
/// GPT-5 output format
@ -513,11 +519,25 @@ struct OpenAIResponsesResponse: Codable {
/// One content entry inside an output item of a Responses API response.
struct OutputContent: Codable {
    /// Discriminator string for this entry.
    let type: String
    /// Text payload, when present.
    let text: String?
    /// Refusal payload, when present.
    let refusal: String?
    /// Tool call payload, decoded from the `tool_call` key.
    let toolCall: ResponsesToolCall?

    /// Creates an entry; every field except `type` defaults to `nil`.
    init(
        type: String,
        text: String? = nil,
        refusal: String? = nil,
        toolCall: ResponsesToolCall? = nil
    ) {
        self.type = type
        self.text = text
        self.refusal = refusal
        self.toolCall = toolCall
    }

    enum CodingKeys: String, CodingKey {
        case type, text, refusal
        case toolCall = "tool_call"
    }
}

View File

@ -77,11 +77,14 @@ struct OpenAIChatMessage: Codable {
let content: Either<String, [OpenAIChatMessageContent]>?
let toolCallId: String?
let toolCalls: [AgentToolCall]?
let reasoning: String?
let reasoningDetails: [JSONValue]?
enum CodingKeys: String, CodingKey {
case role, content
case role, content, reasoning
case toolCallId = "tool_call_id"
case toolCalls = "tool_calls"
case reasoningDetails = "reasoning_details"
}
struct AgentToolCall: Codable {
@ -100,6 +103,8 @@ struct OpenAIChatMessage: Codable {
self.content = .left(content)
self.toolCallId = toolCallId
self.toolCalls = nil
self.reasoning = nil
self.reasoningDetails = nil
}
init(role: String, content: [OpenAIChatMessageContent], toolCallId: String? = nil) {
@ -107,13 +112,23 @@ struct OpenAIChatMessage: Codable {
self.content = .right(content)
self.toolCallId = toolCallId
self.toolCalls = nil
self.reasoning = nil
self.reasoningDetails = nil
}
init(role: String, content: String? = nil, toolCalls: [AgentToolCall]?) {
init(
role: String,
content: String? = nil,
toolCalls: [AgentToolCall]?,
reasoning: String? = nil,
reasoningDetails: [JSONValue]? = nil,
) {
self.role = role
self.content = content.map { .left($0) }
self.toolCallId = nil
self.toolCalls = toolCalls
self.reasoning = reasoning
self.reasoningDetails = reasoningDetails
}
}
@ -246,10 +261,13 @@ struct OpenAIChatResponse: Codable {
let role: String
let content: String?
let toolCalls: [AgentToolCall]?
let reasoning: String?
let reasoningDetails: [JSONValue]?
enum CodingKeys: String, CodingKey {
case role, content
case role, content, reasoning
case toolCalls = "tool_calls"
case reasoningDetails = "reasoning_details"
}
}

View File

@ -33,12 +33,13 @@ public final class OpenRouterProvider: ModelProvider {
throw TachikomaError.authenticationFailed("OPENROUTER_API_KEY not found")
}
let isFable = LanguageModel.Anthropic.isFable(modelId: modelId)
self.capabilities = ModelCapabilities(
supportsVision: true,
supportsTools: true,
supportsStreaming: true,
contextLength: 128_000,
maxOutputTokens: 4096,
supportsStreaming: !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId),
contextLength: isFable ? 1_000_000 : 128_000,
maxOutputTokens: isFable ? 128_000 : 4096,
)
self.defaultHeaders = [
@ -67,6 +68,9 @@ public final class OpenRouterProvider: ModelProvider {
guard let baseURL, let apiKey else {
throw TachikomaError.invalidConfiguration("OpenRouter provider missing base URL or API key")
}
guard !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: self.modelId) else {
throw TachikomaError.invalidConfiguration("\(self.modelId) does not support streaming")
}
return try await OpenAICompatibleHelper.streamText(
request: request,

View File

@ -125,6 +125,8 @@ public struct ProviderFactory {
configuration: configuration,
apiKey: custom.apiKey,
additionalHeaders: custom.headers,
reasoningProvider: "custom-anthropic",
reasoningBaseURL: custom.baseURL,
)
}
}
@ -141,10 +143,10 @@ public struct ProviderFactory {
) throws
-> any ModelProvider
{
try AnthropicCompatibleProvider(
let baseURL = configuration.getBaseURL(for: provider) ?? provider.defaultBaseURL ?? "https://api.minimax.io/anthropic"
return try AnthropicCompatibleProvider(
modelId: model.modelId,
baseURL: configuration.getBaseURL(for: provider) ?? provider
.defaultBaseURL ?? "https://api.minimax.io/anthropic",
baseURL: baseURL,
configuration: configuration,
apiKey: apiKey,
// MiniMax's Anthropic-compatible setup uses Claude Code-style Authorization auth, not Anthropic x-api-key.
@ -156,6 +158,8 @@ public struct ProviderFactory {
contextLength: model.contextLength,
maxOutputTokens: 8192,
),
reasoningProvider: provider == .minimaxCN ? "minimax-cn" : "minimax",
reasoningBaseURL: baseURL,
)
}
}

View File

@ -8,7 +8,7 @@ public enum ProviderParser {
/// The provider name (e.g., "openai", "anthropic", "ollama")
public let provider: String
/// The model name (e.g., "gpt-5.5", "claude-opus-4-7", "llava:latest")
/// The model name (e.g., "gpt-5.5", "claude-fable-5", "llava:latest")
public let model: String
/// The full string representation (e.g., "openai/gpt-5.5")
@ -44,7 +44,7 @@ public enum ProviderParser {
}
/// Parse a comma-separated list of providers
/// - Parameter providersString: String like "openai/gpt-5.5,anthropic/claude-opus-4-7,ollama/llava:latest"
/// - Parameter providersString: String like "openai/gpt-5.5,anthropic/claude-fable-5,ollama/llava:latest"
/// - Returns: Array of parsed configurations
public static func parseList(_ providersString: String) -> [ProviderConfig] {
// Parse a comma-separated list of providers
@ -54,7 +54,7 @@ public enum ProviderParser {
}
/// Get the first provider from a comma-separated list
/// - Parameter providersString: String like "openai/gpt-5.5,anthropic/claude-opus-4-7"
/// - Parameter providersString: String like "openai/gpt-5.5,anthropic/claude-fable-5"
/// - Returns: First parsed configuration or nil if none valid
public static func parseFirst(_ providersString: String) -> ProviderConfig? {
// Get the first provider from a comma-separated list
@ -270,8 +270,10 @@ public enum ProviderParser {
}
return switch normalized {
case "claude-fable-5", "claude-fable-5-latest", "fable-5", "fable.5", "fable5", "fable":
.anthropic(.fable5)
case "claude-opus-4-8", "claude-opus-4.8", "claude-opus-4-8-latest", "opus-4-8", "opus-4.8",
"opus48":
"opus48", "claude", "claude-latest", "claude_latest", "claudelatest", "claude-default", "claude_default":
.anthropic(.opus48)
case "claude-opus-4-7", "claude-opus-4.7", "claude-opus-4-7-latest", "opus-4-7", "opus-4.7", "opus47":
.anthropic(.opus47)

View File

@ -27,12 +27,13 @@ public final class TogetherProvider: ModelProvider {
throw TachikomaError.authenticationFailed("TOGETHER_API_KEY not found")
}
let isFable = LanguageModel.Anthropic.isFable(modelId: modelId)
self.capabilities = ModelCapabilities(
supportsVision: true,
supportsTools: true,
supportsStreaming: true,
contextLength: 128_000,
maxOutputTokens: 4096,
supportsStreaming: !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: modelId),
contextLength: isFable ? 1_000_000 : 128_000,
maxOutputTokens: isFable ? 128_000 : 4096,
)
}
@ -55,6 +56,9 @@ public final class TogetherProvider: ModelProvider {
guard let baseURL, let apiKey else {
throw TachikomaError.invalidConfiguration("Together provider missing base URL or API key")
}
guard !LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: self.modelId) else {
throw TachikomaError.invalidConfiguration("\(self.modelId) does not support streaming")
}
return try await OpenAICompatibleHelper.streamText(
request: request,

View File

@ -6,18 +6,44 @@ import UIKit
// MARK: - Cache Key
/// Hashable key for cache entries
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public struct CacheProviderIdentity: Hashable, Sendable {
    /// Caller-supplied string naming the kind of provider.
    public let providerKind: String
    /// Identifier of the model served by the provider.
    public let modelId: String
    /// Result of `ReasoningEndpointIdentity.canonical` applied to the base URL.
    public let endpointIdentity: String?

    /// Creates an identity, canonicalising `baseURL` via `ReasoningEndpointIdentity`.
    public init(providerKind: String, modelId: String, baseURL: String?) {
        let canonicalEndpoint = ReasoningEndpointIdentity.canonical(baseURL)
        self.providerKind = providerKind
        self.modelId = modelId
        self.endpointIdentity = canonicalEndpoint
    }
}
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
struct CacheKey: Hashable {
let hash: String
let model: String? // Store model ID for invalidation
let isCacheable: Bool
init(from request: ProviderRequest, model: String? = nil) {
self.model = model
init(from request: ProviderRequest, providerIdentity: CacheProviderIdentity? = nil, model: String? = nil) {
self.model = providerIdentity?.modelId ?? model
// Create a unique hash from the request
var hasher = Hasher()
if let providerIdentity {
hasher.combine(providerIdentity.providerKind)
hasher.combine(providerIdentity.modelId)
hasher.combine(providerIdentity.endpointIdentity)
}
// Combine message content
for message in request.messages {
hasher.combine(message.role.rawValue)
hasher.combine(message.channel?.rawValue)
hasher.combine(message.metadata?.conversationId)
hasher.combine(message.metadata?.turnId)
for key in message.metadata?.customData?.keys.sorted() ?? [] {
hasher.combine(key)
hasher.combine(message.metadata?.customData?[key])
}
for part in message.content {
switch part {
case let .text(text):
@ -41,8 +67,32 @@ struct CacheKey: Hashable {
hasher.combine(request.settings.temperature)
hasher.combine(request.settings.maxTokens)
hasher.combine(request.settings.topP)
hasher.combine(request.settings.topK)
hasher.combine(request.settings.frequencyPenalty)
hasher.combine(request.settings.presencePenalty)
hasher.combine(request.settings.stopSequences)
hasher.combine(request.settings.reasoningEffort?.rawValue)
hasher.combine(request.settings.seed)
if let stopConditions = request.settings.stopConditions {
guard let cacheKey = (stopConditions as? StableCacheKeyStopCondition)?.stableCacheKey else {
self.hash = ""
self.isCacheable = false
return
}
hasher.combine(cacheKey)
}
if let providerOptionsData = try? Self.providerOptionsEncoder.encode(request.settings.providerOptions) {
hasher.combine(providerOptionsData)
}
self.hash = String(hasher.finalize())
self.isCacheable = true
}
private static let providerOptionsEncoder: JSONEncoder = {
let encoder = JSONEncoder()
encoder.outputFormatting = [.sortedKeys]
return encoder
}()
}
// MARK: - Response Cache
@ -75,11 +125,16 @@ public actor ResponseCache {
public func get(
for request: ProviderRequest,
ttlOverride: TimeInterval? = nil,
providerIdentity: CacheProviderIdentity? = nil,
)
-> ProviderResponse?
{
// Get cached response with TTL validation
let key = CacheKey(from: request)
let key = CacheKey(from: request, providerIdentity: providerIdentity)
guard key.isCacheable else {
self.statistics.recordMiss()
return nil
}
guard let entry = cache[key] else {
self.statistics.recordMiss()
@ -109,9 +164,13 @@ public actor ResponseCache {
for request: ProviderRequest,
ttl: TimeInterval? = nil,
priority: CachePriority = .normal,
providerIdentity: CacheProviderIdentity? = nil,
) {
// Store response with custom TTL and priority
let key = CacheKey(from: request)
let key = CacheKey(from: request, providerIdentity: providerIdentity)
guard key.isCacheable else {
return
}
// Check memory limit
if self.shouldEvictForMemory() {
@ -457,9 +516,33 @@ final class CacheEntry: @unchecked Sendable {
// Rough estimation based on response content
let textSize = self.response.text.utf8.count
let toolCallsSize = (response.toolCalls?.count ?? 0) * 100 // Estimate 100 bytes per tool call
let reasoningSize = self.response.reasoning.reduce(0) { total, block in
total + block.text.utf8.count +
(block.signature?.utf8.count ?? 0) +
(block.rawJSON?.utf8.count ?? 0) +
block.type.utf8.count
}
let assistantMessageSize = self.response.assistantMessages.reduce(0) { total, message in
let contentSize = message.content.reduce(0) { contentTotal, part in
switch part {
case let .text(text):
contentTotal + text.utf8.count
case let .image(image):
contentTotal + image.mimeType.utf8.count + image.data.utf8.count
case let .toolCall(call):
contentTotal + call.id.utf8.count + call.name.utf8.count + 100
case let .toolResult(result):
contentTotal + result.toolCallId.utf8.count + 100
}
}
let metadataSize = (message.metadata?.customData ?? [:]).reduce(0) { metadataTotal, pair in
metadataTotal + pair.key.utf8.count + pair.value.utf8.count
}
return total + contentSize + metadataSize + (message.channel?.rawValue.utf8.count ?? 0)
}
let usageSize = 50 // Fixed overhead for usage data
return textSize + toolCallsSize + usageSize + 100 // 100 bytes overhead
return textSize + toolCallsSize + reasoningSize + assistantMessageSize + usageSize + 100
}
}
@ -546,6 +629,7 @@ extension ResponseCache {
public struct CacheAwareProvider<Base: ModelProvider>: ModelProvider {
let provider: Base
let cache: ResponseCache
private let providerIdentity: CacheProviderIdentity
public var modelId: String {
self.provider.modelId
@ -563,11 +647,21 @@ public struct CacheAwareProvider<Base: ModelProvider>: ModelProvider {
self.provider.capabilities
}
/// Wraps `provider` with `cache` and records the identity passed to cache lookups.
///
/// The identity is built from the reflected name of `Base`, the provider's
/// model id and its base URL.
init(provider: Base, cache: ResponseCache) {
    let identity = CacheProviderIdentity(
        providerKind: String(reflecting: Base.self),
        modelId: provider.modelId,
        baseURL: provider.baseURL
    )
    self.provider = provider
    self.cache = cache
    self.providerIdentity = identity
}
public func generateText(request: ProviderRequest) async throws -> ProviderResponse {
// Check cache with smart TTL based on request type
let ttl = self.determineTTL(for: request)
if let cached = await cache.get(for: request, ttlOverride: ttl) {
if let cached = await cache.get(for: request, ttlOverride: ttl, providerIdentity: self.providerIdentity) {
return cached
}
@ -575,7 +669,13 @@ public struct CacheAwareProvider<Base: ModelProvider>: ModelProvider {
let response = try await provider.generateText(request: request)
let priority = self.determinePriority(for: request)
await self.cache.store(response, for: request, ttl: ttl, priority: priority)
await self.cache.store(
response,
for: request,
ttl: ttl,
priority: priority,
providerIdentity: self.providerIdentity,
)
return response
}

View File

@ -543,6 +543,7 @@ public struct ModelCostCalculator: Sendable {
// Anthropic Pricing (as of 2026)
case let .anthropic(anthropicModel):
switch anthropicModel {
case .fable5: (10.00, 50.00)
case .opus48: (5.00, 25.00)
case .opus47: (5.00, 25.00)
case .opus45: (5.00, 25.00)
@ -550,7 +551,8 @@ public struct ModelCostCalculator: Sendable {
case .sonnet46: (3.00, 15.00)
case .sonnet45: (4.00, 18.00)
case .haiku45: (1.20, 6.00)
case .custom: (3.00, 15.00) // Default estimate
case let .custom(id):
id.lowercased().contains("claude-fable-5") ? (10.00, 50.00) : (3.00, 15.00)
}
// Google Pricing (standard tier, as of 2026)
case let .google(googleModel):

View File

@ -26,32 +26,43 @@ public final class Agent<Context>: @unchecked Sendable {
public private(set) var tools: [AgentTool]
/// Language model used by this agent
public var model: LanguageModel
public var model: LanguageModel {
didSet {
self.usesImplicitDefaultModel = false
}
}
/// Generation settings for the agent
public var settings: GenerationSettings
/// Provider configuration for generation and streaming
private let configuration: TachikomaConfiguration
/// The context instance passed to tool executions
private let context: Context
/// Current conversation history
public private(set) var conversation: Conversation
private var usesImplicitDefaultModel: Bool
public init(
name: String,
instructions: String,
model: LanguageModel = .default,
model: LanguageModel? = nil,
tools: [AgentTool] = [],
settings: GenerationSettings = .default,
configuration: TachikomaConfiguration = .current,
context: Context,
) {
self.name = name
self.instructions = instructions
self.model = model
self.usesImplicitDefaultModel = model == nil
self.model = model ?? .default
self.tools = tools
self.settings = settings
self.configuration = configuration
self.context = context
self.conversation = Conversation()
self.conversation = Conversation(configuration: configuration)
// Add system message with instructions
self.conversation.addSystemMessage(instructions)
@ -71,66 +82,133 @@ public final class Agent<Context>: @unchecked Sendable {
/// Execute a single message with the agent
public func execute(_ message: String) async throws -> AgentResponse {
// Add user message to conversation
self.conversation.addUserMessage(message)
let conversation = self.conversation
let model = self.model
let tools = self.tools
let settings = self.settings
// Generate response using the conversation
let result = try await generateText(
model: model,
messages: conversation.getModelMessages(),
tools: self.tools.isEmpty ? nil : self.tools,
settings: self.settings,
maxSteps: 5, // Allow multi-step tool execution
)
return try await conversation.withContinuationLock {
conversation.addUserMessage(message)
let conversationMessages = conversation.messages
let modelMessages = conversationMessages.map { $0.toModelMessage() }
let snapshotIDs = conversationMessages.map(\.id)
let anchorID = conversationMessages.last?.id
let result = try await generateText(
model: model,
messages: modelMessages,
tools: tools.isEmpty ? nil : tools,
settings: settings,
maxSteps: 5, // Allow multi-step tool execution
configuration: self.configuration,
)
// Add assistant response to conversation
self.conversation.addAssistantMessage(result.text)
// Add any tool calls and results to conversation
for step in result.steps {
if !step.toolCalls.isEmpty {
for _ in step.toolCalls {
// Tool calls are already added by generateText
let didMerge: Bool
if result.finishReason == .contentFilter {
didMerge = conversation.mergeContentFilterResult(
result.messages,
originalMessages: modelMessages,
afterMessageID: anchorID,
validatingSnapshotIDs: snapshotIDs,
)
} else if let anchorID {
let generatedMessages = Array(result.messages.dropFirst(modelMessages.count))
let didMerge = conversation.appendGeneratedMessages(
generatedMessages,
afterMessageID: anchorID,
validatingSnapshotIDs: snapshotIDs,
)
guard didMerge else {
throw TachikomaError.invalidConfiguration(
"Conversation changed during generation; refusing to merge response",
)
}
return AgentResponse(
text: result.text,
usage: result.usage,
finishReason: result.finishReason ?? .other,
steps: result.steps,
conversationLength: conversation.messages.count,
)
} else {
didMerge = conversation.messages.isEmpty
}
if !step.toolResults.isEmpty {
for _ in step.toolResults {
// Tool results are already added by generateText
}
guard didMerge else {
throw TachikomaError.invalidConfiguration(
"Conversation changed during generation; refusing to merge response",
)
}
return AgentResponse(
text: result.text,
usage: result.usage,
finishReason: result.finishReason ?? .other,
steps: result.steps,
conversationLength: conversation.messages.count,
)
}
return AgentResponse(
text: result.text,
usage: result.usage,
finishReason: result.finishReason ?? .other,
steps: result.steps,
conversationLength: self.conversation.messages.count,
)
}
/// Stream a response from the agent
public func stream(_ message: String) async throws -> AsyncThrowingStream<TextStreamDelta, Error> {
let streamingModel = if self.usesImplicitDefaultModel {
LanguageModel.defaultStreaming
} else if self.model.supportsStreaming {
self.model
} else {
self.model
}
guard streamingModel.supportsStreaming else {
throw TachikomaError.invalidConfiguration("\(self.model.modelId) does not support streaming")
}
let conversation = self.conversation
try await conversation.acquireContinuationLock()
let gateRelease = AsyncReleaseOnce {
await conversation.releaseContinuationLock()
}
// Add user message to conversation
self.conversation.addUserMessage(message)
conversation.addUserMessage(message)
let conversationMessages = conversation.messages
let modelMessages = conversationMessages.map { $0.toModelMessage() }
let snapshotIDs = conversationMessages.map(\.id)
let buffersUntilDone = self.settings.streamBuffering == .untilTerminal
// Stream response
let streamResult = try await streamText(
model: model,
messages: conversation.getModelMessages(),
tools: self.tools.isEmpty ? nil : self.tools,
settings: self.settings,
maxSteps: 5,
)
let streamResult: StreamTextResult
do {
streamResult = try await streamText(
model: streamingModel,
messages: modelMessages,
tools: self.tools.isEmpty ? nil : self.tools,
settings: self.settings,
maxSteps: 5,
configuration: self.configuration,
)
} catch {
gateRelease.release()
throw error
}
// Track final message in conversation (this is approximate for streaming)
return AsyncThrowingStream<TextStreamDelta, Error> { continuation in
Task {
let producer = Task {
defer {
gateRelease.release()
}
do {
var assistantText = ""
var bufferedDeltas: [TextStreamDelta] = []
var didReceiveTerminal = false
for try await delta in streamResult.stream {
continuation.yield(delta)
try Task.checkCancellation()
if buffersUntilDone {
bufferedDeltas.append(delta)
} else {
continuation.yield(delta)
}
// Collect assistant text
if case .textDelta = delta.type, let content = delta.content {
@ -138,25 +216,60 @@ public final class Agent<Context>: @unchecked Sendable {
}
if case .done = delta.type {
didReceiveTerminal = true
guard delta.finishReason != .contentFilter else {
let didRollback = conversation.replaceModelMessages(
modelMessages.droppingLastUserTurn(),
validatingSnapshotIDs: snapshotIDs,
)
guard didRollback else {
throw TachikomaError.invalidConfiguration(
"Conversation changed during streaming; refusing to merge response",
)
}
assistantText = ""
bufferedDeltas.removeAll()
if buffersUntilDone {
continuation.yield(delta)
}
continue
}
if buffersUntilDone {
for bufferedDelta in bufferedDeltas {
continuation.yield(bufferedDelta)
}
bufferedDeltas.removeAll()
}
// Add final assistant message to conversation
if !assistantText.isEmpty {
self.conversation.addAssistantMessage(assistantText)
conversation.addAssistantMessage(assistantText)
assistantText = ""
}
}
}
if buffersUntilDone, !didReceiveTerminal, !bufferedDeltas.isEmpty {
throw TachikomaError.apiError("Stream ended before provider completion status was received")
}
if !buffersUntilDone, !assistantText.isEmpty {
try Task.checkCancellation()
conversation.addAssistantMessage(assistantText)
}
continuation.finish()
} catch {
continuation.finish(throwing: error)
}
}
continuation.onTermination = { @Sendable _ in
producer.cancel()
}
}
}
/// Reset the agent's conversation history
public func resetConversation() {
// Reset the agent's conversation history
self.conversation = Conversation()
self.conversation = Conversation(configuration: self.configuration)
self.conversation.addSystemMessage(self.instructions)
}
@ -169,7 +282,7 @@ public final class Agent<Context>: @unchecked Sendable {
public func updateInstructions(_ newInstructions: String) {
// Create new conversation with updated instructions
let oldMessages = self.conversation.getModelMessages().filter { $0.role != .system }
self.conversation = Conversation()
self.conversation = Conversation(configuration: self.configuration)
self.conversation.addSystemMessage(newInstructions)
// Re-add non-system messages
@ -179,6 +292,30 @@ public final class Agent<Context>: @unchecked Sendable {
}
}
/// Runs an async operation at most once, no matter how many times `release()` is called.
///
/// The first call to `release()` starts the operation in a new `Task`; later
/// calls return without doing anything.
final class AsyncReleaseOnce: @unchecked Sendable {
    private let operation: @Sendable () async -> Void
    private let lock = NSLock()
    private var didRelease = false

    /// - Parameter operation: The work to run on the first `release()`.
    init(operation: @escaping @Sendable () async -> Void) {
        self.operation = operation
    }

    /// Starts the operation if it has not been started yet. Does not wait for it.
    func release() {
        guard self.claimRelease() else {
            return
        }
        Task {
            await self.operation()
        }
    }

    /// Marks the instance as released under the lock.
    /// - Returns: `true` only for the first caller.
    private func claimRelease() -> Bool {
        self.lock.lock()
        defer { self.lock.unlock() }
        if self.didRelease {
            return false
        }
        self.didRelease = true
        return true
    }
}
/// Response from an agent execution
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public struct AgentResponse: Sendable {
@ -537,3 +674,33 @@ public enum SessionStatus: String, Codable, Sendable, CaseIterable {
case failed
case cancelled
}
extension LanguageModel {
    /// Whether `LanguageModel.Anthropic.hasStreamingRefusalRisk` reports a risk
    /// for the model id this value resolves to.
    ///
    /// Custom providers are resolved through `ProviderParser` and
    /// `CustomProviderRegistry`: a registered OpenAI-kind provider yields `false`,
    /// a registered Anthropic-kind provider is checked by its parsed model name,
    /// and an unparsable or unregistered one is checked by its raw model id.
    /// Cases not listed below yield `false`.
    var requiresTerminalRefusalBuffering: Bool {
        let riskModelId: String
        switch self {
        case let .anthropic(model):
            riskModelId = model.modelId
        case let .anthropicCompatible(modelId, _), let .openaiCompatible(modelId, _):
            riskModelId = modelId
        case let .openRouter(modelId), let .together(modelId):
            riskModelId = modelId
        case let .custom(provider):
            if
                let parsed = ProviderParser.parse(provider.modelId),
                let registeredProvider = CustomProviderRegistry.shared.get(parsed.provider)
            {
                switch registeredProvider.kind {
                case .openai:
                    return false
                case .anthropic:
                    riskModelId = parsed.model
                }
            } else {
                riskModelId = provider.modelId
            }
        default:
            return false
        }
        return LanguageModel.Anthropic.hasStreamingRefusalRisk(modelId: riskModelId)
    }
}

View File

@ -3,10 +3,66 @@ import Tachikoma
// MARK: - Conversation Management
/// An async mutual-exclusion gate with cancellable waiters.
///
/// The first caller of `acquire()` takes the gate immediately. Later callers suspend and are
/// queued in `waiters`; `release()` resumes the oldest queued waiter first.
private actor ContinuationGate {
/// A suspended `acquire()` call, identified so it can be removed on cancellation.
private struct Waiter {
let id: UUID
// Resumed with `true` when the gate is handed over, `false` when the wait was cancelled.
let continuation: CheckedContinuation<Bool, Never>
}
private var isLocked = false
private var waiters: [Waiter] = []
/// Takes the gate, suspending until it is available.
///
/// - Throws: `CancellationError` if the task is cancelled before it starts waiting,
///   while it is queued, or by the time the gate is handed to it.
func acquire() async throws {
try Task.checkCancellation()
// Fast path: nobody holds the gate.
if !self.isLocked {
self.isLocked = true
return
}
let id = UUID()
let acquired = await withTaskCancellationHandler {
await withCheckedContinuation { continuation in
self.waiters.append(Waiter(id: id, continuation: continuation))
}
} onCancel: {
// The handler is synchronous, so hop back onto the actor to dequeue the waiter.
Task { await self.cancelWaiter(id) }
}
guard acquired else {
throw CancellationError()
}
// The gate was handed to this task, but it was cancelled in the meantime:
// pass the gate on instead of keeping it.
if Task.isCancelled {
self.release()
throw CancellationError()
}
}
/// Removes the waiter with `id` from the queue, if still present, and resumes it with `false`.
private func cancelWaiter(_ id: UUID) {
guard let index = self.waiters.firstIndex(where: { $0.id == id }) else {
return
}
let waiter = self.waiters.remove(at: index)
waiter.continuation.resume(returning: false)
}
/// Gives up the gate: unlocks it when nobody waits, otherwise resumes the oldest waiter.
func release() {
if self.waiters.isEmpty {
self.isLocked = false
} else {
// Hand-off: `isLocked` stays true because ownership moves directly to the waiter.
let waiter = self.waiters.removeFirst()
waiter.continuation.resume(returning: true)
}
}
}
/// A conversation with an AI model
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public final class Conversation: @unchecked Sendable {
private let lock = NSLock()
private let continuationGate = ContinuationGate()
private var _messages: [ConversationMessage] = []
/// The configuration used by this conversation
@ -59,7 +115,6 @@ public final class Conversation: @unchecked Sendable {
/// Get messages as ModelMessage array for API compatibility
public func getModelMessages() -> [ModelMessage] {
// Get messages as ModelMessage array for API compatibility
self.messages.map { $0.toModelMessage() }
}
@ -72,31 +127,193 @@ public final class Conversation: @unchecked Sendable {
self.lock.unlock()
}
/// Continue the conversation with a model
public func continueConversation(using model: Model? = nil, tools _: [AgentTool]? = nil) async throws -> String {
// Convert conversation messages to model messages
let modelMessages = self.messages.map { conversationMessage in
ModelMessage(
id: conversationMessage.id,
role: ModelMessage.Role(rawValue: conversationMessage.role.rawValue) ?? .user,
content: [.text(conversationMessage.content)],
timestamp: conversationMessage.timestamp,
/// Replace the whole history with the given `ModelMessage` values.
///
/// Each message is converted with `ConversationMessage.from(_:)`; the stored history is
/// swapped while holding `lock`.
/// - Parameter modelMessages: The messages that become the new history.
public func replaceModelMessages(_ modelMessages: [ModelMessage]) {
    let replacement = modelMessages.map { ConversationMessage.from($0) }
    self.lock.lock()
    defer { self.lock.unlock() }
    self._messages = replacement
}
/// Replace the snapshot prefix of the history, but only if that prefix is unchanged.
///
/// The first `snapshotIDs.count` stored messages must carry exactly the given IDs, in order.
/// When they do, they are replaced by `modelMessages`; any messages stored after that
/// prefix are kept and follow the replacement.
/// - Parameters:
///   - modelMessages: Messages that replace the snapshot prefix.
///   - snapshotIDs: Message IDs captured when the snapshot was taken.
/// - Returns: `true` if the replacement happened, `false` if the prefix no longer matches.
public func replaceModelMessages(
    _ modelMessages: [ModelMessage],
    validatingSnapshotIDs snapshotIDs: [String],
)
    -> Bool
{
    self.lock.lock()
    defer { self.lock.unlock() }

    let snapshotLength = snapshotIDs.count
    let prefixIsIntact = self._messages.count >= snapshotLength
        && self._messages.prefix(snapshotLength).elementsEqual(snapshotIDs, by: { $0.id == $1 })
    if !prefixIsIntact {
        return false
    }

    let replacement = modelMessages.map { ConversationMessage.from($0) }
    self._messages = replacement + self._messages.dropFirst(snapshotLength)
    return true
}
/// Replace the first `prefixCount` stored messages with `modelMessages`, keeping later appends.
///
/// - Parameters:
///   - modelMessages: The generated history that replaces the snapshot prefix.
///   - prefixCount: Number of leading messages to replace. Values beyond the current
///     message count are clamped to it; the value must not be negative, since it is
///     passed to `dropFirst(_:)`.
public func mergeGeneratedMessages(_ modelMessages: [ModelMessage], replacingPrefixCount prefixCount: Int) {
    let regenerated = modelMessages.map { ConversationMessage.from($0) }
    self.lock.lock()
    defer { self.lock.unlock() }
    let replacedCount = min(prefixCount, self._messages.count)
    self._messages = regenerated + self._messages.dropFirst(replacedCount)
}
/// Insert generated messages directly after the message with `messageID`.
///
/// Messages that were appended after the anchor in the meantime stay behind the inserted
/// ones. If no stored message has that ID, the generated messages are appended at the end.
/// An empty `modelMessages` array is a no-op.
/// - Parameters:
///   - modelMessages: The generated messages to insert.
///   - messageID: ID of the anchor message.
public func appendGeneratedMessages(_ modelMessages: [ModelMessage], afterMessageID messageID: String) {
    if modelMessages.isEmpty {
        return
    }
    let generated = modelMessages.map { ConversationMessage.from($0) }

    self.lock.lock()
    defer { self.lock.unlock() }

    // Inserting at `endIndex` is the same as appending.
    let insertionIndex = self._messages
        .firstIndex(where: { $0.id == messageID })
        .map { self._messages.index(after: $0) } ?? self._messages.endIndex
    self._messages.insert(contentsOf: generated, at: insertionIndex)
}
/// Insert generated messages after the anchor, but only if the snapshot prefix is unchanged.
///
/// The first `snapshotIDs.count` stored messages must carry exactly the given IDs, in order.
/// When they do, the generated messages go directly after the message with `messageID`,
/// or at the end if no stored message has that ID.
/// - Parameters:
///   - modelMessages: The generated messages to insert. An empty array succeeds without
///     checking the snapshot.
///   - messageID: ID of the anchor message.
///   - snapshotIDs: Message IDs captured when the snapshot was taken.
/// - Returns: `true` if nothing had to be inserted or the insert happened, `false` if the
///   stored prefix no longer matches the snapshot.
public func appendGeneratedMessages(
    _ modelMessages: [ModelMessage],
    afterMessageID messageID: String,
    validatingSnapshotIDs snapshotIDs: [String],
)
    -> Bool
{
    if modelMessages.isEmpty {
        return true
    }

    self.lock.lock()
    defer { self.lock.unlock() }

    let snapshotLength = snapshotIDs.count
    let prefixIsIntact = self._messages.count >= snapshotLength
        && self._messages.prefix(snapshotLength).elementsEqual(snapshotIDs, by: { $0.id == $1 })
    if !prefixIsIntact {
        return false
    }

    let generated = modelMessages.map { ConversationMessage.from($0) }
    // Inserting at `endIndex` is the same as appending.
    let insertionIndex = self._messages
        .firstIndex(where: { $0.id == messageID })
        .map { self._messages.index(after: $0) } ?? self._messages.endIndex
    self._messages.insert(contentsOf: generated, at: insertionIndex)
    return true
}
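/// Merge a refused (content-filtered) generation without losing completed tool steps.
///
/// Messages in `resultMessages` beyond the first `originalMessages.count` are treated as
/// generated. If there are any, the snapshot is replaced by the original messages plus those
/// generated ones; otherwise it is replaced by the original messages with a trailing user
/// turn dropped. The `afterMessageID` argument is ignored.
/// - Returns: `false` when the stored prefix no longer matches `snapshotIDs`, else `true`.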
public func mergeContentFilterResult(
_ resultMessages: [ModelMessage],
originalMessages: [ModelMessage],
afterMessageID _: String?,
validatingSnapshotIDs snapshotIDs: [String],
)
-> Bool
{
let generatedMessages = Array(resultMessages.dropFirst(originalMessages.count))
if !generatedMessages.isEmpty {
return self.replaceModelMessages(
originalMessages + generatedMessages,
validatingSnapshotIDs: snapshotIDs,
)
}
// Generate response using the core API
let response = try await generateText(
model: model ?? .default,
messages: modelMessages,
tools: [],
settings: .default,
configuration: configuration,
return self.replaceModelMessages(
originalMessages.droppingLastUserTurn(),
validatingSnapshotIDs: snapshotIDs,
)
}
// Add the response to the conversation
self.addAssistantMessage(response.text)
/// Remove every stored message whose ID equals `id`.
///
/// - Parameter id: The message ID to remove; unknown IDs leave the history unchanged.
public func removeMessage(id: String) {
    self.lock.lock()
    defer { self.lock.unlock() }
    self._messages.removeAll(where: { message in message.id == id })
}
return response.text
/// Run `operation` while holding the conversation's continuation lock.
///
/// The lock is released after `operation` finishes, whether it returns or throws.
/// - Parameter operation: The work to run while the lock is held.
/// - Returns: The value returned by `operation`.
/// - Throws: Any error from acquiring the lock, or the error thrown by `operation`.
public func withContinuationLock<T>(_ operation: () async throws -> T) async throws -> T {
    try await self.acquireContinuationLock()

    // Capture the outcome first so the release happens exactly once on both paths.
    let outcome: Result<T, Error>
    do {
        outcome = try await .success(operation())
    } catch {
        outcome = .failure(error)
    }

    await self.releaseContinuationLock()
    return try outcome.get()
}
/// Acquire the continuation lock by forwarding to `continuationGate.acquire()`.
///
/// - Throws: Whatever `continuationGate.acquire()` throws.
public func acquireContinuationLock() async throws {
try await self.continuationGate.acquire()
}
/// Release the continuation lock by forwarding to `continuationGate.release()`.
public func releaseContinuationLock() async {
await self.continuationGate.release()
}
/// Continue the conversation with a model and merge the generated messages into the history.
///
/// The whole operation runs inside `withContinuationLock`. The history is snapshotted before
/// the request; the merge afterwards is validated against the snapshot's message IDs.
/// - Parameters:
///   - model: The model to use; `.default` when `nil`.
///   - tools: Tools forwarded to `generateText`.
///   - maxSteps: Forwarded to `generateText` as `maxSteps`.
/// - Returns: The `text` of the generated response.
/// - Throws: `TachikomaError.invalidConfiguration` when the response could not be merged
///   because the conversation changed; rethrows errors from acquiring the lock and from
///   `generateText`.
public func continueConversation(
using model: Model? = nil,
tools: [AgentTool]? = nil,
maxSteps: Int = 5,
) async throws
-> String
{
try await self.withContinuationLock {
// Snapshot the history once; the IDs are used below to detect edits to this prefix.
let conversationMessages = self.messages
let modelMessages = conversationMessages.map { $0.toModelMessage() }
let snapshotIDs = conversationMessages.map(\.id)
// Generated messages are inserted right after the last message of the snapshot.
let anchorID = conversationMessages.last?.id
// Generate response using the core API
let response = try await generateText(
model: model ?? .default,
messages: modelMessages,
tools: tools,
settings: .default,
maxSteps: maxSteps,
configuration: configuration,
)
let didMerge: Bool
if response.finishReason == .contentFilter {
// Refusal: let mergeContentFilterResult decide what to keep from the response.
didMerge = self.mergeContentFilterResult(
response.messages,
originalMessages: modelMessages,
afterMessageID: anchorID,
validatingSnapshotIDs: snapshotIDs,
)
} else if let anchorID {
// Only the messages beyond the ones that were sent are new.
let generatedMessages = Array(response.messages.dropFirst(modelMessages.count))
didMerge = self.appendGeneratedMessages(
generatedMessages,
afterMessageID: anchorID,
validatingSnapshotIDs: snapshotIDs,
)
} else if self.messages.isEmpty {
// The snapshot was empty and the history still is: adopt the response messages as-is.
// NOTE(review): the emptiness check and the replacement are two separate calls, so a
// message added in between would be overwritten — confirm this is acceptable.
self.replaceModelMessages(response.messages)
didMerge = true
} else {
// The snapshot was empty but messages were added during generation.
didMerge = false
}
guard didMerge else {
throw TachikomaError.invalidConfiguration(
"Conversation changed during generation; refusing to merge response",
)
}
return response.text
}
}
/// Continue the conversation with a model, streaming the response
@ -106,43 +323,90 @@ public final class Conversation: @unchecked Sendable {
) async throws
-> AsyncThrowingStream<String, Error>
{
// Convert conversation messages to model messages
let modelMessages = self.messages.map { conversationMessage in
ModelMessage(
id: conversationMessage.id,
role: ModelMessage.Role(rawValue: conversationMessage.role.rawValue) ?? .user,
content: [.text(conversationMessage.content)],
timestamp: conversationMessage.timestamp,
)
try await self.acquireContinuationLock()
let gateRelease = AsyncReleaseOnce {
await self.releaseContinuationLock()
}
let conversationMessages = self.messages
let modelMessages = conversationMessages.map { $0.toModelMessage() }
let snapshotIDs = conversationMessages.map(\.id)
let resolvedModel = model ?? .defaultStreaming
let streamSettings = GenerationSettings.default
let buffersUntilDone = streamSettings.streamBuffering == .untilTerminal ||
resolvedModel.requiresTerminalRefusalBuffering
// Generate response using the core API
let responseStream = try await streamText(
model: model ?? .default,
messages: modelMessages,
tools: tools ?? [], // Use provided tools or empty array
settings: .default,
configuration: configuration,
)
let responseStream: StreamTextResult
do {
responseStream = try await streamText(
model: resolvedModel,
messages: modelMessages,
tools: tools ?? [], // Use provided tools or empty array
settings: streamSettings,
configuration: self.configuration,
)
} catch {
gateRelease.release()
throw error
}
// Create a new stream to process the response and update the conversation
return AsyncThrowingStream<String, Error> { continuation in
Task {
let producer = Task {
defer {
gateRelease.release()
}
var fullResponse = ""
var isContentFiltered = false
var bufferedText: [String] = []
var didApproveBufferedResponse = !buffersUntilDone
var didReceiveTerminal = false
do {
for try await delta in responseStream.stream {
try Task.checkCancellation()
switch delta.type {
case .textDelta:
if let text = delta.content {
continuation.yield(text)
if buffersUntilDone {
bufferedText.append(text)
} else {
continuation.yield(text)
}
fullResponse += text
}
case .done where delta.finishReason == .contentFilter:
didReceiveTerminal = true
isContentFiltered = true
let didRollback = self.replaceModelMessages(
modelMessages.droppingLastUserTurn(),
validatingSnapshotIDs: snapshotIDs,
)
guard didRollback else {
throw TachikomaError.invalidConfiguration(
"Conversation changed during streaming; refusing to merge response",
)
}
fullResponse = ""
bufferedText.removeAll()
case .done:
didReceiveTerminal = true
if buffersUntilDone {
for text in bufferedText {
continuation.yield(text)
}
didApproveBufferedResponse = true
bufferedText.removeAll()
}
default:
break
}
}
if buffersUntilDone, !didReceiveTerminal, !bufferedText.isEmpty {
throw TachikomaError.apiError("Stream ended before provider completion status was received")
}
// Add the full response to the conversation
if !fullResponse.isEmpty {
if !isContentFiltered, !fullResponse.isEmpty, didApproveBufferedResponse {
try Task.checkCancellation()
self.addAssistantMessage(fullResponse)
}
continuation.finish()
@ -150,10 +414,20 @@ public final class Conversation: @unchecked Sendable {
continuation.finish(throwing: error)
}
}
continuation.onTermination = { @Sendable _ in
producer.cancel()
}
}
}
}
extension [ModelMessage] {
    /// Returns the messages without the final one when that final message has the `.user` role.
    ///
    /// An empty array, or one ending in a message of any other role, is returned unchanged.
    func droppingLastUserTurn() -> [ModelMessage] {
        if let trailing = self.last, trailing.role == .user {
            return Array(self.dropLast())
        }
        return self
    }
}
/// A message in a conversation
@available(macOS 13.0, iOS 16.0, watchOS 9.0, tvOS 16.0, *)
public struct ConversationMessage: Sendable, Codable, Equatable {
@ -161,6 +435,9 @@ public struct ConversationMessage: Sendable, Codable, Equatable {
public let role: Role
public let content: String
public let timestamp: Date
public let contentParts: [ModelMessage.ContentPart]?
public let channel: ResponseChannel?
public let metadata: MessageMetadata?
public enum Role: String, Sendable, Codable, CaseIterable {
case system
@ -169,11 +446,22 @@ public struct ConversationMessage: Sendable, Codable, Equatable {
case tool
}
public init(id: String = UUID().uuidString, role: Role, content: String, timestamp: Date = Date()) {
public init(
id: String = UUID().uuidString,
role: Role,
content: String,
timestamp: Date = Date(),
contentParts: [ModelMessage.ContentPart]? = nil,
channel: ResponseChannel? = nil,
metadata: MessageMetadata? = nil,
) {
self.id = id
self.role = role
self.content = content
self.timestamp = timestamp
self.contentParts = contentParts
self.channel = channel
self.metadata = metadata
}
/// Convert to ModelMessage for API compatibility
@ -189,8 +477,10 @@ public struct ConversationMessage: Sendable, Codable, Equatable {
return ModelMessage(
id: self.id,
role: modelRole,
content: [.text(self.content)],
content: self.contentParts ?? [.text(self.content)],
timestamp: self.timestamp,
channel: self.channel,
metadata: self.metadata,
)
}
@ -219,6 +509,9 @@ public struct ConversationMessage: Sendable, Codable, Equatable {
role: role,
content: textContent,
timestamp: modelMessage.timestamp,
contentParts: modelMessage.content,
channel: modelMessage.channel,
metadata: modelMessage.metadata,
)
}
}

View File

@ -191,7 +191,7 @@ struct MCPClientTests {
let imageData = Data("test".utf8)
let imageResponse = ToolResponse.image(data: imageData, mimeType: "image/png")
#expect(imageResponse.content.count == 1)
if case .image(data: let data, mimeType: let mimeType, annotations: _, _meta: _) = imageResponse.content.first {
if case let .image(data: data, mimeType: mimeType, annotations: _, _meta: _) = imageResponse.content.first {
#expect(data == imageData.base64EncodedString())
#expect(mimeType == "image/png")
} else {

View File

@ -130,7 +130,7 @@ struct MCPToolAdapterTests {
#expect(response.isError == false)
#expect(response.content.count == 1)
if case .image(data: let data, mimeType: let mimeType, annotations: _, _meta: _) = response.content[0] {
if case let .image(data: data, mimeType: mimeType, annotations: _, _meta: _) = response.content[0] {
#expect(data == imageData.base64EncodedString())
#expect(mimeType == "image/jpeg")
} else {

View File

@ -120,6 +120,30 @@ struct CustomProviderRegistryTests {
let compatibleClaude = try #require(resolvedClaude as? AnthropicCompatibleProvider)
#expect(compatibleClaude.apiKey == "claude-provider-key")
let fableModel = LanguageModel.custom(
provider: DynamicCustomProvider(modelId: "claude-proxy/claude-fable-5"),
)
#expect(fableModel.supportsStreaming == false)
let resolvedFable = try ProviderFactory.createProvider(
for: fableModel,
configuration: TachikomaConfiguration(loadFromEnvironment: false),
)
let compatibleFable = try #require(resolvedFable as? AnthropicCompatibleProvider)
#expect(compatibleFable.capabilities.supportsStreaming == false)
let directFableProvider = DynamicCustomProvider(
modelId: "claude-fable-5",
capabilities: ModelCapabilities(supportsStreaming: false),
)
let directFableModel = LanguageModel.custom(provider: directFableProvider)
#expect(directFableModel.supportsStreaming == false)
let unrelatedFableNamedProvider = DynamicCustomProvider(
modelId: "local-claude-fable-5-benchmark",
capabilities: ModelCapabilities(supportsStreaming: true),
)
#expect(LanguageModel.custom(provider: unrelatedFableNamedProvider).supportsStreaming == true)
#expect(CustomProviderRegistry.shared.get("missing") == nil)
}
@ -175,10 +199,11 @@ private final class DynamicCustomProvider: ModelProvider {
let modelId: String
let baseURL: String? = nil
let apiKey: String? = nil
let capabilities = ModelCapabilities()
let capabilities: ModelCapabilities
init(modelId: String) {
init(modelId: String, capabilities: ModelCapabilities = ModelCapabilities()) {
self.modelId = modelId
self.capabilities = capabilities
}
func generateText(request _: ProviderRequest) async throws -> ProviderResponse {

File diff suppressed because it is too large Load Diff

View File

@ -76,7 +76,7 @@ struct LanguageModelCoverageTests {
func `LanguageModel top level switches`() {
let baseModels: [LanguageModel] = [
.openai(.gpt55),
.anthropic(.opus48),
.anthropic(.fable5),
.google(.gemini35Flash),
.mistral(.medium35),
.groq(.llama3370b),

View File

@ -42,6 +42,124 @@ struct MinimalModernAPITests {
}
}
/// Pins the two defaults: `Model.default` reports no streaming support, while
/// `Model.defaultStreaming` is `.openai(.gpt55)` and reports streaming support.
@Test
func `Streaming default value`() {
#expect(Model.default.supportsStreaming == false)
#expect(Model.defaultStreaming == .openai(.gpt55))
#expect(Model.defaultStreaming.supportsStreaming == true)
}
/// An agent created without an explicit model reports `.default` as its model.
@Test
func `Agent default model preserves execution default`() {
let agent = Agent(name: "test", instructions: "test", context: ())
#expect(agent.model == .default)
}
/// Streaming from an agent with no explicit model must request `.openai(.gpt55)` from the
/// provider factory while `agent.model` itself stays `.default`.
@Test
func `Agent stream uses streaming fallback for execution default`() async throws {
let seenModel = MinimalModelBox()
let config = TachikomaConfiguration(loadFromEnvironment: false)
// Record which model the provider factory is asked to create a provider for.
config.setProviderFactoryOverride { model, _ in
seenModel.model = model
return MinimalStreamingProvider(deltas: [
.text("ok"),
.done(finishReason: .stop),
])
}
let agent = Agent(name: "test", instructions: "test", configuration: config, context: ())
let stream = try await agent.stream("hi")
var received = ""
for try await delta in stream where delta.type == .textDelta {
received += delta.content ?? ""
}
#expect(agent.model == .default)
#expect(seenModel.model == .openai(.gpt55))
#expect(!received.isEmpty)
}
/// Passing `.default` explicitly as the agent's model makes `stream` throw a `TachikomaError`.
@Test
func `Agent stream rejects explicit execution default`() async throws {
let agent = Agent(name: "test", instructions: "test", model: .default, context: ())
await #expect(throws: TachikomaError.self) {
_ = try await agent.stream("hi")
}
}
/// Assigning `.anthropic(.fable5)` to `agent.model` after creation makes `stream` throw a
/// `TachikomaError`.
@Test
func `Agent stream rejects nonstreaming model after mutation`() async throws {
let agent = Agent(name: "test", instructions: "test", context: ())
agent.model = .anthropic(.fable5)
await #expect(throws: TachikomaError.self) {
_ = try await agent.stream("hi")
}
}
/// A custom provider whose stream ends after a single text delta, without a `.done` delta,
/// must still deliver that text to the consumer.
@Test
func `Agent stream flushes buffered text on natural completion`() async throws {
let provider = MinimalStreamingProvider(deltas: [
.text("ok"),
])
let agent = Agent(
name: "test",
instructions: "test",
model: .custom(provider: provider),
context: (),
)
let stream = try await agent.stream("hi")
var received = ""
for try await delta in stream where delta.type == .textDelta {
received += delta.content ?? ""
}
#expect(received == "ok")
}
/// For an OpenAI-compatible model, a `.done()` delta without a finish reason must still
/// deliver the text and store it as the assistant message after the system and user messages.
@Test
func `Agent stream flushes buffered compatible text when done has no finish reason`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in
MinimalStreamingProvider(deltas: [
.text("ok"),
.done(),
])
}
let agent = Agent(
name: "test",
instructions: "test",
model: .openaiCompatible(modelId: "gpt-compatible", baseURL: "https://example.test"),
configuration: config,
context: (),
)
let stream = try await agent.stream("hi")
var received = ""
for try await delta in stream where delta.type == .textDelta {
received += delta.content ?? ""
}
#expect(received == "ok")
#expect(agent.conversation.messages.map(\.content) == ["test", "hi", "ok"])
}
/// The agent's conversation must use the agent's configuration: continuing it returns the
/// text produced by the provider installed through that configuration's factory override.
@Test
func `Agent conversation uses agent configuration`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in
MinimalStaticProvider(response: ProviderResponse(text: "configured", finishReason: .stop))
}
let agent = Agent(name: "test", instructions: "test", configuration: config, context: ())
let text = try await agent.conversation.continueConversation(using: .openai(.gpt55))
#expect(text == "configured")
}
// MARK: - Tool System Tests
@Test
@ -126,4 +244,757 @@ extension MinimalModernAPITests {
#expect(message.role == .user)
#expect(message.content == "Test")
}
/// A thinking-channel message with signature metadata must round-trip unchanged through
/// `replaceModelMessages` and `getModelMessages`, and expose its text as `content`.
@Test
func `Conversation preserves signed thinking messages`() {
let conversation = Conversation()
let signedThinking = ModelMessage(
role: .assistant,
content: [.text("private reasoning")],
channel: .thinking,
metadata: .init(customData: [
"anthropic.thinking.signature": "sig",
"anthropic.thinking.type": "thinking",
]),
)
conversation.replaceModelMessages([.user("hi"), signedThinking, .assistant("hello")])
let messages = conversation.getModelMessages()
#expect(messages.count == 3)
#expect(messages[1] == signedThinking)
#expect(conversation.messages[1].content == "private reasoning")
}
/// `mergeGeneratedMessages` replaces only the snapshot prefix: a user message added after
/// the snapshot count was taken must remain, after the generated messages.
@Test
func `Conversation merge preserves messages appended after snapshot`() {
let conversation = Conversation()
conversation.addUserMessage("original")
let snapshotCount = conversation.messages.count
// Added after the snapshot, standing in for a concurrent append.
conversation.addUserMessage("concurrent")
conversation.mergeGeneratedMessages(
[.user("original"), .assistant("generated")],
replacingPrefixCount: snapshotCount,
)
let messages = conversation.getModelMessages()
#expect(messages.map(\.role) == [.user, .assistant, .user])
if case let .text(text) = messages[2].content.first {
#expect(text == "concurrent")
} else {
Issue.record("Expected preserved concurrent user message")
}
}
/// Replacing the snapshot with an empty list removes only the snapshotted message; a
/// message appended after the snapshot IDs were captured must remain.
@Test
func `Conversation refusal rollback preserves messages appended after snapshot`() {
let conversation = Conversation()
conversation.addUserMessage("blocked")
let snapshotIDs = conversation.messages.map(\.id)
conversation.addUserMessage("concurrent")
let didReplace = conversation.replaceModelMessages([], validatingSnapshotIDs: snapshotIDs)
#expect(didReplace == true)
#expect(conversation.messages.map(\.content) == ["concurrent"])
}
/// A task cancelled while waiting for the continuation lock must fail with
/// `CancellationError`, must never run its operation, and must not prevent a later caller
/// from taking the lock.
@Test
func `Conversation lock removes cancelled waiters`() async throws {
let conversation = Conversation()
let probe = ConversationLockProbe()
// First task takes the lock and holds it until the probe releases it.
let first = Task {
try await conversation.withContinuationLock {
await probe.markFirstStarted()
await probe.waitForRelease()
}
}
await probe.waitUntilFirstStarted()
// Second task has to wait for the lock held by the first.
let second = Task {
try await conversation.withContinuationLock {
await probe.markSecondRan()
}
}
// Short pause so the second task has a chance to start waiting before it is cancelled.
try await Task.sleep(nanoseconds: 10_000_000)
second.cancel()
do {
try await second.value
Issue.record("Expected queued waiter to be cancelled")
} catch is CancellationError {
// Expected
}
await probe.releaseFirst()
try await first.value
// A third acquisition must still succeed after the cancelled waiter.
try await conversation.withContinuationLock {
await probe.markThirdRan()
}
#expect(await probe.secondRan == false)
#expect(await probe.thirdRan == true)
}
/// `appendGeneratedMessages` inserts directly after the anchor message, so a message
/// appended after the anchor ends up behind the generated one.
@Test
func `Conversation append generated messages preserves concurrent appends`() {
let conversation = Conversation()
conversation.addUserMessage("original")
let anchorID = conversation.messages[0].id
conversation.addUserMessage("concurrent")
conversation.appendGeneratedMessages([.assistant("generated")], afterMessageID: anchorID)
#expect(conversation.messages.map(\.content) == ["original", "generated", "concurrent"])
}
/// Continuing a conversation that has no messages must return the generated text and
/// store it as the only message.
@Test
func `Conversation continue persists generated message from empty history`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in
MinimalStaticProvider(response: ProviderResponse(text: "hello", finishReason: .stop))
}
let conversation = Conversation(configuration: config)
let text = try await conversation.continueConversation(using: .anthropic(.opus48))
#expect(text == "hello")
#expect(conversation.messages.map(\.content) == ["hello"])
}
/// When the provider answers with `.contentFilter`, the returned text must be empty and
/// the trailing user message must be removed from the history.
@Test
func `Conversation continue rolls back refused trailing user turn`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in
MinimalStaticProvider(response: ProviderResponse(text: "Refused by policy", finishReason: .contentFilter))
}
let conversation = Conversation(configuration: config)
conversation.addUserMessage("blocked")
let text = try await conversation.continueConversation(using: .anthropic(.fable5))
#expect(text.isEmpty)
#expect(conversation.messages.isEmpty)
}
/// If a tool call completes in the first step and the second step is refused, the history
/// must keep the user message, the assistant tool call and the tool result, while the
/// returned text is empty.
@Test
func `Conversation continue preserves completed tool history after late refusal`() async throws {
// Step 1 requests the tool; step 2 is refused with `.contentFilter`.
let provider = MinimalSequenceProvider(responses: [
ProviderResponse(
text: "",
finishReason: .toolCalls,
toolCalls: [AgentToolCall(id: "call-1", name: "side_effect", arguments: [:])],
),
ProviderResponse(text: "Refused by policy", finishReason: .contentFilter),
])
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in provider }
let conversation = Conversation(configuration: config)
conversation.addUserMessage("do it")
let text = try await conversation.continueConversation(
using: .anthropic(.fable5),
tools: [sideEffectTool],
maxSteps: 2,
)
#expect(text.isEmpty)
let messages = conversation.getModelMessages()
#expect(messages.map(\.role) == [.user, .assistant, .tool])
#expect(messages[0].content == [.text("do it")])
#expect(messages[1].content.contains { part in
if case let .toolCall(toolCall) = part {
return toolCall.id == "call-1"
}
return false
})
#expect(messages[2].content.contains { part in
if case let .toolResult(toolResult) = part {
return toolResult.toolCallId == "call-1"
}
return false
})
}
/// `stream` on an agent using `.anthropic(.fable5)` must throw a `TachikomaError` and leave
/// the conversation with only the system message.
@Test
func `Agent stream rejects non-streaming model before mutating conversation`() async throws {
let agent = Agent(
name: "test",
instructions: "test",
model: .anthropic(.fable5),
context: (),
)
await #expect(throws: TachikomaError.self) {
_ = try await agent.stream("hi")
}
#expect(agent.conversation.messages.map(\.content) == ["test"])
}
/// With `.openai(.gpt55)`, text streamed before a `.contentFilter` terminal delta still
/// reaches the consumer, but the history ends up empty: the user turn is removed and no
/// assistant message is stored.
@Test
func `Conversation streaming rolls back refused trailing user turn`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in
MinimalStreamingProvider(deltas: [
.text("partial"),
.done(finishReason: .contentFilter),
])
}
let conversation = Conversation(configuration: config)
conversation.addUserMessage("blocked")
let stream = try await conversation.continueConversationStreaming(using: .openai(.gpt55))
var received = ""
for try await chunk in stream {
received += chunk
}
#expect(received == "partial")
#expect(conversation.messages.isEmpty)
}
/// A custom provider whose stream ends after one text delta, without a `.done` delta, must
/// still deliver the text and store it as the only message.
@Test
func `Conversation streaming flushes buffered text on natural completion`() async throws {
let provider = MinimalStreamingProvider(deltas: [
.text("ok"),
])
let conversation = Conversation(configuration: TachikomaConfiguration(loadFromEnvironment: false))
let stream = try await conversation.continueConversationStreaming(using: .custom(provider: provider))
var received = ""
for try await chunk in stream {
received += chunk
}
#expect(received == "ok")
#expect(conversation.messages.map(\.content) == ["ok"])
}
/// For an OpenAI-compatible model, a `.done()` delta without a finish reason must still
/// deliver the text and store it as the only message.
@Test
func `Conversation streaming flushes buffered compatible text when done has no finish reason`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in
MinimalStreamingProvider(deltas: [
.text("ok"),
.done(),
])
}
let conversation = Conversation(configuration: config)
let stream = try await conversation.continueConversationStreaming(
using: .openaiCompatible(modelId: "gpt-compatible", baseURL: "https://example.test"),
)
var received = ""
for try await chunk in stream {
received += chunk
}
#expect(received == "ok")
#expect(conversation.messages.map(\.content) == ["ok"])
}
/// For an OpenAI-compatible model, a stream that ends after a text delta with no `.done`
/// delta at all must still deliver the text and store it as the only message.
@Test
func `Conversation streaming flushes compatible text when stream ends without done`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in
MinimalStreamingProvider(deltas: [
.text("partial"),
])
}
let conversation = Conversation(configuration: config)
let stream = try await conversation.continueConversationStreaming(
using: .openaiCompatible(modelId: "gpt-compatible", baseURL: "https://example.test"),
)
var received = ""
for try await chunk in stream {
received += chunk
}
#expect(received == "partial")
#expect(conversation.messages.map(\.content) == ["partial"])
}
}
@Suite(.serialized)
private struct AgentRefusalTests {
/// When `execute` receives a `.contentFilter` response, the agent response must have empty
/// text and the `.contentFilter` finish reason, and the history must hold only the system
/// message.
@Test
func `Agent execute rolls back refused user turn`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in
MinimalStaticProvider(response: ProviderResponse(text: "Refused by policy", finishReason: .contentFilter))
}
let agent = Agent(
name: "test",
instructions: "test",
model: .anthropic(.fable5),
configuration: config,
context: (),
)
let response = try await agent.execute("blocked")
#expect(response.text.isEmpty)
#expect(response.finishReason == .contentFilter)
#expect(agent.conversation.messages.map(\.content) == ["test"])
}
/// With default settings and `.openai(.gpt55)`, the text delta before a `.contentFilter`
/// terminal delta is still delivered, the terminal delta is delivered too, and the history
/// holds only the system message afterwards.
@Test
func `Agent stream stays incremental by default when terminal content filter arrives`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in
MinimalStreamingProvider(deltas: [
.text("partial"),
.done(finishReason: .contentFilter),
])
}
let agent = Agent(
name: "test",
instructions: "test",
model: .openai(.gpt55),
configuration: config,
context: (),
)
let stream = try await agent.stream("blocked")
var received: [TextStreamDelta] = []
for try await delta in stream {
received.append(delta)
}
#expect(received.contains { $0.type == .textDelta && $0.content == "partial" })
#expect(received.contains { $0.type == .done && $0.finishReason == .contentFilter })
#expect(agent.conversation.messages.map(\.content) == ["test"])
}
/// With `streamBuffering: .untilTerminal`, a stream that ends after a text delta without a
/// `.done` delta must fail with an `apiError` mentioning "completion status", and the text
/// must not be stored in the conversation.
@Test
func `Agent stream explicit terminal buffering errors when stream ends without done`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in
MinimalStreamingProvider(deltas: [
.text("partial"),
])
}
let agent = Agent(
name: "test",
instructions: "test",
model: .openaiCompatible(modelId: "gpt-compatible", baseURL: "https://example.test"),
settings: GenerationSettings(streamBuffering: .untilTerminal),
configuration: config,
context: (),
)
let stream = try await agent.stream("hi")
do {
for try await _ in stream {}
Issue.record("Expected missing terminal status error")
} catch let error as TachikomaError {
guard case let .apiError(message) = error else {
Issue.record("Expected apiError, got \(error)")
return
}
#expect(message.contains("completion status"))
}
#expect(!agent.conversation.messages.map(\.content).contains("partial"))
}
/// With `streamBuffering: .untilTerminal` on an Azure OpenAI model, text preceding a
/// `.contentFilter` terminal delta must not be delivered; the terminal delta is delivered
/// and the history holds only the system message.
@Test
func `Agent stream explicit terminal buffering suppresses Azure OpenAI refusals`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in
MinimalStreamingProvider(deltas: [
.text("partial"),
.done(finishReason: .contentFilter),
])
}
let agent = Agent(
name: "test",
instructions: "test",
model: .azureOpenAI(deployment: "gpt-compatible", endpoint: "https://example.openai.azure.com"),
settings: GenerationSettings(streamBuffering: .untilTerminal),
configuration: config,
context: (),
)
let stream = try await agent.stream("blocked")
var received: [TextStreamDelta] = []
for try await delta in stream {
received.append(delta)
}
#expect(!received.contains { $0.type == .textDelta && $0.content == "partial" })
#expect(received.contains { $0.type == .done && $0.finishReason == .contentFilter })
#expect(agent.conversation.messages.map(\.content) == ["test"])
}
/// With `streamBuffering: .untilTerminal` on a Google model, text preceding a
/// `.contentFilter` terminal delta must not be delivered; the terminal delta is delivered
/// and the history holds only the system message.
@Test
func `Agent stream explicit terminal buffering suppresses Google refusals`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
config.setProviderFactoryOverride { _, _ in
MinimalStreamingProvider(deltas: [
.text("partial"),
.done(finishReason: .contentFilter),
])
}
let agent = Agent(
name: "test",
instructions: "test",
model: .google(.gemini25Flash),
settings: GenerationSettings(streamBuffering: .untilTerminal),
configuration: config,
context: (),
)
let stream = try await agent.stream("blocked")
var received: [TextStreamDelta] = []
for try await delta in stream {
received.append(delta)
}
#expect(!received.contains { $0.type == .textDelta && $0.content == "partial" })
#expect(received.contains { $0.type == .done && $0.finishReason == .contentFilter })
#expect(agent.conversation.messages.map(\.content) == ["test"])
}
/// Verifies that `.untilTerminal` buffering withholds refused text for a `.custom` model whose
/// id is `proxy/gpt-compatible` while a custom provider named `proxy` (type `openai`) is
/// registered from a temporary profile.
/// NOTE(review): presumably the `proxy/` id prefix is what ties the model to the registered
/// provider — confirm against the agent's provider classification.
@Test
func `Agent stream explicit terminal buffering suppresses registered custom OpenAI refusals`() async throws {
// The registry only holds the "proxy" provider for the duration of the trailing closure.
try await self.withRegisteredCustomProvider(
"""
{
"customProviders": {
"proxy": {
"type": "openai",
"options": { "baseURL": "https://example.test/v1" }
}
}
}
""",
) {
let config = TachikomaConfiguration(loadFromEnvironment: false)
// Every provider lookup resolves to a scripted stream: one text delta, then a refusal.
config.setProviderFactoryOverride { _, _ in
MinimalStreamingProvider(
modelId: "proxy/gpt-compatible",
deltas: [
.text("partial"),
.done(finishReason: .contentFilter),
],
)
}
let agent = Agent(
name: "test",
instructions: "test",
model: .custom(provider: MinimalStreamingProvider(modelId: "proxy/gpt-compatible", deltas: [])),
settings: GenerationSettings(streamBuffering: .untilTerminal),
configuration: config,
context: (),
)
let stream = try await agent.stream("blocked")
var received: [TextStreamDelta] = []
for try await delta in stream {
received.append(delta)
}
// The buffered text must not reach the consumer, but the terminal refusal must.
#expect(!received.contains { $0.type == .textDelta && $0.content == "partial" })
#expect(received.contains { $0.type == .done && $0.finishReason == .contentFilter })
// The conversation's message contents are still just "test".
#expect(agent.conversation.messages.map(\.content) == ["test"])
}
}
/// Verifies that abandoning a stream after its first delta does not block a later
/// `execute` call on the same agent.
@Test
func `Agent stream releases continuation gate when consumer stops early`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
// `StallingStreamingProvider` yields one "partial" delta and never finishes its stream.
config.setProviderFactoryOverride { _, _ in
StallingStreamingProvider()
}
let agent = Agent(
name: "test",
instructions: "test",
model: .custom(provider: StallingStreamingProvider()),
configuration: config,
context: (),
)
// Scope the stream and its iterator so both go out of scope after a single delta is read.
do {
let stream = try await agent.stream("first")
var iterator = stream.makeAsyncIterator()
let firstDelta = try await iterator.next()
#expect(firstDelta?.type == .textDelta)
#expect(firstDelta?.content == "partial")
}
// 10 ms pause before the follow-up call.
// NOTE(review): presumably gives stream teardown time to run — confirm this is long enough on CI.
try await Task.sleep(nanoseconds: 10_000_000)
// NOTE(review): `withTimeout(0.2)` presumably takes seconds — confirm against the helper.
let response = try await withTimeout(0.2) {
try await agent.execute("second")
}
// "after" is what the stalling provider's non-streaming `generateText` returns.
#expect(response.text == "after")
}
/// When a tool call completes and the follow-up response is refused (`.contentFilter`), the
/// returned text is empty but the conversation keeps the user turn, the assistant tool call,
/// and the tool result.
@Test
func `Agent execute preserves completed tool history after late refusal`() async throws {
    // First scripted response requests the tool; the second is a refusal.
    let provider = MinimalSequenceProvider(responses: [
        ProviderResponse(
            text: "",
            finishReason: .toolCalls,
            toolCalls: [AgentToolCall(id: "call-1", name: "side_effect", arguments: [:])],
        ),
        ProviderResponse(text: "Refused by policy", finishReason: .contentFilter),
    ])
    let config = TachikomaConfiguration(loadFromEnvironment: false)
    config.setProviderFactoryOverride { _, _ in provider }
    let agent = Agent(
        name: "test",
        instructions: "test",
        model: .anthropic(.fable5),
        tools: [sideEffectTool],
        configuration: config,
        context: (),
    )

    let response = try await agent.execute("do it")
    #expect(response.text.isEmpty)
    #expect(response.finishReason == .contentFilter)

    let messages = agent.conversation.getModelMessages()
    #expect(messages.map(\.role) == [.system, .user, .assistant, .tool])
    // `#expect` records a failure but keeps running, so stop here on a length mismatch:
    // the positional subscripts below would otherwise trap and take down the test process.
    try #require(messages.count == 4)

    #expect(messages[1].content == [.text("do it")])
    #expect(messages[2].content.contains { part in
        if case let .toolCall(toolCall) = part {
            return toolCall.id == "call-1"
        }
        return false
    })
    #expect(messages[3].content.contains { part in
        if case let .toolResult(toolResult) = part {
            return toolResult.toolCallId == "call-1"
        }
        return false
    })
}
/// Runs `operation` while `CustomProviderRegistry.shared` is loaded from a throwaway profile
/// directory whose `config.json` contains `configJSON`.
///
/// On every exit path (normal return or thrown error) the registry is reloaded from a second
/// profile with no custom providers, `TachikomaConfiguration.profileDirectoryName` is restored
/// to its previous value, and both temporary directories are deleted.
///
/// - Parameters:
///   - configJSON: Contents written to the temporary profile's `config.json`.
///   - operation: Work to perform while the custom providers are registered.
/// - Throws: File-system errors from preparing the profiles, or whatever `operation` throws.
private func withRegisteredCustomProvider(
    _ configJSON: String,
    operation: () async throws -> Void,
) async throws {
    let originalProfile = TachikomaConfiguration.profileDirectoryName
    let tempProfile = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString)
    let emptyProfile = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString)
    // Declared first so it runs last: the directories are removed only after the registry
    // has been reloaded from the empty profile. Removal is best-effort.
    defer {
        try? FileManager.default.removeItem(at: tempProfile)
        try? FileManager.default.removeItem(at: emptyProfile)
    }

    try FileManager.default.createDirectory(at: tempProfile, withIntermediateDirectories: true)
    try FileManager.default.createDirectory(at: emptyProfile, withIntermediateDirectories: true)
    try configJSON.write(to: tempProfile.appendingPathComponent("config.json"), atomically: true, encoding: .utf8)
    try #"{"customProviders":{}}"#.write(
        to: emptyProfile.appendingPathComponent("config.json"),
        atomically: true,
        encoding: .utf8,
    )

    TachikomaConfiguration.profileDirectoryName = tempProfile.path
    CustomProviderRegistry.shared.loadFromProfile()
    // Single cleanup path for success and failure: clear the registry via the empty
    // profile, then put the original profile directory name back.
    defer {
        TachikomaConfiguration.profileDirectoryName = emptyProfile.path
        CustomProviderRegistry.shared.loadFromProfile()
        TachikomaConfiguration.profileDirectoryName = originalProfile
    }

    try await operation()
}
}
/// Provider fixture whose stream emits a single "partial" text delta and is then left open
/// (its continuation is never finished). Non-streaming generation answers with "after".
private struct StallingStreamingProvider: ModelProvider {
    let modelId = "stalling-streaming"
    let baseURL: String? = nil
    let apiKey: String? = nil
    let capabilities = ModelCapabilities(supportsStreaming: true)

    /// Returns a fixed "after" response regardless of the request.
    func generateText(request _: ProviderRequest) async throws -> ProviderResponse {
        let reply = ProviderResponse(text: "after")
        return reply
    }

    /// Returns a stream that yields one delta and never completes.
    func streamText(request _: ProviderRequest) async throws -> AsyncThrowingStream<TextStreamDelta, Error> {
        let stalled = AsyncThrowingStream<TextStreamDelta, Error> { continuation in
            continuation.yield(.text("partial"))
        }
        return stalled
    }
}
/// Actor used by tests to coordinate concurrent tasks: it latches "first started", parks
/// callers until a release is signalled, and records whether later steps ran.
private actor ConversationLockProbe {
    var secondRan = false
    var thirdRan = false
    private var firstStarted = false
    private var firstStartedWaiters: [CheckedContinuation<Void, Never>] = []
    private var releaseWaiters: [CheckedContinuation<Void, Never>] = []

    /// Latches the "first started" flag and resumes everyone waiting on it.
    func markFirstStarted() {
        self.firstStarted = true
        let pending = self.firstStartedWaiters
        self.firstStartedWaiters = []
        pending.forEach { $0.resume() }
    }

    /// Returns immediately once `markFirstStarted()` has been called; otherwise suspends until it is.
    func waitUntilFirstStarted() async {
        guard !self.firstStarted else { return }
        await withCheckedContinuation { continuation in
            self.firstStartedWaiters.append(continuation)
        }
    }

    /// Suspends until the next `releaseFirst()` call. A release signalled earlier is not remembered.
    func waitForRelease() async {
        await withCheckedContinuation { continuation in
            self.releaseWaiters.append(continuation)
        }
    }

    /// Resumes every caller currently parked in `waitForRelease()`.
    func releaseFirst() {
        let pending = self.releaseWaiters
        self.releaseWaiters = []
        pending.forEach { $0.resume() }
    }

    /// Records that the second step ran.
    func markSecondRan() {
        self.secondRan = true
    }

    /// Records that the third step ran.
    func markThirdRan() {
        self.thirdRan = true
    }
}
/// Lock-guarded mutable holder for an optional `LanguageModel`; every read and write of
/// `model` happens while the `NSLock` is held.
private final class MinimalModelBox: @unchecked Sendable {
    private let lock = NSLock()
    private var _model: LanguageModel?

    /// The boxed model, read and written under the lock.
    var model: LanguageModel? {
        get {
            self.lock.withLock { self._model }
        }
        set {
            self.lock.withLock { self._model = newValue }
        }
    }
}
/// Provider fixture that answers every non-streaming request with the same stored response
/// and whose stream finishes immediately without emitting anything.
private struct MinimalStaticProvider: ModelProvider {
    let modelId = "minimal-static"
    let baseURL: String? = nil
    let apiKey: String? = nil
    let capabilities = ModelCapabilities()
    let response: ProviderResponse

    /// Returns the stored response, ignoring the request.
    func generateText(request _: ProviderRequest) async throws -> ProviderResponse {
        return self.response
    }

    /// Returns an already-finished, empty stream.
    func streamText(request _: ProviderRequest) async throws -> AsyncThrowingStream<TextStreamDelta, Error> {
        let emptyStream = AsyncThrowingStream<TextStreamDelta, Error> { continuation in
            continuation.finish()
        }
        return emptyStream
    }
}
/// Provider fixture that replays a scripted list of stream deltas and then finishes.
/// Non-streaming generation returns an empty-text response.
private struct MinimalStreamingProvider: ModelProvider {
    let modelId: String
    let baseURL: String? = nil
    let apiKey: String? = nil
    let capabilities = ModelCapabilities(supportsStreaming: true)
    let deltas: [TextStreamDelta]

    /// - Parameters:
    ///   - modelId: Identifier reported by the provider; defaults to "minimal-streaming".
    ///   - deltas: Deltas yielded, in order, by every `streamText` call.
    init(modelId: String = "minimal-streaming", deltas: [TextStreamDelta]) {
        self.deltas = deltas
        self.modelId = modelId
    }

    /// Returns a response with empty text, ignoring the request.
    func generateText(request _: ProviderRequest) async throws -> ProviderResponse {
        return ProviderResponse(text: "")
    }

    /// Returns a stream that yields each scripted delta and then completes.
    func streamText(request _: ProviderRequest) async throws -> AsyncThrowingStream<TextStreamDelta, any Error> {
        let scripted = self.deltas
        return AsyncThrowingStream { continuation in
            scripted.forEach { delta in
                continuation.yield(delta)
            }
            continuation.finish()
        }
    }
}
/// Provider fixture that serves scripted non-streaming responses from a shared queue, one per
/// `generateText` call. Its stream finishes immediately without emitting anything.
private struct MinimalSequenceProvider: ModelProvider {
    let modelId = "minimal-sequence"
    let baseURL: String? = nil
    let apiKey: String? = nil
    let capabilities = ModelCapabilities()
    private let queue: MinimalResponseQueue

    /// - Parameter responses: Responses handed out in order by successive `generateText` calls.
    init(responses: [ProviderResponse]) {
        let scripted = MinimalResponseQueue(responses: responses)
        self.queue = scripted
    }

    /// Returns the next scripted response, ignoring the request.
    func generateText(request _: ProviderRequest) async throws -> ProviderResponse {
        return self.queue.next()
    }

    /// Returns an already-finished, empty stream.
    func streamText(request _: ProviderRequest) async throws -> AsyncThrowingStream<TextStreamDelta, Error> {
        let emptyStream = AsyncThrowingStream<TextStreamDelta, Error> { continuation in
            continuation.finish()
        }
        return emptyStream
    }
}
/// Lock-guarded queue of scripted provider responses.
///
/// `next()` consumes responses in order until one is left; that last response is then
/// returned for every further call.
private final class MinimalResponseQueue: @unchecked Sendable {
    private let lock = NSLock()
    private var responses: [ProviderResponse]

    /// - Parameter responses: Scripted responses in the order they should be served.
    ///   Must contain at least one element before `next()` is called.
    init(responses: [ProviderResponse]) {
        self.responses = responses
    }

    /// Returns the next scripted response, repeating the final one once the queue is down
    /// to a single element.
    func next() -> ProviderResponse {
        self.lock.lock()
        defer { self.lock.unlock() }
        // Fail with a descriptive message instead of an opaque index-out-of-range trap.
        precondition(!self.responses.isEmpty, "MinimalResponseQueue requires at least one scripted response")
        if self.responses.count > 1 {
            return self.responses.removeFirst()
        }
        return self.responses[0]
    }
}
/// Tool fixture named `side_effect` with no declared parameters. Its handler ignores the
/// incoming arguments and always returns the string value "done".
private let sideEffectTool = Tachikoma.createTool(
name: "side_effect",
description: "Records an external action",
parameters: [],
required: [],
) { _ in
AnyAgentToolValue(string: "done")
}

View File

@ -63,6 +63,7 @@ enum ModelCapabilitiesTests {
@Test
func `Claude models support thinking`() {
let models: [LanguageModel] = [
.anthropic(.fable5),
.anthropic(.opus47),
.anthropic(.opus4),
.anthropic(.sonnet46),
@ -79,8 +80,8 @@ enum ModelCapabilitiesTests {
}
@Test
func `Claude Opus 4_7 and 4_8 advertise adaptive thinking without sampling options`() {
for model in [LanguageModel.anthropic(.opus47), .anthropic(.opus48)] {
func `Claude Fable 5 and Opus 4_7 plus 4_8 advertise adaptive thinking without sampling options`() {
for model in [LanguageModel.anthropic(.fable5), .anthropic(.opus47), .anthropic(.opus48)] {
let capabilities = ModelCapabilityRegistry.shared.capabilities(for: model)
#expect(!capabilities.supportsTemperature)
@ -196,6 +197,19 @@ enum ModelCapabilitiesTests {
#expect(validated.providerOptions.openai?.previousResponseId == "test-123") // Kept
}
/// Validating settings for GPT-5.5 drops the temperature but keeps the stream buffering mode.
@Test
func `Validate settings preserves stream buffering mode`() {
    let requested = GenerationSettings(
        temperature: 0.7,
        streamBuffering: .untilTerminal,
    )

    let sanitized = requested.validated(for: .openai(.gpt55))

    #expect(sanitized.streamBuffering == .untilTerminal)
    #expect(sanitized.temperature == nil)
}
@Test
func `Validate settings for GPT-5 strips unsupported options`() {
let settings = GenerationSettings(
@ -290,6 +304,49 @@ enum ModelCapabilitiesTests {
#expect(validated.providerOptions.anthropic?.thinking != nil)
#expect(validated.providerOptions.anthropic?.cacheControl == .persistent)
}
/// For an Anthropic-compatible endpoint serving `claude-fable-5`, validation removes
/// temperature, top-p and top-k while keeping the Anthropic thinking option.
@Test
func `Validate Anthropic-compatible Fable strips unsupported sampling`() {
    let compatibleFable = LanguageModel.anthropicCompatible(
        modelId: "claude-fable-5",
        baseURL: "https://example.test",
    )
    let requested = GenerationSettings(
        temperature: 0.7,
        topP: 0.9,
        topK: 40,
        providerOptions: .init(
            anthropic: .init(thinking: .adaptive),
        ),
    )

    let sanitized = requested.validated(for: compatibleFable)

    #expect(sanitized.temperature == nil)
    #expect(sanitized.topP == nil)
    #expect(sanitized.topK == nil)
    #expect(sanitized.providerOptions.anthropic?.thinking != nil)
}
/// For a custom Anthropic model id naming Fable 5 (`anthropic.claude-fable-5`), validation
/// removes temperature, top-p and top-k while keeping the Anthropic thinking option.
@Test
func `Validate direct custom Fable strips unsupported sampling`() {
    let customFable = LanguageModel.anthropic(.custom("anthropic.claude-fable-5"))
    let requested = GenerationSettings(
        temperature: 0.7,
        topP: 0.9,
        topK: 40,
        providerOptions: .init(
            anthropic: .init(thinking: .adaptive),
        ),
    )

    let sanitized = requested.validated(for: customFable)

    #expect(sanitized.temperature == nil)
    #expect(sanitized.topP == nil)
    #expect(sanitized.topK == nil)
    #expect(sanitized.providerOptions.anthropic?.thinking != nil)
}
}
struct CustomModelTests {

View File

@ -43,10 +43,19 @@ struct ModelParsingTests {
}
}
/// The canonical id and the `fable` / `fable5` aliases resolve to Fable 5, while an id that
/// merely embeds "fable5" does not parse.
@Test
func `parse Claude Fable 5 model id`() throws {
    let canonical = LanguageModel.parse(from: "claude-fable-5")
    #expect(canonical == .anthropic(.fable5))

    let shorthand = LanguageModel.parse(from: "fable")
    #expect(shorthand == .anthropic(.fable5))

    #expect(try ModelSelector.parseModel("fable5") == .anthropic(.fable5))

    let lookalike = LanguageModel.parse(from: "my-fable5-7b")
    #expect(lookalike == nil)
}
/// `claude-opus-4-8` parses to Opus 4.8; an id that merely embeds "opus48" does not parse.
@Test
func `parse Claude Opus 4.8 model id`() {
    #expect(LanguageModel.parse(from: "claude-opus-4-8") == .anthropic(.opus48))

    let lookalike = LanguageModel.parse(from: "my-opus48-distill")
    #expect(lookalike == nil)
}
@Test
@ -56,9 +65,10 @@ struct ModelParsingTests {
}
@Test
func `parse shorthand Claude alias`() {
func `parse shorthand Claude alias`() throws {
let parsed = LanguageModel.parse(from: "claude")
#expect(parsed == .anthropic(.opus48))
#expect(try ModelSelector.parseModel("anthropic") == .anthropic(.opus48))
}
@Test
@ -75,6 +85,7 @@ struct ModelParsingTests {
@Test
func `parse provider qualified latest hosted models`() throws {
#expect(LanguageModel.parse(from: "anthropic/claude-fable-5") == .anthropic(.fable5))
#expect(LanguageModel.parse(from: "anthropic/claude-opus-4-8") == .anthropic(.opus48))
#expect(LanguageModel.parse(from: "google/gemini-3.5-flash") == .google(.gemini35Flash))
#expect(LanguageModel.parse(from: "xai/grok-4.3-latest") == .grok(.grok43))

View File

@ -121,6 +121,49 @@ struct OpenAICompatibleHelperTests {
#expect(deltas == "Hello world")
}
/// Verifies that a streamed chunk with `finish_reason: "content_filter"` surfaces as a `.done`
/// delta with `.contentFilter`, and that the preceding text chunk is still delivered by the
/// raw helper stream.
@Test
func `streamText maps content filter finish reasons`() async throws {
let request = ProviderRequest(
messages: [ModelMessage(role: .user, content: [.text("blocked")])],
)
let deltas = try await withMockedSession { urlRequest in
// Scripted SSE body: one content chunk, one content_filter finish chunk, then [DONE].
// NOTE(review): SSE events are normally separated by blank lines — confirm the literal's
// line layout against the checked-in file, since this view has blank lines stripped.
let sse = """
data: {\"id\":\"chunk_1\",\"choices\":[{\"delta\":{\"content\":\"partial\"},\"index\":0,\"finish_reason\":null}]}
data: {\"id\":\"chunk_2\",\"choices\":[{\"delta\":{},\"index\":0,\"finish_reason\":\"content_filter\"}]}
data: [DONE]
""".utf8Data()
let response = HTTPURLResponse(
url: urlRequest.url!,
statusCode: 200,
httpVersion: nil,
headerFields: ["Content-Type": "text/event-stream"],
)!
return (response, sse)
} operation: { session in
let stream = try await OpenAICompatibleHelper.streamText(
request: request,
modelId: "compatible-model",
baseURL: "https://mock.compatible",
apiKey: "sk-test",
providerName: "TestProvider",
session: session,
)
// Drain the stream so the assertions below see every delta.
var deltas: [TextStreamDelta] = []
for try await delta in stream {
deltas.append(delta)
}
return deltas
}
#expect(deltas.contains { $0.type == .textDelta && $0.content == "partial" })
#expect(deltas.contains { $0.type == .done && $0.finishReason == .contentFilter })
}
@Test
func `OpenAI-compatible provider forwards configured headers`() async throws {
let request = ProviderRequest(
@ -149,6 +192,266 @@ struct OpenAICompatibleHelperTests {
}
}
/// Verifies that `reasoning_details` on a chat-completion message is decoded into a reasoning
/// block of type `openrouter_reasoning_details` carrying the raw JSON, alongside the tool call.
@Test
func `generateText decodes OpenRouter reasoning details`() async throws {
let response = try await withMockedSession { urlRequest in
// Build the mocked payload from explicitly typed pieces; each heterogeneous dictionary
// is annotated `[String: Any]` so it can be handed to `JSONSerialization`.
let reasoningDetails: [[String: String]] = [["type": "reasoning.encrypted", "data": "sealed"]]
let toolCall: [String: Any] = [
"id": "call-1",
"type": "function",
"function": ["name": "lookup", "arguments": "{}"],
]
let toolCalls = [toolCall]
// Assistant message with null content, reasoning details, and one tool call.
let choice: [String: Any] = [
"index": 0,
"message": [
"role": "assistant",
"content": NSNull(),
"reasoning_details": reasoningDetails,
"tool_calls": toolCalls,
],
"finish_reason": "tool_calls",
]
let payload: [String: Any] = [
"id": "chatcmpl-test",
"object": "chat.completion",
"created": 1_700_000_000,
"model": "anthropic/claude-fable-5",
"choices": [choice],
]
return try self.jsonResponse(for: urlRequest, data: JSONSerialization.data(withJSONObject: payload))
} operation: { session in
try await OpenAICompatibleHelper.generateText(
request: ProviderRequest(messages: [.user("hi")]),
modelId: "anthropic/claude-fable-5",
baseURL: "https://mock.compatible",
apiKey: "sk-test",
providerName: "OpenRouter",
session: session,
)
}
// `#require` stops the test here if no reasoning block was decoded.
let reasoning = try #require(response.reasoning.first)
#expect(reasoning.type == "openrouter_reasoning_details")
#expect(reasoning.rawJSON?.contains("reasoning.encrypted") == true)
#expect(response.toolCalls?.first?.id == "call-1")
}
/// A request for `anthropic/claude-fable-5` through the OpenRouter-named helper route must be
/// sent without `temperature` while still carrying `max_tokens`.
@Test
func `generateText strips unsupported Fable sampling for OpenRouter route`() async throws {
    let capture = CapturedRequest()
    let sampledSettings = GenerationSettings(maxTokens: 128, temperature: 0.7)
    let request = ProviderRequest(
        messages: [ModelMessage(role: .user, content: [.text("ping")])],
        settings: sampledSettings,
    )

    _ = try await self.withMockedSession { outgoing in
        // Record the outgoing body before answering with a canned completion.
        capture.body = self.bodyData(from: outgoing)
        return self.jsonResponse(for: outgoing, data: Self.chatCompletionPayload(text: "pong"))
    } operation: { session in
        try await OpenAICompatibleHelper.generateText(
            request: request,
            modelId: "anthropic/claude-fable-5",
            baseURL: "https://mock.compatible",
            apiKey: "sk-test",
            providerName: "OpenRouter",
            session: session,
        )
    }

    let sentJSON = try #require(capture.body).jsonObject()
    #expect(sentJSON["max_tokens"] as? Int == 128)
    #expect(sentJSON["temperature"] == nil)
}
/// Reasoning details stored on a thinking message for the same provider, model and endpoint
/// must be replayed as `reasoning_details` on the outgoing assistant tool-call message.
@Test
func `generateText replays OpenRouter reasoning details on assistant tool messages`() async throws {
    let capture = CapturedRequest()
    let rawReasoning = #"[{"type":"reasoning.encrypted","data":"sealed"}]"#
    let endpointIdentity = try #require(ReasoningEndpointIdentity.canonical("https://mock.compatible"))
    let lookupCall = AgentToolCall(id: "call-1", name: "lookup", arguments: [:])

    let thinkingMessage = ModelMessage(
        role: .assistant,
        content: [.text("")],
        channel: .thinking,
        metadata: .init(customData: [
            "openrouter.reasoning_details": rawReasoning,
            "tachikoma.reasoning.provider": "openrouter",
            "tachikoma.reasoning.model": "anthropic/claude-fable-5",
            "tachikoma.reasoning.base_url": endpointIdentity,
        ]),
    )
    let toolResultMessage = ModelMessage(
        role: .tool,
        content: [.toolResult(.success(toolCallId: "call-1", result: AnyAgentToolValue(string: "ok")))],
    )
    let request = ProviderRequest(messages: [
        .user("hi"),
        thinkingMessage,
        ModelMessage(role: .assistant, content: [.toolCall(lookupCall)]),
        toolResultMessage,
    ])

    _ = try await self.withMockedSession { outgoing in
        capture.body = self.bodyData(from: outgoing)
        return self.jsonResponse(for: outgoing, data: Self.chatCompletionPayload(text: "done"))
    } operation: { session in
        try await OpenAICompatibleHelper.generateText(
            request: request,
            modelId: "anthropic/claude-fable-5",
            baseURL: "https://mock.compatible",
            apiKey: "sk-test",
            providerName: "OpenRouter",
            session: session,
        )
    }

    let sentJSON = try #require(capture.body).jsonObject()
    let sentMessages = try #require(sentJSON["messages"] as? [[String: Any]])
    let assistant = try #require(sentMessages.first { $0["role"] as? String == "assistant" })
    let details = try #require(assistant["reasoning_details"] as? [[String: Any]])
    let firstDetail = details.first
    #expect(firstDetail?["type"] as? String == "reasoning.encrypted")
    #expect(firstDetail?["data"] as? String == "sealed")
    #expect(assistant["tool_calls"] != nil)
}
/// When a thinking message is followed by an empty assistant message marked as a
/// `reasoning_only` boundary, the reasoning details are replayed on the outgoing assistant
/// message and the next outgoing message is the following user turn.
@Test
func `generateText replays OpenRouter reasoning details on reasoning-only assistant boundary`() async throws {
    let capture = CapturedRequest()
    let rawReasoning = #"[{"type":"reasoning.encrypted","data":"sealed"}]"#
    let endpointIdentity = try #require(ReasoningEndpointIdentity.canonical("https://mock.compatible"))

    let thinkingMessage = ModelMessage(
        role: .assistant,
        content: [.text("")],
        channel: .thinking,
        metadata: .init(customData: [
            "openrouter.reasoning_details": rawReasoning,
            "tachikoma.reasoning.provider": "openrouter",
            "tachikoma.reasoning.model": "anthropic/claude-fable-5",
            "tachikoma.reasoning.base_url": endpointIdentity,
        ]),
    )
    let boundaryMessage = ModelMessage(
        role: .assistant,
        content: [.text("")],
        metadata: .init(customData: ["tachikoma.internal.boundary": "reasoning_only"]),
    )
    let request = ProviderRequest(messages: [
        .user("first"),
        thinkingMessage,
        boundaryMessage,
        .user("next"),
    ])

    _ = try await self.withMockedSession { outgoing in
        capture.body = self.bodyData(from: outgoing)
        return self.jsonResponse(for: outgoing, data: Self.chatCompletionPayload(text: "done"))
    } operation: { session in
        try await OpenAICompatibleHelper.generateText(
            request: request,
            modelId: "anthropic/claude-fable-5",
            baseURL: "https://mock.compatible",
            apiKey: "sk-test",
            providerName: "OpenRouter",
            session: session,
        )
    }

    let sentJSON = try #require(capture.body).jsonObject()
    let sentMessages = try #require(sentJSON["messages"] as? [[String: Any]])
    let assistantIndex = try #require(sentMessages.firstIndex { $0["role"] as? String == "assistant" })
    let details = try #require(sentMessages[assistantIndex]["reasoning_details"] as? [[String: Any]])
    #expect(details.first?["data"] as? String == "sealed")

    // Bounds-check the follower so a missing message fails the test rather than trapping.
    let followerIndex = assistantIndex + 1
    let follower: [String: Any]? = sentMessages.indices.contains(followerIndex) ? sentMessages[followerIndex] : nil
    let nextMessage = try #require(follower)
    #expect(nextMessage["role"] as? String == "user")
}
/// Reasoning details recorded against a different endpoint identity must not appear as
/// `reasoning_details` on any outgoing assistant message.
@Test
func `generateText does not replay OpenRouter reasoning from another endpoint`() async throws {
    let capture = CapturedRequest()
    let rawReasoning = #"[{"type":"reasoning.encrypted","data":"sealed"}]"#
    // Identity of an endpoint other than the one the request is sent to.
    let foreignIdentity = try #require(ReasoningEndpointIdentity.canonical("https://other.example.test"))
    let lookupCall = AgentToolCall(id: "call-1", name: "lookup", arguments: [:])

    let thinkingMessage = ModelMessage(
        role: .assistant,
        content: [.text("")],
        channel: .thinking,
        metadata: .init(customData: [
            "openrouter.reasoning_details": rawReasoning,
            "tachikoma.reasoning.provider": "openrouter",
            "tachikoma.reasoning.model": "anthropic/claude-fable-5",
            "tachikoma.reasoning.base_url": foreignIdentity,
        ]),
    )
    let toolResultMessage = ModelMessage(
        role: .tool,
        content: [.toolResult(.success(toolCallId: "call-1", result: AnyAgentToolValue(string: "ok")))],
    )
    let request = ProviderRequest(messages: [
        .user("hi"),
        thinkingMessage,
        ModelMessage(role: .assistant, content: [.toolCall(lookupCall)]),
        toolResultMessage,
    ])

    _ = try await self.withMockedSession { outgoing in
        capture.body = self.bodyData(from: outgoing)
        return self.jsonResponse(for: outgoing, data: Self.chatCompletionPayload(text: "done"))
    } operation: { session in
        try await OpenAICompatibleHelper.generateText(
            request: request,
            modelId: "anthropic/claude-fable-5",
            baseURL: "https://mock.compatible",
            apiKey: "sk-test",
            providerName: "OpenRouter",
            session: session,
        )
    }

    let sentJSON = try #require(capture.body).jsonObject()
    let sentMessages = try #require(sentJSON["messages"] as? [[String: Any]])
    let assistantMessages = sentMessages.filter { $0["role"] as? String == "assistant" }
    let noneCarryReasoning = assistantMessages.allSatisfy { $0["reasoning_details"] == nil }
    #expect(noneCarryReasoning)
}
/// A thinking message recorded for a different model must be dropped entirely: only the
/// visible assistant message is sent, and the reasoning text appears nowhere in the body.
@Test
func `generateText drops unmatched OpenRouter reasoning instead of serializing it as text`() async throws {
    let capture = CapturedRequest()
    let endpointIdentity = try #require(ReasoningEndpointIdentity.canonical("https://mock.compatible"))

    let mismatchedThinking = ModelMessage(
        role: .assistant,
        content: [.text("private reasoning")],
        channel: .thinking,
        metadata: .init(customData: [
            "openrouter.reasoning": "private reasoning",
            "tachikoma.reasoning.provider": "openrouter",
            "tachikoma.reasoning.model": "other-model",
            "tachikoma.reasoning.base_url": endpointIdentity,
        ]),
    )
    let request = ProviderRequest(messages: [
        .user("hi"),
        mismatchedThinking,
        .assistant("visible"),
    ])

    _ = try await self.withMockedSession { outgoing in
        capture.body = self.bodyData(from: outgoing)
        return self.jsonResponse(for: outgoing, data: Self.chatCompletionPayload(text: "done"))
    } operation: { session in
        try await OpenAICompatibleHelper.generateText(
            request: request,
            modelId: "anthropic/claude-fable-5",
            baseURL: "https://mock.compatible",
            apiKey: "sk-test",
            providerName: "OpenRouter",
            session: session,
        )
    }

    let sentJSON = try #require(capture.body).jsonObject()
    let sentMessages = try #require(sentJSON["messages"] as? [[String: Any]])
    let assistantMessages = sentMessages.filter { $0["role"] as? String == "assistant" }
    #expect(assistantMessages.count == 1)
    #expect(assistantMessages.first?["content"] as? String == "visible")

    // Check the raw bytes too, so the text cannot hide in any other field.
    let rawBody = try String(data: #require(capture.body), encoding: .utf8)
    #expect(rawBody?.contains("private reasoning") == false)
}
@Test
func `non-200 responses surface TachikomaError.apiError`() async {
await self.withMockedSession { urlRequest in

View File

@ -180,6 +180,30 @@ struct StopConditionsTests {
#expect(!collectedText.contains("ignored"))
}
/// Once the stop string is matched locally, the wrapped stream must end with a `.done`/`.stop`
/// delta in well under the two seconds the upstream producer sleeps before its next delta.
@Test
func `Stop conditions finish immediately after local match`() async throws {
    let upstream = AsyncThrowingStream<TextStreamDelta, Error> { continuation in
        Task {
            continuation.yield(TextStreamDelta(type: .textDelta, content: "STOP"))
            // Producer stays busy long after the match; the consumer must not wait for it.
            try? await Task.sleep(nanoseconds: 2_000_000_000)
            continuation.yield(TextStreamDelta(type: .textDelta, content: "late"))
            continuation.yield(TextStreamDelta(type: .done, finishReason: .length))
            continuation.finish()
        }
    }

    let startedAt = Date()
    var observed: [TextStreamDelta] = []
    for try await delta in upstream.stopWhen(StringStopCondition("STOP")) {
        observed.append(delta)
    }
    let elapsed = Date().timeIntervalSince(startedAt)

    #expect(elapsed < 0.5)
    #expect(observed.compactMap(\.content) == ["STOP"])
    let terminal = observed.last
    #expect(terminal?.type == .done)
    #expect(terminal?.finishReason == .stop)
}
// MARK: - Builder Pattern Tests
@Test

View File

@ -59,6 +59,67 @@ struct ResponseCacheTests {
#expect(cached?.finishReason == .stop)
}
/// Two requests that differ only in the thinking signature stored in message metadata must
/// not share a cache entry.
@Test
func `ResponseCache keys include reasoning metadata`() async {
    let cache = ResponseCache()
    let stored = ProviderResponse(text: "cached", usage: nil, finishReason: .stop)

    /// Builds the same conversation each time, varying only the thinking signature.
    func request(signature: String) -> ProviderRequest {
        let thinking = ModelMessage(
            role: .assistant,
            content: [.text("thinking")],
            channel: .thinking,
            metadata: .init(customData: [
                "anthropic.thinking.signature": signature,
                "anthropic.thinking.type": "thinking",
            ]),
        )
        return ProviderRequest(
            messages: [.user("Hello"), thinking, .assistant("Hi")],
            tools: nil,
            settings: .default,
        )
    }

    await cache.store(stored, for: request(signature: "sig-a"))

    let sameSignature = await cache.get(for: request(signature: "sig-a"))
    #expect(sameSignature?.text == "cached")
    let otherSignature = await cache.get(for: request(signature: "sig-b"))
    #expect(otherSignature == nil)
}
/// The estimated size of a cache entry must grow with reasoning blocks and assistant
/// messages, not just the response text.
@Test
func `CacheEntry size includes reasoning and assistant messages`() {
    let largePayload = String(repeating: "x", count: 4096)
    let reasoningBlocks = [
        ProviderReasoningBlock(text: largePayload, signature: largePayload, type: "thinking"),
        ProviderReasoningBlock(
            text: "",
            type: "openrouter_reasoning_details",
            rawJSON: largePayload,
        ),
    ]
    let thinkingMessage = ModelMessage(
        role: .assistant,
        content: [.text(largePayload)],
        channel: .thinking,
        metadata: .init(customData: [
            "anthropic.thinking.model": "claude-fable-5",
            "anthropic.thinking.signature": largePayload,
        ]),
    )

    let small = CacheEntry(response: ProviderResponse(text: "ok"))
    let large = CacheEntry(response: ProviderResponse(
        text: "ok",
        reasoning: reasoningBlocks,
        assistantMessages: [thinkingMessage],
    ))

    // The 4096-character payload appears five times above; the threshold is just under three.
    let growth = large.estimatedMemorySize() - small.estimatedMemorySize()
    #expect(growth > 12000)
}
@Test
func `ResponseCache cache miss`() async {
let cache = ResponseCache()
@ -267,6 +328,99 @@ struct ResponseCacheTests {
#expect(key1.hash != key3.hash)
}
/// Requests that differ only in reasoning effort, or only in the Anthropic thinking option,
/// must hash to different cache keys.
@Test
func `CacheKey includes reasoning effort and Anthropic thinking options`() {
    let conversation = [ModelMessage.user("Hello")]

    let lowEffort = ProviderRequest(
        messages: conversation,
        settings: GenerationSettings(
            reasoningEffort: .low,
            providerOptions: .init(anthropic: .init(thinking: .adaptive)),
        ),
    )
    let highEffort = ProviderRequest(
        messages: conversation,
        settings: GenerationSettings(
            reasoningEffort: .high,
            providerOptions: .init(anthropic: .init(thinking: .adaptive)),
        ),
    )
    let disabledThinking = ProviderRequest(
        messages: conversation,
        settings: GenerationSettings(
            reasoningEffort: .low,
            providerOptions: .init(anthropic: .init(thinking: .disabled)),
        ),
    )

    let baselineHash = CacheKey(from: lowEffort).hash
    #expect(baselineHash != CacheKey(from: highEffort).hash)
    #expect(baselineHash != CacheKey(from: disabledThinking).hash)
}
/// Requests whose string stop conditions differ ("END" vs "STOP") must hash differently.
@Test
func `CacheKey includes string stop condition values`() {
    let conversation = [ModelMessage.user("Hello")]
    let endRequest = ProviderRequest(
        messages: conversation,
        settings: GenerationSettings(stopConditions: StringStopCondition("END")),
    )
    let stopRequest = ProviderRequest(
        messages: conversation,
        settings: GenerationSettings(stopConditions: StringStopCondition("STOP")),
    )

    let endHash = CacheKey(from: endRequest).hash
    let stopHash = CacheKey(from: stopRequest).hash
    #expect(endHash != stopHash)
}
/// A composite of two stop strings must not collide with a single stop string that spells
/// out what a naive delimiter-joined encoding of the pair would look like.
@Test
func `CacheKey encodes composite stop conditions without delimiter collisions`() async {
    let cache = ResponseCache()
    let conversation = [ModelMessage.user("Hello")]

    let pairCondition = AnyStopCondition(
        StringStopCondition("a"),
        StringStopCondition("b"),
    )
    let spoofCondition = AnyStopCondition(
        StringStopCondition("a,string:true:b"),
    )
    let splitRequest = ProviderRequest(
        messages: conversation,
        settings: GenerationSettings(stopConditions: pairCondition),
    )
    let joinedRequest = ProviderRequest(
        messages: conversation,
        settings: GenerationSettings(stopConditions: spoofCondition),
    )

    #expect(CacheKey(from: splitRequest).hash != CacheKey(from: joinedRequest).hash)

    // A response stored for the pair must not be served for the look-alike request.
    await cache.store(ProviderResponse(text: "split", finishReason: .stop), for: splitRequest)
    let joinedCached = await cache.get(for: joinedRequest)
    #expect(joinedCached == nil)
}
/// A request using a closure-based `PredicateStopCondition` yields a key flagged as not cacheable.
@Test
func `CacheKey marks custom stop conditions uncacheable`() {
    let neverStops = PredicateStopCondition { _, _ in false }
    let request = ProviderRequest(
        messages: [ModelMessage.user("Hello")],
        settings: GenerationSettings(stopConditions: neverStops),
    )

    let cacheKey = CacheKey(from: request)

    #expect(cacheKey.isCacheable == false)
}
/// Storing a response for a request with a closure-based stop condition must be a no-op:
/// a subsequent lookup for the same request misses.
@Test
func `ResponseCache skips custom stop condition entries`() async {
    let cache = ResponseCache()
    let neverStops = PredicateStopCondition { _, _ in false }
    let request = ProviderRequest(
        messages: [ModelMessage.user("Hello")],
        settings: GenerationSettings(stopConditions: neverStops),
    )

    let response = ProviderResponse(text: "cached", finishReason: .stop)
    await cache.store(response, for: request)

    let lookup = await cache.get(for: request)
    #expect(lookup == nil)
}
@Test
func `CacheKey includes tools in hash`() {
let tool1 = AgentTool(
@ -362,6 +516,38 @@ struct ResponseCacheTests {
#expect(callCount.value == 1) // Provider not called again
}
/// Two providers sharing one cache and one model id, but differing in base URL query
/// (`tenant=a` vs `tenant=b`), must each be called once and must each get their own
/// cached response back.
@Test
func `CachedProvider keys include provider endpoint identity`() async throws {
    let cache = ResponseCache()
    let callCountA = Box(value: 0)
    let callCountB = Box(value: 0)
    let tenantAURL = "https://gateway.test/v1?tenant=a"
    let tenantBURL = "https://gateway.test/v1?tenant=b"

    let providerA = ResponseCacheMockProvider(
        model: .openaiCompatible(modelId: "shared-model", baseURL: tenantAURL),
        response: ProviderResponse(text: "tenant-a", usage: nil, finishReason: .stop),
        mockModelId: "shared-model",
        mockBaseURL: tenantAURL,
        onGenerateText: { _ in callCountA.value += 1 },
    )
    let providerB = ResponseCacheMockProvider(
        model: .openaiCompatible(modelId: "shared-model", baseURL: tenantBURL),
        response: ProviderResponse(text: "tenant-b", usage: nil, finishReason: .stop),
        mockModelId: "shared-model",
        mockBaseURL: tenantBURL,
        onGenerateText: { _ in callCountB.value += 1 },
    )
    let cachedA = await cache.wrapProvider(providerA)
    let cachedB = await cache.wrapProvider(providerB)
    let request = ProviderRequest(messages: [ModelMessage.user("Test")], tools: nil, settings: .default)

    // First round populates the cache; the second round must be served from it.
    #expect(try await cachedA.generateText(request: request).text == "tenant-a")
    #expect(try await cachedB.generateText(request: request).text == "tenant-b")
    #expect(try await cachedA.generateText(request: request).text == "tenant-a")
    #expect(try await cachedB.generateText(request: request).text == "tenant-b")

    #expect(callCountA.value == 1)
    #expect(callCountB.value == 1)
}
@Test
func `CachedProvider doesn't cache streaming`() async throws {
let cache = ResponseCache()
@ -397,15 +583,17 @@ struct ResponseCacheTests {
private struct ResponseCacheMockProvider: ModelProvider {
let model: LanguageModel
let response: ProviderResponse
let mockModelId: String
let mockBaseURL: String?
var onGenerateText: (@Sendable (ProviderRequest) -> Void)?
var onStreamText: (@Sendable (ProviderRequest) -> Void)?
var modelId: String {
"mock-model"
self.mockModelId
}
var baseURL: String? {
nil
self.mockBaseURL
}
var apiKey: String? {
@ -419,11 +607,15 @@ private struct ResponseCacheMockProvider: ModelProvider {
/// Creates the mock provider, storing every argument unchanged.
///
/// - Parameters:
///   - model: Language model the mock is associated with.
///   - response: Canned response held by the mock.
///   - mockModelId: Value stored as `mockModelId`; defaults to "mock-model".
///   - mockBaseURL: Value stored as `mockBaseURL`; defaults to `nil`.
///   - onGenerateText: Optional callback stored on the mock.
///     NOTE(review): presumably invoked from `generateText` — confirm in the method body.
///   - onStreamText: Optional callback stored on the mock.
///     NOTE(review): presumably invoked from `streamText` — confirm in the method body.
init(
model: LanguageModel,
response: ProviderResponse,
mockModelId: String = "mock-model",
mockBaseURL: String? = nil,
onGenerateText: (@Sendable (ProviderRequest) -> Void)? = nil,
onStreamText: (@Sendable (ProviderRequest) -> Void)? = nil,
) {
self.model = model
self.response = response
self.mockModelId = mockModelId
self.mockBaseURL = mockBaseURL
self.onGenerateText = onGenerateText
self.onStreamText = onStreamText
}

View File

@ -1,4 +1,7 @@
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Testing
@testable import Tachikoma
@ -22,6 +25,19 @@ struct AnthropicInterleavedDefaultsTests {
#expect(parts.contains("fine-grained-tool-streaming-2025-05-14"))
}
/// Checks that canonical endpoint identities differ when only the routing query differs,
/// carry a `sha256:` prefix without the raw host or query key, and are unchanged by a
/// trailing path slash, URL user-info, or a fragment.
@Test
func `Endpoint identity includes routing query without exposing raw values`() {
    let identityA = ReasoningEndpointIdentity.canonical("https://gateway.test/v1?tenant=a")
    let identityB = ReasoningEndpointIdentity.canonical("https://gateway.test/v1?tenant=b")

    // The routing query participates in the identity.
    #expect(identityA != identityB)

    // The identity is a digest-style value that does not echo raw URL parts.
    #expect(identityA?.hasPrefix("sha256:") == true)
    #expect(identityA?.contains("tenant") == false)
    #expect(identityA?.contains("gateway") == false)

    // Equivalent spellings of the same endpoint collapse to the same identity.
    let withTrailingSlash = ReasoningEndpointIdentity.canonical("https://gateway.test/v1/?tenant=a")
    #expect(withTrailingSlash == identityA)
    let withUserInfoAndFragment = ReasoningEndpointIdentity
        .canonical("https://user:secret@gateway.test/v1?tenant=a#frag")
    #expect(withUserInfoAndFragment == identityA)
}
@Test
func `Provider request includes beta header and thinking payload`() throws {
let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"])
@ -126,6 +142,163 @@ struct AnthropicInterleavedDefaultsTests {
#expect(json["max_tokens"] as? Int == 64)
}
/// Encodes a non-streaming Fable 5 request that sets sampling parameters, high reasoning
/// effort and adaptive thinking, then checks the JSON body: sampling keys and `thinking`
/// are absent, `output_config.effort` is `"high"`, `max_tokens` is passed through, and no
/// `anthropic-beta` header is set.
@Test
func `Fable 5 request omits thinking config and uses effort output config`() throws {
    let provider = try AnthropicProvider(
        model: .fable5,
        configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]),
    )
    let request = ProviderRequest(
        messages: [.user("hi")],
        settings: GenerationSettings(
            maxTokens: 128_000,
            temperature: 0.7,
            topP: 0.9,
            topK: 40,
            reasoningEffort: .high,
            providerOptions: .init(anthropic: .init(thinking: .adaptive)),
        ),
    )

    let encoded = try provider.makeURLRequest(for: request, stream: false)
    let payload = try #require(encoded.httpBody)
    let decoded = try #require(try JSONSerialization.jsonObject(with: payload) as? [String: Any])

    #expect(decoded["model"] as? String == "claude-fable-5")

    // Sampling parameters and the thinking config must not appear in the body.
    #expect(decoded["temperature"] == nil)
    #expect(decoded["top_p"] == nil)
    #expect(decoded["top_k"] == nil)
    #expect(decoded["thinking"] == nil)

    let effortConfig = try #require(decoded["output_config"] as? [String: Any])
    #expect(effortConfig["effort"] as? String == "high")
    #expect(decoded["max_tokens"] as? Int == 128_000)
    #expect(encoded.value(forHTTPHeaderField: "anthropic-beta") == nil)
}
/// Checks the values applied to a Fable 5 request that carries no explicit settings:
/// `max_tokens` of 16384 and a request timeout of 1800 seconds.
@Test
func `Fable 5 request uses model-aware default output budget`() throws {
    let provider = try AnthropicProvider(
        model: .fable5,
        configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]),
    )

    let encoded = try provider.makeURLRequest(
        for: ProviderRequest(messages: [.user("hi")]),
        stream: false,
    )
    let payload = try #require(encoded.httpBody)
    let decoded = try #require(try JSONSerialization.jsonObject(with: payload) as? [String: Any])

    #expect(decoded["max_tokens"] as? Int == 16384)
    #expect(encoded.timeoutInterval == 1800)
}
/// Checks that a non-streaming Fable 5 request asking for 128 000 output tokens is built
/// with an 1800-second timeout.
@Test
func `Fable 5 long output requests extend non-streaming timeout`() throws {
    let provider = try AnthropicProvider(
        model: .fable5,
        configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]),
    )
    let longOutputRequest = ProviderRequest(
        messages: [.user("long")],
        settings: GenerationSettings(maxTokens: 128_000),
    )

    let encoded = try provider.makeURLRequest(for: longOutputRequest, stream: false)

    #expect(encoded.timeoutInterval == 1800)
}
/// Checks that non-streaming requests asking for 128 000 output tokens are built with an
/// 1800-second timeout for both Opus 4.7 and Opus 4.8.
@Test
func `Opus long output requests extend non-streaming timeout`() throws {
    let configuration = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"])
    let opusModels: [LanguageModel.Anthropic] = [.opus47, .opus48]

    for opusModel in opusModels {
        let provider = try AnthropicProvider(model: opusModel, configuration: configuration)
        let longOutputRequest = ProviderRequest(
            messages: [.user("long")],
            settings: GenerationSettings(maxTokens: 128_000),
        )
        let encoded = try provider.makeURLRequest(for: longOutputRequest, stream: false)
        #expect(encoded.timeoutInterval == 1800)
    }
}
/// Checks that a `.custom("claude-fable-5")` model gets the same capabilities and request
/// values this suite expects of Fable 5: streaming off, 1 000 000-token context,
/// 128 000-token output cap, no `thinking` key, and `max_tokens` of 16384.
@Test
func `Custom Fable model id uses Fable request defaults`() throws {
    let provider = try AnthropicProvider(
        model: .custom("claude-fable-5"),
        configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]),
    )
    let encoded = try provider.makeURLRequest(
        for: ProviderRequest(messages: [.user("hi")]),
        stream: false,
    )
    let payload = try #require(encoded.httpBody)
    let decoded = try #require(try JSONSerialization.jsonObject(with: payload) as? [String: Any])

    // Capabilities reported by the provider instance.
    let capabilities = provider.capabilities
    #expect(capabilities.supportsStreaming == false)
    #expect(capabilities.contextLength == 1_000_000)
    #expect(capabilities.maxOutputTokens == 128_000)

    // The same facts read straight off the model enums.
    #expect(LanguageModel.anthropic(.custom("claude-fable-5")).supportsStreaming == false)
    #expect(LanguageModel.anthropic(.custom("claude-fable-5")).contextLength == 1_000_000)
    #expect(LanguageModel.Anthropic.custom("claude-fable-5").maxOutputTokens == 128_000)

    // Encoded request body.
    #expect(decoded["model"] as? String == "claude-fable-5")
    #expect(decoded["thinking"] == nil)
    #expect(decoded["max_tokens"] as? Int == 16384)
}
/// Same checks as the unqualified custom-id test, but for the qualified id
/// `anthropic.claude-fable-5`; the id must be sent in the request body unchanged.
@Test
func `Qualified custom Fable model id uses Fable request defaults`() throws {
    let provider = try AnthropicProvider(
        model: .custom("anthropic.claude-fable-5"),
        configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]),
    )
    let encoded = try provider.makeURLRequest(
        for: ProviderRequest(messages: [.user("hi")]),
        stream: false,
    )
    let payload = try #require(encoded.httpBody)
    let decoded = try #require(try JSONSerialization.jsonObject(with: payload) as? [String: Any])

    // Capabilities reported by the provider instance.
    let capabilities = provider.capabilities
    #expect(capabilities.supportsStreaming == false)
    #expect(capabilities.contextLength == 1_000_000)
    #expect(capabilities.maxOutputTokens == 128_000)

    // The same facts read straight off the model enums.
    #expect(LanguageModel.anthropic(.custom("anthropic.claude-fable-5")).contextLength == 1_000_000)
    #expect(LanguageModel.Anthropic.custom("anthropic.claude-fable-5").maxOutputTokens == 128_000)

    // Encoded request body.
    #expect(decoded["model"] as? String == "anthropic.claude-fable-5")
    #expect(decoded["thinking"] == nil)
    #expect(decoded["max_tokens"] as? Int == 16384)
}
/// Checks that building a Fable 5 request with thinking explicitly disabled throws a
/// `TachikomaError`.
@Test
func `Fable 5 rejects disabled thinking mode`() throws {
    let provider = try AnthropicProvider(
        model: .fable5,
        configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]),
    )
    let disabledThinking = GenerationSettings(
        maxTokens: 64,
        providerOptions: .init(anthropic: .init(thinking: .disabled)),
    )

    #expect(throws: TachikomaError.self) {
        let request = ProviderRequest(messages: [.user("hi")], settings: disabledThinking)
        _ = try provider.makeURLRequest(for: request, stream: false)
    }
}
/// Checks that a `.custom("claude-fable-5")` model also throws a `TachikomaError` when a
/// request is built with thinking explicitly disabled.
@Test
func `Custom Fable model id rejects disabled thinking mode`() throws {
    let provider = try AnthropicProvider(
        model: .custom("claude-fable-5"),
        configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]),
    )
    let disabledThinking = GenerationSettings(
        maxTokens: 64,
        providerOptions: .init(anthropic: .init(thinking: .disabled)),
    )

    #expect(throws: TachikomaError.self) {
        let request = ProviderRequest(messages: [.user("hi")], settings: disabledThinking)
        _ = try provider.makeURLRequest(for: request, stream: false)
    }
}
@Test
func `Opus reasoning effort is kept when thinking is disabled`() throws {
let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"])
@ -300,6 +473,29 @@ struct AnthropicInterleavedDefaultsTests {
#expect(delta.signature == "sig")
}
/// Decodes a stream delta payload that has `stop_reason` but no `type` key and checks that
/// the decoded type is empty while the stop reason is kept.
@Test
func `Stream delta decodes message_delta stop reason without delta type`() throws {
    let json = "{\"stop_reason\":\"refusal\",\"stop_sequence\":null}"
    let payload = try #require(json.data(using: .utf8))

    let decoded = try JSONDecoder().decode(AnthropicStreamDelta.self, from: payload)

    #expect(decoded.type.isEmpty)
    #expect(decoded.stopReason == "refusal")
}
/// Decodes a `message_delta` stream event whose `usage` object only carries
/// `output_tokens`, and checks that the input token count decodes as 0.
@Test
func `Stream event decodes partial usage with stop reason`() throws {
    let json =
        #"{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":42}}"#
    let payload = try #require(json.data(using: .utf8))

    let decoded = try JSONDecoder().decode(AnthropicStreamEvent.self, from: payload)

    #expect(decoded.delta?.stopReason == "end_turn")
    // `input_tokens` is absent from the payload above.
    #expect(decoded.usage?.inputTokens == 0)
    #expect(decoded.usage?.outputTokens == 42)
}
@Test
func `Signed thinking blocks are preserved for assistant messages`() throws {
let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"])
@ -338,7 +534,125 @@ struct AnthropicInterleavedDefaultsTests {
}
@Test
func `Redacted thinking blocks preserve signature without text`() throws {
func `Fable 5 preserves signed thinking history while omitting request thinking field`() throws {
    // A signed thinking message tagged with this provider, model and endpoint must be
    // replayed as a `thinking` content block, while the top-level `thinking` request
    // field stays absent.
    let provider = try AnthropicProvider(
        model: .fable5,
        configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]),
    )
    let endpointIdentity = try #require(ReasoningEndpointIdentity.canonical("https://api.anthropic.com"))
    let thinkingTags: [String: String] = [
        "anthropic.thinking.model": "claude-fable-5",
        "anthropic.thinking.signature": "sig-fable",
        "anthropic.thinking.type": "thinking",
        "tachikoma.reasoning.provider": "anthropic",
        "tachikoma.reasoning.model": "claude-fable-5",
        "tachikoma.reasoning.base_url": endpointIdentity,
    ]
    let thinkingMessage = ModelMessage(
        role: .assistant,
        content: [.text("fable thinking")],
        channel: .thinking,
        metadata: .init(customData: thinkingTags),
    )

    let encoded = try provider.makeURLRequest(
        for: ProviderRequest(
            messages: [.user("hi"), thinkingMessage, .assistant("hello"), .user("continue")],
            settings: GenerationSettings(maxTokens: 64),
        ),
        stream: false,
    )
    let payload = try #require(encoded.httpBody)
    let decoded = try #require(try JSONSerialization.jsonObject(with: payload) as? [String: Any])
    let encodedMessages = try #require(decoded["messages"] as? [[String: Any]])
    let assistantBlocks = try #require(encodedMessages[1]["content"] as? [[String: Any]])

    #expect(decoded["thinking"] == nil)
    let leadingBlock = assistantBlocks.first
    #expect(leadingBlock?["type"] as? String == "thinking")
    #expect(leadingBlock?["thinking"] as? String == "fable thinking")
    #expect(leadingBlock?["signature"] as? String == "sig-fable")
}
/// Checks that a signed thinking message tagged with a different endpoint identity is
/// left out of the encoded request: the assistant turn holds only the text block and the
/// thinking text appears nowhere in the body.
@Test
func `Fable 5 drops mismatched signed thinking history in direct provider requests`() throws {
    let provider = try AnthropicProvider(
        model: .fable5,
        configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]),
    )
    // Identity of an endpoint other than the one this provider talks to.
    let foreignIdentity = try #require(ReasoningEndpointIdentity.canonical("https://other.example.test"))
    let thinkingTags: [String: String] = [
        "anthropic.thinking.model": "claude-fable-5",
        "anthropic.thinking.signature": "sig-foreign",
        "anthropic.thinking.type": "thinking",
        "tachikoma.reasoning.provider": "anthropic",
        "tachikoma.reasoning.model": "claude-fable-5",
        "tachikoma.reasoning.base_url": foreignIdentity,
    ]
    let foreignThinking = ModelMessage(
        role: .assistant,
        content: [.text("foreign thinking")],
        channel: .thinking,
        metadata: .init(customData: thinkingTags),
    )

    let encoded = try provider.makeURLRequest(
        for: ProviderRequest(
            messages: [.user("hi"), foreignThinking, .assistant("hello"), .user("continue")],
            settings: GenerationSettings(maxTokens: 64),
        ),
        stream: false,
    )
    let payload = try #require(encoded.httpBody)
    let decoded = try #require(try JSONSerialization.jsonObject(with: payload) as? [String: Any])
    let encodedMessages = try #require(decoded["messages"] as? [[String: Any]])
    let assistantBlocks = try #require(encodedMessages[1]["content"] as? [[String: Any]])

    #expect(assistantBlocks.count == 1)
    let onlyBlock = assistantBlocks.first
    #expect(onlyBlock?["type"] as? String == "text")
    #expect(onlyBlock?["text"] as? String == "hello")
    #expect(String(data: payload, encoding: .utf8)?.contains("foreign thinking") == false)
}
/// Checks that building a Fable 5 request whose last message is an assistant message
/// throws a `TachikomaError`.
@Test
func `Fable 5 rejects assistant prefill requests`() throws {
    let provider = try AnthropicProvider(
        model: .fable5,
        configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]),
    )

    #expect(throws: TachikomaError.self) {
        let prefilled = ProviderRequest(messages: [.user("hi"), .assistant("prefill")])
        _ = try provider.makeURLRequest(for: prefilled, stream: false)
    }
}
/// Checks the finish-reason mapping for two Anthropic stop reasons:
/// `refusal` → `.contentFilter` and `model_context_window_exceeded` → `.length`.
@Test
func `Anthropic refusal stop reason maps to content filter`() {
    let refusal = AnthropicProvider.mapFinishReason("refusal")
    #expect(refusal == .contentFilter)

    let contextOverflow = AnthropicProvider.mapFinishReason("model_context_window_exceeded")
    #expect(contextOverflow == .length)
}
/// Decodes a refusal message response and checks that `stop_details.category` and
/// `stop_details.explanation` are surfaced on the decoded value.
@Test
func `Anthropic refusal response decodes stop details explanation`() throws {
    // Built from the string's UTF-8 view, which is non-optional, so no force-unwrap is needed.
    let data = Data(
        """
        {
        "id": "msg_test",
        "type": "message",
        "role": "assistant",
        "content": [],
        "model": "claude-fable-5",
        "stop_reason": "refusal",
        "stop_details": {
        "category": "cyber",
        "explanation": "I cannot help with that request."
        },
        "usage": {
        "input_tokens": 10,
        "output_tokens": 0
        }
        }
        """.utf8
    )

    let response = try JSONDecoder().decode(AnthropicMessageResponse.self, from: data)

    #expect(response.stopDetails?.category == "cyber")
    #expect(response.stopDetails?.explanation == "I cannot help with that request.")
}
@Test
func `Redacted thinking blocks preserve opaque data`() throws {
let config = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"])
let provider = try AnthropicProvider(model: .opus45, configuration: config)
@ -349,10 +663,9 @@ struct AnthropicInterleavedDefaultsTests {
let redacted = ModelMessage(
role: .assistant,
content: [.text("")],
content: [.text("opaque-redacted-data")],
channel: .thinking,
metadata: .init(customData: [
"anthropic.thinking.signature": "sig-redacted",
"anthropic.thinking.type": "redacted_thinking",
]),
)
@ -369,8 +682,232 @@ struct AnthropicInterleavedDefaultsTests {
let assistant = try #require(messages[1]["content"] as? [[String: Any]])
#expect(assistant.first?["type"] as? String == "redacted_thinking")
#expect((assistant.first?["redacted_thinking"] as? String)?.isEmpty == true)
#expect(assistant.first?["signature"] as? String == "sig-redacted")
#expect(assistant.first?["data"] as? String == "opaque-redacted-data")
#expect(assistant.first?["signature"] == nil)
}
/// Decodes a `redacted_thinking` response content block and checks that its opaque `data`
/// payload is kept.
@Test
func `Redacted thinking response decodes opaque data`() throws {
    let json = """
    {"type":"redacted_thinking","data":"opaque-redacted-data"}
    """
    let payload = try #require(json.data(using: .utf8))

    let decoded = try JSONDecoder().decode(AnthropicResponseContent.self, from: payload)

    if case let .redactedThinking(block) = decoded {
        #expect(block.data == "opaque-redacted-data")
    } else {
        Issue.record("Expected redacted thinking content")
    }
}
/// Sends a signed thinking message followed by a redacted one and an assistant text, and
/// checks that the encoded assistant turn holds three blocks in that order:
/// `thinking` (with signature), `redacted_thinking` (with data), then `text`.
@Test
func `Consecutive thinking blocks are preserved in order`() throws {
    let provider = try AnthropicProvider(
        model: .fable5,
        configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]),
    )
    let signedIdentity = try #require(ReasoningEndpointIdentity.canonical("https://api.anthropic.com"))
    let signedMessage = ModelMessage(
        role: .assistant,
        content: [.text("signed")],
        channel: .thinking,
        metadata: .init(customData: [
            "anthropic.thinking.model": "claude-fable-5",
            "anthropic.thinking.signature": "sig",
            "anthropic.thinking.type": "thinking",
            "tachikoma.reasoning.provider": "anthropic",
            "tachikoma.reasoning.model": "claude-fable-5",
            "tachikoma.reasoning.base_url": signedIdentity,
        ]),
    )
    let redactedIdentity = try #require(ReasoningEndpointIdentity.canonical("https://api.anthropic.com"))
    // The redacted message carries no signature entry.
    let redactedMessage = ModelMessage(
        role: .assistant,
        content: [.text("opaque")],
        channel: .thinking,
        metadata: .init(customData: [
            "anthropic.thinking.model": "claude-fable-5",
            "anthropic.thinking.type": "redacted_thinking",
            "tachikoma.reasoning.provider": "anthropic",
            "tachikoma.reasoning.model": "claude-fable-5",
            "tachikoma.reasoning.base_url": redactedIdentity,
        ]),
    )

    let encoded = try provider.makeURLRequest(
        for: ProviderRequest(
            messages: [.user("hi"), signedMessage, redactedMessage, .assistant("hello"), .user("continue")],
            settings: GenerationSettings(maxTokens: 64),
        ),
        stream: false,
    )
    let payload = try #require(encoded.httpBody)
    let decoded = try #require(try JSONSerialization.jsonObject(with: payload) as? [String: Any])
    let encodedMessages = try #require(decoded["messages"] as? [[String: Any]])
    let assistantBlocks = try #require(encodedMessages[1]["content"] as? [[String: Any]])

    #expect(assistantBlocks.count == 3)

    #expect(assistantBlocks[0]["type"] as? String == "thinking")
    #expect(assistantBlocks[0]["thinking"] as? String == "signed")
    #expect(assistantBlocks[0]["signature"] as? String == "sig")

    #expect(assistantBlocks[1]["type"] as? String == "redacted_thinking")
    #expect(assistantBlocks[1]["data"] as? String == "opaque")

    #expect(assistantBlocks[2]["type"] as? String == "text")
    #expect(assistantBlocks[2]["text"] as? String == "hello")
}
/// Pins `maxOutputTokens` for the current Anthropic models: 128 000 for Fable 5 and
/// Opus 4.7/4.8, 64 000 for Sonnet 4.6 and Haiku 4.5.
@Test
func `Current Anthropic models expose documented output caps`() {
    let largeCapModels: [LanguageModel.Anthropic] = [.fable5, .opus47, .opus48]
    for model in largeCapModels {
        #expect(model.maxOutputTokens == 128_000)
    }

    let standardCapModels: [LanguageModel.Anthropic] = [.sonnet46, .haiku45]
    for model in standardCapModels {
        #expect(model.maxOutputTokens == 64000)
    }
}
/// Checks that Fable 5 and Opus 4.8 report `supportsStreaming == false` at both provider
/// and model level and throw `TachikomaError` from `streamText`, while Opus 4.7,
/// Sonnet 4.6/4.5 and Haiku 4.5 still report streaming support.
@Test
func `Fable and Opus 4_8 streaming are disabled until rollback is supported`() async throws {
    let configuration = TachikomaConfiguration(apiKeys: ["anthropic": "test-key"])
    let fableProvider = try AnthropicProvider(model: .fable5, configuration: configuration)
    let opus48Provider = try AnthropicProvider(model: .opus48, configuration: configuration)

    // Provider-level capabilities.
    #expect(fableProvider.capabilities.supportsStreaming == false)
    #expect(opus48Provider.capabilities.supportsStreaming == false)

    // Model-level flags.
    let nonStreamingModels: [LanguageModel] = [.anthropic(.fable5), .anthropic(.opus48)]
    for model in nonStreamingModels {
        #expect(model.supportsStreaming == false)
    }
    let streamingModels: [LanguageModel] = [
        .anthropic(.opus47),
        .anthropic(.sonnet46),
        .anthropic(.sonnet45),
        .anthropic(.haiku45),
    ]
    for model in streamingModels {
        #expect(model.supportsStreaming == true)
    }

    // Calling the streaming entry point must fail for both providers.
    await #expect(throws: TachikomaError.self) {
        _ = try await fableProvider.streamText(request: ProviderRequest(messages: [.user("hi")]))
    }
    await #expect(throws: TachikomaError.self) {
        _ = try await opus48Provider.streamText(request: ProviderRequest(messages: [.user("hi")]))
    }
}
/// Checks `isOpus48(modelId:)` against ids that should match and look-alike ids that
/// only contain a similar substring.
@Test
func `Opus 4_8 detection avoids substring false positives`() {
    let matchingIds = ["claude-opus-4-8", "anthropic/claude-opus-4.8"]
    for modelId in matchingIds {
        #expect(LanguageModel.Anthropic.isOpus48(modelId: modelId) == true)
    }

    let lookalikeIds = ["my-opus48-distill", "opus480"]
    for modelId in lookalikeIds {
        #expect(LanguageModel.Anthropic.isOpus48(modelId: modelId) == false)
    }
}
/// Checks `isFable(modelId:)` against ids that should match and look-alike ids that only
/// embed the Fable id inside a longer name.
@Test
func `Fable detection avoids substring false positives`() {
    let matchingIds = ["claude-fable-5", "anthropic/claude-fable-5"]
    for modelId in matchingIds {
        #expect(LanguageModel.Anthropic.isFable(modelId: modelId) == true)
    }

    let lookalikeIds = ["vendor/claude-fable-50", "my-claude-fable-5-distill"]
    for modelId in lookalikeIds {
        #expect(LanguageModel.Anthropic.isFable(modelId: modelId) == false)
    }
}
/// Runs `generateText` against the stubbed `AnthropicIdentityURLProtocol` session and checks
/// the metadata on the returned thinking message: provider and model come from the
/// `reasoning*` arguments, and the endpoint identity equals the canonical form of the base
/// URL without user-info or fragment, exposing none of its raw parts.
@Test
func `Anthropic-compatible provider tags native thinking with wrapper identity`() async throws {
    let stubbedConfiguration = URLSessionConfiguration.ephemeral
    stubbedConfiguration.protocolClasses = [AnthropicIdentityURLProtocol.self]
    let stubbedSession = URLSession(configuration: stubbedConfiguration)

    let provider = try AnthropicProvider(
        model: .custom("claude-fable-5"),
        configuration: TachikomaConfiguration(apiKeys: ["anthropic": "test-key"]),
        reasoningProvider: "anthropic-compatible",
        reasoningModelId: "claude-fable-5",
        reasoningBaseURL: "https://user:secret@example.test/path?token=secret#frag",
        urlSession: stubbedSession,
    )

    let response = try await provider.generateText(request: ProviderRequest(messages: [.user("hi")]))
    let thinkingMessage = try #require(response.assistantMessages.first { $0.channel == .thinking })
    let tags = thinkingMessage.metadata?.customData

    #expect(tags?["tachikoma.reasoning.provider"] == "anthropic-compatible")
    #expect(tags?["tachikoma.reasoning.model"] == "claude-fable-5")

    let taggedIdentity = tags?["tachikoma.reasoning.base_url"]
    let expectedIdentity = ReasoningEndpointIdentity.canonical("https://example.test/path?token=secret")
    #expect(taggedIdentity == expectedIdentity)
    #expect(taggedIdentity?.hasPrefix("sha256:") == true)
    #expect(taggedIdentity?.contains("path") == false)
    #expect(taggedIdentity?.contains("secret") == false)
    #expect(taggedIdentity?.contains("token") == false)

    #expect(tags?["anthropic.thinking.signature"] == "sig")
}
/// Checks Fable 5 / Opus 4.8 model ids routed through compatible providers
/// (Anthropic-compatible, OpenAI-compatible, OpenRouter, Together): streaming is reported
/// as unsupported even when the caller passes capabilities that request it, the context
/// length and output cap match the values asserted below, and `streamText` throws
/// `TachikomaError`.
@Test
func `Compatible refusal-prone Anthropic streaming and capabilities are disabled`() async throws {
    let endpoint = "https://example.test"
    let fableId = "claude-fable-5"
    let opusId = "claude-opus-4-8"
    let anthropicCompatibleConfiguration = TachikomaConfiguration(apiKeys: ["anthropic_compatible": "test-key"])

    let fableProvider = try AnthropicCompatibleProvider(
        modelId: fableId,
        baseURL: endpoint,
        configuration: anthropicCompatibleConfiguration,
    )
    let opusProvider = try AnthropicCompatibleProvider(
        modelId: opusId,
        baseURL: endpoint,
        configuration: anthropicCompatibleConfiguration,
    )
    // Caller-supplied capabilities that ask for streaming.
    let streamingRequestedProvider = try AnthropicCompatibleProvider(
        modelId: fableId,
        baseURL: endpoint,
        configuration: anthropicCompatibleConfiguration,
        capabilities: ModelCapabilities(supportsStreaming: true),
    )

    // Anthropic-compatible provider capabilities.
    #expect(fableProvider.capabilities.supportsStreaming == false)
    #expect(opusProvider.capabilities.supportsStreaming == false)
    #expect(streamingRequestedProvider.capabilities.supportsStreaming == false)
    #expect(fableProvider.capabilities.contextLength == 1_000_000)
    #expect(fableProvider.capabilities.maxOutputTokens == 128_000)

    // Model-level flags for the compatible cases.
    let compatibleFable = LanguageModel.anthropicCompatible(modelId: fableId, baseURL: endpoint)
    let compatibleOpus = LanguageModel.anthropicCompatible(modelId: opusId, baseURL: endpoint)
    let openAICompatibleFable = LanguageModel.openaiCompatible(modelId: fableId, baseURL: endpoint)
    let qualifiedCompatibleFable = LanguageModel.anthropicCompatible(
        modelId: "anthropic.claude-fable-5",
        baseURL: endpoint,
    )
    #expect(compatibleFable.supportsStreaming == false)
    #expect(compatibleOpus.supportsStreaming == false)
    #expect(openAICompatibleFable.supportsStreaming == false)
    #expect(compatibleFable.contextLength == 1_000_000)
    #expect(qualifiedCompatibleFable.contextLength == 1_000_000)

    // OpenAI-style providers carrying the same model id.
    let openAICompatibleProvider = try OpenAICompatibleProvider(
        modelId: fableId,
        baseURL: endpoint,
        configuration: TachikomaConfiguration(apiKeys: ["openai_compatible": "test-key"]),
    )
    let openRouterProvider = try OpenRouterProvider(
        modelId: "anthropic/claude-fable-5",
        configuration: TachikomaConfiguration(apiKeys: ["openrouter": "test-key"]),
    )
    let togetherProvider = try TogetherProvider(
        modelId: "anthropic/claude-fable-5",
        configuration: TachikomaConfiguration(apiKeys: ["together": "test-key"]),
    )

    #expect(openAICompatibleProvider.capabilities.supportsStreaming == false)
    #expect(openRouterProvider.capabilities.supportsStreaming == false)
    #expect(togetherProvider.capabilities.supportsStreaming == false)

    #expect(openAICompatibleProvider.capabilities.contextLength == 1_000_000)
    #expect(openAICompatibleProvider.capabilities.maxOutputTokens == 128_000)
    #expect(openRouterProvider.capabilities.contextLength == 1_000_000)
    #expect(openRouterProvider.capabilities.maxOutputTokens == 128_000)
    #expect(togetherProvider.capabilities.contextLength == 1_000_000)
    #expect(togetherProvider.capabilities.maxOutputTokens == 128_000)

    // The streaming entry point must fail for each provider.
    await #expect(throws: TachikomaError.self) {
        _ = try await fableProvider.streamText(request: ProviderRequest(messages: [.user("hi")]))
    }
    await #expect(throws: TachikomaError.self) {
        _ = try await openAICompatibleProvider.streamText(request: ProviderRequest(messages: [.user("hi")]))
    }
    await #expect(throws: TachikomaError.self) {
        _ = try await openRouterProvider.streamText(request: ProviderRequest(messages: [.user("hi")]))
    }
    await #expect(throws: TachikomaError.self) {
        _ = try await togetherProvider.streamText(request: ProviderRequest(messages: [.user("hi")]))
    }
}
@Test
@ -400,3 +937,49 @@ struct AnthropicInterleavedDefaultsTests {
#expect(assistant.first?["type"] as? String == "text")
}
}
/// `URLProtocol` stub that answers every request with a fixed HTTP 200 JSON message
/// containing one signed `thinking` block followed by one `text` block.
///
/// Install it through `URLSessionConfiguration.protocolClasses` to keep a test off the network.
private final class AnthropicIdentityURLProtocol: URLProtocol {
    /// Claims every request so nothing falls through to a real loader.
    override class func canInit(with _: URLRequest) -> Bool {
        true
    }

    /// Requests are served as-is; no canonicalization is applied.
    override class func canonicalRequest(for request: URLRequest) -> URLRequest {
        request
    }

    /// Delivers the canned response, or fails the load when no `HTTPURLResponse` can be
    /// built for the request URL.
    override func startLoading() {
        guard
            let url = self.request.url,
            let response = HTTPURLResponse(
                url: url,
                statusCode: 200,
                httpVersion: nil,
                headerFields: ["Content-Type": "application/json"],
            ) else
        {
            self.client?.urlProtocol(self, didFailWithError: TachikomaError.invalidInput("Missing mock response"))
            return
        }

        // Built from the UTF-8 view, which is non-optional, so there is no fallback path
        // that could silently serve an empty body.
        let body = Data(
            """
            {
            "id": "msg_test",
            "type": "message",
            "role": "assistant",
            "model": "claude-fable-5",
            "content": [
            {"type": "thinking", "thinking": "private", "signature": "sig"},
            {"type": "text", "text": "ok"}
            ],
            "stop_reason": "end_turn",
            "usage": {"input_tokens": 1, "output_tokens": 2}
            }
            """.utf8
        )

        self.client?.urlProtocol(self, didReceive: response, cacheStoragePolicy: .notAllowed)
        self.client?.urlProtocol(self, didLoad: body)
        self.client?.urlProtocolDidFinishLoading(self)
    }

    /// Nothing to cancel: the whole response is delivered synchronously in `startLoading()`.
    override func stopLoading() {}
}

View File

@ -22,13 +22,15 @@ struct ProviderSystemTests {
@Test
func `Provider Factory - Anthropic Provider Creation`() async throws {
try await TestHelpers.withTestConfiguration(apiKeys: ["anthropic": "test-key"]) { config in
let model = Model.anthropic(.opus47)
let model = Model.anthropic(.fable5)
let provider = try ProviderFactory.createProvider(for: model, configuration: config)
#expect(provider.modelId == "claude-opus-4-7")
#expect(provider.modelId == "claude-fable-5")
#expect(provider.capabilities.supportsVision == true)
#expect(provider.capabilities.supportsTools == true)
#expect(provider.capabilities.supportsStreaming == true)
#expect(provider.capabilities.supportsStreaming == false)
#expect(provider.capabilities.contextLength == 1_000_000)
#expect(provider.capabilities.maxOutputTokens == 128_000)
}
}
@ -138,6 +140,7 @@ struct ProviderSystemTests {
#expect(Model.openai(.gpt5Mini).supportsVision == true)
#expect(Model.openai(.custom("text-only-openai")).supportsVision == false)
#expect(Model.anthropic(.fable5).supportsVision == true)
#expect(Model.anthropic(.opus4).supportsVision == true)
#expect(Model.anthropic(.sonnet46).supportsVision == true)
@ -153,6 +156,7 @@ struct ProviderSystemTests {
#expect(Model.openai(.gpt55).supportsTools == true)
#expect(Model.openai(.gpt55).supportsTools == true)
#expect(Model.anthropic(.fable5).supportsTools == true)
#expect(Model.anthropic(.opus4).supportsTools == true)
#expect(Model.anthropic(.sonnet46).supportsTools == true)
@ -167,6 +171,19 @@ struct ProviderSystemTests {
func `Model Capabilities - Streaming Support`() {
    // Pins the model-level `supportsStreaming` flag: Fable 5 and Opus 4.8 ids report false
    // (directly, via OpenRouter, and via an OpenAI-compatible endpoint); the rest report true.
    let compatibleEndpoint = "https://example.test"

    let streamingModels: [Model] = [
        .openai(.gpt55),
        .anthropic(.opus4),
        .anthropic(.opus47),
        .openaiCompatible(modelId: "sonnet4-local", baseURL: compatibleEndpoint),
        .grok(.grok43),
        .ollama(.llama33),
    ]
    for model in streamingModels {
        #expect(model.supportsStreaming == true)
    }

    let nonStreamingModels: [Model] = [
        .anthropic(.opus48),
        .anthropic(.fable5),
        .openRouter(modelId: "anthropic/claude-fable-5"),
        .openRouter(modelId: "anthropic/claude-opus-4-8"),
        .openaiCompatible(modelId: "anthropic/claude-fable-5", baseURL: compatibleEndpoint),
    ]
    for model in nonStreamingModels {
        #expect(model.supportsStreaming == false)
    }
}

View File

@ -204,6 +204,7 @@ struct OpenAIResponsesProviderTests {
choices: nil,
usage: nil,
metadata: nil,
incompleteDetails: nil,
)
let providerResponse = try OpenAIResponsesProvider.convertToProviderResponse(response)
@ -216,6 +217,159 @@ struct OpenAIResponsesProviderTests {
#expect(providerResponse.finishReason == .toolCalls)
}
/// Decodes an `incomplete` Responses payload whose `incomplete_details.reason` is
/// `content_filter`, and checks that the converted provider response has no text and a
/// `.contentFilter` finish reason.
@Test
func `GPT-5 incomplete content filter response maps finish reason`() throws {
    let payload = try #require("""
    {
    "id": "resp_1",
    "object": "response",
    "created_at": 0,
    "status": "incomplete",
    "model": "gpt-5",
    "output": [
    {
    "id": "out_1",
    "type": "message",
    "status": "incomplete",
    "role": "assistant",
    "content": [
    { "type": "output_text", "text": "blocked partial" }
    ]
    }
    ],
    "incomplete_details": { "reason": "content_filter" }
    }
    """.data(using: .utf8))
    let response = try JSONDecoder().decode(OpenAIResponsesResponse.self, from: payload)

    let providerResponse = try OpenAIResponsesProvider.convertToProviderResponse(response)

    // Assert on the decoded value rather than on a hand-built fixture's own literal,
    // so the status check can fail if decoding regresses.
    #expect(response.status == "incomplete")
    #expect(providerResponse.text.isEmpty)
    #expect(providerResponse.finishReason == .contentFilter)
}
/// Builds an `incomplete` response whose message holds partial text plus a tool call, with
/// `content_filter` as the incomplete reason, and checks that conversion yields no text,
/// no tool calls, and a `.contentFilter` finish reason.
@Test
func `GPT-5 incomplete content filter discards parsed tool calls`() throws {
    let filteredMessage = OpenAIResponsesResponse.ResponsesOutput(
        id: "out_1",
        type: "message",
        status: "incomplete",
        content: [
            .init(type: "output_text", text: "blocked partial", toolCall: nil),
            .init(
                type: "tool_call",
                text: nil,
                toolCall: OpenAIResponsesResponse.ResponsesToolCall(
                    id: "call_1",
                    type: "function",
                    function: .init(name: "see", arguments: "{\"mode\":\"screen\"}"),
                ),
            ),
        ],
        role: "assistant",
        toolCall: nil,
    )
    let filteredResponse = OpenAIResponsesResponse(
        id: "resp_1",
        object: "response",
        createdAt: 0,
        created: nil,
        status: "incomplete",
        model: "gpt-5",
        output: [filteredMessage],
        choices: nil,
        usage: nil,
        metadata: nil,
        incompleteDetails: .init(reason: "content_filter"),
    )

    let converted = try OpenAIResponsesProvider.convertToProviderResponse(filteredResponse)

    #expect(converted.text.isEmpty)
    #expect(converted.toolCalls == nil)
    #expect(converted.finishReason == .contentFilter)
}
/// Builds a `completed` response whose only content part is a `refusal`, and checks that
/// conversion yields no text, no tool calls, and a `.contentFilter` finish reason.
@Test
func `GPT-5 completed refusal output maps to content filter`() throws {
    let refusalMessage = OpenAIResponsesResponse.ResponsesOutput(
        id: "out_1",
        type: "message",
        status: "completed",
        content: [.init(type: "refusal", refusal: "I cannot help with that.")],
        role: "assistant",
        toolCall: nil,
    )
    let refusalResponse = OpenAIResponsesResponse(
        id: "resp_1",
        object: "response",
        createdAt: 0,
        created: nil,
        status: "completed",
        model: "gpt-5",
        output: [refusalMessage],
        choices: nil,
        usage: nil,
        metadata: nil,
        incompleteDetails: nil,
    )

    let converted = try OpenAIResponsesProvider.convertToProviderResponse(refusalResponse)

    #expect(converted.text.isEmpty)
    #expect(converted.toolCalls == nil)
    #expect(converted.finishReason == .contentFilter)
}
/// Decodes a chat-completion-shaped payload (`choices` instead of `output`) whose choice
/// finished with `content_filter`, and checks that conversion yields no text, no tool
/// calls, and a `.contentFilter` finish reason.
@Test
func `Alternate choices content filter suppresses text and tool calls`() throws {
    let payload = try #require("""
    {
    "id": "chatcmpl_1",
    "object": "chat.completion",
    "created": 0,
    "model": "gpt-5",
    "choices": [
    {
    "index": 0,
    "message": {
    "role": "assistant",
    "content": "blocked partial",
    "tool_calls": [
    {
    "id": "call_1",
    "type": "function",
    "function": {
    "name": "see",
    "arguments": "{\\"mode\\":\\"screen\\"}"
    }
    }
    ]
    },
    "finish_reason": "content_filter",
    "logprobs": null
    }
    ]
    }
    """.data(using: .utf8))
    let decoded = try JSONDecoder().decode(OpenAIResponsesResponse.self, from: payload)

    let converted = try OpenAIResponsesProvider.convertToProviderResponse(decoded)

    #expect(converted.text.isEmpty)
    #expect(converted.toolCalls == nil)
    #expect(converted.finishReason == .contentFilter)
}
@Test
func `Responses provider hits /v1/responses and encodes body`() async throws {
let config = TachikomaConfiguration(loadFromEnvironment: false)
@ -528,18 +682,198 @@ struct OpenAIResponsesProviderTests {
let stream = try await provider.streamText(request: self.sampleRequest)
var collected = ""
var receivedDone = false
for try await delta in stream {
switch delta.type {
case .textDelta:
collected.append(delta.content ?? "")
case .done:
break
receivedDone = true
case .toolCall, .toolResult, .reasoning:
break
}
}
#expect(collected == "Hello world")
#expect(receivedDone)
}
}
/// Streams a mocked Responses event sequence that adds a `function_call` item, completes
/// its arguments, then completes the response; checks that a tool-call delta is emitted
/// and that the final delta reports `.toolCalls` as the finish reason.
@Test
func `Responses provider marks completed tool streams as tool calls`() async throws {
    let config = TachikomaConfiguration(loadFromEnvironment: false)
    config.setAPIKey("live-openai", for: .openai)

    try await self.withMockedSession { request in
        #expect(request.url?.path == "/v1/responses")
        let itemAdded = Self.streamEventJSON([
            "type": "response.output_item.added",
            "item": [
                "id": "item_1",
                "type": "function_call",
                "name": "lookup",
            ],
        ])
        let argumentsDone = Self.streamEventJSON([
            "type": "response.function_call_arguments.done",
            "item_id": "item_1",
            "arguments": #"{"query":"weather"}"#,
        ])
        let completed = Self.streamEventJSON(["type": "response.completed"])
        let payload = Self.responsesStreamPayload(chunks: [itemAdded, argumentsDone, completed])
        return NetworkMocking.streamResponse(for: request, data: payload)
    } operation: { session in
        let provider = try OpenAIResponsesProvider(model: .gpt55, configuration: config, session: session)
        let deltas = try await provider.streamText(request: self.sampleRequest)

        var observedToolCall = false
        var terminalReason: FinishReason?
        for try await delta in deltas {
            observedToolCall = observedToolCall || delta.type == .toolCall
            if delta.type == .done {
                terminalReason = delta.finishReason
            }
        }

        #expect(observedToolCall)
        #expect(terminalReason == .toolCalls)
    }
}
/// Streams one text chunk followed by a `response.incomplete` event whose
/// `incomplete_details.reason` is `content_filter`, then expects the text to be
/// collected and the final delta to carry `.contentFilter`.
@Test
func `Responses provider maps incomplete content filter stream finish reason`() async throws {
    let config = TachikomaConfiguration(loadFromEnvironment: false)
    config.setAPIKey("live-openai", for: .openai)

    try await self.withMockedSession { request in
        #expect(request.url?.path == "/v1/responses")

        let partialChunk = Self.streamChunkJSON(content: "partial", finishReason: nil)
        let incompleteEvent = Self.streamEventJSON([
            "type": "response.incomplete",
            "response": [
                "incomplete_details": ["reason": "content_filter"],
            ],
        ])

        let body = Self.responsesStreamPayload(chunks: [partialChunk, incompleteEvent])
        return NetworkMocking.streamResponse(for: request, data: body)
    } operation: { session in
        let provider = try OpenAIResponsesProvider(model: .gpt55, configuration: config, session: session)
        let deltas = try await provider.streamText(request: self.sampleRequest)

        var text = ""
        var finalReason: FinishReason?
        for try await delta in deltas {
            switch delta.type {
            case .textDelta:
                text.append(delta.content ?? "")
            case .done:
                finalReason = delta.finishReason
            case .toolCall, .toolResult, .reasoning:
                break
            }
        }

        #expect(text == "partial")
        #expect(finalReason == .contentFilter)
    }
}
/// Streams refusal delta/done events followed by `response.completed` and
/// expects the final delta to report `.contentFilter`.
@Test
func `Responses provider maps refusal stream events to content filter`() async throws {
    let config = TachikomaConfiguration(loadFromEnvironment: false)
    config.setAPIKey("live-openai", for: .openai)

    try await self.withMockedSession { request in
        #expect(request.url?.path == "/v1/responses")

        let refusalDelta = Self.streamEventJSON([
            "type": "response.refusal.delta",
            "delta": "no",
        ])
        let refusalDone = Self.streamEventJSON(["type": "response.refusal.done"])
        let completed = Self.streamEventJSON(["type": "response.completed"])

        let body = Self.responsesStreamPayload(chunks: [refusalDelta, refusalDone, completed])
        return NetworkMocking.streamResponse(for: request, data: body)
    } operation: { session in
        let provider = try OpenAIResponsesProvider(model: .gpt55, configuration: config, session: session)
        let deltas = try await provider.streamText(request: self.sampleRequest)

        var finalReason: FinishReason?
        for try await delta in deltas {
            // Only the terminating delta is of interest here.
            guard delta.type == .done else { continue }
            finalReason = delta.finishReason
        }

        #expect(finalReason == .contentFilter)
    }
}
/// Streams a single `response.failed` event and expects consuming the stream to
/// throw a `TachikomaError.apiError` whose message names the event type and the
/// nested error message.
@Test
func `Responses provider throws on failed stream event`() async throws {
    let config = TachikomaConfiguration(loadFromEnvironment: false)
    config.setAPIKey("live-openai", for: .openai)

    try await self.withMockedSession { request in
        #expect(request.url?.path == "/v1/responses")

        let failedEvent = Self.streamEventJSON([
            "type": "response.failed",
            "response": [
                "error": [
                    "message": "stream failed after partial output",
                ],
            ],
        ])

        let body = Self.responsesStreamPayload(chunks: [failedEvent])
        return NetworkMocking.streamResponse(for: request, data: body)
    } operation: { session in
        let provider = try OpenAIResponsesProvider(model: .gpt55, configuration: config, session: session)
        let deltas = try await provider.streamText(request: self.sampleRequest)

        do {
            // Drain the stream; reaching the next line means nothing was thrown.
            for try await _ in deltas {}
            Issue.record("Expected stream failure")
        } catch let error as TachikomaError {
            if case let .apiError(message) = error {
                #expect(message.contains("response.failed"))
                #expect(message.contains("stream failed after partial output"))
            } else {
                Issue.record("Expected apiError, got \(error)")
            }
        }
    }
}
/// Streams a single top-level `error` event and expects consuming the stream to
/// throw a `TachikomaError.apiError` that carries the event's message.
@Test
func `Responses provider throws on error stream event`() async throws {
    let config = TachikomaConfiguration(loadFromEnvironment: false)
    config.setAPIKey("live-openai", for: .openai)

    try await self.withMockedSession { request in
        #expect(request.url?.path == "/v1/responses")

        let errorEvent = Self.streamEventJSON([
            "type": "error",
            "message": "top-level stream error",
        ])

        let body = Self.responsesStreamPayload(chunks: [errorEvent])
        return NetworkMocking.streamResponse(for: request, data: body)
    } operation: { session in
        let provider = try OpenAIResponsesProvider(model: .gpt55, configuration: config, session: session)
        let deltas = try await provider.streamText(request: self.sampleRequest)

        do {
            // Drain the stream; reaching the next line means nothing was thrown.
            for try await _ in deltas {}
            Issue.record("Expected stream failure")
        } catch let error as TachikomaError {
            if case let .apiError(message) = error {
                // NOTE(review): the mocked message text itself contains "error", so this
                // check alone does not prove the event type is part of the message.
                #expect(message.contains("error"))
                #expect(message.contains("top-level stream error"))
            } else {
                Issue.record("Expected apiError, got \(error)")
            }
        }
    }
}
@ -657,6 +991,11 @@ struct OpenAIResponsesProviderTests {
return String(data: data, encoding: .utf8)!
}
/// Serializes a stream event dictionary to its JSON text.
///
/// - Parameter event: A JSON-compatible dictionary describing one stream event.
/// - Returns: The UTF-8 JSON string for `event`.
///
/// Test-only helper: serialization or decoding failure traps via `try!` / `!`.
private static func streamEventJSON(_ event: [String: Any]) -> String {
    let encoded = try! JSONSerialization.data(withJSONObject: event)
    let text = String(data: encoded, encoding: .utf8)
    return text!
}
private func withMockedSession<T>(
handler: @Sendable @escaping (URLRequest) throws -> (HTTPURLResponse, Data),
operation: (URLSession) async throws -> T,

View File

@ -449,6 +449,37 @@ struct ProviderEndToEndTests {
}
}
/// Points the MiniMax provider at a custom base URL, returns a mocked response
/// containing a thinking block, and checks the reasoning metadata attached to
/// the resulting thinking message (provider, model, signature, base URL).
@Test
func `MiniMax reasoning metadata is bound to configured endpoint`() async throws {
    let baseURL = "https://minimax-proxy.test/anthropic?tenant=a"

    try await NetworkMocking.withMockedNetwork { request in
        // The request must reach the configured proxy host with bearer auth.
        #expect(request.url?.host == "minimax-proxy.test")
        #expect(request.value(forHTTPHeaderField: "Authorization") == "Bearer live-minimax")

        let body = Self.anthropicPayloadWithThinking(
            text: "MiniMax ok",
            thinking: "native-thought",
            signature: "sig-mm",
        )
        return NetworkMocking.jsonResponse(for: request, data: body)
    } operation: {
        let config = Self.makeConfiguration { configuration in
            configuration.setAPIKey("live-minimax", for: .minimax)
            configuration.setBaseURL(baseURL, for: .minimax)
        }
        let provider = try ProviderFactory.createProvider(for: .minimax(.m27), configuration: config)
        let response = try await provider.generateText(request: Self.basicRequest)

        let thinkingMessage = try #require(
            response.assistantMessages.first(where: { $0.channel == .thinking }),
        )
        let customData = try #require(thinkingMessage.metadata?.customData)

        #expect(customData["tachikoma.reasoning.provider"] == "minimax")
        #expect(customData["tachikoma.reasoning.model"] == "MiniMax-M2.7")
        #expect(customData["anthropic.thinking.signature"] == "sig-mm")
        #expect(customData["tachikoma.reasoning.base_url"] == ReasoningEndpointIdentity.canonical(baseURL))
    }
}
@Test
func `MiniMax China provider uses China endpoint and bearer auth`() async throws {
try await NetworkMocking.withMockedNetwork { request in
@ -591,6 +622,25 @@ struct ProviderEndToEndTests {
return try! JSONSerialization.data(withJSONObject: dict)
}
/// Builds a mocked Anthropic-style message response whose content holds a
/// thinking block followed by a text block.
///
/// - Parameters:
///   - text: Text placed in the `text` content block.
///   - thinking: Text placed in the `thinking` content block.
///   - signature: Signature attached to the `thinking` content block.
/// - Returns: The JSON-encoded response body.
///
/// Test-only helper: serialization failure traps via `try!`.
private static func anthropicPayloadWithThinking(text: String, thinking: String, signature: String) -> Data {
    let thinkingBlock = ["type": "thinking", "thinking": thinking, "signature": signature]
    let textBlock = ["type": "text", "text": text]
    let usage = [
        "input_tokens": 12,
        "output_tokens": 6,
    ]

    let message: [String: Any] = [
        "id": "msg_1",
        "type": "message",
        "role": "assistant",
        "content": [thinkingBlock, textBlock],
        "model": "MiniMax-M2.7",
        "stop_reason": "end_turn",
        "usage": usage,
    ]
    return try! JSONSerialization.data(withJSONObject: message)
}
private static func googleStreamPayload(text: String) -> Data {
let json: [String: Any] = [
"candidates": [

View File

@ -79,6 +79,70 @@ struct UIIntegrationTests {
#expect(uiMessages[0].toolCalls?.count == 1)
}
/// Converts a thinking-channel assistant message carrying an Anthropic thinking
/// signature plus a regular assistant message, and expects only the regular
/// message to appear in the UI messages.
@Test
func `Thinking ModelMessages are hidden from UI messages`() {
    let thinking = ModelMessage(
        role: .assistant,
        content: [.text("private reasoning")],
        channel: .thinking,
        metadata: .init(customData: ["anthropic.thinking.signature": "sig"]),
    )
    let visible = ModelMessage(role: .assistant, content: [.text("Visible answer")])

    let uiMessages = [thinking, visible].toUIMessages()

    #expect(uiMessages.count == 1)
    // `#expect` records a failure and keeps going, so read the first element
    // optionally: an empty result then fails the test instead of trapping on
    // an out-of-range subscript.
    #expect(uiMessages.first?.content == "Visible answer")
}
/// Converts a thinking-channel assistant message that has no metadata and
/// expects it to be kept in the UI messages.
@Test
func `Provider-neutral thinking ModelMessages remain visible in UI messages`() {
    let thinking = ModelMessage(
        role: .assistant,
        content: [.text("visible reasoning")],
        channel: .thinking,
    )

    let uiMessages = [thinking].toUIMessages()

    #expect(uiMessages.count == 1)
    // `#expect` records a failure and keeps going, so read the first element
    // optionally: an empty result then fails the test instead of trapping on
    // an out-of-range subscript.
    #expect(uiMessages.first?.content == "visible reasoning")
}
/// Converts a thinking-channel assistant message tagged with reasoning
/// provider/model metadata plus a regular assistant message, and expects only
/// the regular message to appear in the UI messages.
@Test
func `Provider-native reasoning ModelMessages are hidden from UI messages`() {
    let reasoning = ModelMessage(
        role: .assistant,
        content: [.text("openrouter reasoning")],
        channel: .thinking,
        metadata: .init(customData: [
            "tachikoma.reasoning.provider": "openrouter",
            "tachikoma.reasoning.model": "anthropic/claude-fable-5",
        ]),
    )
    let visible = ModelMessage(role: .assistant, content: [.text("Visible answer")])

    let uiMessages = [reasoning, visible].toUIMessages()

    #expect(uiMessages.count == 1)
    // `#expect` records a failure and keeps going, so read the first element
    // optionally: an empty result then fails the test instead of trapping on
    // an out-of-range subscript.
    #expect(uiMessages.first?.content == "Visible answer")
}
/// Converts an empty assistant message marked with the
/// `tachikoma.internal.boundary` = `reasoning_only` metadata plus a regular
/// assistant message, and expects only the regular message to appear in the UI
/// messages.
@Test
func `Synthetic reasoning boundaries are hidden from UI messages`() {
    let boundary = ModelMessage(
        role: .assistant,
        content: [.text("")],
        metadata: .init(customData: ["tachikoma.internal.boundary": "reasoning_only"]),
    )
    let visible = ModelMessage(role: .assistant, content: [.text("Visible answer")])

    let uiMessages = [boundary, visible].toUIMessages()

    #expect(uiMessages.count == 1)
    // `#expect` records a failure and keeps going, so read the first element
    // optionally: an empty result then fails the test instead of trapping on
    // an out-of-range subscript.
    #expect(uiMessages.first?.content == "Visible answer")
}
@Test
func `StreamTextResult to UI Message Stream`() async {
// Create a mock stream

View File

@ -123,6 +123,16 @@ struct UsageTrackingTests {
#expect(gpt5MiniCost.total == 5.00)
// Test Anthropic pricing
let claudeFableCost = calculator.calculateCost(for: .anthropic(.fable5), usage: usage)
#expect(claudeFableCost.input == 10.00)
#expect(claudeFableCost.output == 50.00)
#expect(claudeFableCost.total == 60.00)
let customClaudeFableCost = calculator.calculateCost(for: .anthropic(.custom("claude-fable-5")), usage: usage)
#expect(customClaudeFableCost.input == 10.00)
#expect(customClaudeFableCost.output == 50.00)
#expect(customClaudeFableCost.total == 60.00)
let claudeOpusCost = calculator.calculateCost(for: .anthropic(.opus48), usage: usage)
#expect(claudeOpusCost.input == 5.00)
#expect(claudeOpusCost.output == 25.00)

View File

@ -4,7 +4,8 @@ Tachikoma ships with a built-in model catalog (`CaseIterable` enums) plus suppor
## Default
- `LanguageModel.default`: `claude-opus-4-7`
- `LanguageModel.default`: `claude-opus-4-8`
- `LanguageModel.defaultStreaming`: `gpt-5.5`
## OpenAI (`LanguageModel.OpenAI`)
@ -17,6 +18,8 @@ Notes:
## Anthropic (`LanguageModel.Anthropic`)
- `claude-fable-5` (1M context, 128K max output, non-streaming, explicit opt-in)
- `claude-opus-4-8` (1M context, 128K max output, non-streaming until refusal rollback is streaming-safe)
- `claude-opus-4-7`
- `claude-opus-4-5`
- `claude-opus-4-1-20250805`