claude-code-proxy/shared/frontend/types.ts

308 lines
6.5 KiB
TypeScript
Raw Normal View History

Local fork: hardening + ops improvements (timeout knob, demotion, /livez, drain) This commit captures both the prior accumulated work-in-progress (framework migration web/→svelte/, postgres storage, conversation viewer, dashboard auth, OpenAPI spec, integration tests) AND today's operational improvements layered on top. History wasn't checkpointed incrementally; happy to split it via interactive rebase if a reviewer wants smaller commits. Today's changes (in addition to the older WIP): 1. Configurable upstream response-header timeout - ANTHROPIC_RESPONSE_HEADER_TIMEOUT env (default 300s) - Replaces hardcoded 300s in provider/anthropic.go that was firing on opus + 1M-context + extended thinking non-streaming requests - Files: internal/config/config.go, internal/provider/anthropic.go 2. Structured forward-error diagnostic logging - When a forward to Anthropic fails, log a single key=value line with request_id, model, stream, body_bytes, has_thinking, anthropic_beta, query, elapsed, ctx_err — alongside the existing human-readable error line for back-compat - Files: internal/handler/handlers.go (logForwardFailure) 3. Full SSE protocol passthrough + Flusher fix - handler/handlers.go: forward all SSE lines verbatim (event:, id:, retry:, : comments, blank-line terminators), not only data:. Previous code produced malformed SSE for strict parsers. - middleware/logging.go: explicit Flush() method on responseWriter. Embedding http.ResponseWriter (interface) does not auto-promote Flush(), so every w.(http.Flusher) check in the streaming handler was returning ok=false and SSE writes buffered in net/http until the body closed. 4. Non-streaming → streaming demotion (feature-flagged) - ANTHROPIC_DEMOTE_NONSTREAMING env (default false) - When enabled and the routed provider is anthropic, force stream=true upstream for clients that asked for stream=false. 
Receive SSE, accumulate via accumulateSSEToMessage (handles text, tool_use with partial_json reassembly, thinking, signature, citations_delta, usage merge), and synthesize a single non-streaming JSON response. - Eliminates the ResponseHeaderTimeout class of failure entirely. - Body rewrite uses json.Decoder + UseNumber() to preserve integer precision in unknown nested fields (tool inputs from prior turns). - Files: internal/config/config.go, internal/handler/handlers.go, cmd/proxy/main.go, cmd/proxy/main_test.go 5. Live operational state: /livez gauge + graceful drain - New internal/runtime package: atomic in-flight counter + draining flag - New middleware/inflight.go: increments runtime gauge, applied to /v1/* subrouter so Messages, ChatCompletions, and ProxyPassthrough are all counted - /v1/* moved to a gorilla/mux subrouter so the InFlight middleware applies surgically; /health, /livez, /openapi.* remain on parent router (unauthenticated, uncounted) - Health handler returns 503 draining when runtime.IsDraining() is true, so Traefik stops routing to a slot before drain begins - New /livez handler returns {status, in_flight, draining, timestamp} - SIGTERM handler in main.go: SetDraining(true), poll for in_flight==0 with 32-min ceiling and 1s tick (logs every 10s), then srv.Shutdown - Auth bypass list extended with /livez - Files: internal/runtime/runtime.go (new), internal/middleware/inflight.go (new), internal/middleware/auth.go, internal/handler/handlers.go (Health, Livez, runtime import), cmd/proxy/main.go (subrouter, drain loop) 6. OpenAPI spec updates - Document Health 503 response and new DrainingResponse schema - Add /livez path with LivezResponse schema - Files: internal/handler/openapi.go Verified: go build ./... clean, go test ./... all pass, go vet clean. Three rounds of codex peer review across changes 1-5; all feedback addressed (citations_delta, json.Number precision, drain-loop logging via lastLog timestamp, PathPrefix tightened to "/v1/").
2026-05-02 15:15:58 -06:00
/** A JSON leaf value: `null`, a boolean, a number, or a string. */
export type JsonPrimitive = null | boolean | number | string;

/**
 * Any JSON value. Recursive: a value is a primitive, an object of values,
 * or an array of values.
 */
export type JsonValue =
  | JsonPrimitive
  | JsonObject
  | JsonValue[];

/**
 * A JSON object: arbitrary string keys mapped to JSON values.
 *
 * Kept as an explicit index signature (not `Record<string, JsonValue>`)
 * so the mutual recursion with `JsonValue` resolves without a circularity error.
 */
export type JsonObject = {
  [key: string]: JsonValue;
};
/**
 * A single header-manipulation rule.
 *
 * `find` and `value` are optional at the type level; which of them each
 * `action` actually consumes is not visible in this file.
 */
export interface HeaderRule {
  /** Whether the rule is switched on. */
  enabled: boolean;
  /** Name of the header the rule targets. */
  header: string;
  /** Operation to apply to the header. */
  action: 'block' | 'set' | 'replace';
  /** NOTE(review): presumably the text searched for by `replace` — confirm against the rule engine. */
  find?: string;
  /** NOTE(review): presumably the new or replacement text for `set` / `replace` — confirm. */
  value?: string;
}
/** Proxy settings: two lists of header rules, one per traffic direction. */
export interface ProxySettings {
  /** Rules listed for request headers. */
  requestHeaderRules: Array<HeaderRule>;
  /** Rules listed for response headers. */
  responseHeaderRules: Array<HeaderRule>;
}
/** Cache-control marker that can be attached to a system message. */
export interface CacheControl {
  /** Cache-control kind; typed as a free-form string (no literal union is enforced here). */
  type: string;
}
/** One entry of a request body's `system` array. */
export interface SystemMessage {
  /** Block kind; free-form string. */
  type: string;
  /** Text of the system message. */
  text: string;
  /** Optional cache-control marker for this entry. */
  cache_control?: CacheControl;
}
/**
 * Input payload of a tool call.
 *
 * Open-ended: any string key is allowed (via `Record<string, unknown>`).
 * The members below only give precise types to well-known keys.
 *
 * NOTE(review): the key names look like arguments of file-edit, shell,
 * search and todo-list tools — confirm against the tools actually proxied.
 */
export interface ToolInput extends Record<string, unknown> {
  // File-oriented keys.
  file_path?: string;
  path?: string;
  content?: string;
  old_string?: string;
  new_string?: string;
  replace_all?: boolean;

  // Paging keys (both numeric).
  offset?: number;
  limit?: number;

  // Search-oriented keys.
  pattern?: string;
  glob?: string;

  // Command / free-text keys.
  command?: string;
  description?: string;
  prompt?: string;

  /** List of todo items carried in the tool input. */
  todos?: Array<TodoItem>;
}
/**
 * Schema describing a tool's parameters.
 *
 * Open-ended: keys other than the three typed below are accepted as `unknown`.
 */
export interface ToolParameterSchema extends Record<string, unknown> {
  /** Schema type: a single name or a list of names. */
  type?: string[] | string;
  /** Names of the required properties. */
  required?: Array<string>;
  /** Per-property sub-schemas, keyed by property name; each is an untyped object. */
  properties?: { [name: string]: Record<string, unknown> };
}
/**
 * A tool declared in a request body's `tools` array.
 *
 * The parameter schema may appear under either `input_schema` or
 * `parameters`; both are optional.
 * NOTE(review): presumably these correspond to two different upstream API
 * dialects — confirm which producers emit which key.
 */
export interface ToolDefinition {
  /** Tool name. */
  name: string;
  /** Human-readable description of the tool. */
  description: string;
  /** Parameter schema under the `parameters` key. */
  parameters?: ToolParameterSchema;
  /** Parameter schema under the `input_schema` key. */
  input_schema?: ToolParameterSchema;
}
/**
 * One todo-list entry.
 *
 * `status` and `priority` are required; the label may arrive under any of
 * several optional text keys, and extra keys are accepted as `unknown`.
 */
export interface TodoItem extends Record<string, unknown> {
  /** Progress state of the item. */
  status: 'pending' | 'in_progress' | 'completed';
  /** Priority bucket of the item. */
  priority: 'high' | 'medium' | 'low';

  // Optional text keys. Which one a given producer fills in is not
  // determined by this type; consumers should be ready for any of them.
  content?: string;
  title?: string;
  task?: string;
  text?: string;
  description?: string;
}
/**
 * Shared shape behind every content-block variant (module-private).
 *
 * Only `type` is required. Every other member is optional here and is
 * re-declared or tightened by the specific variants; unknown keys are
 * accepted as `unknown`.
 */
interface BaseContentBlock extends Record<string, unknown> {
  /** Discriminator naming the block kind. */
  type: string;

  // Identification.
  id?: string;
  name?: string;
  tool_use_id?: string;
  tool_call_id?: string;

  // Payload.
  text?: string;
  thinking?: string;
  input?: ToolInput;
  /** Untyped payload; must be narrowed before use. */
  content?: unknown;

  /** Error flag carried by the block. */
  is_error?: boolean;
}
/** Content block of kind `'text'`; unlike the base shape, `text` is required. */
export interface TextContentBlock extends BaseContentBlock {
  /** Required text payload. */
  text: string;
  type: 'text';
}
/**
 * Content block of kind `'tool_use'`.
 *
 * Everything besides `type` stays optional, so consumers must handle a
 * block with no `id`, `name` or `input`.
 */
export interface ToolUseContentBlock extends BaseContentBlock {
  type: 'tool_use';
  /** Name of the tool. */
  name?: string;
  /** Identifier of this block. */
  id?: string;
  /** Input payload of the tool call. */
  input?: ToolInput;
  /** Optional text carried by the block. */
  text?: string;
}
/**
 * Content block of kind `'tool_result'`.
 *
 * The originating call may be referenced through `tool_use_id` or
 * `tool_call_id`; both are optional.
 * NOTE(review): presumably two spellings of the same reference coming from
 * different upstream formats — confirm.
 */
export interface ToolResultContentBlock extends BaseContentBlock {
  type: 'tool_result';
  id?: string;
  tool_call_id?: string;
  tool_use_id?: string;
  /** Error flag carried by the result. */
  is_error?: boolean;
  /** Optional text carried by the block. */
  text?: string;
  /** Untyped result payload; narrow before use. */
  content?: unknown;
}
/**
 * Content block of kind `'image'`.
 *
 * Image data can sit in two places: nested under `source`, or flat on the
 * block as `data` / `media_type`. All of them are optional, so a consumer
 * has to check both.
 * NOTE(review): `data` is presumably base64 text — confirm with the producer.
 */
export interface ImageContentBlock extends BaseContentBlock {
  type: 'image';
  /** Flat form: media type of the image. */
  media_type?: string;
  /** Flat form: image data as a string. */
  data?: string;
  /** Nested form: all three members are required when `source` is present. */
  source?: {
    type: string;
    media_type: string;
    data: string;
  };
}
/** Content block of kind `'thinking'`. */
export interface ThinkingContentBlock extends BaseContentBlock {
  /** Thinking text; optional, so it may be missing on a given block. */
  thinking?: string;
  type: 'thinking';
}
/**
 * Catch-all content block for kinds that have no dedicated interface.
 *
 * `type` stays a plain `string`, so this variant also matches the literal
 * kinds of the specific block interfaces.
 */
export interface GenericContentBlock extends BaseContentBlock {
  /** Any block kind. */
  type: string;
}
/**
 * Any content block: one of the specific variants or the catch-all.
 *
 * Because `GenericContentBlock.type` is `string`, comparing `type` against
 * a literal does not remove `GenericContentBlock` from the narrowed type.
 */
export type ContentBlock =
  // Text-like variants.
  | TextContentBlock
  | ThinkingContentBlock
  // Tool variants.
  | ToolUseContentBlock
  | ToolResultContentBlock
  // Media.
  | ImageContentBlock
  // Fallback for every other kind.
  | GenericContentBlock;
/**
 * Content of a message: plain text, one block, a list of blocks, or an
 * arbitrary object. Consumers must check the runtime shape before use.
 */
export type MessageContent =
  | string
  | ContentBlock
  | ContentBlock[]
  | Record<string, unknown>;
/** One entry of a request body's `messages` array. */
export interface RequestMessage {
  /** Message content in any of the `MessageContent` shapes. */
  content: MessageContent;
  /** Role of the message author; free-form string. */
  role: string;
}
/** Grading result attached to a request's prompt. */
export interface PromptGrade {
  /** Overall score. */
  score: number;
  /** Optional upper bound of `score`. */
  maxScore?: number;
  /** Per-criterion results, keyed by criterion name. */
  criteria: {
    [criterion: string]: { score: number; feedback: string };
  };
  /** Overall feedback text. */
  feedback: string;
  /** Rewritten prompt suggested by the grader. */
  improvedPrompt: string;
  /** When the grading happened, as a string (format not fixed by this type). */
  gradingTimestamp: string;
  /** Optional processing flag. */
  isProcessing?: boolean;
}
/**
 * A proxied request together with its parsed body, the recorded response
 * (if any) and an optional prompt grade.
 *
 * NOTE(review): this interface shares its name with the Fetch API's global
 * `Request`; consumers have to import it explicitly to get this type.
 */
export interface Request {
  /** Identifier of the request. */
  id: string;
  /** When the request was made, as a string (format not fixed by this type). */
  timestamp: string;
  /** HTTP method. */
  method: string;
  /** Endpoint the request was sent to. */
  endpoint: string;
  /** Request headers; each name maps to a list of values. */
  headers: { [name: string]: string[] };

  // Conversation placement (all optional).
  conversationId?: string;
  turnNumber?: number;
  isRoot?: boolean;

  // Model routing (both optional).
  originalModel?: string;
  routedModel?: string;

  /** Parsed request body; every member is optional. */
  body?: {
    model?: string;
    system?: Array<SystemMessage>;
    messages?: Array<RequestMessage>;
    tools?: Array<ToolDefinition>;
    stream?: boolean;
    max_tokens?: number;
    temperature?: number;
  };

  /** Recorded response; absent when there is none. */
  response?: {
    statusCode: number;
    /** Response headers; each name maps to a list of values. */
    headers: { [name: string]: string[] };
    /** Parsed response body; open-ended beyond `usage` and `content`. */
    body?: {
      /** Token usage counters; all optional. */
      usage?: {
        input_tokens?: number;
        output_tokens?: number;
        cache_read_input_tokens?: number;
        cache_creation_input_tokens?: number;
        service_tier?: string;
      };
      content?: MessageContent;
      [key: string]: unknown;
    };
    /** Response body as text. */
    bodyText?: string;
    /** Whether the response was streamed. */
    isStreaming: boolean;
    /** Streaming chunks as strings. */
    streamingChunks?: string[];
    /** Response time as a number (unit not specified by this type). */
    responseTime: number;
    /** Completion time as a string (format not fixed by this type). */
    completedAt: string;
  };

  /** Grade attached to the request's prompt, if any. */
  promptGrade?: PromptGrade;
}
/** List-level summary of one conversation. */
export interface ConversationSummary {
  /** Identifier of the conversation. */
  id: string;

  // Project the conversation belongs to.
  projectName: string;
  projectPath: string;

  /** Number of requests in the conversation. */
  requestCount: number;

  // Timing. Timestamps are strings; `duration` is a number whose unit is
  // not specified by this type.
  startTime: string;
  lastActivity: string;
  duration: number;

  // First and last message of the conversation, as strings.
  firstMessage: string;
  lastMessage: string;
}
/** A full conversation: session and project identity, time range, and messages. */
export interface Conversation {
  /** Identifier of the session. */
  sessionId: string;
  projectName: string;
  projectPath: string;

  // Time range as strings (format not fixed by this type).
  startTime: string;
  endTime: string;

  /** Message count reported alongside `messages`. */
  messageCount: number;

  /** Messages of the conversation. */
  messages: {
    /** Identifier of this message. */
    uuid: string;
    /** Identifier of the parent message, or `null` when there is none. */
    parentUuid: string | null;
    sessionId: string;
    type: 'user' | 'assistant' | 'system';
    /** Untyped message payload; narrow before use. */
    message: unknown;
    timestamp: string;
    isSidechain: boolean;
    userType: string;
    cwd: string;
    version: string;
  }[];
}
/**
 * Compact summary of one request.
 *
 * Only the identity, timestamp, method and endpoint are required.
 */
export interface RequestSummary {
  /** Identifier of the summarized request. */
  requestId: string;
  timestamp: string;
  method: string;
  endpoint: string;

  // Model information (all optional).
  model?: string;
  originalModel?: string;
  routedModel?: string;

  // Response outcome (optional).
  statusCode?: number;
  /** Response time as a number (unit not specified by this type). */
  responseTime?: number;
  stopReason?: string;

  /** Token usage counters; all optional. */
  usage?: {
    input_tokens?: number;
    output_tokens?: number;
    cache_read_input_tokens?: number;
    cache_creation_input_tokens?: number;
    service_tier?: string;
  };

  // Conversation information (optional).
  conversationHash?: string;
  messageCount?: number;
}
/** Requests grouped under one conversation hash, with aggregate figures. */
export interface ConversationGroup {
  /** Hash identifying the group. */
  conversationHash: string;
  /** Identifiers of the requests in the group. */
  requestIds: Array<string>;
  /** Summary of the latest request in the group. */
  latestRequest: RequestSummary;

  // Aggregates. `totalResponseTime` carries no unit in this type.
  turnCount: number;
  totalTokens: number;
  totalResponseTime: number;

  // Time range of the group, as strings.
  firstTimestamp: string;
  lastTimestamp: string;
}
/** Dashboard statistics payload: a list of per-day token figures. */
export interface DashboardStats {
  /** One entry per day. */
  dailyStats: Array<DailyTokens>;
}
/** Token and request totals for one day, with an optional breakdown. */
export interface DailyTokens {
  /** The day, as a string (format not fixed by this type). */
  date: string;
  /** Number of requests on that day. */
  requests: number;
  /** Number of tokens on that day. */
  tokens: number;
  /** Optional breakdown. NOTE(review): keys are presumably model names — confirm. */
  models?: {
    [model: string]: { tokens: number; requests: number };
  };
}
/** Hourly statistics payload: per-hour entries plus headline figures. */
export interface HourlyStatsResponse {
  /** One entry per hour. */
  hourlyStats: Array<HourlyTokens>;

  // Headline figures. `avgResponseTime` carries no unit in this type.
  todayRequests: number;
  todayTokens: number;
  avgResponseTime: number;
}
/** Token and request totals for one hour, with an optional breakdown. */
export interface HourlyTokens {
  /** The hour as a number. NOTE(review): presumably 0–23 — confirm the range and time zone. */
  hour: number;
  /** Optional display label for the hour. */
  label?: string;
  /** Number of requests in that hour. */
  requests: number;
  /** Number of tokens in that hour. */
  tokens: number;
  /** Optional breakdown. NOTE(review): keys are presumably model names — confirm. */
  models?: {
    [model: string]: { tokens: number; requests: number };
  };
}
/** Per-model statistics payload. */
export interface ModelStatsResponse {
  /** One entry per model. */
  modelStats: Array<ModelTokens>;
}
/** Token and request totals for one model. */
export interface ModelTokens {
  /** Name of the model. */
  model: string;
  /** Number of requests for the model. */
  requests: number;
  /** Number of tokens for the model. */
  tokens: number;
}
/**
 * Aggregate usage figures, overall and per model.
 *
 * Unlike most types in this module, the members use snake_case names.
 */
export interface UsageStats {
  // Optional date range, as strings (format not fixed by this type).
  start_date?: string;
  end_date?: string;

  // Overall totals.
  total_requests: number;
  total_input_tokens: number;
  total_output_tokens: number;
  total_cache_tokens: number;

  /** The same four counters broken down per model, keyed by model. */
  requests_by_model: {
    [model: string]: {
      request_count: number;
      input_tokens: number;
      output_tokens: number;
      cache_tokens: number;
    };
  };
}