This commit captures both the prior accumulated work-in-progress
(framework migration web/→svelte/, postgres storage, conversation
viewer, dashboard auth, OpenAPI spec, integration tests) AND today's
operational improvements layered on top. History wasn't checkpointed
incrementally; happy to split it via interactive rebase if a reviewer
wants smaller commits.
Today's changes (in addition to the older WIP):
1. Configurable upstream response-header timeout
- ANTHROPIC_RESPONSE_HEADER_TIMEOUT env (default 300s)
- Replaces hardcoded 300s in provider/anthropic.go that was firing
on opus + 1M-context + extended thinking non-streaming requests
- Files: internal/config/config.go, internal/provider/anthropic.go
2. Structured forward-error diagnostic logging
- When a forward to Anthropic fails, log a single key=value line
with request_id, model, stream, body_bytes, has_thinking,
anthropic_beta, query, elapsed, ctx_err — alongside the existing
human-readable error line for back-compat
- Files: internal/handler/handlers.go (logForwardFailure)
3. Full SSE protocol passthrough + Flusher fix
- handler/handlers.go: forward all SSE lines verbatim (event:, id:,
retry:, : comments, blank-line terminators), not only data:.
Previous code produced malformed SSE for strict parsers.
- middleware/logging.go: explicit Flush() method on responseWriter.
Embedding http.ResponseWriter (interface) does not auto-promote
Flush(), so every w.(http.Flusher) check in the streaming
handler was returning ok=false and SSE writes buffered in net/http
until the body closed.
4. Non-streaming → streaming demotion (feature-flagged)
- ANTHROPIC_DEMOTE_NONSTREAMING env (default false)
- When enabled and the routed provider is anthropic, force stream=true
upstream for clients that asked for stream=false. Receive SSE,
accumulate via accumulateSSEToMessage (handles text, tool_use with
partial_json reassembly, thinking, signature, citations_delta,
usage merge), and synthesize a single non-streaming JSON response.
- Eliminates the ResponseHeaderTimeout class of failure entirely.
- Body rewrite uses json.Decoder + UseNumber() to preserve integer
precision in unknown nested fields (tool inputs from prior turns).
- Files: internal/config/config.go, internal/handler/handlers.go,
cmd/proxy/main.go, cmd/proxy/main_test.go
5. Live operational state: /livez gauge + graceful drain
- New internal/runtime package: atomic in-flight counter + draining flag
- New middleware/inflight.go: increments runtime gauge, applied to
/v1/* subrouter so Messages, ChatCompletions, and ProxyPassthrough
are all counted
- /v1/* moved to a gorilla/mux subrouter so the InFlight middleware
applies surgically; /health, /livez, /openapi.* remain on parent
router (unauthenticated, uncounted)
- Health handler returns 503 draining when runtime.IsDraining() is
true, so Traefik stops routing to a slot before drain begins
- New /livez handler returns {status, in_flight, draining, timestamp}
- SIGTERM handler in main.go: SetDraining(true), poll for in_flight==0
with 32-min ceiling and 1s tick (logs every 10s), then srv.Shutdown
- Auth bypass list extended with /livez
- Files: internal/runtime/runtime.go (new),
internal/middleware/inflight.go (new),
internal/middleware/auth.go,
internal/handler/handlers.go (Health, Livez, runtime import),
cmd/proxy/main.go (subrouter, drain loop)
6. OpenAPI spec updates
- Document Health 503 response and new DrainingResponse schema
- Add /livez path with LivezResponse schema
- Files: internal/handler/openapi.go
Verified: go build ./... clean, go test ./... all pass, go vet clean.
Three rounds of codex peer review across changes 1-5; all feedback
addressed (citations_delta, json.Number precision, drain-loop logging
via lastLog timestamp, PathPrefix tightened to "/v1/").
158 lines
4.2 KiB
Go
package provider
|
|
|
|
import (
|
|
"compress/gzip"
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"path"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/seifghazi/claude-code-monitor/internal/config"
|
|
)
|
|
|
|
// AnthropicProvider forwards proxied requests to the Anthropic API using a
// dedicated HTTP client whose transport carries per-phase timeouts (see
// NewAnthropicProvider).
type AnthropicProvider struct {
	client *http.Client                    // upstream client, shared across requests for connection reuse
	config *config.AnthropicProviderConfig // base URL, API version, response-header timeout
}
|
|
|
func NewAnthropicProvider(cfg *config.AnthropicProviderConfig) Provider {
|
|
respHeaderTimeout := cfg.ResponseHeaderTimeout
|
|
if respHeaderTimeout <= 0 {
|
|
respHeaderTimeout = 300 * time.Second
|
|
}
|
|
return &AnthropicProvider{
|
|
client: &http.Client{
|
|
// No Client.Timeout: a global timeout would cancel long streaming
|
|
// responses mid-flight. Per-phase timeouts on the Transport plus the
|
|
// 30-min context in handlers.Messages bound the request instead.
|
|
Transport: &http.Transport{
|
|
DialContext: (&net.Dialer{
|
|
Timeout: 30 * time.Second,
|
|
KeepAlive: 30 * time.Second,
|
|
}).DialContext,
|
|
TLSHandshakeTimeout: 30 * time.Second,
|
|
// Tunable via ANTHROPIC_RESPONSE_HEADER_TIMEOUT — opus + extended
|
|
// thinking on large contexts can take longer than the 300s default.
|
|
ResponseHeaderTimeout: respHeaderTimeout,
|
|
ExpectContinueTimeout: 1 * time.Second,
|
|
MaxIdleConns: 100,
|
|
MaxIdleConnsPerHost: 10,
|
|
IdleConnTimeout: 90 * time.Second,
|
|
},
|
|
},
|
|
config: cfg,
|
|
}
|
|
}
|
|
|
|
func (p *AnthropicProvider) Name() string {
|
|
return "anthropic"
|
|
}
|
|
|
|
func (p *AnthropicProvider) ForwardRequest(ctx context.Context, originalReq *http.Request) (*http.Response, error) {
|
|
// Clone the request to avoid modifying the original
|
|
proxyReq := originalReq.Clone(ctx)
|
|
|
|
// Parse the configured base URL
|
|
baseURL, err := url.Parse(p.config.BaseURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse base URL '%s': %w", p.config.BaseURL, err)
|
|
}
|
|
|
|
if baseURL.Scheme == "" || baseURL.Host == "" {
|
|
return nil, fmt.Errorf("invalid base URL, scheme and host are required: %s", p.config.BaseURL)
|
|
}
|
|
|
|
// Update the destination URL
|
|
proxyReq.URL.Scheme = baseURL.Scheme
|
|
proxyReq.URL.Host = baseURL.Host
|
|
proxyReq.URL.Path = path.Join(baseURL.Path, originalReq.URL.Path)
|
|
|
|
// Preserve query parameters
|
|
proxyReq.URL.RawQuery = originalReq.URL.RawQuery
|
|
|
|
// Update request headers
|
|
proxyReq.RequestURI = ""
|
|
proxyReq.Host = baseURL.Host
|
|
|
|
// Remove hop-by-hop headers
|
|
removeHopByHopHeaders(proxyReq.Header)
|
|
|
|
// Add required headers if not present
|
|
if proxyReq.Header.Get("anthropic-version") == "" {
|
|
proxyReq.Header.Set("anthropic-version", p.config.Version)
|
|
}
|
|
|
|
// Handle Accept-Encoding: We accept gzip from upstream for efficiency,
|
|
// but we decompress before forwarding to the client. This is transparent
|
|
// to the client - they receive uncompressed data regardless of what they requested.
|
|
// We preserve gzip if client already requested it, otherwise add it.
|
|
clientEncoding := proxyReq.Header.Get("Accept-Encoding")
|
|
if clientEncoding == "" || !strings.Contains(clientEncoding, "gzip") {
|
|
proxyReq.Header.Set("Accept-Encoding", "gzip")
|
|
}
|
|
|
|
// Forward the request
|
|
resp, err := p.client.Do(proxyReq)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to forward request: %w", err)
|
|
}
|
|
|
|
// Handle gzip-encoded responses
|
|
if resp.Header.Get("Content-Encoding") == "gzip" {
|
|
resp.Header.Del("Content-Encoding")
|
|
resp.Header.Del("Content-Length")
|
|
gzipReader, err := gzip.NewReader(resp.Body)
|
|
if err != nil {
|
|
resp.Body.Close()
|
|
return nil, fmt.Errorf("failed to create gzip reader: %w", err)
|
|
}
|
|
resp.Body = &gzipResponseBody{
|
|
Reader: gzipReader,
|
|
closer: resp.Body,
|
|
}
|
|
}
|
|
|
|
return resp, nil
|
|
}
|
|
|
|
type gzipResponseBody struct {
|
|
io.Reader
|
|
closer io.Closer
|
|
}
|
|
|
|
func (g *gzipResponseBody) Close() error {
|
|
if gzReader, ok := g.Reader.(*gzip.Reader); ok {
|
|
gzReader.Close()
|
|
}
|
|
return g.closer.Close()
|
|
}
|
|
|
|
func removeHopByHopHeaders(header http.Header) {
|
|
hopByHopHeaders := []string{
|
|
"Connection",
|
|
"Keep-Alive",
|
|
"Proxy-Authenticate",
|
|
"Proxy-Authorization",
|
|
"TE",
|
|
"Trailers",
|
|
"Transfer-Encoding",
|
|
"Upgrade",
|
|
}
|
|
|
|
for _, h := range hopByHopHeaders {
|
|
header.Del(h)
|
|
}
|
|
|
|
// Remove any headers specified in the Connection header
|
|
if connection := header.Get("Connection"); connection != "" {
|
|
for _, h := range strings.Split(connection, ",") {
|
|
header.Del(strings.TrimSpace(h))
|
|
}
|
|
header.Del("Connection")
|
|
}
|
|
}
|