claude-code-proxy/proxy/internal/handler/openapi.go


Local fork: hardening + ops improvements (timeout knob, demotion, /livez, drain)

This commit captures both the prior accumulated work-in-progress (framework migration web/→svelte/, postgres storage, conversation viewer, dashboard auth, OpenAPI spec, integration tests) AND today's operational improvements layered on top. History wasn't checkpointed incrementally; happy to split it via interactive rebase if a reviewer wants smaller commits.

Today's changes (in addition to the older WIP):

1. Configurable upstream response-header timeout
   - ANTHROPIC_RESPONSE_HEADER_TIMEOUT env (default 300s)
   - Replaces the hardcoded 300s in provider/anthropic.go that was firing on opus + 1M-context + extended-thinking non-streaming requests
   - Files: internal/config/config.go, internal/provider/anthropic.go

2. Structured forward-error diagnostic logging
   - When a forward to Anthropic fails, log a single key=value line with request_id, model, stream, body_bytes, has_thinking, anthropic_beta, query, elapsed, ctx_err, alongside the existing human-readable error line for back-compat
   - Files: internal/handler/handlers.go (logForwardFailure)

3. Full SSE protocol passthrough + Flusher fix
   - handler/handlers.go: forward all SSE lines verbatim (event:, id:, retry:, ":" comments, blank-line terminators), not only data:. The previous code produced malformed SSE for strict parsers.
   - middleware/logging.go: explicit Flush() method on responseWriter. Embedding http.ResponseWriter (an interface) does not auto-promote Flush(), so every w.(http.Flusher) check in the streaming handler was returning ok=false and SSE writes buffered in net/http until the body closed.

4. Non-streaming → streaming demotion (feature-flagged)
   - ANTHROPIC_DEMOTE_NONSTREAMING env (default false)
   - When enabled and the routed provider is anthropic, force stream=true upstream for clients that asked for stream=false. Receive SSE, accumulate via accumulateSSEToMessage (handles text, tool_use with partial_json reassembly, thinking, signature, citations_delta, usage merge), and synthesize a single non-streaming JSON response.
   - Eliminates the ResponseHeaderTimeout class of failure entirely.
   - Body rewrite uses json.Decoder + UseNumber() to preserve integer precision in unknown nested fields (tool inputs from prior turns).
   - Files: internal/config/config.go, internal/handler/handlers.go, cmd/proxy/main.go, cmd/proxy/main_test.go

5. Live operational state: /livez gauge + graceful drain
   - New internal/runtime package: atomic in-flight counter + draining flag
   - New middleware/inflight.go: increments the runtime gauge; applied to the /v1/* subrouter so Messages, ChatCompletions, and ProxyPassthrough are all counted
   - /v1/* moved to a gorilla/mux subrouter so the InFlight middleware applies surgically; /health, /livez, /openapi.* remain on the parent router (unauthenticated, uncounted)
   - Health handler returns 503 draining when runtime.IsDraining() is true, so Traefik stops routing to a slot before drain begins
   - New /livez handler returns {status, in_flight, draining, timestamp}
   - SIGTERM handler in main.go: SetDraining(true), poll for in_flight==0 with a 32-min ceiling and 1s tick (logs every 10s), then srv.Shutdown
   - Auth bypass list extended with /livez
   - Files: internal/runtime/runtime.go (new), internal/middleware/inflight.go (new), internal/middleware/auth.go, internal/handler/handlers.go (Health, Livez, runtime import), cmd/proxy/main.go (subrouter, drain loop)

6. OpenAPI spec updates
   - Document the Health 503 response and the new DrainingResponse schema
   - Add the /livez path with a LivezResponse schema
   - Files: internal/handler/openapi.go

Verified: go build ./... clean, go test ./... all pass, go vet clean. Three rounds of codex peer review across changes 1-5; all feedback addressed (citations_delta, json.Number precision, drain-loop logging via lastLog timestamp, PathPrefix tightened to "/v1/").
2026-05-02 15:15:58 -06:00
package handler

import (
	"encoding/json"
	"net/http"

	"gopkg.in/yaml.v3"
)

// openAPISpec is the embedded OpenAPI 3.0 specification for the proxy API.
var openAPISpec = `
openapi: "3.0.3"
info:
  title: Claude Code Proxy API
  description: |
    An Anthropic API proxy that provides request logging, model routing, usage
    analytics, and a dashboard UI. The proxy exposes two groups of endpoints:

    **Proxy endpoints**: drop-in replacements for the upstream Anthropic API.
    Point Claude Code (or any Anthropic SDK client) at this proxy and all
    requests are forwarded, logged, and optionally re-routed to a different
    model or provider.

    **Dashboard endpoints**: read-only analytics and configuration APIs that
    power the built-in web dashboard. These are protected by HTTP Basic Auth
    when DASHBOARD_PASSWORD is set.
  version: "1.0.0"
  contact:
    name: Claude Code Proxy
  license:
    name: MIT
servers:
  - url: /
    description: This proxy instance
tags:
  - name: proxy
    description: |
      Drop-in Anthropic API proxy endpoints. Authenticate with the same
      x-api-key / Authorization header you use for the upstream Anthropic API.
  - name: dashboard
    description: |
      Analytics and configuration endpoints for the web dashboard.
      Protected by DASHBOARD_PASSWORD basic auth when configured.
  - name: health
    description: Health and discovery endpoints (no auth required).
paths:
  # Proxy endpoints
  /v1/messages:
    post:
      operationId: createMessage
      tags: [proxy]
      summary: Create a message (Anthropic Messages API)
      description: |
        Forwards the request to the upstream Anthropic (or routed) provider.
        Supports both streaming (SSE) and non-streaming responses. The proxy
        logs the request/response, applies any configured model routing rules
        and header rules, then returns the upstream response verbatim.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/AnthropicRequest"
      responses:
        "200":
          description: Successful message response (non-streaming)
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/AnthropicResponse"
            text/event-stream:
              schema:
                type: string
                description: SSE stream of Anthropic streaming events
        "400":
          description: Invalid request
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "500":
          description: Upstream or internal error
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
  /v1/chat/completions:
    post:
      operationId: chatCompletions
      tags: [proxy]
      summary: Chat completions (OpenAI-compatible; not supported)
      description: |
        Returns a 400 error directing callers to use /v1/messages instead.
        This endpoint exists for compatibility detection only.
      requestBody:
        content:
          application/json:
            schema:
              type: object
      responses:
        "400":
          description: Not supported; use /v1/messages
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
  /v1/models:
    get:
      operationId: listModels
      tags: [proxy]
      summary: List available models
      description: |
        Returns the list of models known to the proxy. The proxy uses
        pattern-based routing, so any model accepted by the upstream provider
        will work; this endpoint currently returns an empty list.
      responses:
        "200":
          description: Model list
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ModelsResponse"
  # Health & discovery
  /health:
    get:
      operationId: healthCheck
      tags: [health]
      summary: Health check (binary up/ready signal for load balancers)
      description: |
        Returns 200 with status=healthy while the process is accepting
        traffic, and 503 with status=draining once a SIGTERM has been
        received. Traefik (or any LB doing health-based routing) should
        treat 503 as "stop sending new requests to this backend", which is
        the signal the graceful-drain loop relies on.
      responses:
        "200":
          description: Service is healthy
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/HealthResponse"
        "503":
          description: Service is draining (SIGTERM received). Stop routing here.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/DrainingResponse"
  /livez:
    get:
      operationId: livenessProbe
      tags: [health]
      summary: Live operational state (in-flight gauge + draining flag)
      description: |
        Always returns 200 with the current in-flight request count and
        draining flag. Distinct from /health, which is a binary up/ready
        signal; /livez is for observability and deploy-time orchestration
        ("how many requests are still active before I cycle this slot?").
      responses:
        "200":
          description: Operational state
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/LivezResponse"
  /openapi.json:
    get:
      operationId: getOpenAPISpec
      tags: [health]
      summary: OpenAPI specification (JSON)
      responses:
        "200":
          description: The OpenAPI 3.0 spec for this API
          content:
            application/json:
              schema:
                type: object
  /openapi.yaml:
    get:
      operationId: getOpenAPISpecYAML
      tags: [health]
      summary: OpenAPI specification (YAML)
      responses:
        "200":
          description: The OpenAPI 3.0 spec for this API
          content:
            application/x-yaml:
              schema:
                type: string
  # Dashboard endpoints
  /api/requests:
    get:
      operationId: getRequests
      tags: [dashboard]
      summary: List logged requests
      parameters:
        - name: page
          in: query
          schema: { type: integer, default: 1 }
        - name: limit
          in: query
          schema: { type: integer, default: 10 }
        - name: model
          in: query
          schema: { type: string, default: "all" }
          description: Filter by model name (substring match) or "all"
      responses:
        "200":
          description: Paginated request list
          content:
            application/json:
              schema:
                type: object
                properties:
                  requests:
                    type: array
                    items:
                      $ref: "#/components/schemas/RequestLog"
                  total:
                    type: integer
    delete:
      operationId: deleteRequests
      tags: [dashboard]
      summary: Clear all logged requests
      responses:
        "200":
          description: Requests cleared
          content:
            application/json:
              schema:
                type: object
                properties:
                  message: { type: string }
                  deleted: { type: integer }
  /api/requests/summary:
    get:
      operationId: getRequestsSummary
      tags: [dashboard]
      summary: Lightweight request summaries for fast list rendering
      parameters:
        - name: model
          in: query
          schema: { type: string, default: "all" }
        - name: start
          in: query
          schema: { type: string, format: date-time }
          description: Start of time range (UTC ISO 8601)
        - name: end
          in: query
          schema: { type: string, format: date-time }
          description: End of time range (UTC ISO 8601)
        - name: offset
          in: query
          schema: { type: integer, default: 0 }
        - name: limit
          in: query
          schema: { type: integer, default: 0 }
          description: Max results (0 = unlimited)
      responses:
        "200":
          description: Paginated request summaries
          content:
            application/json:
              schema:
                type: object
                properties:
                  requests:
                    type: array
                    items:
                      $ref: "#/components/schemas/RequestSummary"
                  total: { type: integer }
                  offset: { type: integer }
                  limit: { type: integer }
  /api/requests/latest-date:
    get:
      operationId: getLatestRequestDate
      tags: [dashboard]
      summary: Date of the most recent logged request
      responses:
        "200":
          description: Latest request date
          content:
            application/json:
              schema:
                type: object
                properties:
                  latestDate: { type: string, format: date-time }
  /api/requests/{id}:
    get:
      operationId: getRequestByID
      tags: [dashboard]
      summary: Get a single request by ID
      parameters:
        - name: id
          in: path
          required: true
          schema: { type: string }
          description: Short or full request ID
      responses:
        "200":
          description: The matching request
          content:
            application/json:
              schema:
                type: object
                properties:
                  request:
                    $ref: "#/components/schemas/RequestLog"
                  fullId: { type: string }
        "404":
          description: Request not found
  /api/stats:
    get:
      operationId: getStats
      tags: [dashboard]
      summary: Aggregated usage statistics
      parameters:
        - name: start_date
          in: query
          schema: { type: string }
        - name: end_date
          in: query
          schema: { type: string }
        - name: model
          in: query
          schema: { type: string }
        - name: org
          in: query
          schema: { type: string }
      responses:
        "200":
          description: Aggregated usage statistics
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/UsageStats"
  /api/stats/dashboard:
    get:
      operationId: getDashboardStats
      tags: [dashboard]
      summary: Daily token usage for dashboard charts
      parameters:
        - name: start
          in: query
          schema: { type: string, format: date-time }
        - name: end
          in: query
          schema: { type: string, format: date-time }
        - name: org
          in: query
          schema: { type: string }
      responses:
        "200":
          description: Daily token usage
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/DashboardStats"
  /api/stats/hourly:
    get:
      operationId: getHourlyStats
      tags: [dashboard]
      summary: Hourly token usage breakdown
      parameters:
        - name: start
          in: query
          required: true
          schema: { type: string, format: date-time }
        - name: end
          in: query
          required: true
          schema: { type: string, format: date-time }
        - name: bucket
          in: query
          schema: { type: integer, default: 60 }
          description: Bucket size in minutes
        - name: org
          in: query
          schema: { type: string }
      responses:
        "200":
          description: Hourly token usage breakdown
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/HourlyStatsResponse"
  /api/stats/models:
    get:
      operationId: getModelStats
      tags: [dashboard]
      summary: Per-model token usage breakdown
      parameters:
        - name: start
          in: query
          required: true
          schema: { type: string, format: date-time }
        - name: end
          in: query
          required: true
          schema: { type: string, format: date-time }
        - name: org
          in: query
          schema: { type: string }
      responses:
        "200":
          description: Per-model token usage breakdown
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ModelStatsResponse"
  /api/stats/organizations:
    get:
      operationId: getOrganizations
      tags: [dashboard]
      summary: List distinct organization IDs
      responses:
        "200":
          description: Distinct organization IDs
          content:
            application/json:
              schema:
                type: object
                properties:
                  organizations:
                    type: array
                    items: { type: string }
  /api/conversations:
    get:
      operationId: getConversations
      tags: [dashboard]
      summary: List conversations (grouped by session)
      parameters:
        - name: model
          in: query
          schema: { type: string, default: "all" }
        - name: page
          in: query
          schema: { type: integer, default: 1 }
        - name: limit
          in: query
          schema: { type: integer, default: 10 }
      responses:
        "200":
          description: Paginated conversation list
          content:
            application/json:
              schema:
                type: object
                properties:
                  conversations:
                    type: array
                    items:
                      type: object
                      properties:
                        id: { type: string }
                        requestCount: { type: integer }
                        startTime: { type: string, format: date-time }
                        lastActivity: { type: string, format: date-time }
                        duration: { type: integer, description: "Duration in ms" }
                        firstMessage: { type: string }
                        projectPath: { type: string }
                        projectName: { type: string }
                        model: { type: string }
                  hasMore: { type: boolean }
                  total: { type: integer }
                  page: { type: integer }
                  limit: { type: integer }
  /api/conversations/{id}:
    get:
      operationId: getConversationByID
      tags: [dashboard]
      summary: Get a single conversation by session ID
      parameters:
        - name: id
          in: path
          required: true
          schema: { type: string }
        - name: project
          in: query
          required: true
          schema: { type: string }
          description: Project path the conversation belongs to
      responses:
        "200":
          description: The conversation
          content:
            application/json:
              schema:
                type: object
        "404":
          description: Conversation not found
  /api/conversations/project:
    get:
      operationId: getConversationsByProject
      tags: [dashboard]
      summary: List conversations for a specific project
      parameters:
        - name: project
          in: query
          required: true
          schema: { type: string }
      responses:
        "200":
          description: Conversations for the project
          content:
            application/json:
              schema:
                type: object
  /api/settings:
    get:
      operationId: getSettings
      tags: [dashboard]
      summary: Get current proxy settings
      responses:
        "200":
          description: Current settings
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ProxySettings"
    put:
      operationId: saveSettings
      tags: [dashboard]
      summary: Update proxy settings
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/ProxySettings"
      responses:
        "200":
          description: The saved settings
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ProxySettings"
components:
  securitySchemes:
    apiKey:
      type: apiKey
      in: header
      name: x-api-key
      description: Anthropic API key (forwarded to upstream)
    bearerAuth:
      type: http
      scheme: bearer
      description: Bearer token authentication
    dashboardBasicAuth:
      type: http
      scheme: basic
      description: Dashboard password (username is ignored)
  schemas:
    ErrorResponse:
      type: object
      properties:
        error: { type: string }
        details: { type: string }
    HealthResponse:
      type: object
      properties:
        status: { type: string, example: "healthy" }
        timestamp: { type: string, format: date-time }
    DrainingResponse:
      type: object
      properties:
        status: { type: string, example: "draining" }
        timestamp: { type: string, format: date-time }
        in_flight: { type: integer, example: 3 }
    LivezResponse:
      type: object
      properties:
        status: { type: string, example: "ok" }
        timestamp: { type: string, format: date-time }
        in_flight: { type: integer, example: 0 }
        draining: { type: boolean, example: false }
    AnthropicRequest:
      type: object
      required: [model, messages, max_tokens]
      properties:
        model:
          type: string
          description: |
            Model ID to use. The proxy may re-route this to a different
            model/provider based on configured routing rules.
          example: "claude-sonnet-4-5-20250514"
        messages:
          type: array
          items:
            $ref: "#/components/schemas/AnthropicMessage"
        max_tokens:
          type: integer
          example: 1024
        temperature:
          type: number
          format: float
        system:
          type: array
          items:
            $ref: "#/components/schemas/SystemMessage"
        stream:
          type: boolean
          default: false
        tools:
          type: array
          items:
            $ref: "#/components/schemas/Tool"
        tool_choice:
          description: Tool choice configuration
    AnthropicMessage:
      type: object
      required: [role, content]
      properties:
        role:
          type: string
          enum: [user, assistant]
        content:
          description: String or array of content blocks
          oneOf:
            - type: string
            - type: array
              items:
                type: object
                properties:
                  type: { type: string }
                  text: { type: string }
    SystemMessage:
      type: object
      properties:
        type: { type: string, example: "text" }
        text: { type: string }
        cache_control:
          type: object
          properties:
            type: { type: string, example: "ephemeral" }
    Tool:
      type: object
      properties:
        name: { type: string }
        description: { type: string }
        input_schema:
          type: object
          properties:
            type: {}
            properties: { type: object }
            required:
              type: array
              items: { type: string }
    AnthropicResponse:
      type: object
      properties:
        id: { type: string }
        type: { type: string, example: "message" }
        role: { type: string, example: "assistant" }
        model: { type: string }
        stop_reason: { type: string }
        stop_sequence: { type: string, nullable: true }
        content:
          type: array
          items:
            type: object
            properties:
              type: { type: string }
              text: { type: string }
        usage:
          $ref: "#/components/schemas/AnthropicUsage"
    AnthropicUsage:
      type: object
      properties:
        input_tokens: { type: integer }
        output_tokens: { type: integer }
        cache_creation_input_tokens: { type: integer }
        cache_read_input_tokens: { type: integer }
        service_tier: { type: string }
    ModelsResponse:
      type: object
      properties:
        object: { type: string, example: "list" }
        data:
          type: array
          items:
            type: object
            properties:
              id: { type: string }
              object: { type: string }
              created: { type: integer }
              owned_by: { type: string }
    RequestLog:
      type: object
      properties:
        requestId: { type: string }
        timestamp: { type: string, format: date-time }
        method: { type: string }
        endpoint: { type: string }
        model: { type: string }
        originalModel: { type: string }
        routedModel: { type: string }
        userAgent: { type: string }
        contentType: { type: string }
        conversationHash: { type: string }
        messageCount: { type: integer }
        organizationId: { type: string }
        response:
          $ref: "#/components/schemas/ResponseLog"
    ResponseLog:
      type: object
      properties:
        statusCode: { type: integer }
        responseTime: { type: integer, description: "Response time in ms" }
        isStreaming: { type: boolean }
        completedAt: { type: string, format: date-time }
        streamError: { type: string }
        rateLimit:
          $ref: "#/components/schemas/RateLimitInfo"
    RateLimitInfo:
      type: object
      properties:
        organizationId: { type: string }
        requestsLimit: { type: integer }
        requestsRemaining: { type: integer }
        requestsReset: { type: string }
        tokensLimit: { type: integer }
        tokensRemaining: { type: integer }
        tokensReset: { type: string }
        unifiedStatus: { type: string }
        unifiedUtilization5h: { type: number }
        unifiedReset5h: { type: string }
        unifiedUtilization7d: { type: number }
        unifiedReset7d: { type: string }
    RequestSummary:
      type: object
      properties:
        requestId: { type: string }
        timestamp: { type: string, format: date-time }
        method: { type: string }
        endpoint: { type: string }
        model: { type: string }
        originalModel: { type: string }
        routedModel: { type: string }
        statusCode: { type: integer }
        responseTime: { type: integer }
        usage:
          $ref: "#/components/schemas/AnthropicUsage"
        conversationHash: { type: string }
        messageCount: { type: integer }
        stopReason: { type: string }
    UsageStats:
      type: object
      properties:
        total_requests: { type: integer }
        total_input_tokens: { type: integer, format: int64 }
        total_output_tokens: { type: integer, format: int64 }
        total_cache_tokens: { type: integer, format: int64 }
        requests_by_model:
          type: object
          additionalProperties:
            type: object
            properties:
              request_count: { type: integer }
              input_tokens: { type: integer, format: int64 }
              output_tokens: { type: integer, format: int64 }
              cache_tokens: { type: integer, format: int64 }
        start_date: { type: string }
        end_date: { type: string }
    DashboardStats:
      type: object
      properties:
        dailyStats:
          type: array
          items:
            type: object
            properties:
              date: { type: string }
              tokens: { type: integer, format: int64 }
              requests: { type: integer }
    HourlyStatsResponse:
      type: object
      properties:
        hourlyStats:
          type: array
          items:
            type: object
            properties:
              hour: { type: integer }
              label: { type: string }
              tokens: { type: integer, format: int64 }
              requests: { type: integer }
        todayTokens: { type: integer, format: int64 }
        todayRequests: { type: integer }
        avgResponseTime: { type: integer, format: int64 }
    ModelStatsResponse:
      type: object
      properties:
        modelStats:
          type: array
          items:
            type: object
            properties:
              model: { type: string }
              tokens: { type: integer, format: int64 }
              requests: { type: integer }
    ProxySettings:
      type: object
      properties:
        requestHeaderRules:
          type: array
          items:
            $ref: "#/components/schemas/HeaderRule"
        responseHeaderRules:
          type: array
          items:
            $ref: "#/components/schemas/HeaderRule"
    HeaderRule:
      type: object
      properties:
        header: { type: string, description: "Header name (case-insensitive)" }
        action:
          type: string
          enum: [block, set, replace]
        value: { type: string }
        find: { type: string, description: "For replace action: string to find" }
        enabled: { type: boolean }
security:
  - apiKey: []
  - bearerAuth: []
`
// OpenAPIJSON serves the OpenAPI spec as JSON.
func (h *Handler) OpenAPIJSON(w http.ResponseWriter, r *http.Request) {
	var spec interface{}
	if err := yaml.Unmarshal([]byte(openAPISpec), &spec); err != nil {
		writeErrorResponse(w, "Failed to parse OpenAPI spec", http.StatusInternalServerError)
		return
	}
	spec = convertYAMLToJSON(spec)
	w.Header().Set("Content-Type", "application/json")
	w.Header().Set("Access-Control-Allow-Origin", "*")
	json.NewEncoder(w).Encode(spec)
}

// OpenAPIYAML serves the OpenAPI spec as YAML.
func (h *Handler) OpenAPIYAML(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/x-yaml")
	w.Header().Set("Access-Control-Allow-Origin", "*")
	w.Write([]byte(openAPISpec))
}

// convertYAMLToJSON recursively walks the value produced by yaml.Unmarshal and
// rebuilds nested maps and slices so the result is JSON-encodable. yaml.v3
// already decodes string-keyed mappings as map[string]interface{}, so this is
// mostly a defensive deep copy over nested maps.
func convertYAMLToJSON(v interface{}) interface{} {
	switch val := v.(type) {
	case map[string]interface{}:
		out := make(map[string]interface{}, len(val))
		for k, v2 := range val {
			out[k] = convertYAMLToJSON(v2)
		}
		return out
	case []interface{}:
		out := make([]interface{}, len(val))
		for i, v2 := range val {
			out[i] = convertYAMLToJSON(v2)
		}
		return out
	default:
		return v
	}
}