claude-code-proxy/proxy/internal/provider/openai.go
sid 8e550b9785 Local fork: hardening + ops improvements (timeout knob, demotion, /livez, drain)
This commit captures both the prior accumulated work-in-progress
(framework migration web/→svelte/, postgres storage, conversation
viewer, dashboard auth, OpenAPI spec, integration tests) AND today's
operational improvements layered on top. History wasn't checkpointed
incrementally; happy to split it via interactive rebase if a reviewer
wants smaller commits.

Today's changes (in addition to the older WIP):

1. Configurable upstream response-header timeout
   - ANTHROPIC_RESPONSE_HEADER_TIMEOUT env (default 300s)
   - Replaces hardcoded 300s in provider/anthropic.go that was firing
     on opus + 1M-context + extended thinking non-streaming requests
   - Files: internal/config/config.go, internal/provider/anthropic.go

2. Structured forward-error diagnostic logging
   - When a forward to Anthropic fails, log a single key=value line
     with request_id, model, stream, body_bytes, has_thinking,
     anthropic_beta, query, elapsed, ctx_err — alongside the existing
     human-readable error line for back-compat
   - Files: internal/handler/handlers.go (logForwardFailure)
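The single key=value line can be rendered with a small logfmt-style helper along these lines (a sketch with assumed names; the real logForwardFailure presumably formats its fields directly):

```go
package main

import (
	"fmt"
	"sort"
	"strings"
)

// kvLine renders fields as one sorted, space-separated key=value line,
// quoting values that contain spaces or '=' so the line stays parseable.
func kvLine(fields map[string]string) string {
	keys := make([]string, 0, len(fields))
	for k := range fields {
		keys = append(keys, k)
	}
	sort.Strings(keys) // deterministic field order for grep-ability
	parts := make([]string, 0, len(keys))
	for _, k := range keys {
		v := fields[k]
		if strings.ContainsAny(v, " =") {
			v = fmt.Sprintf("%q", v)
		}
		parts = append(parts, k+"="+v)
	}
	return strings.Join(parts, " ")
}

func main() {
	fmt.Println(kvLine(map[string]string{
		"request_id": "abc123",
		"model":      "claude-opus",
		"stream":     "false",
	}))
	// → model=claude-opus request_id=abc123 stream=false
}
```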

3. Full SSE protocol passthrough + Flusher fix
   - handler/handlers.go: forward all SSE lines verbatim (event:, id:,
     retry:, : comments, blank-line terminators), not only data:.
     Previous code produced malformed SSE for strict parsers.
   - middleware/logging.go: explicit Flush() method on responseWriter.
     Embedding http.ResponseWriter (interface) does not auto-promote
     Flush(), so every w.(http.Flusher) check in the streaming
     handler was returning ok=false and SSE writes buffered in net/http
     until the body closed.

4. Non-streaming → streaming demotion (feature-flagged)
   - ANTHROPIC_DEMOTE_NONSTREAMING env (default false)
   - When enabled and the routed provider is anthropic, force stream=true
     upstream for clients that asked for stream=false. Receive SSE,
     accumulate via accumulateSSEToMessage (handles text, tool_use with
     partial_json reassembly, thinking, signature, citations_delta,
     usage merge), and synthesize a single non-streaming JSON response.
   - Eliminates the ResponseHeaderTimeout class of failure entirely.
   - Body rewrite uses json.Decoder + UseNumber() to preserve integer
     precision in unknown nested fields (tool inputs from prior turns).
   - Files: internal/config/config.go, internal/handler/handlers.go,
     cmd/proxy/main.go, cmd/proxy/main_test.go

5. Live operational state: /livez gauge + graceful drain
   - New internal/runtime package: atomic in-flight counter + draining flag
   - New middleware/inflight.go: increments runtime gauge, applied to
     /v1/* subrouter so Messages, ChatCompletions, and ProxyPassthrough
     are all counted
   - /v1/* moved to a gorilla/mux subrouter so the InFlight middleware
     applies surgically; /health, /livez, /openapi.* remain on parent
     router (unauthenticated, uncounted)
   - Health handler returns 503 draining when runtime.IsDraining() is
     true, so Traefik stops routing to a slot before drain begins
   - New /livez handler returns {status, in_flight, draining, timestamp}
   - SIGTERM handler in main.go: SetDraining(true), poll for in_flight==0
     with 32-min ceiling and 1s tick (logs every 10s), then srv.Shutdown
   - Auth bypass list extended with /livez
   - Files: internal/runtime/runtime.go (new),
     internal/middleware/inflight.go (new),
     internal/middleware/auth.go,
     internal/handler/handlers.go (Health, Livez, runtime import),
     cmd/proxy/main.go (subrouter, drain loop)
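The runtime package's core state can be sketched as an atomic gauge plus a flag; SetDraining/IsDraining match the names above, the rest are assumptions:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// Shared operational state: in-flight request count and draining flag.
// The real internal/runtime package presumably looks similar.
var (
	inFlight atomic.Int64
	draining atomic.Bool
)

func IncInFlight()       { inFlight.Add(1) }  // called by the InFlight middleware on entry
func DecInFlight()       { inFlight.Add(-1) } // deferred by the middleware on exit
func InFlight() int64    { return inFlight.Load() }
func SetDraining(v bool) { draining.Store(v) }
func IsDraining() bool   { return draining.Load() }

func main() {
	IncInFlight()
	SetDraining(true)
	// /livez would report this state; /health would now return 503 draining.
	fmt.Printf("in_flight=%d draining=%v\n", InFlight(), IsDraining())
	DecInFlight()
}
```

The SIGTERM drain loop then reduces to: SetDraining(true), poll InFlight() until it reaches zero (with a ceiling), then call srv.Shutdown.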

6. OpenAPI spec updates
   - Document Health 503 response and new DrainingResponse schema
   - Add /livez path with LivezResponse schema
   - Files: internal/handler/openapi.go

Verified: go build ./... clean, go test ./... all pass, go vet clean.
Three rounds of codex peer review across changes 1-5; all feedback
addressed (citations_delta, json.Number precision, drain-loop logging
via lastLog timestamp, PathPrefix tightened to "/v1/").
2026-05-02 15:15:58 -06:00


package provider

import (
	"bytes"
	"compress/gzip"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/seifghazi/claude-code-monitor/internal/config"
	"github.com/seifghazi/claude-code-monitor/internal/model"
	"github.com/seifghazi/claude-code-monitor/internal/sse"
)

type OpenAIProvider struct {
	client *http.Client
	config *config.OpenAIProviderConfig
}

func NewOpenAIProvider(cfg *config.OpenAIProviderConfig) Provider {
	return &OpenAIProvider{
		client: &http.Client{
			// No timeout set here - we rely on context cancellation for timeouts.
			// Setting Timeout here would apply to the entire request+response cycle,
			// which causes "context canceled" errors for long-running streaming
			// requests with large "thinking" content blocks.
			// The server's WriteTimeout handles individual write operations,
			// and the context passed to ForwardRequest controls the overall timeout.
			Transport: &http.Transport{
				// Connection timeouts
				DialContext: (&net.Dialer{
					Timeout:   30 * time.Second,
					KeepAlive: 30 * time.Second,
				}).DialContext,
				TLSHandshakeTimeout:   30 * time.Second,
				ResponseHeaderTimeout: 300 * time.Second, // Time to wait for response headers (high for 1M context)
				ExpectContinueTimeout: 1 * time.Second,
				// Connection pooling
				MaxIdleConns:        100,
				MaxIdleConnsPerHost: 10,
				IdleConnTimeout:     90 * time.Second,
			},
		},
		config: cfg,
	}
}

func (p *OpenAIProvider) Name() string {
	return "openai"
}
func (p *OpenAIProvider) ForwardRequest(ctx context.Context, originalReq *http.Request) (*http.Response, error) {
	// First, we need to convert the Anthropic request to OpenAI format
	bodyBytes, err := io.ReadAll(originalReq.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read request body: %w", err)
	}
	originalReq.Body = io.NopCloser(bytes.NewReader(bodyBytes))

	var anthropicReq model.AnthropicRequest
	if err := json.Unmarshal(bodyBytes, &anthropicReq); err != nil {
		return nil, fmt.Errorf("failed to parse anthropic request: %w", err)
	}

	// Convert to OpenAI format
	openAIReq := convertAnthropicToOpenAI(&anthropicReq)
	newBodyBytes, err := json.Marshal(openAIReq)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal openai request: %w", err)
	}

	// Clone the request with new body
	proxyReq := originalReq.Clone(ctx)
	proxyReq.Body = io.NopCloser(bytes.NewReader(newBodyBytes))
	proxyReq.ContentLength = int64(len(newBodyBytes))

	// Parse the configured base URL
	baseURL, err := url.Parse(p.config.BaseURL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse base URL '%s': %w", p.config.BaseURL, err)
	}

	// Update the destination URL for OpenAI
	proxyReq.URL.Scheme = baseURL.Scheme
	proxyReq.URL.Host = baseURL.Host
	proxyReq.URL.Path = "/v1/chat/completions" // OpenAI endpoint

	// Update request headers
	proxyReq.RequestURI = ""
	proxyReq.Host = baseURL.Host

	// Remove Anthropic-specific headers
	proxyReq.Header.Del("anthropic-version")
	proxyReq.Header.Del("x-api-key")
	proxyReq.Header.Del("Authorization")

	// Determine which API key to use
	apiKey := p.config.APIKey
	// Check for client-provided API key if allowed
	if p.config.AllowClientAPIKey && p.config.ClientAPIKeyHeader != "" {
		if clientKey := originalReq.Header.Get(p.config.ClientAPIKeyHeader); clientKey != "" {
			apiKey = clientKey
		}
	}

	// Add OpenAI headers
	if apiKey != "" {
		proxyReq.Header.Set("Authorization", "Bearer "+apiKey)
	}
	proxyReq.Header.Set("Content-Type", "application/json")

	// Remove the client API key header from the proxied request
	if p.config.ClientAPIKeyHeader != "" {
		proxyReq.Header.Del(p.config.ClientAPIKeyHeader)
	}

	// Forward the request
	resp, err := p.client.Do(proxyReq)
	if err != nil {
		return nil, fmt.Errorf("failed to forward request: %w", err)
	}

	// Check for error responses
	if resp.StatusCode >= 400 {
		// Read the error body for debugging; the error is returned to the client
		errorBody, _ := io.ReadAll(resp.Body)
		resp.Body.Close()
		// Create an error response in Anthropic format
		errorResp := map[string]interface{}{
			"type": "error",
			"error": map[string]interface{}{
				"type":    "api_error",
				"message": fmt.Sprintf("OpenAI API error: %s", string(errorBody)),
			},
		}
		errorJSON, _ := json.Marshal(errorResp)
		// Replace the response body with the error
		resp.Body = io.NopCloser(bytes.NewReader(errorJSON))
		resp.Header.Set("Content-Type", "application/json")
		resp.Header.Del("Content-Encoding")
		resp.ContentLength = int64(len(errorJSON))
		return resp, nil
	}

	// Handle gzip-encoded responses
	var bodyReader io.ReadCloser = resp.Body
	if resp.Header.Get("Content-Encoding") == "gzip" {
		gzReader, err := gzip.NewReader(resp.Body)
		if err != nil {
			resp.Body.Close()
			return nil, fmt.Errorf("failed to create gzip reader: %w", err)
		}
		bodyReader = gzReader
		resp.Header.Del("Content-Encoding")
		resp.Header.Del("Content-Length")
	}

	// For streaming responses, we need to convert back to Anthropic format
	if anthropicReq.Stream {
		// Create a pipe to transform the response
		pr, pw := io.Pipe()
		// Start a goroutine to transform the stream
		go func() {
			defer bodyReader.Close()
			if err := transformOpenAIStreamToAnthropic(bodyReader, pw); err != nil {
				_ = pw.CloseWithError(err)
				return
			}
			_ = pw.Close()
		}()
		// Replace the response body with our transformed stream
		resp.Body = pr
	} else {
		// For non-streaming, read and convert the response
		respBody, err := io.ReadAll(bodyReader)
		bodyReader.Close()
		if err != nil {
			return nil, fmt.Errorf("failed to read response body: %w", err)
		}
		// Convert OpenAI response back to Anthropic format
		transformedBody := transformOpenAIResponseToAnthropic(respBody)
		resp.Body = io.NopCloser(bytes.NewReader(transformedBody))
		resp.ContentLength = int64(len(transformedBody))
		resp.Header.Set("Content-Length", fmt.Sprintf("%d", len(transformedBody)))
	}

	return resp, nil
}
// extractSystemMessages combines all system messages into a single string for OpenAI.
func extractSystemMessages(system []model.AnthropicSystemMessage) string {
	if len(system) == 0 {
		return ""
	}
	var parts []string
	for _, sysMsg := range system {
		parts = append(parts, sysMsg.Text)
	}
	return strings.Join(parts, "\n\n")
}

// convertToolResultContent converts the various formats of tool result content to a string.
func convertToolResultContent(content interface{}) string {
	if content == nil {
		return ""
	}
	switch v := content.(type) {
	case string:
		return v
	case []interface{}:
		var result string
		for _, c := range v {
			if contentMap, ok := c.(map[string]interface{}); ok {
				if contentMap["type"] == "text" {
					if text, ok := contentMap["text"].(string); ok {
						result += text + "\n"
					}
				} else if text, hasText := contentMap["text"]; hasText {
					result += fmt.Sprintf("%v\n", text)
				} else {
					if jsonBytes, err := json.Marshal(contentMap); err == nil {
						result += string(jsonBytes) + "\n"
					} else {
						result += fmt.Sprintf("%v\n", contentMap)
					}
				}
			}
		}
		return result
	case map[string]interface{}:
		if v["type"] == "text" {
			if text, ok := v["text"].(string); ok {
				return text
			}
		}
		if jsonBytes, err := json.Marshal(v); err == nil {
			return string(jsonBytes)
		}
		return fmt.Sprintf("%v", v)
	default:
		if jsonBytes, err := json.Marshal(content); err == nil {
			return string(jsonBytes)
		}
		return fmt.Sprintf("%v", content)
	}
}
// convertMessageContent converts an Anthropic message's content to a plain text string for OpenAI.
// It handles both content arrays (with possible tool results) and simple string content.
func convertMessageContent(msg model.AnthropicMessage) string {
	contentArray, ok := msg.Content.([]interface{})
	if !ok {
		// Handle simple string content via GetContentBlocks
		contentBlocks := msg.GetContentBlocks()
		var parts []string
		for _, block := range contentBlocks {
			if block.Type == "text" {
				parts = append(parts, block.Text)
			}
		}
		content := strings.Join(parts, "\n")
		if content == "" {
			content = "..."
		}
		return content
	}

	// Check if this message contains tool results
	hasToolResults := false
	for _, item := range contentArray {
		if block, ok := item.(map[string]interface{}); ok {
			if blockType, _ := block["type"].(string); blockType == "tool_result" {
				hasToolResults = true
				break
			}
		}
	}
	if hasToolResults {
		return convertContentArrayWithToolResults(contentArray)
	}
	return convertRegularContentArray(contentArray)
}

// convertContentArrayWithToolResults handles content arrays that contain tool_result blocks.
func convertContentArrayWithToolResults(contentArray []interface{}) string {
	textContent := ""
	for _, item := range contentArray {
		block, ok := item.(map[string]interface{})
		if !ok {
			continue
		}
		blockType, _ := block["type"].(string)
		switch blockType {
		case "text":
			if text, ok := block["text"].(string); ok {
				textContent += text + "\n"
			}
		case "tool_result":
			toolID := ""
			if id, ok := block["tool_use_id"].(string); ok {
				toolID = id
			}
			resultContent := convertToolResultContent(block["content"])
			textContent += fmt.Sprintf("Tool result for %s:\n%s\n", toolID, resultContent)
		}
	}
	if textContent == "" {
		return "..."
	}
	return strings.TrimSpace(textContent)
}

// convertRegularContentArray handles content arrays with only text blocks.
func convertRegularContentArray(contentArray []interface{}) string {
	var parts []string
	for _, item := range contentArray {
		if block, ok := item.(map[string]interface{}); ok {
			if blockType, _ := block["type"].(string); blockType == "text" {
				if text, ok := block["text"].(string); ok {
					parts = append(parts, text)
				}
			}
		}
	}
	content := strings.Join(parts, "\n")
	if content == "" {
		content = "..."
	}
	return content
}
// convertToolsToOpenAI converts Anthropic tool definitions to OpenAI format.
func convertToolsToOpenAI(tools []model.Tool) []map[string]interface{} {
	result := make([]map[string]interface{}, 0, len(tools))
	for _, tool := range tools {
		if tool.Name == "" {
			continue
		}
		parameters := make(map[string]interface{})
		if tool.InputSchema.Type != nil {
			parameters["type"] = tool.InputSchema.Type
		} else {
			parameters["type"] = "object"
		}
		if tool.InputSchema.Properties != nil {
			fixedProperties := make(map[string]interface{})
			for propName, propValue := range tool.InputSchema.Properties {
				if prop, ok := propValue.(map[string]interface{}); ok {
					// OpenAI requires array properties to declare an items schema
					if propType, hasType := prop["type"]; hasType && propType == "array" {
						if _, hasItems := prop["items"]; !hasItems {
							prop["items"] = map[string]interface{}{"type": "string"}
						}
					}
					fixedProperties[propName] = prop
				} else {
					fixedProperties[propName] = propValue
				}
			}
			parameters["properties"] = fixedProperties
		} else {
			parameters["properties"] = make(map[string]interface{})
		}
		if len(tool.InputSchema.Required) > 0 {
			parameters["required"] = tool.InputSchema.Required
		}
		functionDef := map[string]interface{}{
			"name":       tool.Name,
			"parameters": parameters,
		}
		if tool.Description != "" {
			functionDef["description"] = tool.Description
		}
		result = append(result, map[string]interface{}{
			"type":     "function",
			"function": functionDef,
		})
	}
	return result
}

// convertToolChoice converts Anthropic tool_choice to OpenAI format.
func convertToolChoice(toolChoice interface{}) interface{} {
	if toolChoice == nil {
		return nil
	}
	toolChoiceMap, ok := toolChoice.(map[string]interface{})
	if !ok {
		return nil
	}
	switch toolChoiceMap["type"] {
	case "auto":
		return "auto"
	case "any":
		return "required"
	case "tool":
		if name, ok := toolChoiceMap["name"].(string); ok {
			return map[string]interface{}{
				"type": "function",
				"function": map[string]interface{}{
					"name": name,
				},
			}
		}
		return "auto"
	default:
		return "auto"
	}
}
func convertAnthropicToOpenAI(req *model.AnthropicRequest) map[string]interface{} {
	messages := []map[string]interface{}{}

	// Add system message if present
	if systemContent := extractSystemMessages(req.System); systemContent != "" {
		messages = append(messages, map[string]interface{}{
			"role":    "system",
			"content": systemContent,
		})
	}

	// Convert conversation messages
	for _, msg := range req.Messages {
		messages = append(messages, map[string]interface{}{
			"role":    msg.Role,
			"content": convertMessageContent(msg),
		})
	}

	// Clamp to the model-specific max token limit
	maxTokensLimit := getModelMaxTokens(req.Model)
	if maxTokensLimit > 0 && req.MaxTokens > maxTokensLimit {
		req.MaxTokens = maxTokensLimit
	}

	openAIReq := map[string]interface{}{
		"model":                 req.Model,
		"messages":              messages,
		"stream":                req.Stream,
		"max_completion_tokens": req.MaxTokens,
	}
	if req.Stream {
		openAIReq["stream_options"] = map[string]interface{}{
			"include_usage": true,
		}
	}

	// o-series models don't support temperature
	isOSeriesModel := strings.HasPrefix(req.Model, "o1") || strings.HasPrefix(req.Model, "o3")
	if !isOSeriesModel {
		openAIReq["temperature"] = req.Temperature
	}

	// Convert tools and tool_choice
	if len(req.Tools) > 0 {
		openAIReq["tools"] = convertToolsToOpenAI(req.Tools)
		if req.ToolChoice != nil {
			if choice := convertToolChoice(req.ToolChoice); choice != nil {
				openAIReq["tool_choice"] = choice
			}
		}
	}
	return openAIReq
}

func getMapKeys(m map[string]interface{}) []string {
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	return keys
}

// getModelMaxTokens returns the max output tokens for known models.
// Returns 0 for unknown models, letting the API handle validation.
func getModelMaxTokens(model string) int {
	// Model-specific max completion token limits
	modelLimits := map[string]int{
		// GPT-4 Turbo and GPT-4o models
		"gpt-4-turbo":         4096,
		"gpt-4-turbo-preview": 4096,
		"gpt-4o":              16384,
		"gpt-4o-mini":         16384,
		"gpt-4o-2024-05-13":   16384,
		"gpt-4o-2024-08-06":   16384,
		// GPT-4 models
		"gpt-4":      8192,
		"gpt-4-32k":  8192,
		"gpt-4-0613": 8192,
		// GPT-3.5 models
		"gpt-3.5-turbo":      4096,
		"gpt-3.5-turbo-16k":  4096,
		"gpt-3.5-turbo-0125": 4096,
		"gpt-3.5-turbo-1106": 4096,
		// o1 reasoning models
		"o1":         100000,
		"o1-preview": 32768,
		"o1-mini":    65536,
		// o3 reasoning models (estimated based on o1 patterns)
		"o3":      100000,
		"o3-mini": 65536,
	}
	// Check for exact match first
	if limit, ok := modelLimits[model]; ok {
		return limit
	}
	// Check for prefix matches for versioned models.
	// Note: map iteration order is random, so overlapping prefixes
	// (e.g. "gpt-4" vs "gpt-4o") can match nondeterministically.
	for prefix, limit := range modelLimits {
		if strings.HasPrefix(model, prefix) {
			return limit
		}
	}
	// Return 0 for unknown models - let the API validate
	return 0
}
func transformOpenAIResponseToAnthropic(respBody []byte) []byte {
	// This is a simplified transformation.
	// In production, you'd want to handle all fields properly.
	var openAIResp map[string]interface{}
	if err := json.Unmarshal(respBody, &openAIResp); err != nil {
		return respBody // Return as-is if we can't parse
	}

	// Extract the assistant's message
	var contentBlocks []map[string]interface{}
	if choices, ok := openAIResp["choices"].([]interface{}); ok && len(choices) > 0 {
		if choice, ok := choices[0].(map[string]interface{}); ok {
			if msg, ok := choice["message"].(map[string]interface{}); ok {
				// Handle regular text content
				if content, ok := msg["content"].(string); ok && content != "" {
					contentBlocks = append(contentBlocks, map[string]interface{}{
						"type": "text",
						"text": content,
					})
				}
				// Handle tool calls
				if toolCalls, ok := msg["tool_calls"].([]interface{}); ok {
					// Since this proxy forwards to Claude/Anthropic API, we should always
					// use tool_use blocks so Claude can execute the tools properly
					// (regardless of which model generated the response)
					for _, tc := range toolCalls {
						if toolCall, ok := tc.(map[string]interface{}); ok {
							if function, ok := toolCall["function"].(map[string]interface{}); ok {
								// Convert OpenAI tool call to Anthropic tool_use format
								anthropicToolUse := map[string]interface{}{
									"type": "tool_use",
									"id":   toolCall["id"],
									"name": function["name"],
								}
								// Parse the arguments JSON string
								if argsStr, ok := function["arguments"].(string); ok {
									var args map[string]interface{}
									if err := json.Unmarshal([]byte(argsStr), &args); err == nil {
										anthropicToolUse["input"] = args
									} else {
										// Parsing failed - preserve the raw string in a "raw" field
										anthropicToolUse["input"] = map[string]interface{}{"raw": argsStr}
									}
								} else if args, ok := function["arguments"].(map[string]interface{}); ok {
									// Already a map, use directly
									anthropicToolUse["input"] = args
								} else {
									// Fallback for any other type
									anthropicToolUse["input"] = map[string]interface{}{"raw": fmt.Sprintf("%v", function["arguments"])}
								}
								contentBlocks = append(contentBlocks, anthropicToolUse)
							}
						}
					}
				}
			}
		}
	}

	// If no content blocks were created, add a default empty text block
	if len(contentBlocks) == 0 {
		contentBlocks = []map[string]interface{}{
			{"type": "text", "text": ""},
		}
	}

	// Build Anthropic-style response
	anthropicResp := map[string]interface{}{
		"id":      openAIResp["id"],
		"type":    "message",
		"role":    "assistant",
		"content": contentBlocks,
		"model":   openAIResp["model"],
	}

	// Convert OpenAI usage format to Anthropic format
	if usage, ok := openAIResp["usage"].(map[string]interface{}); ok {
		anthropicUsage := map[string]interface{}{}
		// Map prompt_tokens to input_tokens
		if promptTokens, ok := usage["prompt_tokens"].(float64); ok {
			anthropicUsage["input_tokens"] = int(promptTokens)
		}
		// Map completion_tokens to output_tokens
		if completionTokens, ok := usage["completion_tokens"].(float64); ok {
			anthropicUsage["output_tokens"] = int(completionTokens)
		}
		// Include total_tokens if present (Anthropic format doesn't typically use it)
		if totalTokens, ok := usage["total_tokens"].(float64); ok {
			anthropicUsage["total_tokens"] = int(totalTokens)
		}
		anthropicResp["usage"] = anthropicUsage
	}

	result, _ := json.Marshal(anthropicResp)
	return result
}
func transformOpenAIStreamToAnthropic(openAIStream io.Reader, anthropicStream io.Writer) error {
	var messageStarted bool
	var contentStarted bool
	var sawDone bool

	err := sse.ForEachLine(openAIStream, func(line string) error {
		// Skip empty lines
		if line == "" {
			return nil
		}
		// Handle SSE data lines
		if strings.HasPrefix(line, "data: ") {
			data := strings.TrimPrefix(line, "data: ")

			// Handle end of stream
			if data == "[DONE]" {
				sawDone = true
				// Send Anthropic-style completion
				if contentStarted {
					if _, err := fmt.Fprintf(anthropicStream, "data: {\"type\":\"content_block_stop\",\"index\":0}\n\n"); err != nil {
						return err
					}
				}
				if messageStarted {
					if _, err := fmt.Fprintf(anthropicStream, "data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null}}\n\n"); err != nil {
						return err
					}
					if _, err := fmt.Fprintf(anthropicStream, "data: {\"type\":\"message_stop\"}\n\n"); err != nil {
						return err
					}
				}
				return nil
			}

			// Parse the OpenAI chunk; silently skip lines that aren't valid JSON
			var openAIChunk map[string]interface{}
			if err := json.Unmarshal([]byte(data), &openAIChunk); err != nil {
				return nil
			}

			// Check for usage data BEFORE processing choices.
			// Per OpenAI docs, usage arrives in the final chunk with an empty choices array.
			if usage, hasUsage := openAIChunk["usage"].(map[string]interface{}); hasUsage {
				// Convert OpenAI usage to Anthropic format
				anthropicUsage := map[string]interface{}{}
				// Handle both float64 and int types
				if promptTokens, ok := usage["prompt_tokens"].(float64); ok {
					anthropicUsage["input_tokens"] = int(promptTokens)
				} else if promptTokens, ok := usage["prompt_tokens"].(int); ok {
					anthropicUsage["input_tokens"] = promptTokens
				}
				if completionTokens, ok := usage["completion_tokens"].(float64); ok {
					anthropicUsage["output_tokens"] = int(completionTokens)
				} else if completionTokens, ok := usage["completion_tokens"].(int); ok {
					anthropicUsage["output_tokens"] = completionTokens
				}
				if len(anthropicUsage) > 0 {
					// Send usage data in a message_delta event
					usageDelta := map[string]interface{}{
						"type":  "message_delta",
						"delta": map[string]interface{}{},
						"usage": anthropicUsage,
					}
					usageJSON, _ := json.Marshal(usageDelta)
					if _, err := fmt.Fprintf(anthropicStream, "data: %s\n\n", usageJSON); err != nil {
						return err
					}
				}
			}

			// Extract choices array
			choices, ok := openAIChunk["choices"].([]interface{})
			if !ok || len(choices) == 0 {
				// No choices to process; usage (if any) was already handled above
				return nil
			}
			choice, ok := choices[0].(map[string]interface{})
			if !ok {
				return nil
			}
			delta, ok := choice["delta"].(map[string]interface{})
			if !ok {
				return nil
			}

			// Handle first chunk - send message_start
			if !messageStarted {
				messageStarted = true
				messageStart := map[string]interface{}{
					"type": "message_start",
					"message": map[string]interface{}{
						"id":            openAIChunk["id"],
						"type":          "message",
						"role":          "assistant",
						"model":         openAIChunk["model"],
						"content":       []interface{}{},
						"stop_reason":   nil,
						"stop_sequence": nil,
						// Empty usage - will be updated in the final chunk
						"usage": map[string]interface{}{},
					},
				}
				startJSON, _ := json.Marshal(messageStart)
				if _, err := fmt.Fprintf(anthropicStream, "data: %s\n\n", startJSON); err != nil {
					return err
				}
			}

			// Handle content
			if content, hasContent := delta["content"].(string); hasContent && content != "" {
				if !contentStarted {
					contentStarted = true
					// Send content_block_start
					blockStart := map[string]interface{}{
						"type":  "content_block_start",
						"index": 0,
						"content_block": map[string]interface{}{
							"type": "text",
							"text": "",
						},
					}
					blockStartJSON, _ := json.Marshal(blockStart)
					if _, err := fmt.Fprintf(anthropicStream, "data: %s\n\n", blockStartJSON); err != nil {
						return err
					}
				}
				// Send content_block_delta
				contentDelta := map[string]interface{}{
					"type":  "content_block_delta",
					"index": 0,
					"delta": map[string]interface{}{
						"type": "text_delta",
						"text": content,
					},
				}
				deltaJSON, _ := json.Marshal(contentDelta)
				if _, err := fmt.Fprintf(anthropicStream, "data: %s\n\n", deltaJSON); err != nil {
					return err
				}
			}
		}
		return nil
	})
	if err != nil {
		return err
	}
	if !sawDone {
		return io.ErrUnexpectedEOF
	}
	return nil
}