package main import ( "bufio" "bytes" "context" "crypto/rand" "crypto/tls" "encoding/base64" "encoding/json" "flag" "fmt" "io" "log" "mime/multipart" "net/http" "os" "os/signal" "regexp" "strings" "sync" "sync/atomic" "syscall" "time" ) // 版本和API常量 const ( Version = "1.0.0" TargetURL = "https://chat.qwen.ai/api/chat/completions" ModelsURL = "https://chat.qwen.ai/api/models" FilesURL = "https://chat.qwen.ai/api/v1/files/" TasksURL = "https://chat.qwen.ai/api/v1/tasks/status/" ) // 默认模型列表(当获取接口失败时使用) var DefaultModels = []string{ "qwen-max-latest", "qwen-plus-latest", "qwen2.5-vl-72b-instruct", "qwen2.5-14b-instruct-1m", "qvq-72b-preview", "qwq-32b-preview", "qwen2.5-coder-32b-instruct", "qwen-turbo-latest", "qwen2.5-72b-instruct", } // 扩展模型变种后缀 var ModelSuffixes = []string{ "", "-thinking", "-search", "-thinking-search", "-draw", } // 日志级别常量 const ( LogLevelDebug = "debug" LogLevelInfo = "info" LogLevelWarn = "warn" LogLevelError = "error" ) // WorkerPool 工作池结构体,用于管理goroutine type WorkerPool struct { taskQueue chan *Task workerCount int shutdownChannel chan struct{} wg sync.WaitGroup } // Task 任务结构体,包含请求处理所需数据 type Task struct { r *http.Request w http.ResponseWriter done chan struct{} reqID string isStream bool apiReq APIRequest path string } // Semaphore 信号量实现,用于限制并发数量 type Semaphore struct { sem chan struct{} } // 配置结构体 type Config struct { Port string Address string LogLevel string DevMode bool MaxRetries int Timeout int VerifySSL bool WorkerCount int QueueSize int MaxConcurrent int APIPrefix string } // APIRequest OpenAI兼容的请求结构体 type APIRequest struct { Model string `json:"model"` Messages []APIMessage `json:"messages"` Stream bool `json:"stream"` Temperature float64 `json:"temperature,omitempty"` MaxTokens int `json:"max_tokens,omitempty"` } // APIMessage 消息结构体 type APIMessage struct { Role string `json:"role"` Content interface{} `json:"content"` FeatureConfig interface{} `json:"feature_config,omitempty"` ChatType string `json:"chat_type,omitempty"` Extra interface{} `json:"extra,omitempty"` } // 内容项目结构体(处理图像等内容) type ContentItem struct { Type string `json:"type,omitempty"` Text string `json:"text,omitempty"` ImageURL *ImageURL `json:"image_url,omitempty"` Image string `json:"image,omitempty"` } // ImageURL 图像URL结构体 type ImageURL struct { URL string `json:"url"` } // QwenRequest 通义千问API请求结构体 type QwenRequest struct { Model string `json:"model"` Messages []APIMessage `json:"messages"` Stream bool `json:"stream"` ChatType string `json:"chat_type,omitempty"` ID string `json:"id,omitempty"` IncrementalOutput bool `json:"incremental_output,omitempty"` Size string `json:"size,omitempty"` } // QwenResponse 通义千问API响应结构体 type QwenResponse struct { Messages []struct { Role string `json:"role"` Content string `json:"content"` Extra struct { Wanx struct { TaskID string `json:"task_id"` } `json:"wanx"` } `json:"extra"` } `json:"messages"` Choices []struct { Delta struct { Content string `json:"content"` } `json:"delta"` FinishReason string `json:"finish_reason"` } `json:"choices"` Usage struct { PromptTokens int `json:"prompt_tokens"` CompletionTokens int `json:"completion_tokens"` TotalTokens int `json:"total_tokens"` } `json:"usage"` } // FileUploadResponse 文件上传响应 type FileUploadResponse struct { ID string `json:"id"` } // TaskStatusResponse 任务状态响应 type TaskStatusResponse struct { Content string `json:"content"` } // StreamChunk OpenAI兼容的流式响应块 type StreamChunk struct { ID string `json:"id"` Object string `json:"object"` Created int64 `json:"created"` Model string `json:"model"` Choices []struct { Index int `json:"index"` Delta struct { Role string `json:"role,omitempty"` Content string `json:"content,omitempty"` } `json:"delta"` FinishReason *string `json:"finish_reason,omitempty"` } `json:"choices"` } // CompletionResponse OpenAI兼容的完成响应 type CompletionResponse struct { ID string `json:"id"` Object string `json:"object"` Created int64 `json:"created"` Model string `json:"model"` Choices []struct { Index int `json:"index"` Message struct { Role string `json:"role"` Content string `json:"content"` } `json:"message"` FinishReason string `json:"finish_reason"` } `json:"choices"` Usage struct { PromptTokens int `json:"prompt_tokens"` CompletionTokens int `json:"completion_tokens"` TotalTokens int `json:"total_tokens"` } `json:"usage"` } // ImagesResponse 图像生成响应 type ImagesResponse struct { Created int64 `json:"created"` Data []ImageURL `json:"data"` } // ImagesRequest 图像生成请求 type ImagesRequest struct { Model string `json:"model"` Prompt string `json:"prompt"` N int `json:"n"` Size string `json:"size"` } // ModelData 模型数据 type ModelData struct { ID string `json:"id"` Object string `json:"object"` Created int64 `json:"created"` OwnedBy string `json:"owned_by"` } // ModelsResponse 模型列表响应 type ModelsResponse struct { Object string `json:"object"` Data []ModelData `json:"data"` } // 全局变量 var ( appConfig *Config logger *log.Logger logLevel string logMutex sync.Mutex workerPool *WorkerPool requestSem *Semaphore requestCount uint64 = 0 countMutex sync.Mutex // 性能指标 requestCounter int64 successCounter int64 errorCounter int64 avgResponseTime int64 queuedRequests int64 rejectedRequests int64 ) // NewSemaphore 创建新的信号量 func NewSemaphore(size int) *Semaphore { return &Semaphore{ sem: make(chan struct{}, size), } } // Acquire 获取信号量(阻塞) func (s *Semaphore) Acquire() { s.sem <- struct{}{} } // Release 释放信号量 func (s *Semaphore) Release() { <-s.sem } // TryAcquire 尝试获取信号量(非阻塞) func (s *Semaphore) TryAcquire() bool { select { case s.sem <- struct{}{}: return true default: return false } } // NewWorkerPool 创建并启动一个新的工作池 func NewWorkerPool(workerCount int, queueSize int) *WorkerPool { pool := &WorkerPool{ taskQueue: make(chan *Task, queueSize), workerCount: workerCount, shutdownChannel: make(chan struct{}), } pool.Start() return pool } // Start 启动工作池中的worker goroutines func (pool *WorkerPool) Start() { // 启动工作goroutine for i := 0; i < pool.workerCount; i++ { pool.wg.Add(1) go func(workerID int) { defer pool.wg.Done() logInfo("Worker %d 已启动", workerID) for { select { case task, ok := <-pool.taskQueue: if !ok { // 队列已关闭,退出worker logInfo("Worker %d 收到队列关闭信号,准备退出", workerID) return } logDebug("Worker %d 处理任务 reqID:%s", workerID, task.reqID) // 处理任务 switch task.path { case "/v1/models": handleModels(task.w, task.r) case "/v1/chat/completions": if task.isStream { handleStreamingRequest(task.w, task.r, task.apiReq, task.reqID) } else { handleNonStreamingRequest(task.w, task.r, task.apiReq, task.reqID) } case "/v1/images/generations": handleImageGenerations(task.w, task.r, task.apiReq, task.reqID) } // 通知任务完成 close(task.done) case <-pool.shutdownChannel: // 收到关闭信号,退出worker logInfo("Worker %d 收到关闭信号,准备退出", workerID) return } } }(i) } } // SubmitTask 提交任务到工作池,非阻塞 func (pool *WorkerPool) SubmitTask(task *Task) (bool, error) { select { case pool.taskQueue <- task: // 任务成功添加到队列 return true, nil default: // 队列已满 return false, fmt.Errorf("任务队列已满") } } // Shutdown 关闭工作池 func (pool *WorkerPool) Shutdown() { logInfo("正在关闭工作池...") // 发送关闭信号给所有worker close(pool.shutdownChannel) // 等待所有worker退出 pool.wg.Wait() // 关闭任务队列 close(pool.taskQueue) logInfo("工作池已关闭") } // 日志函数 func initLogger(level string) { logger = log.New(os.Stdout, "[QwenAPI] ", log.LstdFlags) logLevel = level } func logDebug(format string, v ...interface{}) { if logLevel == LogLevelDebug { logMutex.Lock() logger.Printf("[DEBUG] "+format, v...) logMutex.Unlock() } } func logInfo(format string, v ...interface{}) { if logLevel == LogLevelDebug || logLevel == LogLevelInfo { logMutex.Lock() logger.Printf("[INFO] "+format, v...) logMutex.Unlock() } } func logWarn(format string, v ...interface{}) { if logLevel == LogLevelDebug || logLevel == LogLevelInfo || logLevel == LogLevelWarn { logMutex.Lock() logger.Printf("[WARN] "+format, v...) logMutex.Unlock() } } func logError(format string, v ...interface{}) { logMutex.Lock() logger.Printf("[ERROR] "+format, v...) logMutex.Unlock() // 错误计数 atomic.AddInt64(&errorCounter, 1) } // 解析命令行参数 func parseFlags() *Config { cfg := &Config{} flag.StringVar(&cfg.Port, "port", "8080", "Port to listen on") flag.StringVar(&cfg.Address, "address", "localhost", "Address to listen on") flag.StringVar(&cfg.LogLevel, "log-level", LogLevelInfo, "Log level (debug, info, warn, error)") flag.BoolVar(&cfg.DevMode, "dev", false, "Enable development mode with enhanced logging") flag.IntVar(&cfg.MaxRetries, "max-retries", 3, "Maximum number of retries for failed requests") flag.IntVar(&cfg.Timeout, "timeout", 300, "Request timeout in seconds") flag.BoolVar(&cfg.VerifySSL, "verify-ssl", true, "Verify SSL certificates") flag.IntVar(&cfg.WorkerCount, "workers", 50, "Number of worker goroutines in the pool") flag.IntVar(&cfg.QueueSize, "queue-size", 500, "Size of the task queue") flag.IntVar(&cfg.MaxConcurrent, "max-concurrent", 100, "Maximum number of concurrent requests") flag.StringVar(&cfg.APIPrefix, "api-prefix", "", "API prefix for all endpoints") flag.Parse() // 如果开发模式开启,自动设置日志级别为debug if cfg.DevMode && cfg.LogLevel != LogLevelDebug { cfg.LogLevel = LogLevelDebug fmt.Println("开发模式已启用,日志级别设置为debug") } return cfg } // 从请求头中提取令牌 func extractToken(r *http.Request) (string, error) { // 获取 Authorization 头部 authHeader := r.Header.Get("Authorization") if authHeader == "" { return "", fmt.Errorf("missing Authorization header") } // 验证格式并提取令牌 if !strings.HasPrefix(authHeader, "Bearer ") { return "", fmt.Errorf("invalid Authorization header format, must start with 'Bearer '") } // 提取令牌值 token := strings.TrimPrefix(authHeader, "Bearer ") if token == "" { return "", fmt.Errorf("empty token in Authorization header") } return token, nil } // 设置CORS头 func setCORSHeaders(w http.ResponseWriter) { w.Header().Set("Access-Control-Allow-Origin", "*") w.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS") w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization") } // 生成UUID func generateUUID() string { b := make([]byte, 16) _, err := rand.Read(b) if err != nil { return fmt.Sprintf("%d", time.Now().UnixNano()) } return fmt.Sprintf("%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]) } // 安全的HTTP客户端,支持禁用SSL验证 func getHTTPClient() *http.Client { tr := &http.Transport{ MaxIdleConnsPerHost: 100, IdleConnTimeout: 90 * time.Second, TLSClientConfig: nil, // 默认配置 } // 如果配置了禁用SSL验证 if !appConfig.VerifySSL { tr.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} } return &http.Client{ Timeout: time.Duration(appConfig.Timeout) * time.Second, Transport: tr, } } // 主入口函数 func main() { // 解析配置 appConfig = parseFlags() // 初始化日志 initLogger(appConfig.LogLevel) logInfo("启动服务: 地址=%s, 端口=%s, 版本=%s, 日志级别=%s", appConfig.Address, appConfig.Port, Version, appConfig.LogLevel) // 创建工作池和信号量 workerPool = NewWorkerPool(appConfig.WorkerCount, appConfig.QueueSize) requestSem = NewSemaphore(appConfig.MaxConcurrent) logInfo("工作池已创建: %d个worker, 队列大小为%d", appConfig.WorkerCount, appConfig.QueueSize) // 配置更高的并发处理能力 http.DefaultTransport.(*http.Transport).MaxIdleConnsPerHost = 100 http.DefaultTransport.(*http.Transport).MaxIdleConns = 100 http.DefaultTransport.(*http.Transport).IdleConnTimeout = 90 * time.Second // 创建自定义服务器,支持更高并发 server := &http.Server{ Addr: appConfig.Address + ":" + appConfig.Port, ReadTimeout: time.Duration(appConfig.Timeout) * time.Second, WriteTimeout: time.Duration(appConfig.Timeout) * time.Second, IdleTimeout: 120 * time.Second, Handler: nil, // 使用默认的ServeMux } // API路径前缀 apiPrefix := appConfig.APIPrefix // 创建处理器 http.HandleFunc(apiPrefix+"/v1/models", func(w http.ResponseWriter, r *http.Request) { setCORSHeaders(w) if r.Method == "OPTIONS" { w.WriteHeader(http.StatusOK) return } // 计数 countMutex.Lock() requestCount++ currentCount := requestCount countMutex.Unlock() reqID := generateRequestID() logInfo("[reqID:%s] 收到模型列表请求 #%d", reqID, currentCount) // 请求计数 atomic.AddInt64(&requestCounter, 1) startTime := time.Now() // 创建任务 task := &Task{ r: r, w: w, done: make(chan struct{}), reqID: reqID, path: "/v1/models", } // 尝试获取信号量 if !requestSem.TryAcquire() { // 请求数量超过限制 atomic.AddInt64(&rejectedRequests, 1) logWarn("[reqID:%s] 请求被拒绝: 当前并发请求数已达上限", reqID) w.Header().Set("Retry-After", "30") http.Error(w, "Server is busy, please try again later", http.StatusServiceUnavailable) return } // 释放信号量(在函数返回时) defer requestSem.Release() // 添加到任务队列 atomic.AddInt64(&queuedRequests, 1) submitted, err := workerPool.SubmitTask(task) if !submitted { atomic.AddInt64(&queuedRequests, -1) atomic.AddInt64(&rejectedRequests, 1) logError("[reqID:%s] 提交任务失败: %v", reqID, err) w.Header().Set("Retry-After", "60") http.Error(w, "Server queue is full, please try again later", http.StatusServiceUnavailable) return } logInfo("[reqID:%s] 任务已提交到队列", reqID) // 等待任务完成或超时 select { case <-task.done: // 任务已完成 logInfo("[reqID:%s] 任务已完成", reqID) case <-r.Context().Done(): // 请求被取消或超时 logWarn("[reqID:%s] 请求被取消或超时", reqID) } // 请求处理完成,更新指标 atomic.AddInt64(&queuedRequests, -1) elapsed := time.Since(startTime).Milliseconds() // 更新平均响应时间 atomic.AddInt64(&avgResponseTime, elapsed) if r.Context().Err() == nil { // 成功计数增加 atomic.AddInt64(&successCounter, 1) logInfo("[reqID:%s] 请求处理成功,耗时: %dms", reqID, elapsed) } else { logError("[reqID:%s] 请求处理失败: %v, 耗时: %dms", reqID, r.Context().Err(), elapsed) } }) http.HandleFunc(apiPrefix+"/v1/chat/completions", func(w http.ResponseWriter, r *http.Request) { setCORSHeaders(w) if r.Method == "OPTIONS" { w.WriteHeader(http.StatusOK) return } // 计数器增加 countMutex.Lock() requestCount++ currentCount := requestCount countMutex.Unlock() reqID := generateRequestID() logInfo("[reqID:%s] 收到新请求 #%d", reqID, currentCount) // 请求计数 atomic.AddInt64(&requestCounter, 1) startTime := time.Now() // 尝试获取信号量 if !requestSem.TryAcquire() { // 请求数量超过限制 atomic.AddInt64(&rejectedRequests, 1) logWarn("[reqID:%s] 请求 #%d 被拒绝: 当前并发请求数已达上限", reqID, currentCount) w.Header().Set("Retry-After", "30") http.Error(w, "Server is busy, please try again later", http.StatusServiceUnavailable) return } // 释放信号量(在函数返回时) defer requestSem.Release() // 解析请求体 var apiReq APIRequest if err := json.NewDecoder(r.Body).Decode(&apiReq); err != nil { logError("[reqID:%s] 解析请求失败: %v", reqID, err) http.Error(w, "Invalid request body", http.StatusBadRequest) return } // 创建任务 task := &Task{ r: r, w: w, done: make(chan struct{}), reqID: reqID, isStream: apiReq.Stream, apiReq: apiReq, path: "/v1/chat/completions", } // 添加到任务队列 atomic.AddInt64(&queuedRequests, 1) submitted, err := workerPool.SubmitTask(task) if !submitted { atomic.AddInt64(&queuedRequests, -1) atomic.AddInt64(&rejectedRequests, 1) logError("[reqID:%s] 提交任务失败: %v", reqID, err) w.Header().Set("Retry-After", "60") http.Error(w, "Server queue is full, please try again later", http.StatusServiceUnavailable) return } logInfo("[reqID:%s] 任务已提交到队列", reqID) // 等待任务完成或超时 select { case <-task.done: // 任务已完成 logInfo("[reqID:%s] 任务已完成", reqID) case <-r.Context().Done(): // 请求被取消或超时 logWarn("[reqID:%s] 请求被取消或超时", reqID) } // 请求处理完成,更新指标 atomic.AddInt64(&queuedRequests, -1) elapsed := time.Since(startTime).Milliseconds() // 更新平均响应时间 atomic.AddInt64(&avgResponseTime, elapsed) if r.Context().Err() == nil { // 成功计数增加 atomic.AddInt64(&successCounter, 1) logInfo("[reqID:%s] 请求处理成功,耗时: %dms", reqID, elapsed) } else { logError("[reqID:%s] 请求处理失败: %v, 耗时: %dms", reqID, r.Context().Err(), elapsed) } }) http.HandleFunc(apiPrefix+"/v1/images/generations", func(w http.ResponseWriter, r *http.Request) { setCORSHeaders(w) if r.Method == "OPTIONS" { w.WriteHeader(http.StatusOK) return } // 计数器增加 countMutex.Lock() requestCount++ currentCount := requestCount countMutex.Unlock() reqID := generateRequestID() logInfo("[reqID:%s] 收到图像生成请求 #%d", reqID, currentCount) // 请求计数 atomic.AddInt64(&requestCounter, 1) startTime := time.Now() // 尝试获取信号量 if !requestSem.TryAcquire() { // 请求数量超过限制 atomic.AddInt64(&rejectedRequests, 1) logWarn("[reqID:%s] 请求 #%d 被拒绝: 当前并发请求数已达上限", reqID, currentCount) w.Header().Set("Retry-After", "30") http.Error(w, "Server is busy, please try again later", http.StatusServiceUnavailable) return } // 释放信号量(在函数返回时) defer requestSem.Release() // 解析请求体 var apiReq APIRequest if err := json.NewDecoder(r.Body).Decode(&apiReq); err != nil { logError("[reqID:%s] 解析请求失败: %v", reqID, err) http.Error(w, "Invalid request body", http.StatusBadRequest) return } // 创建任务 task := &Task{ r: r, w: w, done: make(chan struct{}), reqID: reqID, apiReq: apiReq, path: "/v1/images/generations", } // 添加到任务队列 atomic.AddInt64(&queuedRequests, 1) submitted, err := workerPool.SubmitTask(task) if !submitted { atomic.AddInt64(&queuedRequests, -1) atomic.AddInt64(&rejectedRequests, 1) logError("[reqID:%s] 提交任务失败: %v", reqID, err) w.Header().Set("Retry-After", "60") http.Error(w, "Server queue is full, please try again later", http.StatusServiceUnavailable) return } logInfo("[reqID:%s] 任务已提交到队列", reqID) // 等待任务完成或超时 select { case <-task.done: // 任务已完成 logInfo("[reqID:%s] 任务已完成", reqID) case <-r.Context().Done(): // 请求被取消或超时 logWarn("[reqID:%s] 请求被取消或超时", reqID) } // 请求处理完成,更新指标 atomic.AddInt64(&queuedRequests, -1) elapsed := time.Since(startTime).Milliseconds() // 更新平均响应时间 atomic.AddInt64(&avgResponseTime, elapsed) if r.Context().Err() == nil { // 成功计数增加 atomic.AddInt64(&successCounter, 1) logInfo("[reqID:%s] 请求处理成功,耗时: %dms", reqID, elapsed) } else { logError("[reqID:%s] 请求处理失败: %v, 耗时: %dms", reqID, r.Context().Err(), elapsed) } }) // 添加健康检查端点 http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) { setCORSHeaders(w) if r.Method == "OPTIONS" { w.WriteHeader(http.StatusOK) return } // 获取各种计数器的值 reqCount := atomic.LoadInt64(&requestCounter) succCount := atomic.LoadInt64(&successCounter) errCount := atomic.LoadInt64(&errorCounter) queuedCount := atomic.LoadInt64(&queuedRequests) rejectedCount := atomic.LoadInt64(&rejectedRequests) // 计算平均响应时间 var avgTime int64 = 0 if reqCount > 0 { avgTime = atomic.LoadInt64(&avgResponseTime) / reqCount } // 构建响应 stats := map[string]interface{}{ "status": "ok", "version": Version, "requests": reqCount, "success": succCount, "errors": errCount, "queued": queuedCount, "rejected": rejectedCount, "avg_time_ms": avgTime, "worker_count": workerPool.workerCount, "queue_size": len(workerPool.taskQueue), "queue_capacity": cap(workerPool.taskQueue), "queue_percent": float64(len(workerPool.taskQueue)) / float64(cap(workerPool.taskQueue)) * 100, "concurrent_limit": appConfig.MaxConcurrent, } w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) json.NewEncoder(w).Encode(stats) }) // 创建停止通道 stop := make(chan os.Signal, 1) signal.Notify(stop, os.Interrupt, syscall.SIGTERM) // 在goroutine中启动服务器 go func() { logInfo("Starting proxy server on %s", server.Addr) if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { logError("Failed to start server: %v", err) os.Exit(1) } }() // 等待停止信号 <-stop // 创建上下文用于优雅关闭 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() // 优雅关闭服务器 logInfo("Server is shutting down...") if err := server.Shutdown(ctx); err != nil { logError("Server shutdown failed: %v", err) } // 关闭工作池 workerPool.Shutdown() logInfo("Server gracefully stopped") } // 生成请求ID func generateRequestID() string { return fmt.Sprintf("%x", time.Now().UnixNano()) } // 处理模型列表请求 func handleModels(w http.ResponseWriter, r *http.Request) { logInfo("处理模型列表请求") // 从请求中提取token authToken, err := extractToken(r) if err != nil { logWarn("提取token失败: %v", err) // 使用默认模型列表 returnDefaultModels(w) return } // 请求通义千问API获取模型列表 client := getHTTPClient() req, err := http.NewRequest("GET", ModelsURL, nil) if err != nil { logError("创建请求失败: %v", err) returnDefaultModels(w) return } // 设置请求头 req.Header.Set("Authorization", "Bearer "+authToken) req.Header.Set("User-Agent", "Mozilla/5.0") // 发送请求 resp, err := client.Do(req) if err != nil { logError("请求模型列表失败: %v", err) returnDefaultModels(w) return } defer resp.Body.Close() // 检查响应状态 if resp.StatusCode != http.StatusOK { logError("获取模型列表返回非200状态码: %d", resp.StatusCode) returnDefaultModels(w) return } // 解析响应 var qwenResp struct { Data []struct { ID string `json:"id"` } `json:"data"` } if err := json.NewDecoder(resp.Body).Decode(&qwenResp); err != nil { logError("解析模型列表响应失败: %v", err) returnDefaultModels(w) return } // 提取模型ID models := make([]string, 0, len(qwenResp.Data)) for _, model := range qwenResp.Data { models = append(models, model.ID) } // 如果没有获取到模型,使用默认列表 if len(models) == 0 { logWarn("未获取到模型,使用默认列表") returnDefaultModels(w) return } // 扩展模型列表,增加变种后缀 expandedModels := make([]ModelData, 0, len(models)*len(ModelSuffixes)) for _, model := range models { for _, suffix := range ModelSuffixes { expandedModels = append(expandedModels, ModelData{ ID: model + suffix, Object: "model", Created: time.Now().Unix(), OwnedBy: "qwen", }) } } // 构建响应 modelsResp := ModelsResponse{ Object: "list", Data: expandedModels, } // 返回响应 w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(modelsResp) } // 返回默认模型列表 func returnDefaultModels(w http.ResponseWriter) { // 扩展默认模型列表,增加变种后缀 expandedModels := make([]ModelData, 0, len(DefaultModels)*len(ModelSuffixes)) for _, model := range DefaultModels { for _, suffix := range ModelSuffixes { expandedModels = append(expandedModels, ModelData{ ID: model + suffix, Object: "model", Created: time.Now().Unix(), OwnedBy: "qwen", }) } } // 构建响应 modelsResp := ModelsResponse{ Object: "list", Data: expandedModels, } // 返回响应 w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(modelsResp) } // 处理聊天完成请求(流式) func handleStreamingRequest(w http.ResponseWriter, r *http.Request, apiReq APIRequest, reqID string) { logInfo("[reqID:%s] 处理流式请求", reqID) // 从请求中提取token authToken, err := extractToken(r) if err != nil { logError("[reqID:%s] 提取token失败: %v", reqID, err) http.Error(w, "无效的认证信息", http.StatusUnauthorized) return } // 检查消息 if len(apiReq.Messages) == 0 { logError("[reqID:%s] 消息为空", reqID) http.Error(w, "消息为空", http.StatusBadRequest) return } // 准备模型名和聊天类型 modelName := "qwen-turbo-latest" if apiReq.Model != "" { modelName = apiReq.Model } chatType := "t2t" // 处理特殊模型名后缀 if strings.Contains(modelName, "-draw") { handleDrawRequest(w, r, apiReq, reqID, authToken) return } // 处理思考模式 if strings.Contains(modelName, "-thinking") { modelName = strings.Replace(modelName, "-thinking", "", 1) lastMsgIdx := len(apiReq.Messages) - 1 if lastMsgIdx >= 0 { apiReq.Messages[lastMsgIdx].FeatureConfig = map[string]interface{}{ "thinking_enabled": true, } } } // 处理搜索模式 if strings.Contains(modelName, "-search") { modelName = strings.Replace(modelName, "-search", "", 1) chatType = "search" lastMsgIdx := len(apiReq.Messages) - 1 if lastMsgIdx >= 0 { apiReq.Messages[lastMsgIdx].ChatType = "search" } } // 处理图片消息 lastMsgIdx := len(apiReq.Messages) - 1 if lastMsgIdx >= 0 { lastMsg := apiReq.Messages[lastMsgIdx] // 检查内容是否为数组 contentArray, ok := lastMsg.Content.([]interface{}) if ok { // 处理内容数组 for i, item := range contentArray { itemMap, isMap := item.(map[string]interface{}) if !isMap { continue } // 检查是否包含图像URL if imageURL, hasImageURL := itemMap["image_url"]; hasImageURL { imageURLMap, isMap := imageURL.(map[string]interface{}) if !isMap { continue } // 获取URL url, hasURL := imageURLMap["url"].(string) if !hasURL { continue } // 上传图像 imageID, uploadErr := uploadImage(url, authToken) if uploadErr != nil { logError("[reqID:%s] 上传图像失败: %v", reqID, uploadErr) continue } // 替换内容 contentArrayCopy := make([]interface{}, len(contentArray)) copy(contentArrayCopy, contentArray) contentArrayCopy[i] = map[string]interface{}{ "type": "image", "image": imageID, } apiReq.Messages[lastMsgIdx].Content = contentArrayCopy break } } } } // 创建通义千问请求 qwenReq := QwenRequest{ Model: modelName, Messages: apiReq.Messages, Stream: true, ChatType: chatType, ID: generateUUID(), } // 序列化请求 reqData, err := json.Marshal(qwenReq) if err != nil { logError("[reqID:%s] 序列化请求失败: %v", reqID, err) http.Error(w, "内部服务器错误", http.StatusInternalServerError) return } // 创建HTTP请求 req, err := http.NewRequestWithContext(r.Context(), "POST", TargetURL, bytes.NewBuffer(reqData)) if err != nil { logError("[reqID:%s] 创建请求失败: %v", reqID, err) http.Error(w, "内部服务器错误", http.StatusInternalServerError) return } // 设置请求头 req.Header.Set("Content-Type", "application/json") req.Header.Set("Authorization", "Bearer "+authToken) req.Header.Set("User-Agent", "Mozilla/5.0") // 发送请求 client := getHTTPClient() resp, err := client.Do(req) if err != nil { logError("[reqID:%s] 发送请求失败: %v", reqID, err) http.Error(w, "连接到API失败", http.StatusBadGateway) return } defer resp.Body.Close() // 检查响应状态 if resp.StatusCode != http.StatusOK { bodyBytes, _ := io.ReadAll(resp.Body) logError("[reqID:%s] API返回非200状态码: %d, 响应: %s", reqID, resp.StatusCode, string(bodyBytes)) http.Error(w, fmt.Sprintf("API错误,状态码: %d", resp.StatusCode), resp.StatusCode) return } // 设置响应头 w.Header().Set("Content-Type", "text/event-stream") w.Header().Set("Cache-Control", "no-cache") w.Header().Set("Connection", "keep-alive") // 创建响应ID和时间戳 respID := fmt.Sprintf("chatcmpl-%s", generateUUID()) createdTime := time.Now().Unix() // 创建读取器和Flusher reader := bufio.NewReaderSize(resp.Body, 16384) flusher, ok := w.(http.Flusher) if !ok { logError("[reqID:%s] 流式传输不支持", reqID) http.Error(w, "流式传输不支持", http.StatusInternalServerError) return } // 发送角色块 roleChunk := createRoleChunk(respID, createdTime, modelName) w.Write([]byte("data: " + string(roleChunk) + "\n\n")) flusher.Flush() // 用于去重的前一个内容 previousContent := "" // 创建正则表达式来查找 data: 行 dataRegex := regexp.MustCompile(`(?m)^data: (.+)$`) // 持续读取响应 buffer := "" pendingContent := "" // 用于累积内容,解决流处理断开问题 for { // 添加超时检测 select { case <-r.Context().Done(): logWarn("[reqID:%s] 请求超时或被客户端取消", reqID) return default: // 继续处理 } // 读取一块数据 chunk := make([]byte, 4096) n, err := reader.Read(chunk) if err != nil { if err != io.EOF { logError("[reqID:%s] 读取响应出错: %v", reqID, err) return } break } // 添加到缓冲区 buffer += string(chunk[:n]) // 更稳健的处理方式:按行分割并只处理完整行 lines := strings.Split(buffer, "\n") // 保留最后可能不完整的行 if len(lines) > 0 { buffer = lines[len(lines)-1] } // 处理所有完整的行(除最后一行外) for i := 0; i < len(lines)-1; i++ { line := lines[i] if !strings.HasPrefix(line, "data: ") { continue } // 提取数据部分 dataStr := strings.TrimPrefix(line, "data: ") // 处理[DONE]消息 if dataStr == "[DONE]" { logDebug("[reqID:%s] 收到[DONE]消息", reqID) w.Write([]byte("data: [DONE]\n\n")) flusher.Flush() continue } // 解析JSON var qwenResp QwenResponse if err := json.Unmarshal([]byte(dataStr), &qwenResp); err != nil { logWarn("[reqID:%s] 解析JSON失败: %v, data: %s", reqID, err, dataStr) continue } // 处理块 for _, choice := range qwenResp.Choices { content := choice.Delta.Content // 改进去重逻辑 - 只处理重复前缀 if previousContent != "" && strings.HasPrefix(content, previousContent) { // 计算新增内容 newContent := content[len(previousContent):] if newContent != "" { // 创建内容块 - 只发送新部分 contentChunk := createContentChunk(respID, createdTime, modelName, newContent) w.Write([]byte("data: " + string(contentChunk) + "\n\n")) flusher.Flush() pendingContent += newContent // 累积内容 } } else if content != "" { // 直接发送完整内容 contentChunk := createContentChunk(respID, createdTime, modelName, content) w.Write([]byte("data: " + string(contentChunk) + "\n\n")) flusher.Flush() pendingContent += content // 累积内容 } // 更新前一个内容为完整内容 if content != "" { previousContent = content } // 处理完成标志 if choice.FinishReason != "" { finishReason := choice.FinishReason doneChunk := createDoneChunk(respID, createdTime, modelName, finishReason) w.Write([]byte("data: " + string(doneChunk) + "\n\n")) flusher.Flush() } } } } // 检查是否有累积的内容需要作为最终响应 if pendingContent != "" { logInfo("[reqID:%s] 流处理完成,累积内容长度: %d", reqID, len(pendingContent)) } // 发送结束信号(如果没有正常结束) finishReason := "stop" doneChunk := createDoneChunk(respID, createdTime, modelName, finishReason) w.Write([]byte("data: " + string(doneChunk) + "\n\n")) w.Write([]byte("data: [DONE]\n\n")) flusher.Flush() } // 处理聊天完成请求(非流式) func handleNonStreamingRequest(w http.ResponseWriter, r *http.Request, apiReq APIRequest, reqID string) { logInfo("[reqID:%s] 处理非流式请求", reqID) // 从请求中提取token authToken, err := extractToken(r) if err != nil { logError("[reqID:%s] 提取token失败: %v", reqID, err) http.Error(w, "无效的认证信息", http.StatusUnauthorized) return } // 检查消息 if len(apiReq.Messages) == 0 { logError("[reqID:%s] 消息为空", reqID) http.Error(w, "消息为空", http.StatusBadRequest) return } // 准备模型名和聊天类型 modelName := "qwen-turbo-latest" if apiReq.Model != "" { modelName = apiReq.Model } chatType := "t2t" // 处理特殊模型名后缀 if strings.Contains(modelName, "-draw") { handleDrawRequest(w, r, apiReq, reqID, authToken) return } // 处理思考模式 if strings.Contains(modelName, "-thinking") { modelName = strings.Replace(modelName, "-thinking", "", 1) lastMsgIdx := len(apiReq.Messages) - 1 if lastMsgIdx >= 0 { apiReq.Messages[lastMsgIdx].FeatureConfig = map[string]interface{}{ "thinking_enabled": true, } } } // 处理搜索模式 if strings.Contains(modelName, "-search") { modelName = strings.Replace(modelName, "-search", "", 1) chatType = "search" lastMsgIdx := len(apiReq.Messages) - 1 if lastMsgIdx >= 0 { apiReq.Messages[lastMsgIdx].ChatType = "search" } } // 处理图片消息 lastMsgIdx := len(apiReq.Messages) - 1 if lastMsgIdx >= 0 { lastMsg := apiReq.Messages[lastMsgIdx] // 检查内容是否为数组 contentArray, ok := lastMsg.Content.([]interface{}) if ok { // 处理内容数组 for i, item := range contentArray { itemMap, isMap := item.(map[string]interface{}) if !isMap { continue } // 检查是否包含图像URL if imageURL, hasImageURL := itemMap["image_url"]; hasImageURL { imageURLMap, isMap := imageURL.(map[string]interface{}) if !isMap { continue } // 获取URL url, hasURL := imageURLMap["url"].(string) if !hasURL { continue } // 上传图像 imageID, uploadErr := uploadImage(url, authToken) if uploadErr != nil { logError("[reqID:%s] 上传图像失败: %v", reqID, uploadErr) continue } // 替换内容 contentArrayCopy := make([]interface{}, len(contentArray)) copy(contentArrayCopy, contentArray) contentArrayCopy[i] = map[string]interface{}{ "type": "image", "image": imageID, } apiReq.Messages[lastMsgIdx].Content = contentArrayCopy break } } } } // 创建通义千问请求 - 通过流式请求来获取非流式响应 qwenReq := QwenRequest{ Model: modelName, Messages: apiReq.Messages, Stream: true, // 使用流式API ChatType: chatType, ID: generateUUID(), } // 序列化请求 reqData, err := json.Marshal(qwenReq) if err != nil { logError("[reqID:%s] 序列化请求失败: %v", reqID, err) http.Error(w, "内部服务器错误", http.StatusInternalServerError) return } // 创建HTTP请求 req, err := http.NewRequestWithContext(r.Context(), "POST", TargetURL, bytes.NewBuffer(reqData)) if err != nil { logError("[reqID:%s] 创建请求失败: %v", reqID, err) http.Error(w, "内部服务器错误", http.StatusInternalServerError) return } // 设置请求头 req.Header.Set("Content-Type", "application/json") req.Header.Set("Authorization", "Bearer "+authToken) req.Header.Set("User-Agent", "Mozilla/5.0") // 发送请求 client := getHTTPClient() resp, err := client.Do(req) if err != nil { logError("[reqID:%s] 发送请求失败: %v", reqID, err) http.Error(w, "连接到API失败", http.StatusBadGateway) return } defer resp.Body.Close() // 检查响应状态 if resp.StatusCode != http.StatusOK { bodyBytes, _ := io.ReadAll(resp.Body) logError("[reqID:%s] API返回非200状态码: %d, 响应: %s", reqID, resp.StatusCode, string(bodyBytes)) http.Error(w, fmt.Sprintf("API错误,状态码: %d", resp.StatusCode), resp.StatusCode) return } // 从流式响应中提取完整内容 fullContent, err := extractFullContentFromStream(resp.Body, reqID) if err != nil { logError("[reqID:%s] 提取内容失败: %v", reqID, err) http.Error(w, "解析响应失败", http.StatusInternalServerError) return } // 创建非流式响应 completionResponse := CompletionResponse{ ID: fmt.Sprintf("chatcmpl-%s", generateUUID()), Object: "chat.completion", Created: time.Now().Unix(), Model: modelName, Choices: []struct { Index int `json:"index"` Message struct { Role string `json:"role"` Content string `json:"content"` } `json:"message"` FinishReason string `json:"finish_reason"` }{ { Index: 0, Message: struct { Role string `json:"role"` Content string `json:"content"` }{ Role: "assistant", Content: fullContent, }, FinishReason: "stop", }, }, Usage: struct { PromptTokens int `json:"prompt_tokens"` CompletionTokens int `json:"completion_tokens"` TotalTokens int `json:"total_tokens"` }{ PromptTokens: estimateTokens(apiReq.Messages), CompletionTokens: len(fullContent) / 4, TotalTokens: estimateTokens(apiReq.Messages) + len(fullContent)/4, }, } // 返回响应 w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(completionResponse) } // 处理图像生成请求 func handleImageGenerations(w http.ResponseWriter, r *http.Request, apiReq APIRequest, reqID string) { logInfo("[reqID:%s] 处理图像生成请求", reqID) // 从请求中提取token authToken, err := extractToken(r) if err != nil { logError("[reqID:%s] 提取token失败: %v", reqID, err) http.Error(w, "无效的认证信息", http.StatusUnauthorized) return } // 解析图像生成请求 var imgReq ImagesRequest if err := json.NewDecoder(r.Body).Decode(&imgReq); err != nil { logError("[reqID:%s] 解析图像请求失败: %v", reqID, err) http.Error(w, "无效的请求体", http.StatusBadRequest) return } // 默认值设置 if imgReq.Model == "" { imgReq.Model = "qwen-max-latest-draw" } if imgReq.Size == "" { imgReq.Size = "1024*1024" } if imgReq.N <= 0 { imgReq.N = 1 } // 获取纯模型名(去除-draw后缀) modelName := strings.Replace(imgReq.Model, "-draw", "", 1) modelName = strings.Replace(modelName, "-thinking", "", 1) modelName = strings.Replace(modelName, "-search", "", 1) // 创建图像生成任务 qwenReq := QwenRequest{ Stream: false, IncrementalOutput: true, ChatType: "t2i", Model: modelName, Messages: []APIMessage{ { Role: "user", Content: imgReq.Prompt, ChatType: "t2i", Extra: map[string]interface{}{}, FeatureConfig: map[string]interface{}{ "thinking_enabled": false, }, }, }, ID: generateUUID(), Size: imgReq.Size, } // 序列化请求 reqData, err := json.Marshal(qwenReq) if err != nil { logError("[reqID:%s] 序列化请求失败: %v", reqID, err) http.Error(w, "内部服务器错误", http.StatusInternalServerError) return } // 创建HTTP请求 req, err := http.NewRequestWithContext(r.Context(), "POST", TargetURL, bytes.NewBuffer(reqData)) if err != nil { logError("[reqID:%s] 创建请求失败: %v", reqID, err) http.Error(w, "内部服务器错误", http.StatusInternalServerError) return } // 设置请求头 req.Header.Set("Content-Type", "application/json") req.Header.Set("Authorization", "Bearer "+authToken) req.Header.Set("User-Agent", "Mozilla/5.0") // 发送请求 client := getHTTPClient() resp, err := client.Do(req) if err != nil { logError("[reqID:%s] 发送请求失败: %v", reqID, err) http.Error(w, "连接到API失败", http.StatusBadGateway) return } defer resp.Body.Close() // 检查响应状态 if resp.StatusCode != http.StatusOK { bodyBytes, _ := io.ReadAll(resp.Body) logError("[reqID:%s] API返回非200状态码: %d, 响应: %s", reqID, resp.StatusCode, string(bodyBytes)) http.Error(w, fmt.Sprintf("API错误,状态码: %d", resp.StatusCode), resp.StatusCode) return } // 解析响应获取任务ID var qwenResp QwenResponse if err := json.NewDecoder(resp.Body).Decode(&qwenResp); err != nil { logError("[reqID:%s] 解析响应失败: %v", reqID, err) http.Error(w, "解析响应失败", http.StatusInternalServerError) return } // 提取任务ID taskID := "" for _, msg := range qwenResp.Messages { if msg.Role == "assistant" && msg.Extra.Wanx.TaskID != "" { taskID = msg.Extra.Wanx.TaskID break } } if taskID == "" { logError("[reqID:%s] 无法获取图像生成任务ID", reqID) http.Error(w, "无法获取图像生成任务ID", http.StatusInternalServerError) return } // 轮询等待图像生成完成 var imageURL string for i := 0; i < 30; i++ { select { case <-r.Context().Done(): logWarn("[reqID:%s] 请求超时或被客户端取消", reqID) http.Error(w, "请求超时", http.StatusGatewayTimeout) return default: // 继续处理 } // 检查任务状态 statusURL := TasksURL + taskID statusReq, err := http.NewRequestWithContext(r.Context(), "GET", statusURL, nil) if err != nil { logError("[reqID:%s] 创建状态请求失败: %v", reqID, err) time.Sleep(6 * time.Second) continue } // 设置请求头 statusReq.Header.Set("Authorization", "Bearer "+authToken) statusReq.Header.Set("User-Agent", "Mozilla/5.0") // 发送请求 statusResp, err := client.Do(statusReq) if err != nil { logError("[reqID:%s] 发送状态请求失败: %v", reqID, err) time.Sleep(6 * time.Second) continue } // 解析响应 var statusData TaskStatusResponse if err := json.NewDecoder(statusResp.Body).Decode(&statusData); err != nil { logError("[reqID:%s] 解析状态响应失败: %v", reqID, err) statusResp.Body.Close() time.Sleep(6 * time.Second) continue } statusResp.Body.Close() // 检查是否有内容 if statusData.Content != "" { imageURL = statusData.Content break } time.Sleep(6 * time.Second) } if imageURL == "" { logError("[reqID:%s] 图像生成超时", reqID) http.Error(w, "图像生成超时", http.StatusGatewayTimeout) return } // 构造图像列表 images := make([]ImageURL, imgReq.N) for i := 0; i < imgReq.N; i++ { images[i] = ImageURL{URL: imageURL} } // 返回响应 imgResp := ImagesResponse{ Created: time.Now().Unix(), Data: images, } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(imgResp) } // 处理特殊的绘图请求 func handleDrawRequest(w http.ResponseWriter, r *http.Request, apiReq APIRequest, reqID string, authToken string) { logInfo("[reqID:%s] 处理绘图请求", reqID) // 获取绘图提示 var prompt string if len(apiReq.Messages) > 0 { lastMsg := apiReq.Messages[len(apiReq.Messages)-1] prompt, _ = lastMsg.Content.(string) } if prompt == "" { logError("[reqID:%s] 绘图提示为空", reqID) http.Error(w, "绘图提示为空", http.StatusBadRequest) return } // 准备绘图请求参数 size := "1024*1024" modelName := strings.Replace(apiReq.Model, "-draw", "", 1) modelName = strings.Replace(modelName, "-thinking", "", 1) modelName = strings.Replace(modelName, "-search", "", 1) // 创建绘图请求 qwenReq := QwenRequest{ Stream: false, IncrementalOutput: true, ChatType: "t2i", Model: modelName, Messages: []APIMessage{ { Role: "user", Content: prompt, ChatType: "t2i", Extra: map[string]interface{}{}, FeatureConfig: map[string]interface{}{ "thinking_enabled": false, }, }, }, ID: generateUUID(), Size: size, } // 序列化请求 reqData, err := json.Marshal(qwenReq) if err != nil { logError("[reqID:%s] 序列化请求失败: %v", reqID, err) http.Error(w, "内部服务器错误", http.StatusInternalServerError) return } // 创建HTTP请求 req, err := http.NewRequestWithContext(r.Context(), "POST", TargetURL, bytes.NewBuffer(reqData)) if err != nil { logError("[reqID:%s] 创建请求失败: %v", reqID, err) http.Error(w, "内部服务器错误", http.StatusInternalServerError) return } // 设置请求头 req.Header.Set("Content-Type", "application/json") req.Header.Set("Authorization", "Bearer "+authToken) req.Header.Set("User-Agent", "Mozilla/5.0") // 发送请求 client := getHTTPClient() resp, err := client.Do(req) if err != nil { logError("[reqID:%s] 发送请求失败: %v", reqID, err) http.Error(w, "连接到API失败", http.StatusBadGateway) return } defer resp.Body.Close() // 检查响应状态 if resp.StatusCode != http.StatusOK { bodyBytes, _ := io.ReadAll(resp.Body) logError("[reqID:%s] API返回非200状态码: %d, 响应: %s", reqID, resp.StatusCode, string(bodyBytes)) http.Error(w, fmt.Sprintf("API错误,状态码: %d", resp.StatusCode), resp.StatusCode) return } // 解析响应获取任务ID var qwenResp QwenResponse if err := json.NewDecoder(resp.Body).Decode(&qwenResp); err != nil { logError("[reqID:%s] 解析响应失败: %v", reqID, err) http.Error(w, "解析响应失败", http.StatusInternalServerError) return } // 提取任务ID taskID := "" for _, msg := range qwenResp.Messages { if msg.Role == "assistant" && msg.Extra.Wanx.TaskID != "" { taskID = msg.Extra.Wanx.TaskID break } } if taskID == "" { logError("[reqID:%s] 无法获取图像生成任务ID", reqID) http.Error(w, "无法获取图像生成任务ID", http.StatusInternalServerError) return } // 轮询等待图像生成完成 var imageURL string for i := 0; i < 30; i++ { select { case <-r.Context().Done(): logWarn("[reqID:%s] 请求超时或被客户端取消", reqID) http.Error(w, "请求超时", http.StatusGatewayTimeout) return default: // 继续处理 } // 检查任务状态 statusURL := TasksURL + taskID statusReq, err := http.NewRequestWithContext(r.Context(), "GET", statusURL, nil) if err != nil { logError("[reqID:%s] 创建状态请求失败: %v", reqID, err) time.Sleep(6 * time.Second) continue } // 设置请求头 statusReq.Header.Set("Authorization", "Bearer "+authToken) statusReq.Header.Set("User-Agent", "Mozilla/5.0") // 发送请求 statusResp, err := client.Do(statusReq) if err != nil { logError("[reqID:%s] 发送状态请求失败: %v", reqID, err) time.Sleep(6 * time.Second) continue } // 解析响应 var statusData TaskStatusResponse if err := json.NewDecoder(statusResp.Body).Decode(&statusData); err != nil { logError("[reqID:%s] 解析状态响应失败: %v", reqID, err) statusResp.Body.Close() time.Sleep(6 * time.Second) continue } statusResp.Body.Close() // 检查是否有内容 if statusData.Content != "" { imageURL = statusData.Content break } time.Sleep(6 * time.Second) } if imageURL == "" { logError("[reqID:%s] 图像生成超时", reqID) http.Error(w, "图像生成超时", http.StatusGatewayTimeout) return } // 返回OpenAI标准格式响应(使用Markdown嵌入图片) completionResponse := CompletionResponse{ ID: fmt.Sprintf("chatcmpl-%s", generateUUID()), Object: "chat.completion", Created: time.Now().Unix(), Model: apiReq.Model, Choices: []struct { Index int `json:"index"` Message struct { Role string `json:"role"` Content string `json:"content"` } `json:"message"` FinishReason string `json:"finish_reason"` }{ { Index: 0, Message: struct { Role string `json:"role"` Content string `json:"content"` }{ Role: "assistant", Content: fmt.Sprintf("![%s](%s)", imageURL, imageURL), }, FinishReason: "stop", }, }, Usage: struct { PromptTokens int `json:"prompt_tokens"` CompletionTokens int `json:"completion_tokens"` TotalTokens int `json:"total_tokens"` }{ PromptTokens: 1024, CompletionTokens: 1024, TotalTokens: 2048, }, } // 返回响应 w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(completionResponse) } // 从流式响应中提取完整内容 func extractFullContentFromStream(body io.ReadCloser, reqID string) (string, error) { var contentBuilder strings.Builder // 创建读取器 reader := bufio.NewReaderSize(body, 16384) // 持续读取响应 buffer := "" for { // 读取一块数据 chunk := make([]byte, 4096) n, err := reader.Read(chunk) if err != nil { if err != io.EOF { return contentBuilder.String(), err } break } // 添加到缓冲区 buffer += string(chunk[:n]) // 更稳健的处理方式:按行分割并只处理完整行 lines := strings.Split(buffer, "\n") // 保留最后可能不完整的行 if len(lines) > 0 { buffer = lines[len(lines)-1] } // 处理所有完整的行(除最后一行外) for i := 0; i < len(lines)-1; i++ { line := lines[i] if !strings.HasPrefix(line, "data: ") { continue } // 提取数据部分 dataStr := strings.TrimPrefix(line, "data: ") // 处理[DONE]消息 if dataStr == "[DONE]" { logDebug("[reqID:%s] 非流式模式收到[DONE]消息", reqID) continue } // 解析JSON var qwenResp QwenResponse if err := json.Unmarshal([]byte(dataStr), &qwenResp); err != nil { logWarn("[reqID:%s] 解析JSON失败: %v, data: %s", reqID, err, dataStr) continue } // 提取内容 - 累积所有delta内容片段 for _, choice := range qwenResp.Choices { if choice.Delta.Content != "" { contentBuilder.WriteString(choice.Delta.Content) } } } } // 记录提取的内容长度 contentStr := contentBuilder.String() logInfo("[reqID:%s] 非流式模式:成功提取完整内容,长度: %d", reqID, len(contentStr)) return contentStr, nil } // 上传图像到千问API func uploadImage(base64Data string, authToken string) (string, error) { // 从base64数据中提取图片数据 if !strings.HasPrefix(base64Data, "data:") { return "", fmt.Errorf("invalid base64 data format") } parts := strings.SplitN(base64Data, ",", 2) if len(parts) != 2 { return "", fmt.Errorf("invalid base64 data format") } imageData, err := base64.StdEncoding.DecodeString(parts[1]) if err != nil { return "", fmt.Errorf("failed to decode base64 data: %v", err) } // 创建multipart表单 body := bytes.Buffer{} writer := multipart.NewWriter(&body) // 添加文件 part, err := writer.CreateFormFile("file", fmt.Sprintf("image-%d.jpg", time.Now().UnixNano())) if err != nil { return "", fmt.Errorf("failed to create form file: %v", err) } if _, err := part.Write(imageData); err != nil { return "", fmt.Errorf("failed to write image data: %v", err) } // 关闭writer if err := writer.Close(); err != nil { return "", fmt.Errorf("failed to close writer: %v", err) } // 创建HTTP请求 req, err := http.NewRequest("POST", FilesURL, &body) if err != nil { return "", fmt.Errorf("failed to create request: %v", err) } // 设置请求头 req.Header.Set("Content-Type", writer.FormDataContentType()) req.Header.Set("Authorization", "Bearer "+authToken) req.Header.Set("User-Agent", "Mozilla/5.0") // 发送请求 client := getHTTPClient() resp, err := client.Do(req) if err != nil { return "", fmt.Errorf("failed to send request: %v", err) } defer resp.Body.Close() // 检查响应状态 if resp.StatusCode != http.StatusOK { bodyBytes, _ := io.ReadAll(resp.Body) return "", fmt.Errorf("API returned non-200 status code: %d, response: %s", resp.StatusCode, string(bodyBytes)) } // 解析响应 var uploadResp FileUploadResponse if err := json.NewDecoder(resp.Body).Decode(&uploadResp); err != nil { return "", fmt.Errorf("failed to parse response: %v", err) } return uploadResp.ID, nil } // 创建角色块 func createRoleChunk(id string, created int64, model string) []byte { chunk := StreamChunk{ ID: id, Object: "chat.completion.chunk", Created: created, Model: model, Choices: []struct { Index int `json:"index"` Delta struct { Role string `json:"role,omitempty"` Content string `json:"content,omitempty"` } `json:"delta"` FinishReason *string `json:"finish_reason,omitempty"` }{ { Index: 0, Delta: struct { Role string `json:"role,omitempty"` Content string `json:"content,omitempty"` }{ Role: "assistant", }, }, }, } data, _ := json.Marshal(chunk) return data } // 创建内容块 func createContentChunk(id string, created int64, model string, content string) []byte { chunk := StreamChunk{ ID: id, Object: "chat.completion.chunk", Created: created, Model: model, Choices: []struct { Index int `json:"index"` Delta struct { Role string `json:"role,omitempty"` Content string `json:"content,omitempty"` } `json:"delta"` FinishReason *string `json:"finish_reason,omitempty"` }{ { Index: 0, Delta: struct { Role string `json:"role,omitempty"` Content string `json:"content,omitempty"` }{ Content: content, }, }, }, } data, _ := json.Marshal(chunk) return data } // 创建完成块 func createDoneChunk(id string, created int64, model string, reason string) []byte { finishReason := reason chunk := StreamChunk{ ID: id, Object: "chat.completion.chunk", Created: created, Model: model, Choices: []struct { Index int `json:"index"` Delta struct { Role string `json:"role,omitempty"` Content string `json:"content,omitempty"` } `json:"delta"` FinishReason *string `json:"finish_reason,omitempty"` }{ { Index: 0, Delta: struct { Role string `json:"role,omitempty"` Content string `json:"content,omitempty"` }{}, FinishReason: &finishReason, }, }, } data, _ := json.Marshal(chunk) return data } // 估算tokens(简单实现) func estimateTokens(messages []APIMessage) int { var total int for _, msg := range messages { switch content := msg.Content.(type) { case string: total += len(content) / 4 case []interface{}: for _, item := range content { if itemMap, ok := item.(map[string]interface{}); ok { if text, ok := itemMap["text"].(string); ok { total += len(text) / 4 } } } } } return total }