package main

import (
	"bufio"
	"bytes"
	"context"
	"crypto/tls"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log"
	"math/rand"
	"net/http"
	"os"
	"os/signal"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"
)

// WorkerPool manages a fixed set of worker goroutines that consume tasks
// from a bounded queue.
type WorkerPool struct {
	taskQueue       chan *Task
	workerCount     int
	shutdownChannel chan struct{}
	wg              sync.WaitGroup
}

// Task carries everything a worker needs to handle one proxied request.
type Task struct {
	r          *http.Request
	w          http.ResponseWriter
	done       chan struct{}
	reqID      string
	isStream   bool
	hunyuanReq HunyuanRequest
}

// NewWorkerPool creates a worker pool and immediately starts its workers.
func NewWorkerPool(workerCount int, queueSize int) *WorkerPool {
	pool := &WorkerPool{
		taskQueue:       make(chan *Task, queueSize),
		workerCount:     workerCount,
		shutdownChannel: make(chan struct{}),
	}

	pool.Start()
	return pool
}

// Start launches the pool's worker goroutines.
func (pool *WorkerPool) Start() {
	// Launch the worker goroutines.
	for i := 0; i < pool.workerCount; i++ {
		pool.wg.Add(1)
		go func(workerID int) {
			defer pool.wg.Done()

			logInfo("Worker %d started", workerID)

			for {
				select {
				case task, ok := <-pool.taskQueue:
					if !ok {
						// The queue was closed; exit the worker.
						logInfo("Worker %d: task queue closed, exiting", workerID)
						return
					}

					logDebug("Worker %d handling task reqID:%s", workerID, task.reqID)

					// Handle the task.
					if task.isStream {
						if err := handleStreamingRequest(task.w, task.r, task.hunyuanReq, task.reqID); err != nil {
							logError("Worker %d failed to handle streaming task: %v", workerID, err)
						}
					} else {
						if err := handleNonStreamingRequest(task.w, task.r, task.hunyuanReq, task.reqID); err != nil {
							logError("Worker %d failed to handle non-streaming task: %v", workerID, err)
						}
					}

					// Signal task completion to the submitter.
					close(task.done)

				case <-pool.shutdownChannel:
					// Shutdown signal received; exit the worker.
					logInfo("Worker %d received shutdown signal, exiting", workerID)
					return
				}
			}
		}(i)
	}
}

// SubmitTask enqueues a task without blocking. It returns false with an
// error when the queue is full.
func (pool *WorkerPool) SubmitTask(task *Task) (bool, error) {
	select {
	case pool.taskQueue <- task:
		// Task accepted into the queue.
		return true, nil
	default:
		// Queue is full.
		return false, fmt.Errorf("task queue is full")
	}
}

// Shutdown shuts down the worker pool.
func (pool *WorkerPool) Shutdown() {
	logInfo("Shutting down worker pool...")

	// Signal all workers to stop.
	close(pool.shutdownChannel)

	// Wait for every worker to exit.
	pool.wg.Wait()

	// Close the task queue.
	close(pool.taskQueue)

	logInfo("Worker pool shut down")
}

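// Usage sketch for the pool (illustrative only, not called anywhere in this
// program; the task fields besides done/reqID are left at their zero values):
//
//	pool := NewWorkerPool(4, 64)
//	task := &Task{done: make(chan struct{}), reqID: "example"}
//	if ok, err := pool.SubmitTask(task); !ok {
//		log.Printf("submit failed: %v", err)
//	} else {
//		<-task.done // a worker closes done when it finishes the task
//	}
//	pool.Shutdown()
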
// Semaphore is a counting semaphore used to cap concurrency.
type Semaphore struct {
	sem chan struct{}
}

// NewSemaphore creates a semaphore with the given capacity.
func NewSemaphore(size int) *Semaphore {
	return &Semaphore{
		sem: make(chan struct{}, size),
	}
}

// Acquire takes a slot, blocking until one is available.
func (s *Semaphore) Acquire() {
	s.sem <- struct{}{}
}

// Release returns a slot.
func (s *Semaphore) Release() {
	<-s.sem
}

// TryAcquire takes a slot without blocking; it returns false if none is free.
func (s *Semaphore) TryAcquire() bool {
	select {
	case s.sem <- struct{}{}:
		return true
	default:
		return false
	}
}

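// Usage sketch (illustrative only): gating work with the non-blocking
// acquire, mirroring how the /v1/chat/completions handler below rejects
// excess requests instead of queueing them.
//
//	sem := NewSemaphore(2)
//	if !sem.TryAcquire() {
//		return // over the limit: fail fast, e.g. respond 503
//	}
//	defer sem.Release()
//	// ... do bounded work ...
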
// Config holds the command-line configuration.
type Config struct {
	Port          string // port the proxy listens on
	Address       string // address the proxy listens on
	LogLevel      string // log level
	DevMode       bool   // development-mode flag
	MaxRetries    int    // maximum number of retries
	Timeout       int    // request timeout in seconds
	VerifySSL     bool   // whether to verify SSL certificates
	ModelName     string // default model name
	BearerToken   string // bearer token (a public default is provided)
	WorkerCount   int    // number of workers in the pool
	QueueSize     int    // task queue size
	MaxConcurrent int    // maximum number of concurrent requests
}

// Supported model list.
var SupportedModels = []string{
	"hunyuan-t1-latest",
	"hunyuan-turbos-latest",
}

// Tencent Hunyuan API target URL.
const (
	TargetURL = "https://llm.hunyuan.tencent.com/aide/api/v2/triton_image/demo_text_chat/"
	Version   = "1.0.0" // version number
)

// Log levels.
const (
	LogLevelDebug = "debug"
	LogLevelInfo  = "info"
	LogLevelWarn  = "warn"
	LogLevelError = "error"
)

// parseFlags parses the command-line flags and returns a Config.
func parseFlags() *Config {
	cfg := &Config{}
	flag.StringVar(&cfg.Port, "port", "6666", "Port to listen on")
	flag.StringVar(&cfg.Address, "address", "localhost", "Address to listen on")
	flag.StringVar(&cfg.LogLevel, "log-level", LogLevelInfo, "Log level (debug, info, warn, error)")
	flag.BoolVar(&cfg.DevMode, "dev", false, "Enable development mode with enhanced logging")
	flag.IntVar(&cfg.MaxRetries, "max-retries", 3, "Maximum number of retries for failed requests")
	flag.IntVar(&cfg.Timeout, "timeout", 300, "Request timeout in seconds")
	flag.BoolVar(&cfg.VerifySSL, "verify-ssl", true, "Verify SSL certificates")
	flag.StringVar(&cfg.ModelName, "model", "hunyuan-t1-latest", "Default Hunyuan model name")
	flag.StringVar(&cfg.BearerToken, "token", "7auGXNATFSKl7dF", "Bearer token for Hunyuan API")
	flag.IntVar(&cfg.WorkerCount, "workers", 50, "Number of worker goroutines in the pool")
	flag.IntVar(&cfg.QueueSize, "queue-size", 500, "Size of the task queue")
	flag.IntVar(&cfg.MaxConcurrent, "max-concurrent", 100, "Maximum number of concurrent requests")
	flag.Parse()

	// Development mode forces the debug log level.
	if cfg.DevMode && cfg.LogLevel != LogLevelDebug {
		cfg.LogLevel = LogLevelDebug
		fmt.Println("Development mode enabled; log level set to debug")
	}

	return cfg
}

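// Example invocation (hypothetical flag values; run from the package
// directory):
//
//	go run . -address 0.0.0.0 -port 8080 -log-level debug -workers 20
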
// Global configuration.
var (
	appConfig *Config
)

// Performance metrics.
var (
	requestCounter   int64
	successCounter   int64
	errorCounter     int64
	avgResponseTime  int64     // accumulated response time in ms; divide by requestCounter for the average
	latencyHistogram [10]int64 // 0-100ms, 100-200ms, ..., >=900ms
	queuedRequests   int64     // requests currently queued
	rejectedRequests int64     // requests rejected
)

// Concurrency-control components.
var (
	workerPool *WorkerPool // worker pool
	requestSem *Semaphore  // request semaphore
)

// Logger.
var (
	logger   *log.Logger
	logLevel string
	logMutex sync.Mutex
)

// initLogger initializes the logger.
func initLogger(level string) {
	logger = log.New(os.Stdout, "[HunyuanAPI] ", log.LstdFlags)
	logLevel = level
}

// Leveled logging helpers.
func logDebug(format string, v ...interface{}) {
	if logLevel == LogLevelDebug {
		logMutex.Lock()
		logger.Printf("[DEBUG] "+format, v...)
		logMutex.Unlock()
	}
}

func logInfo(format string, v ...interface{}) {
	if logLevel == LogLevelDebug || logLevel == LogLevelInfo {
		logMutex.Lock()
		logger.Printf("[INFO] "+format, v...)
		logMutex.Unlock()
	}
}

func logWarn(format string, v ...interface{}) {
	if logLevel == LogLevelDebug || logLevel == LogLevelInfo || logLevel == LogLevelWarn {
		logMutex.Lock()
		logger.Printf("[WARN] "+format, v...)
		logMutex.Unlock()
	}
}

func logError(format string, v ...interface{}) {
	logMutex.Lock()
	logger.Printf("[ERROR] "+format, v...)
	logMutex.Unlock()

	// Count the error.
	atomic.AddInt64(&errorCounter, 1)
}

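// Sample log line as produced by these helpers (timestamp illustrative):
//
//	[HunyuanAPI] 2025/01/01 12:00:00 [INFO] Worker 0 started
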
// APIMessage is the OpenAI/DeepSeek message format.
type APIMessage struct {
	Role    string      `json:"role"`
	Content interface{} `json:"content"` // interface{} so any content type is accepted
}

// APIRequest is the OpenAI/DeepSeek request format.
type APIRequest struct {
	Model       string       `json:"model"`
	Messages    []APIMessage `json:"messages"`
	Stream      bool         `json:"stream"`
	Temperature float64      `json:"temperature,omitempty"`
	MaxTokens   int          `json:"max_tokens,omitempty"`
}

// HunyuanRequest is the Tencent Hunyuan request format.
type HunyuanRequest struct {
	Stream            bool         `json:"stream"`
	Model             string       `json:"model"`
	QueryID           string       `json:"query_id"`
	Messages          []APIMessage `json:"messages"`
	StreamModeration  bool         `json:"stream_moderation"`
	EnableEnhancement bool         `json:"enable_enhancement"`
}

// HunyuanResponse is the Tencent Hunyuan response format.
type HunyuanResponse struct {
	ID                string   `json:"id"`
	Object            string   `json:"object"`
	Created           int64    `json:"created"`
	Model             string   `json:"model"`
	SystemFingerprint string   `json:"system_fingerprint"`
	Choices           []Choice `json:"choices"`
	Note              string   `json:"note,omitempty"`
}

// Choice is one response choice.
type Choice struct {
	Index        int     `json:"index"`
	Delta        Delta   `json:"delta"`
	FinishReason *string `json:"finish_reason"`
}

// Delta carries incremental content and reasoning content.
type Delta struct {
	Role             string `json:"role,omitempty"`
	Content          string `json:"content,omitempty"`
	ReasoningContent string `json:"reasoning_content,omitempty"`
}

// StreamChunk is the DeepSeek-style streaming response format.
type StreamChunk struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	Model   string `json:"model"`
	Choices []struct {
		Index        int     `json:"index"`
		FinishReason *string `json:"finish_reason,omitempty"`
		Delta        struct {
			Role             string `json:"role,omitempty"`
			Content          string `json:"content,omitempty"`
			ReasoningContent string `json:"reasoning_content,omitempty"`
		} `json:"delta"`
	} `json:"choices"`
}

// CompletionResponse is the non-streaming response format.
type CompletionResponse struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	Model   string `json:"model"`
	Choices []struct {
		Index        int    `json:"index"`
		FinishReason string `json:"finish_reason"`
		Message      struct {
			Role             string `json:"role"`
			Content          string `json:"content"`
			ReasoningContent string `json:"reasoning_content,omitempty"`
		} `json:"message"`
	} `json:"choices"`
	Usage struct {
		PromptTokens     int `json:"prompt_tokens"`
		CompletionTokens int `json:"completion_tokens"`
		TotalTokens      int `json:"total_tokens"`
	} `json:"usage"`
}

// Request counter and mutex, used for monitoring.
var (
	requestCount uint64
	countMutex   sync.Mutex
)

// main is the entry point.
func main() {
	// Parse configuration.
	appConfig = parseFlags()

	// Initialize logging.
	initLogger(appConfig.LogLevel)

	logInfo("Starting service: TargetURL=%s, Address=%s, Port=%s, Version=%s, LogLevel=%s, SupportedModels=%v, BearerToken=***, WorkerCount=%d, QueueSize=%d, MaxConcurrent=%d",
		TargetURL, appConfig.Address, appConfig.Port, Version, appConfig.LogLevel, SupportedModels,
		appConfig.WorkerCount, appConfig.QueueSize, appConfig.MaxConcurrent)

	// Create the worker pool and the request semaphore.
	workerPool = NewWorkerPool(appConfig.WorkerCount, appConfig.QueueSize)
	requestSem = NewSemaphore(appConfig.MaxConcurrent)

	logInfo("Worker pool created: %d workers, queue size %d", appConfig.WorkerCount, appConfig.QueueSize)

	// Tune the default transport for higher concurrency.
	http.DefaultTransport.(*http.Transport).MaxIdleConnsPerHost = 100
	http.DefaultTransport.(*http.Transport).MaxIdleConns = 100
	http.DefaultTransport.(*http.Transport).IdleConnTimeout = 90 * time.Second

	// Custom server with timeouts suited to high concurrency.
	server := &http.Server{
		Addr:         appConfig.Address + ":" + appConfig.Port,
		ReadTimeout:  time.Duration(appConfig.Timeout) * time.Second,
		WriteTimeout: time.Duration(appConfig.Timeout) * time.Second,
		IdleTimeout:  120 * time.Second,
		Handler:      nil, // use the default ServeMux
	}

	// Register handlers.
	http.HandleFunc("/v1/models", func(w http.ResponseWriter, r *http.Request) {
		setCORSHeaders(w)
		if r.Method == "OPTIONS" {
			w.WriteHeader(http.StatusOK)
			return
		}
		handleModelsRequest(w, r)
	})

	http.HandleFunc("/v1/chat/completions", func(w http.ResponseWriter, r *http.Request) {
		setCORSHeaders(w)
		if r.Method == "OPTIONS" {
			w.WriteHeader(http.StatusOK)
			return
		}

		// Increment the monitoring counter.
		countMutex.Lock()
		requestCount++
		currentCount := requestCount
		countMutex.Unlock()

		logInfo("Received new request #%d", currentCount)

		// Count the request.
		atomic.AddInt64(&requestCounter, 1)

		// Try to acquire the semaphore.
		if !requestSem.TryAcquire() {
			// Concurrent request limit reached.
			atomic.AddInt64(&rejectedRequests, 1)
			logWarn("Request #%d rejected: concurrent request limit reached", currentCount)
			w.Header().Set("Retry-After", "30")
			http.Error(w, "Server is busy, please try again later", http.StatusServiceUnavailable)
			return
		}

		// Release the semaphore when this handler returns.
		defer requestSem.Release()

		// Handle the request.
		handleChatCompletionRequestWithPool(w, r, currentCount)
	})

	// Health-check endpoint.
	http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
		setCORSHeaders(w)
		if r.Method == "OPTIONS" {
			w.WriteHeader(http.StatusOK)
			return
		}

		// Read the counters.
		reqCount := atomic.LoadInt64(&requestCounter)
		succCount := atomic.LoadInt64(&successCounter)
		errCount := atomic.LoadInt64(&errorCounter)
		queuedCount := atomic.LoadInt64(&queuedRequests)
		rejectedCount := atomic.LoadInt64(&rejectedRequests)

		// Compute the average response time.
		var avgTime int64 = 0
		if reqCount > 0 {
			avgTime = atomic.LoadInt64(&avgResponseTime) / max(reqCount, 1)
		}

		// Build the latency-histogram data.
		histogram := make([]int64, 10)
		for i := 0; i < 10; i++ {
			histogram[i] = atomic.LoadInt64(&latencyHistogram[i])
		}

		// Build the response.
		stats := map[string]interface{}{
			"status":           "ok",
			"version":          Version,
			"requests":         reqCount,
			"success":          succCount,
			"errors":           errCount,
			"queued":           queuedCount,
			"rejected":         rejectedCount,
			"avg_time_ms":      avgTime,
			"histogram_ms":     histogram,
			"worker_count":     workerPool.workerCount,
			"queue_size":       len(workerPool.taskQueue),
			"queue_capacity":   cap(workerPool.taskQueue),
			"queue_percent":    float64(len(workerPool.taskQueue)) / float64(cap(workerPool.taskQueue)) * 100,
			"concurrent_limit": appConfig.MaxConcurrent,
		}

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(stats)
	})

	// Stop channel for shutdown signals.
	stop := make(chan os.Signal, 1)
	signal.Notify(stop, os.Interrupt, syscall.SIGTERM)

	// Start the server in a goroutine.
	go func() {
		logInfo("Starting proxy server on %s", server.Addr)
		if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			logError("Failed to start server: %v", err)
			os.Exit(1)
		}
	}()

	// Wait for a stop signal.
	<-stop

	// Context for graceful shutdown.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// Gracefully shut down the server.
	logInfo("Server is shutting down...")
	if err := server.Shutdown(ctx); err != nil {
		logError("Server shutdown failed: %v", err)
	}

	// Shut down the worker pool.
	workerPool.Shutdown()

	logInfo("Server gracefully stopped")
}

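// Quick smoke test once the server is running (illustrative values):
//
//	curl -s http://localhost:6666/health
//	{"status":"ok","version":"1.0.0","requests":42,"success":40,"errors":2,
//	 "queued":0,"rejected":0,"avg_time_ms":120,"histogram_ms":[30,8,2,1,0,0,0,0,0,1],
//	 "worker_count":50,"queue_size":0,"queue_capacity":500,"queue_percent":0,
//	 "concurrent_limit":100}
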
// setCORSHeaders sets permissive CORS headers.
func setCORSHeaders(w http.ResponseWriter) {
	w.Header().Set("Access-Control-Allow-Origin", "*")
	w.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
	w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
}

// validateMessages checks the message array for basic validity.
func validateMessages(messages []APIMessage) (bool, string) {
	reqID := generateRequestID()
	logDebug("[reqID:%s] validating message format", reqID)

	if len(messages) == 0 {
		return false, "Messages array is required"
	}

	for _, msg := range messages {
		if msg.Role == "" || msg.Content == nil {
			return false, "Invalid message format: each message must have role and content"
		}
	}

	return true, ""
}

// extractToken extracts the bearer token from the request headers.
func extractToken(r *http.Request) (string, error) {
	// Read the Authorization header.
	authHeader := r.Header.Get("Authorization")
	if authHeader == "" {
		return "", fmt.Errorf("missing Authorization header")
	}

	// Validate the format.
	if !strings.HasPrefix(authHeader, "Bearer ") {
		return "", fmt.Errorf("invalid Authorization header format, must start with 'Bearer '")
	}

	// Extract the token value.
	token := strings.TrimPrefix(authHeader, "Bearer ")
	if token == "" {
		return "", fmt.Errorf("empty token in Authorization header")
	}

	return token, nil
}

// contentToString converts arbitrary message content to a string.
func contentToString(content interface{}) string {
	if content == nil {
		return ""
	}

	switch v := content.(type) {
	case string:
		return v
	default:
		jsonBytes, err := json.Marshal(v)
		if err != nil {
			logWarn("failed to convert content to JSON: %v", err)
			return ""
		}
		return string(jsonBytes)
	}
}

// generateQueryID generates a query ID for the upstream request.
func generateQueryID() string {
	return fmt.Sprintf("%s%d", getRandomString(8), time.Now().UnixNano())
}

// isModelSupported reports whether the model is in the supported list.
func isModelSupported(modelName string) bool {
	for _, supportedModel := range SupportedModels {
		if modelName == supportedModel {
			return true
		}
	}
	return false
}

// handleModelsRequest serves the model list.
func handleModelsRequest(w http.ResponseWriter, r *http.Request) {
	logInfo("Handling model list request")

	// Return the model list.
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)

	// Build the model data.
	modelData := make([]map[string]interface{}, 0, len(SupportedModels))
	for _, model := range SupportedModels {
		modelData = append(modelData, map[string]interface{}{
			"id":       model,
			"object":   "model",
			"created":  time.Now().Unix(),
			"owned_by": "TencentCloud",
			"capabilities": map[string]interface{}{
				"chat":        true,
				"completions": true,
				"reasoning":   true,
			},
		})
	}

	modelsList := map[string]interface{}{
		"object": "list",
		"data":   modelData,
	}

	json.NewEncoder(w).Encode(modelsList)
}

// handleChatCompletionRequestWithPool handles a chat completion request via the worker pool.
func handleChatCompletionRequestWithPool(w http.ResponseWriter, r *http.Request, requestNum uint64) {
	reqID := generateRequestID()
	startTime := time.Now()
	logInfo("[reqID:%s] handling chat completion request #%d", reqID, requestNum)

	// Attach a timeout context to the request.
	ctx, cancel := context.WithTimeout(r.Context(), time.Duration(appConfig.Timeout)*time.Second)
	defer cancel()
	r = r.WithContext(ctx)

	// Recover from panics so a single request cannot crash the server.
	defer func() {
		if rec := recover(); rec != nil {
			logError("[reqID:%s] panic while handling request: %v", reqID, rec)
			http.Error(w, "Internal server error", http.StatusInternalServerError)
		}
	}()

	// Decode the request body.
	var apiReq APIRequest
	if err := json.NewDecoder(r.Body).Decode(&apiReq); err != nil {
		logError("[reqID:%s] failed to parse request: %v", reqID, err)
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}

	// Validate the message format.
	valid, errMsg := validateMessages(apiReq.Messages)
	if !valid {
		logError("[reqID:%s] message validation failed: %s", reqID, errMsg)
		http.Error(w, errMsg, http.StatusBadRequest)
		return
	}

	// Whether to stream the response to the client.
	isStream := apiReq.Stream

	// Determine which model to use.
	modelName := appConfig.ModelName
	if apiReq.Model != "" {
		// Use the requested model only if it is supported.
		if isModelSupported(apiReq.Model) {
			modelName = apiReq.Model
		} else {
			logWarn("[reqID:%s] requested model %s is not supported; using default model %s", reqID, apiReq.Model, modelName)
		}
	}

	logInfo("[reqID:%s] using model: %s", reqID, modelName)

	// Build the Hunyuan API request.
	hunyuanReq := HunyuanRequest{
		Stream:            true, // the Hunyuan API always responds as a stream
		Model:             modelName,
		QueryID:           generateQueryID(),
		Messages:          apiReq.Messages,
		StreamModeration:  true,
		EnableEnhancement: false,
	}

	// Build the task.
	task := &Task{
		r:          r,
		w:          w,
		done:       make(chan struct{}),
		reqID:      reqID,
		isStream:   isStream,
		hunyuanReq: hunyuanReq,
	}

	// Submit the task to the queue.
	atomic.AddInt64(&queuedRequests, 1)
	submitted, err := workerPool.SubmitTask(task)
	if !submitted {
		atomic.AddInt64(&queuedRequests, -1)
		atomic.AddInt64(&rejectedRequests, 1)
		logError("[reqID:%s] failed to submit task: %v", reqID, err)
		w.Header().Set("Retry-After", "60")
		http.Error(w, "Server queue is full, please try again later", http.StatusServiceUnavailable)
		return
	}

	logInfo("[reqID:%s] task submitted to queue", reqID)

	// Wait for the task to finish or the request to time out.
	select {
	case <-task.done:
		// Task finished.
		logInfo("[reqID:%s] task completed", reqID)
	case <-r.Context().Done():
		// Request cancelled or timed out.
		logWarn("[reqID:%s] request cancelled or timed out", reqID)
		// Note: the worker may still be processing the task even though the request was cancelled.
	}

	// Update metrics.
	atomic.AddInt64(&queuedRequests, -1)
	elapsed := time.Since(startTime).Milliseconds()

	// Update the latency histogram.
	bucketIndex := min(int(elapsed/100), 9)
	atomic.AddInt64(&latencyHistogram[bucketIndex], 1)

	// Accumulate response time for the running average.
	atomic.AddInt64(&avgResponseTime, elapsed)

	if r.Context().Err() == nil {
		// Count the success.
		atomic.AddInt64(&successCounter, 1)
		logInfo("[reqID:%s] request handled successfully in %dms", reqID, elapsed)
	} else {
		logError("[reqID:%s] request failed: %v, took %dms", reqID, r.Context().Err(), elapsed)
	}
}

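// Example client call against this proxy (hypothetical host/port taken from
// the flag defaults; any OpenAI-compatible client should work the same way):
//
//	curl -s http://localhost:6666/v1/chat/completions \
//	  -H "Content-Type: application/json" \
//	  -d '{"model":"hunyuan-t1-latest","stream":false,"messages":[{"role":"user","content":"Hello"}]}'
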
// handleChatCompletionRequest handles a chat completion request (original implementation, no longer used).
func handleChatCompletionRequest(w http.ResponseWriter, r *http.Request) {
	reqID := generateRequestID()
	startTime := time.Now()
	logInfo("[reqID:%s] handling chat completion request", reqID)

	// Decode the request body.
	var apiReq APIRequest
	if err := json.NewDecoder(r.Body).Decode(&apiReq); err != nil {
		logError("[reqID:%s] failed to parse request: %v", reqID, err)
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}

	// Validate the message format.
	valid, errMsg := validateMessages(apiReq.Messages)
	if !valid {
		logError("[reqID:%s] message validation failed: %s", reqID, errMsg)
		http.Error(w, errMsg, http.StatusBadRequest)
		return
	}

	// Whether to stream the response to the client.
	isStream := apiReq.Stream

	// Determine which model to use.
	modelName := appConfig.ModelName
	if apiReq.Model != "" {
		// Use the requested model only if it is supported.
		if isModelSupported(apiReq.Model) {
			modelName = apiReq.Model
		} else {
			logWarn("[reqID:%s] requested model %s is not supported; using default model %s", reqID, apiReq.Model, modelName)
		}
	}

	logInfo("[reqID:%s] using model: %s", reqID, modelName)

	// Build the Hunyuan API request.
	hunyuanReq := HunyuanRequest{
		Stream:            true, // the Hunyuan API always responds as a stream
		Model:             modelName,
		QueryID:           generateQueryID(),
		Messages:          apiReq.Messages,
		StreamModeration:  true,
		EnableEnhancement: false,
	}

	// Forward the request to the Hunyuan API.
	var responseErr error
	if isStream {
		responseErr = handleStreamingRequest(w, r, hunyuanReq, reqID)
	} else {
		responseErr = handleNonStreamingRequest(w, r, hunyuanReq, reqID)
	}

	// Update metrics.
	elapsed := time.Since(startTime).Milliseconds()

	// Update the latency histogram.
	bucketIndex := min(int(elapsed/100), 9)
	atomic.AddInt64(&latencyHistogram[bucketIndex], 1)

	// Accumulate response time for the running average.
	atomic.AddInt64(&avgResponseTime, elapsed)

	if responseErr == nil {
		// Count the success.
		atomic.AddInt64(&successCounter, 1)
		logInfo("[reqID:%s] request handled successfully in %dms", reqID, elapsed)
	} else {
		logError("[reqID:%s] request failed: %v, took %dms", reqID, responseErr, elapsed)
	}
}

// getHTTPClient builds an HTTP client; SSL verification can be disabled via config.
func getHTTPClient() *http.Client {
	tr := &http.Transport{
		MaxIdleConnsPerHost: 100,
		IdleConnTimeout:     90 * time.Second,
		TLSClientConfig:     nil, // default TLS configuration
	}

	// Disable SSL verification if configured.
	if !appConfig.VerifySSL {
		tr.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
	}

	return &http.Client{
		Timeout:   time.Duration(appConfig.Timeout) * time.Second,
		Transport: tr,
	}
}

// handleStreamingRequest proxies a streaming request.
func handleStreamingRequest(w http.ResponseWriter, r *http.Request, hunyuanReq HunyuanRequest, reqID string) error {
	logInfo("[reqID:%s] handling streaming request", reqID)

	// Serialize the upstream request.
	jsonData, err := json.Marshal(hunyuanReq)
	if err != nil {
		logError("[reqID:%s] failed to serialize request: %v", reqID, err)
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return err
	}

	// Build the upstream request.
	httpReq, err := http.NewRequestWithContext(r.Context(), "POST", TargetURL, bytes.NewBuffer(jsonData))
	if err != nil {
		logError("[reqID:%s] failed to create request: %v", reqID, err)
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return err
	}

	// Set request headers.
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Model", hunyuanReq.Model)
	setCommonHeaders(httpReq)

	// Build the HTTP client.
	client := getHTTPClient()

	// Send the request.
	resp, err := client.Do(httpReq)
	if err != nil {
		logError("[reqID:%s] failed to send request: %v", reqID, err)
		http.Error(w, "Failed to connect to API", http.StatusBadGateway)
		return err
	}
	defer resp.Body.Close()

	// Check the response status.
	if resp.StatusCode != http.StatusOK {
		logError("[reqID:%s] API returned non-200 status code: %d", reqID, resp.StatusCode)

		bodyBytes, _ := io.ReadAll(resp.Body)
		logError("[reqID:%s] error response body: %s", reqID, string(bodyBytes))

		http.Error(w, fmt.Sprintf("API error with status code: %d", resp.StatusCode), resp.StatusCode)
		return fmt.Errorf("API returned non-200 status code: %d", resp.StatusCode)
	}

	// Set SSE response headers.
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache")
	w.Header().Set("Connection", "keep-alive")

	// Response ID and timestamp.
	respID := fmt.Sprintf("chatcmpl-%s", getRandomString(10))
	createdTime := time.Now().Unix()

	// Buffered reader for the upstream stream.
	reader := bufio.NewReaderSize(resp.Body, 16384)

	// The writer must support flushing for SSE.
	flusher, ok := w.(http.Flusher)
	if !ok {
		logError("[reqID:%s] streaming not supported", reqID)
		http.Error(w, "Streaming not supported", http.StatusInternalServerError)
		return fmt.Errorf("streaming not supported")
	}

	// Send the role chunk.
	roleChunk := createRoleChunk(respID, createdTime, hunyuanReq.Model)
	w.Write([]byte("data: " + string(roleChunk) + "\n\n"))
	flusher.Flush()

	// Tracks whether the stream already ended with [DONE].
	doneSent := false

	// Read the upstream response line by line.
	for {
		// Abort if the request was cancelled or timed out.
		select {
		case <-r.Context().Done():
			logWarn("[reqID:%s] request timed out or was cancelled by the client", reqID)
			return fmt.Errorf("request timed out or was cancelled")
		default:
			// Keep going.
		}

		// Read one line.
		line, err := reader.ReadBytes('\n')
		if err != nil {
			if err != io.EOF {
				logError("[reqID:%s] error reading response: %v", reqID, err)
				return err
			}
			break
		}

		// Process a data line.
		lineStr := string(line)
		if strings.HasPrefix(lineStr, "data: ") {
			jsonStr := strings.TrimPrefix(lineStr, "data: ")
			jsonStr = strings.TrimSpace(jsonStr)

			// Handle the [DONE] sentinel.
			if jsonStr == "[DONE]" {
				logDebug("[reqID:%s] received [DONE] message", reqID)
				w.Write([]byte("data: [DONE]\n\n"))
				flusher.Flush()
				doneSent = true
				break
			}

			// Parse the Hunyuan response.
			var hunyuanResp HunyuanResponse
			if err := json.Unmarshal([]byte(jsonStr), &hunyuanResp); err != nil {
				logWarn("[reqID:%s] failed to parse JSON: %v, data: %s", reqID, err, jsonStr)
				continue
			}

			// Forward each kind of content.
			for _, choice := range hunyuanResp.Choices {
				if choice.Delta.Content != "" {
					// Send a content chunk.
					contentChunk := createContentChunk(respID, createdTime, hunyuanReq.Model, choice.Delta.Content)
					w.Write([]byte("data: " + string(contentChunk) + "\n\n"))
					flusher.Flush()
				}

				if choice.Delta.ReasoningContent != "" {
					// Send a reasoning-content chunk.
					reasoningChunk := createReasoningChunk(respID, createdTime, hunyuanReq.Model, choice.Delta.ReasoningContent)
					w.Write([]byte("data: " + string(reasoningChunk) + "\n\n"))
					flusher.Flush()
				}

				// Forward the finish reason.
				if choice.FinishReason != nil {
					finishReason := *choice.FinishReason
					if finishReason != "" {
						doneChunk := createDoneChunk(respID, createdTime, hunyuanReq.Model, finishReason)
						w.Write([]byte("data: " + string(doneChunk) + "\n\n"))
						flusher.Flush()
					}
				}
			}
		}
	}

	// Send the end signals only if the stream did not finish with [DONE].
	if !doneSent {
		finishReason := "stop"
		doneChunk := createDoneChunk(respID, createdTime, hunyuanReq.Model, finishReason)
		w.Write([]byte("data: " + string(doneChunk) + "\n\n"))
		w.Write([]byte("data: [DONE]\n\n"))
		flusher.Flush()
	}

	return nil
}

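// Shape of the SSE stream emitted to the client (payloads abbreviated and
// illustrative):
//
//	data: {"id":"chatcmpl-...","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant"}}],...}
//	data: {"id":"chatcmpl-...","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Hi"}}],...}
//	data: {"id":"chatcmpl-...","object":"chat.completion.chunk","choices":[{"index":0,"finish_reason":"stop","delta":{}}],...}
//	data: [DONE]
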
// handleNonStreamingRequest proxies a non-streaming request by draining the
// upstream stream and returning a single JSON response.
func handleNonStreamingRequest(w http.ResponseWriter, r *http.Request, hunyuanReq HunyuanRequest, reqID string) error {
	logInfo("[reqID:%s] handling non-streaming request", reqID)

	// Serialize the upstream request.
	jsonData, err := json.Marshal(hunyuanReq)
	if err != nil {
		logError("[reqID:%s] failed to serialize request: %v", reqID, err)
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return err
	}

	// Build the upstream request.
	httpReq, err := http.NewRequestWithContext(r.Context(), "POST", TargetURL, bytes.NewBuffer(jsonData))
	if err != nil {
		logError("[reqID:%s] failed to create request: %v", reqID, err)
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return err
	}

	// Set request headers.
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Model", hunyuanReq.Model)
	setCommonHeaders(httpReq)

	// Build the HTTP client.
	client := getHTTPClient()

	// Send the request.
	resp, err := client.Do(httpReq)
	if err != nil {
		logError("[reqID:%s] failed to send request: %v", reqID, err)
		http.Error(w, "Failed to connect to API", http.StatusBadGateway)
		return err
	}
	defer resp.Body.Close()

	// Check the response status.
	if resp.StatusCode != http.StatusOK {
		logError("[reqID:%s] API returned non-200 status code: %d", reqID, resp.StatusCode)

		bodyBytes, _ := io.ReadAll(resp.Body)
		logError("[reqID:%s] error response body: %s", reqID, string(bodyBytes))

		http.Error(w, fmt.Sprintf("API error with status code: %d", resp.StatusCode), resp.StatusCode)
		return fmt.Errorf("API returned non-200 status code: %d", resp.StatusCode)
	}

	// Read the complete streamed response.
	bodyBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		logError("[reqID:%s] failed to read response: %v", reqID, err)
		http.Error(w, "Failed to read API response", http.StatusInternalServerError)
		return err
	}

	// Parse the stream and extract the full content.
	fullContent, reasoningContent, err := extractFullContentFromStream(bodyBytes, reqID)
	if err != nil {
		logError("[reqID:%s] failed to parse streaming response: %v", reqID, err)
		http.Error(w, "Failed to parse streaming response", http.StatusInternalServerError)
		return err
	}

	// Build the complete non-streaming response.
	completionResponse := CompletionResponse{
		ID:      fmt.Sprintf("chatcmpl-%s", getRandomString(10)),
		Object:  "chat.completion",
		Created: time.Now().Unix(),
		Model:   hunyuanReq.Model,
		Choices: []struct {
			Index        int    `json:"index"`
			FinishReason string `json:"finish_reason"`
			Message      struct {
				Role             string `json:"role"`
				Content          string `json:"content"`
				ReasoningContent string `json:"reasoning_content,omitempty"`
			} `json:"message"`
		}{
			{
				Index:        0,
				FinishReason: "stop",
				Message: struct {
					Role             string `json:"role"`
					Content          string `json:"content"`
					ReasoningContent string `json:"reasoning_content,omitempty"`
				}{
					Role:             "assistant",
					Content:          fullContent,
					ReasoningContent: reasoningContent,
				},
			},
		},
		Usage: struct {
			PromptTokens     int `json:"prompt_tokens"`
			CompletionTokens int `json:"completion_tokens"`
			TotalTokens      int `json:"total_tokens"`
		}{
			// Rough token estimates: roughly 4 characters per token.
			PromptTokens:     len(formatMessages(hunyuanReq.Messages)) / 4,
			CompletionTokens: len(fullContent) / 4,
			TotalTokens:      (len(formatMessages(hunyuanReq.Messages)) + len(fullContent)) / 4,
		},
	}

	// Write the response.
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(completionResponse); err != nil {
		logError("[reqID:%s] failed to encode response: %v", reqID, err)
		http.Error(w, "Failed to encode response", http.StatusInternalServerError)
		return err
	}

	return nil
}

// extractFullContentFromStream accumulates the full content and reasoning
// content from a buffered SSE stream.
func extractFullContentFromStream(bodyBytes []byte, reqID string) (string, string, error) {
	bodyStr := string(bodyBytes)
	lines := strings.Split(bodyStr, "\n")

	// Content accumulators.
	var contentBuilder strings.Builder
	var reasoningBuilder strings.Builder

	// Parse each line.
	for _, line := range lines {
		if strings.HasPrefix(line, "data: ") && !strings.Contains(line, "[DONE]") {
			jsonStr := strings.TrimPrefix(line, "data: ")
			jsonStr = strings.TrimSpace(jsonStr)

			// Parse the JSON payload.
			var hunyuanResp HunyuanResponse
			if err := json.Unmarshal([]byte(jsonStr), &hunyuanResp); err != nil {
				continue // skip invalid JSON
			}

			// Accumulate content and reasoning content.
			for _, choice := range hunyuanResp.Choices {
				if choice.Delta.Content != "" {
					contentBuilder.WriteString(choice.Delta.Content)
				}
				if choice.Delta.ReasoningContent != "" {
					reasoningBuilder.WriteString(choice.Delta.ReasoningContent)
				}
			}
		}
	}

	return contentBuilder.String(), reasoningBuilder.String(), nil
}

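// Illustrative input for extractFullContentFromStream (made-up payloads):
//
//	data: {"choices":[{"delta":{"reasoning_content":"thinking..."}}]}
//	data: {"choices":[{"delta":{"content":"Hello"}}]}
//	data: [DONE]
//
// would yield fullContent "Hello" and reasoningContent "thinking...".
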
// createRoleChunk builds the initial chunk announcing the assistant role.
func createRoleChunk(id string, created int64, model string) []byte {
	chunk := StreamChunk{
		ID:      id,
		Object:  "chat.completion.chunk",
		Created: created,
		Model:   model,
		Choices: []struct {
			Index        int     `json:"index"`
			FinishReason *string `json:"finish_reason,omitempty"`
			Delta        struct {
				Role             string `json:"role,omitempty"`
				Content          string `json:"content,omitempty"`
				ReasoningContent string `json:"reasoning_content,omitempty"`
			} `json:"delta"`
		}{
			{
				Index: 0,
				Delta: struct {
					Role             string `json:"role,omitempty"`
					Content          string `json:"content,omitempty"`
					ReasoningContent string `json:"reasoning_content,omitempty"`
				}{
					Role: "assistant",
				},
			},
		},
	}

	data, _ := json.Marshal(chunk)
	return data
}

// createContentChunk builds a chunk carrying response content.
func createContentChunk(id string, created int64, model string, content string) []byte {
	chunk := StreamChunk{
		ID:      id,
		Object:  "chat.completion.chunk",
		Created: created,
		Model:   model,
		Choices: []struct {
			Index        int     `json:"index"`
			FinishReason *string `json:"finish_reason,omitempty"`
			Delta        struct {
				Role             string `json:"role,omitempty"`
				Content          string `json:"content,omitempty"`
				ReasoningContent string `json:"reasoning_content,omitempty"`
			} `json:"delta"`
		}{
			{
				Index: 0,
				Delta: struct {
					Role             string `json:"role,omitempty"`
					Content          string `json:"content,omitempty"`
					ReasoningContent string `json:"reasoning_content,omitempty"`
				}{
					Content: content,
				},
			},
		},
	}

	data, _ := json.Marshal(chunk)
	return data
}

// createReasoningChunk builds a chunk carrying reasoning content.
func createReasoningChunk(id string, created int64, model string, reasoningContent string) []byte {
	chunk := StreamChunk{
		ID:      id,
		Object:  "chat.completion.chunk",
		Created: created,
		Model:   model,
		Choices: []struct {
			Index        int     `json:"index"`
			FinishReason *string `json:"finish_reason,omitempty"`
			Delta        struct {
				Role             string `json:"role,omitempty"`
				Content          string `json:"content,omitempty"`
				ReasoningContent string `json:"reasoning_content,omitempty"`
			} `json:"delta"`
		}{
			{
				Index: 0,
				Delta: struct {
					Role             string `json:"role,omitempty"`
					Content          string `json:"content,omitempty"`
					ReasoningContent string `json:"reasoning_content,omitempty"`
				}{
					ReasoningContent: reasoningContent,
				},
			},
		},
	}

	data, _ := json.Marshal(chunk)
	return data
}

// createDoneChunk builds the final chunk carrying the finish reason.
func createDoneChunk(id string, created int64, model string, reason string) []byte {
	finishReason := reason
	chunk := StreamChunk{
		ID:      id,
		Object:  "chat.completion.chunk",
		Created: created,
		Model:   model,
		Choices: []struct {
			Index        int     `json:"index"`
			FinishReason *string `json:"finish_reason,omitempty"`
			Delta        struct {
				Role             string `json:"role,omitempty"`
				Content          string `json:"content,omitempty"`
				ReasoningContent string `json:"reasoning_content,omitempty"`
			} `json:"delta"`
		}{
			{
				Index:        0,
				FinishReason: &finishReason,
				Delta: struct {
					Role             string `json:"role,omitempty"`
					Content          string `json:"content,omitempty"`
					ReasoningContent string `json:"reasoning_content,omitempty"`
				}{},
			},
		},
	}

	data, _ := json.Marshal(chunk)
	return data
}

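// Note: FinishReason is a *string so that intermediate chunks can omit the
// field entirely ("omitempty" drops a nil pointer) while the final chunk
// carries an explicit value such as "stop".
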
// setCommonHeaders sets the browser-like request headers (ported from the Python version).
func setCommonHeaders(req *http.Request) {
	req.Header.Set("accept", "*/*")
	req.Header.Set("accept-language", "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7")
	req.Header.Set("authorization", "Bearer "+appConfig.BearerToken)
	req.Header.Set("dnt", "1")
	req.Header.Set("origin", "https://llm.hunyuan.tencent.com")
	req.Header.Set("polaris", "stream-server-online-sbs-10697")
	req.Header.Set("priority", "u=1, i")
	req.Header.Set("referer", "https://llm.hunyuan.tencent.com/")
	req.Header.Set("sec-ch-ua", "\"Chromium\";v=\"134\", \"Not:A-Brand\";v=\"24\", \"Google Chrome\";v=\"134\"")
	req.Header.Set("sec-ch-ua-mobile", "?0")
	req.Header.Set("sec-ch-ua-platform", "\"Windows\"")
	req.Header.Set("sec-fetch-dest", "empty")
	req.Header.Set("sec-fetch-mode", "cors")
	req.Header.Set("sec-fetch-site", "same-origin")
	req.Header.Set("staffname", "staryxzhang")
	req.Header.Set("wsid", "10697")
	req.Header.Set("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36")
}

// generateRequestID generates a request ID.
func generateRequestID() string {
	return fmt.Sprintf("%x", time.Now().UnixNano())
}

// getRandomString generates a random lowercase-alphanumeric string.
func getRandomString(length int) string {
	const charset = "abcdefghijklmnopqrstuvwxyz0123456789"
	b := make([]byte, length)
	for i := range b {
		b[i] = charset[rand.Intn(len(charset))]
	}
	return string(b)
}

// formatMessages flattens messages into a "role: content" string, one message per line.
func formatMessages(messages []APIMessage) string {
	var result strings.Builder
	for _, msg := range messages {
		result.WriteString(msg.Role)
		result.WriteString(": ")
		result.WriteString(contentToString(msg.Content))
		result.WriteString("\n")
	}
	return result.String()
}

// min returns the smaller of two ints.
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// max returns the larger of two int64 values.
func max(a, b int64) int64 {
	if a > b {
		return a
	}
	return b
}