support local model

Liujian
2025-02-17 13:41:04 +08:00
parent 09159b926a
commit a9e301150a
177 changed files with 6139 additions and 205 deletions

View File

@@ -19,6 +19,7 @@ import (
"github.com/eolinker/apinto/drivers/ai-provider/moonshot"
"github.com/eolinker/apinto/drivers/ai-provider/novita"
"github.com/eolinker/apinto/drivers/ai-provider/nvidia"
"github.com/eolinker/apinto/drivers/ai-provider/ollama"
"github.com/eolinker/apinto/drivers/ai-provider/openAI"
"github.com/eolinker/apinto/drivers/ai-provider/openrouter"
"github.com/eolinker/apinto/drivers/ai-provider/perfxcloud"
@@ -149,6 +150,7 @@ func driverRegister(extenderRegister eosc.IExtenderDriverRegister) {
vertex_ai.Register(extenderRegister)
fakegpt.Register(extenderRegister)
zhinao.Register(extenderRegister)
ollama.Register(extenderRegister)
ai_provider.Register(extenderRegister)
ai_key.Register(extenderRegister)
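The new ollama entry follows the same one-line registration pattern as every other ai-provider package. A self-contained sketch of that shape, with a local stand-in for eosc.IExtenderDriverRegister (its real method set is not shown in this diff, so the names below are hypothetical):

package main

import "fmt"

// driverRegister is a hypothetical stand-in for eosc.IExtenderDriverRegister;
// the only thing this diff shows is that each provider package exposes a
// Register(extenderRegister) entry point called once from driverRegister.
type driverRegister interface {
    Register(name string, factory func() interface{})
}

type registry map[string]func() interface{}

func (r registry) Register(name string, factory func() interface{}) { r[name] = factory }

// ollamaRegister mirrors the shape of ollama.Register(extenderRegister).
func ollamaRegister(r driverRegister) {
    r.Register("ollama", func() interface{} { return "ollama driver instance" })
}

func main() {
    r := registry{}
    ollamaRegister(r)
    fmt.Println(len(r), "driver registered")
}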

View File

@@ -39,7 +39,7 @@ func buildHexCanonicalRequest(ctx http_service.IHttpContext, signedHeaders []str
for i := 0; i < len(queryArgs); i++ {
params := strings.Split(queryArgs[i], "=")
if len(params) != 2 {
//query.Set(params[0], "")
continue
}
query.Set(params[0], params[1])
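One caveat in the hunk above: splitting each query argument on every `=` discards parameters whose value itself contains `=` (base64-padded signatures, for instance). A minimal sketch of a more tolerant split, assuming plain key/value extraction is the goal; illustrative, not the committed code:

package main

import (
    "fmt"
    "strings"
)

// splitQueryArg splits one raw "key=value" query argument. Using SplitN with
// a limit of 2 keeps '=' characters inside the value (e.g. base64 padding)
// instead of discarding the whole parameter.
func splitQueryArg(arg string) (key, value string, ok bool) {
    parts := strings.SplitN(arg, "=", 2)
    if len(parts) != 2 {
        return parts[0], "", false // bare key with no '='
    }
    return parts[0], parts[1], true
}

func main() {
    for _, arg := range []string{"sig=abc==", "flag"} {
        k, v, ok := splitQueryArg(arg)
        fmt.Printf("key=%q value=%q ok=%v\n", k, v, ok)
    }
}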

View File

@@ -127,60 +127,95 @@ func DelKeyResource(provider string, resourceId string) {
keyPoolManager.DelKeySource(provider, resourceId)
}
func DelProvider(provider string) {
providerManager.Del(provider)
func DelProvider(id string) {
provider := balanceManager.Del(id)
if provider != "" {
keyPoolManager.Del(provider)
}
}
var (
providerManager = NewProviderManager()
balanceManager = NewBalanceManager()
)
type ProviderManager struct {
type BalanceManager struct {
providers eosc.Untyped[string, IProvider]
providerSorts []IProvider
ids eosc.Untyped[string, eosc.Untyped[string, IProvider]]
balances eosc.Untyped[string, IProvider]
balanceSort []IProvider
}
func NewProviderManager() *ProviderManager {
return &ProviderManager{
func NewBalanceManager() *BalanceManager {
return &BalanceManager{
providers: eosc.BuildUntyped[string, IProvider](),
balances: eosc.BuildUntyped[string, IProvider](),
ids: eosc.BuildUntyped[string, eosc.Untyped[string, IProvider]](),
}
}
func (m *ProviderManager) Set(provider string, p IProvider) {
m.providers.Set(provider, p)
m.sortProviders()
func (m *BalanceManager) SetProvider(id string, p IProvider) {
m.providers.Set(p.Provider(), p)
m.balances.Set(id, p)
tmp, has := m.ids.Get(p.Provider())
if !has {
tmp = eosc.BuildUntyped[string, IProvider]()
}
tmp.Set(id, p)
m.ids.Set(p.Provider(), tmp)
m.sortBalances()
}
func (m *ProviderManager) Get(provider string) (IProvider, bool) {
func (m *BalanceManager) Get(provider string) (IProvider, bool) {
return m.providers.Get(provider)
}
func (m *ProviderManager) sortProviders() {
providers := m.providers.List()
sort.Slice(providers, func(i, j int) bool {
return providers[i].Priority() < providers[j].Priority()
func (m *BalanceManager) sortBalances() {
balances := m.balances.List()
tmpBalances := make([]IProvider, 0, len(balances))
for _, b := range balances {
if b.Priority() == 0 {
continue
}
tmpBalances = append(tmpBalances, b)
}
sort.Slice(tmpBalances, func(i, j int) bool {
return tmpBalances[i].Priority() < tmpBalances[j].Priority()
})
m.providerSorts = providers
m.balanceSort = tmpBalances
}
func (m *ProviderManager) Del(provider string) {
m.providers.Del(provider)
m.sortProviders()
func (m *BalanceManager) Del(id string) string {
p, ok := m.balances.Del(id)
if !ok {
return ""
}
tmp, has := m.ids.Get(p.Provider())
if !has {
return ""
}
tmp.Del(id)
if tmp.Count() < 1 {
m.providers.Del(p.Provider())
return p.Provider()
}
m.sortBalances()
return ""
}
func (m *ProviderManager) Providers() []IProvider {
return m.providerSorts
func (m *BalanceManager) Balances() []IProvider {
return m.balanceSort
}
func Providers() []IProvider {
return providerManager.Providers()
func Balances() []IProvider {
return balanceManager.Balances()
}
func SetProvider(provider string, p IProvider) {
providerManager.Set(provider, p)
func SetProvider(id string, p IProvider) {
balanceManager.SetProvider(id, p)
}
func GetProvider(provider string) (IProvider, bool) {
return providerManager.Get(provider)
return balanceManager.Get(provider)
}
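The rename is more than cosmetic: entries are now keyed by worker instance id, with a provider-to-ids index so that DelProvider only releases a provider's key pool once its last instance is gone, and instances with priority 0 are excluded from the balance list. A self-contained sketch of that bookkeeping with plain maps standing in for eosc.Untyped (the IProvider shape is inferred from the hunk; this illustrates the semantics rather than reproducing the committed code):

package main

import (
    "fmt"
    "sort"
)

// IProvider exposes at least Provider() and Priority(), as inferred from the hunk.
type IProvider interface {
    Provider() string
    Priority() int
}

type inst struct {
    provider string
    priority int
}

func (i *inst) Provider() string { return i.provider }
func (i *inst) Priority() int    { return i.priority }

// balanceManager mirrors the new semantics: entries are keyed by worker
// instance id, and a provider is only released once its last instance is gone.
type balanceManager struct {
    byID       map[string]IProvider
    byProvider map[string]map[string]IProvider
    sorted     []IProvider
}

func newBalanceManager() *balanceManager {
    return &balanceManager{byID: map[string]IProvider{}, byProvider: map[string]map[string]IProvider{}}
}

func (m *balanceManager) Set(id string, p IProvider) {
    m.byID[id] = p
    ids := m.byProvider[p.Provider()]
    if ids == nil {
        ids = map[string]IProvider{}
        m.byProvider[p.Provider()] = ids
    }
    ids[id] = p
    m.resort()
}

// Del returns the provider name only when the deleted id was its last instance.
func (m *balanceManager) Del(id string) string {
    p, ok := m.byID[id]
    if !ok {
        return ""
    }
    delete(m.byID, id)
    ids := m.byProvider[p.Provider()]
    delete(ids, id)
    m.resort()
    if len(ids) == 0 {
        delete(m.byProvider, p.Provider())
        return p.Provider()
    }
    return ""
}

func (m *balanceManager) resort() {
    m.sorted = m.sorted[:0]
    for _, p := range m.byID {
        if p.Priority() == 0 { // priority 0 opts an instance out of balancing
            continue
        }
        m.sorted = append(m.sorted, p)
    }
    sort.Slice(m.sorted, func(i, j int) bool { return m.sorted[i].Priority() < m.sorted[j].Priority() })
}

func main() {
    m := newBalanceManager()
    m.Set("worker-1", &inst{provider: "openai", priority: 1})
    m.Set("worker-2", &inst{provider: "openai", priority: 2})
    fmt.Printf("%q\n", m.Del("worker-1")) // "" : openai still has worker-2
    fmt.Printf("%q\n", m.Del("worker-2")) // "openai" : last instance gone, key pool can be released
}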

View File

@@ -33,7 +33,7 @@ const (
const (
ModeChat Mode = "chat"
ModeComplete Mode = "complete"
ModeCompletion Mode = "completion"
)
type Mode string

drivers/.gitignore vendored (2 lines changed)
View File

@@ -1,2 +1,2 @@
*.DS_Store
**/.env
../.env

View File

@@ -0,0 +1,38 @@
model: claude-3-5-haiku-20241022
label:
en_US: claude-3-5-haiku-20241022
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
pricing:
input: '1.00'
output: '5.00'
unit: '0.000001'
currency: USD
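A reading aid for the pricing blocks that recur throughout these model cards: the input/output figures appear to be per token once scaled by unit, so unit '0.000001' makes them effectively per million tokens. A quick sketch of that arithmetic, assuming this interpretation:

package main

import "fmt"

// cost illustrates one plausible reading of the pricing block: price fields
// are per token once scaled by unit, so unit 0.000001 makes them per million.
func cost(tokens int, pricePerUnit, unit float64) float64 {
    return float64(tokens) * pricePerUnit * unit
}

func main() {
    // claude-3-5-haiku-20241022 above: input '1.00', output '5.00', unit '0.000001'
    fmt.Printf("$%.4f in, $%.4f out\n",
        cost(10_000, 1.00, 0.000001), // 10k prompt tokens -> $0.0100
        cost(2_000, 5.00, 0.000001))  // 2k output tokens  -> $0.0100
}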

View File

@@ -0,0 +1,40 @@
model: claude-3-5-sonnet-20241022
label:
en_US: claude-3-5-sonnet-20241022
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
pricing:
input: '3.00'
output: '15.00'
unit: '0.000001'
currency: USD

View File

@@ -3,15 +3,16 @@ package anthropic
import (
_ "embed"
"fmt"
"github.com/eolinker/apinto/convert"
http_context "github.com/eolinker/apinto/node/http-context"
"github.com/joho/godotenv"
"github.com/valyala/fasthttp"
"net/url"
"os"
"testing"
"time"
"github.com/eolinker/apinto/convert"
http_context "github.com/eolinker/apinto/node/http-context"
"github.com/joho/godotenv"
"github.com/valyala/fasthttp"
ai_provider "github.com/eolinker/apinto/drivers/ai-provider"
)
@@ -179,7 +180,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req := fasthttp.AcquireRequest()
u := fasthttp.AcquireURI()
// Set request URI and path
uri, _ := url.Parse(rawURL)
u.SetPath(uri.Path)
u.SetScheme(uri.Scheme)
@@ -188,7 +189,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req.SetURI(u)
req.Header.SetMethod("POST")
// Set headers
for k, v := range headers {
req.Header.Set(k, v)
}

View File

@@ -3,15 +3,16 @@ package baichuan
import (
_ "embed"
"fmt"
"github.com/eolinker/apinto/convert"
http_context "github.com/eolinker/apinto/node/http-context"
"github.com/joho/godotenv"
"github.com/valyala/fasthttp"
"net/url"
"os"
"testing"
"time"
"github.com/eolinker/apinto/convert"
http_context "github.com/eolinker/apinto/node/http-context"
"github.com/joho/godotenv"
"github.com/valyala/fasthttp"
ai_provider "github.com/eolinker/apinto/drivers/ai-provider"
)
@@ -179,7 +180,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req := fasthttp.AcquireRequest()
u := fasthttp.AcquireURI()
// Set request URI and path
uri, _ := url.Parse(rawURL)
u.SetPath(uri.Path)
u.SetScheme(uri.Scheme)
@@ -188,7 +189,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req.SetURI(u)
req.Header.SetMethod("POST")
// Set headers
for k, v := range headers {
req.Header.Set(k, v)
}

View File

@@ -0,0 +1,26 @@
model: ai21.jamba-1-5-large-v1:0
label:
en_US: Jamba 1.5 Large
model_type: llm
model_properties:
mode: completion
context_size: 256000
parameter_rules:
- name: temperature
use_template: temperature
default: 1
min: 0.0
max: 2.0
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
pricing:
input: '0.002'
output: '0.008'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,26 @@
model: ai21.jamba-1-5-mini-v1:0
label:
en_US: Jamba 1.5 Mini
model_type: llm
model_properties:
mode: completion
context_size: 256000
parameter_rules:
- name: temperature
use_template: temperature
default: 1
min: 0.0
max: 2.0
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
pricing:
input: '0.0002'
output: '0.0004'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,53 @@
model: amazon.nova-lite-v1:0
label:
en_US: Nova Lite V1
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip: the AWS docs have an error; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.00006'
output: '0.00024'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,52 @@
model: amazon.nova-micro-v1:0
label:
en_US: Nova Micro V1
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip: the AWS docs have an error; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.000035'
output: '0.00014'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,53 @@
model: amazon.nova-pro-v1:0
label:
en_US: Nova Pro V1
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip: the AWS docs have an error; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.0008'
output: '0.0032'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,60 @@
model: anthropic.claude-3-5-haiku-20241022-v1:0
label:
en_US: Claude 3.5 Haiku
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 8192
min: 1
max: 8192
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
# docs: https://docs.anthropic.com/claude/docs/system-prompts
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip: the AWS docs have an error; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.001'
output: '0.005'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,60 @@
model: anthropic.claude-3-5-sonnet-20241022-v2:0
label:
en_US: Claude 3.5 Sonnet V2
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 8192
min: 1
max: 8192
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip: the AWS docs have an error; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,60 @@
model: eu.anthropic.claude-3-5-sonnet-20241022-v2:0
label:
en_US: Claude 3.5 Sonnet V2 (EU.Cross Region Inference)
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip: the AWS docs have an error; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,53 @@
model: us.amazon.nova-lite-v1:0
label:
en_US: Nova Lite V1 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip: the AWS docs have an error; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.00006'
output: '0.00024'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,52 @@
model: us.amazon.nova-micro-v1:0
label:
en_US: Nova Micro V1 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip: the AWS docs have an error; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.000035'
output: '0.00014'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,53 @@
model: us.amazon.nova-pro-v1:0
label:
en_US: Nova Pro V1 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 300000
parameter_rules:
- name: max_new_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 5000
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip: the AWS docs have an error; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.0008'
output: '0.0032'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,60 @@
model: us.anthropic.claude-3-5-haiku-20241022-v1:0
label:
en_US: Claude 3.5 Haiku (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 8192
min: 1
max: 8192
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
# docs: https://docs.anthropic.com/claude/docs/system-prompts
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip: the AWS docs have an error; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.001'
output: '0.005'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,60 @@
model: us.anthropic.claude-3-5-sonnet-20241022-v2:0
label:
en_US: Claude 3.5 Sonnet V2 (US.Cross Region Inference)
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 8192
min: 1
max: 8192
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip: the AWS docs have an error; the max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,29 @@
model: us.meta.llama3-2-11b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 11B Instruct
model_type: llm
features:
- vision
- tool-call
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.00035'
output: '0.00035'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,26 @@
model: us.meta.llama3-2-1b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 1B Instruct
model_type: llm
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.0001'
output: '0.0001'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,26 @@
model: us.meta.llama3-2-3b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 3B Instruct
model_type: llm
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.00015'
output: '0.00015'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,31 @@
model: us.meta.llama3-2-90b-instruct-v1:0
label:
en_US: US Meta Llama 3.2 90B Instruct
model_type: llm
features:
- tool-call
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
min: 0.0
max: 1
- name: top_p
use_template: top_p
default: 0.9
min: 0
max: 1
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.002'
output: '0.002'
unit: '0.001'
currency: USD

View File

@@ -16,6 +16,7 @@ type FNewModelMode func(string) IModelMode
var (
modelModes = map[string]FNewModelMode{
convert.ModeChat.String(): NewChat,
convert.ModeCompletion.String(): NewChat,
}
)
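With this entry, model cards declaring mode: completion (the Bedrock Llama and Jamba YAMLs above) are routed through the same constructor as chat. A minimal sketch of how such a registry resolves, with hypothetical stand-ins for IModelMode and the endpoint it yields:

package main

import "fmt"

// IModelMode / fNewModelMode stand-ins: the registry maps a mode string from
// the model card ("chat", "completion") to a constructor. The hunk routes
// "completion" through the chat constructor, so completion-mode cards reuse
// the chat pipeline.
type IModelMode interface{ Endpoint() string }

type chatMode struct{ model string }

func (c *chatMode) Endpoint() string { return "/v1/chat/completions?model=" + c.model }

type fNewModelMode func(model string) IModelMode

func newChat(model string) IModelMode { return &chatMode{model: model} }

var modelModes = map[string]fNewModelMode{
    "chat":       newChat,
    "completion": newChat, // same handler, as in the diff
}

func main() {
    for _, mode := range []string{"chat", "completion", "embedding"} {
        if newMode, ok := modelModes[mode]; ok {
            fmt.Println(mode, "->", newMode("llama3-70b-8192").Endpoint())
        } else {
            fmt.Println(mode, "-> unsupported")
        }
    }
}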

View File

@@ -178,7 +178,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req := fasthttp.AcquireRequest()
u := fasthttp.AcquireURI()
// Set request URI and path
uri, _ := url.Parse(rawURL)
u.SetPath(uri.Path)
u.SetScheme(uri.Scheme)
@@ -187,7 +187,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req.SetURI(u)
req.Header.SetMethod("POST")
// Set headers
for k, v := range headers {
req.Header.Set(k, v)
}

View File

@@ -208,7 +208,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req := fasthttp.AcquireRequest()
u := fasthttp.AcquireURI()
// Set request URI and path
uri, _ := url.Parse(rawURL)
u.SetPath(uri.Path)
u.SetScheme(uri.Scheme)
@@ -217,7 +217,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req.SetURI(u)
req.Header.SetMethod("POST")
// Set headers
for k, v := range headers {
req.Header.Set(k, v)
}

View File

@@ -0,0 +1,21 @@
model: deepseek-reasoner
label:
zh_Hans: deepseek-reasoner
en_US: deepseek-reasoner
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "4"
output: "16"
unit: "0.000001"
currency: RMB

View File

@@ -98,12 +98,12 @@ func (e *executor) reset(cfg *Config) error {
e.provider = cfg.Provider
e.modelConfig = extender
e.disable = false
convert.SetProvider(cfg.Provider, e)
convert.SetProvider(e.Id(), e)
return nil
}
func (e *executor) Stop() error {
convert.DelProvider(e.provider)
convert.DelProvider(e.Id())
return nil
}
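Registering with e.Id() and deregistering with the same id keeps the executor lifecycle symmetric and lets several configurations of one provider coexist, matching the BalanceManager keying above. A small sketch of that symmetry with illustrative types:

package main

import "fmt"

// A sketch of the lifecycle the hunk enforces: register and deregister must
// use the same key. Keying by worker id (rather than provider name) lets
// several configurations of one provider coexist; the names here are
// illustrative, not the driver's real types.
type registry map[string]string // worker id -> provider

func (r registry) SetProvider(id, provider string) { r[id] = provider }
func (r registry) DelProvider(id string)           { delete(r, id) }

type executor struct {
    id       string
    provider string
}

func (e *executor) Id() string { return e.id }

func (e *executor) reset(r registry, provider string) {
    e.provider = provider
    r.SetProvider(e.Id(), provider) // keyed by id, as after the change
}

func (e *executor) Stop(r registry) {
    r.DelProvider(e.Id()) // symmetric: the same id that registered
}

func main() {
    r := registry{}
    a := &executor{id: "worker@1"}
    b := &executor{id: "worker@2"}
    a.reset(r, "ollama")
    b.reset(r, "ollama") // two instances of the same provider now coexist
    a.Stop(r)
    fmt.Println(r) // map[worker@2:ollama]
}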

View File

@@ -208,7 +208,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req := fasthttp.AcquireRequest()
u := fasthttp.AcquireURI()
// Set request URI and path
uri, _ := url.Parse(rawURL)
u.SetPath(uri.Path)
u.SetScheme(uri.Scheme)
@@ -217,7 +217,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req.SetURI(u)
req.Header.SetMethod("POST")
// Set headers
for k, v := range headers {
req.Header.Set(k, v)
}

View File

@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
label:
zh_Hans: Llama 3.2 11B Vision Instruct
en_US: Llama 3.2 11B Vision Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.2'
output: '0.2'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-1b-instruct
label:
zh_Hans: Llama 3.2 1B Instruct
en_US: Llama 3.2 1B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.1'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-3b-instruct
label:
zh_Hans: Llama 3.2 3B Instruct
en_US: Llama 3.2 3B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.1'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
label:
zh_Hans: Llama 3.2 90B Vision Instruct
en_US: Llama 3.2 90B Vision Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.9'
output: '0.9'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,46 @@
model: accounts/fireworks/models/qwen2p5-72b-instruct
label:
zh_Hans: Qwen2.5 72B Instruct
en_US: Qwen2.5 72B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.9'
output: '0.9'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-1.5-flash-001
label:
en_US: Gemini 1.5 Flash 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-1.5-flash-002
label:
en_US: Gemini 1.5 Flash 002
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-1.5-flash-8b-exp-0924
label:
en_US: Gemini 1.5 Flash 8B 0924
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-1.5-flash
label:
en_US: Gemini 1.5 Flash
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-1.5-pro-001
label:
en_US: Gemini 1.5 Pro 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-1.5-pro-002
label:
en_US: Gemini 1.5 Pro 002
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-1.5-pro
label:
en_US: Gemini 1.5 Pro
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-2.0-flash-001
label:
en_US: Gemini 2.0 Flash 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-2.0-flash-exp
label:
en_US: Gemini 2.0 Flash Exp
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-2.0-flash-lite-preview-02-05
label:
en_US: Gemini 2.0 Flash Lite Preview 0205
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,39 @@
model: gemini-2.0-flash-thinking-exp-01-21
label:
en_US: Gemini 2.0 Flash Thinking Exp 01-21
model_type: llm
features:
- agent-thought
- vision
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,39 @@
model: gemini-2.0-flash-thinking-exp-1219
label:
en_US: Gemini 2.0 Flash Thinking Exp 1219
model_type: llm
features:
- agent-thought
- vision
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-2.0-pro-exp-02-05
label:
en_US: Gemini 2.0 Pro Exp 02-05
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-exp-1114
label:
en_US: Gemini exp 1114
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-exp-1121
label:
en_US: Gemini exp 1121
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: gemini-exp-1206
label:
en_US: Gemini exp 1206
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: learnlm-1.5-pro-experimental
label:
en_US: LearnLM 1.5 Pro Experimental
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -178,7 +178,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req := fasthttp.AcquireRequest()
u := fasthttp.AcquireURI()
// Set request URI and path
uri, _ := url.Parse(rawURL)
u.SetPath(uri.Path)
u.SetScheme(uri.Scheme)
@@ -187,7 +187,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req.SetURI(u)
req.Header.SetMethod("POST")
// Set headers
for k, v := range headers {
req.Header.Set(k, v)
}

View File

@@ -75,6 +75,7 @@ func (e *converterDriver) GetModel(model string) (convert.FGenerateConfig, bool)
result["response_format"] = map[string]interface{}{
"type": modelCfg.ResponseFormat,
}
result["stream"] = false
}
return result, nil
}, true
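The added line pins stream to false whenever a response_format is forced, presumably because a structured body can only be validated once complete. A sketch of the resulting config shape (field names beyond those in the hunk are illustrative):

package main

import (
    "encoding/json"
    "fmt"
)

// generateConfig sketches the hunk's behaviour: when a model config forces a
// response format, streaming is disabled in the same pass, since a structured
// body is only checkable once complete.
func generateConfig(model, responseFormat string) map[string]interface{} {
    result := map[string]interface{}{"model": model}
    if responseFormat != "" {
        result["response_format"] = map[string]interface{}{"type": responseFormat}
        result["stream"] = false // as added in the hunk
    }
    return result
}

func main() {
    out, _ := json.Marshal(generateConfig("deepseek-r1-distill-llama-70b", "json_object"))
    fmt.Println(string(out))
}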

View File

@@ -0,0 +1,36 @@
model: deepseek-r1-distill-llama-70b
label:
en_US: DeepSeek R1 Distill Llama 70b
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '3.00'
output: '3.00'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,37 @@
model: gemma-7b-it
label:
zh_Hans: Gemma 7B Instruction Tuned
en_US: Gemma 7B Instruction Tuned
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,37 @@
model: gemma2-9b-it
label:
zh_Hans: Gemma 2 9B Instruction Tuned
en_US: Gemma 2 9B Instruction Tuned
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,38 @@
model: llama-3.2-11b-text-preview
deprecated: true
label:
zh_Hans: Llama 3.2 11B Text (Preview)
en_US: Llama 3.2 11B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,38 @@
model: llama-3.2-11b-vision-preview
label:
zh_Hans: Llama 3.2 11B Vision (Preview)
en_US: Llama 3.2 11B Vision (Preview)
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,37 @@
model: llama-3.2-1b-preview
label:
zh_Hans: Llama 3.2 1B Text (Preview)
en_US: Llama 3.2 1B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,37 @@
model: llama-3.2-3b-preview
label:
zh_Hans: Llama 3.2 3B Text (Preview)
en_US: Llama 3.2 3B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,38 @@
model: llama-3.2-90b-text-preview
deprecated: true
label:
zh_Hans: Llama 3.2 90B Text (Preview)
en_US: Llama 3.2 90B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,38 @@
model: llama-3.2-90b-vision-preview
label:
zh_Hans: Llama 3.2 90B Vision (Preview)
en_US: Llama 3.2 90B Vision (Preview)
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,38 @@
model: llama-3.3-70b-specdec
label:
zh_Hans: Llama 3.3 70B Specdec
en_US: Llama 3.3 70B Specdec
model_type: llm
features:
- agent-thought
- multi-tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32768
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "0.05"
output: "0.1"
unit: "0.000001"
currency: USD

View File

@@ -0,0 +1,38 @@
model: llama-3.3-70b-versatile
label:
zh_Hans: Llama 3.3 70B Versatile
en_US: Llama 3.3 70B Versatile
model_type: llm
features:
- agent-thought
- multi-tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32768
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "0.05"
output: "0.1"
unit: "0.000001"
currency: USD

View File

@@ -0,0 +1,37 @@
model: llama-guard-3-8b
label:
zh_Hans: Llama-Guard-3-8B
en_US: Llama-Guard-3-8B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: Specify the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.20'
output: '0.20'
unit: '0.000001'
currency: USD

View File

@@ -0,0 +1,38 @@
model: llama3-groq-70b-8192-tool-use-preview
label:
zh_Hans: Llama3-groq-70b-8192-tool-use (PREVIEW)
en_US: Llama3-groq-70b-8192-tool-use (PREVIEW)
model_type: llm
features:
- agent-thought
- multi-tool-call
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: Specify the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '0.05'
output: '0.08'
unit: '0.000001'
currency: USD

View File

@@ -3,17 +3,16 @@ package groq
import (
_ "embed"
"fmt"
"github.com/eolinker/apinto/convert"
http_context "github.com/eolinker/apinto/node/http-context"
"github.com/joho/godotenv"
"github.com/valyala/fasthttp"
"net"
"net/url"
"os"
"testing"
"time"
ai_provider "github.com/eolinker/apinto/drivers/ai-provider"
"github.com/eolinker/apinto/convert"
http_context "github.com/eolinker/apinto/node/http-context"
"github.com/joho/godotenv"
"github.com/valyala/fasthttp"
)
var (
@@ -54,24 +53,24 @@ func TestSentTo(t *testing.T) {
{
name: "success",
apiKey: os.Getenv("GROQ_VALID_API_KEY"),
wantStatus: ai_provider.StatusNormal,
wantStatus: convert.StatusNormal,
body: successBody,
},
{
name: "invalid request",
apiKey: os.Getenv("GROQ_VALID_API_KEY"),
wantStatus: ai_provider.StatusInvalidRequest,
wantStatus: convert.StatusInvalidRequest,
body: failBody,
},
{
name: "invalid key",
apiKey: os.Getenv("GROQ_INVALID_API_KEY"),
wantStatus: ai_provider.StatusInvalid,
wantStatus: convert.StatusInvalid,
},
{
name: "expired key",
apiKey: os.Getenv("GROQ_EXPIRE_API_KEY"),
wantStatus: ai_provider.StatusInvalid,
wantStatus: convert.StatusInvalid,
},
}
@@ -112,14 +111,14 @@ func runTest(apiKey string, requestBody []byte, wantStatus string) error {
ctx := createMockHttpContext("/openai/v1/chat/completions", nil, nil, requestBody)
// Execute the conversion process
err = executeConverter(ctx, handler, "llama3-8b-8192", baseDomain)
err = executeConverter(ctx, handler, "llama3-70b-8192", baseDomain)
if err != nil {
return fmt.Errorf("failed to execute conversion process: %w", err)
}
// Check the status
if ai_provider.GetAIStatus(ctx) != wantStatus {
return fmt.Errorf("unexpected status: got %s, expected %s", ai_provider.GetAIStatus(ctx), wantStatus)
if convert.GetAIStatus(ctx) != wantStatus {
return fmt.Errorf("unexpected status: got %s, expected %s", convert.GetAIStatus(ctx), wantStatus)
}
return nil
@@ -128,7 +127,7 @@ func runTest(apiKey string, requestBody []byte, wantStatus string) error {
// executeConverter handles the full flow of a conversion process.
func executeConverter(ctx *http_context.HttpContext, handler convert.IConverterDriver, model string, baseUrl string) error {
// Balance handler setup
balanceHandler, err := ai_provider.NewBalanceHandler("test", baseUrl, 30*time.Second)
balanceHandler, err := convert.NewBalanceHandler("test", baseUrl, 30*time.Second)
if err != nil {
return fmt.Errorf("failed to create balance handler: %w", err)
}
@@ -180,7 +179,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req := fasthttp.AcquireRequest()
u := fasthttp.AcquireURI()
// Set request URI and path
uri, _ := url.Parse(rawURL)
u.SetPath(uri.Path)
u.SetScheme(uri.Scheme)
@@ -189,7 +188,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req.SetURI(u)
req.Header.SetMethod("POST")
// Set headers
for k, v := range headers {
req.Header.Set(k, v)
}
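
The hunks above migrate the groq tests from the ai_provider package to convert (StatusNormal, GetAIStatus, NewBalanceHandler) and switch the test model to llama3-70b-8192. A minimal sketch of the resulting check follows, reusing only identifiers visible in this diff; createMockHttpContext, executeConverter, successBody, and baseDomain come from elsewhere in the test file, and handler stands for whatever convert.IConverterDriver the surrounding test constructs.

func TestGroqFlowSketch(t *testing.T) {
	// Mirror runTest above: build a mock context, convert, then check status.
	ctx := createMockHttpContext("/openai/v1/chat/completions", nil, nil, successBody)
	if err := executeConverter(ctx, handler, "llama3-70b-8192", baseDomain); err != nil {
		t.Fatalf("failed to execute conversion process: %v", err)
	}
	// After the package rename, status constants and accessors live in convert.
	if got := convert.GetAIStatus(ctx); got != convert.StatusNormal {
		t.Fatalf("unexpected status: got %s, expected %s", got, convert.StatusNormal)
	}
}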

View File

@@ -213,7 +213,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req := fasthttp.AcquireRequest()
u := fasthttp.AcquireURI()
// Set request URI and path
uri, _ := url.Parse(rawURL)
u.SetPath(uri.Path)
u.SetScheme(uri.Scheme)
@@ -222,7 +222,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req.SetURI(u)
req.Header.SetMethod("POST")
// Set headers
for k, v := range headers {
req.Header.Set(k, v)
}

View File

@@ -0,0 +1,38 @@
model: hunyuan-functioncall
label:
zh_Hans: hunyuan-functioncall
en_US: hunyuan-functioncall
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.004'
output: '0.008'
unit: '0.001'
currency: RMB
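
For context, a hedged sketch of where a provider-specific switch like enable_enhance would sit in an OpenAI-style request body; the exact wire format is an assumption for illustration, not Tencent's documented schema.

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Hypothetical request body: enable_enhance rides along with the usual
	// OpenAI-style chat fields; the wire format here is an assumption.
	body := map[string]any{
		"model":          "hunyuan-functioncall",
		"messages":       []map[string]string{{"role": "user", "content": "hello"}},
		"max_tokens":     1024,
		"enable_enhance": true,
	}
	b, _ := json.Marshal(body)
	fmt.Println(string(b))
}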

View File

@@ -0,0 +1,38 @@
model: hunyuan-large-longcontext
label:
zh_Hans: hunyuan-large-longcontext
en_US: hunyuan-large-longcontext
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 134000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 134000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.006'
output: '0.018'
unit: '0.001'
currency: RMB

View File

@@ -0,0 +1,38 @@
model: hunyuan-large-role
label:
zh_Hans: hunyuan-large-role
en_US: hunyuan-large-role
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.004'
output: '0.008'
unit: '0.001'
currency: RMB

View File

@@ -0,0 +1,38 @@
model: hunyuan-large
label:
zh_Hans: hunyuan-large
en_US: hunyuan-large
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.004'
output: '0.012'
unit: '0.001'
currency: RMB

View File

@@ -0,0 +1,38 @@
model: hunyuan-role
label:
zh_Hans: hunyuan-role
en_US: hunyuan-role
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.004'
output: '0.008'
unit: '0.001'
currency: RMB

View File

@@ -0,0 +1,38 @@
model: hunyuan-turbo-latest
label:
zh_Hans: hunyuan-turbo-latest
en_US: hunyuan-turbo-latest
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 32000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.015'
output: '0.05'
unit: '0.001'
currency: RMB

View File

@@ -0,0 +1,39 @@
model: hunyuan-turbo-vision
label:
zh_Hans: hunyuan-turbo-vision
en_US: hunyuan-turbo-vision
model_type: llm
features:
- agent-thought
- tool-call
- multi-tool-call
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 8000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 8000
- name: enable_enhance
label:
zh_Hans: 功能增强
en_US: Enable Enhancement
type: boolean
help:
zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
en_US: Allow the model to perform external search to enhance the generation results.
required: false
default: true
pricing:
input: '0.08'
output: '0.08'
unit: '0.001'
currency: RMB

View File

@@ -0,0 +1,44 @@
model: abab6.5t-chat
label:
en_US: Abab6.5t-Chat
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
min: 0.01
max: 1
default: 0.9
- name: top_p
use_template: top_p
min: 0.01
max: 1
default: 0.95
- name: max_tokens
use_template: max_tokens
required: true
default: 3072
min: 1
max: 8192
- name: mask_sensitive_info
type: boolean
default: true
label:
zh_Hans: 隐私保护
en_US: Moderate
help:
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id. Defaults to true (masking enabled).
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0.005'
output: '0.005'
unit: '0.001'
currency: RMB

View File

@@ -0,0 +1,46 @@
model: abab7-chat-preview
label:
en_US: Abab7-chat-preview
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 245760
parameter_rules:
- name: temperature
use_template: temperature
min: 0.01
max: 1
default: 0.1
- name: top_p
use_template: top_p
min: 0.01
max: 1
default: 0.95
- name: max_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 245760
- name: mask_sensitive_info
type: boolean
default: true
label:
zh_Hans: 隐私保护
en_US: Moderate
help:
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id. Defaults to true (masking enabled).
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0.1'
output: '0.1'
unit: '0.001'
currency: RMB

View File

@@ -0,0 +1,46 @@
model: minimax-text-01
label:
en_US: Minimax-Text-01
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1000192
parameter_rules:
- name: temperature
use_template: temperature
min: 0.01
max: 1
default: 0.1
- name: top_p
use_template: top_p
min: 0.01
max: 1
default: 0.95
- name: max_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 1000192
- name: mask_sensitive_info
type: boolean
default: true
label:
zh_Hans: 隐私保护
en_US: Moderate
help:
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id. Defaults to true (masking enabled).
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0.001'
output: '0.008'
unit: '0.001'
currency: RMB

View File

@@ -178,7 +178,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req := fasthttp.AcquireRequest()
u := fasthttp.AcquireURI()
// Set request URI and path
uri, _ := url.Parse(rawURL)
u.SetPath(uri.Path)
u.SetScheme(uri.Scheme)
@@ -187,7 +187,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req.SetURI(u)
req.Header.SetMethod("POST")
// Set headers
for k, v := range headers {
req.Header.Set(k, v)
}

View File

@@ -0,0 +1,52 @@
model: pixtral-large-2411
label:
zh_Hans: pixtral-large-2411
en_US: pixtral-large-2411
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.7
min: 0
max: 1
- name: top_p
use_template: top_p
default: 1
min: 0
max: 1
- name: max_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: safe_prompt
default: false
type: boolean
help:
en_US: Whether to inject a safety prompt before all conversations.
zh_Hans: 是否开启提示词审查
label:
en_US: SafePrompt
zh_Hans: 提示词审查
- name: random_seed
type: int
help:
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
label:
en_US: RandomSeed
zh_Hans: 随机数种子
default: 0
min: 0
max: 2147483647
pricing:
input: '0.008'
output: '0.024'
unit: '0.001'
currency: USD

View File

@@ -0,0 +1,52 @@
model: pixtral-large-latest
label:
zh_Hans: pixtral-large-latest
en_US: pixtral-large-latest
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.7
min: 0
max: 1
- name: top_p
use_template: top_p
default: 1
min: 0
max: 1
- name: max_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: safe_prompt
default: false
type: boolean
help:
en_US: Whether to inject a safety prompt before all conversations.
zh_Hans: 是否开启提示词审查
label:
en_US: SafePrompt
zh_Hans: 提示词审查
- name: random_seed
type: int
help:
en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
label:
en_US: RandomSeed
zh_Hans: 随机数种子
default: 0
min: 0
max: 2147483647
pricing:
input: '0.008'
output: '0.024'
unit: '0.001'
currency: USD

View File

@@ -209,7 +209,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req := fasthttp.AcquireRequest()
u := fasthttp.AcquireURI()
// Set request URI and path
uri, _ := url.Parse(rawURL)
u.SetPath(uri.Path)
u.SetScheme(uri.Scheme)
@@ -218,7 +218,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req.SetURI(u)
req.Header.SetMethod("POST")
// Set headers
for k, v := range headers {
req.Header.Set(k, v)
}

View File

@@ -207,7 +207,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req := fasthttp.AcquireRequest()
u := fasthttp.AcquireURI()
// Set request URI and path
uri, _ := url.Parse(rawURL)
u.SetPath(uri.Path)
u.SetScheme(uri.Scheme)
@@ -216,7 +216,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
req.SetURI(u)
req.Header.SetMethod("POST")
// Set headers
for k, v := range headers {
req.Header.Set(k, v)
}

View File

@@ -0,0 +1,41 @@
model: Sao10K/L3-8B-Stheno-v3.2
label:
zh_Hans: L3 8B Stheno V3.2
en_US: L3 8B Stheno V3.2
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0005'
output: '0.0005'
unit: '0.0001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: deepseek/deepseek-r1
label:
zh_Hans: DeepSeek R1
en_US: DeepSeek R1
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.04'
output: '0.04'
unit: '0.0001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: deepseek/deepseek_v3
label:
zh_Hans: DeepSeek V3
en_US: DeepSeek V3
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0089'
output: '0.0089'
unit: '0.0001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: sao10k/l3-8b-lunaris
label:
zh_Hans: "Sao10k L3 8B Lunaris"
en_US: "Sao10k L3 8B Lunaris"
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0005'
output: '0.0005'
unit: '0.0001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: sao10k/l31-70b-euryale-v2.2
label:
zh_Hans: L31 70B Euryale V2.2
en_US: L31 70B Euryale V2.2
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 16000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0148'
output: '0.0148'
unit: '0.0001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: meta-llama/llama-3.1-8b-instruct-bf16
label:
zh_Hans: Llama 3.1 8B Instruct BF16
en_US: Llama 3.1 8B Instruct BF16
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0006'
output: '0.0006'
unit: '0.0001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: meta-llama/llama-3.1-8b-instruct-max
label:
zh_Hans: "Llama3.1 8B Instruct Max\t"
en_US: "Llama3.1 8B Instruct Max\t"
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 16384
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0005'
output: '0.0005'
unit: '0.0001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: meta-llama/llama-3.2-11b-vision-instruct
label:
zh_Hans: "Llama 3.2 11B Vision Instruct\t"
en_US: "Llama 3.2 11B Vision Instruct\t"
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0006'
output: '0.0006'
unit: '0.0001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: meta-llama/llama-3.2-1b-instruct
label:
zh_Hans: "Llama 3.2 1B Instruct\t"
en_US: "Llama 3.2 1B Instruct\t"
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0002'
output: '0.0002'
unit: '0.0001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: meta-llama/llama-3.2-3b-instruct
label:
zh_Hans: Llama 3.2 3B Instruct
en_US: Llama 3.2 3B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0003'
output: '0.0005'
unit: '0.0001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: meta-llama/llama-3.3-70b-instruct
label:
zh_Hans: Llama 3.3 70B Instruct
en_US: Llama 3.3 70B Instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0039'
output: '0.0039'
unit: '0.0001'
currency: USD

View File

@@ -1,69 +0,0 @@
from collections.abc import Generator
from typing import Optional, Union
from core.model_runtime.entities.llm_entities import LLMResult
from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool
from core.model_runtime.entities.model_entities import AIModelEntity
from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
class NovitaLargeLanguageModel(OAIAPICompatLargeLanguageModel):
def _update_endpoint_url(self, credentials: dict):
credentials["endpoint_url"] = "https://api.novita.ai/v3/openai"
credentials["extra_headers"] = {"X-Novita-Source": "dify.ai"}
return credentials
def _invoke(
self,
model: str,
credentials: dict,
prompt_messages: list[PromptMessage],
model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None,
stop: Optional[list[str]] = None,
stream: bool = True,
user: Optional[str] = None,
) -> Union[LLMResult, Generator]:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
return super()._invoke(model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user)
def validate_credentials(self, model: str, credentials: dict) -> None:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
self._add_custom_parameters(credentials, model)
return super().validate_credentials(model, cred_with_endpoint)
@classmethod
def _add_custom_parameters(cls, credentials: dict, model: str) -> None:
credentials["mode"] = "chat"
def _generate(
self,
model: str,
credentials: dict,
prompt_messages: list[PromptMessage],
model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None,
stop: Optional[list[str]] = None,
stream: bool = True,
user: Optional[str] = None,
) -> Union[LLMResult, Generator]:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
return super()._generate(
model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user
)
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
return super().get_customizable_model_schema(model, cred_with_endpoint)
def get_num_tokens(
self,
model: str,
credentials: dict,
prompt_messages: list[PromptMessage],
tools: Optional[list[PromptMessageTool]] = None,
) -> int:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
return super().get_num_tokens(model, cred_with_endpoint, prompt_messages, tools)

View File

@@ -0,0 +1,41 @@
model: mistralai/mistral-nemo
label:
zh_Hans: Mistral Nemo
en_US: Mistral Nemo
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0017'
output: '0.0017'
unit: '0.0001'
currency: USD

View File

@@ -0,0 +1,41 @@
model: openchat/openchat-7b
label:
zh_Hans: OpenChat 7B
en_US: OpenChat 7B
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 4096
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 2
default: 1
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 2048
default: 512
- name: frequency_penalty
use_template: frequency_penalty
min: -2
max: 2
default: 0
- name: presence_penalty
use_template: presence_penalty
min: -2
max: 2
default: 0
pricing:
input: '0.0006'
output: '0.0006'
unit: '0.0001'
currency: USD

Some files were not shown because too many files have changed in this diff.