support local model (https://github.com/eolinker/apinto)
@@ -19,6 +19,7 @@ import (
 	"github.com/eolinker/apinto/drivers/ai-provider/moonshot"
 	"github.com/eolinker/apinto/drivers/ai-provider/novita"
 	"github.com/eolinker/apinto/drivers/ai-provider/nvidia"
+	"github.com/eolinker/apinto/drivers/ai-provider/ollama"
 	"github.com/eolinker/apinto/drivers/ai-provider/openAI"
 	"github.com/eolinker/apinto/drivers/ai-provider/openrouter"
 	"github.com/eolinker/apinto/drivers/ai-provider/perfxcloud"
@@ -149,6 +150,7 @@ func driverRegister(extenderRegister eosc.IExtenderDriverRegister) {
 	vertex_ai.Register(extenderRegister)
 	fakegpt.Register(extenderRegister)
 	zhinao.Register(extenderRegister)
+	ollama.Register(extenderRegister)

 	ai_provider.Register(extenderRegister)
 	ai_key.Register(extenderRegister)
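Together with the new import in the hunk above it, this registers the ollama driver in driverRegister alongside the existing providers, which is what the commit title means by supporting local models: Ollama serves models on the local machine.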
@@ -39,7 +39,7 @@ func buildHexCanonicalRequest(ctx http_service.IHttpContext, signedHeaders []str
 	for i := 0; i < len(queryArgs); i++ {
 		params := strings.Split(queryArgs[i], "=")
 		if len(params) != 2 {
-			//query.Set(params[0], "")
+			//query.SetProvider(params[0], "")
 			continue
 		}
 		query.Set(params[0], params[1])
@@ -127,60 +127,95 @@ func DelKeyResource(provider string, resourceId string) {
 	keyPoolManager.DelKeySource(provider, resourceId)
 }

-func DelProvider(provider string) {
-	providerManager.Del(provider)
+func DelProvider(id string) {
+	provider := balanceManager.Del(id)
+	if provider != "" {
+		keyPoolManager.Del(provider)
+	}
 }

 var (
-	providerManager = NewProviderManager()
+	balanceManager = NewBalanceManager()
 )

-type ProviderManager struct {
+type BalanceManager struct {
 	providers   eosc.Untyped[string, IProvider]
-	providerSorts []IProvider
+	ids         eosc.Untyped[string, eosc.Untyped[string, IProvider]]
+	balances    eosc.Untyped[string, IProvider]
+	balanceSort []IProvider
 }

-func NewProviderManager() *ProviderManager {
-	return &ProviderManager{
+func NewBalanceManager() *BalanceManager {
+	return &BalanceManager{
 		providers: eosc.BuildUntyped[string, IProvider](),
+		balances:  eosc.BuildUntyped[string, IProvider](),
+		ids:       eosc.BuildUntyped[string, eosc.Untyped[string, IProvider]](),
 	}
 }

-func (m *ProviderManager) Set(provider string, p IProvider) {
-	m.providers.Set(provider, p)
-	m.sortProviders()
+func (m *BalanceManager) SetProvider(id string, p IProvider) {
+	m.providers.Set(p.Provider(), p)
+	m.balances.Set(id, p)
+	tmp, has := m.ids.Get(p.Provider())
+	if !has {
+		tmp = eosc.BuildUntyped[string, IProvider]()
+	}
+	tmp.Set(id, p)
+	m.ids.Set(p.Provider(), tmp)
+	m.sortBalances()
 }

-func (m *ProviderManager) Get(provider string) (IProvider, bool) {
+func (m *BalanceManager) Get(provider string) (IProvider, bool) {
 	return m.providers.Get(provider)
 }

-func (m *ProviderManager) sortProviders() {
-	providers := m.providers.List()
-	sort.Slice(providers, func(i, j int) bool {
-		return providers[i].Priority() < providers[j].Priority()
+func (m *BalanceManager) sortBalances() {
+	balances := m.balances.List()
+	tmpBalances := make([]IProvider, 0, len(balances))
+	for _, b := range balances {
+		if b.Priority() == 0 {
+			continue
+		}
+		tmpBalances = append(tmpBalances, b)
+	}
+	sort.Slice(tmpBalances, func(i, j int) bool {
+		return tmpBalances[i].Priority() < tmpBalances[j].Priority()
 	})
-	m.providerSorts = providers
+	m.balanceSort = tmpBalances
 }

-func (m *ProviderManager) Del(provider string) {
-	m.providers.Del(provider)
-	m.sortProviders()
-}
-
-func (m *ProviderManager) Providers() []IProvider {
-	return m.providerSorts
+func (m *BalanceManager) Del(id string) string {
+	p, ok := m.balances.Del(id)
+	if !ok {
+		return ""
+	}
+	tmp, has := m.ids.Get(p.Provider())
+	if !has {
+		return ""
+	}
+	tmp.Del(id)
+	if tmp.Count() < 1 {
+		m.providers.Del(p.Provider())
+		return p.Provider()
+	}
+	m.sortBalances()
+	return ""
 }

-func Providers() []IProvider {
-	return providerManager.Providers()
+func (m *BalanceManager) Balances() []IProvider {
+	return m.balanceSort
 }

-func SetProvider(provider string, p IProvider) {
-	providerManager.Set(provider, p)
+func Balances() []IProvider {
+	return balanceManager.Balances()
+}
+
+func SetProvider(id string, p IProvider) {
+	balanceManager.SetProvider(id, p)
 }

 func GetProvider(provider string) (IProvider, bool) {
-	return providerManager.Get(provider)
+	return balanceManager.Get(provider)
 }
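The hunk above swaps the provider-keyed ProviderManager for an id-keyed BalanceManager: several worker instances can now register against the same upstream provider, entries with Priority() == 0 are kept out of the sorted balance list, and Del reports the provider name only when its last instance is removed, which is how DelProvider knows when to tear down the key pool. A minimal runnable sketch of that ownership rule, with plain maps standing in for eosc.Untyped (illustrative types only, not the real ones):

package main

import "fmt"

type provider struct{ name string }

// balanceManager mimics the new semantics: instances are keyed by worker id,
// and a provider stays registered until its last instance is removed.
type balanceManager struct {
    byID       map[string]provider
    byProvider map[string]map[string]provider
}

func newBalanceManager() *balanceManager {
    return &balanceManager{
        byID:       map[string]provider{},
        byProvider: map[string]map[string]provider{},
    }
}

func (m *balanceManager) SetProvider(id string, p provider) {
    m.byID[id] = p
    ids, ok := m.byProvider[p.name]
    if !ok {
        ids = map[string]provider{}
        m.byProvider[p.name] = ids
    }
    ids[id] = p
}

// Del removes one instance and returns the provider name only when the last
// instance is gone, mirroring how convert.DelProvider decides to clean up.
func (m *balanceManager) Del(id string) string {
    p, ok := m.byID[id]
    if !ok {
        return ""
    }
    delete(m.byID, id)
    delete(m.byProvider[p.name], id)
    if len(m.byProvider[p.name]) == 0 {
        delete(m.byProvider, p.name)
        return p.name
    }
    return ""
}

func main() {
    m := newBalanceManager()
    m.SetProvider("worker-a", provider{"ollama"})
    m.SetProvider("worker-b", provider{"ollama"})
    fmt.Println(m.Del("worker-a")) // "" : the provider still has an instance
    fmt.Println(m.Del("worker-b")) // "ollama" : last instance, drop the key pool
}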
@@ -33,7 +33,7 @@ const (

 const (
 	ModeChat       Mode = "chat"
-	ModeComplete   Mode = "complete"
+	ModeCompletion Mode = "completion"
 )

 type Mode string
drivers/.gitignore (vendored)
@@ -1,2 +1,2 @@
 *.DS_Store
-**/.env
+../.env
@@ -0,0 +1,38 @@
model: claude-3-5-haiku-20241022
label:
  en_US: claude-3-5-haiku-20241022
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
pricing:
  input: '1.00'
  output: '5.00'
  unit: '0.000001'
  currency: USD
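The pricing block is easiest to read as price per unit tokens: with unit: '0.000001', input: '1.00' means $1.00 per one million input tokens. A small sketch of that interpretation (an assumed convention based on how these manifests are usually consumed, not code from this repo):

package main

import "fmt"

// cost applies the assumed manifest convention: the price is quoted per
// `unit` tokens, so cost = tokens * unit * price.
func cost(tokens int, price, unit float64) float64 {
    return float64(tokens) * unit * price
}

func main() {
    // claude-3-5-haiku-20241022: input '1.00', output '5.00', unit '0.000001'
    fmt.Printf("10k input tokens:  $%.4f\n", cost(10000, 1.00, 0.000001)) // $0.0100
    fmt.Printf("10k output tokens: $%.4f\n", cost(10000, 5.00, 0.000001)) // $0.0500
}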
@@ -0,0 +1,40 @@
model: claude-3-5-sonnet-20241022
label:
  en_US: claude-3-5-sonnet-20241022
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
model_properties:
  mode: chat
  context_size: 200000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
pricing:
  input: '3.00'
  output: '15.00'
  unit: '0.000001'
  currency: USD
@@ -3,15 +3,16 @@ package anthropic
 import (
 	_ "embed"
 	"fmt"
-	"github.com/eolinker/apinto/convert"
-	http_context "github.com/eolinker/apinto/node/http-context"
-	"github.com/joho/godotenv"
-	"github.com/valyala/fasthttp"
 	"net/url"
 	"os"
 	"testing"
 	"time"

+	"github.com/eolinker/apinto/convert"
+	http_context "github.com/eolinker/apinto/node/http-context"
+	"github.com/joho/godotenv"
+	"github.com/valyala/fasthttp"
+
 	ai_provider "github.com/eolinker/apinto/drivers/ai-provider"
 )

@@ -179,7 +180,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req := fasthttp.AcquireRequest()
 	u := fasthttp.AcquireURI()

-	// Set request URI and path
+	// SetProvider request URI and path
 	uri, _ := url.Parse(rawURL)
 	u.SetPath(uri.Path)
 	u.SetScheme(uri.Scheme)

@@ -188,7 +189,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req.SetURI(u)
 	req.Header.SetMethod("POST")

-	// Set headers
+	// SetProvider headers
 	for k, v := range headers {
 		req.Header.Set(k, v)
 	}
@@ -3,15 +3,16 @@ package baichuan
 import (
 	_ "embed"
 	"fmt"
-	"github.com/eolinker/apinto/convert"
-	http_context "github.com/eolinker/apinto/node/http-context"
-	"github.com/joho/godotenv"
-	"github.com/valyala/fasthttp"
 	"net/url"
 	"os"
 	"testing"
 	"time"

+	"github.com/eolinker/apinto/convert"
+	http_context "github.com/eolinker/apinto/node/http-context"
+	"github.com/joho/godotenv"
+	"github.com/valyala/fasthttp"
+
 	ai_provider "github.com/eolinker/apinto/drivers/ai-provider"
 )

@@ -179,7 +180,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req := fasthttp.AcquireRequest()
 	u := fasthttp.AcquireURI()

-	// Set request URI and path
+	// SetProvider request URI and path
 	uri, _ := url.Parse(rawURL)
 	u.SetPath(uri.Path)
 	u.SetScheme(uri.Scheme)

@@ -188,7 +189,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req.SetURI(u)
 	req.Header.SetMethod("POST")

-	// Set headers
+	// SetProvider headers
 	for k, v := range headers {
 		req.Header.Set(k, v)
 	}
@@ -0,0 +1,26 @@
model: ai21.jamba-1-5-large-v1:0
label:
  en_US: Jamba 1.5 Large
model_type: llm
model_properties:
  mode: completion
  context_size: 256000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 1
    min: 0.0
    max: 2.0
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
pricing:
  input: '0.002'
  output: '0.008'
  unit: '0.001'
  currency: USD
@@ -0,0 +1,26 @@
model: ai21.jamba-1-5-mini-v1:0
label:
  en_US: Jamba 1.5 Mini
model_type: llm
model_properties:
  mode: completion
  context_size: 256000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 1
    min: 0.0
    max: 2.0
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
pricing:
  input: '0.0002'
  output: '0.0004'
  unit: '0.001'
  currency: USD
drivers/ai-provider/bedrock/llm/amazon.nova-lite-v1.yaml (new file)
@@ -0,0 +1,53 @@
model: amazon.nova-lite-v1:0
label:
  en_US: Nova Lite V1
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
  - vision
model_properties:
  mode: chat
  context_size: 300000
parameter_rules:
  - name: max_new_tokens
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 5000
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip docs from aws has error, max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.00006'
  output: '0.00024'
  unit: '0.001'
  currency: USD
drivers/ai-provider/bedrock/llm/amazon.nova-micro-v1.yaml (new file)
@@ -0,0 +1,52 @@
model: amazon.nova-micro-v1:0
label:
  en_US: Nova Micro V1
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 128000
parameter_rules:
  - name: max_new_tokens
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 5000
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip docs from aws has error, max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.000035'
  output: '0.00014'
  unit: '0.001'
  currency: USD
drivers/ai-provider/bedrock/llm/amazon.nova-pro-v1.yaml (new file)
@@ -0,0 +1,53 @@
model: amazon.nova-pro-v1:0
label:
  en_US: Nova Pro V1
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
  - vision
model_properties:
  mode: chat
  context_size: 300000
parameter_rules:
  - name: max_new_tokens
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 5000
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip docs from aws has error, max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.0008'
  output: '0.0032'
  unit: '0.001'
  currency: USD
@@ -0,0 +1,60 @@
model: anthropic.claude-3-5-haiku-20241022-v1:0
label:
  en_US: Claude 3.5 Haiku
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    required: true
    type: int
    default: 8192
    min: 1
    max: 8192
    help:
      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
  # docs: https://docs.anthropic.com/claude/docs/system-prompts
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip docs from aws has error, max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.001'
  output: '0.005'
  unit: '0.001'
  currency: USD
@@ -0,0 +1,60 @@
model: anthropic.claude-3-5-sonnet-20241022-v2:0
label:
  en_US: Claude 3.5 Sonnet V2
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    required: true
    type: int
    default: 8192
    min: 1
    max: 8192
    help:
      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip docs from aws has error, max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.003'
  output: '0.015'
  unit: '0.001'
  currency: USD
@@ -0,0 +1,60 @@
model: eu.anthropic.claude-3-5-sonnet-20241022-v2:0
label:
  en_US: Claude 3.5 Sonnet V2(EU.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    required: true
    type: int
    default: 4096
    min: 1
    max: 4096
    help:
      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip docs from aws has error, max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.003'
  output: '0.015'
  unit: '0.001'
  currency: USD
drivers/ai-provider/bedrock/llm/us.amazon.nova-lite-v1.yaml (new file)
@@ -0,0 +1,53 @@
model: us.amazon.nova-lite-v1:0
label:
  en_US: Nova Lite V1 (US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
  - vision
model_properties:
  mode: chat
  context_size: 300000
parameter_rules:
  - name: max_new_tokens
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 5000
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip docs from aws has error, max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.00006'
  output: '0.00024'
  unit: '0.001'
  currency: USD
drivers/ai-provider/bedrock/llm/us.amazon.nova-micro-v1.yaml (new file)
@@ -0,0 +1,52 @@
model: us.amazon.nova-micro-v1:0
label:
  en_US: Nova Micro V1 (US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 128000
parameter_rules:
  - name: max_new_tokens
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 5000
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip docs from aws has error, max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.000035'
  output: '0.00014'
  unit: '0.001'
  currency: USD
drivers/ai-provider/bedrock/llm/us.amazon.nova-pro-v1.yaml (new file)
@@ -0,0 +1,53 @@
model: us.amazon.nova-pro-v1:0
label:
  en_US: Nova Pro V1 (US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
  - vision
model_properties:
  mode: chat
  context_size: 300000
parameter_rules:
  - name: max_new_tokens
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 5000
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip docs from aws has error, max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.0008'
  output: '0.0032'
  unit: '0.001'
  currency: USD
@@ -0,0 +1,60 @@
model: us.anthropic.claude-3-5-haiku-20241022-v1:0
label:
  en_US: Claude 3.5 Haiku(US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    required: true
    type: int
    default: 8192
    min: 1
    max: 8192
    help:
      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
  # docs: https://docs.anthropic.com/claude/docs/system-prompts
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip docs from aws has error, max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.001'
  output: '0.005'
  unit: '0.001'
  currency: USD
@@ -0,0 +1,60 @@
model: us.anthropic.claude-3-5-sonnet-20241022-v2:0
label:
  en_US: Claude 3.5 Sonnet V2(US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    required: true
    type: int
    default: 8192
    min: 1
    max: 8192
    help:
      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip docs from aws has error, max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.003'
  output: '0.015'
  unit: '0.001'
  currency: USD
@@ -0,0 +1,29 @@
model: us.meta.llama3-2-11b-instruct-v1:0
label:
  en_US: US Meta Llama 3.2 11B Instruct
model_type: llm
features:
  - vision
  - tool-call
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
    min: 0.0
    max: 1
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.00035'
  output: '0.00035'
  unit: '0.001'
  currency: USD
@@ -0,0 +1,26 @@
model: us.meta.llama3-2-1b-instruct-v1:0
label:
  en_US: US Meta Llama 3.2 1B Instruct
model_type: llm
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
    min: 0.0
    max: 1
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.0001'
  output: '0.0001'
  unit: '0.001'
  currency: USD
@@ -0,0 +1,26 @@
model: us.meta.llama3-2-3b-instruct-v1:0
label:
  en_US: US Meta Llama 3.2 3B Instruct
model_type: llm
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
    min: 0.0
    max: 1
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.00015'
  output: '0.00015'
  unit: '0.001'
  currency: USD
@@ -0,0 +1,31 @@
model: us.meta.llama3-2-90b-instruct-v1:0
label:
  en_US: US Meta Llama 3.2 90B Instruct
model_type: llm
features:
  - tool-call
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
    min: 0.0
    max: 1
  - name: top_p
    use_template: top_p
    default: 0.9
    min: 0
    max: 1
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.002'
  output: '0.002'
  unit: '0.001'
  currency: USD
@@ -16,6 +16,7 @@ type FNewModelMode func(string) IModelMode
 var (
 	modelModes = map[string]FNewModelMode{
 		convert.ModeChat.String():       NewChat,
+		convert.ModeCompletion.String(): NewChat,
 	}
 )
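Both ModeChat and ModeCompletion map to the same NewChat constructor here, which lines up with the ModeComplete to ModeCompletion rename earlier in this commit: completion-style requests are simply routed through the chat conversion path.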
@@ -178,7 +178,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req := fasthttp.AcquireRequest()
 	u := fasthttp.AcquireURI()

-	// Set request URI and path
+	// SetProvider request URI and path
 	uri, _ := url.Parse(rawURL)
 	u.SetPath(uri.Path)
 	u.SetScheme(uri.Scheme)

@@ -187,7 +187,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req.SetURI(u)
 	req.Header.SetMethod("POST")

-	// Set headers
+	// SetProvider headers
 	for k, v := range headers {
 		req.Header.Set(k, v)
 	}

@@ -208,7 +208,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req := fasthttp.AcquireRequest()
 	u := fasthttp.AcquireURI()

-	// Set request URI and path
+	// SetProvider request URI and path
 	uri, _ := url.Parse(rawURL)
 	u.SetPath(uri.Path)
 	u.SetScheme(uri.Scheme)

@@ -217,7 +217,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req.SetURI(u)
 	req.Header.SetMethod("POST")

-	// Set headers
+	// SetProvider headers
 	for k, v := range headers {
 		req.Header.Set(k, v)
 	}
drivers/ai-provider/deepseek/llm/deepseek-reasoner.yaml (new file)
@@ -0,0 +1,21 @@
model: deepseek-reasoner
label:
  zh_Hans: deepseek-reasoner
  en_US: deepseek-reasoner
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 64000
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 8192
    default: 4096
pricing:
  input: "4"
  output: "16"
  unit: "0.000001"
  currency: RMB
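For comparison with the USD manifests above, and assuming the same per-unit pricing convention: unit "0.000001" with currency RMB makes deepseek-reasoner's input "4" and output "16" read as ¥4 and ¥16 per million tokens respectively.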
@@ -98,12 +98,12 @@ func (e *executor) reset(cfg *Config) error {
 	e.provider = cfg.Provider
 	e.modelConfig = extender
 	e.disable = false
-	convert.SetProvider(cfg.Provider, e)
+	convert.SetProvider(e.Id(), e)
 	return nil
 }

 func (e *executor) Stop() error {
-	convert.DelProvider(e.provider)
+	convert.DelProvider(e.Id())
 	return nil
 }
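Registering by e.Id() rather than by provider name is what the BalanceManager change above requires: two converter instances configured for the same provider no longer overwrite or delete each other's registration, and the provider's key pool is torn down only when DelProvider removes the last remaining instance.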
@@ -208,7 +208,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req := fasthttp.AcquireRequest()
 	u := fasthttp.AcquireURI()

-	// Set request URI and path
+	// SetProvider request URI and path
 	uri, _ := url.Parse(rawURL)
 	u.SetPath(uri.Path)
 	u.SetScheme(uri.Scheme)

@@ -217,7 +217,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req.SetURI(u)
 	req.Header.SetMethod("POST")

-	// Set headers
+	// SetProvider headers
 	for k, v := range headers {
 		req.Header.Set(k, v)
 	}
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
label:
  zh_Hans: Llama 3.2 11B Vision Instruct
  en_US: Llama 3.2 11B Vision Instruct
model_type: llm
features:
  - agent-thought
  - tool-call
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
pricing:
  input: '0.2'
  output: '0.2'
  unit: '0.000001'
  currency: USD
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-1b-instruct
label:
  zh_Hans: Llama 3.2 1B Instruct
  en_US: Llama 3.2 1B Instruct
model_type: llm
features:
  - agent-thought
  - tool-call
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
pricing:
  input: '0.1'
  output: '0.1'
  unit: '0.000001'
  currency: USD
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-3b-instruct
label:
  zh_Hans: Llama 3.2 3B Instruct
  en_US: Llama 3.2 3B Instruct
model_type: llm
features:
  - agent-thought
  - tool-call
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
pricing:
  input: '0.1'
  output: '0.1'
  unit: '0.000001'
  currency: USD
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
label:
  zh_Hans: Llama 3.2 90B Vision Instruct
  en_US: Llama 3.2 90B Vision Instruct
model_type: llm
features:
  - agent-thought
  - tool-call
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
pricing:
  input: '0.9'
  output: '0.9'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/fireworks/llm/qwen2p5-72b-instruct.yaml (new file)
@@ -0,0 +1,46 @@
model: accounts/fireworks/models/qwen2p5-72b-instruct
label:
  zh_Hans: Qwen2.5 72B Instruct
  en_US: Qwen2.5 72B Instruct
model_type: llm
features:
  - agent-thought
  - tool-call
model_properties:
  mode: chat
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
  - name: max_tokens
    use_template: max_tokens
  - name: context_length_exceeded_behavior
    default: None
    label:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    help:
      zh_Hans: 上下文长度超出行为
      en_US: Context Length Exceeded Behavior
    type: string
    options:
      - None
      - truncate
      - error
  - name: response_format
    use_template: response_format
pricing:
  input: '0.9'
  output: '0.9'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/google/llm/gemini-1.5-flash-001.yaml (new file)
@@ -0,0 +1,41 @@
model: gemini-1.5-flash-001
label:
  en_US: Gemini 1.5 Flash 001
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 1048576
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/google/llm/gemini-1.5-flash-002.yaml (new file)
@@ -0,0 +1,41 @@
model: gemini-1.5-flash-002
label:
  en_US: Gemini 1.5 Flash 002
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 1048576
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
@@ -0,0 +1,41 @@
model: gemini-1.5-flash-8b-exp-0924
label:
  en_US: Gemini 1.5 Flash 8B 0924
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 1048576
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/google/llm/gemini-1.5-flash.yaml (new file)
@@ -0,0 +1,41 @@
model: gemini-1.5-flash
label:
  en_US: Gemini 1.5 Flash
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 1048576
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/google/llm/gemini-1.5-pro-001.yaml (new file)
@@ -0,0 +1,41 @@
model: gemini-1.5-pro-001
label:
  en_US: Gemini 1.5 Pro 001
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 2097152
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/google/llm/gemini-1.5-pro-002.yaml (new file)
@@ -0,0 +1,41 @@
model: gemini-1.5-pro-002
label:
  en_US: Gemini 1.5 Pro 002
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 2097152
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/google/llm/gemini-1.5-pro.yaml (new file)
@@ -0,0 +1,41 @@
model: gemini-1.5-pro
label:
  en_US: Gemini 1.5 Pro
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 2097152
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/google/llm/gemini-2.0-flash-001.yaml (new file)
@@ -0,0 +1,41 @@
model: gemini-2.0-flash-001
label:
  en_US: Gemini 2.0 Flash 001
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 1048576
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/google/llm/gemini-2.0-flash-exp.yaml (new file)
@@ -0,0 +1,41 @@
model: gemini-2.0-flash-exp
label:
  en_US: Gemini 2.0 Flash Exp
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 1048576
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
@@ -0,0 +1,41 @@
|
||||
model: gemini-2.0-flash-lite-preview-02-05
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash Lite Preview 0205
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
- video
|
||||
- audio
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@@ -0,0 +1,39 @@
model: gemini-2.0-flash-thinking-exp-01-21
label:
  en_US: Gemini 2.0 Flash Thinking Exp 01-21
model_type: llm
features:
  - agent-thought
  - vision
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 32767
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD

@@ -0,0 +1,39 @@
model: gemini-2.0-flash-thinking-exp-1219
label:
  en_US: Gemini 2.0 Flash Thinking Exp 1219
model_type: llm
features:
  - agent-thought
  - vision
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 32767
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/google/llm/gemini-2.0-pro-exp-02-05.yaml (new file)
@@ -0,0 +1,41 @@
model: gemini-2.0-pro-exp-02-05
label:
  en_US: Gemini 2.0 pro exp 02-05
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 1048576
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/google/llm/gemini-exp-1114.yaml (new file)
@@ -0,0 +1,41 @@
model: gemini-exp-1114
label:
  en_US: Gemini exp 1114
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 32767
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD

drivers/ai-provider/google/llm/gemini-exp-1121.yaml (new file)
@@ -0,0 +1,41 @@
model: gemini-exp-1121
label:
  en_US: Gemini exp 1121
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 32767
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD

drivers/ai-provider/google/llm/gemini-exp-1206.yaml (new file)
@@ -0,0 +1,41 @@
model: gemini-exp-1206
label:
  en_US: Gemini exp 1206
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 2097152
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
@@ -0,0 +1,41 @@
model: learnlm-1.5-pro-experimental
label:
  en_US: LearnLM 1.5 Pro Experimental
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
  - video
  - audio
model_properties:
  mode: chat
  context_size: 32767
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_output_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: json_schema
    use_template: json_schema
pricing:
  input: '0.00'
  output: '0.00'
  unit: '0.000001'
  currency: USD
@@ -178,7 +178,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req := fasthttp.AcquireRequest()
 	u := fasthttp.AcquireURI()
 
-	// Set request URI and path
+	// SetProvider request URI and path
 	uri, _ := url.Parse(rawURL)
 	u.SetPath(uri.Path)
 	u.SetScheme(uri.Scheme)
@@ -187,7 +187,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req.SetURI(u)
 	req.Header.SetMethod("POST")
 
-	// Set headers
+	// SetProvider headers
 	for k, v := range headers {
 		req.Header.Set(k, v)
 	}
@@ -75,6 +75,7 @@ func (e *converterDriver) GetModel(model string) (convert.FGenerateConfig, bool)
 			result["response_format"] = map[string]interface{}{
 				"type": modelCfg.ResponseFormat,
 			}
+			result["stream"] = false
 		}
 		return result, nil
 	}, true
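The hunk above adds one behavior to GetModel: when a response_format is configured, the generated request also pins stream to false. A minimal standalone sketch of that merge logic follows; the closure type and field names are simplified assumptions, not apinto's exact signatures.

// Sketch only: a generate-config closure that folds a configured
// response_format into the request and disables streaming alongside it.
package main

import "fmt"

// FGenerateConfig mirrors the spirit of convert.FGenerateConfig: map a raw
// config string into the final request fields.
type FGenerateConfig func(cfg string) (map[string]interface{}, error)

func generateConfig(responseFormat string) FGenerateConfig {
	return func(cfg string) (map[string]interface{}, error) {
		result := map[string]interface{}{}
		if responseFormat != "" {
			result["response_format"] = map[string]interface{}{
				"type": responseFormat,
			}
			// Structured output is requested, so streaming is turned off,
			// matching the hunk above.
			result["stream"] = false
		}
		return result, nil
	}
}

func main() {
	fn := generateConfig("json_object")
	out, _ := fn("")
	fmt.Println(out) // map[response_format:map[type:json_object] stream:false]
}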
@@ -0,0 +1,36 @@
model: deepseek-r1-distill-llama-70b
label:
  en_US: DeepSeek R1 Distill Llama 70b
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: '3.00'
  output: '3.00'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/groq/llm/gemma-7b-it.yaml (new file)
@@ -0,0 +1,37 @@
model: gemma-7b-it
label:
  zh_Hans: Gemma 7B Instruction Tuned
  en_US: Gemma 7B Instruction Tuned
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD

drivers/ai-provider/groq/llm/gemma2-9b-it.yaml (new file)
@@ -0,0 +1,37 @@
model: gemma2-9b-it
label:
  zh_Hans: Gemma 2 9B Instruction Tuned
  en_US: Gemma 2 9B Instruction Tuned
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/groq/llm/llama-3.2-11b-text-preview.yaml (new file)
@@ -0,0 +1,38 @@
model: llama-3.2-11b-text-preview
deprecated: true
label:
  zh_Hans: Llama 3.2 11B Text (Preview)
  en_US: Llama 3.2 11B Text (Preview)
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD

@@ -0,0 +1,38 @@
model: llama-3.2-11b-vision-preview
label:
  zh_Hans: Llama 3.2 11B Vision (Preview)
  en_US: Llama 3.2 11B Vision (Preview)
model_type: llm
features:
  - agent-thought
  - vision
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/groq/llm/llama-3.2-1b-preview.yaml (new file)
@@ -0,0 +1,37 @@
model: llama-3.2-1b-preview
label:
  zh_Hans: Llama 3.2 1B Text (Preview)
  en_US: Llama 3.2 1B Text (Preview)
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD

drivers/ai-provider/groq/llm/llama-3.2-3b-preview.yaml (new file)
@@ -0,0 +1,37 @@
model: llama-3.2-3b-preview
label:
  zh_Hans: Llama 3.2 3B Text (Preview)
  en_US: Llama 3.2 3B Text (Preview)
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/groq/llm/llama-3.2-90b-text-preview.yaml (new file)
@@ -0,0 +1,38 @@
model: llama-3.2-90b-text-preview
deprecated: true
label:
  zh_Hans: Llama 3.2 90B Text (Preview)
  en_US: Llama 3.2 90B Text (Preview)
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD
@@ -0,0 +1,38 @@
model: llama-3.2-90b-vision-preview
label:
  zh_Hans: Llama 3.2 90B Vision (Preview)
  en_US: Llama 3.2 90B Vision (Preview)
model_type: llm
features:
  - agent-thought
  - vision
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: '0.05'
  output: '0.1'
  unit: '0.000001'
  currency: USD
drivers/ai-provider/groq/llm/llama-3.3-70b-specdec.yaml (new file)
@@ -0,0 +1,38 @@
model: llama-3.3-70b-specdec
label:
  zh_Hans: Llama 3.3 70B Specdec
  en_US: Llama 3.3 70B Specdec
model_type: llm
features:
  - agent-thought
  - multi-tool-call
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 32768
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: "0.05"
  output: "0.1"
  unit: "0.000001"
  currency: USD

drivers/ai-provider/groq/llm/llama-3.3-70b-versatile.yaml (new file)
@@ -0,0 +1,38 @@
model: llama-3.3-70b-versatile
label:
  zh_Hans: Llama 3.3 70B Versatile
  en_US: Llama 3.3 70B Versatile
model_type: llm
features:
  - agent-thought
  - multi-tool-call
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 32768
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: "0.05"
  output: "0.1"
  unit: "0.000001"
  currency: USD
drivers/ai-provider/groq/llm/llama-guard-3-8b.yaml (new file)
@@ -0,0 +1,37 @@
model: llama-guard-3-8b
label:
  zh_Hans: Llama-Guard-3-8B
  en_US: Llama-Guard-3-8B
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: '0.20'
  output: '0.20'
  unit: '0.000001'
  currency: USD

@@ -0,0 +1,38 @@
model: llama3-groq-70b-8192-tool-use-preview
label:
  zh_Hans: Llama3-groq-70b-8192-tool-use (PREVIEW)
  en_US: Llama3-groq-70b-8192-tool-use (PREVIEW)
model_type: llm
features:
  - agent-thought
  - multi-tool-call
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 8192
  - name: response_format
    label:
      zh_Hans: 回复格式
      en_US: Response Format
    type: string
    help:
      zh_Hans: 指定模型必须输出的格式
      en_US: specifying the format that the model must output
    required: false
    options:
      - text
      - json_object
pricing:
  input: '0.05'
  output: '0.08'
  unit: '0.000001'
  currency: USD
@@ -3,17 +3,16 @@ package groq
 import (
 	_ "embed"
 	"fmt"
+	"github.com/eolinker/apinto/convert"
+	http_context "github.com/eolinker/apinto/node/http-context"
+	"github.com/joho/godotenv"
+	"github.com/valyala/fasthttp"
 	"net"
 	"net/url"
 	"os"
 	"testing"
 	"time"
-
-	ai_provider "github.com/eolinker/apinto/drivers/ai-provider"
-	"github.com/eolinker/apinto/convert"
-	http_context "github.com/eolinker/apinto/node/http-context"
-	"github.com/joho/godotenv"
-	"github.com/valyala/fasthttp"
 )
 
 var (
@@ -54,24 +53,24 @@ func TestSentTo(t *testing.T) {
 		{
 			name:       "success",
 			apiKey:     os.Getenv("GROQ_VALID_API_KEY"),
-			wantStatus: ai_provider.StatusNormal,
+			wantStatus: convert.StatusNormal,
 			body:       successBody,
 		},
 		{
 			name:       "invalid request",
 			apiKey:     os.Getenv("GROQ_VALID_API_KEY"),
-			wantStatus: ai_provider.StatusInvalidRequest,
+			wantStatus: convert.StatusInvalidRequest,
 			body:       failBody,
 		},
 		{
 			name:       "invalid key",
 			apiKey:     os.Getenv("GROQ_INVALID_API_KEY"),
-			wantStatus: ai_provider.StatusInvalid,
+			wantStatus: convert.StatusInvalid,
 		},
 		{
 			name:       "expired key",
 			apiKey:     os.Getenv("GROQ_EXPIRE_API_KEY"),
-			wantStatus: ai_provider.StatusInvalid,
+			wantStatus: convert.StatusInvalid,
 		},
 	}
 
@@ -112,14 +111,14 @@ func runTest(apiKey string, requestBody []byte, wantStatus string) error {
 	ctx := createMockHttpContext("/openai/v1/chat/completions", nil, nil, requestBody)
 
 	// Execute the conversion process
-	err = executeConverter(ctx, handler, "llama3-8b-8192", baseDomain)
+	err = executeConverter(ctx, handler, "llama3-70b-8192", baseDomain)
 	if err != nil {
 		return fmt.Errorf("failed to execute conversion process: %w", err)
 	}
 
 	// Check the status
-	if ai_provider.GetAIStatus(ctx) != wantStatus {
-		return fmt.Errorf("unexpected status: got %s, expected %s", ai_provider.GetAIStatus(ctx), wantStatus)
+	if convert.GetAIStatus(ctx) != wantStatus {
+		return fmt.Errorf("unexpected status: got %s, expected %s", convert.GetAIStatus(ctx), wantStatus)
 	}
 
 	return nil
@@ -128,7 +127,7 @@ func runTest(apiKey string, requestBody []byte, wantStatus string) error {
 // executeConverter handles the full flow of a conversion process.
 func executeConverter(ctx *http_context.HttpContext, handler convert.IConverterDriver, model string, baseUrl string) error {
 	// Balance handler setup
-	balanceHandler, err := ai_provider.NewBalanceHandler("test", baseUrl, 30*time.Second)
+	balanceHandler, err := convert.NewBalanceHandler("test", baseUrl, 30*time.Second)
 	if err != nil {
 		return fmt.Errorf("failed to create balance handler: %w", err)
 	}
@@ -180,7 +179,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req := fasthttp.AcquireRequest()
 	u := fasthttp.AcquireURI()
 
-	// Set request URI and path
+	// SetProvider request URI and path
 	uri, _ := url.Parse(rawURL)
 	u.SetPath(uri.Path)
 	u.SetScheme(uri.Scheme)
@@ -189,7 +188,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req.SetURI(u)
 	req.Header.SetMethod("POST")
 
-	// Set headers
+	// SetProvider headers
 	for k, v := range headers {
 		req.Header.Set(k, v)
 	}
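The hunks above migrate the groq tests from ai_provider helpers to their new home in convert (NewBalanceHandler, GetAIStatus, and the Status* constants), while keeping the same table-driven shape. A condensed, self-contained sketch of that pattern, with local stand-ins for the status constants (the real ones live in github.com/eolinker/apinto/convert):

// Sketch only: the table-driven status-check pattern, with fake classification.
package groq

import "testing"

// Local stand-ins for convert.StatusNormal etc., so the sketch runs on its own.
const (
	statusNormal         = "normal"
	statusInvalidRequest = "invalid request"
	statusInvalid        = "invalid"
)

// fakeSend stands in for the converter round-trip; it only classifies input.
func fakeSend(apiKey string, body []byte) string {
	switch {
	case apiKey == "":
		return statusInvalid
	case len(body) == 0:
		return statusInvalidRequest
	default:
		return statusNormal
	}
}

func TestStatusTable(t *testing.T) {
	cases := []struct {
		name       string
		apiKey     string
		body       []byte
		wantStatus string
	}{
		{name: "success", apiKey: "valid", body: []byte(`{}`), wantStatus: statusNormal},
		{name: "invalid request", apiKey: "valid", wantStatus: statusInvalidRequest},
		{name: "invalid key", wantStatus: statusInvalid},
	}
	for _, tc := range cases {
		if got := fakeSend(tc.apiKey, tc.body); got != tc.wantStatus {
			t.Errorf("%s: got %s, expected %s", tc.name, got, tc.wantStatus)
		}
	}
}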
@@ -213,7 +213,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req := fasthttp.AcquireRequest()
 	u := fasthttp.AcquireURI()
 
-	// Set request URI and path
+	// SetProvider request URI and path
 	uri, _ := url.Parse(rawURL)
 	u.SetPath(uri.Path)
 	u.SetScheme(uri.Scheme)
@@ -222,7 +222,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req.SetURI(u)
 	req.Header.SetMethod("POST")
 
-	// Set headers
+	// SetProvider headers
 	for k, v := range headers {
 		req.Header.Set(k, v)
 	}
drivers/ai-provider/hunyuan/llm/hunyuan-functioncall.yaml (new file)
@@ -0,0 +1,38 @@
model: hunyuan-functioncall
label:
  zh_Hans: hunyuan-functioncall
  en_US: hunyuan-functioncall
model_type: llm
features:
  - agent-thought
  - tool-call
  - multi-tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 32000
  - name: enable_enhance
    label:
      zh_Hans: 功能增强
      en_US: Enable Enhancement
    type: boolean
    help:
      zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false
    default: true
pricing:
  input: '0.004'
  output: '0.008'
  unit: '0.001'
  currency: RMB

@@ -0,0 +1,38 @@
model: hunyuan-large-longcontext
label:
  zh_Hans: hunyuan-large-longcontext
  en_US: hunyuan-large-longcontext
model_type: llm
features:
  - agent-thought
  - tool-call
  - multi-tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 134000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 134000
  - name: enable_enhance
    label:
      zh_Hans: 功能增强
      en_US: Enable Enhancement
    type: boolean
    help:
      zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false
    default: true
pricing:
  input: '0.006'
  output: '0.018'
  unit: '0.001'
  currency: RMB
drivers/ai-provider/hunyuan/llm/hunyuan-large-role.yaml (new file)
@@ -0,0 +1,38 @@
model: hunyuan-large-role
label:
  zh_Hans: hunyuan-large-role
  en_US: hunyuan-large-role
model_type: llm
features:
  - agent-thought
  - tool-call
  - multi-tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 32000
  - name: enable_enhance
    label:
      zh_Hans: 功能增强
      en_US: Enable Enhancement
    type: boolean
    help:
      zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false
    default: true
pricing:
  input: '0.004'
  output: '0.008'
  unit: '0.001'
  currency: RMB

drivers/ai-provider/hunyuan/llm/hunyuan-large.yaml (new file)
@@ -0,0 +1,38 @@
model: hunyuan-large
label:
  zh_Hans: hunyuan-large
  en_US: hunyuan-large
model_type: llm
features:
  - agent-thought
  - tool-call
  - multi-tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 32000
  - name: enable_enhance
    label:
      zh_Hans: 功能增强
      en_US: Enable Enhancement
    type: boolean
    help:
      zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false
    default: true
pricing:
  input: '0.004'
  output: '0.012'
  unit: '0.001'
  currency: RMB
drivers/ai-provider/hunyuan/llm/hunyuan-role.yaml (new file)
@@ -0,0 +1,38 @@
model: hunyuan-role
label:
  zh_Hans: hunyuan-role
  en_US: hunyuan-role
model_type: llm
features:
  - agent-thought
  - tool-call
  - multi-tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 32000
  - name: enable_enhance
    label:
      zh_Hans: 功能增强
      en_US: Enable Enhancement
    type: boolean
    help:
      zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false
    default: true
pricing:
  input: '0.004'
  output: '0.008'
  unit: '0.001'
  currency: RMB

drivers/ai-provider/hunyuan/llm/hunyuan-turbo-latest.yaml (new file)
@@ -0,0 +1,38 @@
model: hunyuan-turbo-latest
label:
  zh_Hans: hunyuan-turbo-latest
  en_US: hunyuan-turbo-latest
model_type: llm
features:
  - agent-thought
  - tool-call
  - multi-tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 32000
  - name: enable_enhance
    label:
      zh_Hans: 功能增强
      en_US: Enable Enhancement
    type: boolean
    help:
      zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false
    default: true
pricing:
  input: '0.015'
  output: '0.05'
  unit: '0.001'
  currency: RMB
drivers/ai-provider/hunyuan/llm/hunyuan-turbo-vision.yaml (new file)
@@ -0,0 +1,39 @@
model: hunyuan-turbo-vision
label:
  zh_Hans: hunyuan-turbo-vision
  en_US: hunyuan-turbo-vision
model_type: llm
features:
  - agent-thought
  - tool-call
  - multi-tool-call
  - stream-tool-call
  - vision
model_properties:
  mode: chat
  context_size: 8000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 8000
  - name: enable_enhance
    label:
      zh_Hans: 功能增强
      en_US: Enable Enhancement
    type: boolean
    help:
      zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。
      en_US: Allow the model to perform external search to enhance the generation results.
    required: false
    default: true
pricing:
  input: '0.08'
  output: '0.08'
  unit: '0.001'
  currency: RMB
drivers/ai-provider/minimax/llm/abab6.5t-chat.yaml (new file)
@@ -0,0 +1,44 @@
model: abab6.5t-chat
label:
  en_US: Abab6.5t-Chat
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0.01
    max: 1
    default: 0.9
  - name: top_p
    use_template: top_p
    min: 0.01
    max: 1
    default: 0.95
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 3072
    min: 1
    max: 8192
  - name: mask_sensitive_info
    type: boolean
    default: true
    label:
      zh_Hans: 隐私保护
      en_US: Moderate
    help:
      zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
      en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
pricing:
  input: '0.005'
  output: '0.005'
  unit: '0.001'
  currency: RMB
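The min/max/default triple on each rule bounds what a caller may pass: max_tokens above defaults to 3072 and may never exceed 8192. A small sketch of the clamping such bounds imply (illustrative only, not apinto's enforcement code):

// Sketch only: resolving a caller-supplied value against a parameter rule.
package main

import "fmt"

type Rule struct {
	Name     string
	Default  float64
	Min, Max float64
	Required bool
}

// applyRule resolves a caller-supplied value against a rule: missing values
// fall back to the default, out-of-range values are clamped to the bounds.
func applyRule(r Rule, supplied *float64) float64 {
	v := r.Default
	if supplied != nil {
		v = *supplied
	}
	if v < r.Min {
		v = r.Min
	}
	if v > r.Max {
		v = r.Max
	}
	return v
}

func main() {
	maxTokens := Rule{Name: "max_tokens", Default: 3072, Min: 1, Max: 8192, Required: true}
	tooBig := 20000.0
	fmt.Println(applyRule(maxTokens, nil))     // 3072 (default)
	fmt.Println(applyRule(maxTokens, &tooBig)) // 8192 (clamped)
}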
drivers/ai-provider/minimax/llm/abab7-chat-preview.yaml (new file)
@@ -0,0 +1,46 @@
model: abab7-chat-preview
label:
  en_US: Abab7-chat-preview
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 245760
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0.01
    max: 1
    default: 0.1
  - name: top_p
    use_template: top_p
    min: 0.01
    max: 1
    default: 0.95
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 245760
  - name: mask_sensitive_info
    type: boolean
    default: true
    label:
      zh_Hans: 隐私保护
      en_US: Moderate
    help:
      zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
      en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
pricing:
  input: '0.1'
  output: '0.1'
  unit: '0.001'
  currency: RMB

drivers/ai-provider/minimax/llm/minimax-text-01.yaml (new file)
@@ -0,0 +1,46 @@
model: minimax-text-01
label:
  en_US: Minimax-Text-01
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 1000192
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0.01
    max: 1
    default: 0.1
  - name: top_p
    use_template: top_p
    min: 0.01
    max: 1
    default: 0.95
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 1000192
  - name: mask_sensitive_info
    type: boolean
    default: true
    label:
      zh_Hans: 隐私保护
      en_US: Moderate
    help:
      zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码,目前包括但不限于邮箱、域名、链接、证件号、家庭住址等,默认true,即开启打码
      en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
  - name: presence_penalty
    use_template: presence_penalty
  - name: frequency_penalty
    use_template: frequency_penalty
pricing:
  input: '0.001'
  output: '0.008'
  unit: '0.001'
  currency: RMB
@@ -178,7 +178,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req := fasthttp.AcquireRequest()
 	u := fasthttp.AcquireURI()
 
-	// Set request URI and path
+	// SetProvider request URI and path
 	uri, _ := url.Parse(rawURL)
 	u.SetPath(uri.Path)
 	u.SetScheme(uri.Scheme)
@@ -187,7 +187,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req.SetURI(u)
 	req.Header.SetMethod("POST")
 
-	// Set headers
+	// SetProvider headers
 	for k, v := range headers {
 		req.Header.Set(k, v)
 	}
drivers/ai-provider/mistralai/llm/pixtral-large-2411.yaml (new file)
@@ -0,0 +1,52 @@
model: pixtral-large-2411
label:
  zh_Hans: pixtral-large-2411
  en_US: pixtral-large-2411
model_type: llm
features:
  - agent-thought
  - vision
model_properties:
  mode: chat
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.008'
  output: '0.024'
  unit: '0.001'
  currency: USD

drivers/ai-provider/mistralai/llm/pixtral-large-latest.yaml (new file)
@@ -0,0 +1,52 @@
model: pixtral-large-latest
label:
  zh_Hans: pixtral-large-latest
  en_US: pixtral-large-latest
model_type: llm
features:
  - agent-thought
  - vision
model_properties:
  mode: chat
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.008'
  output: '0.024'
  unit: '0.001'
  currency: USD
@@ -209,7 +209,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req := fasthttp.AcquireRequest()
 	u := fasthttp.AcquireURI()
 
-	// Set request URI and path
+	// SetProvider request URI and path
 	uri, _ := url.Parse(rawURL)
 	u.SetPath(uri.Path)
 	u.SetScheme(uri.Scheme)
@@ -218,7 +218,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req.SetURI(u)
 	req.Header.SetMethod("POST")
 
-	// Set headers
+	// SetProvider headers
 	for k, v := range headers {
 		req.Header.Set(k, v)
 	}
@@ -207,7 +207,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req := fasthttp.AcquireRequest()
 	u := fasthttp.AcquireURI()
 
-	// Set request URI and path
+	// SetProvider request URI and path
 	uri, _ := url.Parse(rawURL)
 	u.SetPath(uri.Path)
 	u.SetScheme(uri.Scheme)
@@ -216,7 +216,7 @@ func createMockHttpContext(rawURL string, headers map[string]string, query url.V
 	req.SetURI(u)
 	req.Header.SetMethod("POST")
 
-	// Set headers
+	// SetProvider headers
 	for k, v := range headers {
 		req.Header.Set(k, v)
 	}
drivers/ai-provider/novita/llm/L3-8B-Stheno-v3.2.yaml (new file)
@@ -0,0 +1,41 @@
model: Sao10K/L3-8B-Stheno-v3.2
label:
  zh_Hans: L3 8B Stheno V3.2
  en_US: L3 8B Stheno V3.2
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.0005'
  output: '0.0005'
  unit: '0.0001'
  currency: USD

drivers/ai-provider/novita/llm/deepseek-r1.yaml (new file)
@@ -0,0 +1,41 @@
model: deepseek/deepseek-r1
label:
  zh_Hans: DeepSeek R1
  en_US: DeepSeek R1
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 64000
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.04'
  output: '0.04'
  unit: '0.0001'
  currency: USD
drivers/ai-provider/novita/llm/deepseek_v3.yaml (new file)
@@ -0,0 +1,41 @@
model: deepseek/deepseek_v3
label:
  zh_Hans: DeepSeek V3
  en_US: DeepSeek V3
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 64000
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.0089'
  output: '0.0089'
  unit: '0.0001'
  currency: USD

drivers/ai-provider/novita/llm/l3-8b-lunaris.yaml (new file)
@@ -0,0 +1,41 @@
model: sao10k/l3-8b-lunaris
label:
  zh_Hans: "Sao10k L3 8B Lunaris"
  en_US: "Sao10k L3 8B Lunaris"
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.0005'
  output: '0.0005'
  unit: '0.0001'
  currency: USD
drivers/ai-provider/novita/llm/l31-70b-euryale-v2.2.yaml (new file)
@@ -0,0 +1,41 @@
model: sao10k/l31-70b-euryale-v2.2
label:
  zh_Hans: L31 70B Euryale V2.2
  en_US: L31 70B Euryale V2.2
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 16000
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.0148'
  output: '0.0148'
  unit: '0.0001'
  currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.1-8b-instruct-bf16
label:
  zh_Hans: Llama 3.1 8B Instruct BF16
  en_US: Llama 3.1 8B Instruct BF16
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.0006'
  output: '0.0006'
  unit: '0.0001'
  currency: USD

@@ -0,0 +1,41 @@
model: meta-llama/llama-3.1-8b-instruct-max
label:
  zh_Hans: "Llama3.1 8B Instruct Max\t"
  en_US: "Llama3.1 8B Instruct Max\t"
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 16384
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.0005'
  output: '0.0005'
  unit: '0.0001'
  currency: USD
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.2-11b-vision-instruct
label:
  zh_Hans: "Llama 3.2 11B Vision Instruct\t"
  en_US: "Llama 3.2 11B Vision Instruct\t"
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.0006'
  output: '0.0006'
  unit: '0.0001'
  currency: USD

drivers/ai-provider/novita/llm/llama-3.2-1b-instruct.yaml (new file)
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.2-1b-instruct
label:
  zh_Hans: "Llama 3.2 1B Instruct\t"
  en_US: "Llama 3.2 1B Instruct\t"
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131000
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.0002'
  output: '0.0002'
  unit: '0.0001'
  currency: USD
drivers/ai-provider/novita/llm/llama-3.2-3b-instruct.yaml (new file)
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.2-3b-instruct
label:
  zh_Hans: Llama 3.2 3B Instruct
  en_US: Llama 3.2 3B Instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.0003'
  output: '0.0005'
  unit: '0.0001'
  currency: USD

drivers/ai-provider/novita/llm/llama-3.3-70b-instruct.yaml (new file)
@@ -0,0 +1,41 @@
model: meta-llama/llama-3.3-70b-instruct
label:
  zh_Hans: Llama 3.3 70B Instruct
  en_US: Llama 3.3 70B Instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.0039'
  output: '0.0039'
  unit: '0.0001'
  currency: USD
@@ -1,69 +0,0 @@
from collections.abc import Generator
from typing import Optional, Union

from core.model_runtime.entities.llm_entities import LLMResult
from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool
from core.model_runtime.entities.model_entities import AIModelEntity
from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel


class NovitaLargeLanguageModel(OAIAPICompatLargeLanguageModel):
    def _update_endpoint_url(self, credentials: dict):
        credentials["endpoint_url"] = "https://api.novita.ai/v3/openai"
        credentials["extra_headers"] = {"X-Novita-Source": "dify.ai"}
        return credentials

    def _invoke(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[list[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
    ) -> Union[LLMResult, Generator]:
        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
        return super()._invoke(model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user)

    def validate_credentials(self, model: str, credentials: dict) -> None:
        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
        self._add_custom_parameters(credentials, model)
        return super().validate_credentials(model, cred_with_endpoint)

    @classmethod
    def _add_custom_parameters(cls, credentials: dict, model: str) -> None:
        credentials["mode"] = "chat"

    def _generate(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[list[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
    ) -> Union[LLMResult, Generator]:
        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
        return super()._generate(
            model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user
        )

    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)

        return super().get_customizable_model_schema(model, cred_with_endpoint)

    def get_num_tokens(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        tools: Optional[list[PromptMessageTool]] = None,
    ) -> int:
        cred_with_endpoint = self._update_endpoint_url(credentials=credentials)

        return super().get_num_tokens(model, cred_with_endpoint, prompt_messages, tools)
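The removed Python adapter above does one essential thing: before every call it rewrites the credentials to point at Novita's OpenAI-compatible endpoint (https://api.novita.ai/v3/openai) and attaches an X-Novita-Source header. In Go the same idea can be expressed as a request-rewriting http.RoundTripper; the sketch below is illustrative only — novitaTransport and its wiring are assumptions, not the replacement driver's code:

package main

import (
	"fmt"
	"net/http"
)

// novitaTransport steers any OpenAI-compatible client at Novita's
// endpoint; the extra header mirrors the removed Python adapter.
type novitaTransport struct {
	apiKey string
	base   http.RoundTripper
}

func (t *novitaTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	r := req.Clone(req.Context()) // RoundTrippers must not mutate the caller's request
	r.URL.Scheme = "https"
	r.URL.Host = "api.novita.ai"
	r.URL.Path = "/v3/openai" + r.URL.Path
	r.Header.Set("Authorization", "Bearer "+t.apiKey)
	r.Header.Set("X-Novita-Source", "dify.ai") // as in the deleted code
	return t.base.RoundTrip(r)
}

func main() {
	client := &http.Client{Transport: &novitaTransport{
		apiKey: "sk-...", // placeholder credential
		base:   http.DefaultTransport,
	}}
	// The placeholder host is rewritten by the transport before dialing.
	resp, err := client.Get("http://placeholder/chat/completions")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}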
41  drivers/ai-provider/novita/llm/mistral-nemo.yaml  Normal file
@@ -0,0 +1,41 @@
model: mistralai/mistral-nemo
label:
  zh_Hans: Mistral Nemo
  en_US: Mistral Nemo
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.0017'
  output: '0.0017'
  unit: '0.0001'
  currency: USD
41  drivers/ai-provider/novita/llm/openchat-7b.yaml  Normal file
@@ -0,0 +1,41 @@
model: openchat/openchat-7b
label:
  zh_Hans: OpenChat 7B
  en_US: OpenChat 7B
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 4096
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 2
    default: 1
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 2048
    default: 512
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
pricing:
  input: '0.0006'
  output: '0.0006'
  unit: '0.0001'
  currency: USD
Some files were not shown because too many files have changed in this diff.