Mirror of https://github.com/eolinker/apinto

Commit: Generic SDK integration with multiple providers (通用sdk对接多个供应商)
@@ -36,12 +36,12 @@ func TransformData(inputJSON string, mappingRule MappingRule) (map[string]interf
 			if err != nil {
 				return nil, fmt.Errorf("type conversion failed %s -> %s: %v", key, v.Value, err)
 			}
-			//if value == "response_format" {
-			//	resultMap[v.Value] = map[string]interface{}{
-			//		"type": convertedValue,
-			//	}
-			//	continue
-			//}
+			if value == "response_format" {
+				resultMap[v.Value] = map[string]interface{}{
+					"type": convertedValue,
+				}
+				continue
+			}
 			resultMap[v.Value] = convertedValue
 		} else {
 			resultMap[key] = value

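The newly enabled branch changes the output shape for response_format from a flat value to a nested object. A minimal sketch of the resulting shape; the helper name is hypothetical and "json_object" is just an example value:

// Hypothetical helper, for illustration only: mirrors what the new
// branch in TransformData emits when the mapped key is response_format.
func wrapResponseFormat(convertedValue interface{}) map[string]interface{} {
	// Flat assignment would yield "response_format": "json_object";
	// the nested form matches the OpenAI-style request schema:
	// {"response_format": {"type": "json_object"}}.
	return map[string]interface{}{
		"type": convertedValue,
	}
}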
@@ -23,16 +23,20 @@ const (
 	OpenAIChatCompletePath = "/chat/completions"
 )
 
+type CheckError func(ctx http_service.IHttpContext, body []byte) bool
+
 type OpenAIConvert struct {
 	apikey         string
 	path           string
+	checkErr       CheckError
 	errorCallback  func(ctx http_service.IHttpContext, body []byte)
 	balanceHandler eoscContext.BalanceHandler
 }
 
-func NewOpenAIConvert(apikey string, baseUrl string, timeout time.Duration, errorCallback func(ctx http_service.IHttpContext, body []byte)) (*OpenAIConvert, error) {
+func NewOpenAIConvert(apikey string, baseUrl string, timeout time.Duration, checkErr CheckError, errorCallback func(ctx http_service.IHttpContext, body []byte)) (*OpenAIConvert, error) {
 	c := &OpenAIConvert{
 		apikey:        apikey,
 		checkErr:      checkErr,
 		errorCallback: errorCallback,
 	}
 	if baseUrl != "" {
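The extra checkErr parameter lets each vendor driver plug in its own success test for response bodies. A sketch of supplying the hook, assuming a provider that can return an "error" object inside an HTTP 200 body; the error shape and the use of encoding/json here are illustrative assumptions, not taken from the repo:

// Illustrative checkErr: report success only when the body carries no
// provider-level "error" object (shape assumed for this sketch).
checkOK := func(ctx http_service.IHttpContext, body []byte) bool {
	var probe struct {
		Error json.RawMessage `json:"error"`
	}
	if err := json.Unmarshal(body, &probe); err != nil {
		// Not valid JSON: treat as an error body.
		return false
	}
	// Absent field -> nil RawMessage -> healthy response.
	return len(probe.Error) == 0
}

// Pass it where drivers currently pass nil.
conv, err := NewOpenAIConvert(apikey, "https://api.openai.com", 0, checkOK, errorCallback)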
@@ -103,9 +107,7 @@ func (o *OpenAIConvert) ResponseConvert(ctx eoscContext.EoContext) error {
 			return err
 		}
 	}
 
-	// Skip parsing token usage when the status code is not 200.
-	if httpContext.Response().StatusCode() != 200 {
+	if (o.checkErr != nil && !o.checkErr(httpContext, body)) || httpContext.Response().StatusCode() != 200 {
 		if o.errorCallback != nil {
 			o.errorCallback(httpContext, body)
 		}
@@ -143,6 +145,12 @@ func (o *OpenAIConvert) streamHandler(ctx http_service.IHttpContext, p []byte) (
 			log.Errorf("convert to utf-8 error: %v, line: %s", err, line)
 			return p, nil
 		}
+		if ctx.Response().StatusCode() != 200 || (o.checkErr != nil && !o.checkErr(ctx, tmp)) {
+			if o.errorCallback != nil {
+				o.errorCallback(ctx, tmp)
+			}
+			return p, nil
+		}
 		line = string(tmp)
 	}
 	line = strings.TrimPrefix(line, "data:")
@@ -158,17 +166,12 @@ func (o *OpenAIConvert) streamHandler(ctx http_service.IHttpContext, p []byte) (
			outputToken += getTokens(resp.Choices[0].Message.Content)
			totalToken += outputToken
		}

		if resp.Usage.PromptTokens != 0 {
			inputToken = resp.Usage.PromptTokens
		}
		if resp.Usage.CompletionTokens != 0 {
			outputToken = resp.Usage.CompletionTokens
		}
		if resp.Usage.TotalTokens != 0 {
			totalToken = resp.Usage.TotalTokens
		}
	}
	if err := scanner.Err(); err != nil {
		log.Errorf("scan error: %v", err)
		return p, nil
	}

	SetAIModelInputToken(ctx, inputToken)
	SetAIModelOutputToken(ctx, outputToken)
	SetAIModelTotalToken(ctx, totalToken)

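This hunk makes the stream handler prefer provider-reported usage over locally estimated counts: getTokens estimates accumulate per chunk, but any non-zero usage fields overwrite them before the totals are recorded. A condensed sketch of that precedence, with an assumed chunk struct mirroring the OpenAI usage fields (the real struct lives in the ai_convert package):

// Assumed minimal shape of a parsed SSE chunk, for illustration.
type chunk struct {
	Usage struct {
		PromptTokens     int `json:"prompt_tokens"`
		CompletionTokens int `json:"completion_tokens"`
		TotalTokens      int `json:"total_tokens"`
	} `json:"usage"`
}

// applyUsage overwrites estimated counters with provider-reported
// usage whenever the provider supplies it.
func applyUsage(resp chunk, inputToken, outputToken, totalToken *int) {
	if resp.Usage.PromptTokens != 0 {
		*inputToken = resp.Usage.PromptTokens
	}
	if resp.Usage.CompletionTokens != 0 {
		*outputToken = resp.Usage.CompletionTokens
	}
	if resp.Usage.TotalTokens != 0 {
		*totalToken = resp.Usage.TotalTokens
	}
}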
@@ -35,7 +35,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
 		return nil, err
 	}
 
-	return ai_convert.NewOpenAIConvert(conf.APIKey, conf.Base, 0, errorCallback)
+	return ai_convert.NewOpenAIConvert(conf.APIKey, conf.Base, 0, nil, errorCallback)
 }
 
 func checkConfig(conf *Config) error {

@@ -59,7 +59,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
 		return nil, err
 	}
 
-	return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
+	return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
 }
 
 func errorCallback(ctx http_service.IHttpContext, body []byte) {

@@ -58,7 +58,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
 		return nil, err
 	}
 
-	return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
+	return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
 }
 
 func errorCallback(ctx http_service.IHttpContext, body []byte) {
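These drivers pass nil for the new checkErr parameter and keep relying on their own errorCallback, whose body lies outside the hunks. A hedged sketch of a minimal callback consistent with the signature; the logging-only body is an assumption, not the drivers' actual behavior:

// Minimal placeholder consistent with the callback signature: record
// the upstream failure. Real drivers also rewrite the response so
// clients see a normalized error.
func errorCallback(ctx http_service.IHttpContext, body []byte) {
	log.Errorf("upstream provider error: %s", string(body))
}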
@@ -1,90 +0,0 @@
provider: bedrock
label:
  en_US: AWS
description:
  en_US: AWS Bedrock's models.
icon_small:
  en_US: icon_s_en.svg
icon_large:
  en_US: icon_l_en.svg
background: "#FCFDFF"
help:
  title:
    en_US: Get your Access Key and Secret Access Key from AWS Console
  url:
    en_US: https://console.aws.amazon.com/
supported_model_types:
  - llm
  - text-embedding
configurate_methods:
  - predefined-model
provider_credential_schema:
  credential_form_schemas:
    - variable: aws_access_key_id
      required: false
      label:
        en_US: Access Key (If not provided, credentials are obtained from the running environment.)
        zh_Hans: Access Key
      type: secret-input
      placeholder:
        en_US: Enter your Access Key
        zh_Hans: 在此输入您的 Access Key
    - variable: aws_secret_access_key
      required: false
      label:
        en_US: Secret Access Key
        zh_Hans: Secret Access Key
      type: secret-input
      placeholder:
        en_US: Enter your Secret Access Key
        zh_Hans: 在此输入您的 Secret Access Key
    - variable: aws_region
      required: true
      label:
        en_US: AWS Region
        zh_Hans: AWS 地区
      type: select
      default: us-east-1
      options:
        - value: us-east-1
          label:
            en_US: US East (N. Virginia)
            zh_Hans: 美国东部 (弗吉尼亚北部)
        - value: us-west-2
          label:
            en_US: US West (Oregon)
            zh_Hans: 美国西部 (俄勒冈州)
        - value: ap-southeast-1
          label:
            en_US: Asia Pacific (Singapore)
            zh_Hans: 亚太地区 (新加坡)
        - value: ap-northeast-1
          label:
            en_US: Asia Pacific (Tokyo)
            zh_Hans: 亚太地区 (东京)
        - value: eu-central-1
          label:
            en_US: Europe (Frankfurt)
            zh_Hans: 欧洲 (法兰克福)
        - value: eu-west-2
          label:
            en_US: Europe (London)
            zh_Hans: 欧洲西部 (伦敦)
        - value: us-gov-west-1
          label:
            en_US: AWS GovCloud (US-West)
            zh_Hans: AWS GovCloud (US-West)
        - value: ap-southeast-2
          label:
            en_US: Asia Pacific (Sydney)
            zh_Hans: 亚太地区 (悉尼)
    - variable: model_for_validation
      required: false
      label:
        en_US: Available Model Name
        zh_Hans: 可用模型名称
      type: text-input
      placeholder:
        en_US: A model you have access to (e.g. amazon.titan-text-lite-v1) for validation.
        zh_Hans: 为了进行验证,请输入一个您可用的模型名称 (例如:amazon.titan-text-lite-v1)

address: https://api.openai.com
@@ -37,11 +37,11 @@ func checkConfig(v interface{}) (*Config, error) {
 	if conf.SecretKey == "" {
 		return nil, fmt.Errorf("aws_secret_access_key is required")
 	}
-	//if conf.Region == "" {
-	//	return nil, fmt.Errorf("aws_region is required")
-	//}
-	//if _, ok := availableRegions[conf.Region]; !ok {
-	//	return nil, fmt.Errorf("aws_region is invalid")
-	//}
+	if conf.Region == "" {
+		return nil, fmt.Errorf("aws_region is required")
+	}
+	if _, ok := availableRegions[conf.Region]; !ok {
+		return nil, fmt.Errorf("aws_region is invalid")
+	}
 	return conf, nil
 }
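For reference, a plausible shape for the availableRegions set this check consults, seeded from the regions offered in the provider YAML above; the actual map lives elsewhere in the driver and may differ:

// Assumed region whitelist backing the uncommented validation; entries
// mirror the select options in the bedrock provider config.
var availableRegions = map[string]struct{}{
	"us-east-1":      {},
	"us-west-2":      {},
	"ap-southeast-1": {},
	"ap-northeast-1": {},
	"eu-central-1":   {},
	"eu-west-2":      {},
	"us-gov-west-1":  {},
	"ap-southeast-2": {},
}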
@@ -1,47 +0,0 @@
|
||||
model: ai21.j2-mid-v1
|
||||
label:
|
||||
en_US: J2 Mid V1
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 8191
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: topP
|
||||
use_template: top_p
|
||||
- name: maxTokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 2048
|
||||
- name: count_penalty
|
||||
label:
|
||||
en_US: Count Penalty
|
||||
required: false
|
||||
type: float
|
||||
default: 0
|
||||
min: 0
|
||||
max: 1
|
||||
- name: presence_penalty
|
||||
label:
|
||||
en_US: Presence Penalty
|
||||
required: false
|
||||
type: float
|
||||
default: 0
|
||||
min: 0
|
||||
max: 5
|
||||
- name: frequency_penalty
|
||||
label:
|
||||
en_US: Frequency Penalty
|
||||
required: false
|
||||
type: float
|
||||
default: 0
|
||||
min: 0
|
||||
max: 500
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -1,47 +0,0 @@
|
||||
model: ai21.j2-ultra-v1
|
||||
label:
|
||||
en_US: J2 Ultra V1
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 8191
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: topP
|
||||
use_template: top_p
|
||||
- name: maxTokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 2048
|
||||
- name: count_penalty
|
||||
label:
|
||||
en_US: Count Penalty
|
||||
required: false
|
||||
type: float
|
||||
default: 0
|
||||
min: 0
|
||||
max: 1
|
||||
- name: presence_penalty
|
||||
label:
|
||||
en_US: Presence Penalty
|
||||
required: false
|
||||
type: float
|
||||
default: 0
|
||||
min: 0
|
||||
max: 5
|
||||
- name: frequency_penalty
|
||||
label:
|
||||
en_US: Frequency Penalty
|
||||
required: false
|
||||
type: float
|
||||
default: 0
|
||||
min: 0
|
||||
max: 500
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -1,26 +0,0 @@
|
||||
model: ai21.jamba-1-5-large-v1:0
|
||||
label:
|
||||
en_US: Jamba 1.5 Large
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 256000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 2.0
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_gen_len
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
pricing:
|
||||
input: '0.002'
|
||||
output: '0.008'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,26 +0,0 @@
|
||||
model: ai21.jamba-1-5-mini-v1:0
|
||||
label:
|
||||
en_US: Jamba 1.5 Mini
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 256000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 2.0
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_gen_len
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
pricing:
|
||||
input: '0.0002'
|
||||
output: '0.0004'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,53 +0,0 @@
|
||||
model: amazon.nova-lite-v1:0
|
||||
label:
|
||||
en_US: Nova Lite V1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 300000
|
||||
parameter_rules:
|
||||
- name: max_new_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 5000
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.00006'
|
||||
output: '0.00024'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,52 +0,0 @@
|
||||
model: amazon.nova-micro-v1:0
|
||||
label:
|
||||
en_US: Nova Micro V1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: max_new_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 5000
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.000035'
|
||||
output: '0.00014'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,53 +0,0 @@
|
||||
model: amazon.nova-pro-v1:0
|
||||
label:
|
||||
en_US: Nova Pro V1
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 300000
|
||||
parameter_rules:
|
||||
- name: max_new_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 5000
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.0008'
|
||||
output: '0.0032'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,23 +0,0 @@
|
||||
model: amazon.titan-text-express-v1
|
||||
label:
|
||||
en_US: Titan Text G1 - Express
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 8192
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: topP
|
||||
use_template: top_p
|
||||
- name: maxTokenCount
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 8000
|
||||
pricing:
|
||||
input: '0.0008'
|
||||
output: '0.0016'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,23 +0,0 @@
|
||||
model: amazon.titan-text-lite-v1
|
||||
label:
|
||||
en_US: Titan Text G1 - Lite
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 4096
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: topP
|
||||
use_template: top_p
|
||||
- name: maxTokenCount
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 2048
|
||||
min: 1
|
||||
max: 2048
|
||||
pricing:
|
||||
input: '0.0003'
|
||||
output: '0.0004'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,60 +0,0 @@
|
||||
model: anthropic.claude-3-5-haiku-20241022-v1:0
|
||||
label:
|
||||
en_US: Claude 3.5 Haiku
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
# docs: https://docs.anthropic.com/claude/docs/system-prompts
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.001'
|
||||
output: '0.005'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,61 +0,0 @@
|
||||
model: anthropic.claude-3-haiku-20240307-v1:0
|
||||
label:
|
||||
en_US: Claude 3 Haiku
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
# docs: https://docs.anthropic.com/claude/docs/system-prompts
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.00025'
|
||||
output: '0.00125'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,61 +0,0 @@
|
||||
model: anthropic.claude-3-opus-20240229-v1:0
|
||||
label:
|
||||
en_US: Claude 3 Opus
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
# docs: https://docs.anthropic.com/claude/docs/system-prompts
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.015'
|
||||
output: '0.075'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,60 +0,0 @@
|
||||
model: anthropic.claude-3-5-sonnet-20240620-v1:0
|
||||
label:
|
||||
en_US: Claude 3.5 Sonnet
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.003'
|
||||
output: '0.015'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,60 +0,0 @@
|
||||
model: anthropic.claude-3-sonnet-20240229-v1:0
|
||||
label:
|
||||
en_US: Claude 3 Sonnet
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.003'
|
||||
output: '0.015'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,60 +0,0 @@
|
||||
model: anthropic.claude-3-5-sonnet-20241022-v2:0
|
||||
label:
|
||||
en_US: Claude 3.5 Sonnet V2
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.003'
|
||||
output: '0.015'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,52 +0,0 @@
|
||||
model: anthropic.claude-instant-v1
|
||||
label:
|
||||
en_US: Claude Instant 1
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 100000
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.0008'
|
||||
output: '0.0024'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,53 +0,0 @@
|
||||
model: anthropic.claude-v1
|
||||
label:
|
||||
en_US: Claude 1
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 100000
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.008'
|
||||
output: '0.024'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
deprecated: true
|
||||
@@ -1,54 +0,0 @@
|
||||
model: anthropic.claude-v2:1
|
||||
label:
|
||||
en_US: Claude 2.1
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.008'
|
||||
output: '0.024'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,54 +0,0 @@
|
||||
model: anthropic.claude-v2
|
||||
label:
|
||||
en_US: Claude 2
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 100000
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.008'
|
||||
output: '0.024'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,35 +0,0 @@
|
||||
model: cohere.command-light-text-v14
|
||||
label:
|
||||
en_US: Command Light Text V14
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 4096
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: p
|
||||
use_template: top_p
|
||||
- name: k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
min: 0
|
||||
max: 500
|
||||
default: 0
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
pricing:
|
||||
input: '0.0003'
|
||||
output: '0.0006'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,44 +0,0 @@
|
||||
model: cohere.command-r-plus-v1:0
|
||||
label:
|
||||
en_US: Command R+
|
||||
model_type: llm
|
||||
features:
|
||||
- tool-call
|
||||
#- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
max: 5.0
|
||||
- name: p
|
||||
use_template: top_p
|
||||
default: 0.75
|
||||
min: 0.01
|
||||
max: 0.99
|
||||
- name: k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
default: 0
|
||||
min: 0
|
||||
max: 500
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
max: 4096
|
||||
pricing:
|
||||
input: '3'
|
||||
output: '15'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -1,43 +0,0 @@
|
||||
model: cohere.command-r-v1:0
|
||||
label:
|
||||
en_US: Command R
|
||||
model_type: llm
|
||||
features:
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 128000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
max: 5.0
|
||||
- name: p
|
||||
use_template: top_p
|
||||
default: 0.75
|
||||
min: 0.01
|
||||
max: 0.99
|
||||
- name: k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
default: 0
|
||||
min: 0
|
||||
max: 500
|
||||
- name: presence_penalty
|
||||
use_template: presence_penalty
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
max: 4096
|
||||
pricing:
|
||||
input: '0.5'
|
||||
output: '1.5'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
||||
@@ -1,32 +0,0 @@
|
||||
model: cohere.command-text-v14
|
||||
label:
|
||||
en_US: Command Text V14
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 4096
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: p
|
||||
use_template: top_p
|
||||
- name: k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
pricing:
|
||||
input: '0.0015'
|
||||
output: '0.0020'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,59 +0,0 @@
|
||||
model: eu.anthropic.claude-3-haiku-20240307-v1:0
|
||||
label:
|
||||
en_US: Claude 3 Haiku(EU.Cross Region Inference)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
# docs: https://docs.anthropic.com/claude/docs/system-prompts
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.00025'
|
||||
output: '0.00125'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,58 +0,0 @@
|
||||
model: eu.anthropic.claude-3-5-sonnet-20240620-v1:0
|
||||
label:
|
||||
en_US: Claude 3.5 Sonnet(EU.Cross Region Inference)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.003'
|
||||
output: '0.015'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,58 +0,0 @@
|
||||
model: eu.anthropic.claude-3-sonnet-20240229-v1:0
|
||||
label:
|
||||
en_US: Claude 3 Sonnet(EU.Cross Region Inference)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
pricing:
|
||||
input: '0.003'
|
||||
output: '0.015'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,60 +0,0 @@
|
||||
model: eu.anthropic.claude-3-5-sonnet-20241022-v2:0
|
||||
label:
|
||||
en_US: Claude 3.5 Sonnet V2(EU.Cross Region Inference)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
||||
parameter_rules:
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
|
||||
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
required: false
|
||||
type: float
|
||||
default: 1
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
help:
|
||||
zh_Hans: 生成内容的随机性。
|
||||
en_US: The amount of randomness injected into the response.
|
||||
- name: top_p
|
||||
required: false
|
||||
type: float
|
||||
default: 0.999
|
||||
min: 0.000
|
||||
max: 1.000
|
||||
help:
|
||||
zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
|
||||
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
|
||||
- name: top_k
|
||||
required: false
|
||||
type: int
|
||||
default: 0
|
||||
min: 0
|
||||
# tip docs from aws has error, max value is 500
|
||||
max: 500
|
||||
help:
|
||||
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
|
||||
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.003'
|
||||
output: '0.015'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
@@ -1,23 +0,0 @@
model: meta.llama2-13b-chat-v1
label:
  en_US: Llama 2 Chat 13B
model_type: llm
model_properties:
  mode: chat
  context_size: 4096
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 2048
pricing:
  input: '0.00075'
  output: '0.00100'
  unit: '0.001'
  currency: USD

@@ -1,23 +0,0 @@
model: meta.llama2-70b-chat-v1
label:
  en_US: Llama 2 Chat 70B
model_type: llm
model_properties:
  mode: chat
  context_size: 4096
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 2048
pricing:
  input: '0.00195'
  output: '0.00256'
  unit: '0.001'
  currency: USD

@@ -1,25 +0,0 @@
model: meta.llama3-1-405b-instruct-v1:0
label:
  en_US: Llama 3.1 405B Instruct
model_type: llm
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
  - name: top_p
    use_template: top_p
    default: 0.9
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.00532'
  output: '0.016'
  unit: '0.001'
  currency: USD

@@ -1,25 +0,0 @@
model: meta.llama3-1-70b-instruct-v1:0
label:
  en_US: Llama 3.1 Instruct 70B
model_type: llm
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
  - name: top_p
    use_template: top_p
    default: 0.9
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.00265'
  output: '0.0035'
  unit: '0.001'
  currency: USD

@@ -1,25 +0,0 @@
model: meta.llama3-1-8b-instruct-v1:0
label:
  en_US: Llama 3.1 Instruct 8B
model_type: llm
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
  - name: top_p
    use_template: top_p
    default: 0.9
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.0003'
  output: '0.0006'
  unit: '0.001'
  currency: USD

@@ -1,23 +0,0 @@
model: meta.llama3-70b-instruct-v1:0
label:
  en_US: Llama 3 Instruct 70B
model_type: llm
model_properties:
  mode: completion
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.00265'
  output: '0.0035'
  unit: '0.00001'
  currency: USD

@@ -1,23 +0,0 @@
model: meta.llama3-8b-instruct-v1:0
label:
  en_US: Llama 3 Instruct 8B
model_type: llm
model_properties:
  mode: completion
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.0004'
  output: '0.0006'
  unit: '0.0001'
  currency: USD
@@ -1,39 +0,0 @@
model: mistral.mistral-7b-instruct-v0:2
label:
  en_US: Mistral 7B Instruct
model_type: llm
model_properties:
  mode: completion
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
    required: false
    default: 0.5
  - name: top_p
    use_template: top_p
    required: false
    default: 0.9
  - name: top_k
    use_template: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
    default: 50
    max: 200
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 8192
pricing:
  input: '0.00015'
  output: '0.0002'
  unit: '0.00001'
  currency: USD

@@ -1,30 +0,0 @@
model: mistral.mistral-large-2402-v1:0
label:
  en_US: Mistral Large
model_type: llm
features:
  - tool-call
  - agent-thought
model_properties:
  mode: completion
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
    required: false
    default: 0.7
  - name: top_p
    use_template: top_p
    required: false
    default: 1
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 4096
pricing:
  input: '0.008'
  output: '0.024'
  unit: '0.001'
  currency: USD

@@ -1,29 +0,0 @@
model: mistral.mistral-large-2407-v1:0
label:
  en_US: Mistral Large 2 (24.07)
model_type: llm
features:
  - tool-call
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    required: false
    default: 0.7
  - name: top_p
    use_template: top_p
    required: false
    default: 1
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 8192
pricing:
  input: '0.003'
  output: '0.009'
  unit: '0.001'
  currency: USD

@@ -1,29 +0,0 @@
model: mistral.mistral-small-2402-v1:0
label:
  en_US: Mistral Small
model_type: llm
features:
  - tool-call
model_properties:
  mode: completion
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
    required: false
    default: 0.7
  - name: top_p
    use_template: top_p
    required: false
    default: 1
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 4096
pricing:
  input: '0.001'
  output: '0.03'
  unit: '0.001'
  currency: USD

@@ -1,39 +0,0 @@
model: mistral.mixtral-8x7b-instruct-v0:1
label:
  en_US: Mixtral 8X7B Instruct
model_type: llm
model_properties:
  mode: completion
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
    required: false
    default: 0.5
  - name: top_p
    use_template: top_p
    required: false
    default: 0.9
  - name: top_k
    use_template: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
    default: 50
    max: 200
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 8192
pricing:
  input: '0.00045'
  output: '0.0007'
  unit: '0.00001'
  currency: USD
@@ -1,53 +0,0 @@
model: us.amazon.nova-lite-v1:0
label:
  en_US: Nova Lite V1 (US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
  - vision
model_properties:
  mode: chat
  context_size: 300000
parameter_rules:
  - name: max_new_tokens
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 5000
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip: the AWS docs have an error; the max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.00006'
  output: '0.00024'
  unit: '0.001'
  currency: USD

@@ -1,52 +0,0 @@
model: us.amazon.nova-micro-v1:0
label:
  en_US: Nova Micro V1 (US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 128000
parameter_rules:
  - name: max_new_tokens
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 5000
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip: the AWS docs have an error; the max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.000035'
  output: '0.00014'
  unit: '0.001'
  currency: USD

@@ -1,53 +0,0 @@
model: us.amazon.nova-pro-v1:0
label:
  en_US: Nova Pro V1 (US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
  - vision
model_properties:
  mode: chat
  context_size: 300000
parameter_rules:
  - name: max_new_tokens
    use_template: max_tokens
    required: true
    default: 2048
    min: 1
    max: 5000
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip: the AWS docs have an error; the max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.0008'
  output: '0.0032'
  unit: '0.001'
  currency: USD
@@ -1,60 +0,0 @@
model: us.anthropic.claude-3-5-haiku-20241022-v1:0
label:
  en_US: Claude 3.5 Haiku(US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    required: true
    type: int
    default: 8192
    min: 1
    max: 8192
    help:
      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
  # docs: https://docs.anthropic.com/claude/docs/system-prompts
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip: the AWS docs have an error; the max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.001'
  output: '0.005'
  unit: '0.001'
  currency: USD

@@ -1,59 +0,0 @@
model: us.anthropic.claude-3-haiku-20240307-v1:0
label:
  en_US: Claude 3 Haiku(US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    required: true
    type: int
    default: 4096
    min: 1
    max: 4096
    help:
      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
  # docs: https://docs.anthropic.com/claude/docs/system-prompts
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip: the AWS docs have an error; the max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.00025'
  output: '0.00125'
  unit: '0.001'
  currency: USD

@@ -1,59 +0,0 @@
model: us.anthropic.claude-3-opus-20240229-v1:0
label:
  en_US: Claude 3 Opus(US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    required: true
    type: int
    default: 4096
    min: 1
    max: 4096
    help:
      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
  # docs: https://docs.anthropic.com/claude/docs/system-prompts
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip: the AWS docs have an error; the max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.015'
  output: '0.075'
  unit: '0.001'
  currency: USD
@@ -1,58 +0,0 @@
model: us.anthropic.claude-3-5-sonnet-20240620-v1:0
label:
  en_US: Claude 3.5 Sonnet(US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    required: true
    type: int
    default: 4096
    min: 1
    max: 4096
    help:
      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip: the AWS docs have an error; the max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.003'
  output: '0.015'
  unit: '0.001'
  currency: USD

@@ -1,58 +0,0 @@
model: us.anthropic.claude-3-sonnet-20240229-v1:0
label:
  en_US: Claude 3 Sonnet(US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    required: true
    type: int
    default: 4096
    min: 1
    max: 4096
    help:
      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip: the AWS docs have an error; the max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
  input: '0.003'
  output: '0.015'
  unit: '0.001'
  currency: USD

@@ -1,60 +0,0 @@
model: us.anthropic.claude-3-5-sonnet-20241022-v2:0
label:
  en_US: Claude 3.5 Sonnet V2(US.Cross Region Inference)
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
  - name: max_tokens
    use_template: max_tokens
    required: true
    type: int
    default: 8192
    min: 1
    max: 8192
    help:
      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
  - name: temperature
    use_template: temperature
    required: false
    type: float
    default: 1
    min: 0.0
    max: 1.0
    help:
      zh_Hans: 生成内容的随机性。
      en_US: The amount of randomness injected into the response.
  - name: top_p
    required: false
    type: float
    default: 0.999
    min: 0.000
    max: 1.000
    help:
      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
  - name: top_k
    required: false
    type: int
    default: 0
    min: 0
    # tip: the AWS docs have an error; the max value is 500
    max: 500
    help:
      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.003'
  output: '0.015'
  unit: '0.001'
  currency: USD
@@ -1,29 +0,0 @@
model: us.meta.llama3-2-11b-instruct-v1:0
label:
  en_US: US Meta Llama 3.2 11B Instruct
model_type: llm
features:
  - vision
  - tool-call
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
    min: 0.0
    max: 1
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.00035'
  output: '0.00035'
  unit: '0.001'
  currency: USD

@@ -1,26 +0,0 @@
model: us.meta.llama3-2-1b-instruct-v1:0
label:
  en_US: US Meta Llama 3.2 1B Instruct
model_type: llm
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
    min: 0.0
    max: 1
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.0001'
  output: '0.0001'
  unit: '0.001'
  currency: USD

@@ -1,26 +0,0 @@
model: us.meta.llama3-2-3b-instruct-v1:0
label:
  en_US: US Meta Llama 3.2 3B Instruct
model_type: llm
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
    min: 0.0
    max: 1
  - name: top_p
    use_template: top_p
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.00015'
  output: '0.00015'
  unit: '0.001'
  currency: USD

@@ -1,31 +0,0 @@
model: us.meta.llama3-2-90b-instruct-v1:0
label:
  en_US: US Meta Llama 3.2 90B Instruct
model_type: llm
features:
  - tool-call
model_properties:
  mode: completion
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
    min: 0.0
    max: 1
  - name: top_p
    use_template: top_p
    default: 0.9
    min: 0
    max: 1
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  input: '0.002'
  output: '0.002'
  unit: '0.001'
  currency: USD
@@ -57,7 +57,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
        return nil, err
    }

    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {

@@ -57,7 +57,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
        return nil, err
    }

    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {

@@ -57,7 +57,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
        return nil, err
    }

    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {

@@ -57,7 +57,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
        return nil, err
    }

    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {

@@ -57,7 +57,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
        return nil, err
    }

    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {

@@ -57,7 +57,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
        return nil, err
    }

    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {

@@ -57,7 +57,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
        return nil, err
    }

    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {
@@ -53,7 +53,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
        return nil, err
    }

    return ai_convert.NewOpenAIConvert("", conf.BaseUrl, 0, errorCallback)
    return ai_convert.NewOpenAIConvert("", conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {
@@ -58,7 +58,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
        return nil, err
    }

    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {
@@ -15,12 +15,15 @@ import (
    _ "github.com/eolinker/apinto/drivers/ai-provider/hunyuan"
    _ "github.com/eolinker/apinto/drivers/ai-provider/lm-studio"
    _ "github.com/eolinker/apinto/drivers/ai-provider/minimax"
    _ "github.com/eolinker/apinto/drivers/ai-provider/mistralai"
    _ "github.com/eolinker/apinto/drivers/ai-provider/moonshot"
    _ "github.com/eolinker/apinto/drivers/ai-provider/novita"
    _ "github.com/eolinker/apinto/drivers/ai-provider/nvidia"
    _ "github.com/eolinker/apinto/drivers/ai-provider/openAI"
    _ "github.com/eolinker/apinto/drivers/ai-provider/openrouter"
    _ "github.com/eolinker/apinto/drivers/ai-provider/perfxcloud"
    _ "github.com/eolinker/apinto/drivers/ai-provider/siliconflow"
    _ "github.com/eolinker/apinto/drivers/ai-provider/spark"
    _ "github.com/eolinker/apinto/drivers/ai-provider/stepfun"
    _ "github.com/eolinker/apinto/drivers/ai-provider/tongyi"
    _ "github.com/eolinker/apinto/drivers/ai-provider/upstage"
@@ -53,7 +53,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
        return nil, err
    }

    return ai_convert.NewOpenAIConvert("", conf.BaseUrl, 0, errorCallback)
    return ai_convert.NewOpenAIConvert("", conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {
@@ -40,10 +40,10 @@ var (
            Value: "top_k",
            Type:  "int",
        },
        {
            Value: "response_format",
            Type:  "string",
        },
        //{
        //    Value: "response_format",
        //    Type:  "string",
        //},
        {
            Value: "stream",
            Type:  "bool",
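For context on the hunk above: each entry in this list pairs an upstream parameter name (`Value`) with the type it is coerced to (`Type`) before being forwarded, and the commit toggles the `response_format` rule between an active entry and a commented-out one. A minimal hedged sketch of the shape such an entry takes; the real struct name is not shown in this hunk, so `paramRule` is an assumed placeholder:

// Hypothetical sketch only: the hunk shows literals with Value and Type
// fields, so an entry plausibly looks like this. The actual type name and
// any additional fields may differ in the repository.
type paramRule struct {
    Value string // upstream parameter name, e.g. "response_format"
    Type  string // target type used when coercing the value: "string", "int", "bool", ...
}

var rules = []paramRule{
    {Value: "top_k", Type: "int"},
    {Value: "response_format", Type: "string"},
    {Value: "stream", Type: "bool"},
}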
@@ -59,7 +59,20 @@ func Create(cfg string) (ai_convert.IConverter, error) {
        return nil, err
    }

    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
    return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, checkError, errorCallback)
}

func checkError(ctx http_service.IHttpContext, body []byte) bool {
    if ctx.Response().StatusCode() != 200 {
        return false
    }
    var data Response
    err := json.Unmarshal(body, &data)
    if err != nil {
        log.Errorf("Failed to unmarshal response body: %v", err)
        return false
    }
    return data.BaseResp.StatusCode == 0
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {
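The `checkError` above unmarshals the body into a `Response` type that is not shown in this hunk. As a hedged sketch based on the `data.BaseResp.StatusCode == 0` check, the assumed shape is roughly:

// Assumed sketch of the Response type referenced by checkError; the real
// definition lives elsewhere in the minimax driver and may carry more fields.
// The provider wraps replies in a base_resp envelope, and checkError treats
// any non-zero status_code as an upstream error even when HTTP returns 200.
type BaseResp struct {
    StatusCode int    `json:"status_code"`
    StatusMsg  string `json:"status_msg"`
}

type Response struct {
    BaseResp BaseResp `json:"base_resp"`
}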
@@ -1,22 +0,0 @@
package mistralai

import (
    "fmt"

    "github.com/eolinker/eosc"
)

type Config struct {
    APIKey string `json:"api_key"`
}

func checkConfig(v interface{}) (*Config, error) {
    conf, ok := v.(*Config)
    if !ok {
        return nil, eosc.ErrorConfigType
    }
    if conf.APIKey == "" {
        return nil, fmt.Errorf("api_key is required")
    }
    return conf, nil
}
@@ -1,99 +0,0 @@
package mistralai

import (
    "encoding/json"

    ai_convert "github.com/eolinker/apinto/ai-convert"
    "github.com/eolinker/eosc/eocontext"
    http_context "github.com/eolinker/eosc/eocontext/http-context"
    "github.com/eolinker/eosc/log"
)

var _ ai_convert.IConverterFactory = &convertFactory{}

type convertFactory struct {
}

func (c *convertFactory) Create(cfg string) (ai_convert.IConverterDriver, error) {
    var tmp Config
    err := json.Unmarshal([]byte(cfg), &tmp)
    if err != nil {
        return nil, err
    }
    return newConverterDriver(&tmp)
}

var _ ai_convert.IConverterDriver = &converterDriver{}

type converterDriver struct {
    apikey string
}

func newConverterDriver(cfg *Config) (ai_convert.IConverterDriver, error) {
    return &converterDriver{
        apikey: cfg.APIKey,
    }, nil
}

func (e *converterDriver) GetConverter(model string) (ai_convert.IConverter, bool) {
    converter, ok := modelConvert[model]
    if !ok {
        return nil, false
    }

    return &Converter{converter: converter, apikey: e.apikey}, true
}

func (e *converterDriver) GetModel(model string) (ai_convert.FGenerateConfig, bool) {
    if _, ok := modelConvert[model]; !ok {
        return nil, false
    }
    return func(cfg string) (map[string]interface{}, error) {

        result := map[string]interface{}{
            "model": model,
        }
        if cfg != "" {
            tmp := make(map[string]interface{})
            if err := json.Unmarshal([]byte(cfg), &tmp); err != nil {
                log.Errorf("unmarshal config error: %v, cfg: %s", err, cfg)
                return result, nil
            }
            modelCfg := ai_convert.MapToStruct[ModelConfig](tmp)
            result["frequency_penalty"] = modelCfg.FrequencyPenalty
            if modelCfg.MaxTokens >= 1 {
                result["max_tokens"] = modelCfg.MaxTokens
            }

            result["presence_penalty"] = modelCfg.PresencePenalty
            result["temperature"] = modelCfg.Temperature
            result["top_p"] = modelCfg.TopP
            if modelCfg.ResponseFormat == "" {
                modelCfg.ResponseFormat = "text"
            }
            result["response_format"] = map[string]interface{}{
                "type": modelCfg.ResponseFormat,
            }
        }
        return result, nil
    }, true
}

type Converter struct {
    apikey    string
    converter ai_convert.IConverter
}

func (c *Converter) RequestConvert(ctx eocontext.EoContext, extender map[string]interface{}) error {

    httpContext, err := http_context.Assert(ctx)
    if err != nil {
        return err
    }
    httpContext.Proxy().Header().SetHeader("Authorization", "Bearer "+c.apikey)

    return c.converter.RequestConvert(httpContext, extender)
}

func (c *Converter) ResponseConvert(ctx eocontext.EoContext) error {
    return c.converter.ResponseConvert(ctx)
}
@@ -1,79 +0,0 @@
package mistralai

import (
    "embed"
    "fmt"

    ai_convert "github.com/eolinker/apinto/ai-convert"
    "github.com/eolinker/apinto/drivers"
    "github.com/eolinker/eosc"
)

var (
    //go:embed mistralai.yaml
    providerContent []byte
    //go:embed *
    providerDir  embed.FS
    modelConvert = make(map[string]ai_convert.IConverter)

    _ ai_convert.IConverterDriver = (*executor)(nil)
)

func init() {
    models, err := ai_convert.LoadModels(providerContent, providerDir)
    if err != nil {
        panic(err)
    }
    for key, value := range models {
        if value.ModelProperties != nil {
            if v, ok := modelModes[value.ModelProperties.Mode]; ok {
                modelConvert[key] = v
            }
        }
    }
}

type executor struct {
    drivers.WorkerBase
    ai_convert.IConverterDriver
}

func (e *executor) Start() error {
    return nil
}

func (e *executor) Reset(conf interface{}, workers map[eosc.RequireId]eosc.IWorker) error {
    cfg, ok := conf.(*Config)
    if !ok {
        return fmt.Errorf("invalid config")
    }

    return e.reset(cfg, workers)
}

func (e *executor) reset(conf *Config, workers map[eosc.RequireId]eosc.IWorker) error {
    d, err := newConverterDriver(conf)
    if err != nil {
        return err
    }
    e.IConverterDriver = d

    return nil
}

func (e *executor) Stop() error {
    e.IConverterDriver = nil
    return nil
}

func (e *executor) CheckSkill(skill string) bool {
    return ai_convert.CheckKeySourceSkill(skill)
}

type ModelConfig struct {
    FrequencyPenalty float64 `json:"frequency_penalty"`
    MaxTokens        int     `json:"max_tokens"`
    PresencePenalty  float64 `json:"presence_penalty"`
    ResponseFormat   string  `json:"response_format"`
    Temperature      float64 `json:"temperature"`
    TopP             float64 `json:"top_p"`
}
@@ -1,44 +0,0 @@
package mistralai

import (
    "sync"

    "github.com/eolinker/eosc/common/bean"

    ai_convert "github.com/eolinker/apinto/ai-convert"
    "github.com/eolinker/apinto/drivers"
    "github.com/eolinker/eosc"
)

var name = "mistralai"

var (
    converterManager ai_convert.IManager
    once             sync.Once
)

// Register registers the driver.
func Register(register eosc.IExtenderDriverRegister) {
    register.RegisterExtenderDriver(name, NewFactory())
}

// NewFactory creates the service_http driver factory.
func NewFactory() eosc.IExtenderDriverFactory {
    once.Do(func() {
        bean.Autowired(&converterManager)
        converterManager.Set(name, &convertFactory{})
    })
    return drivers.NewFactory[Config](Create)
}

// Create creates a driver instance.
func Create(id, name string, v *Config, workers map[eosc.RequireId]eosc.IWorker) (eosc.IWorker, error) {
    _, err := checkConfig(v)
    if err != nil {
        return nil, err
    }
    w := &executor{
        WorkerBase: drivers.Worker(id, name),
    }
    w.reset(v, workers)
    return w, nil
}
@@ -1,51 +0,0 @@
model: codestral-latest
label:
  zh_Hans: codestral-latest
  en_US: codestral-latest
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 4096
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.008'
  output: '0.024'
  unit: '0.001'
  currency: USD

@@ -1,51 +0,0 @@
model: mistral-embed
label:
  zh_Hans: mistral-embed
  en_US: mistral-embed
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 1024
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.008'
  output: '0.024'
  unit: '0.001'
  currency: USD

@@ -1,51 +0,0 @@
model: mistral-large-latest
label:
  zh_Hans: mistral-large-latest
  en_US: mistral-large-latest
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 8000
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.008'
  output: '0.024'
  unit: '0.001'
  currency: USD
@@ -1,51 +0,0 @@
model: mistral-medium-latest
label:
  zh_Hans: mistral-medium-latest
  en_US: mistral-medium-latest
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 8000
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.0027'
  output: '0.0081'
  unit: '0.001'
  currency: USD

@@ -1,51 +0,0 @@
model: mistral-small-latest
label:
  zh_Hans: mistral-small-latest
  en_US: mistral-small-latest
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 8000
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.002'
  output: '0.006'
  unit: '0.001'
  currency: USD

@@ -1,51 +0,0 @@
model: open-codestral-mamba
label:
  zh_Hans: open-codestral-mamba
  en_US: open-codestral-mamba
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 256000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 16384
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.008'
  output: '0.024'
  unit: '0.001'
  currency: USD
@@ -1,51 +0,0 @@
model: open-mistral-7b
label:
  zh_Hans: open-mistral-7b
  en_US: open-mistral-7b
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 2048
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.00025'
  output: '0.00025'
  unit: '0.001'
  currency: USD

@@ -1,51 +0,0 @@
model: open-mistral-nemo
label:
  zh_Hans: open-mistral-nemo
  en_US: open-mistral-nemo
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 8192
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.008'
  output: '0.024'
  unit: '0.001'
  currency: USD

@@ -1,51 +0,0 @@
model: open-mixtral-8x22b
label:
  zh_Hans: open-mixtral-8x22b
  en_US: open-mixtral-8x22b
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 64000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 8000
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.002'
  output: '0.006'
  unit: '0.001'
  currency: USD
@@ -1,51 +0,0 @@
model: open-mixtral-8x7b
label:
  zh_Hans: open-mixtral-8x7b
  en_US: open-mixtral-8x7b
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 8000
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.0007'
  output: '0.0007'
  unit: '0.001'
  currency: USD

@@ -1,51 +0,0 @@
model: pixtral-12b-2409
label:
  zh_Hans: pixtral-12b-2409
  en_US: pixtral-12b-2409
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 1024
    min: 1
    max: 8192
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.008'
  output: '0.024'
  unit: '0.001'
  currency: USD

@@ -1,52 +0,0 @@
model: pixtral-large-2411
label:
  zh_Hans: pixtral-large-2411
  en_US: pixtral-large-2411
model_type: llm
features:
  - agent-thought
  - vision
model_properties:
  mode: chat
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.008'
  output: '0.024'
  unit: '0.001'
  currency: USD

@@ -1,52 +0,0 @@
model: pixtral-large-latest
label:
  zh_Hans: pixtral-large-latest
  en_US: pixtral-large-latest
model_type: llm
features:
  - agent-thought
  - vision
model_properties:
  mode: chat
  context_size: 128000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.7
    min: 0
    max: 1
  - name: top_p
    use_template: top_p
    default: 1
    min: 0
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 8192
    min: 1
    max: 8192
  - name: safe_prompt
    default: false
    type: boolean
    help:
      en_US: Whether to inject a safety prompt before all conversations.
      zh_Hans: 是否开启提示词审查
    label:
      en_US: SafePrompt
      zh_Hans: 提示词审查
  - name: random_seed
    type: int
    help:
      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
    label:
      en_US: RandomSeed
      zh_Hans: 随机数种子
    default: 0
    min: 0
    max: 2147483647
pricing:
  input: '0.008'
  output: '0.024'
  unit: '0.001'
  currency: USD
@@ -1,38 +0,0 @@
package mistralai

type ClientRequest struct {
	Messages []*Message `json:"messages"`
}

type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type Response struct {
	Id                string           `json:"id"`
	Object            string           `json:"object"`
	Created           int              `json:"created"`
	Model             string           `json:"model"`
	SystemFingerprint string           `json:"system_fingerprint"`
	Choices           []ResponseChoice `json:"choices"`
	Usage             Usage            `json:"usage"`
}

type ResponseChoice struct {
	Index        int         `json:"index"`
	Message      Message     `json:"message"`
	Logprobs     interface{} `json:"logprobs"`
	FinishReason string      `json:"finish_reason"`
}

type Usage struct {
	PromptTokens            int                     `json:"prompt_tokens"`
	CompletionTokens        int                     `json:"completion_tokens"`
	TotalTokens             int                     `json:"total_tokens"`
	CompletionTokensDetails CompletionTokensDetails `json:"completion_tokens_details"`
}

type CompletionTokensDetails struct {
	ReasoningTokens int `json:"reasoning_tokens"`
}
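These structs mirror the OpenAI-style chat-completion payload, so they decode straight from the upstream JSON. A minimal sketch of that round-trip, assuming the usual encoding/json and fmt imports (the sample body is illustrative, not a captured response):

// Decode a chat-completion body into the Response struct above and
// read back the usage counters that the converter reports.
var resp Response
sample := []byte(`{"id":"cmpl-1","choices":[{"index":0,"message":{"role":"assistant","content":"Hi"},"finish_reason":"stop"}],"usage":{"prompt_tokens":5,"completion_tokens":2,"total_tokens":7}}`)
if err := json.Unmarshal(sample, &resp); err == nil {
	fmt.Println(resp.Choices[0].Message.Content, resp.Usage.TotalTokens) // Hi 7
}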
89
drivers/ai-provider/mistralai/mistralai.go
Normal file
@@ -0,0 +1,89 @@
package mistralai

import (
	"encoding/json"
	"fmt"
	"net/url"

	ai_convert "github.com/eolinker/apinto/ai-convert"
	http_service "github.com/eolinker/eosc/eocontext/http-context"
)

func init() {
	ai_convert.RegisterConverterCreateFunc("mistralai", Create)
}

type Config struct {
	APIKey  string `json:"api_key"`
	BaseUrl string `json:"base_url"`
}

// checkConfig validates the provided configuration.
// It ensures the required fields are set and checks the validity of the base URL if provided.
//
// Parameters:
//   - conf: A pointer to the Config struct to validate.
//
// Returns:
//   - error: An error if the validation fails, or nil if it succeeds.
func checkConfig(conf *Config) error {
	// Check if the APIKey is provided. It is a required field.
	if conf.APIKey == "" {
		return fmt.Errorf("api_key is required")
	}
	if conf.BaseUrl != "" {
		u, err := url.Parse(conf.BaseUrl)
		if err != nil {
			// Return an error if the base URL cannot be parsed.
			return fmt.Errorf("base url is invalid")
		}
		// Ensure the parsed URL contains both a scheme and a host.
		if u.Scheme == "" || u.Host == "" {
			return fmt.Errorf("base url is invalid")
		}
	}
	return nil
}

func Create(cfg string) (ai_convert.IConverter, error) {
	var conf Config
	err := json.Unmarshal([]byte(cfg), &conf)
	if err != nil {
		return nil, err
	}
	err = checkConfig(&conf)
	if err != nil {
		return nil, err
	}

	return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {
	// HTTP status codes returned by the Mistral AI API
	// Status Code | Type                 | Error Message
	// ------------|----------------------|-------------------------------------
	// 200         | Success              | Request was successful.
	// 401         | Authentication Error | Invalid API key (invalid_key).
	// 403         | Forbidden            | Access denied (forbidden_error).
	// 404         | Not Found            | Resource not found (not_found_error).
	// 422         | Validation Error     | Invalid request parameters.
	// 429         | Rate Limit Exceeded  | Too many requests (rate_limit_error).
	// 500         | Server Error         | Internal server error (server_error).
	// 503         | Service Unavailable  | Service is temporarily unavailable (service_unavailable).
	switch ctx.Response().StatusCode() {
	case 422:
		// Handle the invalid request error.
		ai_convert.SetAIStatusInvalidRequest(ctx)
	case 401:
		// Handle the invalid API key error.
		ai_convert.SetAIStatusInvalid(ctx)
	case 403:
		ai_convert.SetAIStatusQuotaExhausted(ctx)
	case 429:
		ai_convert.SetAIStatusExceeded(ctx)
	default:
		ai_convert.SetAIStatusInvalidRequest(ctx)
	}
}
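A minimal sketch of how this driver gets wired up, using only the Create function shown above; the key value is a placeholder and the surrounding gateway plumbing is omitted:

// Build the Mistral converter from the JSON config string the registry passes in.
conv, err := Create(`{"api_key":"sk-placeholder","base_url":"https://api.mistral.ai"}`)
if err != nil {
	panic(err) // an empty api_key or a malformed base_url is rejected by checkConfig
}
_ = conv // satisfies ai_convert.IConverter: RequestConvert and ResponseConvert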
@@ -1,19 +0,0 @@
provider: mistralai
label:
  en_US: MistralAI
description:
  en_US: Models provided by MistralAI, such as open-mistral-7b and mistral-large-latest.
  zh_Hans: MistralAI 提供的模型,例如 open-mistral-7b 和 mistral-large-latest。
icon_small:
  en_US: icon_s_en.svg
icon_large:
  en_US: icon_l_en.svg
background: "#FFFFFF"
help:
  title:
    en_US: Get your API Key from MistralAI
    zh_Hans: 从 MistralAI 获取 API Key
  url:
    en_US: https://console.mistral.ai/api-keys/
supported_model_types:
  - llm
@@ -1,231 +0,0 @@
package mistralai

import (
	"fmt"
	"net/url"
	"os"
	"testing"
	"time"

	"github.com/eolinker/eosc/eocontext"

	ai_convert "github.com/eolinker/apinto/ai-convert"
	http_context "github.com/eolinker/apinto/node/http-context"
	"github.com/joho/godotenv"
	"github.com/valyala/fasthttp"
)

var (
	defaultConfig = `{
		"frequency_penalty": "",
		"max_tokens": 512,
		"presence_penalty": "",
		"response_format": "",
		"temperature": "",
		"top_p": "1"
	}`
	successBody = []byte(`{
		"messages": [
			{
				"content": "Hello, How are you?",
				"role": "user"
			}
		]
	}`)
	failBody = []byte(`{
		"messages": [
			{
				"content": "Hello, how can I help you?",
				"role": "yyy"
			}
		],"variables":{}
	}`)
)

func validNormalFunc(ctx eocontext.EoContext) bool {
	fmt.Printf("input token: %d\n", ai_convert.GetAIModelInputToken(ctx))
	fmt.Printf("output token: %d\n", ai_convert.GetAIModelOutputToken(ctx))
	fmt.Printf("total token: %d\n", ai_convert.GetAIModelTotalToken(ctx))
	if ai_convert.GetAIModelInputToken(ctx) <= 0 {
		return false
	}
	if ai_convert.GetAIModelOutputToken(ctx) <= 0 {
		return false
	}
	return ai_convert.GetAIModelTotalToken(ctx) > 0
}

// TestSentTo tests the end-to-end execution of the mistralai integration.
func TestSentTo(t *testing.T) {
	// Load .env file
	err := godotenv.Load(".env")
	if err != nil {
		t.Fatalf("Error loading .env file: %v", err)
	}

	// Test data for different scenarios
	testData := []struct {
		name       string
		apiKey     string
		wantStatus string
		body       []byte
		validFunc  func(ctx eocontext.EoContext) bool
	}{
		{
			name:       "success",
			apiKey:     os.Getenv("ValidKey"),
			wantStatus: ai_convert.StatusNormal,
			body:       successBody,
			validFunc:  validNormalFunc,
		},
		{
			name:       "invalid request",
			apiKey:     os.Getenv("ValidKey"),
			wantStatus: ai_convert.StatusInvalidRequest,
			body:       failBody,
		},
		{
			name:       "invalid key",
			apiKey:     os.Getenv("InvalidKey"),
			wantStatus: ai_convert.StatusInvalid,
		},
		{
			name:       "expired key",
			apiKey:     os.Getenv("ExpiredKey"),
			wantStatus: ai_convert.StatusInvalid,
		},
	}

	// Run tests for each scenario
	for _, data := range testData {
		t.Run(data.name, func(t *testing.T) {
			if err := runTest(data.apiKey, data.body, data.wantStatus, data.validFunc); err != nil {
				t.Fatalf("Test failed: %v", err)
			}
		})
		time.Sleep(5 * time.Second)
	}
}

// runTest handles a single test case
func runTest(apiKey string, requestBody []byte, wantStatus string, validFunc func(ctx eocontext.EoContext) bool) error {
	cfg := &Config{
		APIKey:       apiKey,
		Organization: "",
	}

	// Create the worker
	worker, err := Create("mistralai", "mistralai", cfg, nil)
	if err != nil {
		return fmt.Errorf("failed to create worker: %w", err)
	}

	// Get the handler
	handler, ok := worker.(ai_convert.IConverterDriver)
	if !ok {
		return fmt.Errorf("worker does not implement IConverterDriver")
	}

	// Default to success body if no body is provided
	if len(requestBody) == 0 {
		requestBody = successBody
	}

	// Mock HTTP context
	ctx := createMockHttpContext("/xxx/xxx", nil, nil, requestBody)

	// Execute the conversion process
	err = executeConverter(ctx, handler, "mistral-small-latest", "https://api.mistral.ai")
	if err != nil {
		return fmt.Errorf("failed to execute conversion process: %w", err)
	}

	// Check the status
	if ai_convert.GetAIStatus(ctx) != wantStatus {
		return fmt.Errorf("unexpected status: got %s, expected %s", ai_convert.GetAIStatus(ctx), wantStatus)
	}
	if validFunc != nil {
		if validFunc(ctx) {
			return nil
		}
		return fmt.Errorf("execute validFunc failed")
	}

	return nil
}

// executeConverter handles the full flow of a conversion process.
func executeConverter(ctx *http_context.HttpContext, handler ai_convert.IConverterDriver, model string, baseUrl string) error {
	// Balance handler setup
	balanceHandler, err := ai_convert.NewBalanceHandler("test", baseUrl, 30*time.Second)
	if err != nil {
		return fmt.Errorf("failed to create balance handler: %w", err)
	}

	// Get model function
	fn, has := handler.GetModel(model)
	if !has {
		return fmt.Errorf("model %s not found", model)
	}

	// Generate config
	extender, err := fn(defaultConfig)
	if err != nil {
		return fmt.Errorf("failed to generate config: %w", err)
	}

	// Get converter
	converter, has := handler.GetConverter(model)
	if !has {
		return fmt.Errorf("converter for model %s not found", model)
	}

	// Convert request
	if err := converter.RequestConvert(ctx, extender); err != nil {
		return fmt.Errorf("request conversion failed: %w", err)
	}

	// Select node via balance handler
	node, _, err := balanceHandler.Select(ctx)
	if err != nil {
		return fmt.Errorf("node selection failed: %w", err)
	}

	// Send request to the node
	if err := ctx.SendTo(balanceHandler.Scheme(), node, balanceHandler.TimeOut()); err != nil {
		return fmt.Errorf("failed to send request to node: %w", err)
	}

	// Convert response
	if err := converter.ResponseConvert(ctx); err != nil {
		return fmt.Errorf("response conversion failed: %w", err)
	}

	return nil
}

// createMockHttpContext creates a mock fasthttp.RequestCtx and wraps it with HttpContext.
func createMockHttpContext(rawURL string, headers map[string]string, query url.Values, body []byte) *http_context.HttpContext {
	req := fasthttp.AcquireRequest()
	u := fasthttp.AcquireURI()

	// Set request URI and path
	uri, _ := url.Parse(rawURL)
	u.SetPath(uri.Path)
	u.SetScheme(uri.Scheme)
	u.SetHost(uri.Host)
	u.SetQueryString(uri.RawQuery)
	req.SetURI(u)
	req.Header.SetMethod("POST")

	// Set headers
	for k, v := range headers {
		req.Header.Set(k, v)
	}
	req.SetBody(body)

	// Create HttpContext
	return http_context.NewContext(&fasthttp.RequestCtx{
		Request:  *req,
		Response: fasthttp.Response{},
	}, 8099)
}
@@ -1,135 +0,0 @@
package mistralai

import (
	"encoding/json"

	ai_convert "github.com/eolinker/apinto/ai-convert"
	"github.com/eolinker/eosc"
	"github.com/eolinker/eosc/eocontext"
	http_context "github.com/eolinker/eosc/eocontext/http-context"
)

var (
	modelModes = map[string]IModelMode{
		ai_convert.ModeChat.String(): NewChat(),
	}
)

type IModelMode interface {
	Endpoint() string
	ai_convert.IConverter
}

type Chat struct {
	endPoint string
}

func NewChat() *Chat {
	return &Chat{
		endPoint: "/v1/chat/completions",
	}
}

func (c *Chat) Endpoint() string {
	return c.endPoint
}

func (c *Chat) RequestConvert(ctx eocontext.EoContext, extender map[string]interface{}) error {
	httpContext, err := http_context.Assert(ctx)
	if err != nil {
		return err
	}
	body, err := httpContext.Proxy().Body().RawBody()
	if err != nil {
		return err
	}
	// Set the upstream forwarding path.
	httpContext.Proxy().URI().SetPath(c.endPoint)
	baseCfg := eosc.NewBase[ai_convert.ClientRequest]()
	err = json.Unmarshal(body, baseCfg)
	if err != nil {
		return err
	}
	messages := make([]Message, 0, len(baseCfg.Config.Messages)+1)
	for _, m := range baseCfg.Config.Messages {
		messages = append(messages, Message{
			Role:    m.Role,
			Content: m.Content,
		})
	}
	baseCfg.SetAppend("messages", messages)
	for k, v := range extender {
		baseCfg.SetAppend(k, v)
	}
	body, err = json.Marshal(baseCfg)
	if err != nil {
		return err
	}
	httpContext.Proxy().Body().SetRaw("application/json", body)
	return nil
}

func (c *Chat) ResponseConvert(ctx eocontext.EoContext) error {
	httpContext, err := http_context.Assert(ctx)
	if err != nil {
		return err
	}

	body := httpContext.Response().GetBody()
	data := eosc.NewBase[Response]()
	err = json.Unmarshal(body, data)
	if err != nil {
		return err
	}

	// HTTP status codes returned by the Mistral AI API;
	// see the table in errorCallback above.
	switch httpContext.Response().StatusCode() {
	case 200:
		// Calculate the token consumption for a successful request.
		usage := data.Config.Usage
		ai_convert.SetAIStatusNormal(ctx)
		ai_convert.SetAIModelInputToken(ctx, usage.PromptTokens)
		ai_convert.SetAIModelOutputToken(ctx, usage.CompletionTokens)
		ai_convert.SetAIModelTotalToken(ctx, usage.TotalTokens)
	case 422:
		// Handle the invalid request error.
		ai_convert.SetAIStatusInvalidRequest(ctx)
	case 401:
		// Handle the invalid API key error.
		ai_convert.SetAIStatusInvalid(ctx)
	case 403:
		ai_convert.SetAIStatusQuotaExhausted(ctx)
	case 429:
		ai_convert.SetAIStatusExceeded(ctx)
	default:
		ai_convert.SetAIStatusInvalidRequest(ctx)
	}

	responseBody := &ai_convert.ClientResponse{}
	if len(data.Config.Choices) > 0 {
		msg := data.Config.Choices[0]
		responseBody.Message = &ai_convert.Message{
			Role:    msg.Message.Role,
			Content: msg.Message.Content,
		}
		responseBody.FinishReason = msg.FinishReason
	} else {
		responseBody.Code = -1
		responseBody.Error = "no response"
	}
	body, err = json.Marshal(responseBody)
	if err != nil {
		return err
	}
	httpContext.Response().SetBody(body)
	return nil
}
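One detail worth noting in the deleted RequestConvert above: eosc.NewBase appears to act as a field-preserving JSON wrapper, decoding the declared fields into .Config while keeping unknown keys, and SetAppend stages extra keys for the next marshal. A sketch of that round-trip under this assumption (the sample payload is illustrative):

// Assumed behavior, inferred from the usage above rather than a documented API:
base := eosc.NewBase[ai_convert.ClientRequest]()
_ = json.Unmarshal([]byte(`{"messages":[],"stream":true}`), base) // "stream" is retained
base.SetAppend("model", "mistral-small-latest")                   // extra key staged
out, _ := json.Marshal(base) // emits messages, stream and model together
_ = out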
@@ -59,7 +59,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
		return nil, err
	}

	return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
	return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {
@@ -57,7 +57,7 @@ func Create(cfg string) (ai_convert.IConverter, error) {
		return nil, err
	}

	return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, errorCallback)
	return ai_convert.NewOpenAIConvert(conf.APIKey, conf.BaseUrl, 0, nil, errorCallback)
}

func errorCallback(ctx http_service.IHttpContext, body []byte) {
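Both call sites above gain a new fourth argument in front of errorCallback and pass nil, which keeps the previous behavior. The extra hook looks like an optional response check for providers that report failures inside an HTTP 200 body; a hedged sketch of such a check follows (the error-body shape is an assumption, not any specific provider's contract):

// Hypothetical check for a provider that wraps failures in a 200 response:
// returning false flags the body as an error and routes it to errorCallback.
checkErr := func(ctx http_service.IHttpContext, body []byte) bool {
	var probe struct {
		Error json.RawMessage `json:"error"`
	}
	if err := json.Unmarshal(body, &probe); err != nil {
		return true // leave unparseable bodies to the status-code path
	}
	return len(probe.Error) == 0 || string(probe.Error) == "null"
}
_ = checkErr // would be passed where the nil sits in the calls above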
@@ -1,23 +0,0 @@
package nvidia

import (
	"fmt"

	"github.com/eolinker/eosc"
)

type Config struct {
	APIKey string `json:"api_key"`
}

func checkConfig(v interface{}) (*Config, error) {
	conf, ok := v.(*Config)
	if !ok {
		return nil, eosc.ErrorConfigType
	}
	if conf.APIKey == "" {
		return nil, fmt.Errorf("api_key is required")
	}

	return conf, nil
}
@@ -1,98 +0,0 @@
package nvidia

import (
	"encoding/json"

	ai_convert "github.com/eolinker/apinto/ai-convert"
	"github.com/eolinker/eosc/eocontext"
	http_context "github.com/eolinker/eosc/eocontext/http-context"
	"github.com/eolinker/eosc/log"
)

var _ ai_convert.IConverterFactory = &convertFactory{}

type convertFactory struct {
}

func (c *convertFactory) Create(cfg string) (ai_convert.IConverterDriver, error) {
	var tmp Config
	err := json.Unmarshal([]byte(cfg), &tmp)
	if err != nil {
		return nil, err
	}
	return newConverterDriver(&tmp)
}

var _ ai_convert.IConverterDriver = &converterDriver{}

type converterDriver struct {
	eocontext.BalanceHandler
	apikey string
}

func newConverterDriver(cfg *Config) (ai_convert.IConverterDriver, error) {
	return &converterDriver{
		apikey: cfg.APIKey,
	}, nil
}

func (e *converterDriver) GetConverter(model string) (ai_convert.IConverter, bool) {
	converter, ok := modelConvert[model]
	if !ok {
		return nil, false
	}

	return &Converter{converter: converter, apikey: e.apikey}, true
}

func (e *converterDriver) GetModel(model string) (ai_convert.FGenerateConfig, bool) {
	if _, ok := modelConvert[model]; !ok {
		return nil, false
	}
	return func(cfg string) (map[string]interface{}, error) {
		result := map[string]interface{}{
			"model": model,
		}
		if cfg != "" {
			tmp := make(map[string]interface{})
			if err := json.Unmarshal([]byte(cfg), &tmp); err != nil {
				log.Errorf("unmarshal config error: %v, cfg: %s", err, cfg)
				return result, nil
			}
			modelCfg := ai_convert.MapToStruct[ModelConfig](tmp)
			result["frequency_penalty"] = modelCfg.FrequencyPenalty
			if modelCfg.MaxTokens >= 1 {
				result["max_tokens"] = modelCfg.MaxTokens
			}

			result["presence_penalty"] = modelCfg.PresencePenalty
			result["temperature"] = modelCfg.Temperature
			result["top_p"] = modelCfg.TopP
			if modelCfg.ResponseFormat != "" {
				result["response_format"] = map[string]interface{}{
					"type": modelCfg.ResponseFormat,
				}
			}
		}
		return result, nil
	}, true
}

type Converter struct {
	apikey    string
	converter ai_convert.IConverter
}

func (c *Converter) RequestConvert(ctx eocontext.EoContext, extender map[string]interface{}) error {
	httpContext, err := http_context.Assert(ctx)
	if err != nil {
		return err
	}
	httpContext.Proxy().Header().SetHeader("Authorization", "Bearer "+c.apikey)

	return c.converter.RequestConvert(httpContext, extender)
}

func (c *Converter) ResponseConvert(ctx eocontext.EoContext) error {
	return c.converter.ResponseConvert(ctx)
}
@@ -1,80 +0,0 @@
package nvidia

import (
	"embed"
	"fmt"

	ai_convert "github.com/eolinker/apinto/ai-convert"
	"github.com/eolinker/apinto/drivers"

	"github.com/eolinker/eosc"
)

var (
	//go:embed nvidia.yaml
	providerContent []byte
	//go:embed *
	providerDir  embed.FS
	modelConvert = make(map[string]ai_convert.IConverter)

	_ ai_convert.IConverterDriver = (*executor)(nil)
)

func init() {
	models, err := ai_convert.LoadModels(providerContent, providerDir)
	if err != nil {
		panic(err)
	}
	for key, value := range models {
		if value.ModelProperties != nil {
			if v, ok := modelModes[value.ModelProperties.Mode]; ok {
				modelConvert[key] = v
			}
		}
	}
}

type executor struct {
	drivers.WorkerBase
	ai_convert.IConverterDriver
}

func (e *executor) Start() error {
	return nil
}

func (e *executor) Reset(conf interface{}, workers map[eosc.RequireId]eosc.IWorker) error {
	cfg, ok := conf.(*Config)
	if !ok {
		return fmt.Errorf("invalid config")
	}

	return e.reset(cfg, workers)
}

func (e *executor) reset(conf *Config, workers map[eosc.RequireId]eosc.IWorker) error {
	d, err := newConverterDriver(conf)
	if err != nil {
		return err
	}

	e.IConverterDriver = d
	return nil
}

func (e *executor) Stop() error {
	e.IConverterDriver = nil
	return nil
}

func (e *executor) CheckSkill(skill string) bool {
	return ai_convert.CheckKeySourceSkill(skill)
}

type ModelConfig struct {
	FrequencyPenalty float64 `json:"frequency_penalty"`
	MaxTokens        int     `json:"max_tokens"`
	PresencePenalty  float64 `json:"presence_penalty"`
	ResponseFormat   string  `json:"response_format"`
	Temperature      float64 `json:"temperature"`
	TopP             float64 `json:"top_p"`
}
@@ -1,44 +0,0 @@
package nvidia

import (
	"sync"

	"github.com/eolinker/eosc/common/bean"

	ai_convert "github.com/eolinker/apinto/ai-convert"
	"github.com/eolinker/apinto/drivers"
	"github.com/eolinker/eosc"
)

var name = "nvidia"

var (
	converterManager ai_convert.IManager
	once             sync.Once
)

// Register registers the driver.
func Register(register eosc.IExtenderDriverRegister) {
	register.RegisterExtenderDriver(name, NewFactory())
}

// NewFactory creates the nvidia converter driver factory.
func NewFactory() eosc.IExtenderDriverFactory {
	once.Do(func() {
		bean.Autowired(&converterManager)
		converterManager.Set(name, &convertFactory{})
	})
	return drivers.NewFactory[Config](Create)
}

// Create creates a driver instance.
func Create(id, name string, v *Config, workers map[eosc.RequireId]eosc.IWorker) (eosc.IWorker, error) {
	_, err := checkConfig(v)
	if err != nil {
		return nil, err
	}
	w := &executor{
		WorkerBase: drivers.Worker(id, name),
	}
	w.reset(v, workers)
	return w, nil
}