Files
cursor-api-go/src/utils.js
左康生 0646173589 update
2024-11-24 11:57:04 +08:00

125 lines
4.3 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Encode a non-negative integer as a base-128 varint (7 payload bits per byte,
// high bit set on every byte except the last) and return it as a hex string.
// For values below 16384 this yields the same one- or two-byte encoding the
// length fields previously used; larger values get as many bytes as they need.
function encodeVarintHex (value) {
  let hex = ''
  let remaining = value
  do {
    let byte = remaining % 128
    remaining = Math.floor(remaining / 128)
    if (remaining > 0) {
      byte |= 0x80 // more bytes follow
    }
    hex += byte.toString(16).padStart(2, '0')
  } while (remaining > 0)
  return hex
}

// Build the binary request frame for a text message and a model name.
//
// Layout of the returned Buffer, in order:
//   - 5 bytes: the payload length as a zero-padded big-endian number
//   - 0x12, then the varint of (text byte length + 0x2A)
//   - 0x0A, then the varint of the text byte length
//   - the UTF-8 bytes of `str`
//   - a fixed suffix of hard-coded bytes with the model name (one length byte
//     followed by its UTF-8 bytes) embedded in it
//
// Parameters:
//   str       - message text; encoded as UTF-8
//   modelName - model identifier; encoded as UTF-8
// Returns: a Buffer holding the complete frame.
//
// The constants mirror a Python implementation this was ported from (per the
// original comments); they are kept as-is.
function stringToHex (str, modelName) {
  const bytes = Buffer.from(str, 'utf-8')
  const byteLength = bytes.length
  const modelBytes = Buffer.from(modelName, 'utf-8')

  // Calculate lengths and fields similar to the Python version
  const FIXED_HEADER = 2
  const SEPARATOR = 1
  // Count the model name in BYTES, not UTF-16 code units: the name is written
  // below as UTF-8, so a non-ASCII name would otherwise make the frame length
  // prefix disagree with the actual payload size.
  const FIXED_SUFFIX_LENGTH = 0xA3 + modelBytes.length

  // Length field of the text itself (base_length1 in the Python version)
  const textLengthField1 = encodeVarintHex(byteLength)
  const textLengthFieldSize1 = textLengthField1.length / 2

  // Length field of the text plus 0x2A (base_length in the Python version).
  // NOTE(review): 0x2A appears to assume textLengthField1 occupies one byte;
  // confirm against the server whether this should grow when the text is
  // 128 bytes or longer. Left unchanged to preserve existing output.
  const baseLength = byteLength + 0x2A
  const textLengthField = encodeVarintHex(baseLength)
  const textLengthFieldSize = textLengthField.length / 2

  // Total payload length, i.e. every byte that follows the 5-byte prefix
  const messageTotalLength = FIXED_HEADER + textLengthFieldSize + SEPARATOR +
    textLengthFieldSize1 + byteLength + FIXED_SUFFIX_LENGTH
  const messageLengthHex = messageTotalLength.toString(16).padStart(10, '0')

  // Assemble the complete hex string
  const hexString = (
    messageLengthHex +
    '12' +
    textLengthField +
    '0A' +
    textLengthField1 +
    bytes.toString('hex') +
    '10016A2432343163636435662D393162612D343131382D393239612D3936626330313631626432612' +
    '2002A132F643A2F6964656150726F2F656475626F73733A1E0A' +
    // Model name byte length as two hex digits, followed by the name itself
    modelBytes.length.toString(16).padStart(2, '0').toUpperCase() +
    modelBytes.toString('hex').toUpperCase() +
    '22004A' +
    '24' + '61383761396133342D323164642D343863372D623434662D616636633365636536663765' +
    '680070007A2436393337376535612D386332642D343835342D623564392D653062623232336163303061' +
    '800101B00100C00100E00100E80100'
  ).toUpperCase()
  return Buffer.from(hexString, 'hex')
}
// Wrapper that turns a raw response chunk into a UTF-8 string by dropping
// framing and control bytes. Returns '' for chunks whose first byte is 0x01
// or 0x02. Logs the raw and the cleaned data to the console.
function chunkToUtf8String (chunk) {
  const leadByte = chunk[0]
  if (leadByte === 0x01 || leadByte === 0x02) {
    return ''
  }

  const rawView = Buffer.from(chunk)
  console.log('chunk:', rawView.toString('hex'))
  console.log('chunk string:', rawView.toString('utf-8'))

  const inRange = (value, low, high) => value >= low && value <= high

  // Discard everything up to and including the first 0x0A
  const payload = chunk.slice(chunk.indexOf(0x0A) + 1)
  const total = payload.length
  const kept = []
  let pos = 0

  while (pos < total) {
    const current = payload[pos]

    // A run of four 0x00 bytes (or an all-zero tail shorter than four) is
    // dropped together with every following byte in 0x00..0x0F
    const startsZeroRun = !payload.slice(pos, pos + 4).some((b) => b !== 0x00)
    if (startsZeroRun) {
      pos += 4
      while (pos < total && inRange(payload[pos], 0x00, 0x0F)) {
        pos += 1
      }
      continue
    }

    // 0x0C is dropped along with any 0x0A bytes immediately after it
    if (current === 0x0C) {
      pos += 1
      while (pos < total && payload[pos] === 0x0A) {
        pos += 1
      }
      continue
    }

    // A 0x0A preceded by a byte in 0x00..0x09 is dropped, and so is that
    // preceding byte, which has already been collected
    if (pos > 0 && current === 0x0A && inRange(payload[pos - 1], 0x00, 0x09)) {
      kept.pop()
      pos += 1
      continue
    }

    kept.push(current)
    pos += 1
  }

  // Second pass: remove 0x00, 0x0C and every byte below 0x0A
  const cleaned = Buffer.from(
    kept.filter((b) => b !== 0x00 && b !== 0x0C && b >= 0x0A)
  )
  console.log('hexString:', cleaned.toString('hex'))
  const utf8String = cleaned.toString('utf-8')
  console.log('utf8String:', utf8String)
  return utf8String
}
module.exports = {
stringToHex,
chunkToUtf8String
}