Mirror of https://github.com/PaddlePaddle/FastDeploy.git
Synced 2025-12-24 13:28:13 +08:00
[Features] add audio request & fix embedding bug (#5201)
* [Features] add audio request & fix embedding bug
* fix bug
@@ -27,27 +27,43 @@ from fastdeploy.utils import data_processor_logger
class BaseEncodeRequest(BaseModel):
    version: str
    req_id: str
    is_gen: bool
    resolution: int


class ImageEncodeRequest(BaseEncodeRequest):
    image_url: Union[str, HttpUrl]
    is_gen: bool
    resolution: int


class VideoEncodeRequest(BaseEncodeRequest):
    video_url: Union[str, HttpUrl]
    is_gen: bool
    resolution: int
    start_ts: int
    end_ts: int
    frames: int
    vit_merge: bool


class AudioEncodeRequest(BaseEncodeRequest):
    audio_url: Union[str, HttpUrl]
    is_add_spk_emb: bool
    is_pad_aug: bool
    is_aug: bool
    audio_start: Optional[float]
    audio_dur: Optional[float]


class ImageDecodeRequest(BaseModel):
    req_id: str
    data: list[Any]


class AudioDecodeRequest(BaseModel):
    req_id: str
    data: list[Any]


class AsyncTokenizerClient:
    def __init__(
        self,
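For reference, the new request models are plain Pydantic models and can be instantiated directly; the field values below are purely illustrative, since this diff does not document what flags such as is_pad_aug or is_add_spk_emb mean:

    audio_req = AudioEncodeRequest(
        version="v1",                                   # hypothetical value
        req_id="req-0001",                              # hypothetical value
        is_gen=False,
        resolution=224,                                 # hypothetical value
        audio_url="https://example.com/sample.wav",     # hypothetical URL
        is_add_spk_emb=False,
        is_pad_aug=False,
        is_aug=False,
        audio_start=0.0,
        audio_dur=5.0,
    )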
@@ -74,9 +90,15 @@ class AsyncTokenizerClient:
    async def encode_video(self, request: VideoEncodeRequest):
        return await self._async_encode_request("video", request.__dict__)

    async def encode_audio(self, request: AudioEncodeRequest):
        return await self._async_encode_request("audio", request.__dict__)

    async def decode_image(self, request: ImageDecodeRequest):
        return await self._async_decode_request("image", request.__dict__)

    async def decode_audio(self, request: AudioDecodeRequest):
        return await self._async_decode_request("audio", request.__dict__)

    async def log_request(self, request):
        data_processor_logger.debug(f">>> Request: {request.method} {request.url}")
        data_processor_logger.debug(f">>> Headers: {request.headers}")
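A minimal sketch of how the new audio entry points might be driven, reusing the audio_req object from the earlier sketch; the client construction is deliberately left out because the __init__ signature is truncated in this diff, and the decode payload is hypothetical:

    import asyncio

    async def run_audio_roundtrip(client: AsyncTokenizerClient, audio_req: AudioEncodeRequest):
        # encode_audio posts request.__dict__ to the audio/encode endpoint.
        encoded = await client.encode_audio(audio_req)
        # decode_audio posts a req_id plus token data to the audio/decode endpoint.
        decoded = await client.decode_audio(
            AudioDecodeRequest(req_id="req-0001", data=[])   # hypothetical payload
        )
        return encoded, decoded

    # asyncio.run(run_audio_roundtrip(client, audio_req))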
@@ -101,6 +123,8 @@ class AsyncTokenizerClient:
            url = f"{self.base_url}/image/encode"
        elif type == "video":
            url = f"{self.base_url}/video/encode"
        elif type == "audio":
            url = f"{self.base_url}/audio/encode"
        else:
            raise ValueError("Invalid type")
@@ -110,6 +134,7 @@ class AsyncTokenizerClient:
            raise RuntimeError(f"Failed to create tokenize task: {e}") from e

        task_info = resp.json()

        if task_info.get("code") != 0:
            raise RuntimeError(f"Tokenize task creation failed, {task_info.get('message')}")
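The added check implies the tokenizer service replies with a JSON envelope carrying at least "code" and "message"; anything beyond those two keys is an assumption here, and the payloads below are illustrative only:

    # Success: code == 0, processing continues with the rest of task_info.
    task_info = {"code": 0, "message": "ok"}
    # Failure: any non-zero code raises RuntimeError with the returned message.
    task_info = {"code": 1, "message": "unsupported audio format"}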
@@ -154,6 +179,8 @@ class AsyncTokenizerClient:
        url = None
        if type == "image":
            url = f"{self.base_url}/image/decode"
        elif type == "audio":
            url = f"{self.base_url}/audio/decode"
        else:
            raise ValueError("Invalid type")
@@ -106,6 +106,8 @@ class VocabParallelEmbedding(nn.Layer):
        params_dtype: str = "bfloat16",
        prefix="",
        padding_size: int = DEFAULT_VOCAB_PADDING_SIZE,
        org_num_embeddings: int | None = None,
        general=False,
    ) -> None:
        """
        Initialize the VocabParallelEmbedding layer for the model.
@@ -132,17 +134,23 @@ class VocabParallelEmbedding(nn.Layer):
        self.max_position_embeddings: int = fd_config.model_config.max_position_embeddings
        self.tie_word_embeddings: bool = fd_config.model_config.tie_word_embeddings
        self.params_dtype: str = params_dtype
        self.padding_size = padding_size

        self.org_vocab_size = num_embeddings
        self.general = general  # used for general Embedding
        self.num_embeddings = num_embeddings
        num_added_embeddings = num_embeddings - self.org_vocab_size
        self.padding_size = padding_size
        if self.general:
            self.org_vocab_size = num_embeddings
            self.num_embeddings_padded = num_embeddings
            self.org_vocab_size_padded = num_embeddings
        else:
            self.org_vocab_size = org_num_embeddings or num_embeddings
            num_added_embeddings = num_embeddings - self.org_vocab_size

            self.org_vocab_size_padded = pad_vocab_size(self.org_vocab_size, self.padding_size)
            self.num_embeddings_padded = pad_vocab_size(
                self.org_vocab_size_padded + num_added_embeddings, self.padding_size
            )
            assert self.org_vocab_size_padded <= self.num_embeddings_padded
        self.org_vocab_size_padded = pad_vocab_size(self.org_vocab_size, self.padding_size)
        self.num_embeddings_padded = pad_vocab_size(
            self.org_vocab_size_padded + num_added_embeddings, self.padding_size
        )
        assert self.org_vocab_size_padded <= self.num_embeddings_padded
        self.shard_indices = self._get_indices(
            self.num_embeddings_padded,
            self.org_vocab_size_padded,
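With the new general flag the padded-vocab bookkeeping becomes conditional: a general embedding keeps num_embeddings untouched, while the normal path pads org_num_embeddings (falling back to num_embeddings). To make that arithmetic concrete, here is a standalone sketch of the non-general branch; pad_vocab_size is assumed to round up to the next multiple of padding_size, and every numeric value is illustrative rather than taken from this diff:

    def pad_vocab_size(vocab_size: int, padding_size: int) -> int:
        # Round vocab_size up to the nearest multiple of padding_size (assumed behaviour).
        return ((vocab_size + padding_size - 1) // padding_size) * padding_size

    padding_size = 64                    # illustrative padding size
    org_num_embeddings = 151_851         # hypothetical checkpoint vocab size
    num_embeddings = 151_861             # hypothetical vocab size including added tokens

    org_vocab_size = org_num_embeddings or num_embeddings
    num_added_embeddings = num_embeddings - org_vocab_size                   # 10
    org_vocab_size_padded = pad_vocab_size(org_vocab_size, padding_size)     # 151_872
    num_embeddings_padded = pad_vocab_size(
        org_vocab_size_padded + num_added_embeddings, padding_size
    )                                                                         # 151_936
    assert org_vocab_size_padded <= num_embeddings_padded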
@@ -152,9 +160,6 @@ class VocabParallelEmbedding(nn.Layer):
            self.world_size,
        )

        if num_embeddings % self.world_size != 0:
            self.num_embeddings_padded = pad_vocab_size(num_embeddings, self.padding_size)

        if not self.column_cut:
            self.embeddings = fleet.meta_parallel.VocabParallelEmbedding(
                self.num_embeddings_padded,
@@ -188,7 +193,7 @@ class VocabParallelEmbedding(nn.Layer):
        Args:
            state_dict (dict): A dictionary containing the checkpoint weights and biases.
        """
-       if self.tie_word_embeddings:
+       if self.tie_word_embeddings and not self.general:
            weight_tensor = get_tensor(state_dict[self.prefix + ".weight"]).astype(paddle.get_default_dtype())
        else:
            weight_tensor = get_tensor(state_dict.pop(self.prefix + ".weight")).astype(paddle.get_default_dtype())
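The weight-loading change means the embedding weight is only left in state_dict when the layer is tied to the LM head and is not a general embedding; otherwise it is consumed with pop(). A minimal sketch of that difference, using a stand-in dictionary (the key below is hypothetical, not real checkpoint data):

    state_dict = {"embed_tokens.weight": "<tensor>"}      # hypothetical key and value

    # tie_word_embeddings and not general: read the weight but leave it in place,
    # presumably so the tied LM head can still read the same entry later.
    weight = state_dict["embed_tokens.weight"]
    assert "embed_tokens.weight" in state_dict

    # general (or untied) embedding: the weight is consumed.
    weight = state_dict.pop("embed_tokens.weight")
    assert "embed_tokens.weight" not in state_dict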