mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
* feat(fmq): add ZMQ-based FMQ implementation and benchmark tools * move FMQ_CONFIG_JSON to envs * fix top_p_candidates (#5400) Co-authored-by: freeliuzc <lzc842650834@gmail.com> * [RL] Support Rollout Routing Replay (#5321) * [RL] Support Rollout Routing Replay * add routing indices cache * fix config bug and moe forward bug * R3 Support GLM * support eb4.5 * fix merge bug * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * add routing replay ci * support glm topk * support orther top_k * fix ci bug * pre-commit * only support chatcmpl --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Yuanle Liu <yuanlehome@163.com> * [Bug fix] Fix the multi-input accuracy issue in the pooling model. (#5374) * fix multi-inputs * fix threshold * fix threshold * fix * [BugFix]remove _execute_empty_input (#5396) * Revert "[RL] Support Rollout Routing Replay (#5321)" (#5402) This reverts commit96d2d4877b. 
* [New][RL] Support Rollout Routing Replay (#5405) * [RL] Support Rollout Routing Replay * add routing indices cache * fix config bug and moe forward bug * R3 Support GLM * support eb4.5 * fix merge bug * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * add routing replay ci * support glm topk * support orther top_k * fix ci bug * pre-commit * only support chatcmpl * Revert "Revert "[RL] Support Rollout Routing Replay (#5321)" (#5402)" This reverts commitc45e064f3d. * Fix XPU and NPU bug --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Yuanle Liu <yuanlehome@163.com> * bf16 deepseek (#5379) * fix deepseek (#5410) * Update tests/inter_communicator/test_fmq_factory.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update benchmarks/benchmark_fmq.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update fastdeploy/inter_communicator/fmq.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: GoldPancake <56388518+Deleter-D@users.noreply.github.com> Co-authored-by: freeliuzc <lzc842650834@gmail.com> Co-authored-by: RAM <gstian5555@outlook.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Yuanle Liu <yuanlehome@163.com> Co-authored-by: lizexu123 <39205361+lizexu123@users.noreply.github.com> Co-authored-by: 周周周 <39978853+zhoutianzi666@users.noreply.github.com> Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com> Co-authored-by: bukejiyu <52310069+bukejiyu@users.noreply.github.com>
84 lines
2.4 KiB
Python
84 lines
2.4 KiB
Python
"""
|
|
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
|
|
from fastdeploy.inter_communicator.fmq import FMQ
|
|
|
|
|
|
class FMQFactory:
    """
    Static factory exposing both ends of the four standard FMQ queues.

    Queue names and directions:
        q_a2e: api server --> engine
        q_e2w: engine --> worker
        q_w2e: worker --> engine
        q_e2a: engine --> api server

    Endpoints used by each process:
        API Server: q_a2e producer / q_e2a consumer
        Engine: q_a2e consumer / q_e2w producer / q_w2e consumer / q_e2a producer
        Worker: q_e2w consumer / q_w2e producer

    Every accessor is a classmethod that forwards to ``queue(name, role=...)``
    on the single FMQ instance stored on the class, so no FMQFactory instance
    is ever needed.
    """

    # One FMQ object, created when the class body is executed, shared by all
    # accessors below.
    _fmq = FMQ()

    @classmethod
    def _endpoint(cls, name, role):
        """Return ``cls._fmq.queue(name, role=role)`` for the given queue name and role."""
        return cls._fmq.queue(name, role=role)

    # ---- API -> Engine (q_a2e) ----
    @classmethod
    def q_a2e_producer(cls):
        """Return the producer end of the "q_a2e" queue."""
        return cls._endpoint("q_a2e", "producer")

    @classmethod
    def q_a2e_consumer(cls):
        """Return the consumer end of the "q_a2e" queue."""
        return cls._endpoint("q_a2e", "consumer")

    # ---- Engine -> Worker (q_e2w) ----
    @classmethod
    def q_e2w_producer(cls):
        """Return the producer end of the "q_e2w" queue."""
        return cls._endpoint("q_e2w", "producer")

    @classmethod
    def q_e2w_consumer(cls):
        """Return the consumer end of the "q_e2w" queue."""
        return cls._endpoint("q_e2w", "consumer")

    # ---- Worker -> Engine (q_w2e) ----
    @classmethod
    def q_w2e_producer(cls):
        """Return the producer end of the "q_w2e" queue."""
        return cls._endpoint("q_w2e", "producer")

    @classmethod
    def q_w2e_consumer(cls):
        """Return the consumer end of the "q_w2e" queue."""
        return cls._endpoint("q_w2e", "consumer")

    # ---- Engine -> API (q_e2a) ----
    @classmethod
    def q_e2a_producer(cls):
        """Return the producer end of the "q_e2a" queue."""
        return cls._endpoint("q_e2a", "producer")

    @classmethod
    def q_e2a_consumer(cls):
        """Return the consumer end of the "q_e2a" queue."""
        return cls._endpoint("q_e2a", "consumer")

    # ---- Teardown ----
    @classmethod
    async def destroy(cls):
        """Await ``destroy()`` on the shared FMQ instance.

        NOTE(review): whether accessors remain usable after this call depends
        on FMQ.destroy(), which is not visible here; confirm before reusing
        the factory after teardown.
        """
        shared = cls._fmq
        await shared.destroy()
|