[Optimize] Increase zmq buffer size to cope with the apiserver consuming too slowly (#3723) (#3728)

Co-authored-by: chenjian <1435317881@qq.com>
2025-10-07 01:22:59 +08:00 · 2025-08-30 15:58:18 +08:00
parent 64cf769bee
commit b9af800edd
1 changed files with 1 additions and 1 deletions
--- a/fastdeploy/envs.py
+++ b/fastdeploy/envs.py
@@ -44,7 +44,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # Whether to use HuggingFace tokenizer.
    "FD_USE_HF_TOKENIZER": lambda: os.getenv("FD_USE_HF_TOKENIZER", 0),
    # Set the high watermark (HWM) for receiving data during ZMQ initialization
-    "FD_ZMQ_SNDHWM": lambda: os.getenv("FD_ZMQ_SNDHWM", 10000),
+    "FD_ZMQ_SNDHWM": lambda: os.getenv("FD_ZMQ_SNDHWM", 64000),
    # cache kv quant params directory
    "FD_CACHE_PARAMS": lambda: os.getenv("FD_CACHE_PARAMS", "none"),
    # Set attention backend. "NATIVE_ATTN", "APPEND_ATTN"