[NewFeature] Support DP multi API server && fix some bugs in mixed EP && merge develop (#3598)

* [Feature] update ep

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix queue ports idx

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* Update engine.py

* fix ci

* fix some bug in mixed ep

* add server fix and op fix

* rm some log

* fix code style

* ltd fix

* fix

* fix

* fix some bug

* fix bug

* fix bug

* fix style

* Update config.py

* Update splitwise_connector.py

* Update cache_messager.py

* Update __init__.py

* merge and fix

* Update engine.py

* Update common_engine.py

* Update run_ci_xpu.sh

* Update ernie_processor.py

* Update ernie_processor.py

---------

Co-authored-by: ltd0924 <ltd0924@sina.com>
Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
This commit is contained in:
gaoziyuan
2025-08-26 19:59:02 +08:00
committed by GitHub
parent cbce94a00e
commit 82e64b13e1
24 changed files with 1244 additions and 1200 deletions

View File

@@ -0,0 +1,107 @@
"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
import argparse
import os
import subprocess
import sys
import time
from fastdeploy.utils import get_logger, is_port_available
logger = get_logger("multi_api_server", "multi_api_server.log")
def start_servers(server_count, server_args, ports, metrics_ports):
    """Launch ``server_count`` api_server subprocesses and return their handles.

    Each child runs ``fastdeploy.entrypoints.openai.api_server`` with
    ``server_args`` forwarded verbatim, plus its own ``--port``,
    ``--metrics-port`` and ``--local-data-parallel-id`` (the loop index).

    Args:
        server_count: Number of api_server processes to start.
        server_args: Command-line tokens forwarded to every child. Must
            contain ``--engine-worker-queue-port`` followed by a
            comma-separated value with one port per server. ``None`` is
            treated as an empty list.
        ports: HTTP ports, one entry per server.
        metrics_ports: Metrics ports, one entry per server.

    Returns:
        A list of ``subprocess.Popen`` objects in launch order.

    Raises:
        ValueError: If ``--engine-worker-queue-port`` (or its value) is
            missing from ``server_args``.
        Exception: Whatever ``check_param`` raises when a port list has the
            wrong length or a port is not available.
    """
    server_args = list(server_args or [])
    processes = []
    logger.info(f"Starting servers on ports: {ports} with args: {server_args} and metrics ports: {metrics_ports}")

    # Locate the queue-port flag explicitly so that a missing flag or value
    # yields a clear error instead of an UnboundLocalError / IndexError.
    engine_worker_queue_port = None
    for idx, token in enumerate(server_args):
        if token == "--engine-worker-queue-port":
            if idx + 1 >= len(server_args):
                raise ValueError("--engine-worker-queue-port requires a comma-separated list of ports")
            engine_worker_queue_port = server_args[idx + 1].split(",")
            break
    if engine_worker_queue_port is None:
        raise ValueError("--engine-worker-queue-port must be provided in the forwarded server args")

    check_param(ports, server_count)
    check_param(metrics_ports, server_count)
    check_param(engine_worker_queue_port, server_count)

    try:
        for i in range(server_count):
            port = int(ports[i])
            metrics_port = int(metrics_ports[i])

            # Every child gets its own FD_LOG_DIR value, keyed by its index.
            env = os.environ.copy()
            env["FD_LOG_DIR"] = f"log_{i}"
            cmd = [
                sys.executable,
                "-m",
                "fastdeploy.entrypoints.openai.api_server",
                *server_args,
                "--port",
                str(port),
                "--metrics-port",
                str(metrics_port),
                "--local-data-parallel-id",
                str(i),
            ]
            # Launch the child process.
            proc = subprocess.Popen(cmd, env=env)
            processes.append(proc)
            logger.info(f"Starting servers #{i+1} (PID: {proc.pid}) port: {port} | command: {' '.join(cmd)}")
    except BaseException:
        # Do not leak children that were already started if a later launch
        # fails or is interrupted; the original error is re-raised unchanged.
        for proc in processes:
            proc.terminate()
        raise
    return processes
def check_param(ports, num_servers):
    """Validate that one port per server was supplied and that each is usable.

    Args:
        ports: Sequence of port values (ints or numeric strings).
        num_servers: Expected number of entries in ``ports``.

    Raises:
        ValueError: If ``len(ports)`` differs from ``num_servers``, or if
            ``is_port_available("0.0.0.0", port)`` is false for any port.
    """
    logger.info(f"check param {ports}, {num_servers}")
    # Explicit raise rather than ``assert``: assertions are removed when
    # Python runs with -O, which would silently skip this validation.
    if len(ports) != num_servers:
        raise ValueError("Number of ports must match num-servers")
    for port in ports:
        logger.info(f"check port {port}")
        if not is_port_available("0.0.0.0", int(port)):
            raise ValueError(f"Port {port} is already in use.")
def main():
    """Parse launcher options, start the api_server processes and supervise them.

    Options:
        --ports: Comma-separated HTTP ports, one per server.
        --num-servers: Number of api_server processes to start.
        --metrics-ports: Comma-separated metrics ports, one per server.
        --args: Everything after this flag is forwarded to each api_server.
            ``start_servers`` reads ``--engine-worker-queue-port`` from these
            tokens, so the flag cannot be omitted.

    The function blocks until interrupted with Ctrl-C, then terminates every
    child and waits for each one to exit.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ports", default="8000,8002", type=str, help="ports to the http server")
    parser.add_argument("--num-servers", default=2, type=int, help="number of workers")
    parser.add_argument("--metrics-ports", default="8800,8802", type=str, help="ports for metrics server")
    parser.add_argument("--args", nargs=argparse.REMAINDER, help="remaining arguments are passed to api_server.py")
    args = parser.parse_args()

    # Without --args, ``args.args`` is None and start_servers would fail with
    # an opaque TypeError; report a usage error instead.
    if not args.args:
        parser.error("--args is required and must include --engine-worker-queue-port")

    logger.info(f"Starting {args.num_servers} servers on ports: {args.ports} with args: {args.args}")
    processes = start_servers(
        server_count=args.num_servers,
        server_args=args.args,
        ports=args.ports.split(","),
        metrics_ports=args.metrics_ports.split(","),
    )

    try:
        # Keep the launcher alive until the user interrupts it.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Signal every child first, then reap them.
        for proc in processes:
            proc.terminate()
        for proc in processes:
            proc.wait()
        logger.info("All servers stopped.")


if __name__ == "__main__":
    main()