feat: add support for API usage with multimodal models (#4548)

* feat: add support for API usage with multimodal models
* completion_tokens contains num_image_tokens
* remove test_request.py
* fix: paddle.device.is_compiled_with_cuda()
* fix test_unstream_without_logprobs
2025-12-24 13:28:13 +08:00 · 2025-10-28 20:23:46 +08:00
parent e1ac90d787
commit 2a9ed72533
10 changed files with 256 additions and 21 deletions
--- a/fastdeploy/engine/common_engine.py
+++ b/fastdeploy/engine/common_engine.py
@@ -1068,6 +1068,7 @@ class EngineService:
        """
        exit sub services
        """
+        llm_logger.info("Exit sub services.....")
        self.running = False
        if hasattr(self, "engine_worker_queue_server") and self.engine_worker_queue_server is not None:
            self.engine_worker_queue_server.cleanup()