Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 08:37:06 +08:00
[Backend] Add stable_diffusion and detection models support for KunlunXin XPU (#954)
* [FlyCV] Bump up FlyCV -> official release 1.0.0
* add valid_xpu for detection
* add PaddleDetection model support for XPU
* support all detection models in C++ and Python
* fix code
* add Python stable_diffusion support

Co-authored-by: DefTruth <qiustudent_r@163.com>
Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
37 changed lines: examples/multimodal/stable_diffusion/infer.py (Normal file → Executable file)
@@ -69,10 +69,7 @@ def parse_arguments():
         type=str,
         default='paddle',
         # Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
-        choices=[
-            'onnx_runtime',
-            'paddle',
-        ],
+        choices=['onnx_runtime', 'paddle', 'paddle-xpu'],
         help="The inference runtime backend of unet model and text encoder model."
     )
     parser.add_argument(
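With 'paddle-xpu' added to the backend choices, the script can now target KunlunXin XPU from the command line; for example, python infer.py --model_dir <exported_model_dir> --backend paddle-xpu --device_id 0 (other flags left at their defaults) would take the new path. The --model_dir, --backend, and --device_id flags are inferred from the args.* references in the hunks below.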
@@ -178,6 +175,24 @@ def create_trt_runtime(model_dir,
     return fd.Runtime(option)
 
 
+def create_xpu_runtime(model_dir, model_prefix, device_id=0):
+    option = fd.RuntimeOption()
+    option.use_xpu(
+        device_id,
+        l3_workspace_size=(64 * 1024 * 1024 - 4 * 1024),
+        locked=False,
+        autotune=False,
+        autotune_file="",
+        precision="int16",
+        adaptive_seqlen=True,
+        enable_multi_stream=True)
+    option.use_paddle_lite_backend()
+    model_file = os.path.join(model_dir, model_prefix, "inference.pdmodel")
+    params_file = os.path.join(model_dir, model_prefix, "inference.pdiparams")
+    option.set_model_path(model_file, params_file)
+    return fd.Runtime(option)
+
+
 def get_scheduler(args):
     if args.scheduler == "pndm":
         scheduler = PNDMScheduler(
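Judging by the parameter names, use_xpu here reserves just under 64 MiB of the XPU's L3 workspace (64 * 1024 * 1024 - 4 * 1024 bytes) and selects int16 compute in the Paddle Lite backend. Below is a minimal sketch of exercising the new helper on its own; the model directory, model prefix, input name, and shape are hypothetical, and fd.Runtime.infer is assumed to take a dict mapping input names to numpy arrays, as in FastDeploy's other Python examples:

import numpy as np

# Assumes create_xpu_runtime from this file; expects
# <model_dir>/<prefix>/inference.pdmodel and inference.pdiparams on disk.
runtime = create_xpu_runtime("stable-diffusion-v1-4", "text_encoder", device_id=0)

# Hypothetical text-encoder input: a batch of CLIP-style token ids.
input_ids = np.ones((1, 77), dtype="int64")
outputs = runtime.infer({"input_ids": input_ids})  # list of output arrays (assumed)
print([o.shape for o in outputs])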
@@ -291,6 +306,20 @@ if __name__ == "__main__":
             dynamic_shape=unet_dynamic_shape,
             device_id=args.device_id)
         print(f"Spend {time.time() - start : .2f} s to load unet model.")
+    elif args.backend == "paddle-xpu":
+        print("=== build text_encoder_runtime")
+        text_encoder_runtime = create_xpu_runtime(
+            args.model_dir,
+            args.text_encoder_model_prefix,
+            device_id=args.device_id)
+        print("=== build vae_decoder_runtime")
+        vae_decoder_runtime = create_xpu_runtime(
+            args.model_dir, args.vae_model_prefix, device_id=args.device_id)
+        print("=== build unet_runtime")
+        start = time.time()
+        unet_runtime = create_xpu_runtime(
+            args.model_dir, args.unet_model_prefix, device_id=args.device_id)
+        print(f"Spend {time.time() - start : .2f} s to load unet model.")
     pipe = StableDiffusionFastDeployPipeline(
         vae_decoder_runtime=vae_decoder_runtime,
         text_encoder_runtime=text_encoder_runtime,
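A note on the design: each submodel (text encoder, UNet, VAE decoder) is wrapped in a plain fd.Runtime, so StableDiffusionFastDeployPipeline stays backend-agnostic and only the runtime-construction step above is XPU-specific. A hedged sketch of the constructor call this hunk feeds into; the unet_runtime and scheduler keywords are assumptions based on the surrounding script and are not shown in this diff:

pipe = StableDiffusionFastDeployPipeline(
    vae_decoder_runtime=vae_decoder_runtime,    # from the diff context above
    text_encoder_runtime=text_encoder_runtime,  # from the diff context above
    unet_runtime=unet_runtime,                  # assumed keyword
    scheduler=scheduler)                        # assumed keyword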