Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 08:37:06 +08:00
[Backend] Add stable_diffusion and detection models support for KunlunXin XPU (#954)
* [FlyCV] Bump up FlyCV -> official release 1.0.0
* add valid_xpu for detection
* add PaddleDetection model support for XPU
* support all detection models in C++ and Python
* fix code
* add Python stable_diffusion support

Co-authored-by: DefTruth <qiustudent_r@163.com>
Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
37 changed lines: examples/multimodal/stable_diffusion/infer.py (Normal file → Executable file)
@@ -69,10 +69,7 @@ def parse_arguments():
         type=str,
         default='paddle',
         # Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
-        choices=[
-            'onnx_runtime',
-            'paddle',
-        ],
+        choices=['onnx_runtime', 'paddle', 'paddle-xpu'],
         help="The inference runtime backend of unet model and text encoder model."
     )
     parser.add_argument(
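With 'paddle-xpu' added to the backend choices, the script can now target KunlunXin XPU from the command line; for example, python infer.py --model_dir <exported_model_dir> --backend paddle-xpu --device_id 0 (other flags left at their defaults) would take the new path. The --model_dir, --backend, and --device_id flags are inferred from the args.* references in the hunks below.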
@@ -178,6 +175,24 @@ def create_trt_runtime(model_dir,
     return fd.Runtime(option)
 
 
+def create_xpu_runtime(model_dir, model_prefix, device_id=0):
+    option = fd.RuntimeOption()
+    option.use_xpu(
+        device_id,
+        l3_workspace_size=(64 * 1024 * 1024 - 4 * 1024),
+        locked=False,
+        autotune=False,
+        autotune_file="",
+        precision="int16",
+        adaptive_seqlen=True,
+        enable_multi_stream=True)
+    option.use_paddle_lite_backend()
+    model_file = os.path.join(model_dir, model_prefix, "inference.pdmodel")
+    params_file = os.path.join(model_dir, model_prefix, "inference.pdiparams")
+    option.set_model_path(model_file, params_file)
+    return fd.Runtime(option)
+
+
 def get_scheduler(args):
     if args.scheduler == "pndm":
         scheduler = PNDMScheduler(
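Judging by the parameter names, use_xpu here reserves just under 64 MiB of the XPU's L3 workspace (64 * 1024 * 1024 - 4 * 1024 bytes) and selects int16 compute in the Paddle Lite backend. Below is a minimal sketch of exercising the new helper on its own; the model directory, model prefix, input name, and shape are hypothetical, and fd.Runtime.infer is assumed to take a dict mapping input names to numpy arrays, as in FastDeploy's other Python examples:

import numpy as np

# Assumes create_xpu_runtime from this file; expects
# <model_dir>/<prefix>/inference.pdmodel and inference.pdiparams on disk.
runtime = create_xpu_runtime("stable-diffusion-v1-4", "text_encoder", device_id=0)

# Hypothetical text-encoder input: a batch of CLIP-style token ids.
input_ids = np.ones((1, 77), dtype="int64")
outputs = runtime.infer({"input_ids": input_ids})  # list of output arrays (assumed)
print([o.shape for o in outputs])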
@@ -291,6 +306,20 @@ if __name__ == "__main__":
             dynamic_shape=unet_dynamic_shape,
             device_id=args.device_id)
         print(f"Spend {time.time() - start : .2f} s to load unet model.")
+    elif args.backend == "paddle-xpu":
+        print("=== build text_encoder_runtime")
+        text_encoder_runtime = create_xpu_runtime(
+            args.model_dir,
+            args.text_encoder_model_prefix,
+            device_id=args.device_id)
+        print("=== build vae_decoder_runtime")
+        vae_decoder_runtime = create_xpu_runtime(
+            args.model_dir, args.vae_model_prefix, device_id=args.device_id)
+        print("=== build unet_runtime")
+        start = time.time()
+        unet_runtime = create_xpu_runtime(
+            args.model_dir, args.unet_model_prefix, device_id=args.device_id)
+        print(f"Spend {time.time() - start : .2f} s to load unet model.")
     pipe = StableDiffusionFastDeployPipeline(
         vae_decoder_runtime=vae_decoder_runtime,
         text_encoder_runtime=text_encoder_runtime,
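A note on the design: each submodel (text encoder, UNet, VAE decoder) is wrapped in a plain fd.Runtime, so StableDiffusionFastDeployPipeline stays backend-agnostic and only the runtime-construction step above is XPU-specific. A hedged sketch of the constructor call this hunk feeds into; the unet_runtime and scheduler keywords are assumptions based on the surrounding script and are not shown in this diff:

pipe = StableDiffusionFastDeployPipeline(
    vae_decoder_runtime=vae_decoder_runtime,    # from the diff context above
    text_encoder_runtime=text_encoder_runtime,  # from the diff context above
    unet_runtime=unet_runtime,                  # assumed keyword
    scheduler=scheduler)                        # assumed keyword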