Commit 434b48dda5
* [patchelf] fix patchelf error for inference xpu
* [serving] add xpu dockerfile and support fd server
* [Serving] support XPU + Triton
* [Dockerfile] update xpu triton docker file -> paddle 0.0.0
* [Dockerfile] add comments for xpu triton dockerfile
* [Doruntime] fix xpu infer error
* [XPU] update xpu dockerfile
* add xpu triton server docs
* update xpu triton server docs
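The commits above add XPU (Kunlunxin) support to FastDeploy's Triton-based serving. For orientation, here is a minimal sketch of the model repository layout that a config like the one below belongs to, assuming a Paddle model; the repository path "models", the version directory "1", and the file names model.pdmodel/model.pdiparams are assumptions, not taken from this commit:

models/
└── runtime/                   # model name, matches name: "runtime" in config.pbtxt
    ├── 1/                     # model version directory
    │   ├── model.pdmodel      # assumed Paddle model file
    │   └── model.pdiparams    # assumed Paddle weights file
    └── config.pbtxt           # the configuration shown below

The server is then typically started with tritonserver --model-repository=/models.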
90 lines · 2.3 KiB · Plaintext
# optional; if specified, the name must match the name of the model repository directory containing the model.
name: "runtime"
backend: "fastdeploy"
max_batch_size: 16

# Input configuration of the model
input [
  {
    # input name
    name: "inputs"
    # input type, such as TYPE_FP32, TYPE_UINT8, TYPE_INT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FP16, TYPE_STRING
    data_type: TYPE_FP32
    # input shape; the batch dimension is omitted, so the actual shape is [batch, c, h, w]
    dims: [ 3, 224, 224 ]
  }
]

# The output of the model is configured in the same format as the input
output [
  {
    name: "save_infer_model/scale_0.tmp_1"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
]

# Number of instances of the model
instance_group [
  {
    # The number of instances is 1
    count: 1
    # Use GPU; the CPU inference option is KIND_CPU
    kind: KIND_GPU
    # kind: KIND_CPU
    # The instance is deployed on GPU card 0
    gpus: [0]
  }
]

optimization {
  execution_accelerators {
    gpu_execution_accelerator : [
      {
        # use the TensorRT engine
        name: "tensorrt",
        # run the TensorRT engine in FP16
        parameters { key: "precision" value: "trt_fp16" }
      },
      {
        # minimum dynamic shape for the "inputs" tensor
        name: "min_shape"
        parameters { key: "inputs" value: "1 3 224 224" }
      },
      {
        # optimal dynamic shape for the "inputs" tensor
        name: "opt_shape"
        parameters { key: "inputs" value: "1 3 224 224" }
      },
      {
        # maximum dynamic shape for the "inputs" tensor
        name: "max_shape"
        parameters { key: "inputs" value: "16 3 224 224" }
      }
    ]
  }
}

# Alternative instance_group for CPU/XPU deployment:
# instance_group [
#   {
#     # The number of instances is 1
#     count: 1
#     # Use CPU; the GPU inference option is KIND_GPU
#     # kind: KIND_GPU
#     kind: KIND_CPU
#     # # The instance is deployed on GPU card 0
#     # gpus: [0]
#   }
# ]

# Alternative optimization block for XPU (Kunlunxin) inference:
# optimization {
#   execution_accelerators {
#     cpu_execution_accelerator: [{
#       name: "paddle_xpu",
#       parameters { key: "cpu_threads" value: "4" }
#       parameters { key: "use_paddle_log" value: "1" }
#       parameters { key: "kunlunxin_id" value: "0" }
#       parameters { key: "l3_workspace_size" value: "62914560" }
#       parameters { key: "locked" value: "0" }
#       parameters { key: "autotune" value: "1" }
#       parameters { key: "precision" value: "int16" }
#       parameters { key: "adaptive_seqlen" value: "0" }
#       parameters { key: "enable_multi_stream" value: "0" }
#       parameters { key: "gm_default_size" value: "0" }
#     }]
#   }
# }
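For completeness, a minimal Python client sketch that exercises this config, assuming the stock tritonclient package and a server listening on localhost:8000; the random tensor stands in for real preprocessed image data:

import numpy as np
import tritonclient.http as httpclient

# Connect to Triton's HTTP endpoint (port 8000 by default).
client = httpclient.InferenceServerClient(url="localhost:8000")

# Build the input declared in config.pbtxt: name "inputs", FP32,
# full shape [batch, 3, 224, 224] with batch <= max_batch_size (16).
batch = 1
data = np.random.rand(batch, 3, 224, 224).astype(np.float32)
infer_input = httpclient.InferInput("inputs", list(data.shape), "FP32")
infer_input.set_data_from_numpy(data)

# Request the output tensor declared in config.pbtxt.
requested = httpclient.InferRequestedOutput("save_infer_model/scale_0.tmp_1")

result = client.infer(model_name="runtime", inputs=[infer_input], outputs=[requested])
scores = result.as_numpy("save_infer_model/scale_0.tmp_1")
print(scores.shape)  # (batch, 1000), per the output dims in the config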