# Optional. If a name is specified, it must match the name of the model repository directory containing the model.
name: "runtime"
backend: "fastdeploy"
max_batch_size: 16
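# Note: "fastdeploy" selects the FastDeploy backend for Triton Inference Server.
# max_batch_size caps the batch size the server may form for this model; the
# batch dimension is therefore omitted from the dims entries below.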

# Input configuration of the model
input [
  {
    # Input name
    name: "inputs"
    # Input data type, e.g. TYPE_FP32, TYPE_UINT8, TYPE_INT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FP16, TYPE_STRING
    data_type: TYPE_FP32
    # Input shape. The batch dimension is omitted; the actual shape is [batch, c, h, w]
    dims: [ 3, 224, 224 ]
  }
]
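# Example: with max_batch_size 16, a fully batched request produces an "inputs"
# tensor with actual shape [16, 3, 224, 224].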

# The output of the model is configured in the same format as the input
output [
  {
    name: "save_infer_model/scale_0.tmp_1"
    data_type: TYPE_FP32
    dims: [ 1000 ]
  }
]
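# Assumption: "save_infer_model/scale_0.tmp_1" is the output tensor name of the
# exported Paddle model; a different model would use its own output tensor name here.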

# Number of instances of the model
instance_group [
  {
    # Number of instances: 1
    count: 1
    # Use GPU for inference; for CPU inference use KIND_CPU
    kind: KIND_GPU
    # Deploy the instance on GPU 0
    gpus: [0]
  }
]
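# Example (assuming two GPUs are available): "count" creates that many instances
# on each GPU listed in "gpus", so the following would run one instance on each
# of GPU 0 and GPU 1:
#   count: 1
#   gpus: [0, 1]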

optimization {
  execution_accelerators {
    gpu_execution_accelerator : [
      {
        # Use the TensorRT engine
        name: "tensorrt",
        # Run the TensorRT engine in FP16 precision
        parameters { key: "precision" value: "trt_fp16" }
      },
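      # The min_shape / opt_shape / max_shape entries below define the TensorRT
      # dynamic-shape range for the "inputs" tensor, from batch size 1 up to
      # max_batch_size 16.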
      {
        name: "min_shape"
        parameters { key: "inputs" value: "1 3 224 224" }
      },
      {
        name: "opt_shape"
        parameters { key: "inputs" value: "1 3 224 224" }
      },
      {
        name: "max_shape"
        parameters { key: "inputs" value: "16 3 224 224" }
      }
    ]
  }
}
