mirror of
				https://github.com/PaddlePaddle/FastDeploy.git
				synced 2025-10-27 02:20:31 +08:00 
			
		
		
		
	ep support logprob (#4089)
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
		
			
				
	
				CE Compile Job / ce_job_pre_check (push) Has been cancelled
				
			
		
			
				
	
				CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
				
			
		
			
				
	
				CE Compile Job / FD-Clone-Linux (push) Has been cancelled
				
			
		
			
				
	
				CE Compile Job / Show Code Archive Output (push) Has been cancelled
				
			
		
			
				
	
				CE Compile Job / BUILD_SM8090 (push) Has been cancelled
				
			
		
			
				
	
				CE Compile Job / BUILD_SM8689 (push) Has been cancelled
				
			
		
			
				
	
				CE Compile Job / CE_UPLOAD (push) Has been cancelled
				
			
		
		
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	CE Compile Job / ce_job_pre_check (push) Has been cancelled
				
			CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
				
			CE Compile Job / FD-Clone-Linux (push) Has been cancelled
				
			CE Compile Job / Show Code Archive Output (push) Has been cancelled
				
			CE Compile Job / BUILD_SM8090 (push) Has been cancelled
				
			CE Compile Job / BUILD_SM8689 (push) Has been cancelled
				
			CE Compile Job / CE_UPLOAD (push) Has been cancelled
				
			This commit is contained in:
		| @@ -39,9 +39,6 @@ void GetOutputTopK(const paddle::Tensor& x, | |||||||
|                    int k, |                    int k, | ||||||
|                    int64_t rank_id, |                    int64_t rank_id, | ||||||
|                    bool wait_flag) { |                    bool wait_flag) { | ||||||
|     if (rank_id > 0) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     static struct msgdata msg_rcv; |     static struct msgdata msg_rcv; | ||||||
|     int msg_queue_id = 1; |     int msg_queue_id = 1; | ||||||
|   | |||||||
| @@ -400,8 +400,6 @@ class EngineArgs: | |||||||
|         if self.enable_logprob: |         if self.enable_logprob: | ||||||
|             if self.speculative_config is not None: |             if self.speculative_config is not None: | ||||||
|                 raise NotImplementedError("Logprob does not support speculation_config.") |                 raise NotImplementedError("Logprob does not support speculation_config.") | ||||||
|             if self.enable_expert_parallel: |  | ||||||
|                 raise NotImplementedError("Logprob does not support enable_expert_parallel.") |  | ||||||
|             if not current_platform.is_cuda(): |             if not current_platform.is_cuda(): | ||||||
|                 raise NotImplementedError("Only CUDA platform supports logprob.") |                 raise NotImplementedError("Only CUDA platform supports logprob.") | ||||||
|         if self.splitwise_role != "mixed": |         if self.splitwise_role != "mixed": | ||||||
|   | |||||||
| @@ -160,13 +160,6 @@ class TokenProcessor: | |||||||
|                     if self.output_tokens[0] == -2: |                     if self.output_tokens[0] == -2: | ||||||
|                         continue |                         continue | ||||||
|  |  | ||||||
|                 else: |  | ||||||
|                     if ( |  | ||||||
|                         self.cfg.parallel_config.enable_expert_parallel |  | ||||||
|                         and self.cfg.parallel_config.data_parallel_size > 1 |  | ||||||
|                     ): |  | ||||||
|                         get_output_ep(self.output_tokens, rank_id, is_blocking) |  | ||||||
|  |  | ||||||
|                 else: |                 else: | ||||||
|                     if self.use_logprobs: |                     if self.use_logprobs: | ||||||
|                         get_output_topk( |                         get_output_topk( | ||||||
| @@ -177,6 +170,12 @@ class TokenProcessor: | |||||||
|                             rank_id, |                             rank_id, | ||||||
|                             is_blocking, |                             is_blocking, | ||||||
|                         ) |                         ) | ||||||
|  |                     elif ( | ||||||
|  |                         self.cfg.parallel_config.enable_expert_parallel | ||||||
|  |                         and self.cfg.parallel_config.data_parallel_size > 1 | ||||||
|  |                     ): | ||||||
|  |                         get_output_ep(self.output_tokens, rank_id, is_blocking) | ||||||
|  |  | ||||||
|                     else: |                     else: | ||||||
|                         get_output(self.output_tokens, rank_id, is_blocking) |                         get_output(self.output_tokens, rank_id, is_blocking) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 chen
					chen