freeliuzc 
							
						 
					 
					
						
						
							
						
						71267840f7 
					 
					
						
						
							
							【Fix】fix mtp bug ( #3139 )  
						
						
						
						
					 
					
						2025-08-08 13:30:12 +08:00 
						 
				 
			
				
					
						
							
							
								bukejiyu 
							
						 
					 
					
						
						
							
						
						b76b17fc1b 
					 
					
						
						
							
							qwen3 0.3B fix ( #3255 )  
						
						
	
		
			
	 
	
	
		
	
	
		
			
				
	Deploy GitHub Pages / deploy (push) Has been cancelled 
				
			 
		
		
	 
 
	 
						
						
					 
					
						2025-08-08 11:35:40 +08:00 
						 
				 
			
				
					
						
							
							
								yzwu 
							
						 
					 
					
						
						
							
						
						fbdd6b0663 
					 
					
						
						
							
							[Iluvatar GPU] Optimze attention and moe performance ( #3234 )  
						
						
						
						
					 
					
						2025-08-08 10:51:24 +08:00 
						 
				 
			
				
					
						
							
							
								bukejiyu 
							
						 
					 
					
						
						
							
						
						37569cca86 
					 
					
						
						
							
							[feat]add fast_weights_iterator ( #3258 )  
						
						... 
						
						
	
		
			
	 
	
	
		
	
	
		
			
				
	Deploy GitHub Pages / deploy (push) Has been cancelled 
				
			 
		
		
	 
 
	 
						
						* add fast_weights_iterator
* update
* update 
						
						
					 
					
						2025-08-07 22:36:46 +08:00 
						 
				 
			
				
					
						
							
							
								bukejiyu 
							
						 
					 
					
						
						
							
						
						9408e667a5 
					 
					
						
						
							
							[bugfix]fix blockwisefp8 and all_reduce ( #3243 )  
						
						... 
						
						
						
						* fix
* update
* fix linear for prequant loader 
						
						
					 
					
						2025-08-06 23:54:33 +08:00 
						 
				 
			
				
					
						
							
							
								yangjianfengo1 
							
						 
					 
					
						
						
							
						
						3a15e0c53e 
					 
					
						
						
							
							【Fix Bug】 修复 fa3 支持集中式bug ( #3235 )  
						
						... 
						
						
	
		
			
	 
	
	
		
	
	
		
			
				
	Deploy GitHub Pages / deploy (push) Has been cancelled 
				
			 
		
		
	 
 
	 
						
						* fix fa3 集中式bug
* 增加qknorm参数 
						
						
					 
					
						2025-08-06 16:24:27 +08:00 
						 
				 
			
				
					
						
							
							
								lizexu123 
							
						 
					 
					
						
						
							
						
						afff4d37ea 
					 
					
						
						
							
							[Feature] support seed parameter ( #3161 )  
						
						... 
						
						
						
						* support seed
* fix
* add SamplingMetadata seed test
* The next_tokens values are inconsistent!
* add air and rejection seed test
* fix
* add SamplingParams seed test
* fix seed=0
* Default to defualt
* fix
* fix args_utils
* fix review
* fix review
* fix
* fix
* add xpu,gcu,iluvatar support seed
* fix 
						
						
					 
					
						2025-08-06 15:20:47 +08:00 
						 
				 
			
				
					
						
							
							
								bukejiyu 
							
						 
					 
					
						
						
							
						
						20839abccf 
					 
					
						
						
							
							qwen3_moe ( #3084 )  
						
						
						
						
					 
					
						2025-08-06 14:45:27 +08:00 
						 
				 
			
				
					
						
							
							
								Yuan Xiaolan 
							
						 
					 
					
						
						
							
						
						7ce00e597c 
					 
					
						
						
							
							support qk norm ( #3145 )  
						
						
						
						
					 
					
						2025-08-05 16:46:14 +08:00 
						 
				 
			
				
					
						
							
							
								RAM 
							
						 
					 
					
						
						
							
						
						4a10e29804 
					 
					
						
						
							
							fix mla attention backend ( #3176 )  
						
						
						
						
					 
					
						2025-08-05 16:43:15 +08:00 
						 
				 
			
				
					
						
							
							
								Yuan Xiaolan 
							
						 
					 
					
						
						
							
						
						af543b7f0f 
					 
					
						
						
							
							revise get_moe_scores ( #3164 )  
						
						
						
						
					 
					
						2025-08-05 16:43:07 +08:00 
						 
				 
			
				
					
						
							
							
								RichardWooSJTU 
							
						 
					 
					
						
						
							
						
						1e9a8e8cef 
					 
					
						
						
							
							fix lm head bias ( #3185 )  
						
						... 
						
						
						
						Co-authored-by: yuanxiaolan <yuanxiaolan01@baidu.com > 
						
						
					 
					
						2025-08-05 15:40:24 +08:00 
						 
				 
			
				
					
						
							
							
								RichardWooSJTU 
							
						 
					 
					
						
						
							
						
						f5c64a074c 
					 
					
						
						
							
							[EP] Refactor DeepEP Engine Organization for Mixed Mode & Buffer Management Optimization  ( #3182 )  
						
						... 
						
						
						
						* Add support for mixed-ep across multi nodes
* code refine
---------
Co-authored-by: yuanxiaolan <yuanxiaolan01@baidu.com > 
						
						
					 
					
						2025-08-05 15:40:11 +08:00 
						 
				 
			
				
					
						
							
							
								lizhenyun01 
							
						 
					 
					
						
						
							
						
						fe540f6caa 
					 
					
						
						
							
							[plugin] Custom model_runner/model support ( #3186 )  
						
						... 
						
						
						
						* support custom model&&model_runner
* fix merge
* add test && update doc
* fix codestyle
* fix unittest
* load model in rl 
						
						
					 
					
						2025-08-04 18:52:39 -07:00 
						 
				 
			
				
					
						
							
							
								Yuan Xiaolan 
							
						 
					 
					
						
						
							
						
						1f8289e106 
					 
					
						
						
							
							fix expertwise_scale ( #3181 )  
						
						
						
						
					 
					
						2025-08-04 20:06:15 +08:00 
						 
				 
			
				
					
						
							
							
								gaoziyuan 
							
						 
					 
					
						
						
							
						
						4021d66ea5 
					 
					
						
						
							
							【Feature】add fd plugins && rm model_classes ( #3123 )  
						
						... 
						
						
						
						* add fd plugins && rm model_classed
* fix reviews
* add docs
* fix
* fix unitest ci 
						
						
					 
					
						2025-08-03 19:53:20 -07:00 
						 
				 
			
				
					
						
							
							
								bukejiyu 
							
						 
					 
					
						
						
							
						
						1582814905 
					 
					
						
						
							
							fix load_pre_sharded_checkpoint ( #3152 )  
						
						... 
						
						
						
						Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com > 
						
						
					 
					
						2025-08-04 10:44:20 +08:00 
						 
				 
			
				
					
						
							
							
								ApplEOFDiscord 
							
						 
					 
					
						
						
							
						
						b71cbb466d 
					 
					
						
						
							
							[Feature] remove dependency on enable_mm and refine multimodal's code ( #3014 )  
						
						... 
						
						
						
						* remove dependency on enable_mm
* fix codestyle check error
* fix codestyle check error
* update docs
* resolve conflicts on model config
* fix unit test error
* fix code style check error
---------
Co-authored-by: shige <1021937542@qq.com >
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com > 
						
						
					 
					
						2025-08-01 20:01:18 +08:00 
						 
				 
			
				
					
						
							
							
								yangjianfengo1 
							
						 
					 
					
						
						
							
						
						64d7a3194d 
					 
					
						
						
							
							集中式支持fa3 ( #3112 )  
						
						
						
						
					 
					
						2025-08-01 18:03:36 +08:00 
						 
				 
			
				
					
						
							
							
								Ryan 
							
						 
					 
					
						
						
							
						
						94264bbf60 
					 
					
						
						
							
							[Code Simplification] Refactor Post-processing in VL Model Forward Method ( #2937 )  
						
						... 
						
						
						
						* rm sth useless
* refactor model forward
* mv bool index to kernel 
						
						
					 
					
						2025-08-01 17:28:07 +08:00 
						 
				 
			
				
					
						
							
							
								chen 
							
						 
					 
					
						
						
							
						
						a2f5cc54f8 
					 
					
						
						
							
							moe preprocess op support 160 experts and fused_moe triton kernel name add K ( #3121 )  
						
						
						
						
					 
					
						2025-08-01 10:46:20 +08:00 
						 
				 
			
				
					
						
							
							
								Yuan Xiaolan 
							
						 
					 
					
						
						
							
						
						5f56d289a7 
					 
					
						
						
							
							fix is_permuted ( #3098 )  
						
						... 
						
						
						
						Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com > 
						
						
					 
					
						2025-07-31 19:58:05 +08:00 
						 
				 
			
				
					
						
							
							
								RAM 
							
						 
					 
					
						
						
							
						
						d850660872 
					 
					
						
						
							
							[Executor] Refactor GetBlockShapeAndSplitKVBlock Kernel ( #2989 )  
						
						... 
						
						
						
						* reset decoder_block_shape_q buffer
* refactor GetBlockShapeAndSplitKVBlock Kernel and cudagraph padding batch
* update decode_max_tile_size
* fix pre-commit
* update block_multihead_attn_backend
* update flas attn backend
* update MLA Attention
* update XPU Attention
* update gcu,iluvatar model runner
* Update MTP
* fix MTP bug 
						
						
					 
					
						2025-07-31 00:09:31 +08:00 
						 
				 
			
				
					
						
							
							
								bukejiyu 
							
						 
					 
					
						
						
							
						
						db698bda01 
					 
					
						
						
							
							qwen loader ( #3057 )  
						
						
						
						
					 
					
						2025-07-30 19:09:38 +08:00 
						 
				 
			
				
					
						
							
							
								zhink 
							
						 
					 
					
						
						
							
						
						d89b6dd43f 
					 
					
						
						
							
							adapter qwen3 moe attr for init ( #3066 )  
						
						... 
						
						
						
						adapter qwen3 moe attr for init 
						
						
					 
					
						2025-07-30 16:49:28 +08:00 
						 
				 
			
				
					
						
							
							
								bukejiyu 
							
						 
					 
					
						
						
							
						
						8e203666d9 
					 
					
						
						
							
							w4a8 offline ( #3074 )  
						
						... 
						
						
						
						* w4a8 offline
* update
* update
* update 
						
						
					 
					
						2025-07-30 16:33:30 +08:00 
						 
				 
			
				
					
						
							
							
								Yuan Xiaolan 
							
						 
					 
					
						
						
							
						
						35935da9e5 
					 
					
						
						
							
							support W4A8 EPLB ( #3075 )  
						
						
						
						
					 
					
						2025-07-30 14:34:12 +08:00 
						 
				 
			
				
					
						
							
							
								Zero Rains 
							
						 
					 
					
						
						
							
						
						b2f9a42d87 
					 
					
						
						
							
							[Feature] Support repetition early stop ( #3024 )  
						
						... 
						
						
						
						* support repetition early stop and support user to set the parameter
* remove log
* fix codestyle
* add the early_stop_config to rollout_config
* update config and EarlyStopper class
* fix the bug for triton
* modify the stop method
* update description
* modify the usage for stop_flags
---------
Co-authored-by: Yuanle Liu <yuanlehome@163.com > 
						
						
					 
					
						2025-07-29 22:42:54 +08:00 
						 
				 
			
				
					
						
							
							
								Yuan Xiaolan 
							
						 
					 
					
						
						
							
						
						3214fb5393 
					 
					
						
						
							
							support model loading for w4a8 offline quant ( #3064 )  
						
						... 
						
						
						
						支持W4A8 EP 对离线量化权重的load 
						
						
					 
					
						2025-07-29 21:54:37 +08:00 
						 
				 
			
				
					
						
							
							
								Longzhi Wang 
							
						 
					 
					
						
						
							
						
						be0a0f2bb2 
					 
					
						
						
							
							fix arguement error in ep when pd ( #3060 )  
						
						
						
						
					 
					
						2025-07-29 17:17:24 +08:00 
						 
				 
			
				
					
						
							
							
								YuanRisheng 
							
						 
					 
					
						
						
							
						
						502ee92a0a 
					 
					
						
						
							
							Unify server-side and model-side Config (Part3)  ( #3047 )  
						
						... 
						
						
						
						* merge model config
* fix arch
* fix rl 
						
						
					 
					
						2025-07-29 17:07:44 +08:00 
						 
				 
			
				
					
						
							
							
								Longzhi Wang 
							
						 
					 
					
						
						
							
						
						907d561523 
					 
					
						
						
							
							fix ep when paddle version mismatch ( #3056 )  
						
						
						
						
					 
					
						2025-07-29 15:06:49 +08:00 
						 
				 
			
				
					
						
							
							
								JYChen 
							
						 
					 
					
						
						
							
						
						dafe02a7b9 
					 
					
						
						
							
							[stop sequence] support stop sequence ( #3025 )  
						
						... 
						
						
						
						* stop seqs in multi-ends
* unittest for gpu stop op
* kernel tid==0 
						
						
					 
					
						2025-07-29 14:17:37 +08:00 
						 
				 
			
				
					
						
							
							
								Yuan Xiaolan 
							
						 
					 
					
						
						
							
						
						b1d787a272 
					 
					
						
						
							
							[fix] w4a8 model loading and hadamard config ( #3013 )  
						
						
						
						
					 
					
						2025-07-28 18:17:59 +08:00 
						 
				 
			
				
					
						
							
							
								K11OntheBoat 
							
						 
					 
					
						
						
							
						
						83048bbe55 
					 
					
						
						
							
							[Feature] Deepseekv3 supports cudagraph ( #3041 )  
						
						... 
						
						
						
						Co-authored-by: K11OntheBoat <“ruianmaidanglao@163.com ”> 
						
						
					 
					
						2025-07-28 17:12:54 +08:00 
						 
				 
			
				
					
						
							
							
								AIbin 
							
						 
					 
					
						
						
							
						
						ec52d39e68 
					 
					
						
						
							
							【Inference Optimize】Update wint2 weight n-dim reorder ( #3042 )  
						
						
						
						
					 
					
						2025-07-28 16:31:56 +08:00 
						 
				 
			
				
					
						
							
							
								YuanRisheng 
							
						 
					 
					
						
						
							
						
						bddf403576 
					 
					
						
						
							
							Unify server-side and model-side Config (Part2) ( #3035 )  
						
						... 
						
						
						
						* merge speculative and graph opt conifg
* add attr 
						
						
					 
					
						2025-07-28 15:31:48 +08:00 
						 
				 
			
				
					
						
							
							
								Longzhi Wang 
							
						 
					 
					
						
						
							
						
						247010d298 
					 
					
						
						
							
							fix arguement error ( #3030 )  
						
						
						
						
					 
					
						2025-07-28 11:03:29 +08:00 
						 
				 
			
				
					
						
							
							
								YuanRisheng 
							
						 
					 
					
						
						
							
						
						6ccc10ad47 
					 
					
						
						
							
							Unify server-side and model-side Config (Part1) ( #3018 )  
						
						... 
						
						
						
						* move cache config
* fix mtp 
						
						
					 
					
						2025-07-28 10:51:52 +08:00 
						 
				 
			
				
					
						
							
							
								Longzhi Wang 
							
						 
					 
					
						
						
							
						
						0700c90caa 
					 
					
						
						
							
							[Feat] support mixed ep ( #2969 )  
						
						... 
						
						
	
		
			
	 
	
	
		
	
	
		
			
				
	Deploy GitHub Pages / deploy (push) Has been cancelled 
				
			 
		
		
	 
 
	 
						
						* Support mixed ep
* fix comment
* fix comment
* update mixep
* fix conflict
* fix typo
* update
* fix typo
* fix code style
* fix conflict 
						
						
					 
					
						2025-07-25 15:29:30 +08:00 
						 
				 
			
				
					
						
							
							
								chen 
							
						 
					 
					
						
						
							
						
						332154f504 
					 
					
						
						
							
							[feature] Support FA2 ( #3009 )  
						
						
						
						
					 
					
						2025-07-25 14:09:00 +08:00 
						 
				 
			
				
					
						
							
							
								xiaoxiaohehe001 
							
						 
					 
					
						
						
							
						
						2970b00dfa 
					 
					
						
						
							
							[Feature] Support_eplb ( #2997 )  
						
						... 
						
						
	
		
			
	 
	
	
		
	
	
		
			
				
	Deploy GitHub Pages / deploy (push) Has been cancelled 
				
			 
		
		
	 
 
	 
						
						* [Feature] support_eplb
* [Feature] support_eplb
* [Fix] fix mm ep 
						
						
					 
					
						2025-07-24 20:22:45 +08:00 
						 
				 
			
				
					
						
							
							
								littledgg 
							
						 
					 
					
						
						
							
						
						f37d00e856 
					 
					
						
						
							
							[Model] Provide clearer error for missing KV cache quantization scales ( #3007 )  
						
						
						
						
					 
					
						2025-07-24 20:15:00 +08:00 
						 
				 
			
				
					
						
							
							
								EnflameGCU 
							
						 
					 
					
						
						
							
						
						c40df1802e 
					 
					
						
						
							
							[GCU] Update to develop ( #2988 )  
						
						
						
						
					 
					
						2025-07-24 19:30:52 +08:00 
						 
				 
			
				
					
						
							
							
								Zero Rains 
							
						 
					 
					
						
						
							
						
						0fb37ab7e4 
					 
					
						
						
							
							update flake8 version to support pre-commit in python3.12 ( #3000 )  
						
						... 
						
						
						
						* update flake8 version to support pre-commit in python3.12
* polish code 
						
						
					 
					
						2025-07-24 01:43:31 -07:00 
						 
				 
			
				
					
						
							
							
								xiaoxiaohehe001 
							
						 
					 
					
						
						
							
						
						2c0ff068e2 
					 
					
						
						
							
							[Fix] fix mm ep empty run ( #2999 )  
						
						
						
						
					 
					
						2025-07-24 14:15:55 +08:00 
						 
				 
			
				
					
						
							
							
								lizhenyun01 
							
						 
					 
					
						
						
							
						
						29c3292f02 
					 
					
						
						
							
							support c4 attn && fix cache  
						
						
						
						
					 
					
						2025-07-24 12:00:52 +08:00 
						 
				 
			
				
					
						
							
							
								lizexu123 
							
						 
					 
					
						
						
							
						
						832d25334a 
					 
					
						
						
							
							[Code Simplification] fix init_distributed_environment() ( #2982 )  
						
						
						
						
					 
					
						2025-07-24 11:43:28 +08:00 
						 
				 
			
				
					
						
							
							
								bukejiyu 
							
						 
					 
					
						
						
							
						
						bfeb664ab8 
					 
					
						
						
							
							update ( #2978 )  
						
						
	
		
			
	 
	
	
		
	
	
		
			
				
	Deploy GitHub Pages / deploy (push) Has been cancelled 
				
			 
		
		
	 
 
	 
						
						
					 
					
						2025-07-24 00:16:42 +08:00 
						 
				 
			
				
					
						
							
							
								chenjian 
							
						 
					 
					
						
						
							
						
						85a78d695d 
					 
					
						
						
							
							[Feature] Support block scheduler v1 for FD ( #2928 )  
						
						... 
						
						
						
						* Support FD block scheduler v1
* Support FD block scheduler v1
* Support FD block scheduler v1
* Fix according to copilot review
* Fix according to review
* Remove is_dummy
* Fix bug when real_bsz=1
* Fix infer first token cost time
---------
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com > 
						
						
					 
					
						2025-07-23 20:31:31 +08:00