[FIX 2.0.3] fix rejection sampling when top_p=0 using _SAMPLING_EPS (#2966)

* fix rejection sampling when top_p=0

* fix

* fix
This commit is contained in:
Sunny-bot1
2025-07-22 20:53:04 +08:00
committed by GitHub
parent b0f1e0eef4
commit 8c43bc8176
3 changed files with 9 additions and 1 deletion

View File

@@ -289,7 +289,7 @@ __global__ void TopKTopPSamplingFromProbKernel(DType* probs, IdType* output,
curand_init(philox_seed, bx, philox_offset, &state);
const uint32_t row_idx = bx;
const uint32_t k = top_k_arr[row_idx] == 0 ? d : top_k_arr[row_idx];
const float p = top_p_arr[row_idx] == 0 ? 1e-6 : top_p_arr[row_idx];
const float p = top_p_arr[row_idx];
extern __shared__ __align__(
alignof(SamplingTempStorage<BLOCK_THREADS, SCAN_ALGORITHM, REDUCE_ALGORITHM>))

View File

@@ -125,6 +125,8 @@ class ErnieProcessor(BaseDataProcessor):
if request.get("temperature") < _SAMPLING_EPS:
# zero temperature is equivalent to greedy sampling
request.set("temperature", 1)
if request.get("top_p") < _SAMPLING_EPS:
request.set("top_p", _SAMPLING_EPS)
data_processor_logger.info(f"Processed request {request}")
return request
@@ -182,6 +184,8 @@ class ErnieProcessor(BaseDataProcessor):
if request.get("temperature") < _SAMPLING_EPS:
# zero temperature is equivalent to greedy sampling
request["temperature"] = 1
if request.get("top_p") < _SAMPLING_EPS:
request["top_p"] = _SAMPLING_EPS
data_processor_logger.info(f"Processed request {request}")
return request

View File

@@ -258,6 +258,8 @@ class DataProcessor(BaseDataProcessor):
if request.get("temperature") < _SAMPLING_EPS:
# zero temperature is equivalent to greedy sampling
request.set("temperature", 1)
if request.get("top_p") < _SAMPLING_EPS:
request.set("top_p", _SAMPLING_EPS)
data_processor_logger.info(f"Processed request {request}")
return request
@@ -306,6 +308,8 @@ class DataProcessor(BaseDataProcessor):
if request.get("temperature") < _SAMPLING_EPS:
# zero temperature is equivalent to greedy sampling
request["temperature"] = 1
if request.get("top_p") < _SAMPLING_EPS:
request["top_p"] = _SAMPLING_EPS
data_processor_logger.info(f"Processed request {request}")
return request