Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-04 08:16:42 +08:00)
[FIX] Fix rejection sampling when top_p=0 using _SAMPLING_EPS (#2967)
* fix rejection sampling when top_p=0
* fix
This commit is contained in:
@@ -292,7 +292,7 @@ __global__ void TopKTopPSamplingFromProbKernel(DType* probs, IdType* output,
|
|||||||
curand_init(philox_seed, bx, philox_offset, &state);
|
curand_init(philox_seed, bx, philox_offset, &state);
|
||||||
const uint32_t row_idx = bx;
|
const uint32_t row_idx = bx;
|
||||||
const uint32_t k = top_k_arr[row_idx] == 0 ? d : top_k_arr[row_idx];
|
const uint32_t k = top_k_arr[row_idx] == 0 ? d : top_k_arr[row_idx];
|
||||||
const float p = top_p_arr[row_idx] == 0 ? 1e-6 : top_p_arr[row_idx];
|
const float p = top_p_arr[row_idx];
|
||||||
|
|
||||||
extern __shared__ __align__(
|
extern __shared__ __align__(
|
||||||
alignof(SamplingTempStorage<BLOCK_THREADS, SCAN_ALGORITHM, REDUCE_ALGORITHM>))
|
alignof(SamplingTempStorage<BLOCK_THREADS, SCAN_ALGORITHM, REDUCE_ALGORITHM>))
|
||||||
|
@@ -123,6 +123,8 @@ class ErnieProcessor(BaseDataProcessor):
|
|||||||
if request.get("temperature") < _SAMPLING_EPS:
|
if request.get("temperature") < _SAMPLING_EPS:
|
||||||
# zero temperature is equivalent to greedy sampling
|
# zero temperature is equivalent to greedy sampling
|
||||||
request.set("temperature", 1)
|
request.set("temperature", 1)
|
||||||
|
if request.get("top_p") < _SAMPLING_EPS:
|
||||||
|
request.set("top_p", _SAMPLING_EPS)
|
||||||
data_processor_logger.info(f"Processed request {request}")
|
data_processor_logger.info(f"Processed request {request}")
|
||||||
return request
|
return request
|
||||||
|
|
||||||
@@ -174,6 +176,8 @@ class ErnieProcessor(BaseDataProcessor):
|
|||||||
if request.get("temperature") < _SAMPLING_EPS:
|
if request.get("temperature") < _SAMPLING_EPS:
|
||||||
# zero temperature is equivalent to greedy sampling
|
# zero temperature is equivalent to greedy sampling
|
||||||
request["temperature"] = 1
|
request["temperature"] = 1
|
||||||
|
if request.get("top_p") < _SAMPLING_EPS:
|
||||||
|
request["top_p"] = _SAMPLING_EPS
|
||||||
data_processor_logger.info(f"Processed request {request}")
|
data_processor_logger.info(f"Processed request {request}")
|
||||||
|
|
||||||
return request
|
return request
|
||||||
|
@@ -252,6 +252,8 @@ class DataProcessor(BaseDataProcessor):
|
|||||||
if request.get("temperature") < _SAMPLING_EPS:
|
if request.get("temperature") < _SAMPLING_EPS:
|
||||||
# zero temperature is equivalent to greedy sampling
|
# zero temperature is equivalent to greedy sampling
|
||||||
request.set("temperature", 1)
|
request.set("temperature", 1)
|
||||||
|
if request.get("top_p") < _SAMPLING_EPS:
|
||||||
|
request.set("top_p", _SAMPLING_EPS)
|
||||||
data_processor_logger.info(f"Processed request {request}")
|
data_processor_logger.info(f"Processed request {request}")
|
||||||
return request
|
return request
|
||||||
|
|
||||||
@@ -297,6 +299,8 @@ class DataProcessor(BaseDataProcessor):
|
|||||||
if request.get("temperature") < _SAMPLING_EPS:
|
if request.get("temperature") < _SAMPLING_EPS:
|
||||||
# zero temperature is equivalent to greedy sampling
|
# zero temperature is equivalent to greedy sampling
|
||||||
request["temperature"] = 1
|
request["temperature"] = 1
|
||||||
|
if request.get("top_p") < _SAMPLING_EPS:
|
||||||
|
request["top_p"] = _SAMPLING_EPS
|
||||||
data_processor_logger.info(f"Processed request {request}")
|
data_processor_logger.info(f"Processed request {request}")
|
||||||
return request
|
return request
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user