mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[Other] faster_tokenizer->fast_tokenizer (#636)
* faster_tokenizer->fast_tokenizer * ErnieFasterTokenizer->ErnieFastTokenizer * update the fastdeploy_init Co-authored-by: Jason <jiangjiajun@baidu.com>
This commit is contained in:
@@ -415,14 +415,14 @@ endif()
|
|||||||
if(ANDROID OR IOS)
|
if(ANDROID OR IOS)
|
||||||
if(ENABLE_TEXT)
|
if(ENABLE_TEXT)
|
||||||
set(ENABLE_TEXT OFF CACHE BOOL "Force ENABLE_TEXT OFF" FORCE)
|
set(ENABLE_TEXT OFF CACHE BOOL "Force ENABLE_TEXT OFF" FORCE)
|
||||||
message(STATUS "Found Android or IOS, force ENABLE_TEXT OFF. We do not support faster_tokenizer with Android/IOS now.")
|
message(STATUS "Found Android or IOS, force ENABLE_TEXT OFF. We do not support fast_tokenizer with Android/IOS now.")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(ENABLE_TEXT)
|
if(ENABLE_TEXT)
|
||||||
add_definitions(-DENABLE_TEXT)
|
add_definitions(-DENABLE_TEXT)
|
||||||
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TEXT_SRCS})
|
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TEXT_SRCS})
|
||||||
include(${PROJECT_SOURCE_DIR}/cmake/faster_tokenizer.cmake)
|
include(${PROJECT_SOURCE_DIR}/cmake/fast_tokenizer.cmake)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(ENABLE_PADDLE_FRONTEND)
|
if(ENABLE_PADDLE_FRONTEND)
|
||||||
|
@@ -213,10 +213,10 @@ if (ENABLE_TEXT)
|
|||||||
message(FATAL_ERROR "Not support fastdeploy text APIs with Android now!")
|
message(FATAL_ERROR "Not support fastdeploy text APIs with Android now!")
|
||||||
endif()
|
endif()
|
||||||
# Add dependency libs later
|
# Add dependency libs later
|
||||||
find_library(FASTER_TOKENIZER_LIB core_tokenizers ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/lib NO_DEFAULT_PATH)
|
find_library(FAST_TOKENIZER_LIB core_tokenizers ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/lib NO_DEFAULT_PATH)
|
||||||
list(APPEND FASTDEPLOY_LIBS ${FASTER_TOKENIZER_LIB})
|
list(APPEND FASTDEPLOY_LIBS ${FAST_TOKENIZER_LIB})
|
||||||
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/include)
|
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/include)
|
||||||
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/third_party/include)
|
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/third_party/include)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(ENABLE_PADDLE_FRONTEND)
|
if(ENABLE_PADDLE_FRONTEND)
|
||||||
|
108
cmake/fast_tokenizer.cmake
Normal file
108
cmake/fast_tokenizer.cmake
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
|
||||||
|
|
||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
include(ExternalProject)
|
||||||
|
|
||||||
|
# Locations used to download, unpack and install the prebuilt fast_tokenizer
# package. THIRD_PARTY_PATH is not set in this file; it must be provided by
# whoever includes it.
set(FASTTOKENIZER_PROJECT "extern_fast_tokenizer")
set(FASTTOKENIZER_PREFIX_DIR ${THIRD_PARTY_PATH}/fast_tokenizer)
# Directory the install step copies from; with PREFIX set, ExternalProject
# uses <prefix>/src/<name> as its source directory.
set(FASTTOKENIZER_SOURCE_DIR
    ${FASTTOKENIZER_PREFIX_DIR}/src/${FASTTOKENIZER_PROJECT})
set(FASTTOKENIZER_INSTALL_DIR ${THIRD_PARTY_PATH}/install/fast_tokenizer)

# Header search paths: the tokenizer's own headers plus the headers of the
# third-party code bundled with it.
set(FASTTOKENIZER_INC_DIR
    "${FASTTOKENIZER_INSTALL_DIR}/include"
    "${FASTTOKENIZER_INSTALL_DIR}/third_party/include"
    CACHE PATH "fast_tokenizer include directory." FORCE)
set(FASTTOKENIZER_LIB_DIR
    "${FASTTOKENIZER_INSTALL_DIR}/lib/"
    CACHE PATH "fast_tokenizer lib directory." FORCE)
set(FASTTOKENIZER_THIRD_LIB_DIR
    "${FASTTOKENIZER_INSTALL_DIR}/third_party/lib/"
    CACHE PATH "fast_tokenizer third-party lib directory." FORCE)

# Let binaries in the build tree find the tokenizer library at run time.
# list(APPEND) avoids the empty leading entry that re-setting the variable
# from its own (possibly empty) value would create.
list(APPEND CMAKE_BUILD_RPATH "${FASTTOKENIZER_LIB_DIR}")

include_directories(${FASTTOKENIZER_INC_DIR})
|
||||||
|
|
||||||
|
# Set lib path
# Pick the platform-specific file name of the core_tokenizers library inside
# FASTTOKENIZER_LIB_DIR.
if(WIN32)
  set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/core_tokenizers.lib"
      CACHE FILEPATH "fast_tokenizer compile library." FORCE)
  # ICU libraries are only referenced on Windows.
  set(ICUDT_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icudt.lib")
  set(ICUUC_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icuuc.lib")
elseif(APPLE)
  set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.dylib"
      CACHE FILEPATH "fast_tokenizer compile library." FORCE)
else()
  set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.so"
      CACHE FILEPATH "fast_tokenizer compile library." FORCE)
endif()
# Report the resolved path on every platform (the macOS branch previously
# printed nothing).
message("FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}")
|
||||||
|
|
||||||
|
# Host location and release of the prebuilt fast_tokenizer archives.
set(FASTTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/fast_tokenizer/")
set(FASTTOKENIZER_VERSION "1.0.0")

# Set download url
# Choose the archive name for the current platform and CPU architecture.
if(WIN32)
  if(CMAKE_CL_64)
    set(FASTTOKENIZER_FILE "fast_tokenizer-win-x64-${FASTTOKENIZER_VERSION}.zip")
  else()
    set(FASTTOKENIZER_FILE "fast_tokenizer-win-x86-${FASTTOKENIZER_VERSION}.zip")
  endif()
elseif(APPLE)
  if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
    set(FASTTOKENIZER_FILE "fast_tokenizer-osx-x86_64-${FASTTOKENIZER_VERSION}.tgz")
  else()
    set(FASTTOKENIZER_FILE "fast_tokenizer-osx-arm64-${FASTTOKENIZER_VERSION}.tgz")
  endif()
else()
  if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
    set(FASTTOKENIZER_FILE "fast_tokenizer-linux-x64-${FASTTOKENIZER_VERSION}.tgz")
  else()
    set(FASTTOKENIZER_FILE "fast_tokenizer-linux-aarch64-${FASTTOKENIZER_VERSION}.tgz")
  endif()
endif()

# Full download address = base URL followed by the archive name.
string(CONCAT FASTTOKENIZER_URL
       "${FASTTOKENIZER_URL_BASE}"
       "${FASTTOKENIZER_FILE}")
|
||||||
|
|
||||||
|
# Every library file that imported targets take from this package must be
# declared as a byproduct so generators such as Ninja know which step
# produces it. On Windows that includes the two ICU libraries as well as
# core_tokenizers.
set(FASTTOKENIZER_BYPRODUCTS ${FASTTOKENIZER_COMPILE_LIB})
if(WIN32)
  list(APPEND FASTTOKENIZER_BYPRODUCTS ${ICUDT_LIB} ${ICUUC_LIB})
endif()

# Download the prebuilt archive and copy its contents into the install
# directory. Nothing is configured or compiled, so those steps are empty.
ExternalProject_Add(
  ${FASTTOKENIZER_PROJECT}
  ${EXTERNAL_PROJECT_LOG_ARGS}
  URL ${FASTTOKENIZER_URL}
  PREFIX ${FASTTOKENIZER_PREFIX_DIR}
  DOWNLOAD_NO_PROGRESS 1
  CONFIGURE_COMMAND ""
  BUILD_COMMAND ""
  UPDATE_COMMAND ""
  INSTALL_COMMAND
    ${CMAKE_COMMAND} -E copy_directory ${FASTTOKENIZER_SOURCE_DIR} ${FASTTOKENIZER_INSTALL_DIR}
  BUILD_BYPRODUCTS ${FASTTOKENIZER_BYPRODUCTS})
|
||||||
|
|
||||||
|
# Wrap the prebuilt tokenizer library in an imported target that is only
# usable once the download/install step has run, and register it in
# DEPEND_LIBS.
# NOTE(review): the target type is STATIC although the non-Windows paths in
# this file end in .so/.dylib - confirm this is intentional.
add_library(fast_tokenizer STATIC IMPORTED GLOBAL)
set_target_properties(fast_tokenizer PROPERTIES
  IMPORTED_LOCATION "${FASTTOKENIZER_COMPILE_LIB}")
add_dependencies(fast_tokenizer ${FASTTOKENIZER_PROJECT})
list(APPEND DEPEND_LIBS fast_tokenizer)

if(WIN32)
  # The ICU libraries shipped with the package get the same treatment; their
  # paths come from ICUDT_LIB and ICUUC_LIB.
  foreach(icu_name IN ITEMS icudt icuuc)
    string(TOUPPER "${icu_name}" icu_var)
    add_library(${icu_name} STATIC IMPORTED GLOBAL)
    set_target_properties(${icu_name} PROPERTIES
      IMPORTED_LOCATION "${${icu_var}_LIB}")
    add_dependencies(${icu_name} ${FASTTOKENIZER_PROJECT})
    list(APPEND DEPEND_LIBS ${icu_name})
  endforeach()
  unset(icu_var)
endif()
|
@@ -1,108 +0,0 @@
|
|||||||
|
|
||||||
|
|
||||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
include(ExternalProject)
|
|
||||||
|
|
||||||
set(FASTERTOKENIZER_PROJECT "extern_faster_tokenizer")
|
|
||||||
set(FASTERTOKENIZER_PREFIX_DIR ${THIRD_PARTY_PATH}/faster_tokenizer)
|
|
||||||
set(FASTERTOKENIZER_SOURCE_DIR
|
|
||||||
${THIRD_PARTY_PATH}/faster_tokenizer/src/${FASTERTOKENIZER_PROJECT})
|
|
||||||
set(FASTERTOKENIZER_INSTALL_DIR ${THIRD_PARTY_PATH}/install/faster_tokenizer)
|
|
||||||
set(FASTERTOKENIZER_INC_DIR
|
|
||||||
"${FASTERTOKENIZER_INSTALL_DIR}/include"
|
|
||||||
"${FASTERTOKENIZER_INSTALL_DIR}/third_party/include"
|
|
||||||
CACHE PATH "faster_tokenizer include directory." FORCE)
|
|
||||||
set(FASTERTOKENIZER_LIB_DIR
|
|
||||||
"${FASTERTOKENIZER_INSTALL_DIR}/lib/"
|
|
||||||
CACHE PATH "faster_tokenizer lib directory." FORCE)
|
|
||||||
set(FASTERTOKENIZER_THIRD_LIB_DIR
|
|
||||||
"${FASTERTOKENIZER_INSTALL_DIR}/third_party/lib/"
|
|
||||||
CACHE PATH "faster_tokenizer lib directory." FORCE)
|
|
||||||
set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}"
|
|
||||||
"${FASTERTOKENIZER_LIB_DIR}")
|
|
||||||
|
|
||||||
include_directories(${FASTERTOKENIZER_INC_DIR})
|
|
||||||
|
|
||||||
# Set lib path
|
|
||||||
if(WIN32)
|
|
||||||
set(FASTERTOKENIZER_COMPILE_LIB "${FASTERTOKENIZER_LIB_DIR}/core_tokenizers.lib"
|
|
||||||
CACHE FILEPATH "faster_tokenizer compile library." FORCE)
|
|
||||||
message("FASTERTOKENIZER_COMPILE_LIB = ${FASTERTOKENIZER_COMPILE_LIB}")
|
|
||||||
set(ICUDT_LIB "${FASTERTOKENIZER_THIRD_LIB_DIR}/icudt.lib")
|
|
||||||
set(ICUUC_LIB "${FASTERTOKENIZER_THIRD_LIB_DIR}/icuuc.lib")
|
|
||||||
|
|
||||||
elseif(APPLE)
|
|
||||||
set(FASTERTOKENIZER_COMPILE_LIB "${FASTERTOKENIZER_LIB_DIR}/libcore_tokenizers.dylib"
|
|
||||||
CACHE FILEPATH "faster_tokenizer compile library." FORCE)
|
|
||||||
else()
|
|
||||||
|
|
||||||
set(FASTERTOKENIZER_COMPILE_LIB "${FASTERTOKENIZER_LIB_DIR}/libcore_tokenizers.so"
|
|
||||||
CACHE FILEPATH "faster_tokenizer compile library." FORCE)
|
|
||||||
message("FASTERTOKENIZER_COMPILE_LIB = ${FASTERTOKENIZER_COMPILE_LIB}")
|
|
||||||
endif(WIN32)
|
|
||||||
|
|
||||||
set(FASTERTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/faster_tokenizer/")
|
|
||||||
set(FASTERTOKENIZER_VERSION "dev")
|
|
||||||
|
|
||||||
# Set download url
|
|
||||||
if(WIN32)
|
|
||||||
set(FASTERTOKENIZER_FILE "faster_tokenizer-win-x64-${FASTERTOKENIZER_VERSION}.zip")
|
|
||||||
if(NOT CMAKE_CL_64)
|
|
||||||
set(FASTERTOKENIZER_FILE "faster_tokenizer-win-x86-${FASTERTOKENIZER_VERSION}.zip")
|
|
||||||
endif()
|
|
||||||
elseif(APPLE)
|
|
||||||
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
|
|
||||||
set(FASTERTOKENIZER_FILE "faster_tokenizer-osx-arm64-${FASTERTOKENIZER_VERSION}.tgz")
|
|
||||||
else()
|
|
||||||
set(FASTERTOKENIZER_FILE "faster_tokenizer-osx-x86_64-${FASTERTOKENIZER_VERSION}.tgz")
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
|
|
||||||
set(FASTERTOKENIZER_FILE "faster_tokenizer-linux-aarch64-${FASTERTOKENIZER_VERSION}.tgz")
|
|
||||||
else()
|
|
||||||
set(FASTERTOKENIZER_FILE "faster_tokenizer-linux-x64-${FASTERTOKENIZER_VERSION}.tgz")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
set(FASTERTOKENIZER_URL "${FASTERTOKENIZER_URL_BASE}${FASTERTOKENIZER_FILE}")
|
|
||||||
|
|
||||||
ExternalProject_Add(
|
|
||||||
${FASTERTOKENIZER_PROJECT}
|
|
||||||
${EXTERNAL_PROJECT_LOG_ARGS}
|
|
||||||
URL ${FASTERTOKENIZER_URL}
|
|
||||||
PREFIX ${FASTERTOKENIZER_PREFIX_DIR}
|
|
||||||
DOWNLOAD_NO_PROGRESS 1
|
|
||||||
CONFIGURE_COMMAND ""
|
|
||||||
BUILD_COMMAND ""
|
|
||||||
UPDATE_COMMAND ""
|
|
||||||
INSTALL_COMMAND
|
|
||||||
${CMAKE_COMMAND} -E copy_directory ${FASTERTOKENIZER_SOURCE_DIR} ${FASTERTOKENIZER_INSTALL_DIR}
|
|
||||||
BUILD_BYPRODUCTS ${FASTERTOKENIZER_COMPILE_LIB})
|
|
||||||
|
|
||||||
add_library(faster_tokenizer STATIC IMPORTED GLOBAL)
|
|
||||||
set_property(TARGET faster_tokenizer PROPERTY IMPORTED_LOCATION ${FASTERTOKENIZER_COMPILE_LIB})
|
|
||||||
add_dependencies(faster_tokenizer ${FASTERTOKENIZER_PROJECT})
|
|
||||||
list(APPEND DEPEND_LIBS faster_tokenizer)
|
|
||||||
|
|
||||||
if (WIN32)
|
|
||||||
add_library(icudt STATIC IMPORTED GLOBAL)
|
|
||||||
set_property(TARGET icudt PROPERTY IMPORTED_LOCATION ${ICUDT_LIB})
|
|
||||||
add_dependencies(icudt ${FASTERTOKENIZER_PROJECT})
|
|
||||||
list(APPEND DEPEND_LIBS icudt)
|
|
||||||
|
|
||||||
add_library(icuuc STATIC IMPORTED GLOBAL)
|
|
||||||
set_property(TARGET icuuc PROPERTY IMPORTED_LOCATION ${ICUUC_LIB})
|
|
||||||
add_dependencies(icuuc ${FASTERTOKENIZER_PROJECT})
|
|
||||||
list(APPEND DEPEND_LIBS icuuc)
|
|
||||||
endif()
|
|
@@ -18,11 +18,11 @@
|
|||||||
#include "fastdeploy/function/softmax.h"
|
#include "fastdeploy/function/softmax.h"
|
||||||
#include "fastdeploy/runtime.h"
|
#include "fastdeploy/runtime.h"
|
||||||
#include "fastdeploy/utils/path.h"
|
#include "fastdeploy/utils/path.h"
|
||||||
#include "faster_tokenizer/tokenizers/ernie_faster_tokenizer.h"
|
#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h"
|
||||||
#include "gflags/gflags.h"
|
#include "gflags/gflags.h"
|
||||||
|
|
||||||
using namespace paddlenlp;
|
using namespace paddlenlp;
|
||||||
using namespace faster_tokenizer::tokenizers_impl;
|
using namespace fast_tokenizer::tokenizers_impl;
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
const char sep = '\\';
|
const char sep = '\\';
|
||||||
#else
|
#else
|
||||||
@@ -124,10 +124,10 @@ struct SeqClsResult {
|
|||||||
|
|
||||||
struct ErnieForSequenceClassificationPredictor {
|
struct ErnieForSequenceClassificationPredictor {
|
||||||
fastdeploy::Runtime runtime_;
|
fastdeploy::Runtime runtime_;
|
||||||
ErnieFasterTokenizer tokenizer_;
|
ErnieFastTokenizer tokenizer_;
|
||||||
ErnieForSequenceClassificationPredictor(
|
ErnieForSequenceClassificationPredictor(
|
||||||
const fastdeploy::RuntimeOption& option,
|
const fastdeploy::RuntimeOption& option,
|
||||||
const ErnieFasterTokenizer& tokenizer)
|
const ErnieFastTokenizer& tokenizer)
|
||||||
: tokenizer_(tokenizer) {
|
: tokenizer_(tokenizer) {
|
||||||
runtime_.Init(option);
|
runtime_.Init(option);
|
||||||
}
|
}
|
||||||
@@ -135,8 +135,8 @@ struct ErnieForSequenceClassificationPredictor {
|
|||||||
bool Preprocess(const std::vector<std::string>& texts,
|
bool Preprocess(const std::vector<std::string>& texts,
|
||||||
const std::vector<std::string>& texts_pair,
|
const std::vector<std::string>& texts_pair,
|
||||||
std::vector<fastdeploy::FDTensor>* inputs) {
|
std::vector<fastdeploy::FDTensor>* inputs) {
|
||||||
std::vector<faster_tokenizer::core::Encoding> encodings;
|
std::vector<fast_tokenizer::core::Encoding> encodings;
|
||||||
std::vector<faster_tokenizer::core::EncodeInput> text_pair_input;
|
std::vector<fast_tokenizer::core::EncodeInput> text_pair_input;
|
||||||
// 1. Tokenize the text or (text, text_pair)
|
// 1. Tokenize the text or (text, text_pair)
|
||||||
if (texts_pair.empty()) {
|
if (texts_pair.empty()) {
|
||||||
for (int i = 0; i < texts.size(); ++i) {
|
for (int i = 0; i < texts.size(); ++i) {
|
||||||
@@ -242,7 +242,7 @@ int main(int argc, char* argv[]) {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ErnieFasterTokenizer tokenizer(vocab_path);
|
ErnieFastTokenizer tokenizer(vocab_path);
|
||||||
|
|
||||||
ErnieForSequenceClassificationPredictor predictor(option, tokenizer);
|
ErnieForSequenceClassificationPredictor predictor(option, tokenizer);
|
||||||
|
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
faster_tokenizer
|
fast-tokenizer-python
|
||||||
paddlenlp
|
paddlenlp
|
||||||
|
@@ -15,7 +15,7 @@ import os
|
|||||||
import distutils.util
|
import distutils.util
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import faster_tokenizer
|
import fast_tokenizer
|
||||||
from paddlenlp.transformers import AutoTokenizer
|
from paddlenlp.transformers import AutoTokenizer
|
||||||
import fastdeploy as fd
|
import fastdeploy as fd
|
||||||
|
|
||||||
|
@@ -19,8 +19,8 @@
|
|||||||
#include <queue>
|
#include <queue>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
#include "faster_tokenizer/pretokenizers/pretokenizer.h"
|
#include "fast_tokenizer/pretokenizers/pretokenizer.h"
|
||||||
#include "faster_tokenizer/utils/utf8.h"
|
#include "fast_tokenizer/utils/utf8.h"
|
||||||
|
|
||||||
namespace fastdeploy {
|
namespace fastdeploy {
|
||||||
namespace text {
|
namespace text {
|
||||||
@@ -30,9 +30,9 @@ static std::string DBC2SBC(const std::string& content) {
|
|||||||
size_t content_utf8_len = 0;
|
size_t content_utf8_len = 0;
|
||||||
while (content_utf8_len < content.length()) {
|
while (content_utf8_len < content.length()) {
|
||||||
uint32_t content_char;
|
uint32_t content_char;
|
||||||
auto content_char_width = faster_tokenizer::utils::UTF8ToUInt32(
|
auto content_char_width = fast_tokenizer::utils::UTF8ToUInt32(
|
||||||
content.data() + content_utf8_len, &content_char);
|
content.data() + content_utf8_len, &content_char);
|
||||||
content_char = faster_tokenizer::utils::UTF8ToUnicode(content_char);
|
content_char = fast_tokenizer::utils::UTF8ToUnicode(content_char);
|
||||||
if (content_char == 0x3000) {
|
if (content_char == 0x3000) {
|
||||||
content_char = 0x0020;
|
content_char = 0x0020;
|
||||||
} else {
|
} else {
|
||||||
@@ -43,9 +43,9 @@ static std::string DBC2SBC(const std::string& content) {
|
|||||||
} else {
|
} else {
|
||||||
char dst_char[5] = {0};
|
char dst_char[5] = {0};
|
||||||
uint32_t utf8_uint32 =
|
uint32_t utf8_uint32 =
|
||||||
faster_tokenizer::utils::UnicodeToUTF8(content_char);
|
fast_tokenizer::utils::UnicodeToUTF8(content_char);
|
||||||
uint32_t utf8_char_count =
|
uint32_t utf8_char_count =
|
||||||
faster_tokenizer::utils::UnicodeToUTF8Char(utf8_uint32, dst_char);
|
fast_tokenizer::utils::UnicodeToUTF8Char(utf8_uint32, dst_char);
|
||||||
result.append(dst_char, utf8_char_count);
|
result.append(dst_char, utf8_char_count);
|
||||||
}
|
}
|
||||||
content_utf8_len += content_char_width;
|
content_utf8_len += content_char_width;
|
||||||
@@ -177,8 +177,8 @@ UIEModel::UIEModel(const std::string& model_file,
|
|||||||
initialized = Initialize();
|
initialized = Initialize();
|
||||||
SetSchema(schema);
|
SetSchema(schema);
|
||||||
tokenizer_.EnableTruncMethod(
|
tokenizer_.EnableTruncMethod(
|
||||||
max_length, 0, faster_tokenizer::core::Direction::RIGHT,
|
max_length, 0, fast_tokenizer::core::Direction::RIGHT,
|
||||||
faster_tokenizer::core::TruncStrategy::LONGEST_FIRST);
|
fast_tokenizer::core::TruncStrategy::LONGEST_FIRST);
|
||||||
}
|
}
|
||||||
|
|
||||||
UIEModel::UIEModel(const std::string& model_file,
|
UIEModel::UIEModel(const std::string& model_file,
|
||||||
@@ -198,8 +198,8 @@ UIEModel::UIEModel(const std::string& model_file,
|
|||||||
initialized = Initialize();
|
initialized = Initialize();
|
||||||
SetSchema(schema);
|
SetSchema(schema);
|
||||||
tokenizer_.EnableTruncMethod(
|
tokenizer_.EnableTruncMethod(
|
||||||
max_length, 0, faster_tokenizer::core::Direction::RIGHT,
|
max_length, 0, fast_tokenizer::core::Direction::RIGHT,
|
||||||
faster_tokenizer::core::TruncStrategy::LONGEST_FIRST);
|
fast_tokenizer::core::TruncStrategy::LONGEST_FIRST);
|
||||||
}
|
}
|
||||||
|
|
||||||
UIEModel::UIEModel(const std::string& model_file,
|
UIEModel::UIEModel(const std::string& model_file,
|
||||||
@@ -219,8 +219,8 @@ UIEModel::UIEModel(const std::string& model_file,
|
|||||||
initialized = Initialize();
|
initialized = Initialize();
|
||||||
SetSchema(schema);
|
SetSchema(schema);
|
||||||
tokenizer_.EnableTruncMethod(
|
tokenizer_.EnableTruncMethod(
|
||||||
max_length, 0, faster_tokenizer::core::Direction::RIGHT,
|
max_length, 0, fast_tokenizer::core::Direction::RIGHT,
|
||||||
faster_tokenizer::core::TruncStrategy::LONGEST_FIRST);
|
fast_tokenizer::core::TruncStrategy::LONGEST_FIRST);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UIEModel::Initialize() {
|
bool UIEModel::Initialize() {
|
||||||
@@ -253,7 +253,7 @@ void UIEModel::AutoSplitter(const std::vector<std::string>& texts,
|
|||||||
size_t cnt_org = 0;
|
size_t cnt_org = 0;
|
||||||
size_t cnt_short = 0;
|
size_t cnt_short = 0;
|
||||||
for (auto& text : texts) {
|
for (auto& text : texts) {
|
||||||
auto text_len = faster_tokenizer::utils::GetUnicodeLenFromUTF8(
|
auto text_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8(
|
||||||
text.c_str(), text.length());
|
text.c_str(), text.length());
|
||||||
if (text_len <= max_length) {
|
if (text_len <= max_length) {
|
||||||
short_texts->push_back(text);
|
short_texts->push_back(text);
|
||||||
@@ -264,14 +264,14 @@ void UIEModel::AutoSplitter(const std::vector<std::string>& texts,
|
|||||||
}
|
}
|
||||||
cnt_short += 1;
|
cnt_short += 1;
|
||||||
} else {
|
} else {
|
||||||
faster_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
|
fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
|
||||||
text);
|
text);
|
||||||
for (size_t start = 0; start < text_len; start += max_length) {
|
for (size_t start = 0; start < text_len; start += max_length) {
|
||||||
size_t end = start + max_length;
|
size_t end = start + max_length;
|
||||||
if (end > text_len) {
|
if (end > text_len) {
|
||||||
end = text_len;
|
end = text_len;
|
||||||
}
|
}
|
||||||
faster_tokenizer::core::Offset byte_offset;
|
fast_tokenizer::core::Offset byte_offset;
|
||||||
converter.convert({start, end}, &byte_offset);
|
converter.convert({start, end}, &byte_offset);
|
||||||
short_texts->emplace_back(text.data() + byte_offset.first,
|
short_texts->emplace_back(text.data() + byte_offset.first,
|
||||||
byte_offset.second - byte_offset.first);
|
byte_offset.second - byte_offset.first);
|
||||||
@@ -344,12 +344,12 @@ void UIEModel::GetSpan(const std::vector<IDX_PROB>& start_idx_prob,
|
|||||||
}
|
}
|
||||||
void UIEModel::GetSpanIdxAndProbs(
|
void UIEModel::GetSpanIdxAndProbs(
|
||||||
const SPAN_SET& span_set,
|
const SPAN_SET& span_set,
|
||||||
const std::vector<faster_tokenizer::core::Offset>& offset_mapping,
|
const std::vector<fast_tokenizer::core::Offset>& offset_mapping,
|
||||||
std::vector<SpanIdx>* span_idxs, std::vector<float>* probs) const {
|
std::vector<SpanIdx>* span_idxs, std::vector<float>* probs) const {
|
||||||
auto first_sep_idx =
|
auto first_sep_idx =
|
||||||
std::find_if(offset_mapping.begin() + 1, offset_mapping.end(),
|
std::find_if(offset_mapping.begin() + 1, offset_mapping.end(),
|
||||||
[](const faster_tokenizer::core::Offset& offset) {
|
[](const fast_tokenizer::core::Offset& offset) {
|
||||||
return offset == faster_tokenizer::core::Offset(0, 0);
|
return offset == fast_tokenizer::core::Offset(0, 0);
|
||||||
});
|
});
|
||||||
auto prompt_end_token_id =
|
auto prompt_end_token_id =
|
||||||
std::distance(offset_mapping.begin(), first_sep_idx) - 1;
|
std::distance(offset_mapping.begin(), first_sep_idx) - 1;
|
||||||
@@ -384,9 +384,9 @@ void UIEModel::ConvertSpanToUIEResult(
|
|||||||
std::string span_text;
|
std::string span_text;
|
||||||
std::vector<uint32_t> offset_mapping;
|
std::vector<uint32_t> offset_mapping;
|
||||||
if (span_idxs[i][j].is_prompt_) {
|
if (span_idxs[i][j].is_prompt_) {
|
||||||
faster_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
|
fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
|
||||||
prompt);
|
prompt);
|
||||||
faster_tokenizer::core::Offset byte_offset;
|
fast_tokenizer::core::Offset byte_offset;
|
||||||
converter.convert({start, end}, &byte_offset);
|
converter.convert({start, end}, &byte_offset);
|
||||||
span_text = prompt.substr(byte_offset.first,
|
span_text = prompt.substr(byte_offset.first,
|
||||||
byte_offset.second - byte_offset.first);
|
byte_offset.second - byte_offset.first);
|
||||||
@@ -394,9 +394,9 @@ void UIEModel::ConvertSpanToUIEResult(
|
|||||||
start = 0;
|
start = 0;
|
||||||
end = 0;
|
end = 0;
|
||||||
} else {
|
} else {
|
||||||
faster_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
|
fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
|
||||||
text);
|
text);
|
||||||
faster_tokenizer::core::Offset byte_offset;
|
fast_tokenizer::core::Offset byte_offset;
|
||||||
converter.convert({start, end}, &byte_offset);
|
converter.convert({start, end}, &byte_offset);
|
||||||
span_text = text.substr(byte_offset.first,
|
span_text = text.substr(byte_offset.first,
|
||||||
byte_offset.second - byte_offset.first);
|
byte_offset.second - byte_offset.first);
|
||||||
@@ -461,14 +461,14 @@ void UIEModel::AutoJoiner(const std::vector<std::string>& short_texts,
|
|||||||
for (auto&& result_idx : input_mapping_item) {
|
for (auto&& result_idx : input_mapping_item) {
|
||||||
if (result_idx == 0) {
|
if (result_idx == 0) {
|
||||||
result_list = std::move((*results)[result_idx]);
|
result_list = std::move((*results)[result_idx]);
|
||||||
offset += faster_tokenizer::utils::GetUnicodeLenFromUTF8(
|
offset += fast_tokenizer::utils::GetUnicodeLenFromUTF8(
|
||||||
short_texts[result_idx].c_str(), short_texts[result_idx].size());
|
short_texts[result_idx].c_str(), short_texts[result_idx].size());
|
||||||
} else {
|
} else {
|
||||||
for (auto&& curr_result : (*results)[result_idx]) {
|
for (auto&& curr_result : (*results)[result_idx]) {
|
||||||
curr_result.start_ += offset;
|
curr_result.start_ += offset;
|
||||||
curr_result.end_ += offset;
|
curr_result.end_ += offset;
|
||||||
}
|
}
|
||||||
offset += faster_tokenizer::utils::GetUnicodeLenFromUTF8(
|
offset += fast_tokenizer::utils::GetUnicodeLenFromUTF8(
|
||||||
short_texts[result_idx].c_str(), short_texts[result_idx].size());
|
short_texts[result_idx].c_str(), short_texts[result_idx].size());
|
||||||
result_list.insert(result_list.end(), (*results)[result_idx].begin(),
|
result_list.insert(result_list.end(), (*results)[result_idx].begin(),
|
||||||
(*results)[result_idx].end());
|
(*results)[result_idx].end());
|
||||||
@@ -521,13 +521,13 @@ bool UIEModel::ConstructTextsAndPrompts(
|
|||||||
auto max_prompt_iter = std::max_element(
|
auto max_prompt_iter = std::max_element(
|
||||||
prompts->begin(), prompts->end(),
|
prompts->begin(), prompts->end(),
|
||||||
[](const std::string& lhs, const std::string& rhs) {
|
[](const std::string& lhs, const std::string& rhs) {
|
||||||
auto lhs_ulen = faster_tokenizer::utils::GetUnicodeLenFromUTF8(
|
auto lhs_ulen = fast_tokenizer::utils::GetUnicodeLenFromUTF8(
|
||||||
lhs.c_str(), lhs.length());
|
lhs.c_str(), lhs.length());
|
||||||
auto rhs_ulen = faster_tokenizer::utils::GetUnicodeLenFromUTF8(
|
auto rhs_ulen = fast_tokenizer::utils::GetUnicodeLenFromUTF8(
|
||||||
rhs.c_str(), rhs.length());
|
rhs.c_str(), rhs.length());
|
||||||
return lhs_ulen < rhs_ulen;
|
return lhs_ulen < rhs_ulen;
|
||||||
});
|
});
|
||||||
auto max_prompt_len = faster_tokenizer::utils::GetUnicodeLenFromUTF8(
|
auto max_prompt_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8(
|
||||||
max_prompt_iter->c_str(), max_prompt_iter->length());
|
max_prompt_iter->c_str(), max_prompt_iter->length());
|
||||||
auto max_predict_len = max_length_ - 3 - max_prompt_len;
|
auto max_predict_len = max_length_ - 3 - max_prompt_len;
|
||||||
|
|
||||||
@@ -547,10 +547,10 @@ bool UIEModel::ConstructTextsAndPrompts(
|
|||||||
void UIEModel::Preprocess(
|
void UIEModel::Preprocess(
|
||||||
const std::vector<std::string>& input_texts,
|
const std::vector<std::string>& input_texts,
|
||||||
const std::vector<std::string>& prompts,
|
const std::vector<std::string>& prompts,
|
||||||
std::vector<faster_tokenizer::core::Encoding>* encodings,
|
std::vector<fast_tokenizer::core::Encoding>* encodings,
|
||||||
std::vector<fastdeploy::FDTensor>* inputs) {
|
std::vector<fastdeploy::FDTensor>* inputs) {
|
||||||
// 1. Tokenize the short texts and short prompts
|
// 1. Tokenize the short texts and short prompts
|
||||||
std::vector<faster_tokenizer::core::EncodeInput> text_pair_input;
|
std::vector<fast_tokenizer::core::EncodeInput> text_pair_input;
|
||||||
for (int i = 0; i < input_texts.size(); ++i) {
|
for (int i = 0; i < input_texts.size(); ++i) {
|
||||||
text_pair_input.emplace_back(
|
text_pair_input.emplace_back(
|
||||||
std::pair<std::string, std::string>(prompts[i], input_texts[i]));
|
std::pair<std::string, std::string>(prompts[i], input_texts[i]));
|
||||||
@@ -596,7 +596,7 @@ void UIEModel::Preprocess(
|
|||||||
|
|
||||||
void UIEModel::Postprocess(
|
void UIEModel::Postprocess(
|
||||||
const std::vector<fastdeploy::FDTensor>& outputs,
|
const std::vector<fastdeploy::FDTensor>& outputs,
|
||||||
const std::vector<faster_tokenizer::core::Encoding>& encodings,
|
const std::vector<fast_tokenizer::core::Encoding>& encodings,
|
||||||
const std::vector<std::string>& short_input_texts,
|
const std::vector<std::string>& short_input_texts,
|
||||||
const std::vector<std::string>& short_prompts,
|
const std::vector<std::string>& short_prompts,
|
||||||
const std::vector<std::vector<size_t>>& input_mapping_with_short_text,
|
const std::vector<std::vector<size_t>>& input_mapping_with_short_text,
|
||||||
@@ -611,7 +611,7 @@ void UIEModel::Postprocess(
|
|||||||
GetCandidateIdx(end_prob, outputs[1].shape[0], outputs[1].shape[1],
|
GetCandidateIdx(end_prob, outputs[1].shape[0], outputs[1].shape[1],
|
||||||
&end_candidate_idx_prob, position_prob_);
|
&end_candidate_idx_prob, position_prob_);
|
||||||
|
|
||||||
std::vector<std::vector<faster_tokenizer::core::Offset>> offset_mapping;
|
std::vector<std::vector<fast_tokenizer::core::Offset>> offset_mapping;
|
||||||
for (int i = 0; i < encodings.size(); ++i) {
|
for (int i = 0; i < encodings.size(); ++i) {
|
||||||
auto&& curr_offsets = encodings[i].GetOffsets();
|
auto&& curr_offsets = encodings[i].GetOffsets();
|
||||||
offset_mapping.push_back(curr_offsets);
|
offset_mapping.push_back(curr_offsets);
|
||||||
@@ -739,7 +739,7 @@ void UIEModel::Predict(
|
|||||||
if (has_prompt) {
|
if (has_prompt) {
|
||||||
// 2. Convert texts and prompts to FDTensor
|
// 2. Convert texts and prompts to FDTensor
|
||||||
std::vector<FDTensor> inputs;
|
std::vector<FDTensor> inputs;
|
||||||
std::vector<faster_tokenizer::core::Encoding> encodings;
|
std::vector<fast_tokenizer::core::Encoding> encodings;
|
||||||
Preprocess(short_input_texts, short_prompts, &encodings, &inputs);
|
Preprocess(short_input_texts, short_prompts, &encodings, &inputs);
|
||||||
|
|
||||||
// 3. Infer
|
// 3. Infer
|
||||||
|
@@ -21,7 +21,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include "fastdeploy/fastdeploy_model.h"
|
#include "fastdeploy/fastdeploy_model.h"
|
||||||
#include "fastdeploy/utils/unique_ptr.h"
|
#include "fastdeploy/utils/unique_ptr.h"
|
||||||
#include "faster_tokenizer/tokenizers/ernie_faster_tokenizer.h"
|
#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h"
|
||||||
|
|
||||||
using namespace paddlenlp;
|
using namespace paddlenlp;
|
||||||
|
|
||||||
@@ -133,11 +133,11 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
|
|||||||
std::vector<std::vector<size_t>>* input_mapping_with_short_text);
|
std::vector<std::vector<size_t>>* input_mapping_with_short_text);
|
||||||
void Preprocess(const std::vector<std::string>& input_texts,
|
void Preprocess(const std::vector<std::string>& input_texts,
|
||||||
const std::vector<std::string>& prompts,
|
const std::vector<std::string>& prompts,
|
||||||
std::vector<faster_tokenizer::core::Encoding>* encodings,
|
std::vector<fast_tokenizer::core::Encoding>* encodings,
|
||||||
std::vector<fastdeploy::FDTensor>* inputs);
|
std::vector<fastdeploy::FDTensor>* inputs);
|
||||||
void Postprocess(
|
void Postprocess(
|
||||||
const std::vector<fastdeploy::FDTensor>& outputs,
|
const std::vector<fastdeploy::FDTensor>& outputs,
|
||||||
const std::vector<faster_tokenizer::core::Encoding>& encodings,
|
const std::vector<fast_tokenizer::core::Encoding>& encodings,
|
||||||
const std::vector<std::string>& short_input_texts,
|
const std::vector<std::string>& short_input_texts,
|
||||||
const std::vector<std::string>& short_prompts,
|
const std::vector<std::string>& short_prompts,
|
||||||
const std::vector<std::vector<size_t>>& input_mapping_with_short_text,
|
const std::vector<std::vector<size_t>>& input_mapping_with_short_text,
|
||||||
@@ -167,7 +167,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
|
|||||||
};
|
};
|
||||||
using SPAN_SET = std::set<std::pair<IDX_PROB, IDX_PROB>, IdxProbCmp>;
|
using SPAN_SET = std::set<std::pair<IDX_PROB, IDX_PROB>, IdxProbCmp>;
|
||||||
struct SpanIdx {
|
struct SpanIdx {
|
||||||
faster_tokenizer::core::Offset offset_;
|
fast_tokenizer::core::Offset offset_;
|
||||||
bool is_prompt_;
|
bool is_prompt_;
|
||||||
};
|
};
|
||||||
void SetValidBackend();
|
void SetValidBackend();
|
||||||
@@ -188,7 +188,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
|
|||||||
SPAN_SET* span_set) const;
|
SPAN_SET* span_set) const;
|
||||||
void GetSpanIdxAndProbs(
|
void GetSpanIdxAndProbs(
|
||||||
const SPAN_SET& span_set,
|
const SPAN_SET& span_set,
|
||||||
const std::vector<faster_tokenizer::core::Offset>& offset_mapping,
|
const std::vector<fast_tokenizer::core::Offset>& offset_mapping,
|
||||||
std::vector<SpanIdx>* span_idxs, std::vector<float>* probs) const;
|
std::vector<SpanIdx>* span_idxs, std::vector<float>* probs) const;
|
||||||
void ConvertSpanToUIEResult(
|
void ConvertSpanToUIEResult(
|
||||||
const std::vector<std::string>& texts,
|
const std::vector<std::string>& texts,
|
||||||
@@ -200,7 +200,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
|
|||||||
size_t max_length_;
|
size_t max_length_;
|
||||||
float position_prob_;
|
float position_prob_;
|
||||||
SchemaLanguage schema_language_;
|
SchemaLanguage schema_language_;
|
||||||
faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer_;
|
fast_tokenizer::tokenizers_impl::ErnieFastTokenizer tokenizer_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace text
|
} // namespace text
|
||||||
|
@@ -46,8 +46,8 @@ if "%__script_action_type%" == "show" (
|
|||||||
echo !__3rd_lib_file! | findstr "opencv">nul && set __3rd_needed_flag=true
|
echo !__3rd_lib_file! | findstr "opencv">nul && set __3rd_needed_flag=true
|
||||||
echo !__3rd_lib_file! | findstr "opencv">nul && set __api_tag=!__api_tag!::vision
|
echo !__3rd_lib_file! | findstr "opencv">nul && set __api_tag=!__api_tag!::vision
|
||||||
if "!__3rd_needed_flag!"=="true" (echo !__3rd_lib_file! | findstr d\.lib>nul && set __3rd_needed_flag=false)
|
if "!__3rd_needed_flag!"=="true" (echo !__3rd_lib_file! | findstr d\.lib>nul && set __3rd_needed_flag=false)
|
||||||
echo !__3rd_lib_file! | findstr "faster_tokenizer">nul && set __3rd_needed_flag=true
|
echo !__3rd_lib_file! | findstr "fast_tokenizer">nul && set __3rd_needed_flag=true
|
||||||
echo !__3rd_lib_file! | findstr "faster_tokenizer">nul && set __api_tag=!__api_tag!::text
|
echo !__3rd_lib_file! | findstr "fast_tokenizer">nul && set __api_tag=!__api_tag!::text
|
||||||
if "!__3rd_needed_flag!"=="true" (echo [Lib] !__3rd_lib_file! **[NEEDED][!__api_tag!]**) else (echo [Lib] !__3rd_lib_file!)
|
if "!__3rd_needed_flag!"=="true" (echo [Lib] !__3rd_lib_file! **[NEEDED][!__api_tag!]**) else (echo [Lib] !__3rd_lib_file!)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -58,8 +58,8 @@ if "%__script_action_type%" == "show" (
|
|||||||
set __3rd_include_dir=%%a && set __3rd_needed_flag=false && set __api_tag=fastdeploy
|
set __3rd_include_dir=%%a && set __3rd_needed_flag=false && set __api_tag=fastdeploy
|
||||||
echo !__3rd_include_dir! | findstr "opencv">nul && set __3rd_needed_flag=true
|
echo !__3rd_include_dir! | findstr "opencv">nul && set __3rd_needed_flag=true
|
||||||
echo !__3rd_include_dir! | findstr "opencv">nul && set __api_tag=!__api_tag!::vision
|
echo !__3rd_include_dir! | findstr "opencv">nul && set __api_tag=!__api_tag!::vision
|
||||||
echo !__3rd_include_dir! | findstr "faster_tokenizer">nul && set __3rd_needed_flag=true
|
echo !__3rd_include_dir! | findstr "fast_tokenizer">nul && set __3rd_needed_flag=true
|
||||||
echo !__3rd_include_dir! | findstr "faster_tokenizer">nul && set __api_tag=!__api_tag!::text
|
echo !__3rd_include_dir! | findstr "fast_tokenizer">nul && set __api_tag=!__api_tag!::text
|
||||||
if "!__3rd_needed_flag!"=="true" (echo [Include] !__3rd_include_dir! **[NEEDED][!__api_tag!]**) else (echo [Include] !__3rd_include_dir!)
|
if "!__3rd_needed_flag!"=="true" (echo [Include] !__3rd_include_dir! **[NEEDED][!__api_tag!]**) else (echo [Include] !__3rd_include_dir!)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -164,4 +164,4 @@ echo ---------------------------------------------------------------------------
|
|||||||
goto:eof
|
goto:eof
|
||||||
@rem end
|
@rem end
|
||||||
|
|
||||||
@echo on
|
@echo on
|
||||||
|
@@ -60,9 +60,4 @@ if [ -d ${INSTALLED_PREBUILT_FASTDEPLOY_DIR}/third_libs/install/paddlelite ]; th
|
|||||||
echo "Paddle Lite Lib: ${INSTALLED_PREBUILT_FASTDEPLOY_DIR}/third_libs/install/paddlelite/lib"
|
echo "Paddle Lite Lib: ${INSTALLED_PREBUILT_FASTDEPLOY_DIR}/third_libs/install/paddlelite/lib"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -d ${INSTALLED_PREBUILT_FASTDEPLOY_DIR}/third_libs/install/faster_tokenizer ]; then
|
|
||||||
export LD_LIBRARY_PATH=${INSTALLED_PREBUILT_FASTDEPLOY_DIR}/third_libs/install/faster_tokenizer/lib:${LD_LIBRARY_PATH}
|
|
||||||
echo "Faster Tokenizer Lib: ${INSTALLED_PREBUILT_FASTDEPLOY_DIR}/third_libs/install/faster_tokenizer/lib"
|
|
||||||
fi
|
|
||||||
|
|
||||||
cd ${CURRENT_EXE_DIR}
|
cd ${CURRENT_EXE_DIR}
|
||||||
|
@@ -35,7 +35,7 @@ RUN apt-get update \
|
|||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev ffmpeg libsm6 libxext6 \
|
&& apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev ffmpeg libsm6 libxext6 \
|
||||||
&& python3 -m pip install -U pip \
|
&& python3 -m pip install -U pip \
|
||||||
&& python3 -m pip install paddlepaddle-gpu paddlenlp faster_tokenizer
|
&& python3 -m pip install paddlepaddle-gpu paddlenlp fast-tokenizer-python
|
||||||
|
|
||||||
COPY python/dist/*.whl /opt/fastdeploy/
|
COPY python/dist/*.whl /opt/fastdeploy/
|
||||||
RUN python3 -m pip install /opt/fastdeploy/*.whl \
|
RUN python3 -m pip install /opt/fastdeploy/*.whl \
|
||||||
|
@@ -19,7 +19,7 @@ ENV TZ=Asia/Shanghai \
|
|||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 ffmpeg libsm6 libxext6 \
|
RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 ffmpeg libsm6 libxext6 \
|
||||||
&& python3 -m pip install -U pip \
|
&& python3 -m pip install -U pip \
|
||||||
&& python3 -m pip install paddlepaddle paddlenlp faster_tokenizer
|
&& python3 -m pip install paddlepaddle paddlenlp fast-tokenizer-python
|
||||||
|
|
||||||
COPY python/dist/*.whl *.whl /opt/fastdeploy/
|
COPY python/dist/*.whl *.whl /opt/fastdeploy/
|
||||||
RUN python3 -m pip install /opt/fastdeploy/*.whl \
|
RUN python3 -m pip install /opt/fastdeploy/*.whl \
|
||||||
|
@@ -58,8 +58,8 @@ set PATH=%FASTDEPLOY_HOME%\third_libs\install\paddle_inference\third_party\insta
|
|||||||
set PATH=%FASTDEPLOY_HOME%\third_libs\install\paddle_inference\third_party\install\mklml\lib;%PATH%
|
set PATH=%FASTDEPLOY_HOME%\third_libs\install\paddle_inference\third_party\install\mklml\lib;%PATH%
|
||||||
set PATH=%FASTDEPLOY_HOME%\third_libs\install\paddle2onnx\lib;%PATH%
|
set PATH=%FASTDEPLOY_HOME%\third_libs\install\paddle2onnx\lib;%PATH%
|
||||||
set PATH=%FASTDEPLOY_HOME%\third_libs\install\tensorrt\lib;%PATH%
|
set PATH=%FASTDEPLOY_HOME%\third_libs\install\tensorrt\lib;%PATH%
|
||||||
set PATH=%FASTDEPLOY_HOME%\third_libs\install\faster_tokenizer\lib;%PATH%
|
set PATH=%FASTDEPLOY_HOME%\third_libs\install\fast_tokenizer\lib;%PATH%
|
||||||
set PATH=%FASTDEPLOY_HOME%\third_libs\install\faster_tokenizer\third_party\lib;%PATH%
|
set PATH=%FASTDEPLOY_HOME%\third_libs\install\fast_tokenizer\third_party\lib;%PATH%
|
||||||
set PATH=%FASTDEPLOY_HOME%\third_libs\install\yaml-cpp\lib;%PATH%
|
set PATH=%FASTDEPLOY_HOME%\third_libs\install\yaml-cpp\lib;%PATH%
|
||||||
set PATH=%FASTDEPLOY_HOME%\third_libs\install\opencv\build\x64\vc15\bin;%PATH%
|
set PATH=%FASTDEPLOY_HOME%\third_libs\install\opencv\build\x64\vc15\bin;%PATH%
|
||||||
set PATH=%FASTDEPLOY_HOME%\third_libs\install\openvino\runtime\bin;%PATH%
|
set PATH=%FASTDEPLOY_HOME%\third_libs\install\openvino\runtime\bin;%PATH%
|
||||||
|
Reference in New Issue
Block a user