diff --git a/.github/workflows/ci_metax.yml b/.github/workflows/ci_metax.yml
new file mode 100644
index 000000000..0c5c56158
--- /dev/null
+++ b/.github/workflows/ci_metax.yml
@@ -0,0 +1,162 @@
+name: CI_METAX
+
+on:
+  workflow_dispatch:
+  pull_request:
+    types: [opened, synchronize]
+    branches: [develop, release/**]
+
+concurrency:
+  group: ${{ github.event.pull_request.number }}-metax-ci
+  cancel-in-progress: true
+
+permissions: read-all
+
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  CI_METAX:
+    runs-on: pde-ai2-squad3-fastdeploy-runner-set
+    env:
+      PR_ID: ${{ github.event.pull_request.number }}
+      COMMIT_ID: ${{ github.event.pull_request.head.sha }}
+      BRANCH: develop
+    steps:
+      - name: Checkout repository
+        id: run-metax
+        if: steps.check-bypass.outputs.can-skip != 'true'
+        run: |
+          export DATE_NUMBER=$(date +%Y%m%d)
+          export PREV_DATE_NUMBER=$(date -d "yesterday" +%Y%m%d)
+
+          git config --global user.name "GitHub Actions"
+          git config --global user.email "actions@github.com"
+
+          git clone https://${{ github.actor }}:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git .
+          # git reset --hard 21f138f68be16c815f60496c1bc5ea69b511f8cc
+          # git revert --no-edit 2e1680838f5e99e4ea5c5bc4251365d9add0f62f
+
+          MODIFIED_FILES=""
+          if [ "${{ github.event_name }}" == "pull_request" ]; then
+            git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/head
+            # git rebase pull/${{ github.event.pull_request.number }}/head
+            git cherry-pick FETCH_HEAD
+
+            echo -e "\n=========== Git log info ==========="
+            git --no-pager log --pretty=oneline -5
+            MODIFIED_FILES=$(git --no-pager diff --name-only HEAD^ HEAD | grep -v '^$' || true)
+          fi
+
+          if [ -z "$MODIFIED_FILES" ]; then
+            echo "No file change, skip metax ci."
+            exit 0
+          fi
+
+          echo -e "\n=========== PR change file list ==========="
+          echo "$MODIFIED_FILES"
+
+          echo -e "\n=========== Start Metax CI Trigger Check ==========="
+          echo -e "\nTarget comparison branch: remotes/origin/${BRANCH}"
+          FOLLOW_PATH=(
+            "custom_ops/cpu_ops/"
+            "custom_ops/gpu_ops/"
+            "custom_ops/metax_ops/"
+            "custom_ops/*.py"
+            "fastdeploy/"
+            ".github/"
+            "setup.py"
+            "build.sh"
+            "tests/ci_use/Metax_UT/"
+          )
+          echo -e "\nFollow paths to be checked: ${FOLLOW_PATH[*]}"
+
+          trigger_ci=false
+          while IFS= read -r file; do
+            for target_path in "${FOLLOW_PATH[@]}"; do
+              if [[ $file == $target_path || $file == $target_path* ]]; then
+                trigger_ci=true
+                break
+              fi
+            done
+            if [ "$trigger_ci" = true ]; then
+              break
+            fi
+          done <<< "$MODIFIED_FILES"
+
+          if [ "$trigger_ci" = true ]; then
+            echo -e "\nFile changes found in follow paths, continue metax ci."
+          else
+            echo -e "\nNo file changes found in follow paths, skip metax ci."
+            exit 0
+          fi
+
+          METAX_PADDLE_CUSTOM_DEVICE_WHL_SOURCE=oss://opensource-ci/paddle
+          METAX_PADDLE_CUSTOM_DEVICE_WHL_NAME=paddle_metax_gpu-3.3.0.dev${DATE_NUMBER}+maca0.0.0-cp310-cp310-linux_x86_64.whl
+          # METAX_PADDLE_CUSTOM_DEVICE_WHL_NAME=paddle_metax_gpu-3.3.0.dev20251210+maca0.0.0-cp310-cp310-linux_x86_64.whl
+          PADDLE_PADDLE_WHL_SOURCE=https://paddle-whl.bj.bcebos.com/nightly/cpu/paddlepaddle
+          PADDLE_PADDLE_WHL_NAME=paddlepaddle-3.3.0.dev${PREV_DATE_NUMBER}-cp310-cp310-linux_x86_64.whl
+          # PADDLE_PADDLE_WHL_NAME=paddlepaddle-3.3.0.dev20251209-cp310-cp310-linux_x86_64.whl
+
+          echo -e "\n=========== Pull [ ${METAX_PADDLE_CUSTOM_DEVICE_WHL_NAME} ] from [ ${METAX_PADDLE_CUSTOM_DEVICE_WHL_SOURCE} ] ==========="
+          ossutil cp ${METAX_PADDLE_CUSTOM_DEVICE_WHL_SOURCE}/${METAX_PADDLE_CUSTOM_DEVICE_WHL_NAME} .
+
+          echo -e "\n=========== Pip install [ ${PADDLE_PADDLE_WHL_NAME} ] from [ ${PADDLE_PADDLE_WHL_SOURCE} ] ==========="
+          python -m pip install ${PADDLE_PADDLE_WHL_SOURCE}/${PADDLE_PADDLE_WHL_NAME}
+
+          echo -e "\n=========== Pip install [ ${METAX_PADDLE_CUSTOM_DEVICE_WHL_NAME} ] ==========="
+          python -m pip install ${METAX_PADDLE_CUSTOM_DEVICE_WHL_NAME}
+
+          echo -e "\n=========== Pip install [ use_triton_in_paddle ] ==========="
+          python -m pip install /data/maca3.0-release-2.3/use_triton_in_paddle-0.0.0-py3-none-any.whl
+
+      - name: Compile
+        run: |
+          export MACA_PATH=/opt/maca
+
+          if [ ! -d ${HOME}/cu-bridge ]; then
+            ${MACA_PATH}/tools/cu-bridge/tools/pre_make
+          fi
+
+          export CUDA_PATH=${HOME}/cu-bridge/CUDA_DIR
+          export LD_LIBRARY_PATH=${CUDA_PATH}/lib64:${MACA_PATH}/lib:${MACA_PATH}/mxgpu_llvm/lib:$LD_LIBRARY_PATH
+
+          PACKAGES_LINK=~/.local/lib/python3.10/site-packages
+          REPLACE_FILES_PATH=/data/maca3.0-release-2.3
+          cp ${REPLACE_FILES_PATH}/all_reduce.py ${PACKAGES_LINK}/paddle/distributed/communication/
+          cp ${REPLACE_FILES_PATH}/all_gather.py ${PACKAGES_LINK}/paddle/distributed/communication/
+          cp ${REPLACE_FILES_PATH}/broadcast.py ${PACKAGES_LINK}/paddle/distributed/communication/
+
+          cp ${REPLACE_FILES_PATH}/cublasLt.h ${PACKAGES_LINK}/paddle/include/paddle/phi/backends/dynload/
+
+          cp ${REPLACE_FILES_PATH}/all_things.py ${PACKAGES_LINK}/use_triton_in_paddle/cuda/
+
+          sudo chmod -R 777 ${REPLACE_FILES_PATH}/mctlass
+          sudo cp ${REPLACE_FILES_PATH}/mctlass/mctlassEx.h /opt/maca/include/mctlassEx/
+          sudo cp ${REPLACE_FILES_PATH}/mctlass/libmctlassEx.so /opt/maca/lib/
+          sudo cp ${REPLACE_FILES_PATH}/mctlass/mctlassEx_xcore1000.mcfb /opt/maca/lib/
+
+          bash build.sh
+
+          echo -e "\n=========== Pip install [ triton-3.0.0+metax3.0.0.3 ] ==========="
+          python -m pip install /data/maca3.0-release-2.3/triton-3.0.0+metax3.0.0.3-cp310-cp310-linux_x86_64.whl
+
+      - name: Run test
+        run: |
+          exit_code=0
+          ignore_error() {
+            local cmd="$*"
+            echo "Execute command - [ $cmd ]"
+            eval "$cmd" || {
+              exit_code=$?
+              echo -e "\n=========== ⚠️ Instruction execution failed (exit code $exit_code), ignore and continue. ==========="
===========" + } + } + + ignore_error "timeout -s 9 600s python tests/ci_use/Metax_UT/run_ernie_vl_28B.py" + + echo -e "\n=========== Fastdeploy workerlog.0 ===========" + cat log/workerlog.0 + + exit ${exit_code} diff --git a/tests/ci_use/Metax_UT/run_ernie_vl_28B.py b/tests/ci_use/Metax_UT/run_ernie_vl_28B.py new file mode 100644 index 000000000..59105f38c --- /dev/null +++ b/tests/ci_use/Metax_UT/run_ernie_vl_28B.py @@ -0,0 +1,36 @@ +import os + +os.environ["MACA_VISIBLE_DEVICES"] = "0,1" +os.environ["FD_MOE_BACKEND"] = "cutlass" +os.environ["PADDLE_XCCL_BACKEND"] = "metax_gpu" +os.environ["FLAGS_weight_only_linear_arch"] = "80" +os.environ["FD_METAX_KVCACHE_MEM"] = "8" +os.environ["ENABLE_V1_KVCACHE_SCHEDULER"] = "1" +os.environ["FD_ENC_DEC_BLOCK_NUM"] = "2" + + +import fastdeploy + +sampling_params = fastdeploy.SamplingParams(top_p=0.95, max_tokens=2048, temperature=0.6) + +llm = fastdeploy.LLM( + model="/data/models/PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Thinking", + tensor_parallel_size=2, + engine_worker_queue_port=8899, + max_model_len=2048, + quantization="wint8", + load_choices="default_v1", + disable_custom_all_reduce=True, +) + +prompts = [ + "A robe takes 2 bolts of blue fiber and half that much white fiber. How many bolts in total does it take?", +] + +outputs = llm.generate(prompts, sampling_params) + +for output in outputs: + prompt = output.prompt + generated_text = output.outputs.text + print(f"Prompt: {prompt!r}") + print(f"Generated: {generated_text!r}")