# Captured from the GitHub page for:
#   "Add FLOAT8E8M0 data type support in ONNX Runtime" #1558

# Windows CI for the CUDA execution provider built as a plugin: one job builds
# ONNX Runtime and publishes the build tree, a second job downloads it and runs
# the plugin's Python tests.
name: CUDA Plugin Windows CI

on:
  # Runs for pushes to, and pull requests targeting, main and release branches.
  push:
    branches: [main, 'rel-*']
  pull_request:
    branches: [main, 'rel-*']
  # Allows the workflow to be started manually.
  workflow_dispatch:

# Pull-request runs are grouped by ref, all other events by commit SHA; a newer
# run in the same group cancels the one still in progress.
concurrency:
  group: >-
    ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
  cancel-in-progress: true
jobs:
  # Builds ONNX Runtime (Release) with the CUDA EP compiled as a plugin and
  # uploads the trimmed build directory as an artifact for the test job.
  build:
    name: Windows CUDA Plugin EP Build
    runs-on: [
      "self-hosted",
      "1ES.Pool=onnxruntime-github-vs2022-latest",
      "JobId=windows-cuda-plugin-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
    ]
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          submodules: 'none'

      - uses: actions/setup-python@v6
        with:
          python-version: '3.14'
          architecture: x64

      - name: Locate vcvarsall and Setup Env
        uses: ./.github/actions/locate-vcvarsall-and-setup-env
        with:
          architecture: x64

      - name: Install python modules
        run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
        working-directory: ${{ github.workspace }}
        shell: cmd

      # Copies the CUDA SDK from blob storage into the runner temp directory,
      # producing "<temp>\v12.8".
      - name: Download CUDA SDK v12.8
        working-directory: ${{ runner.temp }}
        run: |
          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
          dir
        shell: pwsh

      # Uses pwsh like the other PowerShell steps so that lines appended to
      # GITHUB_PATH are written as UTF-8 by default.
      - name: Add CUDA to PATH
        shell: pwsh
        run: |
          Write-Host "Adding CUDA to PATH"
          Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"

      # Exports OnnxRuntimeBuildDirectory for later steps. The temp path is read
      # from the environment instead of being pasted into the script text, so a
      # path containing spaces cannot split the Join-Path arguments.
      - name: Set OnnxRuntimeBuildDirectory
        shell: pwsh
        run: |
          $buildDir = Join-Path $env:RUNNER_TEMP "build"
          echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV

      - name: Build ONNX Runtime with CUDA Plugin EP
        working-directory: ${{ runner.temp }}
        run: |
          python.exe "$env:GITHUB_WORKSPACE\tools\ci_build\build.py" `
            --update --build --config Release `
            --build_dir build `
            --skip_submodule_sync `
            --parallel `
            --nvcc_threads 1 `
            --use_binskim_compliant_compile_flags `
            --cmake_generator "Visual Studio 17 2022" `
            --build_shared_lib `
            --build_wheel `
            --use_cuda `
            --cuda_home="$env:RUNNER_TEMP\v12.8" `
            --skip_tests `
            --use_vcpkg `
            --use_vcpkg_ms_internal_asset_cache `
            --enable_cuda_profiling `
            --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 `
            --cmake_extra_defines onnxruntime_BUILD_CUDA_EP_AS_PLUGIN=ON
          # Stop before the cleanup if the build itself failed.
          if ($lastExitCode -ne 0) {
            exit $lastExitCode
          }
          # Clean up intermediate files before uploading artifacts
          $outputDir = "$env:RUNNER_TEMP\build\Release"
          Write-Host "Cleaning up files from $outputDir..."
          Remove-Item -Path "$outputDir\onnxruntime" -Recurse -Force -ErrorAction SilentlyContinue
          Remove-Item -Path "$outputDir\pybind11" -Recurse -Force -ErrorAction SilentlyContinue
          Remove-Item -Path "$outputDir\models" -Recurse -Force -ErrorAction SilentlyContinue
          Remove-Item -Path "$outputDir\vcpkg_installed" -Recurse -Force -ErrorAction SilentlyContinue
          Remove-Item -Path "$outputDir\_deps" -Recurse -Force -ErrorAction SilentlyContinue
          Remove-Item -Path "$outputDir\CMakeCache.txt" -Force -ErrorAction SilentlyContinue
          Remove-Item -Path "$outputDir\CMakeFiles" -Recurse -Force -ErrorAction SilentlyContinue
          # Enumerate the object files first and pipe them to Remove-Item:
          # Remove-Item's own -Recurse combined with -Include is documented as
          # unreliable.
          Get-ChildItem -Path $outputDir -Include "*.obj" -Recurse -File | Remove-Item -Force
        shell: pwsh

      - name: Upload build artifacts
        uses: actions/upload-artifact@v6
        with:
          name: cuda-plugin-build-artifacts
          path: ${{ runner.temp }}\build

    # Job-level environment shared by all steps above; the AZCOPY_* values are
    # needed by the azcopy download step.
    env:
      DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 'true'
      setVcvars: 'true'
      ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
      ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
      AZCOPY_AUTO_LOGIN_TYPE: MSI
      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4

  # Downloads the build artifact, installs the wheel it contains and runs the
  # CUDA plugin EP Python test script against the built plugin DLL.
  test:
    name: Windows CUDA Plugin EP Test
    needs: build
    timeout-minutes: 120
    runs-on: [
      "self-hosted",
      "1ES.Pool=onnxruntime-github-Win2022-GPU-A10",
      "JobId=windows-cuda-plugin-test-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
    ]
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          submodules: 'none'

      # Restores the build tree to the same location the build job used.
      - name: Download build artifacts
        uses: actions/download-artifact@v7
        with:
          name: cuda-plugin-build-artifacts
          path: ${{ runner.temp }}\build

      - uses: actions/setup-python@v6
        with:
          python-version: '3.14'
          architecture: x64

      - name: Locate vcvarsall and Setup Env
        uses: ./.github/actions/locate-vcvarsall-and-setup-env
        with:
          architecture: x64

      - name: Install python modules
        run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
        working-directory: ${{ github.workspace }}
        shell: cmd

      - name: Install torch for CPU only
        run: python -m pip install torch
        working-directory: ${{ github.workspace }}
        shell: cmd

      # Copies the CUDA SDK from blob storage into the runner temp directory,
      # producing "<temp>\v12.8".
      - name: Download CUDA SDK v12.8
        working-directory: ${{ runner.temp }}
        run: |
          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
          dir
        shell: pwsh

      # Uses pwsh like the other PowerShell steps so that lines appended to
      # GITHUB_PATH are written as UTF-8 by default.
      - name: Add CUDA to PATH
        shell: pwsh
        run: |
          Write-Host "Adding CUDA to PATH"
          Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"

      # Exports OnnxRuntimeBuildDirectory for later steps; the temp path is read
      # from the environment so a path with spaces cannot split the arguments.
      - name: Set OnnxRuntimeBuildDirectory
        shell: pwsh
        run: |
          $buildDir = Join-Path $env:RUNNER_TEMP "build"
          echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV

      - name: Install ONNX Runtime Wheel
        uses: ./.github/actions/install-onnxruntime-wheel
        with:
          whl-directory: ${{ runner.temp }}\build\Release\Release\dist

      - name: Run CUDA Plugin EP Python Tests
        working-directory: ${{ github.workspace }}\onnxruntime\test\python\transformers
        shell: pwsh
        run: |
          # The plugin DLL location is handed to the test script via ORT_CUDA_PLUGIN_PATH.
          $env:ORT_CUDA_PLUGIN_PATH = "$env:RUNNER_TEMP\build\Release\Release\onnxruntime_providers_cuda_plugin.dll"
          Write-Host "ORT_CUDA_PLUGIN_PATH=$env:ORT_CUDA_PLUGIN_PATH"
          # Fail early with a clear message if the artifact did not contain the DLL.
          if (-not (Test-Path $env:ORT_CUDA_PLUGIN_PATH)) {
            Write-Error "CUDA plugin EP library not found at $env:ORT_CUDA_PLUGIN_PATH"
            exit 1
          }
          python test_cuda_plugin_ep.py
          if ($lastExitCode -ne 0) {
            exit $lastExitCode
          }

    # Job-level environment shared by all steps above; the AZCOPY_* values are
    # needed by the azcopy download step.
    env:
      DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 'true'
      setVcvars: 'true'
      ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
      ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
      AZCOPY_AUTO_LOGIN_TYPE: MSI
      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4