Skip to content

Commit

Permalink
Dockerfile - Add support for arm64 build (#660)
Browse files Browse the repository at this point in the history
Add support for arm64 build:

- Updated dockerfile for arm64 build
- extend cpu stream compilation for neoverse 
- handle onnxruntime-gpu installation
- third party builds filtering based on arch
- disable cuda decode perf build for non x86
  • Loading branch information
dpower4 authored Nov 6, 2024
1 parent 59d36f7 commit 4794912
Show file tree
Hide file tree
Showing 7 changed files with 187 additions and 146 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/build-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,25 @@ jobs:
- name: cuda12.4
dockerfile: cuda12.4
tags: superbench/main:cuda12.4
platforms: linux/amd64 # TODO: linux/arm64
runner: [self-hosted]
build_args: "NUM_MAKE_JOBS=16"
- name: cuda12.2
dockerfile: cuda12.2
tags: superbench/main:cuda12.2
platforms: linux/amd64
runner: [self-hosted]
build_args: "NUM_MAKE_JOBS=16"
- name: cuda11.1.1
dockerfile: cuda11.1.1
tags: superbench/main:cuda11.1.1,superbench/superbench:latest
platforms: linux/amd64
runner: ubuntu-latest
build_args: "NUM_MAKE_JOBS=8"
- name: rocm6.2
dockerfile: rocm6.2.x
tags: superbench/main:rocm6.2
platforms: linux/amd64
runner: [self-hosted]
build_args: "NUM_MAKE_JOBS=16"
steps:
Expand Down Expand Up @@ -125,7 +129,7 @@ jobs:
id: docker_build
uses: docker/build-push-action@v2
with:
platforms: linux/amd64
platforms: ${{ matrix.platforms }}
context: .
file: ${{ steps.metadata.outputs.dockerfile }}
push: ${{ github.event_name != 'pull_request' }}
Expand Down
46 changes: 26 additions & 20 deletions dockerfile/cuda12.4.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ FROM nvcr.io/nvidia/pytorch:24.03-py3
LABEL maintainer="SuperBench"

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
apt-get install -y --no-install-recommends \
autoconf \
Expand Down Expand Up @@ -60,11 +61,13 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/* /tmp/*

ARG NUM_MAKE_JOBS=
ARG TARGETPLATFORM
ARG TARGETARCH

# Install Docker
ENV DOCKER_VERSION=20.10.8
RUN cd /tmp && \
wget -q https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}.tgz -O docker.tgz && \
RUN TARGETARCH_HW=$(uname -m) && \
wget -q https://download.docker.com/linux/static/stable/${TARGETARCH_HW}/docker-${DOCKER_VERSION}.tgz -O docker.tgz && \
tar --extract --file docker.tgz --strip-components 1 --directory /usr/local/bin/ && \
rm docker.tgz

Expand All @@ -80,40 +83,43 @@ RUN mkdir -p /root/.ssh && \

# Install OFED
ENV OFED_VERSION=23.07-0.5.1.2
RUN cd /tmp && \
wget -q https://content.mellanox.com/ofed/MLNX_OFED-${OFED_VERSION}/MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu22.04-x86_64.tgz && \
tar xzf MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu22.04-x86_64.tgz && \
MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu22.04-x86_64/mlnxofedinstall --user-space-only --without-fw-update --without-ucx-cuda --force --all && \
RUN TARGETARCH_HW=$(uname -m) && \
cd /tmp && \
wget -q https://content.mellanox.com/ofed/MLNX_OFED-${OFED_VERSION}/MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu22.04-${TARGETARCH_HW}.tgz && \
tar xzf MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu22.04-${TARGETARCH_HW}.tgz && \
MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu22.04-${TARGETARCH_HW}/mlnxofedinstall --user-space-only --without-fw-update --without-ucx-cuda --force --all && \
rm -rf /tmp/MLNX_OFED_LINUX-${OFED_VERSION}*

# Install HPC-X
ENV HPCX_VERSION=v2.18
RUN cd /opt && \
RUN TARGETARCH_HW=$(uname -m) && \
cd /opt && \
rm -rf hpcx && \
wget https://content.mellanox.com/hpc/hpc-x/${HPCX_VERSION}/hpcx-${HPCX_VERSION}-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64.tbz -O hpcx.tbz && \
wget https://content.mellanox.com/hpc/hpc-x/${HPCX_VERSION}/hpcx-${HPCX_VERSION}-gcc-mlnx_ofed-ubuntu22.04-cuda12-${TARGETARCH_HW}.tbz -O hpcx.tbz && \
tar xf hpcx.tbz && \
mv hpcx-${HPCX_VERSION}-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64 hpcx && \
mv hpcx-${HPCX_VERSION}-gcc-mlnx_ofed-ubuntu22.04-cuda12-${TARGETARCH_HW} hpcx && \
rm hpcx.tbz

# Install Intel MLC
RUN cd /tmp && \
# Installs specific to amd64 platform
RUN if [ "$TARGETARCH" = "amd64" ]; then \
# Install Intel MLC
cd /tmp && \
wget -q https://downloadmirror.intel.com/793041/mlc_v3.11.tgz -O mlc.tgz && \
tar xzf mlc.tgz Linux/mlc && \
cp ./Linux/mlc /usr/local/bin/ && \
rm -rf ./Linux mlc.tgz

# Install AOCC compiler
RUN cd /tmp && \
rm -rf ./Linux mlc.tgz && \
# Install AOCC compiler
wget https://download.amd.com/developer/eula/aocc-compiler/aocc-compiler-4.0.0_1_amd64.deb && \
apt install -y ./aocc-compiler-4.0.0_1_amd64.deb && \
rm -rf aocc-compiler-4.0.0_1_amd64.deb

# Install AMD BLIS
RUN cd /tmp && \
rm -rf aocc-compiler-4.0.0_1_amd64.deb && \
# Install AMD BLIS
wget https://download.amd.com/developer/eula/blis/blis-4-0/aocl-blis-linux-aocc-4.0.tar.gz && \
tar xzf aocl-blis-linux-aocc-4.0.tar.gz && \
mv amd-blis /opt/AMD && \
rm -rf aocl-blis-linux-aocc-4.0.tar.gz
rm -rf aocl-blis-linux-aocc-4.0.tar.gz; \
else \
echo "Skipping Intel MLC, AOCC and AMD Bliss installations for non-amd64 architecture: $TARGETARCH"; \
fi

# Install NCCL 2.23.4
RUN cd /tmp && \
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,8 @@ def run(self):
],
'ort': [
'onnx>=1.10.2',
'onnxruntime-gpu==1.10.0; python_version<"3.10"',
'onnxruntime-gpu; python_version>="3.10"',
'onnxruntime-gpu==1.10.0; python_version<"3.10" and platform_machine == "x86_64"',
'onnxruntime-gpu; python_version>="3.10" and platform_machine == "x86_64"',
],
'nvidia': ['py3nvml>=0.2.6'],
'amd': ['amdsmi'],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self, name, parameters=''):
super().__init__(name, parameters)

self._bin_name = 'streamZen3.exe'
self.__cpu_arch = ['other', 'zen3', 'zen4']
self.__cpu_arch = ['other', 'zen3', 'zen4', 'neo2']

def add_parser_arguments(self):
"""Add the specified arguments."""
Expand Down Expand Up @@ -80,6 +80,8 @@ def _preprocess(self):
exe = 'streamZen3.exe'
elif self._args.cpu_arch == 'zen4':
exe = 'streamZen4.exe'
elif self._args.cpu_arch == 'neo2':
exe = 'streamNeo2.exe'
else:
exe = 'streamx86.exe'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,114 +4,120 @@
cmake_minimum_required(VERSION 3.18)
project(cuda_decode_performance)

find_package(CUDA QUIET)
if(CUDA_FOUND)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(THIRD_PARTY_SAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../third_party/Video_Codec_SDK/Samples)
set(NVCODEC_PUBLIC_INTERFACE_DIR ${THIRD_PARTY_SAMPLE_DIR}/../Interface)
set(NVCODEC_UTILS_DIR ${THIRD_PARTY_SAMPLE_DIR}/Utils)
set(NV_CODEC_DIR ${THIRD_PARTY_SAMPLE_DIR}/NvCodec)
set(NV_DEC_DIR ${THIRD_PARTY_SAMPLE_DIR}/NvCodec/NvDecoder)

if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
find_package(PkgConfig REQUIRED)
pkg_check_modules(PC_AVCODEC REQUIRED IMPORTED_TARGET libavcodec)
pkg_check_modules(PC_AVFORMAT REQUIRED IMPORTED_TARGET libavformat)
pkg_check_modules(PC_AVUTIL REQUIRED IMPORTED_TARGET libavutil)
pkg_check_modules(PC_SWRESAMPLE REQUIRED IMPORTED_TARGET libswresample)

set(NV_FFMPEG_HDRS ${PC_AVCODEC_INCLUDE_DIRS})
find_library(AVCODEC_LIBRARY NAMES avcodec
HINTS
${PC_AVCODEC_LIBDIR}
${PC_AVCODEC_LIBRARY_DIRS}
)
find_library(AVFORMAT_LIBRARY NAMES avformat
HINTS
${PC_AVFORMAT_LIBDIR}
${PC_AVFORMAT_LIBRARY_DIRS}
)
find_library(AVUTIL_LIBRARY NAMES avutil
HINTS
${PC_AVUTIL_LIBDIR}
${PC_AVUTIL_LIBRARY_DIRS}
)
find_library(SWRESAMPLE_LIBRARY NAMES swresample
HINTS
${PC_SWRESAMPLE_LIBDIR}
${PC_SWRESAMPLE_LIBRARY_DIRS}
)
set(AVCODEC_LIB ${AVCODEC_LIBRARY})
set(AVFORMAT_LIB ${AVFORMAT_LIBRARY})
set(AVUTIL_LIB ${AVUTIL_LIBRARY})
set(SWRESAMPLE_LIB ${SWRESAMPLE_LIBRARY})
endif()

set(APP_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/AppDecPerf.cpp
)

set(NV_DEC_SOURCES
${NV_DEC_DIR}/NvDecoder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/OptimizedNvDecoder.cpp
)

set(NV_DEC_HDRS
${NV_DEC_DIR}/NvDecoder.h
${NVCODEC_PUBLIC_INTERFACE_DIR}/cuviddec.h
${NVCODEC_PUBLIC_INTERFACE_DIR}/nvcuvid.h
${NVCODEC_UTILS_DIR}/NvCodecUtils.h
${NVCODEC_UTILS_DIR}/FFmpegDemuxer.h
${CMAKE_CURRENT_SOURCE_DIR}/ThreadPoolUtils.h
${CMAKE_CURRENT_SOURCE_DIR}/OptimizedNvDecoder.h
)

source_group( "headers" FILES ${NV_DEC_HDRS} )
source_group( "sources" FILES ${APP_SOURCES} ${NV_DEC_SOURCES})
set(CMAKE_LIBRARY_PATH "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib64;${CUDA_TOOLKIT_ROOT_DIR}/lib;${CMAKE_LIBRARY_PATH}")
find_package(CUDA)
set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_50,code=\"sm_50,compute_50\")
if ( CMAKE_COMPILER_IS_GNUCC )
if(NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=c\\+\\+11" )
list(APPEND CUDA_NVCC_FLAGS -std=c++11)
endif()
endif()

# Check if the file exists
if (NOT EXISTS "/usr/local/lib/libnvcuvid.so" )
execute_process(
COMMAND sudo ln -s /usr/lib/x86_64-linux-gnu/libnvcuvid.so.1 /usr/local/lib/libnvcuvid.so
RESULT_VARIABLE result
)
if(result)
message(FATAL_ERROR "Failed to create symbolic link for nvcuvid lib: ${result}")
endif()
endif ()

find_library(CUVID_LIB nvcuvid
HINTS
"/usr/local/lib/"
"${CMAKE_CURRENT_SOURCE_DIR}/../../../../third_party/Video_Codec_SDK/Lib/linux/stubs/x86_64/"
)

cuda_add_executable(${PROJECT_NAME} ${APP_SOURCES} ${NV_DEC_SOURCES} ${NV_DEC_HDRS})

set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_INCLUDE_DIRS}
${NVCODEC_PUBLIC_INTERFACE_DIR}
${NVCODEC_UTILS_DIR}
${NV_CODEC_DIR}
${NV_APPDEC_COMMON_DIR}
${NV_FFMPEG_HDRS}
${THIRD_PARTY_SAMPLE_DIR}
)

target_link_libraries(${PROJECT_NAME} ${CUDA_CUDA_LIBRARY} ${CMAKE_DL_LIBS} ${CUVID_LIB} ${AVCODEC_LIB}
${AVFORMAT_LIB} ${AVUTIL_LIB} ${SWRESAMPLE_LIB})

install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
# Check architecture
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
message(WARNING "Skipping Cuda decode Performance build. This build only supports x86_64 arch.")
else()
find_package(CUDA QUIET)
if(CUDA_FOUND)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(THIRD_PARTY_SAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../third_party/Video_Codec_SDK/Samples)
set(NVCODEC_PUBLIC_INTERFACE_DIR ${THIRD_PARTY_SAMPLE_DIR}/../Interface)
set(NVCODEC_UTILS_DIR ${THIRD_PARTY_SAMPLE_DIR}/Utils)
set(NV_CODEC_DIR ${THIRD_PARTY_SAMPLE_DIR}/NvCodec)
set(NV_DEC_DIR ${THIRD_PARTY_SAMPLE_DIR}/NvCodec/NvDecoder)

if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
find_package(PkgConfig REQUIRED)
pkg_check_modules(PC_AVCODEC REQUIRED IMPORTED_TARGET libavcodec)
pkg_check_modules(PC_AVFORMAT REQUIRED IMPORTED_TARGET libavformat)
pkg_check_modules(PC_AVUTIL REQUIRED IMPORTED_TARGET libavutil)
pkg_check_modules(PC_SWRESAMPLE REQUIRED IMPORTED_TARGET libswresample)

set(NV_FFMPEG_HDRS ${PC_AVCODEC_INCLUDE_DIRS})
find_library(AVCODEC_LIBRARY NAMES avcodec
HINTS
${PC_AVCODEC_LIBDIR}
${PC_AVCODEC_LIBRARY_DIRS}
)
find_library(AVFORMAT_LIBRARY NAMES avformat
HINTS
${PC_AVFORMAT_LIBDIR}
${PC_AVFORMAT_LIBRARY_DIRS}
)
find_library(AVUTIL_LIBRARY NAMES avutil
HINTS
${PC_AVUTIL_LIBDIR}
${PC_AVUTIL_LIBRARY_DIRS}
)
find_library(SWRESAMPLE_LIBRARY NAMES swresample
HINTS
${PC_SWRESAMPLE_LIBDIR}
${PC_SWRESAMPLE_LIBRARY_DIRS}
)
set(AVCODEC_LIB ${AVCODEC_LIBRARY})
set(AVFORMAT_LIB ${AVFORMAT_LIBRARY})
set(AVUTIL_LIB ${AVUTIL_LIBRARY})
set(SWRESAMPLE_LIB ${SWRESAMPLE_LIBRARY})
endif()

set(APP_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/AppDecPerf.cpp
)

set(NV_DEC_SOURCES
${NV_DEC_DIR}/NvDecoder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/OptimizedNvDecoder.cpp
)

set(NV_DEC_HDRS
${NV_DEC_DIR}/NvDecoder.h
${NVCODEC_PUBLIC_INTERFACE_DIR}/cuviddec.h
${NVCODEC_PUBLIC_INTERFACE_DIR}/nvcuvid.h
${NVCODEC_UTILS_DIR}/NvCodecUtils.h
${NVCODEC_UTILS_DIR}/FFmpegDemuxer.h
${CMAKE_CURRENT_SOURCE_DIR}/ThreadPoolUtils.h
${CMAKE_CURRENT_SOURCE_DIR}/OptimizedNvDecoder.h
)

source_group( "headers" FILES ${NV_DEC_HDRS} )
source_group( "sources" FILES ${APP_SOURCES} ${NV_DEC_SOURCES})
set(CMAKE_LIBRARY_PATH "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib64;${CUDA_TOOLKIT_ROOT_DIR}/lib;${CMAKE_LIBRARY_PATH}")
find_package(CUDA)
set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_50,code=\"sm_50,compute_50\")
if ( CMAKE_COMPILER_IS_GNUCC )
if(NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=c\\+\\+11" )
list(APPEND CUDA_NVCC_FLAGS -std=c++11)
endif()
endif()

# Check if the file exists
if (NOT EXISTS "/usr/local/lib/libnvcuvid.so" )
execute_process(
COMMAND sudo ln -s /usr/lib/x86_64-linux-gnu/libnvcuvid.so.1 /usr/local/lib/libnvcuvid.so
RESULT_VARIABLE result
)
if(result)
message(FATAL_ERROR "Failed to create symbolic link for nvcuvid lib: ${result}")
endif()
endif ()

find_library(CUVID_LIB nvcuvid
HINTS
"/usr/local/lib/"
"${CMAKE_CURRENT_SOURCE_DIR}/../../../../third_party/Video_Codec_SDK/Lib/linux/stubs/x86_64/"
)

cuda_add_executable(${PROJECT_NAME} ${APP_SOURCES} ${NV_DEC_SOURCES} ${NV_DEC_HDRS})

set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_INCLUDE_DIRS}
${NVCODEC_PUBLIC_INTERFACE_DIR}
${NVCODEC_UTILS_DIR}
${NV_CODEC_DIR}
${NV_APPDEC_COMMON_DIR}
${NV_FFMPEG_HDRS}
${THIRD_PARTY_SAMPLE_DIR}
)

target_link_libraries(${PROJECT_NAME} ${CUDA_CUDA_LIBRARY} ${CMAKE_DL_LIBS} ${CUVID_LIB} ${AVCODEC_LIB}
${AVFORMAT_LIB} ${AVUTIL_LIB} ${SWRESAMPLE_LIB})

install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
endif()

endif()
Loading

0 comments on commit 4794912

Please sign in to comment.