Skip to content

Commit

Permalink
fix PR comments
Browse files Browse the repository at this point in the history
cleanup third-party Makefile and stream tests Makefile. fix missing gpcnet. fix lint in cuda decode perf Makefile.
  • Loading branch information
dpower4 committed Nov 1, 2024
1 parent 9672133 commit 3e7136f
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 142 deletions.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,8 @@ def run(self):
],
'ort': [
'onnx>=1.10.2',
'onnxruntime-gpu==1.10.0; python_version<"3.10" and platform_machine != "aarch64"',
'onnxruntime-gpu; python_version>="3.10" and platform_machine != "aarch64"',
'onnxruntime-gpu==1.10.0; python_version<"3.10" and platform_machine == "x86_64"',
'onnxruntime-gpu; python_version>="3.10" and platform_machine == "x86_64"',
],
'nvidia': ['py3nvml>=0.2.6'],
'amd': ['amdsmi'],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,121 +4,120 @@
cmake_minimum_required(VERSION 3.18)
project(cuda_decode_performance)


# Check architecture.
# The benchmark is skipped (with a warning) when the target processor is
# aarch64/arm64, and silently skipped when no CUDA toolkit is found.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
    message(WARNING "Skipping Cuda decode Performance build. This build only supports x86_64 arch.")
else()
    find_package(CUDA QUIET)
    if(CUDA_FOUND)
        set(CMAKE_CXX_STANDARD 17)
        set(CMAKE_CXX_STANDARD_REQUIRED ON)

        # Locations inside the Video_Codec_SDK tree under third_party.
        set(THIRD_PARTY_SAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../third_party/Video_Codec_SDK/Samples)
        set(NVCODEC_PUBLIC_INTERFACE_DIR ${THIRD_PARTY_SAMPLE_DIR}/../Interface)
        set(NVCODEC_UTILS_DIR ${THIRD_PARTY_SAMPLE_DIR}/Utils)
        set(NV_CODEC_DIR ${THIRD_PARTY_SAMPLE_DIR}/NvCodec)
        set(NV_DEC_DIR ${THIRD_PARTY_SAMPLE_DIR}/NvCodec/NvDecoder)

        # On Linux, locate the FFmpeg libraries through pkg-config and resolve
        # each one to a full library path for linking.
        if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
            find_package(PkgConfig REQUIRED)
            pkg_check_modules(PC_AVCODEC REQUIRED IMPORTED_TARGET libavcodec)
            pkg_check_modules(PC_AVFORMAT REQUIRED IMPORTED_TARGET libavformat)
            pkg_check_modules(PC_AVUTIL REQUIRED IMPORTED_TARGET libavutil)
            pkg_check_modules(PC_SWRESAMPLE REQUIRED IMPORTED_TARGET libswresample)

            set(NV_FFMPEG_HDRS ${PC_AVCODEC_INCLUDE_DIRS})
            find_library(AVCODEC_LIBRARY NAMES avcodec
                HINTS
                ${PC_AVCODEC_LIBDIR}
                ${PC_AVCODEC_LIBRARY_DIRS}
            )
            find_library(AVFORMAT_LIBRARY NAMES avformat
                HINTS
                ${PC_AVFORMAT_LIBDIR}
                ${PC_AVFORMAT_LIBRARY_DIRS}
            )
            find_library(AVUTIL_LIBRARY NAMES avutil
                HINTS
                ${PC_AVUTIL_LIBDIR}
                ${PC_AVUTIL_LIBRARY_DIRS}
            )
            find_library(SWRESAMPLE_LIBRARY NAMES swresample
                HINTS
                ${PC_SWRESAMPLE_LIBDIR}
                ${PC_SWRESAMPLE_LIBRARY_DIRS}
            )
            set(AVCODEC_LIB ${AVCODEC_LIBRARY})
            set(AVFORMAT_LIB ${AVFORMAT_LIBRARY})
            set(AVUTIL_LIB ${AVUTIL_LIBRARY})
            set(SWRESAMPLE_LIB ${SWRESAMPLE_LIBRARY})
        endif()

        set(APP_SOURCES
            ${CMAKE_CURRENT_SOURCE_DIR}/AppDecPerf.cpp
        )

        set(NV_DEC_SOURCES
            ${NV_DEC_DIR}/NvDecoder.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/OptimizedNvDecoder.cpp
        )

        set(NV_DEC_HDRS
            ${NV_DEC_DIR}/NvDecoder.h
            ${NVCODEC_PUBLIC_INTERFACE_DIR}/cuviddec.h
            ${NVCODEC_PUBLIC_INTERFACE_DIR}/nvcuvid.h
            ${NVCODEC_UTILS_DIR}/NvCodecUtils.h
            ${NVCODEC_UTILS_DIR}/FFmpegDemuxer.h
            ${CMAKE_CURRENT_SOURCE_DIR}/ThreadPoolUtils.h
            ${CMAKE_CURRENT_SOURCE_DIR}/OptimizedNvDecoder.h
        )

        source_group( "headers" FILES ${NV_DEC_HDRS} )
        source_group( "sources" FILES ${APP_SOURCES} ${NV_DEC_SOURCES})

        # Prepend the CUDA toolkit library and stub directories to the library
        # search path before running find_package(CUDA) a second time.
        set(CMAKE_LIBRARY_PATH "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib64;${CUDA_TOOLKIT_ROOT_DIR}/lib;${CMAKE_LIBRARY_PATH}")
        find_package(CUDA)
        set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_50,code=\"sm_50,compute_50\")
        if ( CMAKE_COMPILER_IS_GNUCC )
            if(NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=c\\+\\+11" )
                list(APPEND CUDA_NVCC_FLAGS -std=c++11)
            endif()
        endif()

        # Locate the NVIDIA video decode library without modifying the host.
        # Search order (per directory, both names are tried before moving on):
        #   1. /usr/local/lib             - honours a link created by earlier setups
        #   2. /usr/lib/x86_64-linux-gnu  - the driver's versioned libnvcuvid.so.1
        #   3. the stub shipped in the Video_Codec_SDK tree
        # This replaces a configure-time `sudo ln -s`, which required root and
        # failed on re-configure when the link already existed but was dangling
        # (if(EXISTS) follows symlinks, so the link was re-created and ln errored).
        find_library(CUVID_LIB
            NAMES nvcuvid libnvcuvid.so.1
            NAMES_PER_DIR
            HINTS
            "/usr/local/lib/"
            "/usr/lib/x86_64-linux-gnu"
            "${CMAKE_CURRENT_SOURCE_DIR}/../../../../third_party/Video_Codec_SDK/Lib/linux/stubs/x86_64/"
        )
        if(NOT CUVID_LIB)
            message(FATAL_ERROR "Failed to locate the nvcuvid library (searched /usr/local/lib, /usr/lib/x86_64-linux-gnu and the Video_Codec_SDK stubs).")
        endif()

        cuda_add_executable(${PROJECT_NAME} ${APP_SOURCES} ${NV_DEC_SOURCES} ${NV_DEC_HDRS})

        set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

        # NOTE(review): NV_APPDEC_COMMON_DIR is not set anywhere in this file; it
        # expands to nothing unless provided by a parent scope or the cache.
        target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_INCLUDE_DIRS}
            ${NVCODEC_PUBLIC_INTERFACE_DIR}
            ${NVCODEC_UTILS_DIR}
            ${NV_CODEC_DIR}
            ${NV_APPDEC_COMMON_DIR}
            ${NV_FFMPEG_HDRS}
            ${THIRD_PARTY_SAMPLE_DIR}
        )

        # Keep the plain (keyword-less) signature: cuda_add_executable() already
        # links the target with the plain signature, and CMake forbids mixing the
        # plain and keyword signatures on one target.
        target_link_libraries(${PROJECT_NAME} ${CUDA_CUDA_LIBRARY} ${CMAKE_DL_LIBS} ${CUVID_LIB} ${AVCODEC_LIB}
            ${AVFORMAT_LIB} ${AVUTIL_LIB} ${SWRESAMPLE_LIB})

        install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
    endif()

endif()
# Check architecture.
# The benchmark is skipped (with a warning) when the target processor is
# aarch64/arm64, and silently skipped when no CUDA toolkit is found.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
    message(WARNING "Skipping Cuda decode Performance build. This build only supports x86_64 arch.")
else()
    find_package(CUDA QUIET)
    if(CUDA_FOUND)
        set(CMAKE_CXX_STANDARD 17)
        set(CMAKE_CXX_STANDARD_REQUIRED ON)

        # Locations inside the Video_Codec_SDK tree under third_party.
        set(THIRD_PARTY_SAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../third_party/Video_Codec_SDK/Samples)
        set(NVCODEC_PUBLIC_INTERFACE_DIR ${THIRD_PARTY_SAMPLE_DIR}/../Interface)
        set(NVCODEC_UTILS_DIR ${THIRD_PARTY_SAMPLE_DIR}/Utils)
        set(NV_CODEC_DIR ${THIRD_PARTY_SAMPLE_DIR}/NvCodec)
        set(NV_DEC_DIR ${THIRD_PARTY_SAMPLE_DIR}/NvCodec/NvDecoder)

        # On Linux, locate the FFmpeg libraries through pkg-config and resolve
        # each one to a full library path for linking.
        if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
            find_package(PkgConfig REQUIRED)
            pkg_check_modules(PC_AVCODEC REQUIRED IMPORTED_TARGET libavcodec)
            pkg_check_modules(PC_AVFORMAT REQUIRED IMPORTED_TARGET libavformat)
            pkg_check_modules(PC_AVUTIL REQUIRED IMPORTED_TARGET libavutil)
            pkg_check_modules(PC_SWRESAMPLE REQUIRED IMPORTED_TARGET libswresample)

            set(NV_FFMPEG_HDRS ${PC_AVCODEC_INCLUDE_DIRS})
            find_library(AVCODEC_LIBRARY NAMES avcodec
                HINTS
                ${PC_AVCODEC_LIBDIR}
                ${PC_AVCODEC_LIBRARY_DIRS}
            )
            find_library(AVFORMAT_LIBRARY NAMES avformat
                HINTS
                ${PC_AVFORMAT_LIBDIR}
                ${PC_AVFORMAT_LIBRARY_DIRS}
            )
            find_library(AVUTIL_LIBRARY NAMES avutil
                HINTS
                ${PC_AVUTIL_LIBDIR}
                ${PC_AVUTIL_LIBRARY_DIRS}
            )
            find_library(SWRESAMPLE_LIBRARY NAMES swresample
                HINTS
                ${PC_SWRESAMPLE_LIBDIR}
                ${PC_SWRESAMPLE_LIBRARY_DIRS}
            )
            set(AVCODEC_LIB ${AVCODEC_LIBRARY})
            set(AVFORMAT_LIB ${AVFORMAT_LIBRARY})
            set(AVUTIL_LIB ${AVUTIL_LIBRARY})
            set(SWRESAMPLE_LIB ${SWRESAMPLE_LIBRARY})
        endif()

        set(APP_SOURCES
            ${CMAKE_CURRENT_SOURCE_DIR}/AppDecPerf.cpp
        )

        set(NV_DEC_SOURCES
            ${NV_DEC_DIR}/NvDecoder.cpp
            ${CMAKE_CURRENT_SOURCE_DIR}/OptimizedNvDecoder.cpp
        )

        set(NV_DEC_HDRS
            ${NV_DEC_DIR}/NvDecoder.h
            ${NVCODEC_PUBLIC_INTERFACE_DIR}/cuviddec.h
            ${NVCODEC_PUBLIC_INTERFACE_DIR}/nvcuvid.h
            ${NVCODEC_UTILS_DIR}/NvCodecUtils.h
            ${NVCODEC_UTILS_DIR}/FFmpegDemuxer.h
            ${CMAKE_CURRENT_SOURCE_DIR}/ThreadPoolUtils.h
            ${CMAKE_CURRENT_SOURCE_DIR}/OptimizedNvDecoder.h
        )

        source_group( "headers" FILES ${NV_DEC_HDRS} )
        source_group( "sources" FILES ${APP_SOURCES} ${NV_DEC_SOURCES})

        # Prepend the CUDA toolkit library and stub directories to the library
        # search path before running find_package(CUDA) a second time.
        set(CMAKE_LIBRARY_PATH "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib64;${CUDA_TOOLKIT_ROOT_DIR}/lib;${CMAKE_LIBRARY_PATH}")
        find_package(CUDA)
        set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_50,code=\"sm_50,compute_50\")
        if ( CMAKE_COMPILER_IS_GNUCC )
            if(NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=c\\+\\+11" )
                list(APPEND CUDA_NVCC_FLAGS -std=c++11)
            endif()
        endif()

        # Locate the NVIDIA video decode library without modifying the host.
        # Search order (per directory, both names are tried before moving on):
        #   1. /usr/local/lib             - honours a link created by earlier setups
        #   2. /usr/lib/x86_64-linux-gnu  - the driver's versioned libnvcuvid.so.1
        #   3. the stub shipped in the Video_Codec_SDK tree
        # This replaces a configure-time `sudo ln -s`, which required root and
        # failed on re-configure when the link already existed but was dangling
        # (if(EXISTS) follows symlinks, so the link was re-created and ln errored).
        find_library(CUVID_LIB
            NAMES nvcuvid libnvcuvid.so.1
            NAMES_PER_DIR
            HINTS
            "/usr/local/lib/"
            "/usr/lib/x86_64-linux-gnu"
            "${CMAKE_CURRENT_SOURCE_DIR}/../../../../third_party/Video_Codec_SDK/Lib/linux/stubs/x86_64/"
        )
        if(NOT CUVID_LIB)
            message(FATAL_ERROR "Failed to locate the nvcuvid library (searched /usr/local/lib, /usr/lib/x86_64-linux-gnu and the Video_Codec_SDK stubs).")
        endif()

        cuda_add_executable(${PROJECT_NAME} ${APP_SOURCES} ${NV_DEC_SOURCES} ${NV_DEC_HDRS})

        set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

        # NOTE(review): NV_APPDEC_COMMON_DIR is not set anywhere in this file; it
        # expands to nothing unless provided by a parent scope or the cache.
        target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_INCLUDE_DIRS}
            ${NVCODEC_PUBLIC_INTERFACE_DIR}
            ${NVCODEC_UTILS_DIR}
            ${NV_CODEC_DIR}
            ${NV_APPDEC_COMMON_DIR}
            ${NV_FFMPEG_HDRS}
            ${THIRD_PARTY_SAMPLE_DIR}
        )

        # Keep the plain (keyword-less) signature: cuda_add_executable() already
        # links the target with the plain signature, and CMake forbids mixing the
        # plain and keyword signatures on one target.
        target_link_libraries(${PROJECT_NAME} ${CUDA_CUDA_LIBRARY} ${CMAKE_DL_LIBS} ${CUVID_LIB} ${AVCODEC_LIB}
            ${AVFORMAT_LIB} ${AVUTIL_LIB} ${SWRESAMPLE_LIB})

        install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
    endif()

endif()
15 changes: 6 additions & 9 deletions third_party/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,16 @@ NUM_MAKE_JOBS ?= $(shell nproc --ignore=2)
.PHONY: all cuda_with_msccl cuda rocm common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest cuda_msccl rocm_perftest fio rocm_rccl_tests rocm_rocblas rocm_bandwidthTest gpcnet cuda_gpuburn cpu_stream cpu_hpl directx_amf_encoding_latency directx_amd rocm_hipblaslt megatron_lm megatron_deepspeed apex_rocm

# Build targets.
ifeq ($(shell uname -m), aarch64)
all: cuda
cuda_with_msccl: cuda cuda_msccl
cuda: common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest cuda_gpuburn megatron_lm megatron_deepspeed
cpu: common cpu_perftest
common: cpu_stream fio
else
all: cuda rocm
cuda_with_msccl: cuda cuda_msccl
cuda: common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest gpcnet cuda_gpuburn megatron_lm megatron_deepspeed
rocm: common rocm_perftest rocm_rccl_tests rocm_rocblas rocm_bandwidthTest rocm_hipblaslt megatron_deepspeed apex_rocm
cpu: common cpu_perftest
common: cpu_hpl cpu_stream fio
cpu: common cpu_perftest cpu_stream
common: fio

# non aarch64 specific targets
ifneq ($(shell uname -m), aarch64)
common: fio cpu_hpl
directx_amd: directx_amf_encoding_latency
endif

Expand Down
25 changes: 12 additions & 13 deletions third_party/stream-tests/Makefile
Original file line number Diff line number Diff line change
@@ -1,29 +1,28 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

GENFLAGS= -DSTREAM_ARRAY_SIZE=400000000
ZEN3FLAGS= -DSTREAM_ARRAY_SIZE=400000000 -march=znver3
ZEN4FLAGS= -DSTREAM_ARRAY_SIZE=800000000 -march=znver4
NEO2FLAGS= -DSTREAM_ARRAY_SIZE=120000000 -mcpu=neoverse-v2
GENFLAGS := -DSTREAM_ARRAY_SIZE=400000000
ZEN3FLAGS := -DSTREAM_ARRAY_SIZE=400000000 -march=znver3
ZEN4FLAGS := -DSTREAM_ARRAY_SIZE=800000000 -march=znver4
NEO2FLAGS := -DSTREAM_ARRAY_SIZE=120000000 -mcpu=neoverse-v2

GEN_OUTPUT= streamx86.exe
ZEN3_OUTPUT= streamZen3.exe
ZEN4_OUTPUT= streamZen4.exe
NEO2_OUTPUT= streamNeo2.exe
GEN_OUTPUT := streamx86.exe
ZEN3_OUTPUT := streamZen3.exe
ZEN4_OUTPUT := streamZen4.exe
NEO2_OUTPUT := streamNeo2.exe

ARCH := $(shell uname -m)

ifeq ($(ARCH), aarch64)
CFLAGS = -Ofast -fopenmp -DNTIMES=200
CC=gcc
CFLAGS := -Ofast -fopenmp -DNTIMES=200
CC := gcc
all: NEO2
else
CC= /opt/AMD/aocc-compiler-4.0.0/bin/clang
CFLAGS= -Ofast -mcmodel=large -mavx2 -ffp-contract=fast -lomp -fopenmp -fnt-store=aggressive -DNTIMES=10
CC := /opt/AMD/aocc-compiler-4.0.0/bin/clang
CFLAGS := -Ofast -mcmodel=large -mavx2 -ffp-contract=fast -lomp -fopenmp -fnt-store=aggressive -DNTIMES=10
all: ZEN3 ZEN4 X86
endif


ZEN3: stream.c
$(CC) $(CFLAGS) $(ZEN3FLAGS) stream.c -o $(ZEN3_OUTPUT)
ZEN4:
Expand Down

0 comments on commit 3e7136f

Please sign in to comment.