Skip to content

Commit

Permalink
Added cuSZp src code
Browse files Browse the repository at this point in the history
  • Loading branch information
mkshah5 committed Feb 15, 2024
1 parent 9bc3d9c commit 92bf98b
Show file tree
Hide file tree
Showing 22 changed files with 2,443 additions and 0 deletions.
79 changes: 79 additions & 0 deletions qtensor/compression/cuszp/cuSZp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Specify the minimum version of CMake required to build the project
cmake_minimum_required(VERSION 3.21)

# Project declaration. PROJECT_NAME doubles as the name of the library target
# created further down in this file.
project(cuSZp
  VERSION 0.0.2
  DESCRIPTION "Error-bounded GPU lossy compression library"
)

# Namespace prefix applied to the exported targets (used by cmake/Installing.cmake).
set(namespace "cuSZp")

enable_language(CXX)
enable_language(CUDA)

# Provides the CUDA::cudart imported target linked by the library.
find_package(CUDAToolkit REQUIRED)

# Host-side language standard; also emit compile_commands.json for tooling.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Disabled compiler flag overrides, kept for reference.
#set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -debug -Wall -diag-disable=10441")
#set(CMAKE_CXX_FLAGS_RELEASE "-diag-disable=10441 -g -ftz -fma -O2 -fp-model precise -prec-div -Wall")

#set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -ftz=true -G -allow-unsupported-compiler")
#set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -allow-unsupported-compiler")

# NOTE(review): CUDA has already been enabled above, and the CMake documentation
# describes CMAKE_CUDA_HOST_COMPILER as read-only once the language is enabled,
# so this assignment presumably has no effect - confirm before relying on it.
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
# Defaults inherited by the targets created after this point.
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
set(CMAKE_CUDA_STANDARD "17")
# Enforce the CUDA standard as well. The original line repeated the CXX variable
# here, which left the CUDA standard request above unenforced.
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
#set(CMAKE_CUDA_FLAGS_INIT "-std=c++17 -allow-unsupported-compiler")
# GPU architectures for which device code is generated.
set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 75)
# Setting of the legacy FindCUDA module; it only affects code that uses the
# cuda_add_* macros.
set(CUDA_PROPAGATE_HOST_FLAGS ON)
# Convenience variable naming the CUDA runtime imported target.
set(CUDA_LIBRARY CUDA::cudart)

# Default to a Release build when neither a build type nor a list of
# configuration types has been provided.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY VALUE Release)
endif()

# Static library target that holds all cuSZp CUDA sources.
add_library(${PROJECT_NAME} STATIC)
# Namespaced alias so in-tree consumers can use the same name as the exported
# target (<namespace>::<project name>).
add_library(${namespace}::${PROJECT_NAME} ALIAS ${PROJECT_NAME})

target_sources(${PROJECT_NAME}
  PRIVATE
    src/cuSZp_f32.cu
    src/cuSZp_f64.cu
    src/cuSZp_utility.cu
    src/cuSZp_timer.cu
    src/cuSZp_entry_f32.cu
    src/cuSZp_entry_f64.cu
)

# CMAKE_INSTALL_INCLUDEDIR is defined by GNUInstallDirs. It has to be loaded
# before the INSTALL_INTERFACE expression below is written, otherwise the
# variable expands to an empty string at this point.
include(GNUInstallDirs)

target_include_directories(${PROJECT_NAME}
  PRIVATE
    # where the library itself will look for its internal headers
    ${CMAKE_CURRENT_SOURCE_DIR}/src
  PUBLIC
    # where top-level project will look for the library's public headers
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    # where external projects will look for the library's public headers
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
)

#target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

# CUDA runtime needed by the library's own sources.
target_link_libraries(${PROJECT_NAME} PRIVATE CUDA::cudart)

# Headers shipped with the library; cmake/Installing.cmake assigns this list to
# the target's PUBLIC_HEADER property.
set(public_headers
  include/cuSZp_f32.h
  include/cuSZp_f64.h
  include/cuSZp_utility.h
  include/cuSZp_timer.h
  include/cuSZp_entry_f32.h
  include/cuSZp_entry_f64.h
)

# Make the helper modules in cmake/ discoverable. Prepending (instead of
# overwriting) keeps any module path entries configured by an enclosing project
# while still giving this project's modules priority.
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
# Installation and package-export rules live in cmake/Installing.cmake.
include(Installing)

# Example programs are built by default; pass -DCUSZP_BUILD_EXAMPLES=OFF to skip them.
option(CUSZP_BUILD_EXAMPLES "Option to enable building example programs" ON)
if(CUSZP_BUILD_EXAMPLES)
  add_subdirectory(examples)
endif()
5 changes: 5 additions & 0 deletions qtensor/compression/cuszp/cuSZp/Config.cmake.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@PACKAGE_INIT@

# The exported library target references CUDA::cudart, so the CUDA toolkit
# package has to be located before the targets file is loaded.
include(CMakeFindDependencyMacro)
find_dependency(CUDAToolkit)

# Targets file generated by the install(EXPORT ...) rule of this project.
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")

check_required_components(@PROJECT_NAME@)
30 changes: 30 additions & 0 deletions qtensor/compression/cuszp/cuSZp/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
Copyright © 2023, UChicago Argonne and University of Iowa

All Rights Reserved

Software Name: cuSZp: An Ultra-fast GPU Error-bounded Lossy Compressor with Optimized End-to-End Performance

By: Argonne National Laboratory, University of Iowa

OPEN SOURCE LICENSE

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

******************************************************************************************************
DISCLAIMER

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************************************

Contact: SZ Team ([email protected])
106 changes: 106 additions & 0 deletions qtensor/compression/cuszp/cuSZp/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# cuSZp
<a href="./LICENSE"><img src="https://img.shields.io/badge/License-BSD%203--Clause-blue.svg"></a>

cuSZp is a user-friendly error-bounded lossy compression tool specifically designed for the compression of single- and double-precision floating-point data using NVIDIA GPUs.
This tool fuses all compression or decompression computations into one single kernel, achieving ultra fast end-to-end throughput.
Specifically, the cuSZp framework is structured around four pivotal stages: Quantization and Prediction, Fixed-length Encoding, Global Synchronization, and Block Bit-shuffling.
Note that ongoing optimization efforts are being devoted to cuSZp, aiming to further improve its end-to-end performance.

- Developer: Yafan Huang
- Contributors: Sheng Di, Xiaodong Yu, Guanpeng Li, and Franck Cappello

## Environment Requirements
- Linux OS with NVIDIA GPUs
- Git >= 2.15
- CMake >= 3.21
- Cuda Toolkit >= 11.0
- GCC >= 7.3.0

## Compile and Run cuSZp Prepared Executable Binary
You can compile and install cuSZp with the following commands:
```shell
$ git clone https://github.com/szcompressor/cuSZp.git
$ cd cuSZp
$ mkdir build && cd build
$ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install/ ..
$ make -j
$ make install
```
After compilation, you will see a list of executable binaries in ```cuSZp/install/bin/```:
- ```cuSZp_cpu_f32_api```: single-precision, host pointers (i.e. on CPU).
- ```cuSZp_gpu_f32_api```: single-precision, device pointers (i.e. on GPU).
- ```cuSZp_cpu_f64_api```: double-precision, host pointers (i.e. on CPU).
- ```cuSZp_gpu_f64_api```: double-precision, device pointers (i.e. on GPU).

To use those binaries, try the following commands.
Here we use the RTM pressure_2000 dataset (1.4 GB, 1008x1008x352) for the single-precision example, and NWChem acd-tst.bin.d64 (6.0 GB) for the double-precision example.
```shell
# Example for single-precision API
# ./cuSZp_gpu_f32_api TARGET_HPC_DATASET ERROR_MODE ERROR_BOUND
# ABS or REL
$ ./cuSZp_gpu_f32_api ./pressure_2000 REL 1e-4
cuSZp finished!
cuSZp compression end-to-end speed: 151.564649 GB/s
cuSZp decompression end-to-end speed: 232.503219 GB/s
cuSZp compression ratio: 13.003452

Pass error check!
$
# Example for double-precision API
# ./cuSZp_gpu_f64_api TARGET_HPC_DATASET ERROR_MODE ERROR_BOUND
# ABS or REL
$ ./cuSZp_gpu_f64_api ./acd-tst.bin.d64 ABS 1E-8
cuSZp finished!
cuSZp compression end-to-end speed: 110.117965 GB/s
cuSZp decompression end-to-end speed: 222.743097 GB/s
cuSZp compression ratio: 3.990585

Pass error check!
```
More HPC datasets can be downloaded from [SDRBench](https://sdrbench.github.io/).

## Using cuSZp as an Internal API
This repository provides several examples for using cuSZp compression and decompression for different scenarios (device pointer? host pointer? f32 or f64?).
The examples can be found in ```cuSZp/examples/```.
Assuming your original data, compressed data, and reconstructed data are all device pointers (allocated on GPU), and the data type is single-precision. The compression and decompression APIs can be called as below:
```C++
// For measuring the end-to-end throughput.
TimingGPU timer_GPU;

// cuSZp compression.
timer_GPU.StartCounter(); // set timer
SZp_compress_deviceptr_f32(d_oriData, d_cmpBytes, nbEle, &cmpSize, errorBound, stream);
float cmpTime = timer_GPU.GetCounter();

// cuSZp decompression.
timer_GPU.StartCounter(); // set timer
SZp_decompress_deviceptr_f32(d_decData, d_cmpBytes, nbEle, cmpSize, errorBound, stream);
float decTime = timer_GPU.GetCounter();
```
More details can be checked in:
- **f32-hostptr**: ```cuSZp/examples/cuSZp_cpu_f32_api.cpp```.
- **f32-deviceptr**: ```cuSZp/examples/cuSZp_gpu_f32_api.cpp```.
- **f64-hostptr**: ```cuSZp/examples/cuSZp_cpu_f64_api.cpp```.
- **f64-deviceptr**: ```cuSZp/examples/cuSZp_gpu_f64_api.cpp```.
## Citation
```bibtex
@inproceedings{cuSZp2023huang,
title = {cuSZp: An Ultra-Fast GPU Error-Bounded Lossy Compression Framework with Optimized End-to-End Performance},
author = {Huang, Yafan and Di, Sheng and Yu, Xiaodong and Li, Guanpeng and Cappello, Franck},
year = {2023},
isbn = {979-8-4007-0109-2/23/11},
publisher = {Association for Computing Machinery},
address = {Denver, CO, USA},
doi = {10.1145/3581784.3607048},
booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
keywords = {Lossy compression; parallel computing; HPC; GPU},
series = {SC'23}
}
```

## Copyright
(C) 2023 by Argonne National Laboratory and University of Iowa. For more details, see [COPYRIGHT](https://github.com/szcompressor/cuSZp/blob/master/LICENSE).

## Acknowledgement
This research was supported by the Exascale Computing Project (ECP), Project Number: 17-SC-20-SC, a collaborative effort of two DOE organizations – the Office of Science and the National Nuclear Security Administration, responsible for the planning and preparation of a capable exascale ecosystem, including software, applications, hardware, advanced system engineering and early testbed platforms, to support the nation’s exascale computing imperative. The material was supported by the U.S. Department of Energy, Office of Science, Advanced Scientific Computing Research (ASCR), under contract DE-AC02-06CH11357, and supported by the National Science Foundation under Grant OAC-2003709 and OAC-2104023. We acknowledge the computing resources provided on Bebop (operated by Laboratory Computing Resource Center at Argonne) and on Theta and JLSE (operated by Argonne Leadership Computing Facility). We acknowledge the support of ARAMCO.
67 changes: 67 additions & 0 deletions qtensor/compression/cuszp/cuSZp/cmake/Installing.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Standard installation directory variables (CMAKE_INSTALL_INCLUDEDIR, ...).
include(GNUInstallDirs)

# Redirect the installation to <source dir>/install when the user did not pick
# a prefix. The flag is tested for its truth value (the documented usage) rather
# than for mere existence, so a parent project that sets it to FALSE after
# choosing its own prefix is respected.
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  message(
    STATUS
    "CMAKE_INSTALL_PREFIX is not set\n"
    "Default value: ${CMAKE_INSTALL_PREFIX}\n"
    "Will set it to ${CMAKE_SOURCE_DIR}/install"
  )
  # FORCE is needed because CMake has already written its default to the cache.
  set(CMAKE_INSTALL_PREFIX
    "${CMAKE_SOURCE_DIR}/install"
    CACHE PATH "Where the library will be installed to" FORCE
  )
else()
  message(
    STATUS
    "CMAKE_INSTALL_PREFIX was already set\n"
    "Current value: ${CMAKE_INSTALL_PREFIX}"
  )
endif()

# Target metadata used at install time: the headers to ship with the library
# and the suffix appended to the library file name in Debug builds.
set_target_properties(${PROJECT_NAME}
  PROPERTIES
    PUBLIC_HEADER "${public_headers}"
    DEBUG_POSTFIX "d"
)

# Install the library and record it in the "<project>Targets" export set.
# RUNTIME, LIBRARY and ARCHIVE destinations are intentionally not listed: they
# take their default values. Only the header locations are spelled out, because
# the public headers should land in a folder named after the project.
install(
  TARGETS ${PROJECT_NAME}
  EXPORT "${PROJECT_NAME}Targets"
  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} # include/<project name>
  INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} # include
)

# Generate the export file for the set above and install it.
install(
  EXPORT "${PROJECT_NAME}Targets"
  NAMESPACE ${namespace}::
  FILE "${PROJECT_NAME}Targets.cmake"
  DESTINATION cmake
)

# Helpers for generating the files that find_package() consumes.
include(CMakePackageConfigHelpers)

# Generate the version file for the config file. The version comes from the
# project() call; the previously used "${version}" variable is never defined.
write_basic_package_version_file(
  "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
  VERSION "${PROJECT_VERSION}"
  COMPATIBILITY AnyNewerVersion
)

# Create the package config file from the Config.cmake.in template.
configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
  "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
  INSTALL_DESTINATION cmake
)

# Install both generated config files next to the exported targets file.
install(FILES
  "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
  "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
  DESTINATION cmake
)

# Generate the export targets for the build tree, so the library can be consumed
# without being installed first.
export(EXPORT "${PROJECT_NAME}Targets"
  FILE "${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}Targets.cmake"
  NAMESPACE ${namespace}::
)
45 changes: 45 additions & 0 deletions qtensor/compression/cuszp/cuSZp/examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Example programs for the cuSZp API.
#
# This file expects to be added from the top-level CMakeLists.txt, which
# defines the ${PROJECT_NAME} library target. The examples link that target
# instead of compiling the library sources a second time, and use the
# CUDAToolkit imported targets rather than the deprecated FindCUDA module
# (cuda_add_library / cuda_add_executable).

# Provides CUDA::cudart (runtime headers and library) for the example sources.
find_package(CUDAToolkit REQUIRED)

set(install_dir ${PROJECT_BINARY_DIR}/examples/bin)
set(execName_gpu_f32 "cuSZp_gpu_f32_api")
set(execName_cpu_f32 "cuSZp_cpu_f32_api")
set(execName_gpu_f64 "cuSZp_gpu_f64_api")
set(execName_cpu_f64 "cuSZp_cpu_f64_api")
set(INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)

# Keep the former helper-library name available for anything that still links it.
add_library(cuSZp_libs ALIAS ${PROJECT_NAME})

# Each example is a single source file named after its executable.
foreach(example_name IN ITEMS
    ${execName_gpu_f32}
    ${execName_cpu_f32}
    ${execName_gpu_f64}
    ${execName_cpu_f64})
  # Compile executable binary
  add_executable(${example_name} ${example_name}.cpp)

  # Public cuSZp headers, scoped to this target only
  target_include_directories(${example_name} PRIVATE "${INCLUDE_DIR}")

  # Link with the cuSZp library and the CUDA runtime
  target_link_libraries(${example_name} PRIVATE ${PROJECT_NAME} CUDA::cudart)

  # Set output path for the compiled binary
  set_target_properties(${example_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${install_dir})

  # Set installation path for the compiled binary
  install(TARGETS ${example_name} DESTINATION bin)
endforeach()
Loading

0 comments on commit 92bf98b

Please sign in to comment.