Added cuSZp src code

danlkv · Feb 15, 2024 · 92bf98b · 92bf98b
1 parent 9bc3d9c
commit 92bf98b
Show file tree

Hide file tree

Showing 22 changed files with 2,443 additions and 0 deletions.
diff --git a/qtensor/compression/cuszp/cuSZp/CMakeLists.txt b/qtensor/compression/cuszp/cuSZp/CMakeLists.txt
@@ -0,0 +1,79 @@
+# cuSZp top-level build script: project declaration and C++ toolchain defaults.
+cmake_minimum_required(VERSION 3.21)
+
+project(cuSZp VERSION 0.0.2 DESCRIPTION "Error-bounded GPU lossy compression library")
+
+# Namespace prefix given to the exported targets (read by cmake/Installing.cmake).
+set(namespace "cuSZp")
+
+# Enable the C++ and CUDA languages for this project.
+enable_language(CXX CUDA)
+
+# Provides the CUDA::cudart imported target used by the library target.
+find_package(CUDAToolkit REQUIRED)
+
+# Require C++17 for host code; do not fall back to an older standard.
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_STANDARD 17)
+# Write compile_commands.json into the build tree for editors and tooling.
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+#set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -debug -Wall -diag-disable=10441")
+#set(CMAKE_CXX_FLAGS_RELEASE "-diag-disable=10441 -g -ftz -fma -O2 -fp-model precise -prec-div -Wall")
+
+#set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -ftz=true -G -allow-unsupported-compiler")
+#set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -allow-unsupported-compiler")
+
+# --- CUDA compilation settings ------------------------------------------------
+# NOTE(review): CMake documents CMAKE_CUDA_HOST_COMPILER as read-only once the
+# CUDA language has been enabled, and this assignment happens after CUDA is
+# enabled above, so it may have no effect. Confirm, and move it before the
+# language is enabled if the host compiler really must be pinned.
+set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
+# Default CUDA_SEPARABLE_COMPILATION to ON for targets created after this point.
+set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
+# Compile CUDA sources as C++17 and make that a hard requirement. The original
+# repeated CMAKE_CXX_STANDARD_REQUIRED here, leaving the CUDA standard optional.
+set(CMAKE_CUDA_STANDARD "17")
+set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+#set(CMAKE_CUDA_FLAGS_INIT "-std=c++17 -allow-unsupported-compiler")
+# GPU architectures that device code is generated for.
+set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 75)
+# Variable of the legacy FindCUDA module; presumably only relevant to code that
+# still uses cuda_add_* commands.
+set(CUDA_PROPAGATE_HOST_FLAGS ON)
+# Convenience variable holding the CUDA runtime target; not referenced in this file.
+set(CUDA_LIBRARY CUDA::cudart)
+
+# Default to a Release build when no build type was selected and the generator
+# is single-configuration (CMAKE_CONFIGURATION_TYPES is not set). The value is
+# written into the existing CMAKE_BUILD_TYPE cache entry.
+if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY VALUE Release)
+endif()
+
+# Static library target named after the project, built from the CUDA sources
+# under src/ (f32/f64 kernels, their entry points, the timer and utilities).
+add_library(${PROJECT_NAME} STATIC
+        src/cuSZp_f32.cu
+        src/cuSZp_f64.cu
+        src/cuSZp_utility.cu
+        src/cuSZp_timer.cu
+        src/cuSZp_entry_f32.cu
+        src/cuSZp_entry_f64.cu
+        )
+
+# CMAKE_INSTALL_INCLUDEDIR is defined by GNUInstallDirs. Load the module here so
+# the INSTALL_INTERFACE expression below does not expand to an empty path.
+include(GNUInstallDirs)
+
+target_include_directories(${PROJECT_NAME}
+        PRIVATE
+        # where the library itself will look for its internal headers
+        ${CMAKE_CURRENT_SOURCE_DIR}/src
+        PUBLIC
+        # where top-level project will look for the library's public headers
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+        # where external projects will look for the library's public headers
+        $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+        )
+
+#target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+
+# Link the CUDA runtime through the imported target from find_package(CUDAToolkit).
+target_link_libraries(${PROJECT_NAME} PRIVATE CUDA::cudart)
+
+# Headers that make up the public API; cmake/Installing.cmake stores this list
+# in the target's PUBLIC_HEADER property so install() copies them.
+set(public_headers
+        include/cuSZp_f32.h
+        include/cuSZp_f64.h
+        include/cuSZp_utility.h
+        include/cuSZp_timer.h
+        include/cuSZp_entry_f32.h
+        include/cuSZp_entry_f64.h
+        )
+
+# Put the bundled cmake/ directory first on the module search path without
+# discarding entries already supplied by the user or an enclosing project.
+list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+include(Installing)
+
+# Example programs are built by default; pass -DCUSZP_BUILD_EXAMPLES=OFF to skip them.
+option(CUSZP_BUILD_EXAMPLES "Option to enable building example programs" ON)
+if(CUSZP_BUILD_EXAMPLES)
+    add_subdirectory(examples)
+endif()
diff --git a/qtensor/compression/cuszp/cuSZp/Config.cmake.in b/qtensor/compression/cuszp/cuSZp/Config.cmake.in
@@ -0,0 +1,5 @@
+@PACKAGE_INIT@
+
+# The exported static library links against CUDA::cudart, so that imported
+# target must exist in the consuming project before the targets file is loaded.
+include(CMakeFindDependencyMacro)
+find_dependency(CUDAToolkit)
+
+# Load the exported targets installed next to this config file.
+include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
+
+check_required_components(@PROJECT_NAME@)
diff --git a/qtensor/compression/cuszp/cuSZp/LICENSE b/qtensor/compression/cuszp/cuSZp/LICENSE
@@ -0,0 +1,30 @@
+Copyright © 2023, UChicago Argonne and University of Iowa
+
+All Rights Reserved
+
+Software Name: cuSZp: An Ultra-fast GPU Error-bounded Lossy Compressor with Optimized End-to-End Performance
+
+By: Argonne National Laboratory, University of Iowa
+
+OPEN SOURCE LICENSE
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+******************************************************************************************************
+                                              DISCLAIMER
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************************************************************************************
+
+Contact: SZ Team ([email protected])
diff --git a/qtensor/compression/cuszp/cuSZp/README.md b/qtensor/compression/cuszp/cuSZp/README.md
@@ -0,0 +1,106 @@
+# cuSZp
+<a href="./LICENSE"><img src="https://img.shields.io/badge/License-BSD%203--Clause-blue.svg"></a> 
+
+cuSZp is a user-friendly error-bounded lossy compression tool specifically designed for the compression of single- and double-precision floating-point data using NVIDIA GPUs. 
+This tool fuses all compression or decompression computations into one single kernel, achieving ultra fast end-to-end throughput.
+Specifically, the cuSZp framework is structured around four pivotal stages: Quantization and Prediction, Fixed-length Encoding, Global Synchronization, and Block Bit-shuffling. 
+Note that ongoing optimization efforts are being devoted to cuSZp, aiming to further improve its end-to-end performance.
+
+- Developer: Yafan Huang
+- Contributors: Sheng Di, Xiaodong Yu, Guanpeng Li, and Franck Cappello
+
+## Environment Requirements
+- Linux OS with NVIDIA GPUs
+- Git >= 2.15
+- CMake >= 3.21
+- Cuda Toolkit >= 11.0
+- GCC >= 7.3.0
+
+## Compile and Run cuSZp Prepared Executable Binary
+You can compile and install cuSZp with the following commands:
+```shell
+$ git clone https://github.com/szcompressor/cuSZp.git
+$ cd cuSZp
+$ mkdir build && cd build
+$ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install/ ..
+$ make -j
+$ make install
+```
+After compilation, you will see a list of executable binaries in ```cuSZp/install/bin/```:
+- ```cuSZp_cpu_f32_api```: single-precision, host pointers (i.e. on CPU).
+- ```cuSZp_gpu_f32_api```: single-precision, device pointers (i.e. on GPU).
+- ```cuSZp_cpu_f64_api```: double-precision, host pointers (i.e. on CPU).
+- ```cuSZp_gpu_f64_api```: double-precision, device pointers (i.e. on GPU).
+
+To use those binaries, try the following commands.
+Here we use the RTM pressure_2000 dataset (1.4 GB, 1008x1008x352) for the single-precision example, and NWChem acd-tst.bin.d64 (6.0 GB) for the double-precision example.
+```shell
+# Example for single-precision API
+# ./cuSZp_gpu_f32_api TARGET_HPC_DATASET ERROR_MODE ERROR_BOUND
+#                                        ABS or REL
+$ ./cuSZp_gpu_f32_api ./pressure_2000 REL 1e-4
+cuSZp finished!
+cuSZp compression   end-to-end speed: 151.564649 GB/s
+cuSZp decompression end-to-end speed: 232.503219 GB/s
+cuSZp compression ratio: 13.003452
+
+Pass error check!
+$
+# Example for double-precision API
+# ./cuSZp_gpu_f64_api TARGET_HPC_DATASET ERROR_MODE ERROR_BOUND
+#                                        ABS or REL
+$ ./cuSZp_gpu_f64_api ./acd-tst.bin.d64 ABS 1E-8
+cuSZp finished!
+cuSZp compression   end-to-end speed: 110.117965 GB/s
+cuSZp decompression end-to-end speed: 222.743097 GB/s
+cuSZp compression ratio: 3.990585
+
+Pass error check!
+```
+More HPC datasets can be downloaded from [SDRBench](https://sdrbench.github.io/).
+
+## Using cuSZp as an Internal API
+This repository provides several examples for using cuSZp compression and decompression for different scenarios (device pointer? host pointer? f32 or f64?).
+The examples can be found in ```cuSZp/examples/```.
+Assuming your original data, compressed data, and reconstructed data are all device pointers (allocated on GPU) and the data type is single-precision, the compression and decompression APIs can be called as below:
+```C++
+// For measuring the end-to-end throughput.
+TimingGPU timer_GPU;
+
+// cuSZp compression.
+timer_GPU.StartCounter(); // set timer
+SZp_compress_deviceptr_f32(d_oriData, d_cmpBytes, nbEle, &cmpSize, errorBound, stream);
+float cmpTime = timer_GPU.GetCounter();
+
+// cuSZp decompression.
+timer_GPU.StartCounter(); // set timer
+SZp_decompress_deviceptr_f32(d_decData, d_cmpBytes, nbEle, cmpSize, errorBound, stream);
+float decTime = timer_GPU.GetCounter();
+```
+More details can be checked in:
+- **f32-hostptr**: ```cuSZp/examples/cuSZp_cpu_f32_api.cpp```.
+- **f32-deviceptr**: ```cuSZp/examples/cuSZp_gpu_f32_api.cpp```.
+- **f64-hostptr**: ```cuSZp/examples/cuSZp_cpu_f64_api.cpp```.
+- **f64-deviceptr**: ```cuSZp/examples/cuSZp_gpu_f64_api.cpp```.
+
+## Citation
+```bibtex
+@inproceedings{cuSZp2023huang,
+      title = {cuSZp: An Ultra-Fast GPU Error-Bounded Lossy Compression Framework with Optimized End-to-End Performance},
+     author = {Huang, Yafan and Di, Sheng and Yu, Xiaodong and Li, Guanpeng and Cappello, Franck},
+       year = {2023},
+       isbn = {979-8-4007-0109-2/23/11},
+  publisher = {Association for Computing Machinery},
+    address = {Denver, CO, USA},
+        doi = {10.1145/3581784.3607048},
+  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
+   keywords = {Lossy compression; parallel computing; HPC; GPU},
+     series = {SC'23}
+}
+```
+
+## Copyright
+(C) 2023 by Argonne National Laboratory and University of Iowa. For more details, see [COPYRIGHT](https://github.com/szcompressor/cuSZp/blob/master/LICENSE).
+
+## Acknowledgement
+This research was supported by the Exascale Computing Project (ECP), Project Number: 17-SC-20-SC, a collaborative effort of two DOE organizations – the Office of Science and the National Nuclear Security Administration, responsible for the planning and preparation of a capable exascale ecosystem, including software, applications, hardware, advanced system engineering and early testbed platforms, to support the nation’s exascale computing imperative. The material was supported by the U.S. Department of Energy, Office of Science, Advanced Scientific Computing Research (ASCR), under contract DE-AC02-06CH11357, and supported by the National Science Foundation under Grant OAC-2003709 and OAC-2104023. We acknowledge the computing resources provided on Bebop (operated by Laboratory Computing Resource Center at Argonne) and on Theta and JLSE (operated by Argonne Leadership Computing Facility). We acknowledge the support of ARAMCO. 
diff --git a/qtensor/compression/cuszp/cuSZp/cmake/Installing.cmake b/qtensor/compression/cuszp/cuSZp/cmake/Installing.cmake
@@ -0,0 +1,67 @@
+# Redirect installs into the project tree when the user did not choose a prefix.
+# CMake sets CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT to a true value only
+# when CMAKE_INSTALL_PREFIX has just been initialised to its built-in default,
+# so test its value rather than whether it is defined.
+# PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR) keeps the fallback inside this
+# project's own tree even when it is built as a subdirectory of another project.
+if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+    message(
+            STATUS
+            "CMAKE_INSTALL_PREFIX is not set\n"
+            "Default value: ${CMAKE_INSTALL_PREFIX}\n"
+            "Will set it to ${PROJECT_SOURCE_DIR}/install"
+    )
+    set(CMAKE_INSTALL_PREFIX
+            "${PROJECT_SOURCE_DIR}/install"
+            CACHE PATH "Where the library will be installed to" FORCE
+            )
+else()
+    message(
+            STATUS
+            "CMAKE_INSTALL_PREFIX was already set\n"
+            "Current value: ${CMAKE_INSTALL_PREFIX}"
+    )
+endif()
+
+# Load the standard install directory variables (CMAKE_INSTALL_INCLUDEDIR, ...)
+# only after the prefix is final, since the module consults CMAKE_INSTALL_PREFIX.
+include(GNUInstallDirs)
+
+# Install-related target properties, set in a single call:
+#   PUBLIC_HEADER - header list that install(TARGETS ... PUBLIC_HEADER) copies
+#   DEBUG_POSTFIX - "d" suffix appended to the artifact name in Debug builds
+set_target_properties(${PROJECT_NAME} PROPERTIES
+        PUBLIC_HEADER "${public_headers}"
+        DEBUG_POSTFIX "d"
+        )
+
+# Install the library and register it in the "<project>Targets" export set.
+install(TARGETS ${PROJECT_NAME}
+        EXPORT "${PROJECT_NAME}Targets"
+        # RUNTIME, LIBRARY and ARCHIVE destinations are deliberately left at
+        # their defaults (bin / lib / lib as provided by GNUInstallDirs).
+        # Public headers go into a per-project subfolder: include/<project>.
+        PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
+        # Include directory recorded on the exported target: include.
+        INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+        )
+
+# Generate and install the export file "<project>Targets.cmake" for the export
+# set populated by install(TARGETS ... EXPORT). Exported target names are
+# prefixed with "${namespace}::", and the file is placed in <prefix>/cmake.
+install(EXPORT "${PROJECT_NAME}Targets"
+        FILE "${PROJECT_NAME}Targets.cmake"
+        NAMESPACE ${namespace}::
+        DESTINATION cmake
+        )
+
+include(CMakePackageConfigHelpers)
+
+# Generate the version file that accompanies the package config file.
+# PROJECT_VERSION holds the VERSION passed to project(); it replaces a
+# reference to a `version` variable that the build scripts never set.
+write_basic_package_version_file(
+        "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
+        VERSION "${PROJECT_VERSION}"
+        COMPATIBILITY AnyNewerVersion
+)
+# Create the package config file from the Config.cmake.in template.
+configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
+        "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
+        INSTALL_DESTINATION cmake
+        )
+# Install both generated files next to the exported targets file.
+install(FILES
+        "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
+        "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
+        DESTINATION cmake
+        )
+# Also write a targets file into the build tree so the library can be consumed
+# without being installed.
+export(EXPORT "${PROJECT_NAME}Targets"
+        FILE "${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}Targets.cmake"
+        NAMESPACE ${namespace}::
+        )
diff --git a/qtensor/compression/cuszp/cuSZp/examples/CMakeLists.txt b/qtensor/compression/cuszp/cuSZp/examples/CMakeLists.txt
@@ -0,0 +1,45 @@
+# Example programs for the cuSZp API (host-pointer and device-pointer variants,
+# single and double precision).
+#
+# The examples link the cuSZp library target defined by the parent
+# CMakeLists.txt instead of recompiling its sources into a second library, and
+# use the CUDAToolkit imported targets instead of the deprecated FindCUDA
+# module (find_package(CUDA) / cuda_add_*).
+
+# Provides CUDA::cudart (runtime headers and library) for the example host code.
+find_package(CUDAToolkit REQUIRED)
+
+# Build-tree directory that receives the example binaries.
+set(install_dir ${PROJECT_BINARY_DIR}/examples/bin)
+
+# One executable per entry; each is built from "<name>.cpp" in this directory.
+set(cuszp_example_names
+        cuSZp_gpu_f32_api
+        cuSZp_cpu_f32_api
+        cuSZp_gpu_f64_api
+        cuSZp_cpu_f64_api
+        )
+
+foreach(example_name IN LISTS cuszp_example_names)
+    add_executable(${example_name} ${example_name}.cpp)
+
+    # cuSZp supplies the public include directory as a usage requirement;
+    # CUDA::cudart supplies the CUDA runtime headers and library.
+    target_link_libraries(${example_name} PRIVATE cuSZp CUDA::cudart)
+
+    # Set output path for the compiled binary.
+    set_target_properties(${example_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${install_dir})
+
+    # Set installation path for the compiled binary.
+    install(TARGETS ${example_name} DESTINATION bin)
+endforeach()