A simple wrapper for std::unique_ptr
and std::shared_ptr
for GPU on CUDA.
All definitions are in a single header file, memory.cuh, which you need to include in your project. Test programs can be found in src/test/.
#include "memory.cuh"
// Creates a GPU pointer of type int* with 10 elements
// and returns a std::shared_ptr encapsulating it.
auto sptr1 = cuda::shared<int>(10);
// Or, with the smart-pointer type spelled out explicitly:
std::shared_ptr<int> sptr2 = cuda::shared<int>(10);
{
// Returns a std::unique_ptr (here for 100 float elements).
auto uptr = cuda::unique<float>(100);
} // uptr goes out of scope here, which triggers deallocation
The header provides the following four functions for creating valid GPU smart pointers:
auto ptr = cuda::shared<TYPE>(NUM_ElEMENTS) //std::shared_ptr
auto ptr = cuda::unique<TYPE>(NUM_ElEMENTS) //std::unique_ptr
auto ptr = cuda::unified_shared<TYPE>(NUM_ElEMENTS) //std::shared_ptr
auto ptr = cuda::unified_unique<TYPE>(NUM_ElEMENTS) //std::unique_ptr
Functions 1-2 create pointers that are valid only on the GPU, while functions 3-4 create pointers that are valid on both the CPU and the GPU by using CUDA unified memory.
#include <iostream>
#include "memory.cuh"
// Example: demonstrates std::shared_ptr reference counting with GPU memory
// obtained from cuda::shared. Prints the use counts as ownership is copied,
// reset, and released at scope exit. Returns 0.
int main() {
    // Creates a std::shared_ptr which holds GPU memory (one int).
    auto ptr1 = cuda::shared<int>(1);
    // Copy-construct a second owner of the same allocation.
    auto ptr2(ptr1);

    // use_count() returns the number of shared_ptr objects
    // referring to the same managed object.
    std::cout << "[4] ptr1 use count: " << ptr1.use_count() << std::endl; // output: 2
    std::cout << "[5] ptr2 use count: " << ptr2.use_count() << std::endl; // output: 2

    // Relinquishes ptr1's ownership of the object;
    // ptr1 becomes a null pointer while ptr2 keeps the memory alive.
    ptr1.reset();
    std::cout << "ptr1 relinquishes ownership" << std::endl;
    // output: a null pointer value (exact formatting is implementation-defined)
    std::cout << "[6] ptr1 is " << ptr1.get() << std::endl;
    std::cout << "[7] ptr2 use count: " << ptr2.use_count() << std::endl; // output: 1

    std::shared_ptr<int> ptr3;
    {
        auto ptr4 = cuda::shared<int>(1);
        // ptr3 now shares ownership of ptr4's allocation.
        ptr3 = ptr4;
        std::cout << "[9] ptr3 use count: " << ptr3.use_count() << std::endl;   // output: 2
        std::cout << "[10] ptr4 use count: " << ptr4.use_count() << std::endl; // output: 2
    } // ptr4 goes out of scope; the memory is NOT freed because ptr3 still owns it

    std::cout << "[11] ptr3 use count: " << ptr3.use_count() << std::endl; // output: 1
    return 0;
}
Requirements:
- CUDA and C++ (C++11 standard)
- CMake (3.8+)
In the directory "cuda-smart-pointers" run:
./build.sh
It will build the executables at ./build/src/test/
To Run the test programs:
# in directory cuda-smart-pointers
./build/src/test/app_name
where app_name can be any one of the following: unique, shared, unified_unique, unified_shared
To Build the VS project:
- Open cmake-gui, set the source code directory to ".../cuda-smart-pointers", and set the build (binaries) directory to ".../cuda-smart-pointers/build".
- Click "Configure"
- Select your visual studio version
- Click "Configure"
- Click "Generate"
To Build and Run:
- Open Project "Example.sln" in build directory
- Right click the test app you want to run and click "Set as Startup Project"
- Click "Build" from main menu, to build the project
- Click "Debug"->"Start Without Debugging" to run and see the output.