Skip to content

Commit

Permalink
Fix Memory Leak In AIO (#6630)
Browse files Browse the repository at this point in the history
Fixing a memory leak in AIO pinned tensor as well as an incorrect
function type for gds op.

---------

Co-authored-by: Masahiro Tanaka <[email protected]>
  • Loading branch information
jomayeri and tohtana authored Oct 18, 2024
1 parent c9fc34a commit 6eefc3d
Show file tree
Hide file tree
Showing 6 changed files with 10 additions and 6 deletions.
5 changes: 4 additions & 1 deletion csrc/aio/py_lib/deepspeed_cpu_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ void cpu_op_desc_t::finish()
{
if (_use_bounce_buffer) {
if (_read_op) {
if (_buffer.is_cuda()) { _buffer.copy_(_cpu_buffer.to(torch::kCUDA)); }
if (_buffer.is_cuda()) {
_buffer.copy_(_cpu_buffer.to(torch::Device(torch::kCUDA, _buffer.get_device()),
/*non_blocking=*/true));
}
if (_buffer.is_xpu()) { _buffer.copy_(_cpu_buffer.to(torch::kXPU)); }
if (_buffer.is_cpu()) { _buffer.copy_(_cpu_buffer); }
#if defined(__ENABLE_CANN__)
Expand Down
2 changes: 2 additions & 0 deletions csrc/aio/py_lib/deepspeed_pin_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ deepspeed_pin_tensor_t::~deepspeed_pin_tensor_t()
{
for (auto iter = _locked_tensors.begin(); iter != _locked_tensors.end(); ++iter) {
munlock(iter->first, iter->second);
std::free((void*)iter->first);
}
_locked_tensors.clear();
}
Expand Down Expand Up @@ -43,6 +44,7 @@ bool deepspeed_pin_tensor_t::free(torch::Tensor& locked_tensor)
auto addr = locked_tensor.data_ptr();
if (_locked_tensors.find(addr) != _locked_tensors.end()) {
munlock(addr, _locked_tensors[addr]);
std::free(addr);
_locked_tensors.erase(addr);
return true;
}
Expand Down
3 changes: 1 addition & 2 deletions csrc/gds/py_lib/deepspeed_gds_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ void gds_op_desc_t::add_buffer_to_registry(const torch::Tensor& buffer)
const int64_t device = buffer.get_device();
void* reg_ptr = buffer.data_ptr();

// std::cout << "REG PTR " << reg_ptr << std::endl;
// TODO: add checking to make sure pointer isn't already in set
const auto it = base_ptr_registry.find(device);
if (it == base_ptr_registry.end()) {
Expand Down Expand Up @@ -94,7 +93,7 @@ gds_op_desc_t::gds_op_desc_t(const bool read_op,
const torch::Tensor& buffer,
const int fd,
const char* filename,
const long long int file_num_bytes,
const int64_t file_num_bytes,
const int intra_op_parallelism,
const bool validate)
: io_op_desc_t(read_op, buffer, fd, filename, file_num_bytes, intra_op_parallelism, validate)
Expand Down
2 changes: 1 addition & 1 deletion csrc/gds/py_lib/deepspeed_gds_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ struct gds_op_desc_t : io_op_desc_t {
const torch::Tensor& buffer,
const int fd,
const char* filename,
const long long int file_num_bytes,
const int64_t file_num_bytes,
const int intra_op_parallelism,
const bool validate);

Expand Down
2 changes: 1 addition & 1 deletion csrc/gds/py_lib/deepspeed_py_gds_handle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ std::shared_ptr<struct io_op_desc_t> deepspeed_gds_handle_t::_create_io_op_desc(
const torch::Tensor& buffer,
const int fd,
const char* filename,
const long long int file_num_bytes,
const int64_t file_num_bytes,
const bool validate)
{
if (buffer.is_cuda()) {
Expand Down
2 changes: 1 addition & 1 deletion csrc/gds/py_lib/deepspeed_py_gds_handle.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ struct deepspeed_gds_handle_t : deepspeed_io_handle_t {
const torch::Tensor& buffer,
const int fd,
const char* filename,
const long long int file_num_bytes,
const int64_t file_num_bytes,
const bool validate);

static int s_cuFile_init;
Expand Down

0 comments on commit 6eefc3d

Please sign in to comment.