[NewComm] No.9 compatible upgrade for fused_attention op
jinyouzhi committed Sep 20, 2023
1 parent 0cb7a28 commit aeddb6e
Showing 2 changed files with 55 additions and 5 deletions.
50 changes: 45 additions & 5 deletions paddle/fluid/operators/fused/fused_attention_utils.h
@@ -18,8 +18,13 @@
#include "paddle/fluid/distributed/collective/process_group_nccl.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#include "paddle/phi/core/distributed/nccl_comm_context.h"
#include "paddle/phi/core/flags.h"
PHI_DECLARE_bool(dynamic_static_unified_comm);
#endif

#include "paddle/fluid/distributed/collective/utils.h"
#include "paddle/phi/core/distributed/comm_context_manager.h"
#include "paddle/phi/core/errors.h"

namespace phi {
@@ -47,11 +52,46 @@ static void AllReduce(phi::DenseTensor &tensor, // NOLINT
auto place = dev_ctx.GetPlace();
void *recvbuff =
dev_ctx.template Alloc<T>(&tensor, tensor.numel() * sizeof(T));
auto comm =
paddle::platform::NCCLCommContext::Instance().Get(ring_id, place);
auto stream = dev_ctx.stream();
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclAllReduce(
sendbuff, recvbuff, numel, dtype, ncclSum, comm->comm(), stream));
gpuStream_t stream = nullptr;
platform::NCCLComm *comm = nullptr;
phi::distributed::NCCLCommContext *comm_ctx = nullptr;

const auto &comm_context_manager =
phi::distributed::CommContextManager::GetInstance();

if (FLAGS_dynamic_static_unified_comm) {
// Use New Communication Library
PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)),
true,
platform::errors::InvalidArgument(
"You choose to use new communication library by "
"setting environment "
"variable FLAGS_dynamic_static_unified_comm True. "
"But ring_id(%d) is "
"not found in comm_context_manager.",
std::to_string(ring_id)));
comm_ctx = static_cast<phi::distributed::NCCLCommContext *>(
comm_context_manager.Get(std::to_string(ring_id)));
PADDLE_ENFORCE_NE(comm_ctx,
nullptr,
platform::errors::Unavailable(
"NCCLCommContext is nullptr, collective op should "
"has ring_id attr."));

stream = comm_ctx->GetStream();
VLOG(3) << "new comm_context_manager has ring_id" << ring_id;
} else {
comm = paddle::platform::NCCLCommContext::Instance().Get(ring_id, place);

stream = dev_ctx.stream();
VLOG(3) << "old NCCLCommContext has ring_id " << ring_id;
}
if (comm_ctx) {
comm_ctx->AllReduce(&tensor, tensor, ncclSum, stream);
} else {
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclAllReduce(
sendbuff, recvbuff, numel, dtype, ncclSum, comm->comm(), stream));
}
}
#else
PADDLE_THROW(phi::errors::Unimplemented(
10 changes: 10 additions & 0 deletions test/legacy_test/test_fused_attention_op.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import unittest

import numpy as np
@@ -31,6 +32,7 @@

class TestFusedAttentionOp(OpTest):
def setUp(self):
self.with_new_comm()
self.config()
self.generate_input_data()

@@ -79,6 +81,9 @@ def setUp(self):
paddle.set_default_dtype(self.x_type)
self.dropout = Dropout(self.dropout_prob, mode="upscale_in_train")

def with_new_comm(self):
os.environ["FLAGS_dynamic_static_unified_comm"] = "0"

def config(self):
self.x_type = np.float32
self.attn_mask_type = np.float64
@@ -350,6 +355,11 @@ def test_fused_attention_op(self):
)


class TestFusedAttentionOpWithNewComm(TestFusedAttentionOp):
def with_new_comm(self):
os.environ["FLAGS_dynamic_static_unified_comm"] = "1"


class TestFusedAttentionOpBiasIsNone(TestFusedAttentionOp):
def config(self):
super().config()
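
Note: the with_new_comm hook introduced above makes it straightforward to extend new-communication-library coverage to the other variants in this test file. As a minimal sketch (the class name below is hypothetical and not part of this commit), any existing subclass can opt in the same way TestFusedAttentionOpWithNewComm does, assuming it is added to test/legacy_test/test_fused_attention_op.py where os is already imported:

class TestFusedAttentionOpBiasIsNoneWithNewComm(TestFusedAttentionOpBiasIsNone):
    def with_new_comm(self):
        # "1" opts into the new unified communication path guarded by
        # FLAGS_dynamic_static_unified_comm; "0" keeps the legacy NCCLCommContext.
        os.environ["FLAGS_dynamic_static_unified_comm"] = "1"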
