Skip to content

Commit

Permalink
[XLA:GPU] Always use GpuAsyncTracker.
Browse files: browse the repository at this point in the history
PiperOrigin-RevId: 705095500
  • Loading branch information
golechwierowicz authored and Google-ML-Automation committed Dec 13, 2024
1 parent f5d511b commit a22c0c7
Show file tree
Hide file tree
Showing 5 changed files with 4 additions and 34 deletions.
6 changes: 0 additions & 6 deletions xla/debug_options_flags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
opts.set_xla_gpu_shape_checks(DebugOptions::RUNTIME);
opts.set_xla_dump_latency_hiding_schedule(false);
opts.set_xla_gpu_enable_latency_hiding_scheduler(false);
opts.set_xla_gpu_lhs_enable_gpu_async_tracker(true);
opts.set_xla_gpu_enable_analytical_latency_estimator(false);
opts.set_xla_gpu_pgle_profile_file_or_directory_path("");
opts.set_xla_gpu_memory_limit_slop_factor(95);
Expand Down Expand Up @@ -1566,11 +1565,6 @@ void MakeDebugOptionsFlags(std::vector<tsl::Flag>* flag_list,
&DebugOptions::set_xla_gpu_pgle_profile_file_or_directory_path),
debug_options->xla_gpu_pgle_profile_file_or_directory_path(),
"Directory or file for PGLE profiles in XLA:GPU"));
flag_list->push_back(tsl::Flag(
"xla_gpu_lhs_enable_gpu_async_tracker",
bool_setter_for(&DebugOptions::set_xla_gpu_lhs_enable_gpu_async_tracker),
debug_options->xla_gpu_lhs_enable_gpu_async_tracker(),
"Enable GPU async tracker for latency-hiding scheduler in XLA:GPU"));
flag_list->push_back(tsl::Flag(
"xla_gpu_memory_limit_slop_factor",
int32_setter_for(&DebugOptions::set_xla_gpu_memory_limit_slop_factor),
Expand Down
7 changes: 1 addition & 6 deletions xla/service/gpu/gpu_hlo_schedule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -514,12 +514,7 @@ absl::Status RunLatencyHidingSchedulerPasses(
return GetSizeOfShape(shape, pointer_size);
};

const DebugOptions& options = module->config().debug_options();
auto async_tracker = [&]() -> std::unique_ptr<AsyncTracker> {
return options.xla_gpu_lhs_enable_gpu_async_tracker()
? std::make_unique<GpuAsyncTracker>(config)
: std::make_unique<GpuAsyncTrackerBase>(config);
}();
auto async_tracker = std::make_unique<GpuAsyncTracker>(config);

HloPassPipeline pipeline("latency-hiding-scheduler");
std::unique_ptr<LatencyEstimator> latency_estimator = GetLatencyEstimator(
Expand Down
22 changes: 2 additions & 20 deletions xla/service/gpu/gpu_hlo_schedule_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ class GpuHloScheduleTest : public HloTestBase {

struct TestConfig {
bool enable_latency_hiding_scheduler = false;
bool enable_gpu_async_tracker = false;
bool enable_pipelined_p2p = false;
std::string fdo_profile = "";
};
Expand All @@ -88,8 +87,6 @@ class GpuHloScheduleTest : public HloTestBase {
DebugOptions debug_options = GetDebugOptionsForTest();
debug_options.set_xla_gpu_enable_latency_hiding_scheduler(
test_config.enable_latency_hiding_scheduler);
debug_options.set_xla_gpu_lhs_enable_gpu_async_tracker(
test_config.enable_gpu_async_tracker);
debug_options.set_xla_gpu_enable_pipelined_p2p(
test_config.enable_pipelined_p2p);
config.set_debug_options(debug_options);
Expand Down Expand Up @@ -510,7 +507,6 @@ TEST_F(GpuHloScheduleTest, ProfileGuidedCostModel) {
for (const SubTest& subtest : subtests) {
TestConfig test_config;
test_config.enable_latency_hiding_scheduler = true;
test_config.enable_gpu_async_tracker = true;
test_config.fdo_profile = subtest.profile;
TF_ASSERT_OK_AND_ASSIGN(
auto module,
Expand Down Expand Up @@ -573,7 +569,6 @@ TEST_F(GpuHloScheduleTest, ProfileGuidedCostModelFailsWithIncompleteProfile) {

TestConfig test_config;
test_config.enable_latency_hiding_scheduler = true;
test_config.enable_gpu_async_tracker = true;
test_config.fdo_profile = kProfile;
TF_ASSERT_OK_AND_ASSIGN(
auto module,
Expand Down Expand Up @@ -634,7 +629,6 @@ TEST_F(

TestConfig test_config;
test_config.enable_latency_hiding_scheduler = true;
test_config.enable_gpu_async_tracker = true;
test_config.fdo_profile = kProfile;
TF_ASSERT_OK_AND_ASSIGN(
auto module,
Expand Down Expand Up @@ -692,7 +686,6 @@ TEST_F(GpuHloScheduleTest, ProfileGuidedCostModelWithRematData) {
)pb";
TestConfig test_config;
test_config.enable_latency_hiding_scheduler = true;
test_config.enable_gpu_async_tracker = true;
test_config.fdo_profile = ar_long_latency_proto_text;
TF_ASSERT_OK_AND_ASSIGN(
auto module,
Expand Down Expand Up @@ -876,7 +869,6 @@ TEST_F(GpuHloScheduleTest, LHSSendRecvPairs2) {

TestConfig test_config;
test_config.enable_latency_hiding_scheduler = true;
test_config.enable_gpu_async_tracker = true;
test_config.enable_pipelined_p2p = true;
TF_ASSERT_OK_AND_ASSIGN(
auto module,
Expand Down Expand Up @@ -973,7 +965,6 @@ TEST_F(GpuHloScheduleTest, LHSSendRecvAllReduce) {

TestConfig test_config;
test_config.enable_latency_hiding_scheduler = true;
test_config.enable_gpu_async_tracker = true;
test_config.enable_pipelined_p2p = true;
TF_ASSERT_OK_AND_ASSIGN(
auto module,
Expand Down Expand Up @@ -1095,7 +1086,6 @@ TEST_F(GpuHloScheduleTest, LHSSendRecvPipelined1) {

TestConfig test_config;
test_config.enable_latency_hiding_scheduler = true;
test_config.enable_gpu_async_tracker = true;
test_config.enable_pipelined_p2p = true;
TF_ASSERT_OK_AND_ASSIGN(
auto module,
Expand Down Expand Up @@ -1291,7 +1281,6 @@ TEST_F(GpuHloScheduleTest, LHSSendRecvPipelined2) {

TestConfig test_config;
test_config.enable_latency_hiding_scheduler = true;
test_config.enable_gpu_async_tracker = true;
test_config.enable_pipelined_p2p = true;
TF_ASSERT_OK_AND_ASSIGN(
auto module,
Expand Down Expand Up @@ -1520,7 +1509,7 @@ TEST_P(GpuHloScheduleParameterizedTest, AsyncAllReduce) {
EXPECT_TRUE(HasValidFingerprint(module.get()));
}

TEST_P(GpuHloScheduleParameterizedTest, LHSResourceModel) {
TEST_F(GpuHloScheduleTest, LHSResourceModel) {
const char* hlo_text = R"(
HloModule AsyncModule
apply_op {
Expand Down Expand Up @@ -1559,19 +1548,13 @@ TEST_P(GpuHloScheduleParameterizedTest, LHSResourceModel) {
ROOT t = (f32[32], f32[64], f32[32,32]) tuple(ar-done, %ag-done, add5)
})";

const bool enable_gpu_async_tracker = GetParam();
TestConfig test_config;
test_config.enable_latency_hiding_scheduler = true;
test_config.enable_gpu_async_tracker = GetParam();
TF_ASSERT_OK_AND_ASSIGN(
auto module,
ParseAndReturnVerifiedModule(hlo_text, GetModuleConfig(test_config)));
SequentialHloOrdering order = BuildHloOrdering(module.get());

// Count the number of collectives in flight. Without gpu async tracker, we
// will incorrectly have 2 in-flight (as base async tracker assumes each
// collective can be scheduled independently as they use different resource
// types), but with gpu async tracker we will have 1.
uint32_t in_flight = 0;
uint32_t max_in_flight = 0;
for (const HloInstruction* inst :
Expand All @@ -1584,8 +1567,7 @@ TEST_P(GpuHloScheduleParameterizedTest, LHSResourceModel) {
}
}

const uint32_t expected_max_in_flight = enable_gpu_async_tracker ? 1 : 2;
EXPECT_EQ(expected_max_in_flight, max_in_flight);
EXPECT_EQ(max_in_flight, 1);
EXPECT_TRUE(HasValidFingerprint(module.get()));
}

Expand Down
1 change: 0 additions & 1 deletion xla/service/gpu/gpu_latency_hiding_scheduler_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ class GpuLatencyHidingSchedulerBaseTest : public HloTestBase {
HloModuleConfig config;
DebugOptions debug_options = GetDebugOptionsForTest();
debug_options.set_xla_gpu_enable_latency_hiding_scheduler(true);
debug_options.set_xla_gpu_lhs_enable_gpu_async_tracker(true);
debug_options.set_xla_gpu_enable_experimental_pipeline_parallelism_opt(
enable_experimental_pipeline_parallelism_opt);
config.set_debug_options(debug_options);
Expand Down
2 changes: 1 addition & 1 deletion xla/xla.proto
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ message DebugOptions {
bool xla_gpu_enable_highest_priority_async_stream = 216;
bool xla_gpu_enable_analytical_latency_estimator = 255;

bool xla_gpu_lhs_enable_gpu_async_tracker = 204;
reserved 204; // Was xla_gpu_lhs_enable_gpu_async_tracker.
string xla_gpu_pgle_profile_file_or_directory_path = 210;
int32 xla_gpu_memory_limit_slop_factor = 260;

Expand Down

0 comments on commit a22c0c7

Please sign in to comment.