Skip to content

Commit

Permalink
update, events appended in rocmtracer first, then add via rocmtraceco…
Browse files Browse the repository at this point in the history
…llector in gputracer
  • Loading branch information
cj401-ai committed Nov 26, 2024
1 parent bdc7778 commit d7c71e8
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 48 deletions.
42 changes: 24 additions & 18 deletions xla/backends/profiler/gpu/device_tracer_rocm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ namespace se = ::stream_executor;
// GpuTracer for ROCm GPU.
class GpuTracer : public profiler::ProfilerInterface {
public:
GpuTracer() {
// se::rocprofiler_force_configure
GpuTracer(RocmTracer* rocmtracer) : rocm_tracer_(rocmtracer) {
LOG(ERROR) << "GpuTrace with rocprofv3...\n";
Start();
LOG(INFO) << "GpuTracer created...";
Expand Down Expand Up @@ -120,26 +119,24 @@ RocmTraceCollectorOptions GpuTracer::GetRocmTraceCollectorOptions(
}

absl::Status GpuTracer::DoStart() {
/*
if (!rocm_tracer_->IsAvailable()) {
return tsl::errors::Unavailable("Another profile session running.");
}
*/

// AnnotationStack::Enable(true);

/*
RocmTraceCollectorOptions trace_collector_options =
GetRocmTraceCollectorOptions(rocm_tracer_->NumGpus());
uint64_t start_gputime_ns = rocm_tracer_->GetTimestamp();
uint64_t start_walltime_ns = tsl::EnvTime::NowNanos();
rocm_trace_collector_ = CreateRocmCollector(
trace_collector_options, start_walltime_ns, start_gputime_ns);

RocmTracerOptions tracer_options = GetRocmTracerOptions();
rocm_tracer_->Enable(tracer_options, rocm_trace_collector_.get());
*/
LOG(ERROR) << "cj rocm_tracer_collector = " << rocm_trace_collector_.get();
LOG(ERROR) << "cj rocm_tracer_ collector = " << rocm_tracer_->get_collector();
// RocmTracerOptions tracer_options = GetRocmTracerOptions();
// rocm_tracer_->Enable(tracer_options, rocm_trace_collector_.get());

// LOG(ERROR) << "cj rocm_tracer_collector = " << rocm_trace_collector_.get();
// LOG(ERROR) << "cj rocm_tracer_ collector = " << rocm_tracer_->get_collector();
// LOG(ERROR) << "cj check XSpace = " << space;
LOG(ERROR) << "DO START ...";

Expand All @@ -151,8 +148,14 @@ absl::Status GpuTracer::DoStart() {
return absel::;
}
*/
rocm_tracer_->setup();
rocm_tracer_->start();
for (auto& event: rocm_tracer_->GetEvents()) {
rocm_trace_collector_->AddEvent(std::move(event));
}
LOG(ERROR) << "DO START after moving events...";
rocm_trace_collector_->Flush();
LOG(ERROR) << "DO START after flush...";

LOG(ERROR) << "Export XSpace after flush...";
return absl::OkStatus();
}

Expand All @@ -168,8 +171,8 @@ absl::Status GpuTracer::Start() {
}

// Stops the ROCm tracer for this session: calls stop() and then shutdown() on
// rocm_tracer_, and always returns absl::OkStatus().
// NOTE(review): the same two calls also appear commented out directly below
// the live ones -- confirm which variant is intended and delete the other.
absl::Status GpuTracer::DoStop() {
rocm_tracer_->stop();
rocm_tracer_->shutdown();
// rocm_tracer_->stop();
// rocm_tracer_->shutdown();
return absl::OkStatus();
}

Expand All @@ -182,6 +185,9 @@ absl::Status GpuTracer::Stop() {
}

absl::Status GpuTracer::CollectData(XSpace* space) {
if (rocm_trace_collector_) rocm_trace_collector_->Export(space);
LOG(ERROR) << "CollectData XSpace = " << space;

switch (profiling_state_) {
case State::kNotStarted:
VLOG(3) << "No trace data collected, session wasn't started";
Expand All @@ -196,7 +202,7 @@ absl::Status GpuTracer::CollectData(XSpace* space) {
VLOG(3) << "No trace data collected";
return absl::OkStatus();
case State::kStoppedOk: {
if (rocm_trace_collector_) rocm_trace_collector_->Export(space);
// if (rocm_trace_collector_) rocm_trace_collector_->Export(space);
return absl::OkStatus();
}
}
Expand All @@ -211,15 +217,15 @@ std::unique_ptr<profiler::ProfilerInterface> CreateGpuTracer(
return nullptr;
}

/*
profiler::RocmTracer* rocm_tracer =
profiler::RocmTracer::GetRocmTracerSingleton();
LOG(ERROR) << "cj rocm_tracer is available = " << rocm_tracer->IsAvailable();
LOG(ERROR) << "Traced events = " << rocm_tracer->GetEvents().size();
if (!rocm_tracer->IsAvailable()) {
return nullptr;
}
*/
return std::make_unique<profiler::GpuTracer>();

return std::make_unique<profiler::GpuTracer>(rocm_tracer);
}

auto register_rocm_gpu_tracer_factory = [] {
Expand Down
6 changes: 3 additions & 3 deletions xla/backends/profiler/gpu/rocm_collector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -455,9 +455,9 @@ class RocmTraceCollectorImpl : public profiler::RocmTraceCollector {
};

// Appends one tracer event to the collector's pending event list.
//
// The event is taken by rvalue reference and moved into events_, so the
// caller's object is left in a moved-from state afterwards. The append is
// performed while holding event_maps_mutex_.
void RocmTraceCollectorImpl::AddEvent(RocmTracerEvent&& event) {
  // Deliberately no logging here: this is invoked once per traced event, and
  // an ERROR-level message per event floods the log with non-errors.
  mutex_lock lock(event_maps_mutex_);
  events_.push_back(std::move(event));
}

void RocmTraceCollectorImpl::Flush() {
Expand Down
67 changes: 40 additions & 27 deletions xla/backends/profiler/gpu/rocm_tracer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,16 @@ extern "C" rocprofiler_tool_configure_result_t* rocprofiler_configure(
rocprofiler_client_id_t* id
);

auto rocmtracer_singleton = xla::profiler::RocmTracer::GetRocmTracerSingleton();
// auto rocmtracer_singleton = xla::profiler::RocmTracer::GetRocmTracerSingleton();

// Alias for rocprofiler::sdk::utility::name_info keyed by
// rocprofiler_buffer_tracing_kind_t. Tp is forwarded as name_info's second
// template argument and defaults to std::string_view.
template <typename Tp = std::string_view>
using buffer_name_info_t = rocprofiler::sdk::utility::name_info<rocprofiler_buffer_tracing_kind_t, Tp>;

namespace se = ::stream_executor;

namespace xla {
namespace profiler {
namespace profiler {

namespace {
using xla::common::buffer_name_info;
using xla::common::call_stack_t;
Expand All @@ -93,6 +94,8 @@ RocmTraceCollectorOptions GetRocmTraceCollectorOptions(
return options;
}



void
tool_code_object_callback(rocprofiler_callback_tracing_record_t record,
rocprofiler_user_data_t* user_data,
Expand Down Expand Up @@ -167,6 +170,7 @@ rocm_get_buffer_tracing_names()
return cb_name_info;
}


void
tool_tracing_callback(rocprofiler_context_id_t context,
rocprofiler_buffer_id_t buffer_id,
Expand All @@ -179,8 +183,8 @@ tool_tracing_callback(rocprofiler_context_id_t context,
assert(drop_count == 0 && "drop count should be zero for lossless policy");

auto rocmtracer_singleton = xla::profiler::RocmTracer::GetRocmTracerSingleton();
LOG(ERROR) << "rocmtracer_singleton = " << rocmtracer_singleton;

/*
static bool first_cb = true;
if (rocmtracer_singleton->IsAvailable() && first_cb) {
Expand All @@ -192,6 +196,7 @@ tool_tracing_callback(rocprofiler_context_id_t context,
rocmtracer_singleton->Enable(rocm_trace_collector_.get());
first_cb = false;
}
*/

/*
if(num_headers == 0)
Expand Down Expand Up @@ -337,15 +342,36 @@ tool_tracing_callback(rocprofiler_context_id_t context,
record->thread_id,
0};

LOG(ERROR) << "CJ after tmp : " << info.str();
LOG(ERROR) << "CJ number of GPU = " << rocmtracer_singleton->NumGpus();
LOG(ERROR) << "cj collector = " << rocmtracer_singleton->get_collector();
rocmtracer_singleton->AppendEvent(tmp);
// LOG(ERROR) << "CJ after tmp : " << info.str();
// LOG(ERROR) << "CJ number of GPU = " << rocmtracer_singleton->NumGpus();
// LOG(ERROR) << "cj collector = " << rocmtracer_singleton->get_collector();

// xla::profiler::all_rocm_events_1.push_back(tmp);

/*
for (auto &event: all_rocm_events) {
std::ostringstream oss;
// oss << "correlation_id=" << event.correlation_id;
// oss << ",type=" << GetRocmTracerEventTypeName(event.type);
// oss << ",source=" << GetRocmTracerEventSourceName(event.source);
// oss << ",domain=" << GetRocmTracerEventDomainName(event.domain);
oss << ",name=" << event.name;
oss << ",duration=" << (event.end_time_ns - event.start_time_ns) / 1000;
oss << ",device_id=" << event.device_id;
oss << ",thread_id=" << event.thread_id;
oss << ",stream_id=" << event.stream_id;
LOG(ERROR) << oss.str();
}
*/
/*
if (rocmtracer_singleton && rocmtracer_singleton->get_collector()) {
rocmtracer_singleton->get_collector()->AddEvent(std::move(tmp));
} else {
LOG(ERROR) << "Collector not initialized";
}
*/
}
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)
Expand Down Expand Up @@ -519,8 +545,8 @@ void RocmTracer::stop(){
ROCPROFILER_CALL(se::wrap::rocprofiler_stop_context(client_ctx), "context stop");
}


/* static */ RocmTracer* RocmTracer::GetRocmTracerSingleton() {
/*
RocmTracer* RocmTracer::GetRocmTracerSingleton() {
LOG(INFO) << "Entering GetRocmTracerSingleton...";
static std::once_flag flag;
Expand All @@ -538,10 +564,15 @@ void RocmTracer::stop(){
abort(); // Ensure the program stops if initialization fails.
}
LOG(INFO) << "Returning RocmTracer singleton instance.";
LOG(INFO) << "Returning RocmTracer singleton instance." << instance;
return instance;
}
*/

// Returns the shared RocmTracer instance, constructing it on the first call.
// The object is allocated with `new` and this function never frees it, so
// every call hands back the same pointer.
/* static */ RocmTracer* RocmTracer::GetRocmTracerSingleton() {
  static RocmTracer* const instance = new RocmTracer();
  return instance;
}

bool RocmTracer::IsAvailable() const {
return GetRocmTracerSingleton() != nullptr;
Expand Down Expand Up @@ -602,24 +633,6 @@ rocprofiler_configure(uint32_t version,
// store client info
xla::profiler::client_id = id;
LOG(ERROR) << "Configure rocprofiler-sdk...\n";

// auto rocmtracer_singleton = xla::profiler::RocmTracer::GetRocmTracerSingleton();
// LOG(ERROR) << "cj -1 rocprofiler_configure() with rocm collector";
/*
auto trace_collector_options = GetRocmTraceCollectorOptions(rocmtracer_singleton->NumGpus());
LOG(ERROR) << "cj 0 rocprofiler_configure() with rocm collector";
uint64_t start_gputime_ns = rocmtracer_singleton->GetTimestamp();
LOG(ERROR) << "cj 1 rocprofiler_configure() with rocm collector";
uint64_t start_walltime_ns = tsl::EnvTime::NowNanos();
auto rocm_trace_collector_ = xla::profiler::CreateRocmCollector(
trace_collector_options, start_walltime_ns, start_gputime_ns);
LOG(ERROR) << "cj 2 rocprofiler_configure() with rocm collector";
auto tracer_options = GetRocmTracerOptions();
LOG(ERROR) << "cj 3 rocprofiler_configure() with rocm collector";
rocmtracer_singleton->Enable(tracer_options, rocm_trace_collector_.get());
LOG(ERROR) << "cj 4 rocprofiler_configure() with rocm collector";
*/

// compute major/minor/patch version info
uint32_t major = version / 10000;
Expand Down
5 changes: 5 additions & 0 deletions xla/backends/profiler/gpu/rocm_tracer.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ limitations under the License.
namespace xla {
namespace profiler {

// std::vector<RocmTracerEvent> all_rocm_events_1;

struct RocmTracerOptions {
std::set<uint32_t> api_tracking_set; // actual api set we want to profile

Expand All @@ -65,6 +67,8 @@ class RocmTracer {
static int NumGpus();
void Enable(RocmTraceCollector* collector);
RocmTraceCollector* get_collector() { return collector_; }
// Appends `event` to the tracer's buffered event list (rocm_events_). The
// parameter is taken by value and then moved into the list, so the event is
// not copied a second time.
// NOTE(review): no lock is taken here -- confirm that nothing appends while
// another thread is reading the list through GetEvents().
void AppendEvent(RocmTracerEvent event) {
  rocm_events_.push_back(std::move(event));
}
// Returns the buffered events by value (a copy of rocm_events_); the tracer's
// own list is not modified by this call.
RocmTracerEvent_t GetEvents() {
  RocmTracerEvent_t snapshot = rocm_events_;
  return snapshot;
}

void setup() CLIENT_API;
void start() CLIENT_API;
Expand All @@ -80,6 +84,7 @@ class RocmTracer {
int num_gpus_;
// std::optional<RocmTracerOptions> options_;
RocmTraceCollector* collector_ = nullptr;
RocmTracerEvent_t rocm_events_;
static tsl::mutex mtx;

public:
Expand Down

0 comments on commit d7c71e8

Please sign in to comment.