[GC] Add IO related size policy for Parallel GC #847

Merged 1 commit on Jul 31, 2024
10 changes: 10 additions & 0 deletions src/hotspot/share/gc/parallel/parallelArguments.cpp
@@ -66,6 +66,16 @@ void ParallelArguments::initialize() {
if (FLAG_IS_DEFAULT(MaxHeapFreeRatio)) {
FLAG_SET_DEFAULT(MaxHeapFreeRatio, 100);
}
if (FLAG_IS_DEFAULT(UseIOPrioritySizePolicy)) {
FLAG_SET_DEFAULT(UseIOPrioritySizePolicy, true);
}
}

if (UseIOPrioritySizePolicy && !UseAdaptiveSizePolicy) {
// The user explicitly enabled UseIOPrioritySizePolicy but disabled
// UseAdaptiveSizePolicy; this combination is invalid.
UseIOPrioritySizePolicy = false;
warning("UseIOPrioritySizePolicy requires UseAdaptiveSizePolicy to be enabled");
}

// If InitialSurvivorRatio or MinSurvivorRatio were not specified, but the
8 changes: 7 additions & 1 deletion src/hotspot/share/gc/parallel/parallel_globals.hpp
@@ -78,6 +78,12 @@
"Delay in scheduling GC workers (in milliseconds)") \
\
product(bool, PSChunkLargeArrays, true, \
"Process large arrays in chunks")
"Process large arrays in chunks") \
\
product(bool, UseIOPrioritySizePolicy, false, \
"Eagerly decrease heap size when IO wait is high") \
\
product(float, IOPrioritySizePolicyEdenScale, 8.0, \
"How much to decrease eden when IO wait is high") \

#endif // SHARE_GC_PARALLEL_PARALLEL_GLOBALS_HPP
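Both new flags are declared as product flags, so they follow standard -XX command-line handling. A hypothetical invocation (the application name is a placeholder; the -Xlog selector matches the patch's log_debug(gc, ergo, heap) tags):

java -XX:+UseParallelGC -XX:+UseAdaptiveSizePolicy \
     -XX:+UseIOPrioritySizePolicy -XX:IOPrioritySizePolicyEdenScale=8.0 \
     -Xlog:gc+ergo+heap=debug MyApp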
288 changes: 288 additions & 0 deletions src/hotspot/share/gc/parallel/psAdaptiveSizePolicy.cpp
@@ -37,6 +37,249 @@

#include <math.h>

class IOPolicy : public CHeapObj<mtGC> {
class IOWaitRecord {
public:
IOWaitRecord(bool *active) : _active(active), _last_cpu_total(0), _last_cpu_iowait(0) {}
void start() {
fill_value(&_last_cpu_total, &_last_cpu_iowait);
}
double stop() {
size_t total, iowait;
fill_value(&total, &iowait);

size_t total_diff = total - _last_cpu_total;
size_t iowait_diff = iowait - _last_cpu_iowait;
if (total_diff == 0) {
log_debug(gc, ergo, heap)("fail to record, cpu total diff is 0");
return 0;
} else {
return (double)iowait_diff / (double)total_diff;
}
}
private:
// If anything unexpected happens while recording, deactivate the policy.
bool *_active;
size_t _last_cpu_total;
size_t _last_cpu_iowait;
void fill_value_fail(FILE *file) {
if (file != NULL) {
fclose(file);
}
log_warning(gc, ergo, heap)("Deactivate UseIOPrioritySizePolicy due to failed to parse cpu stat");
*_active = false;
}
void fill_value(size_t *total, size_t *iowait) {
FILE *file = fopen("/proc/stat", "r");
if (file == NULL) {
fill_value_fail(file);
return;
}

char line[256];
char *read_line = fgets(line, sizeof(line), file);
if (read_line == NULL) {
fill_value_fail(file);
return;
}

/*
 * The first line of /proc/stat is expected to look like:
 * cpu 417487649 75106 102895030 23107566512 152075 65480092 6013218 0 0 0
 */
size_t user, nice, system, idle, iowait_time, irq, softirq, steal, guest, guest_nice;
int parse_line = sscanf(line, "cpu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user,
&nice, &system, &idle, &iowait_time, &irq, &softirq, &steal, &guest, &guest_nice);
if (parse_line != 10) {
fill_value_fail(file);
return;
}

*total = user + nice + system + idle + iowait_time + irq + softirq + steal + guest + guest_nice;
*iowait = iowait_time;
fclose(file);
}
};
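To see the same sampling scheme outside HotSpot, here is a minimal standalone C++ sketch (not part of the patch, Linux-only) that reads /proc/stat twice and reports the iowait fraction over the interval, mirroring fill_value() and stop() above:

#include <cstdio>
#include <unistd.h>

static bool read_cpu_times(unsigned long *total, unsigned long *iowait) {
  FILE *file = fopen("/proc/stat", "r");
  if (file == NULL) return false;
  unsigned long user, nice_v, system_v, idle, iowait_time, irq, softirq,
                steal, guest, guest_nice;
  int parsed = fscanf(file, "cpu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
                      &user, &nice_v, &system_v, &idle, &iowait_time,
                      &irq, &softirq, &steal, &guest, &guest_nice);
  fclose(file);
  if (parsed != 10) return false;
  *total = user + nice_v + system_v + idle + iowait_time + irq + softirq +
           steal + guest + guest_nice;
  *iowait = iowait_time;
  return true;
}

int main() {
  unsigned long t0, w0, t1, w1;
  if (!read_cpu_times(&t0, &w0)) return 1;
  sleep(1); // stands in for the mutator interval between two collections
  if (!read_cpu_times(&t1, &w1)) return 1;
  unsigned long total_diff = t1 - t0;
  if (total_diff == 0) return 1; // same guard as IOWaitRecord::stop()
  printf("iowait fraction: %.3f\n", (double)(w1 - w0) / (double)total_diff);
  return 0;
}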

class UserTimeRecord {
public:
UserTimeRecord(bool *active) : _active(active), _starting_user_time(0), _starting_system_time(0), _starting_real_time(0) {}
void start() {
if (!os::getTimesSecs(&_starting_real_time, &_starting_user_time, &_starting_system_time)) {
log_warning(gc, ergo, heap)("Deactivate UseIOPrioritySizePolicy due to failed to get cpu times");
*_active = false;
}
}
double stop() {
const static double INVALID = 99999;
double real_time, user_time, system_time;
if (!os::getTimesSecs(&real_time, &user_time, &system_time)) {
log_warning(gc, ergo, heap)("Deactivate UseIOPrioritySizePolicy due to failed to get cpu times");
*_active = false;
return INVALID;
}
double user_diff = user_time - _starting_user_time;
double real_diff = real_time - _starting_real_time;
// The interval is too short to compute a meaningful user-time percentage,
// so return a very large value to avoid triggering a memory reduction.
if (real_diff < 0.00001) {
log_debug(gc, ergo, heap)("fail to record, real_duration too small: %f", real_diff);
return INVALID;
}
return user_diff / real_diff;
}
private:
// If anything unexpected happens while recording, deactivate the policy.
bool *_active;
double _starting_user_time;
double _starting_system_time;
double _starting_real_time;
};

double _default_throughput_goal;

double _mutator_iowait_percent;
double _mutator_user_percent;
elapsedTimer _io_triggerred_major_gc_timer;

IOWaitRecord _io_wait_record;
UserTimeRecord _user_time_record;

bool _active;
bool _should_reduce_heap;

public:
IOPolicy(double default_throughput_goal) :
_default_throughput_goal(default_throughput_goal),
_mutator_iowait_percent(0.0),
_mutator_user_percent(0.0),
_io_triggerred_major_gc_timer(),
_io_wait_record(&_active),
_user_time_record(&_active),
_active(true),
_should_reduce_heap(false)
{
_io_triggerred_major_gc_timer.start();
start_mutator_record();
if (FLAG_IS_CMDLINE(NewSize)) {
log_debug(gc, ergo, heap)("NewSize or Xmn is set, which may introduce a large size for min young size");
}
if (MaxHeapSize == InitialHeapSize) {
log_debug(gc, ergo, heap)("Xmx is equal to Xms, which may introduce a large size for min young size");
}
log_debug(gc, ergo, heap)("min size: young " SIZE_FORMAT "M, old " SIZE_FORMAT "M. "
"IOPrioritySizePolicy can't decrease heap below these sizes",
ParallelScavengeHeap::young_gen()->min_gen_size()/M,
ParallelScavengeHeap::old_gen()->min_gen_size()/M);
}

void start_mutator_record() {
if (!_active) {
return;
}
_io_wait_record.start();
_user_time_record.start();
}

void stop_mutator_record() {
if (!_active) {
return;
}
_mutator_iowait_percent = _io_wait_record.stop();
_mutator_user_percent = _user_time_record.stop();
}

void print(double mutator_cost) const {
if (!_active) {
return;
}
log_debug(gc, ergo, heap)("mutator cost: %f, iowait : %f, user: %f", mutator_cost, _mutator_iowait_percent, _mutator_user_percent);
}

bool should_full_GC() {
if (!_active) {
return false;
}

// These thresholds were tuned with Spark on the TPC-DS workload.
const static double IOTriggerredFullGCUserThreshold = 0.75;
const static double IOTriggerredFullGCIOWaitThreshold = 0.4;
const static double IOTriggerredFullGCMinInterval = 60; // can be increased if the IO-heavy phase lasts longer

if (_mutator_user_percent < IOTriggerredFullGCUserThreshold &&
_mutator_iowait_percent > IOTriggerredFullGCIOWaitThreshold) {
_io_triggerred_major_gc_timer.stop();
if (_io_triggerred_major_gc_timer.seconds() >
IOTriggerredFullGCMinInterval) {
_io_triggerred_major_gc_timer.reset();
_io_triggerred_major_gc_timer.start();
log_debug(gc, ergo, heap)("decrease old gen by full gc");
return true;
} else {
log_debug(gc, ergo, heap)(
"decrease old gen FAILED because interval is %f < %f",
_io_triggerred_major_gc_timer.seconds(), IOTriggerredFullGCMinInterval);
_io_triggerred_major_gc_timer.start();
return false;
}
}
return false;
}
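As a worked example (numbers are illustrative, not from the patch): if the last mutator window showed 60% user time and 45% iowait, both thresholds are met (0.60 < 0.75 and 0.45 > 0.4); a full GC is then triggered only if more than 60 seconds have elapsed since the last IO-triggered one, otherwise the attempt is logged as skipped and the timer keeps accumulating.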

double calculate_reduced_throughput_goal() {
if (!_active) {
return _default_throughput_goal;
}

const static double UserThreshold = 1.0;
const static double IOWaitThreshold = 0.1;

if (_mutator_user_percent < UserThreshold &&
_mutator_iowait_percent > IOWaitThreshold) {
double reduced_throughput_goal = _default_throughput_goal - (1 - _mutator_user_percent);
_should_reduce_heap = true;
log_debug(gc, ergo, heap)("decrease throughput goal to %.3f", reduced_throughput_goal);
return reduced_throughput_goal;
} else {
_should_reduce_heap = false;
return _default_throughput_goal;
}
}
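A worked example, assuming HotSpot's usual default of GCTimeRatio=99 so that the default throughput goal is 1 - 1/(1+99) = 0.99: with 70% user time and 20% iowait, both conditions hold (0.70 < 1.0, 0.20 > 0.1) and the goal drops to 0.99 - (1 - 0.70) = 0.69, making the surrounding adaptive policy far more willing to shrink the heap.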

size_t calculate_reduced_eden_size(size_t eden_size, float avg_survivor, size_t current_eden_size) const {
if (!_active || !_should_reduce_heap) {
return eden_size;
}
size_t reduced_size;
reduced_size = MIN(eden_size, avg_survivor * IOPrioritySizePolicyEdenScale);
reduced_size = MAX(reduced_size, ParallelScavengeHeap::heap()->young_gen()->max_size() / 10);
log_debug(gc, ergo, heap)(
"decrease eden from " SIZE_FORMAT "M to " SIZE_FORMAT "M , "
"survivor avg: %fM, min threshold: " SIZE_FORMAT "M",
current_eden_size/M, reduced_size/M, avg_survivor/M,
ParallelScavengeHeap::heap()->young_gen()->max_size()/10/M);
return reduced_size;
}
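Illustrative numbers: with IOPrioritySizePolicyEdenScale at its default of 8.0, an average survivor size of 20M, a desired eden of 600M, and a 1000M max young gen, the target becomes MIN(600M, 8 x 20M) = 160M, then MAX(160M, 1000M / 10) = 160M; eden shrinks to 160M and could never drop below the 100M floor.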

size_t calculate_reduced_promo_size(size_t promo_size, float avg_promo, size_t current_promo_size) const {
if (!_active || !_should_reduce_heap) {
return promo_size;
}
const static float PromoScale = 5;
size_t reduced_size;
reduced_size = MIN(promo_size, avg_promo * PromoScale);
reduced_size = MAX(reduced_size,
ParallelScavengeHeap::heap()->old_gen()->max_gen_size() / 10);
log_debug(gc, ergo, heap)(
"decrease promotion from " SIZE_FORMAT "M to " SIZE_FORMAT "M , "
"promo avg: %fM, min threshold: " SIZE_FORMAT "M",
current_promo_size/M, reduced_size/M, avg_promo/M,
ParallelScavengeHeap::heap()->old_gen()->max_gen_size()/10/M);
return reduced_size;
}
};

PSAdaptiveSizePolicy::PSAdaptiveSizePolicy(size_t init_eden_size,
size_t init_promo_size,
size_t init_survivor_size,
@@ -54,6 +297,7 @@ PSAdaptiveSizePolicy::PSAdaptiveSizePolicy(size_t init_eden_size,
_live_at_last_full_gc(init_promo_size),
_gc_minor_pause_goal_sec(gc_minor_pause_goal_sec),
_latest_major_mutator_interval_seconds(0),
_throughput_goal(AdaptiveSizePolicy::_throughput_goal),
_young_gen_change_for_major_pause_count(0)
{
// Sizing policy statistics
Expand All @@ -75,6 +319,9 @@ PSAdaptiveSizePolicy::PSAdaptiveSizePolicy(size_t init_eden_size,

// Start the timers
_major_timer.start();
if (UseIOPrioritySizePolicy) {
_io_policy = new IOPolicy(_throughput_goal);
}
}

size_t PSAdaptiveSizePolicy::calculate_free_based_on_live(size_t live, uintx ratio_as_percentage) {
@@ -109,6 +356,21 @@ size_t PSAdaptiveSizePolicy::calculated_old_free_size_in_bytes() const {
return free_size;
}

void PSAdaptiveSizePolicy::minor_collection_begin() {
AdaptiveSizePolicy::minor_collection_begin();
if (UseIOPrioritySizePolicy) {
_io_policy->stop_mutator_record();
}
}

void PSAdaptiveSizePolicy::minor_collection_end(GCCause::Cause gc_cause) {
AdaptiveSizePolicy::minor_collection_end(gc_cause);
if (UseIOPrioritySizePolicy) {
_io_policy->start_mutator_record();
_io_policy->print(adjusted_mutator_cost());
}
}
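Note the bracketing: recording stops when a minor collection begins and restarts when it ends (and, below, restarts again after a major collection), so the iowait and user-time percentages always describe the mutator interval between collections rather than the GC pauses themselves.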

void PSAdaptiveSizePolicy::major_collection_begin() {
// Update the interval time
_major_timer.stop();
@@ -129,6 +391,9 @@ void PSAdaptiveSizePolicy::major_collection_end(size_t amount_live,
GCCause::Cause gc_cause) {
// Update the pause time.
_major_timer.stop();
if (UseIOPrioritySizePolicy) {
_io_policy->start_mutator_record();
}

if (should_update_promo_stats(gc_cause)) {
double major_pause_in_seconds = _major_timer.seconds();
@@ -168,6 +433,10 @@ void PSAdaptiveSizePolicy::major_collection_end(size_t amount_live,
assert(collection_cost >= 0.0, "Expected to be non-negative");
_major_collection_estimator->update(promo_size_in_mbytes,
collection_cost);

if (UseIOPrioritySizePolicy) {
_io_policy->print(adjusted_mutator_cost());
}
}

// Update the amount live at the end of a full GC
Expand All @@ -183,6 +452,11 @@ void PSAdaptiveSizePolicy::major_collection_end(size_t amount_live,
// that expected to be needed by the next collection, do a full
// collection now.
bool PSAdaptiveSizePolicy::should_full_GC(size_t old_free_in_bytes) {
if (UseIOPrioritySizePolicy) {
if (_io_policy->should_full_GC()) {
return true;
}
}

// A similar test is done in the scavenge's should_attempt_scavenge(). If
// this is changed, decide if that test should also be changed.
@@ -233,6 +507,10 @@ void PSAdaptiveSizePolicy::compute_eden_space_size(
size_t max_eden_size,
bool is_full_gc) {

if (UseIOPrioritySizePolicy) {
_throughput_goal = _io_policy->calculate_reduced_throughput_goal();
}

// Update statistics
// Time statistics are updated as we go, update footprint stats here
_avg_base_footprint->sample(BaseFootPrintEstimate);
@@ -855,6 +1133,11 @@ size_t PSAdaptiveSizePolicy::adjust_promo_for_footprint(

size_t reduced_size = desired_promo_size - change;

if (UseIOPrioritySizePolicy) {
reduced_size = _io_policy->calculate_reduced_promo_size(reduced_size, avg_promoted()->average(), desired_promo_size);
change = desired_promo_size - reduced_size;
}

log_trace(gc, ergo)(
"AdaptiveSizePolicy::adjust_promo_for_footprint "
"adjusting tenured gen for footprint. "
@@ -877,6 +1160,11 @@ size_t PSAdaptiveSizePolicy::adjust_eden_for_footprint(

size_t reduced_size = desired_eden_size - change;

if (UseIOPrioritySizePolicy) {
reduced_size = _io_policy->calculate_reduced_eden_size(reduced_size, avg_survived()->average(), desired_eden_size);
change = desired_eden_size - reduced_size;
}

log_trace(gc, ergo)(
"AdaptiveSizePolicy::adjust_eden_for_footprint "
"adjusting eden for footprint. "