Skip to content

8353115: GenShen: mixed evacuation candidate regions need accurate live_data #24319

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
702710e
Improve documentation of how Evac-OOM Protocol works
kdnilsen Jan 12, 2024
61b575f
Merge branch 'openjdk:master' into master
kdnilsen Jan 17, 2024
51d056f
Revert "Improve documentation of how Evac-OOM Protocol works"
kdnilsen Jan 17, 2024
ba98e42
Merge branch 'openjdk:master' into master
kdnilsen Jan 23, 2024
441487c
Merge branch 'openjdk:master' into master
kdnilsen Jan 25, 2024
dafc363
Merge branch 'openjdk:master' into master
kdnilsen Feb 2, 2024
c4c252e
Merge branch 'openjdk:master' into master
kdnilsen Feb 21, 2024
41ba86a
Merge branch 'openjdk:master' into master
kdnilsen Feb 28, 2024
f215a70
Merge branch 'openjdk:master' into master
kdnilsen Mar 1, 2024
4d6b5cd
Merge branch 'openjdk:master' into master
kdnilsen Mar 26, 2024
7fe605f
Merge branch 'openjdk:master' into master
kdnilsen Mar 28, 2024
2e224f6
Merge branch 'openjdk:master' into master
kdnilsen Apr 30, 2024
46ad5c6
Merge branch 'openjdk:master' into master
kdnilsen May 3, 2024
9a1989d
Merge branch 'openjdk:master' into master
kdnilsen May 9, 2024
4126c22
Merge branch 'openjdk:master' into master
kdnilsen Jun 12, 2024
981692e
Merge branch 'openjdk:master' into master
kdnilsen Jun 14, 2024
3a67b1f
Make GC logging less verbose
kdnilsen Jun 14, 2024
3692312
Revert "Make GC logging less verbose"
kdnilsen Jun 19, 2024
045590b
Merge branch 'openjdk:master' into master
kdnilsen Jun 26, 2024
fbbd88c
Merge branch 'openjdk:master' into master
kdnilsen Jul 8, 2024
7e0edf0
Merge branch 'openjdk:master' into master
kdnilsen Sep 12, 2024
3525369
Merge branch 'openjdk:master' into master
kdnilsen Sep 26, 2024
fe0da51
Merge branch 'openjdk:master' into master
kdnilsen Nov 6, 2024
db12fe5
Merge branch 'openjdk:master' into master
kdnilsen Dec 6, 2024
0440bae
Merge branch 'openjdk:master' into master
kdnilsen Jan 16, 2025
3bdc022
Merge branch 'openjdk:master' into master
kdnilsen Jan 19, 2025
1ee2ff1
Merge branch 'openjdk:master' into master
kdnilsen Feb 7, 2025
e6e772f
Merge branch 'openjdk:master' into master
kdnilsen Feb 27, 2025
c5a159e
Merge branch 'openjdk:master' into master
kdnilsen Mar 18, 2025
e7ca4f8
Merge branch 'openjdk:master' into master
kdnilsen Mar 20, 2025
42a93c7
Merge branch 'openjdk:master' into master
kdnilsen Mar 27, 2025
7061388
Track live and garbage for mixed-evac regions
kdnilsen Mar 28, 2025
3c1f788
Experiment with reviewer suggestion
kdnilsen Apr 9, 2025
8ff388d
Experiment 2: refinements to reduce regressions
kdnilsen Apr 9, 2025
8e820f2
Fix garbage_before_padded_for_promote()
kdnilsen Apr 10, 2025
d3cba66
Fix set_live() after full gc
kdnilsen Apr 10, 2025
eb2679a
Refactor for better abstraction
kdnilsen Apr 10, 2025
b9f828c
Adjust candidate live memory for each mixed evac
kdnilsen Apr 10, 2025
ef783d4
Remove deprecation conditional compiles
kdnilsen Apr 10, 2025
e6e44b6
Fix uninitialized variable
kdnilsen Apr 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,13 @@ class ShenandoahHeuristics : public CHeapObj<mtGC> {
#endif
}

// Overwrite the live-data value stored in this entry's _region_union with `live`.
// In ASSERT builds the entry's _union_tag is also set to is_live_data.
inline void update_livedata(size_t live) {
_region_union._live_data = live;
#ifdef ASSERT
_union_tag = is_live_data;
#endif
}

inline ShenandoahHeapRegion* get_region() const {
assert(_union_tag != is_uninitialized, "Cannot fetch region from uninitialized RegionData");
return _region;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,17 @@ bool ShenandoahOldHeuristics::prime_collection_set(ShenandoahCollectionSet* coll
return false;
}

// Between consecutive mixed-evacuation cycles, the live data within each candidate region may change due to
// promotions and old-gen evacuations. Re-sort the candidate regions in order to first evacuate regions that have
// the smallest amount of live data. These are easiest to evacuate with least effort. Doing these first allows
// us to more quickly replenish free memory with empty regions.
for (uint i = _next_old_collection_candidate; i < _last_old_collection_candidate; i++) {
ShenandoahHeapRegion* r = _region_data[i].get_region();
_region_data[i].update_livedata(r->get_live_data_bytes());
}
QuickSort::sort<RegionData>(_region_data + _next_old_collection_candidate, unprocessed_old_collection_candidates(),
compare_by_live);

_first_pinned_candidate = NOT_FOUND;

uint included_old_regions = 0;
Expand Down Expand Up @@ -312,6 +323,19 @@ void ShenandoahOldHeuristics::slide_pinned_regions_to_front() {
_next_old_collection_candidate = write_index + 1;
}

void ShenandoahOldHeuristics::recalibrate_old_collection_candidates_live_memory() {
size_t total_live_data = 0;
for (uint i = _next_old_collection_candidate; i < _last_old_collection_candidate; i++) {
ShenandoahHeapRegion* r = _region_data[i].get_region();
size_t region_live = r->get_live_data_bytes();
total_live_data += region_live;
_region_data[i].update_livedata(region_live);
}
QuickSort::sort<RegionData>(_region_data + _next_old_collection_candidate, unprocessed_old_collection_candidates(),
compare_by_live);
_live_bytes_in_unprocessed_candidates = total_live_data;
}

void ShenandoahOldHeuristics::prepare_for_old_collections() {
ShenandoahHeap* heap = ShenandoahHeap::heap();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,13 @@ class ShenandoahOldHeuristics : public ShenandoahHeuristics {
// Return true iff the collection set is primed with at least one old-gen region.
bool prime_collection_set(ShenandoahCollectionSet* set);

// Between consecutive mixed-evacuation cycles, the live data within each candidate region may change due to
// promotions and old-gen evacuations. Recompute live data in candidates and re-sort the candidate regions in
// order to first evacuate regions that have the smallest amount of live data. These are easiest to evacuate
// with least effort. Doing these first allows us to more quickly replenish free memory with entirely empty regions.
// Call this during final mark before adjusting the old-evacuation ratio.
void recalibrate_old_collection_candidates_live_memory();

// How many old-collection candidates have not yet been processed?
uint unprocessed_old_collection_candidates() const;

Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,7 @@ class ShenandoahInitMarkUpdateRegionStateClosure : public ShenandoahHeapRegionCl
ShenandoahInitMarkUpdateRegionStateClosure() : _ctx(ShenandoahHeap::heap()->marking_context()) {}

void heap_region_do(ShenandoahHeapRegion* r) {
assert(!r->has_live(), "Region %zu should have no live data", r->index());
assert(!r->has_marked(), "Region %zu should have no marked data", r->index());
if (r->is_active()) {
// Check if region needs updating its TAMS. We have updated it already during concurrent
// reset, so it is very likely we don't need to do another write here. Since most regions
Expand Down
6 changes: 4 additions & 2 deletions src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -925,12 +925,14 @@ class ShenandoahPostCompactClosure : public ShenandoahHeapRegionClosure {
private:
ShenandoahHeap* const _heap;
bool _is_generational;
ShenandoahMarkingContext* _mark_context;
size_t _young_regions, _young_usage, _young_humongous_waste;
size_t _old_regions, _old_usage, _old_humongous_waste;

public:
ShenandoahPostCompactClosure() : _heap(ShenandoahHeap::heap()),
_is_generational(_heap->mode()->is_generational()),
_mark_context(_heap->complete_marking_context()),
_young_regions(0),
_young_usage(0),
_young_humongous_waste(0),
Expand Down Expand Up @@ -983,7 +985,7 @@ class ShenandoahPostCompactClosure : public ShenandoahHeapRegionClosure {
ShenandoahGenerationalFullGC::account_for_region(r, _young_regions, _young_usage, _young_humongous_waste);
}
}
r->set_live_data(live);
r->set_live_data_after_fullgc(live);
r->reset_alloc_metadata();
}

Expand Down Expand Up @@ -1093,7 +1095,7 @@ class ShenandoahMCResetCompleteBitmapTask : public WorkerTask {
ShenandoahHeap* heap = ShenandoahHeap::heap();
ShenandoahMarkingContext* const ctx = heap->complete_marking_context();
while (region != nullptr) {
if (heap->is_bitmap_slice_committed(region) && !region->is_pinned() && region->has_live()) {
if (heap->is_bitmap_slice_committed(region) && !region->is_pinned() && region->has_marked()) {
ctx->clear_bitmap(region);
}
region = _regions.next();
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2561,6 +2561,10 @@ void ShenandoahHeap::rebuild_free_set(bool concurrent) {
// The computation of bytes_of_allocation_runway_before_gc_trigger is quite conservative so consider all of this
// available for transfer to old. Note that transfer of humongous regions does not impact available.
ShenandoahGenerationalHeap* gen_heap = ShenandoahGenerationalHeap::heap();
ShenandoahOldGeneration* old_gen = gen_heap->old_generation();
ShenandoahOldHeuristics* old_heuristics = old_gen->heuristics();

old_heuristics->recalibrate_old_collection_candidates_live_memory();
size_t allocation_runway = gen_heap->young_generation()->heuristics()->bytes_of_allocation_runway_before_gc_trigger(young_cset_regions);
gen_heap->compute_old_generation_balance(allocation_runway, old_cset_regions);

Expand Down
7 changes: 5 additions & 2 deletions src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ ShenandoahHeapRegion::ShenandoahHeapRegion(HeapWord* start, size_t index, bool c
_plab_allocs(0),
_live_data(0),
_critical_pins(0),
_mixed_candidate_garbage_words(0),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a new field to track this? During final_mark, we call increase_live_data_alloc_words to add top - TAMS to _live_data to account for objects allocated during mark. Could we "fix" get_live_data so that it always returns marked objects (counted by increase_live_data_gc_words) plus top - TAMS? This way, the live data would not become stale after final_mark and we wouldn't have another field to manage. What do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good idea. Let me experiment with this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My experiment with an initial attempt at this failed with over 60 failures. The "problem" is that we often consult get_live_data() in contexts from which it is "not appropriate" to add (top - TAMS) to the atomic volatile ShenandoahHeapRegion::_live_data. I think most of these are asserts. I have so far confirmed that there are at least two different places that need to be fixed. Not sure how many total scenarios.

I'm willing to move forward with changes to the failing asserts to make this change work. I think the code would be cleaner with your suggested refactor. It just makes this PR a little more far-reaching than the original.

See the most recent commit on this PR to see the direction this would move us. Let me know if you think I should move forward with more refactoring, or revert this most recent change.

Thanks.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does look simpler. Do you have an example of one of the failing asserts?

One thing I hadn't considered is how "hot" ShenandoahHeapRegion::get_live_data_words is. Is there going to be a significant performance hit if we make this method do more work? It does look like this method is called frequently.

Copy link
Contributor Author

@kdnilsen kdnilsen Apr 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Examples:
FullGC worker:

   void ShenandoahMCResetCompleteBitmapTask::work(uint worker_id) {
    ShenandoahParallelWorkerSession worker_session(worker_id);
    ShenandoahHeapRegion* region = _regions.next();
    ShenandoahHeap* heap = ShenandoahHeap::heap();
    ShenandoahMarkingContext* const ctx = heap->complete_marking_context();
    while (region != nullptr) {
      if (heap->is_bitmap_slice_committed(region) && !region->is_pinned() && region->has_marked()) {
        //  kelvin replacing has_live() with new method has_marked() because has_live() calls get_live_data_words()
        //  and pointer_delta() asserts out because TAMS is not less than top().  has_marked() does what has_live()
        //  used to do...
        ctx->clear_bitmap(region);
      }
      region = _regions.next();
    }
  }

ShenandoahInitMarkUpdateRegionStateClosure::heap_region_do() {

-    assert(!r->has_live(), "Region %zu should have no live data", r->index());
+    assert(!r->has_marked(), "Region %zu should have no marked data", r->index());

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure about performance impact, other than implementing and testing...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect performance impact is minimal.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've committed changes that endeavor to implement the suggested refactor. Performance impact does appear to be minimal. This broader refactoring does change behavior slightly. In particular:

  1. We now have a better understanding of live-memory evacuated during mixed evacuations. This allows the selection of old-candidates for mixed evacuations to be more conservative. We'll have fewer old regions in order to honor the intended budget.
  2. Potentially, this will result in more mixed evacuations, but each mixed evacuation should take less time.
  3. There should be no impact on behavior of traditional Shenandoah.

On one recently completed test run, we observed the following impacts compared to tip:
Shenandoah

+80.69% specjbb2015/trigger_failure p=0.00000
Control: 58.250 (+/- 13.48 ) 110
Test: 105.250 (+/- 33.13 ) 30

Genshen

-19.46% jme/context_switch_count p=0.00176
Control: 117.420 (+/- 28.01 ) 108
Test: 98.292 (+/- 32.76 ) 30

Perhaps we need more data to decide whether this is "significant".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This result seems to be consistent. The effect on traditional Shenandoah is apparently to reduce the size of traditional Shenandoah collection sets also because certain regions that would have been collected are now rejected due to "better awareness" of how much live data will need to be copied. The amount of garbage associated with candidate regions for the young collection set is reduced by the amount of allocations above TAMS.
Previously, this had been erroneously reported as garbage. This has the effect of delaying reclamation of some garbage, resulting in an increase in allocation failures on the specjbb2015 workload.

We might argue that the original behavior was incorrect, in that it was allowing violation of the intended evacuation budget.

We apparently were getting away with this violation because we were able to flip mutator regions to collector space, and/or because evacuation waste was sufficient to accommodate the unbudgeted evacuations.

Now that we have more accurate accounting of live memory, we could perhaps slightly reduce the default evacuation waste budget if we want to claw back the losses in specjbb performance (to enable larger collection sets) as part of this PR.

_update_watermark(start),
_age(0),
#ifdef SHENANDOAH_CENSUS_NOISE
Expand Down Expand Up @@ -391,9 +392,11 @@ size_t ShenandoahHeapRegion::get_plab_allocs() const {
return _plab_allocs * HeapWordSize;
}

void ShenandoahHeapRegion::set_live_data(size_t s) {
void ShenandoahHeapRegion::set_live_data_after_fullgc(size_t s) {
assert(Thread::current()->is_VM_thread(), "by VM thread");
_live_data = (s >> LogHeapWordSize);
ShenandoahMarkingContext* marking_context = ShenandoahHeap::heap()->complete_marking_context();
size_t words_allocated_above_tams = pointer_delta(top(), marking_context->top_at_mark_start(this));
_live_data = (s >> LogHeapWordSize) - words_allocated_above_tams;
}

void ShenandoahHeapRegion::print_on(outputStream* st) const {
Expand Down
20 changes: 16 additions & 4 deletions src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,8 @@ class ShenandoahHeapRegion {
volatile size_t _live_data;
volatile size_t _critical_pins;

size_t _mixed_candidate_garbage_words;

HeapWord* volatile _update_watermark;

uint _age;
Expand Down Expand Up @@ -367,18 +369,28 @@ class ShenandoahHeapRegion {
inline HeapWord* allocate(size_t word_size, const ShenandoahAllocRequest& req);

inline void clear_live_data();
void set_live_data(size_t s);

// Increase live data for newly allocated region
inline void increase_live_data_alloc_words(size_t s);
void set_live_data_after_fullgc(size_t s);

// Increase live data for region scanned with GC
inline void increase_live_data_gc_words(size_t s);

inline bool has_marked() const;

inline bool has_live() const;

// Returns bytes identified as live by most recently completed marking effort. Can only be called during safepoints.
inline size_t get_marked_data_bytes() const;

// Returns bytes identified as live by most recently completed marking effort, plus allocations above TAMS.
// Can only be called during safepoints.
inline size_t get_live_data_bytes() const;

// Returns words identified as live by most recently completed marking effort, plus allocations above TAMS.
// Can only be called during safepoints.
inline size_t get_live_data_words() const;

// Returns garbage by calculating difference between used and get_live_data_words. Can only be called at
// safepoints. Allocations above TAMS are considered live.
inline size_t garbage() const;

void print_on(outputStream* st) const;
Expand Down
28 changes: 18 additions & 10 deletions src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,6 @@ inline void ShenandoahHeapRegion::adjust_alloc_metadata(ShenandoahAllocRequest::
}
}

inline void ShenandoahHeapRegion::increase_live_data_alloc_words(size_t s) {
internal_increase_live_data(s);
}

inline void ShenandoahHeapRegion::increase_live_data_gc_words(size_t s) {
internal_increase_live_data(s);
if (ShenandoahPacing) {
Expand All @@ -147,14 +143,25 @@ inline void ShenandoahHeapRegion::clear_live_data() {
Atomic::store(&_live_data, (size_t)0);
}

// Converts the word count held in _live_data to bytes; allocations above TAMS are not added here.
inline size_t ShenandoahHeapRegion::get_marked_data_bytes() const {
  const size_t marked_words = Atomic::load(&_live_data);
  return marked_words * HeapWordSize;
}

inline size_t ShenandoahHeapRegion::get_live_data_words() const {
return Atomic::load(&_live_data);
ShenandoahMarkingContext *ctx = ShenandoahHeap::heap()->complete_marking_context();
HeapWord* tams = ctx->top_at_mark_start(this);
size_t words_above_tams = pointer_delta(top(), tams);
return Atomic::load(&_live_data) + words_above_tams;
}

// Byte-valued counterpart of get_live_data_words().
inline size_t ShenandoahHeapRegion::get_live_data_bytes() const {
  const size_t live_words = get_live_data_words();
  return live_words * HeapWordSize;
}

inline bool ShenandoahHeapRegion::has_marked() const {
return Atomic::load(&_live_data) != 0;
}

inline bool ShenandoahHeapRegion::has_live() const {
return get_live_data_words() != 0;
}
Expand All @@ -163,20 +170,21 @@ inline size_t ShenandoahHeapRegion::garbage() const {
assert(used() >= get_live_data_bytes(),
"Live Data must be a subset of used() live: %zu used: %zu",
get_live_data_bytes(), used());

size_t result = used() - get_live_data_bytes();
return result;
}

inline size_t ShenandoahHeapRegion::garbage_before_padded_for_promote() const {
assert(get_top_before_promote() != nullptr, "top before promote should not equal null");
size_t used_before_promote = byte_size(bottom(), get_top_before_promote());
assert(used_before_promote >= get_live_data_bytes(),
assert(used_before_promote >= get_marked_data_bytes(),
"Live Data must be a subset of used before promotion live: %zu used: %zu",
get_live_data_bytes(), used_before_promote);
size_t result = used_before_promote - get_live_data_bytes();
get_marked_data_bytes(), used_before_promote);
ShenandoahMarkingContext *ctx = ShenandoahHeap::heap()->complete_marking_context();
HeapWord* tams = ctx->top_at_mark_start(this);
size_t bytes_allocated_during_mark = pointer_delta(get_top_before_promote(), tams) * HeapWordSize;
size_t result = used_before_promote - (get_marked_data_bytes() + bytes_allocated_during_mark);
return result;

}

inline HeapWord* ShenandoahHeapRegion::get_update_watermark() const {
Expand Down
15 changes: 0 additions & 15 deletions src/hotspot/share/gc/shenandoah/shenandoahHeapRegionClosures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,6 @@ ShenandoahFinalMarkUpdateRegionStateClosure::ShenandoahFinalMarkUpdateRegionStat

void ShenandoahFinalMarkUpdateRegionStateClosure::heap_region_do(ShenandoahHeapRegion* r) {
if (r->is_active()) {
if (_ctx != nullptr) {
// _ctx may be null when this closure is used to sync only the pin status
// update the watermark of old regions. For old regions we cannot reset
// the TAMS because we rely on that to keep promoted objects alive after
// old marking is complete.

// All allocations past TAMS are implicitly live, adjust the region data.
// Bitmaps/TAMS are swapped at this point, so we need to poll complete bitmap.
HeapWord *tams = _ctx->top_at_mark_start(r);
HeapWord *top = r->top();
if (top > tams) {
r->increase_live_data_alloc_words(pointer_delta(top, tams));
}
}

// We are about to select the collection set, make sure it knows about
// current pinning status. Also, this allows trashing more regions that
// now have their pinning status dropped.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class ShenandoahOldHeuristicTest : public ::testing::Test {
region->set_affiliation(OLD_GENERATION);
region->make_regular_allocation(OLD_GENERATION);
size_t live_bytes = ShenandoahHeapRegion::region_size_bytes() - garbage_bytes;
region->increase_live_data_alloc_words(live_bytes / HeapWordSize);
region->increase_live_data_gc_words(live_bytes / HeapWordSize);
region->set_top(region->end());
return region->garbage();
}
Expand Down