Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,12 @@ Number ARC headers to evict per sub-list before proceeding to another sub-list.
This batch-style operation prevents entire sub-lists from being evicted at once
but comes at a cost of additional unlocking and locking.
.
.It Sy zfs_arc_evict_batches_limit Ns = Ns Sy 5 Pq uint
Number of
.Sy zfs_arc_evict_batch_limit
batches to process per parallel eviction task under heavy load to reduce number
of context switches.
.
.It Sy zfs_arc_evict_threads Ns = Ns Sy 0 Pq int
Sets the number of ARC eviction threads to be used.
.Pp
Expand Down
69 changes: 42 additions & 27 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,12 @@ static uint_t zfs_arc_eviction_pct = 200;
*/
static uint_t zfs_arc_evict_batch_limit = 10;

/*
* Number batches to process per parallel eviction task under heavy load to
* reduce number of context switches.
*/
static uint_t zfs_arc_evict_batches_limit = 5;

/* number of seconds before growing cache again */
uint_t arc_grow_retry = 5;

Expand Down Expand Up @@ -3900,7 +3906,7 @@ arc_set_need_free(void)

static uint64_t
arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
uint64_t spa, uint64_t bytes)
uint64_t spa, uint64_t bytes, boolean_t *more)
{
multilist_sublist_t *mls;
uint64_t bytes_evicted = 0, real_evicted = 0;
Expand Down Expand Up @@ -3984,6 +3990,10 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,

multilist_sublist_unlock(mls);

/* Indicate if another iteration may be productive. */
if (more)
*more = (hdr != NULL);

/*
* Increment the count of evicted bytes, and wake up any threads that
* are waiting for the count to reach this value. Since the list is
Expand All @@ -4004,21 +4014,12 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
while ((aw = list_head(&arc_evict_waiters)) != NULL &&
aw->aew_count <= arc_evict_count) {
list_remove(&arc_evict_waiters, aw);
cv_broadcast(&aw->aew_cv);
cv_signal(&aw->aew_cv);
}
}
arc_set_need_free();
mutex_exit(&arc_evict_lock);

/*
* If the ARC size is reduced from arc_c_max to arc_c_min (especially
* if the average cached block is small), eviction can be on-CPU for
* many seconds. To ensure that other threads that may be bound to
* this CPU are able to make progress, make a voluntary preemption
* call here.
*/
kpreempt(KPREEMPT_SYNC);

return (bytes_evicted);
}

Expand Down Expand Up @@ -4079,8 +4080,18 @@ static void
arc_evict_task(void *arg)
{
evict_arg_t *eva = arg;
eva->eva_evicted = arc_evict_state_impl(eva->eva_ml, eva->eva_idx,
eva->eva_marker, eva->eva_spa, eva->eva_bytes);
uint64_t total_evicted = 0;
boolean_t more;
uint_t batches = zfs_arc_evict_batches_limit;

/* Process multiple batches to amortize taskq dispatch overhead. */
do {
total_evicted += arc_evict_state_impl(eva->eva_ml,
eva->eva_idx, eva->eva_marker, eva->eva_spa,
eva->eva_bytes - total_evicted, &more);
} while (total_evicted < eva->eva_bytes && --batches > 0 && more);

eva->eva_evicted = total_evicted;
}

static void
Expand Down Expand Up @@ -4221,18 +4232,19 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,

if (bytes == ARC_EVICT_ALL) {
evict = bytes;
} else if (left > ntasks * MIN_EVICT_SIZE) {
} else if (left >= ntasks * MIN_EVICT_SIZE) {
evict = DIV_ROUND_UP(left, ntasks);
} else {
ntasks = DIV_ROUND_UP(left, MIN_EVICT_SIZE);
if (ntasks == 1)
ntasks = left / MIN_EVICT_SIZE;
if (ntasks < 2)
use_evcttq = B_FALSE;
else
evict = DIV_ROUND_UP(left, ntasks);
}
}

for (int i = 0; sublists_left > 0; i++, sublist_idx++,
sublists_left--) {
uint64_t bytes_remaining;
uint64_t bytes_evicted;

/* we've reached the end, wrap to the beginning */
Expand All @@ -4254,16 +4266,17 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,
continue;
}

if (total_evicted < bytes)
bytes_remaining = bytes - total_evicted;
else
break;

bytes_evicted = arc_evict_state_impl(ml, sublist_idx,
markers[sublist_idx], spa, bytes_remaining);
markers[sublist_idx], spa, bytes - total_evicted,
NULL);

scan_evicted += bytes_evicted;
total_evicted += bytes_evicted;

if (total_evicted < bytes)
kpreempt(KPREEMPT_SYNC);
else
break;
}

if (use_evcttq) {
Expand Down Expand Up @@ -4889,7 +4902,7 @@ arc_evict_cb(void *arg, zthr_t *zthr)
*/
arc_evict_waiter_t *aw;
while ((aw = list_remove_head(&arc_evict_waiters)) != NULL) {
cv_broadcast(&aw->aew_cv);
cv_signal(&aw->aew_cv);
}
arc_set_need_free();
}
Expand Down Expand Up @@ -5170,9 +5183,8 @@ arc_wait_for_eviction(uint64_t amount, boolean_t lax, boolean_t use_reserve)

uint64_t last_count = 0;
mutex_enter(&arc_evict_lock);
if (!list_is_empty(&arc_evict_waiters)) {
arc_evict_waiter_t *last =
list_tail(&arc_evict_waiters);
arc_evict_waiter_t *last;
if ((last = list_tail(&arc_evict_waiters)) != NULL) {
last_count = last->aew_count;
} else if (!arc_evict_needed) {
arc_evict_needed = B_TRUE;
Expand Down Expand Up @@ -11290,6 +11302,9 @@ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, eviction_pct, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_batch_limit, UINT, ZMOD_RW,
"The number of headers to evict per sublist before moving to the next");

ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_batches_limit, UINT, ZMOD_RW,
"The number of batches to run per parallel eviction task");

ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, prune_task_threads, INT, ZMOD_RW,
"Number of arc_prune threads");

Expand Down
Loading