@@ -131,6 +131,8 @@ union swap_header {
*/
struct reclaim_state {
unsigned long reclaimed_pages; /* pages freed by shrinkers */
+ unsigned long scanned_objects; /* quantity of work done */
+ unsigned long deferred_objects; /* work that wasn't done */
};
#ifdef __KERNEL__
@@ -569,8 +569,11 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
* If the shrinker can't run (e.g. due to gfp_mask constraints), then
* defer the work to a context that can scan the cache.
*/
- if (shrinkctl->will_defer)
+ if (shrinkctl->will_defer) {
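+ /* Account the deferred work so shrink_node() can back off on it. */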
+ if (current->reclaim_state)
+ current->reclaim_state->deferred_objects += scan_count;
goto done;
+ }
/*
* Normally, we should not scan less than batch_size objects in one
@@ -605,6 +608,8 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
cond_resched();
}
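+ /* Account the scanning work actually done by this shrinker. */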
+ if (current->reclaim_state)
+ current->reclaim_state->scanned_objects += scanned_objects;
done:
/*
@@ -2766,7 +2771,30 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
if (reclaim_state) {
sc->nr_reclaimed += reclaim_state->reclaimed_pages;
+
+ /*
+ * If we are deferring more work than we are actually
+ * doing in the shrinkers, and we are scanning more
+ * objects than we are pages, then we have a large amount
+ * of slab cache work being deferred to kswapd.
+ * We had better back off here for a while, otherwise
+ * we risk priority windup, swap storms and OOM kills
+ * once we empty the page lists but still can't make
+ * progress on the shrinker memory.
+ *
+ * kswapd won't ever defer work as it's run under a
+ * GFP_KERNEL context and can always do work.
+ */
+ if ((reclaim_state->deferred_objects >
+ sc->nr_scanned - nr_scanned) &&
+ (reclaim_state->deferred_objects >
+ reclaim_state->scanned_objects)) {
+ wait_iff_congested(BLK_RW_ASYNC, HZ/50);
+ }
+
reclaim_state->reclaimed_pages = 0;
+ reclaim_state->deferred_objects = 0;
+ reclaim_state->scanned_objects = 0;
}
/* Record the subtree's reclaim efficiency */