@@ -21,6 +21,7 @@
#include <linux/vmstat.h>
#include <linux/writeback.h>
#include <linux/page-flags.h>
+#include <linux/shrinker.h>
struct mem_cgroup;
struct obj_cgroup;
@@ -88,17 +89,6 @@ struct mem_cgroup_reclaim_iter {
unsigned int generation;
};
-/*
- * Bitmap and deferred work of shrinker::id corresponding to memcg-aware
- * shrinkers, which have elements charged to this memcg.
- */
-struct shrinker_info {
- struct rcu_head rcu;
- atomic_long_t *nr_deferred;
- unsigned long *map;
- int map_nr_max;
-};
-
struct lruvec_stats_percpu {
/* Local (CPU and cgroup) state */
long state[NR_VM_NODE_STAT_ITEMS];
@@ -5,6 +5,23 @@
#include <linux/atomic.h>
#include <linux/types.h>
+#define SHRINKER_UNIT_BITS BITS_PER_LONG
+
+/*
+ * Per-unit bitmap and deferred work, indexed by shrinker::id, for the
+ * memcg-aware shrinkers that have elements charged to the memcg. Each
+ * unit covers SHRINKER_UNIT_BITS shrinker ids.
+ */
+struct shrinker_info_unit {
+ atomic_long_t nr_deferred[SHRINKER_UNIT_BITS];
+ DECLARE_BITMAP(map, SHRINKER_UNIT_BITS);
+};
+
+struct shrinker_info {
+ struct rcu_head rcu;
+ int map_nr_max;
+ struct shrinker_info_unit *unit[];
+};
+
/*
* This struct is used to pass information from page reclaim to the shrinkers.
* We consolidate the values for easier extension later.
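
The flat per-memcg bitmap plus nr_deferred array is replaced by a two-level
structure: shrinker_info::unit[] holds one pointer per block of
SHRINKER_UNIT_BITS shrinker ids, and each shrinker_info_unit carries the map
bits and nr_deferred counters for its block. As a rough illustration of the
addressing (a userspace sketch, not kernel code, assuming 64-bit longs;
id_to_index()/id_to_offset() stand in for the shrinker_id_to_index() and
shrinker_id_to_offset() helpers added later in this patch):

#include <assert.h>
#include <stdio.h>

#define SHRINKER_UNIT_BITS 64	/* stand-in for BITS_PER_LONG on a 64-bit build */

/* Same arithmetic as the helpers introduced later in this patch. */
static int id_to_index(int shrinker_id)  { return shrinker_id / SHRINKER_UNIT_BITS; }
static int id_to_offset(int shrinker_id) { return shrinker_id % SHRINKER_UNIT_BITS; }

int main(void)
{
	/* Shrinker id 70 is tracked by unit[1]: bit 6 of that unit's map
	 * and slot 6 of that unit's nr_deferred[] array. */
	assert(id_to_index(70) == 1);
	assert(id_to_offset(70) == 6);
	printf("id 70 -> unit %d, bit/slot %d\n", id_to_index(70), id_to_offset(70));
	return 0;
}

One consequence, visible in expand_one_shrinker_info() below, is that growing
to a larger id only copies the unit pointers and allocates the missing units;
the existing bitmaps and deferred counts are reused in place rather than
copied.
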
@@ -12,15 +12,50 @@ DECLARE_RWSEM(shrinker_rwsem);
#ifdef CONFIG_MEMCG
static int shrinker_nr_max;
-/* The shrinker_info is expanded in a batch of BITS_PER_LONG */
-static inline int shrinker_map_size(int nr_items)
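+/* The shrinker_info::unit array is expanded in batches of SHRINKER_UNIT_BITS ids */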
+static inline int shrinker_unit_size(int nr_items)
{
- return (DIV_ROUND_UP(nr_items, BITS_PER_LONG) * sizeof(unsigned long));
+ return (DIV_ROUND_UP(nr_items, SHRINKER_UNIT_BITS) * sizeof(struct shrinker_info_unit *));
}
-static inline int shrinker_defer_size(int nr_items)
+static inline void shrinker_unit_free(struct shrinker_info *info, int start)
{
- return (round_up(nr_items, BITS_PER_LONG) * sizeof(atomic_long_t));
+ struct shrinker_info_unit **unit;
+ int nr, i;
+
+ if (!info)
+ return;
+
+ unit = info->unit;
+ nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS);
+
+ for (i = start; i < nr; i++) {
+ if (!unit[i])
+ break;
+
+ kfree(unit[i]);
+ unit[i] = NULL;
+ }
+}
+
+static inline int shrinker_unit_alloc(struct shrinker_info *new,
+ struct shrinker_info *old, int nid)
+{
+ struct shrinker_info_unit *unit;
+ int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS);
+ int start = old ? DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0;
+ int i;
+
+ for (i = start; i < nr; i++) {
+ unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid);
+ if (!unit) {
+ shrinker_unit_free(new, start);
+ return -ENOMEM;
+ }
+
+ new->unit[i] = unit;
+ }
+
+ return 0;
}
void free_shrinker_info(struct mem_cgroup *memcg)
@@ -32,6 +67,7 @@ void free_shrinker_info(struct mem_cgroup *memcg)
for_each_node(nid) {
pn = memcg->nodeinfo[nid];
info = rcu_dereference_protected(pn->shrinker_info, true);
+ shrinker_unit_free(info, 0);
kvfree(info);
rcu_assign_pointer(pn->shrinker_info, NULL);
}
@@ -40,28 +76,27 @@ void free_shrinker_info(struct mem_cgroup *memcg)
int alloc_shrinker_info(struct mem_cgroup *memcg)
{
struct shrinker_info *info;
- int nid, size, ret = 0;
- int map_size, defer_size = 0;
+ int nid, ret = 0;
+ int array_size = 0;
down_write(&shrinker_rwsem);
- map_size = shrinker_map_size(shrinker_nr_max);
- defer_size = shrinker_defer_size(shrinker_nr_max);
- size = map_size + defer_size;
+ array_size = shrinker_unit_size(shrinker_nr_max);
for_each_node(nid) {
- info = kvzalloc_node(sizeof(*info) + size, GFP_KERNEL, nid);
- if (!info) {
- free_shrinker_info(memcg);
- ret = -ENOMEM;
- break;
- }
- info->nr_deferred = (atomic_long_t *)(info + 1);
- info->map = (void *)info->nr_deferred + defer_size;
+ info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid);
+ if (!info)
+ goto err;
info->map_nr_max = shrinker_nr_max;
+ if (shrinker_unit_alloc(info, NULL, nid)) {
+ kvfree(info);
+ goto err;
+ }
rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
}
up_write(&shrinker_rwsem);
return ret;
+
+err:
+ up_write(&shrinker_rwsem);
+ free_shrinker_info(memcg);
+ return -ENOMEM;
}
static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
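
For a rough sense of the allocation shape this produces (illustrative numbers
only, not taken from the patch): shrinker_unit_size() sizes just the outer
pointer array appended to struct shrinker_info, and shrinker_unit_alloc()
backs each slot with a separately allocated, fixed-size unit. A userspace
sketch, with unit_model as a hypothetical stand-in for struct
shrinker_info_unit on a 64-bit build:

#include <stdio.h>

#define SHRINKER_UNIT_BITS 64
#define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

/* Hypothetical model of struct shrinker_info_unit: 64 deferred counters
 * plus one long's worth of bitmap. */
struct unit_model {
	long nr_deferred[SHRINKER_UNIT_BITS];
	unsigned long map[1];
};

int main(void)
{
	int shrinker_nr_max = 130;	/* illustrative value */
	int nr_units = DIV_ROUND_UP(shrinker_nr_max, SHRINKER_UNIT_BITS);

	/* alloc_shrinker_info(): one kvzalloc per node for the outer array... */
	printf("outer array: %d unit pointers, %zu bytes\n",
	       nr_units, nr_units * sizeof(void *));
	/* ...then shrinker_unit_alloc() allocates each unit separately. */
	printf("per unit: %zu bytes (%d units, %zu bytes total)\n",
	       sizeof(struct unit_model), nr_units,
	       nr_units * sizeof(struct unit_model));
	return 0;
}
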
@@ -71,15 +106,12 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
lockdep_is_held(&shrinker_rwsem));
}
-static int expand_one_shrinker_info(struct mem_cgroup *memcg,
- int map_size, int defer_size,
- int old_map_size, int old_defer_size,
- int new_nr_max)
+static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size,
+ int old_size, int new_nr_max)
{
struct shrinker_info *new, *old;
struct mem_cgroup_per_node *pn;
int nid;
- int size = map_size + defer_size;
for_each_node(nid) {
pn = memcg->nodeinfo[nid];
@@ -92,21 +124,17 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
if (new_nr_max <= old->map_nr_max)
continue;
- new = kvmalloc_node(sizeof(*new) + size, GFP_KERNEL, nid);
+ new = kvzalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid);
if (!new)
return -ENOMEM;
- new->nr_deferred = (atomic_long_t *)(new + 1);
- new->map = (void *)new->nr_deferred + defer_size;
new->map_nr_max = new_nr_max;
- /* map: set all old bits, clear all new bits */
- memset(new->map, (int)0xff, old_map_size);
- memset((void *)new->map + old_map_size, 0, map_size - old_map_size);
- /* nr_deferred: copy old values, clear all new values */
- memcpy(new->nr_deferred, old->nr_deferred, old_defer_size);
- memset((void *)new->nr_deferred + old_defer_size, 0,
- defer_size - old_defer_size);
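+ /* Reuse the old units; only the newly added slots are allocated below */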
+ memcpy(new->unit, old->unit, old_size);
+ if (shrinker_unit_alloc(new, old, nid)) {
+ kvfree(new);
+ return -ENOMEM;
+ }
rcu_assign_pointer(pn->shrinker_info, new);
kvfree_rcu(old, rcu);
@@ -118,9 +146,8 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
static int expand_shrinker_info(int new_id)
{
int ret = 0;
- int new_nr_max = round_up(new_id + 1, BITS_PER_LONG);
- int map_size, defer_size = 0;
- int old_map_size, old_defer_size = 0;
+ int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS);
+ int new_size, old_size = 0;
struct mem_cgroup *memcg;
if (!root_mem_cgroup)
@@ -128,15 +155,12 @@ static int expand_shrinker_info(int new_id)
lockdep_assert_held(&shrinker_rwsem);
- map_size = shrinker_map_size(new_nr_max);
- defer_size = shrinker_defer_size(new_nr_max);
- old_map_size = shrinker_map_size(shrinker_nr_max);
- old_defer_size = shrinker_defer_size(shrinker_nr_max);
+ new_size = shrinker_unit_size(new_nr_max);
+ old_size = shrinker_unit_size(shrinker_nr_max);
memcg = mem_cgroup_iter(NULL, NULL, NULL);
do {
- ret = expand_one_shrinker_info(memcg, map_size, defer_size,
- old_map_size, old_defer_size,
+ ret = expand_one_shrinker_info(memcg, new_size, old_size,
new_nr_max);
if (ret) {
mem_cgroup_iter_break(NULL, memcg);
@@ -150,17 +174,34 @@ static int expand_shrinker_info(int new_id)
return ret;
}
+static inline int shrinker_id_to_index(int shrinker_id)
+{
+ return shrinker_id / SHRINKER_UNIT_BITS;
+}
+
+static inline int shrinker_id_to_offset(int shrinker_id)
+{
+ return shrinker_id % SHRINKER_UNIT_BITS;
+}
+
+static inline int calc_shrinker_id(int index, int offset)
+{
+ return index * SHRINKER_UNIT_BITS + offset;
+}
+
void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
{
if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
struct shrinker_info *info;
+ struct shrinker_info_unit *unit;
rcu_read_lock();
info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
+ unit = info->unit[shrinker_id_to_index(shrinker_id)];
if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
/* Pairs with smp mb in shrink_slab() */
smp_mb__before_atomic();
- set_bit(shrinker_id, info->map);
+ set_bit(shrinker_id_to_offset(shrinker_id), unit->map);
}
rcu_read_unlock();
}
@@ -209,26 +250,31 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
struct mem_cgroup *memcg)
{
struct shrinker_info *info;
+ struct shrinker_info_unit *unit;
info = shrinker_info_protected(memcg, nid);
- return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0);
+ unit = info->unit[shrinker_id_to_index(shrinker->id)];
+ return atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0);
}
static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
struct mem_cgroup *memcg)
{
struct shrinker_info *info;
+ struct shrinker_info_unit *unit;
info = shrinker_info_protected(memcg, nid);
- return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]);
+ unit = info->unit[shrinker_id_to_index(shrinker->id)];
+ return atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]);
}
void reparent_shrinker_deferred(struct mem_cgroup *memcg)
{
- int i, nid;
+ int nid, index, offset;
long nr;
struct mem_cgroup *parent;
struct shrinker_info *child_info, *parent_info;
+ struct shrinker_info_unit *child_unit, *parent_unit;
parent = parent_mem_cgroup(memcg);
if (!parent)
@@ -239,9 +285,13 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
for_each_node(nid) {
child_info = shrinker_info_protected(memcg, nid);
parent_info = shrinker_info_protected(parent, nid);
- for (i = 0; i < child_info->map_nr_max; i++) {
- nr = atomic_long_read(&child_info->nr_deferred[i]);
- atomic_long_add(nr, &parent_info->nr_deferred[i]);
+ for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) {
+ child_unit = child_info->unit[index];
+ parent_unit = parent_info->unit[index];
+ for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) {
+ nr = atomic_long_read(&child_unit->nr_deferred[offset]);
+ atomic_long_add(nr, &parent_unit->nr_deferred[offset]);
+ }
}
}
up_read(&shrinker_rwsem);
@@ -407,7 +457,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
{
struct shrinker_info *info;
unsigned long ret, freed = 0;
- int i;
+ int offset, index = 0;
if (!mem_cgroup_online(memcg))
return 0;
@@ -419,56 +469,63 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
if (unlikely(!info))
goto unlock;
- for_each_set_bit(i, info->map, info->map_nr_max) {
- struct shrink_control sc = {
- .gfp_mask = gfp_mask,
- .nid = nid,
- .memcg = memcg,
- };
- struct shrinker *shrinker;
+ for (; index < shrinker_id_to_index(info->map_nr_max); index++) {
+ struct shrinker_info_unit *unit;
- shrinker = idr_find(&shrinker_idr, i);
- if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) {
- if (!shrinker)
- clear_bit(i, info->map);
- continue;
- }
+ unit = info->unit[index];
- /* Call non-slab shrinkers even though kmem is disabled */
- if (!memcg_kmem_online() &&
- !(shrinker->flags & SHRINKER_NONSLAB))
- continue;
+ for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) {
+ struct shrink_control sc = {
+ .gfp_mask = gfp_mask,
+ .nid = nid,
+ .memcg = memcg,
+ };
+ struct shrinker *shrinker;
+ int shrinker_id = calc_shrinker_id(index, offset);
- ret = do_shrink_slab(&sc, shrinker, priority);
- if (ret == SHRINK_EMPTY) {
- clear_bit(i, info->map);
- /*
- * After the shrinker reported that it had no objects to
- * free, but before we cleared the corresponding bit in
- * the memcg shrinker map, a new object might have been
- * added. To make sure, we have the bit set in this
- * case, we invoke the shrinker one more time and reset
- * the bit if it reports that it is not empty anymore.
- * The memory barrier here pairs with the barrier in
- * set_shrinker_bit():
- *
- * list_lru_add() shrink_slab_memcg()
- * list_add_tail() clear_bit()
- * <MB> <MB>
- * set_bit() do_shrink_slab()
- */
- smp_mb__after_atomic();
- ret = do_shrink_slab(&sc, shrinker, priority);
- if (ret == SHRINK_EMPTY)
- ret = 0;
- else
- set_shrinker_bit(memcg, nid, i);
- }
- freed += ret;
+ shrinker = idr_find(&shrinker_idr, shrinker_id);
+ if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) {
+ if (!shrinker)
+ clear_bit(offset, unit->map);
+ continue;
+ }
- if (rwsem_is_contended(&shrinker_rwsem)) {
- freed = freed ? : 1;
- break;
+ /* Call non-slab shrinkers even though kmem is disabled */
+ if (!memcg_kmem_online() &&
+ !(shrinker->flags & SHRINKER_NONSLAB))
+ continue;
+
+ ret = do_shrink_slab(&sc, shrinker, priority);
+ if (ret == SHRINK_EMPTY) {
+ clear_bit(offset, unit->map);
+ /*
+ * After the shrinker reported that it had no objects to
+ * free, but before we cleared the corresponding bit in
+ * the memcg shrinker map, a new object might have been
+ * added. To make sure, we have the bit set in this
+ * case, we invoke the shrinker one more time and reset
+ * the bit if it reports that it is not empty anymore.
+ * The memory barrier here pairs with the barrier in
+ * set_shrinker_bit():
+ *
+ * list_lru_add() shrink_slab_memcg()
+ * list_add_tail() clear_bit()
+ * <MB> <MB>
+ * set_bit() do_shrink_slab()
+ */
+ smp_mb__after_atomic();
+ ret = do_shrink_slab(&sc, shrinker, priority);
+ if (ret == SHRINK_EMPTY)
+ ret = 0;
+ else
+ set_shrinker_bit(memcg, nid, shrinker_id);
+ }
+ freed += ret;
+
+ if (rwsem_is_contended(&shrinker_rwsem)) {
+ freed = freed ? : 1;
+ goto unlock;
+ }
}
}
unlock:
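
A stripped-down model of the new two-level walk in shrink_slab_memcg()
(userspace C; the inner loop is a plain stand-in for for_each_set_bit(), and
struct unit/walk() are illustrative names, not kernel APIs):

#include <stdio.h>

#define UNIT_BITS 64

struct unit {
	unsigned long map;	/* one bit per shrinker id inside this unit */
};

static void walk(struct unit **units, int nr_units)
{
	for (int index = 0; index < nr_units; index++) {
		unsigned long map = units[index]->map;

		/* stand-in for for_each_set_bit(offset, unit->map, UNIT_BITS) */
		for (int offset = 0; offset < UNIT_BITS; offset++) {
			if (!(map & (1UL << offset)))
				continue;
			/* calc_shrinker_id(index, offset) */
			printf("would call do_shrink_slab() for id %d\n",
			       index * UNIT_BITS + offset);
		}
	}
}

int main(void)
{
	struct unit u0 = { .map = 1UL << 3 };	/* id 3 set */
	struct unit u1 = { .map = 1UL << 6 };	/* id 70 set */
	struct unit *units[] = { &u0, &u1 };

	walk(units, 2);
	return 0;
}

The outer bound in the real code, shrinker_id_to_index(info->map_nr_max), is
exact because map_nr_max is only ever set to a value rounded up to a multiple
of SHRINKER_UNIT_BITS (see the round_up() in expand_shrinker_info() above).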