@@ -1106,7 +1106,6 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
mmput_async(mm);
__free_page(page_to_free);
- spin_lock(lock);
return LRU_REMOVED_RETRY;
err_invalid_vma:
@@ -934,7 +934,6 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
mm_account_reclaimed_pages(reap);
}
inode_unpin_lru_isolating(inode);
- spin_lock(lru_lock);
return LRU_RETRY;
}
@@ -496,7 +496,6 @@ xfs_qm_dquot_isolate(
trace_xfs_dqreclaim_busy(dqp);
XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
xfs_dqunlock(dqp);
- spin_lock(lru_lock);
return LRU_RETRY;
}
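
All three isolate callbacks above (binder, the inode LRU, and the XFS dquot shrinker) lose the same line: re-taking the lru lock before returning a retry status. Under the per-cgroup locking scheme the walker re-acquires the lock itself when it restarts, so a callback returning LRU_RETRY, LRU_REMOVED_RETRY or LRU_STOP must now leave the lock dropped. A minimal sketch of a callback under the new contract (struct demo_obj, its lock and demo_reclaim() are hypothetical):

static enum lru_status demo_isolate(struct list_head *item,
				    struct list_lru_one *lru,
				    spinlock_t *lock, void *cb_arg)
{
	struct demo_obj *obj = container_of(item, struct demo_obj, lru);

	if (!spin_trylock(&obj->lock))
		return LRU_SKIP;

	list_lru_isolate(lru, item);
	/* Drop the per-cgroup lru lock for the heavier reclaim work. */
	spin_unlock(lock);

	demo_reclaim(obj);
	spin_unlock(&obj->lock);

	/* No spin_lock(lock) here anymore: the walker restarts itself. */
	return LRU_REMOVED_RETRY;
}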
@@ -32,6 +32,8 @@ struct list_lru_one {
struct list_head list;
/* may become negative during memcg reparenting */
long nr_items;
+ /* protects all fields above */
+ spinlock_t lock;
};
struct list_lru_memcg {
@@ -41,11 +43,9 @@ struct list_lru_memcg {
};
struct list_lru_node {
- /* protects all lists on the node, including per cgroup */
- spinlock_t lock;
/* global list, used for the root cgroup in cgroup aware lrus */
struct list_lru_one lru;
- long nr_items;
+ atomic_long_t nr_items;
} ____cacheline_aligned_in_smp;
struct list_lru {
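
This is the structural change at the heart of the series: the spinlock moves from struct list_lru_node into each struct list_lru_one, so two cgroups on the same NUMA node no longer contend on one lock, and the per-node aggregate count becomes an atomic_long_t because it is no longer maintained under that single lock. A list_lru_one is later marked dead by setting nr_items to LONG_MIN during reparenting (see memcg_reparent_list_lru_one() below); a hypothetical helper, not part of the patch, spelling out that rule:

static inline bool list_lru_one_dead(struct list_lru_one *l)
{
	lockdep_assert_held(&l->lock);
	return READ_ONCE(l->nr_items) == LONG_MIN;
}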
@@ -61,18 +61,51 @@ list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
}
static inline struct list_lru_one *
-list_lru_from_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg)
+lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
+ bool irq, bool skip_empty)
{
struct list_lru_one *l;
+ long nr_items;
+
+ rcu_read_lock();
again:
l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
- if (likely(l))
- return l;
-
- memcg = parent_mem_cgroup(memcg);
+ if (likely(l)) {
+ if (irq)
+ spin_lock_irq(&l->lock);
+ else
+ spin_lock(&l->lock);
+ nr_items = READ_ONCE(l->nr_items);
+ if (likely(nr_items != LONG_MIN)) {
+ WARN_ON(nr_items < 0);
+ rcu_read_unlock();
+ return l;
+ }
+ if (irq)
+ spin_unlock_irq(&l->lock);
+ else
+ spin_unlock(&l->lock);
+ }
+ /*
+ * Callers that iterate the whole list_lru expect to see empty
+ * slots, so with skip_empty they simply bail out when racing
+ * with reparenting; add/del callers climb to the parent instead.
+ */
+ if (skip_empty) {
+ rcu_read_unlock();
+ return NULL;
+ }
VM_WARN_ON(!css_is_dying(&memcg->css));
+ memcg = parent_mem_cgroup(memcg);
goto again;
}
+
+static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
+{
+ if (irq_off)
+ spin_unlock_irq(&l->lock);
+ else
+ spin_unlock(&l->lock);
+}
#else
static void list_lru_register(struct list_lru *lru)
{
@@ -99,31 +132,48 @@ list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
}
static inline struct list_lru_one *
-list_lru_from_memcg(struct list_lru *lru, int nid, int idx)
+lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
+ bool irq, bool skip_empty)
{
- return &lru->node[nid].lru;
+ struct list_lru_one *l = &lru->node[nid].lru;
+
+ if (irq)
+ spin_lock_irq(&l->lock);
+ else
+ spin_lock(&l->lock);
+
+ return l;
+}
+
+static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
+{
+ if (irq_off)
+ spin_unlock_irq(&l->lock);
+ else
+ spin_unlock(&l->lock);
}
#endif /* CONFIG_MEMCG */
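
Both configurations now expose the same pair of helpers: lock_list_lru_of_memcg() returns a locked list_lru_one and unlock_list_lru() drops it. A hypothetical caller, just to show the pairing (demo_rotate_one() is not part of the patch):

static bool demo_rotate_one(struct list_lru *lru, int nid,
			    struct mem_cgroup *memcg)
{
	struct list_lru_one *l;

	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (!l)
		return false;
	if (!list_empty(&l->list))
		list_rotate_left(&l->list);	/* first entry to the tail */
	unlock_list_lru(l, false);
	return true;
}

With skip_empty == false the lookup climbs to the parent when it races with reparenting and so never returns NULL; the check above mirrors the defensive one in list_lru_add(). Walkers pass skip_empty == true and must tolerate NULL.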
/* The caller must ensure the memcg lifetime. */
bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
- struct mem_cgroup *memcg)
+ struct mem_cgroup *memcg)
{
struct list_lru_node *nlru = &lru->node[nid];
struct list_lru_one *l;
- spin_lock(&nlru->lock);
+ l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
+ if (!l)
+ return false;
if (list_empty(item)) {
- l = list_lru_from_memcg(lru, nid, memcg);
list_add_tail(item, &l->list);
/* Set shrinker bit if the first element was added */
if (!l->nr_items++)
set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
- nlru->nr_items++;
- spin_unlock(&nlru->lock);
+ unlock_list_lru(l, false);
+ atomic_long_inc(&nlru->nr_items);
return true;
}
- spin_unlock(&nlru->lock);
+ unlock_list_lru(l, false);
return false;
}
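
"The caller must ensure the memcg lifetime" is usually satisfied by going through the list_lru_add_obj() wrapper, which derives the node and memcg from the object itself and holds rcu_read_lock() across the add. A sketch (struct demo_obj is hypothetical; its lru field must only ever be linked into this one list_lru):

struct demo_obj {
	struct list_head lru;
};

static void demo_make_reclaimable(struct list_lru *lru, struct demo_obj *obj)
{
	/* Returns true only if the object was not already listed. */
	if (list_lru_add_obj(lru, &obj->lru))
		pr_debug("demo: object is now reclaimable\n");
}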
@@ -146,24 +196,23 @@ EXPORT_SYMBOL_GPL(list_lru_add_obj);
/* The caller must ensure the memcg lifetime. */
bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
- struct mem_cgroup *memcg)
+ struct mem_cgroup *memcg)
{
struct list_lru_node *nlru = &lru->node[nid];
struct list_lru_one *l;
-
- spin_lock(&nlru->lock);
+ l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
+ if (!l)
+ return false;
if (!list_empty(item)) {
- l = list_lru_from_memcg(lru, nid, memcg);
list_del_init(item);
l->nr_items--;
- nlru->nr_items--;
- spin_unlock(&nlru->lock);
+ unlock_list_lru(l, false);
+ atomic_long_dec(&nlru->nr_items);
return true;
}
- spin_unlock(&nlru->lock);
+ unlock_list_lru(l, false);
return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);
bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{
@@ -220,25 +269,24 @@ unsigned long list_lru_count_node(struct list_lru *lru, int nid)
struct list_lru_node *nlru;
nlru = &lru->node[nid];
- return nlru->nr_items;
+ return atomic_long_read(&nlru->nr_items);
}
EXPORT_SYMBOL_GPL(list_lru_count_node);
static unsigned long
-__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
+__list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
list_lru_walk_cb isolate, void *cb_arg,
- unsigned long *nr_to_walk)
+ unsigned long *nr_to_walk, bool irq_off)
{
struct list_lru_node *nlru = &lru->node[nid];
- struct list_lru_one *l;
+ struct list_lru_one *l = NULL;
struct list_head *item, *n;
unsigned long isolated = 0;
restart:
- l = list_lru_from_memcg_idx(lru, nid, memcg_idx);
+ l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
if (!l)
- goto out;
-
+ return isolated;
list_for_each_safe(item, n, &l->list) {
enum lru_status ret;
@@ -250,19 +298,19 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
break;
--*nr_to_walk;
- ret = isolate(item, l, &nlru->lock, cb_arg);
+ ret = isolate(item, l, &l->lock, cb_arg);
switch (ret) {
+ /*
+ * The isolate callback has dropped the lru lock when returning
+ * LRU_RETRY, LRU_REMOVED_RETRY or LRU_STOP, so list traversal
+ * has to restart from scratch (or, for LRU_STOP, terminate).
+ */
+ case LRU_RETRY:
+ goto restart;
case LRU_REMOVED_RETRY:
- assert_spin_locked(&nlru->lock);
fallthrough;
case LRU_REMOVED:
isolated++;
- nlru->nr_items--;
- /*
- * If the lru lock has been dropped, our list
- * traversal is now invalid and so we have to
- * restart from scratch.
- */
+ atomic_long_dec(&nlru->nr_items);
if (ret == LRU_REMOVED_RETRY)
goto restart;
break;
@@ -271,20 +319,13 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
break;
case LRU_SKIP:
break;
- case LRU_RETRY:
- /*
- * The lru lock has been dropped, our list traversal is
- * now invalid and so we have to restart from scratch.
- */
- assert_spin_locked(&nlru->lock);
- goto restart;
case LRU_STOP:
- assert_spin_locked(&nlru->lock);
goto out;
default:
BUG();
}
}
+ unlock_list_lru(l, irq_off);
out:
return isolated;
}
@@ -294,14 +335,8 @@ list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
list_lru_walk_cb isolate, void *cb_arg,
unsigned long *nr_to_walk)
{
- struct list_lru_node *nlru = &lru->node[nid];
- unsigned long ret;
-
- spin_lock(&nlru->lock);
- ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
- cb_arg, nr_to_walk);
- spin_unlock(&nlru->lock);
- return ret;
+ return __list_lru_walk_one(lru, nid, memcg, isolate,
+ cb_arg, nr_to_walk, false);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);
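
A typical consumer is a shrinker that forwards its count and scan callbacks to the list_lru; the existing list_lru_shrink_{count,walk}() wrappers pass sc->nid and sc->memcg down, which after this patch also selects the per-cgroup lock. Sketched with the hypothetical demo_lru and the demo_isolate() callback from earlier:

static struct list_lru demo_lru;

static unsigned long demo_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_shrink_count(&demo_lru, sc);
}

static unsigned long demo_scan(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_shrink_walk(&demo_lru, sc, demo_isolate, NULL);
}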
@@ -310,14 +345,8 @@ list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
list_lru_walk_cb isolate, void *cb_arg,
unsigned long *nr_to_walk)
{
- struct list_lru_node *nlru = &lru->node[nid];
- unsigned long ret;
-
- spin_lock_irq(&nlru->lock);
- ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
- cb_arg, nr_to_walk);
- spin_unlock_irq(&nlru->lock);
- return ret;
+ return __list_lru_walk_one(lru, nid, memcg, isolate,
+ cb_arg, nr_to_walk, true);
}
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
@@ -332,16 +361,21 @@ unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
#ifdef CONFIG_MEMCG
if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
struct list_lru_memcg *mlru;
+ struct mem_cgroup *memcg;
unsigned long index;
xa_for_each(&lru->xa, index, mlru) {
- struct list_lru_node *nlru = &lru->node[nid];
-
- spin_lock(&nlru->lock);
- isolated += __list_lru_walk_one(lru, nid, index,
+ rcu_read_lock();
+ memcg = mem_cgroup_from_id(index);
+ if (!mem_cgroup_tryget(memcg)) {
+ rcu_read_unlock();
+ continue;
+ }
+ rcu_read_unlock();
+ isolated += __list_lru_walk_one(lru, nid, memcg,
isolate, cb_arg,
- nr_to_walk);
- spin_unlock(&nlru->lock);
+ nr_to_walk, false);
+ mem_cgroup_put(memcg);
if (*nr_to_walk <= 0)
break;
@@ -353,14 +387,19 @@ unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
-static void init_one_lru(struct list_lru_one *l)
+static void init_one_lru(struct list_lru *lru, struct list_lru_one *l)
{
INIT_LIST_HEAD(&l->list);
+ spin_lock_init(&l->lock);
l->nr_items = 0;
+#ifdef CONFIG_LOCKDEP
+ if (lru->key)
+ lockdep_set_class(&l->lock, lru->key);
+#endif
}
#ifdef CONFIG_MEMCG
-static struct list_lru_memcg *memcg_init_list_lru_one(gfp_t gfp)
+static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
{
int nid;
struct list_lru_memcg *mlru;
@@ -370,7 +409,7 @@ static struct list_lru_memcg *memcg_init_list_lru_one(gfp_t gfp)
return NULL;
for_each_node(nid)
- init_one_lru(&mlru->node[nid]);
+ init_one_lru(lru, &mlru->node[nid]);
return mlru;
}
@@ -398,28 +437,27 @@ static void memcg_destroy_list_lru(struct list_lru *lru)
xas_unlock_irq(&xas);
}
-static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid,
- struct list_lru_one *src,
- struct mem_cgroup *dst_memcg)
+static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
+ struct list_lru_one *src,
+ struct mem_cgroup *dst_memcg)
{
- struct list_lru_node *nlru = &lru->node[nid];
+ int dst_idx = dst_memcg->kmemcg_id;
struct list_lru_one *dst;
- /*
- * Since list_lru_{add,del} may be called under an IRQ-safe lock,
- * we have to use IRQ-safe primitives here to avoid deadlock.
- */
- spin_lock_irq(&nlru->lock);
- dst = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(dst_memcg));
+ spin_lock_irq(&src->lock);
+ dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
+ spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);
list_splice_init(&src->list, &dst->list);
-
if (src->nr_items) {
dst->nr_items += src->nr_items;
set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
- src->nr_items = 0;
}
- spin_unlock_irq(&nlru->lock);
+ /* Mark the list_lru_one dead */
+ src->nr_items = LONG_MIN;
+
+ spin_unlock(&dst->lock);
+ spin_unlock_irq(&src->lock);
}
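
memcg_reparent_list_lru_one() takes two locks of the same lock class: the dying child's first, then the parent's, annotated with SINGLE_DEPTH_NESTING so lockdep accepts the intentional one-level nesting. Reparenting only ever moves items up the hierarchy, so the child-then-parent order cannot form a cycle. The idiom in isolation (a sketch, without the nr_items bookkeeping):

static void demo_splice_up(struct list_lru_one *child,
			   struct list_lru_one *parent)
{
	spin_lock_irq(&child->lock);
	spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);
	list_splice_init(&child->list, &parent->list);
	spin_unlock(&parent->lock);
	spin_unlock_irq(&child->lock);
}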
void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
@@ -448,7 +486,7 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren
* safe to reparent.
*/
for_each_node(i)
- memcg_reparent_list_lru_node(lru, i, &mlru->node[i], parent);
+ memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);
/*
* Here all list_lrus corresponding to the cgroup are guaranteed
@@ -497,7 +535,7 @@ int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
parent = parent_mem_cgroup(pos);
}
- mlru = memcg_init_list_lru_one(gfp);
+ mlru = memcg_init_list_lru_one(lru, gfp);
if (!mlru)
return -ENOMEM;
xas_set(&xas, pos->kmemcg_id);
@@ -544,14 +582,8 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shr
if (!lru->node)
return -ENOMEM;
- for_each_node(i) {
- spin_lock_init(&lru->node[i].lock);
-#ifdef CONFIG_LOCKDEP
- if (lru->key)
- lockdep_set_class(&lru->node[i].lock, lru->key);
-#endif
- init_one_lru(&lru->node[i].lru);
- }
+ for_each_node(i)
+ init_one_lru(lru, &lru->node[i].lru);
memcg_init_list_lru(lru, memcg_aware);
list_lru_register(lru);
@@ -3098,8 +3098,13 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
if (!parent)
parent = root_mem_cgroup;
- memcg_reparent_objcgs(memcg, parent);
memcg_reparent_list_lrus(memcg, parent);
+
+ /*
+ * Objcg's reparenting must be after list_lru's, so that list_lru
+ * helpers won't use the parent's list_lru until the child is drained.
+ */
+ memcg_reparent_objcgs(memcg, parent);
}
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -777,7 +777,6 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
ret = LRU_REMOVED_RETRY;
out:
cond_resched();
- spin_lock_irq(lru_lock);
return ret;
}
@@ -705,9 +705,9 @@ static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)
* Note that it is safe to use rcu_read_lock() here, even in the face of
* concurrent memcg offlining:
*
- * 1. list_lru_add() is called before list_lru_memcg is erased. The
+ * 1. list_lru_add() is called before list_lru_one is dead. The
* new entry will be reparented to memcg's parent's list_lru.
- * 2. list_lru_add() is called after list_lru_memcg is erased. The
+ * 2. list_lru_add() is called after list_lru_one is dead. The
* new entry will be added directly to memcg's parent's list_lru.
*
* Similar reasoning holds for list_lru_del().
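
The body this comment guards, sketched for reference (entry_to_nid() and mem_cgroup_from_entry() are zswap-internal helpers): RCU alone pins the memcg across the add, and the dead-list check inside list_lru_add() resolves the offlining race in either direction.

static void zswap_lru_add_sketch(struct list_lru *list_lru,
				 struct zswap_entry *entry)
{
	int nid = entry_to_nid(entry);
	struct mem_cgroup *memcg;

	rcu_read_lock();
	memcg = mem_cgroup_from_entry(entry);
	/* Lands on the child's list, or on the parent's once reparented. */
	list_lru_add(list_lru, &entry->lru, nid, memcg);
	rcu_read_unlock();
}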
@@ -1172,7 +1172,6 @@ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o
zswap_written_back_pages++;
}
- spin_lock(lock);
return ret;
}