Message ID | 20180622151221.28167-4-bigeasy@linutronix.de (mailing list archive) |
---|---|
State | New, archived |
On Fri, Jun 22, 2018 at 05:12:21PM +0200, Sebastian Andrzej Siewior wrote:
> scan_shadow_nodes() is the only user of __list_lru_walk_one() which
> disables interrupts before invoking it. The reason is that nlru->lock is
> nesting inside the IRQ-safe i_pages lock. Some functions unconditionally
> acquire the lock with the _irq() suffix.
>
> __list_lru_walk_one() can't acquire the lock unconditionally with the
> _irq() suffix because it might invoke a callback which unlocks the
> nlru->lock and invokes a sleeping function without enabling interrupts.
>
> Add an argument to __list_lru_init() which identifies whether the
> nlru->lock needs to be acquired with interrupts disabled or not.
>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  include/linux/list_lru.h | 12 ++++++++----
>  mm/list_lru.c            | 14 ++++++++++----
>  mm/workingset.c          | 12 ++++--------
>  3 files changed, 22 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
> index 96def9d15b1b..c2161c3a1809 100644
> --- a/include/linux/list_lru.h
> +++ b/include/linux/list_lru.h
> @@ -51,18 +51,22 @@ struct list_lru_node {
>
>  struct list_lru {
>          struct list_lru_node    *node;
> +        bool                    lock_irq;
>  #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
>          struct list_head        list;
>  #endif
>  };

TBO I don't like this patch, because the new member of struct list_lru,
lock_irq, has a rather obscure meaning IMHO: it makes list_lru_walk
disable irq before taking lru_lock, but at the same time list_lru_add
and list_lru_del never do that, no matter whether lock_irq is true or
false. That is, if a user of struct list_lru sets this flag, he's
supposed to disable irq for list_lru_add/del by himself (mm/workingset
does that).

IMHO the code of mm/workingset is clear as it is. Since it is the only
place where this flag is used, I'd rather leave it as is.
>
>  void list_lru_destroy(struct list_lru *lru);
> -int __list_lru_init(struct list_lru *lru, bool memcg_aware,
> +int __list_lru_init(struct list_lru *lru, bool memcg_aware, bool lock_irq,
>                      struct lock_class_key *key);
>
> -#define list_lru_init(lru)              __list_lru_init((lru), false, NULL)
> -#define list_lru_init_key(lru, key)     __list_lru_init((lru), false, (key))
> -#define list_lru_init_memcg(lru)        __list_lru_init((lru), true, NULL)
> +#define list_lru_init(lru)              __list_lru_init((lru), false, false, \
> +                                                        NULL)
> +#define list_lru_init_key(lru, key)     __list_lru_init((lru), false, false, \
> +                                                        (key))
> +#define list_lru_init_memcg(lru)        __list_lru_init((lru), true, false, \
> +                                                        NULL)
>
>  int memcg_update_all_list_lrus(int num_memcgs);
>  void memcg_drain_all_list_lrus(int src_idx, int dst_idx);
> diff --git a/mm/list_lru.c b/mm/list_lru.c
> index fcfb6c89ed47..1c49d48078e4 100644
> --- a/mm/list_lru.c
> +++ b/mm/list_lru.c
> @@ -204,7 +204,10 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
>          struct list_head *item, *n;
>          unsigned long isolated = 0;
>
> -        spin_lock(&nlru->lock);
> +        if (lru->lock_irq)
> +                spin_lock_irq(&nlru->lock);
> +        else
> +                spin_lock(&nlru->lock);
>          l = list_lru_from_memcg_idx(nlru, memcg_idx);
>  restart:
>          list_for_each_safe(item, n, &l->list) {
> @@ -251,7 +254,10 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
>                  }
>          }
>
> -        spin_unlock(&nlru->lock);
> +        if (lru->lock_irq)
> +                spin_unlock_irq(&nlru->lock);
> +        else
> +                spin_unlock(&nlru->lock);
>          return isolated;
>  }
>
> @@ -553,7 +559,7 @@ static void memcg_destroy_list_lru(struct list_lru *lru)
>  }
>  #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
>
> -int __list_lru_init(struct list_lru *lru, bool memcg_aware,
> +int __list_lru_init(struct list_lru *lru, bool memcg_aware, bool lock_irq,
>                      struct lock_class_key *key)
>  {
>          int i;
> @@ -580,7 +586,7 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
>                  lru->node = NULL;
>                  goto out;
>          }
> -
> +        lru->lock_irq = lock_irq;
>          list_lru_register(lru);
>  out:
>          memcg_put_cache_ids();
> diff --git a/mm/workingset.c b/mm/workingset.c
> index 529480c21f93..23ce00f48212 100644
> --- a/mm/workingset.c
> +++ b/mm/workingset.c
> @@ -480,13 +480,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
>  static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
>                                         struct shrink_control *sc)
>  {
> -        unsigned long ret;
> -
> -        /* list_lru lock nests inside the IRQ-safe i_pages lock */
> -        local_irq_disable();
> -        ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
> -        local_irq_enable();
> -        return ret;
> +        return list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate,
> +                                    NULL);
>  }
>
>  static struct shrinker workingset_shadow_shrinker = {
> @@ -523,7 +518,8 @@ static int __init workingset_init(void)
>          pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
>                  timestamp_bits, max_order, bucket_order);
>
> -        ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key);
> +        /* list_lru lock nests inside the IRQ-safe i_pages lock */
> +        ret = __list_lru_init(&shadow_nodes, true, true, &shadow_nodes_key);
>          if (ret)
>                  goto err;
>          ret = register_shrinker(&workingset_shadow_shrinker);
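The asymmetry Vladimir describes can be illustrated with a small sketch. This is not code from the patch or from mm/workingset.c; the lru, lock, and function names below are made up. It only shows the contract a lock_irq user would have to follow: the walk side takes nlru->lock with spin_lock_irq(), while list_lru_add()/list_lru_del() still take it with plain spin_lock(), so their callers must already run with interrupts disabled (mm/workingset gets that from the IRQ-safe i_pages lock).

```c
#include <linux/list_lru.h>
#include <linux/spinlock.h>

/* Illustrative only: an lru assumed to be set up with lock_irq = true. */
static struct list_lru example_lru;

/* Hypothetical stand-in for the IRQ-safe i_pages lock used by mm/workingset. */
static DEFINE_SPINLOCK(example_outer_lock);

static void example_add(struct list_head *entry)
{
	/*
	 * The caller, not list_lru_add(), is responsible for disabling
	 * interrupts here: the lock_irq flag only affects the walk path.
	 */
	spin_lock_irq(&example_outer_lock);
	list_lru_add(&example_lru, entry);
	spin_unlock_irq(&example_outer_lock);
}
```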
My interpretation of the situation is that Vladimir Davydov is fine with
patches #1 and #2 of the series [0] but dislikes the irq argument and
struct member. It has been suggested to use list_lru_shrink_walk_irq()
instead of the approach I took in "mm: list_lru: Add lock_irq member to
__list_lru_init()".

This series is based on the former two patches and introduces
list_lru_shrink_walk_irq() (and makes the third patch of the series
obsolete). In patches 1-3 I did a tiny cleanup so that the different
locking (spin_lock() vs spin_lock_irq()) is simply lifted to the caller
of the function.

[0] The patches
      mm: workingset: remove local_irq_disable() from count_shadow_nodes()
    and
      mm: workingset: make shadow_lru_isolate() use locking suffix

Sebastian
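The cover letter itself does not show the new helper, so here is a rough sketch of the direction it describes, under the assumption that the lock/unlock pair is lifted out of __list_lru_walk_one() into thin per-call-site wrappers. The names and signatures (list_lru_walk_one_irq() in particular) are assumptions, not quotes from the series.

```c
/*
 * Sketch only: an _irq walk variant along the lines of the cover letter.
 * Assumes __list_lru_walk_one() no longer takes nlru->lock itself and
 * instead operates on a struct list_lru_node handed in by the caller.
 */
static unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);	/* the only difference to the plain variant */
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate,
				  cb_arg, nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

/* Shrinker-facing wrapper, mirroring the existing list_lru_shrink_walk(). */
static inline unsigned long
list_lru_shrink_walk_irq(struct list_lru *lru, struct shrink_control *sc,
			 list_lru_walk_cb isolate, void *cb_arg)
{
	return list_lru_walk_one_irq(lru, sc->nid, sc->memcg, isolate,
				     cb_arg, &sc->nr_to_scan);
}
```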
> Reply-To: "[PATCH 0/4] mm/list_lru": add.list_lru_shrink_walk_irq@mail.linuxfoundation.org.and.use.it ()

Well that's messed up.

On Tue, 3 Jul 2018 16:52:31 +0200 Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

>
> > My interpretation of the situation is that Vladimir Davydov is fine with
> > patches #1 and #2 of the series [0] but dislikes the irq argument and
> > struct member. It has been suggested to use list_lru_shrink_walk_irq()
> > instead of the approach I took in "mm: list_lru: Add lock_irq member to
> > __list_lru_init()".
> >
> > This series is based on the former two patches and introduces
> > list_lru_shrink_walk_irq() (and makes the third patch of the series
> > obsolete). In patches 1-3 I did a tiny cleanup so that the different
> > locking (spin_lock() vs spin_lock_irq()) is simply lifted to the caller
> > of the function.
> >
> > [0] The patches
> >       mm: workingset: remove local_irq_disable() from count_shadow_nodes()
> >     and
> >       mm: workingset: make shadow_lru_isolate() use locking suffix
> >

This isn't a very informative [0/n] changelog. Some overall summary of
the patchset's objective, behaviour, use cases, testing results, etc.

I'm seeing significant conflicts with Kirill's "Improve shrink_slab()
scalability (old complexity was O(n^2), new is O(n))" series, which I
merged eight milliseconds ago. Kirill's patchset is large but fairly
straightforward so I expect it's good for 4.18. So I suggest we leave
things a week or more, then please take a look at redoing this patchset
on top of that work?
On 2018-07-03 14:14:29 [-0700], Andrew Morton wrote:
>
> > Reply-To: "[PATCH 0/4] mm/list_lru": add.list_lru_shrink_walk_irq@mail.linuxfoundation.org.and.use.it ()
>
> Well that's messed up.

indeed it is. This should get into Subject:

> On Tue, 3 Jul 2018 16:52:31 +0200 Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
>
> >
> > > My interpretation of the situation is that Vladimir Davydov is fine with
> > > patches #1 and #2 of the series [0] but dislikes the irq argument and
> > > struct member. It has been suggested to use list_lru_shrink_walk_irq()
> > > instead of the approach I took in "mm: list_lru: Add lock_irq member to
> > > __list_lru_init()".
> > >
> > > This series is based on the former two patches and introduces
> > > list_lru_shrink_walk_irq() (and makes the third patch of the series
> > > obsolete). In patches 1-3 I did a tiny cleanup so that the different
> > > locking (spin_lock() vs spin_lock_irq()) is simply lifted to the caller
> > > of the function.
> > >
> > > [0] The patches
> > >       mm: workingset: remove local_irq_disable() from count_shadow_nodes()
> > >     and
> > >       mm: workingset: make shadow_lru_isolate() use locking suffix
> > >
>
> This isn't a very informative [0/n] changelog. Some overall summary of
> the patchset's objective, behaviour, use cases, testing results, etc.

The patches should be threaded as a reply to 3/3 of the series, so I
assumed it was enough. And while Vladimir complained about 2/3 and 3/3,
the discussion went on in 2/3, where he suggested to go with the _irq
function. As for testing: with and without RT, the function was invoked
as part of swapping (allocating memory until OOM) without complaints.

> I'm seeing significant conflicts with Kirill's "Improve shrink_slab()
> scalability (old complexity was O(n^2), new is O(n))" series, which I
> merged eight milliseconds ago. Kirill's patchset is large but fairly
> straightforward so I expect it's good for 4.18. So I suggest we leave
> things a week or more, then please take a look at redoing this patchset
> on top of that work?

If Vladimir is okay with the redo and nobody else complains then I could
rebase these four patches on top of your tree next week.

Sebastian
On Tue, Jul 03, 2018 at 11:44:29PM +0200, Sebastian Andrzej Siewior wrote:
> On 2018-07-03 14:14:29 [-0700], Andrew Morton wrote:
> >
> > > Reply-To: "[PATCH 0/4] mm/list_lru": add.list_lru_shrink_walk_irq@mail.linuxfoundation.org.and.use.it ()
> >
> > Well that's messed up.
>
> indeed it is. This should get into Subject:
>
> > On Tue, 3 Jul 2018 16:52:31 +0200 Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> >
> > >
> > > > My interpretation of the situation is that Vladimir Davydov is fine with
> > > > patches #1 and #2 of the series [0] but dislikes the irq argument and
> > > > struct member. It has been suggested to use list_lru_shrink_walk_irq()
> > > > instead of the approach I took in "mm: list_lru: Add lock_irq member to
> > > > __list_lru_init()".
> > > >
> > > > This series is based on the former two patches and introduces
> > > > list_lru_shrink_walk_irq() (and makes the third patch of the series
> > > > obsolete). In patches 1-3 I did a tiny cleanup so that the different
> > > > locking (spin_lock() vs spin_lock_irq()) is simply lifted to the caller
> > > > of the function.
> > > >
> > > > [0] The patches
> > > >       mm: workingset: remove local_irq_disable() from count_shadow_nodes()
> > > >     and
> > > >       mm: workingset: make shadow_lru_isolate() use locking suffix
> > > >
> >
> > This isn't a very informative [0/n] changelog. Some overall summary of
> > the patchset's objective, behaviour, use cases, testing results, etc.
>
> The patches should be threaded as a reply to 3/3 of the series, so I
> assumed it was enough. And while Vladimir complained about 2/3 and 3/3,
> the discussion went on in 2/3, where he suggested to go with the _irq
> function. As for testing: with and without RT, the function was invoked
> as part of swapping (allocating memory until OOM) without complaints.
>
> > I'm seeing significant conflicts with Kirill's "Improve shrink_slab()
> > scalability (old complexity was O(n^2), new is O(n))" series, which I
> > merged eight milliseconds ago. Kirill's patchset is large but fairly
> > straightforward so I expect it's good for 4.18. So I suggest we leave
> > things a week or more, then please take a look at redoing this patchset
> > on top of that work?
>
> If Vladimir is okay with the redo and nobody else complains then I could
> rebase these four patches on top of your tree next week.

IMHO this approach is more straightforward than the one with the per
list_lru flag.

For all patches,

Reviewed-by: Vladimir Davydov <vdavydov.dev@gmail.com>
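With a wrapper along those lines, the workingset caller from the original patch would shrink to something like the following. This is again a sketch that assumes the list_lru_shrink_walk_irq() name from the series title rather than quoting the applied patch; functionally it mirrors what the removed local_irq_disable()/local_irq_enable() pair did, just expressed through the lock itself.

```c
/* Sketch of the end state in mm/workingset.c, assuming the wrapper above. */
static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
				       struct shrink_control *sc)
{
	/* list_lru lock nests inside the IRQ-safe i_pages lock */
	return list_lru_shrink_walk_irq(&shadow_nodes, sc,
					shadow_lru_isolate, NULL);
}
```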