diff mbox series

[3/6] mm, memcg: Prevent memory.low load/store tearing

Message ID 448206f44b0fa7be9dad2ca2601d2bcb2c0b7844.1584034301.git.chris@chrisdown.name (mailing list archive)
State New, archived
Headers show
Series mm, memcg: cgroup v2 tunable load/store tearing fixes | expand

Commit Message

Chris Down March 12, 2020, 5:33 p.m. UTC
This can be set concurrently with reads, which may cause the wrong value
to be propagated.

Signed-off-by: Chris Down <chris@chrisdown.name>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-mm@kvack.org
Cc: cgroups@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kernel-team@fb.com
---
 mm/memcontrol.c   | 4 ++--
 mm/page_counter.c | 9 ++++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

Comments

Michal Hocko March 16, 2020, 2:57 p.m. UTC | #1
On Thu 12-03-20 17:33:01, Chris Down wrote:
> This can be set concurrently with reads, which may cause the wrong value
> to be propagated.
> 
> Signed-off-by: Chris Down <chris@chrisdown.name>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Roman Gushchin <guro@fb.com>
> Cc: Tejun Heo <tj@kernel.org>
> Cc: linux-mm@kvack.org
> Cc: cgroups@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: kernel-team@fb.com

Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  mm/memcontrol.c   | 4 ++--
>  mm/page_counter.c | 9 ++++++---
>  2 files changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index aca2964ea494..c85a304fa4a1 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -6262,7 +6262,7 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
>  		return MEMCG_PROT_NONE;
>  
>  	emin = memcg->memory.min;
> -	elow = memcg->memory.low;
> +	elow = READ_ONCE(memcg->memory.low);
>  
>  	parent = parent_mem_cgroup(memcg);
>  	/* No parent means a non-hierarchical mode on v1 memcg */
> @@ -6291,7 +6291,7 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
>  	if (elow && parent_elow) {
>  		unsigned long low_usage, siblings_low_usage;
>  
> -		low_usage = min(usage, memcg->memory.low);
> +		low_usage = min(usage, READ_ONCE(memcg->memory.low));
>  		siblings_low_usage = atomic_long_read(
>  			&parent->memory.children_low_usage);
>  
> diff --git a/mm/page_counter.c b/mm/page_counter.c
> index 50184929b61f..18b7f779f2e2 100644
> --- a/mm/page_counter.c
> +++ b/mm/page_counter.c
> @@ -17,6 +17,7 @@ static void propagate_protected_usage(struct page_counter *c,
>  				      unsigned long usage)
>  {
>  	unsigned long protected, old_protected;
> +	unsigned long low;
>  	long delta;
>  
>  	if (!c->parent)
> @@ -34,8 +35,10 @@ static void propagate_protected_usage(struct page_counter *c,
>  			atomic_long_add(delta, &c->parent->children_min_usage);
>  	}
>  
> -	if (c->low || atomic_long_read(&c->low_usage)) {
> -		if (usage <= c->low)
> +	low = READ_ONCE(c->low);
> +
> +	if (low || atomic_long_read(&c->low_usage)) {
> +		if (usage <= low)
>  			protected = usage;
>  		else
>  			protected = 0;
> @@ -231,7 +234,7 @@ void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
>  {
>  	struct page_counter *c;
>  
> -	counter->low = nr_pages;
> +	WRITE_ONCE(counter->low, nr_pages);
>  
>  	for (c = counter; c; c = c->parent)
>  		propagate_protected_usage(c, atomic_long_read(&c->usage));
> -- 
> 2.25.1
>
Michal Koutný June 12, 2020, 5:03 p.m. UTC | #2
Hello.

I see suspicious asymmetry, in the current mainline:
>	WRITE_ONCE(memcg->memory.emin, effective_protection(usage, parent_usage,
>			READ_ONCE(memcg->memory.min),
>			READ_ONCE(parent->memory.emin),
>			atomic_long_read(&parent->memory.children_min_usage)));
>
>	WRITE_ONCE(memcg->memory.elow, effective_protection(usage, parent_usage,
>			memcg->memory.low, READ_ONCE(parent->memory.elow),
>			atomic_long_read(&parent->memory.children_low_usage)));

On Thu, Mar 12, 2020 at 05:33:01PM +0000, Chris Down <chris@chrisdown.name> wrote:
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index aca2964ea494..c85a304fa4a1 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -6262,7 +6262,7 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
>  		return MEMCG_PROT_NONE;
>  
>  	emin = memcg->memory.min;
> -	elow = memcg->memory.low;
> +	elow = READ_ONCE(memcg->memory.low);
>  
>  	parent = parent_mem_cgroup(memcg);
>  	/* No parent means a non-hierarchical mode on v1 memcg */
> @@ -6291,7 +6291,7 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
>  	if (elow && parent_elow) {
>  		unsigned long low_usage, siblings_low_usage;
>  
> -		low_usage = min(usage, memcg->memory.low);
> +		low_usage = min(usage, READ_ONCE(memcg->memory.low));
>  		siblings_low_usage = atomic_long_read(
>  			&parent->memory.children_low_usage);
Is it possible that these hunks were lost during rebase/merge?

IMHO it should apply as:

-- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6428,7 +6428,8 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
                        atomic_long_read(&parent->memory.children_min_usage)));

        WRITE_ONCE(memcg->memory.elow, effective_protection(usage, parent_usage,
-                       memcg->memory.low, READ_ONCE(parent->memory.elow),
+                       READ_ONCE(memcg->memory.low),
+                       READ_ONCE(parent->memory.elow),
                        atomic_long_read(&parent->memory.children_low_usage)));

 out:


Michal
Chris Down June 12, 2020, 5:13 p.m. UTC | #3
Hi Michal,

Good catch! Andrew and I must have missed these when massaging the commits with 
other stuff in -mm, which is totally understandable considering the amount of 
places being touched by this and other patch series at the same time. Just goes 
to show how complex it can be sometimes, since I even double checked these and 
didn't see that missed hunk :-)

The good news is that these are belt-and-suspenders: this avoids a rare 
theoretical case, not something likely to be practically dangerous. But yes, we 
should fix it up. Since the commit is already out, I'll just submit a new one.

Thanks for the report!

Chris
diff mbox series

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index aca2964ea494..c85a304fa4a1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6262,7 +6262,7 @@  enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
 		return MEMCG_PROT_NONE;
 
 	emin = memcg->memory.min;
-	elow = memcg->memory.low;
+	elow = READ_ONCE(memcg->memory.low);
 
 	parent = parent_mem_cgroup(memcg);
 	/* No parent means a non-hierarchical mode on v1 memcg */
@@ -6291,7 +6291,7 @@  enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
 	if (elow && parent_elow) {
 		unsigned long low_usage, siblings_low_usage;
 
-		low_usage = min(usage, memcg->memory.low);
+		low_usage = min(usage, READ_ONCE(memcg->memory.low));
 		siblings_low_usage = atomic_long_read(
 			&parent->memory.children_low_usage);
 
diff --git a/mm/page_counter.c b/mm/page_counter.c
index 50184929b61f..18b7f779f2e2 100644
--- a/mm/page_counter.c
+++ b/mm/page_counter.c
@@ -17,6 +17,7 @@  static void propagate_protected_usage(struct page_counter *c,
 				      unsigned long usage)
 {
 	unsigned long protected, old_protected;
+	unsigned long low;
 	long delta;
 
 	if (!c->parent)
@@ -34,8 +35,10 @@  static void propagate_protected_usage(struct page_counter *c,
 			atomic_long_add(delta, &c->parent->children_min_usage);
 	}
 
-	if (c->low || atomic_long_read(&c->low_usage)) {
-		if (usage <= c->low)
+	low = READ_ONCE(c->low);
+
+	if (low || atomic_long_read(&c->low_usage)) {
+		if (usage <= low)
 			protected = usage;
 		else
 			protected = 0;
@@ -231,7 +234,7 @@  void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
 {
 	struct page_counter *c;
 
-	counter->low = nr_pages;
+	WRITE_ONCE(counter->low, nr_pages);
 
 	for (c = counter; c; c = c->parent)
 		propagate_protected_usage(c, atomic_long_read(&c->usage));