diff mbox series

[1/6] mm, memcg: Prevent memory.high load/store tearing

Message ID 2f66f7038ed1d4688e59de72b627ae0ea52efa83.1584034301.git.chris@chrisdown.name (mailing list archive)
State New, archived
Headers show
Series mm, memcg: cgroup v2 tunable load/store tearing fixes | expand

Commit Message

Chris Down March 12, 2020, 5:32 p.m. UTC
A mem_cgroup's high attribute can be concurrently set at the same time
as we are trying to read it -- for example, if we are in
memory_high_write at the same time as we are trying to do high reclaim.

Signed-off-by: Chris Down <chris@chrisdown.name>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-mm@kvack.org
Cc: cgroups@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kernel-team@fb.com
---
 mm/memcontrol.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

Comments

Michal Hocko March 16, 2020, 2:54 p.m. UTC | #1
On Thu 12-03-20 17:32:51, Chris Down wrote:
> A mem_cgroup's high attribute can be concurrently set at the same time
> as we are trying to read it -- for example, if we are in
> memory_high_write at the same time as we are trying to do high reclaim.

I assume this is a replace all kinda patch because css_alloc shouldn't
really be a subject to races. I am not sure about css_reset but it
sounds like a safe as well.

That being said I do not object because this cannot be harmful but it
would be nice to mention that in the changelog just in case somebody
wonders about this in future.
 
> Signed-off-by: Chris Down <chris@chrisdown.name>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Roman Gushchin <guro@fb.com>
> Cc: Tejun Heo <tj@kernel.org>
> Cc: linux-mm@kvack.org
> Cc: cgroups@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: kernel-team@fb.com

Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  mm/memcontrol.c | 13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 63bb6a2aab81..d32d3c0a16d4 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2228,7 +2228,7 @@ static void reclaim_high(struct mem_cgroup *memcg,
>  			 gfp_t gfp_mask)
>  {
>  	do {
> -		if (page_counter_read(&memcg->memory) <= memcg->high)
> +		if (page_counter_read(&memcg->memory) <= READ_ONCE(memcg->high))
>  			continue;
>  		memcg_memory_event(memcg, MEMCG_HIGH);
>  		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
> @@ -2545,7 +2545,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
>  	 * reclaim, the cost of mismatch is negligible.
>  	 */
>  	do {
> -		if (page_counter_read(&memcg->memory) > memcg->high) {
> +		if (page_counter_read(&memcg->memory) > READ_ONCE(memcg->high)) {
>  			/* Don't bother a random interrupted task */
>  			if (in_interrupt()) {
>  				schedule_work(&memcg->high_work);
> @@ -4257,7 +4257,8 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
>  	*pheadroom = PAGE_COUNTER_MAX;
>  
>  	while ((parent = parent_mem_cgroup(memcg))) {
> -		unsigned long ceiling = min(memcg->memory.max, memcg->high);
> +		unsigned long ceiling = min(memcg->memory.max,
> +					    READ_ONCE(memcg->high));
>  		unsigned long used = page_counter_read(&memcg->memory);
>  
>  		*pheadroom = min(*pheadroom, ceiling - min(ceiling, used));
> @@ -4978,7 +4979,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>  	if (!memcg)
>  		return ERR_PTR(error);
>  
> -	memcg->high = PAGE_COUNTER_MAX;
> +	WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX);
>  	memcg->soft_limit = PAGE_COUNTER_MAX;
>  	if (parent) {
>  		memcg->swappiness = mem_cgroup_swappiness(parent);
> @@ -5131,7 +5132,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
>  	page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
>  	page_counter_set_min(&memcg->memory, 0);
>  	page_counter_set_low(&memcg->memory, 0);
> -	memcg->high = PAGE_COUNTER_MAX;
> +	WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX);
>  	memcg->soft_limit = PAGE_COUNTER_MAX;
>  	memcg_wb_domain_size_changed(memcg);
>  }
> @@ -5947,7 +5948,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
>  	if (err)
>  		return err;
>  
> -	memcg->high = high;
> +	WRITE_ONCE(memcg->high, high);
>  
>  	for (;;) {
>  		unsigned long nr_pages = page_counter_read(&memcg->memory);
> -- 
> 2.25.1
>
diff mbox series

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 63bb6a2aab81..d32d3c0a16d4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2228,7 +2228,7 @@  static void reclaim_high(struct mem_cgroup *memcg,
 			 gfp_t gfp_mask)
 {
 	do {
-		if (page_counter_read(&memcg->memory) <= memcg->high)
+		if (page_counter_read(&memcg->memory) <= READ_ONCE(memcg->high))
 			continue;
 		memcg_memory_event(memcg, MEMCG_HIGH);
 		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
@@ -2545,7 +2545,7 @@  static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	 * reclaim, the cost of mismatch is negligible.
 	 */
 	do {
-		if (page_counter_read(&memcg->memory) > memcg->high) {
+		if (page_counter_read(&memcg->memory) > READ_ONCE(memcg->high)) {
 			/* Don't bother a random interrupted task */
 			if (in_interrupt()) {
 				schedule_work(&memcg->high_work);
@@ -4257,7 +4257,8 @@  void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
 	*pheadroom = PAGE_COUNTER_MAX;
 
 	while ((parent = parent_mem_cgroup(memcg))) {
-		unsigned long ceiling = min(memcg->memory.max, memcg->high);
+		unsigned long ceiling = min(memcg->memory.max,
+					    READ_ONCE(memcg->high));
 		unsigned long used = page_counter_read(&memcg->memory);
 
 		*pheadroom = min(*pheadroom, ceiling - min(ceiling, used));
@@ -4978,7 +4979,7 @@  mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (!memcg)
 		return ERR_PTR(error);
 
-	memcg->high = PAGE_COUNTER_MAX;
+	WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX);
 	memcg->soft_limit = PAGE_COUNTER_MAX;
 	if (parent) {
 		memcg->swappiness = mem_cgroup_swappiness(parent);
@@ -5131,7 +5132,7 @@  static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 	page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
 	page_counter_set_min(&memcg->memory, 0);
 	page_counter_set_low(&memcg->memory, 0);
-	memcg->high = PAGE_COUNTER_MAX;
+	WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX);
 	memcg->soft_limit = PAGE_COUNTER_MAX;
 	memcg_wb_domain_size_changed(memcg);
 }
@@ -5947,7 +5948,7 @@  static ssize_t memory_high_write(struct kernfs_open_file *of,
 	if (err)
 		return err;
 
-	memcg->high = high;
+	WRITE_ONCE(memcg->high, high);
 
 	for (;;) {
 		unsigned long nr_pages = page_counter_read(&memcg->memory);