diff mbox series

[v2] memcg: charge before adding to swapcache on swapin

Message ID 20210223055505.2594953-1-shakeelb@google.com (mailing list archive)
State New, archived
Headers show
Series [v2] memcg: charge before adding to swapcache on swapin | expand

Commit Message

Shakeel Butt Feb. 23, 2021, 5:55 a.m. UTC
Currently the kernel adds the page, allocated for swapin, to the
swapcache before charging the page. This is fine, but now we want a
per-memcg swapcache stat, which is essential for folks who want to
transparently migrate from cgroup v1's memsw to cgroup v2's memory and
swap counters. In addition, charging a page before exposing it to other
parts of the kernel is a step in the right direction.

To correctly maintain the per-memcg swapcache stat, this patch charges
the page before adding it to the swapcache. One challenge with this
approach is the failure case of add_to_swap_cache(), in which we need
to undo the mem_cgroup_charge(). Specifically, undoing
mem_cgroup_uncharge_swap() is not simple.

To resolve the issue, this patch introduces a transaction-like interface
to charge a page for swapin. The function mem_cgroup_charge_swapin_page()
initiates the charging of the page and mem_cgroup_finish_swapin_page()
completes the charging process. So, the kernel starts the charging
process of the page for swapin with mem_cgroup_charge_swapin_page(),
adds the page to the swapcache and, on success, completes the charging
process with mem_cgroup_finish_swapin_page().

Signed-off-by: Shakeel Butt <shakeelb@google.com>
---
Changes since v1:
- Removed __GFP_NOFAIL and introduced a transaction interface for charging
  (suggested by Johannes)
- Updated the commit message

 include/linux/memcontrol.h |  14 +++++
 mm/memcontrol.c            | 116 +++++++++++++++++++++++--------------
 mm/memory.c                |  14 ++---
 mm/swap_state.c            |  11 ++--
 4 files changed, 97 insertions(+), 58 deletions(-)

Comments

kernel test robot Feb. 23, 2021, 6:53 a.m. UTC | #1
Hi Shakeel,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on next-20210222]
[cannot apply to linus/master hnaz-linux-mm/master v5.11 v5.11-rc7 v5.11-rc6 v5.11]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Shakeel-Butt/memcg-charge-before-adding-to-swapcache-on-swapin/20210223-135711
base:    37dfbfbdca66834bc0f64ec9b35e09ac6c8898da
config: x86_64-randconfig-m031-20210223 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/7ad6fb47580886394809b563b7476954a35f3054
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Shakeel-Butt/memcg-charge-before-adding-to-swapcache-on-swapin/20210223-135711
        git checkout 7ad6fb47580886394809b563b7476954a35f3054
        # save the attached .config to linux build tree
        make W=1 ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/swap.h:9,
                    from include/linux/suspend.h:5,
                    from arch/x86/kernel/asm-offsets.c:13:
>> include/linux/memcontrol.h:1149:1: error: expected identifier or '(' before '{' token
    1149 | {
         | ^
   include/linux/memcontrol.h:1147:19: warning: 'mem_cgroup_charge_swapin_page' declared 'static' but never defined [-Wunused-function]
    1147 | static inline int mem_cgroup_charge_swapin_page(struct page *page,
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
   In file included from include/linux/swap.h:9,
                    from include/linux/suspend.h:5,
                    from arch/x86/kernel/asm-offsets.c:13:
>> include/linux/memcontrol.h:1149:1: error: expected identifier or '(' before '{' token
    1149 | {
         | ^
   include/linux/memcontrol.h:1147:19: warning: 'mem_cgroup_charge_swapin_page' declared 'static' but never defined [-Wunused-function]
    1147 | static inline int mem_cgroup_charge_swapin_page(struct page *page,
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   make[2]: *** [scripts/Makefile.build:117: arch/x86/kernel/asm-offsets.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1228: prepare0] Error 2
   make[1]: Target 'modules_prepare' not remade because of errors.
   make: *** [Makefile:185: __sub-make] Error 2
   make: Target 'modules_prepare' not remade because of errors.
--
   In file included from include/linux/swap.h:9,
                    from include/linux/suspend.h:5,
                    from arch/x86/kernel/asm-offsets.c:13:
>> include/linux/memcontrol.h:1149:1: error: expected identifier or '(' before '{' token
    1149 | {
         | ^
   include/linux/memcontrol.h:1147:19: warning: 'mem_cgroup_charge_swapin_page' declared 'static' but never defined [-Wunused-function]
    1147 | static inline int mem_cgroup_charge_swapin_page(struct page *page,
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   make[2]: *** [scripts/Makefile.build:117: arch/x86/kernel/asm-offsets.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1228: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:185: __sub-make] Error 2
   make: Target 'prepare' not remade because of errors.


vim +1149 include/linux/memcontrol.h

  1146	
  1147	static inline int mem_cgroup_charge_swapin_page(struct page *page,
  1148				struct mm_struct *mm, gfp_t gfp, swp_entry_t entry);
> 1149	{
  1150		return 0;
  1151	}
  1152	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot Feb. 23, 2021, 7:12 a.m. UTC | #2
Hi Shakeel,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on next-20210222]
[cannot apply to linus/master hnaz-linux-mm/master v5.11 v5.11-rc7 v5.11-rc6 v5.11]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Shakeel-Butt/memcg-charge-before-adding-to-swapcache-on-swapin/20210223-135711
base:    37dfbfbdca66834bc0f64ec9b35e09ac6c8898da
config: arm-colibri_pxa300_defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/7ad6fb47580886394809b563b7476954a35f3054
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Shakeel-Butt/memcg-charge-before-adding-to-swapcache-on-swapin/20210223-135711
        git checkout 7ad6fb47580886394809b563b7476954a35f3054
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arm 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from include/linux/swap.h:9,
                    from mm/swap_state.c:13:
   include/linux/memcontrol.h:1149:1: error: expected identifier or '(' before '{' token
    1149 | {
         | ^
>> include/linux/memcontrol.h:1147:19: warning: 'mem_cgroup_charge_swapin_page' used but never defined
    1147 | static inline int mem_cgroup_charge_swapin_page(struct page *page,
         |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~


vim +/mem_cgroup_charge_swapin_page +1147 include/linux/memcontrol.h

  1146	
> 1147	static inline int mem_cgroup_charge_swapin_page(struct page *page,
  1148				struct mm_struct *mm, gfp_t gfp, swp_entry_t entry);
> 1149	{
  1150		return 0;
  1151	}
  1152	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Shakeel Butt Feb. 23, 2021, 4:39 p.m. UTC | #3
On Mon, Feb 22, 2021 at 9:55 PM Shakeel Butt <shakeelb@google.com> wrote:
[snip]
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -596,6 +596,9 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
>  }
>
>  int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask);
> +int mem_cgroup_charge_swapin_page(struct page *page, struct mm_struct *mm,
> +                                 gfp_t gfp, swp_entry_t entry);
> +void mem_cgroup_finish_swapin_page(struct page *page, swp_entry_t entry);
>
>  void mem_cgroup_uncharge(struct page *page);
>  void mem_cgroup_uncharge_list(struct list_head *page_list);
> @@ -1141,6 +1144,17 @@ static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
>         return 0;
>  }
>
> +static inline int mem_cgroup_charge_swapin_page(struct page *page,
> +                       struct mm_struct *mm, gfp_t gfp, swp_entry_t entry);

I didn't build-test the !CONFIG_MEMCG config and missed this
semicolon. Andrew, let me know if you want me to send a new version.

> +{
> +       return 0;
> +}
> +
diff mbox series

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e6dc793d587d..585d96bda4f5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -596,6 +596,9 @@  static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
 }
 
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask);
+int mem_cgroup_charge_swapin_page(struct page *page, struct mm_struct *mm,
+				  gfp_t gfp, swp_entry_t entry);
+void mem_cgroup_finish_swapin_page(struct page *page, swp_entry_t entry);
 
 void mem_cgroup_uncharge(struct page *page);
 void mem_cgroup_uncharge_list(struct list_head *page_list);
@@ -1141,6 +1144,17 @@  static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 	return 0;
 }
 
+static inline int mem_cgroup_charge_swapin_page(struct page *page,
+			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
+{
+	return 0;
+}
+
+static inline void mem_cgroup_finish_swapin_page(struct page *page,
+						 swp_entry_t entry)
+{
+}
+
 static inline void mem_cgroup_uncharge(struct page *page)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2db2aeac8a9e..226b7bccb44c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6690,6 +6690,27 @@  void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 			atomic_long_read(&parent->memory.children_low_usage)));
 }
 
+static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg,
+			       gfp_t gfp)
+{
+	unsigned int nr_pages = thp_nr_pages(page);
+	int ret;
+
+	ret = try_charge(memcg, gfp, nr_pages);
+	if (ret)
+		goto out;
+
+	css_get(&memcg->css);
+	commit_charge(page, memcg);
+
+	local_irq_disable();
+	mem_cgroup_charge_statistics(memcg, page, nr_pages);
+	memcg_check_events(memcg, page);
+	local_irq_enable();
+out:
+	return ret;
+}
+
 /**
  * mem_cgroup_charge - charge a newly allocated page to a cgroup
  * @page: page to charge
@@ -6699,55 +6720,70 @@  void mem_cgroup_calculate_protection(struct mem_cgroup *root,
  * Try to charge @page to the memcg that @mm belongs to, reclaiming
  * pages according to @gfp_mask if necessary.
  *
+ * Do not use this for pages allocated for swapin.
+ *
  * Returns 0 on success. Otherwise, an error code is returned.
  */
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
-	unsigned int nr_pages = thp_nr_pages(page);
-	struct mem_cgroup *memcg = NULL;
-	int ret = 0;
+	struct mem_cgroup *memcg;
+	int ret;
 
 	if (mem_cgroup_disabled())
-		goto out;
+		return 0;
 
-	if (PageSwapCache(page)) {
-		swp_entry_t ent = { .val = page_private(page), };
-		unsigned short id;
+	memcg = get_mem_cgroup_from_mm(mm);
+	ret = __mem_cgroup_charge(page, memcg, gfp_mask);
+	css_put(&memcg->css);
 
-		/*
-		 * Every swap fault against a single page tries to charge the
-		 * page, bail as early as possible.  shmem_unuse() encounters
-		 * already charged pages, too.  page and memcg binding is
-		 * protected by the page lock, which serializes swap cache
-		 * removal, which in turn serializes uncharging.
-		 */
-		VM_BUG_ON_PAGE(!PageLocked(page), page);
-		if (page_memcg(compound_head(page)))
-			goto out;
+	return ret;
+}
 
-		id = lookup_swap_cgroup_id(ent);
-		rcu_read_lock();
-		memcg = mem_cgroup_from_id(id);
-		if (memcg && !css_tryget_online(&memcg->css))
-			memcg = NULL;
-		rcu_read_unlock();
-	}
+/**
+ * mem_cgroup_charge_swapin_page - charge a newly allocated page for swapin
+ * @page: page to charge
+ * @mm: mm context of the victim
+ * @gfp: reclaim mode
+ * @entry: swap entry for which the page is allocated
+ *
+ * This function marks the start of the transaction of charging the page for
+ * swapin. Complete the transaction with mem_cgroup_finish_swapin_page().
+ *
+ * Returns 0 on success. Otherwise, an error code is returned.
+ */
+int mem_cgroup_charge_swapin_page(struct page *page, struct mm_struct *mm,
+				  gfp_t gfp, swp_entry_t entry)
+{
+	struct mem_cgroup *memcg;
+	unsigned short id;
+	int ret;
 
-	if (!memcg)
-		memcg = get_mem_cgroup_from_mm(mm);
+	if (mem_cgroup_disabled())
+		return 0;
 
-	ret = try_charge(memcg, gfp_mask, nr_pages);
-	if (ret)
-		goto out_put;
+	id = lookup_swap_cgroup_id(entry);
+	rcu_read_lock();
+	memcg = mem_cgroup_from_id(id);
+	if (!memcg || !css_tryget_online(&memcg->css))
+		memcg = get_mem_cgroup_from_mm(mm);
+	rcu_read_unlock();
 
-	css_get(&memcg->css);
-	commit_charge(page, memcg);
+	ret = __mem_cgroup_charge(page, memcg, gfp);
 
-	local_irq_disable();
-	mem_cgroup_charge_statistics(memcg, page, nr_pages);
-	memcg_check_events(memcg, page);
-	local_irq_enable();
+	css_put(&memcg->css);
+	return ret;
+}
 
+/*
+ * mem_cgroup_finish_swapin_page - complete the swapin page charge transaction
+ * @page: page charged for swapin
+ * @entry: swap entry for which the page is charged
+ *
+ * This function completes the transaction of charging the page allocated for
+ * swapin.
+ */
+void mem_cgroup_finish_swapin_page(struct page *page, swp_entry_t entry)
+{
 	/*
 	 * Cgroup1's unified memory+swap counter has been charged with the
 	 * new swapcache page, finish the transfer by uncharging the swap
@@ -6760,20 +6796,14 @@  int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 	 * correspond 1:1 to page and swap slot lifetimes: we charge the
 	 * page to memory here, and uncharge swap when the slot is freed.
 	 */
-	if (do_memsw_account() && PageSwapCache(page)) {
-		swp_entry_t entry = { .val = page_private(page) };
+	if (!mem_cgroup_disabled() && do_memsw_account()) {
 		/*
 		 * The swap entry might not get freed for a long time,
 		 * let's not wait for it.  The page already received a
 		 * memory+swap charge, drop the swap entry duplicate.
 		 */
-		mem_cgroup_uncharge_swap(entry, nr_pages);
+		mem_cgroup_uncharge_swap(entry, thp_nr_pages(page));
 	}
-
-out_put:
-	css_put(&memcg->css);
-out:
-	return ret;
 }
 
 struct uncharge_gather {
diff --git a/mm/memory.c b/mm/memory.c
index c8e357627318..4cd3cd95bb70 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3307,21 +3307,15 @@  vm_fault_t do_swap_page(struct vm_fault *vmf)
 			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
 							vmf->address);
 			if (page) {
-				int err;
-
 				__SetPageLocked(page);
 				__SetPageSwapBacked(page);
-				set_page_private(page, entry.val);
-
-				/* Tell memcg to use swap ownership records */
-				SetPageSwapCache(page);
-				err = mem_cgroup_charge(page, vma->vm_mm,
-							GFP_KERNEL);
-				ClearPageSwapCache(page);
-				if (err) {
+
+				if (mem_cgroup_charge_swapin_page(page,
+					vma->vm_mm, GFP_KERNEL, entry)) {
 					ret = VM_FAULT_OOM;
 					goto out_page;
 				}
+				mem_cgroup_finish_swapin_page(page, entry);
 
 				shadow = get_shadow_from_swap_cache(entry);
 				if (shadow)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3cdee7b11da9..27a7acbcf880 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -497,16 +497,16 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	__SetPageLocked(page);
 	__SetPageSwapBacked(page);
 
+	if (mem_cgroup_charge_swapin_page(page, NULL, gfp_mask, entry))
+		goto fail_unlock;
+
 	/* May fail (-ENOMEM) if XArray node allocation failed. */
 	if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) {
-		put_swap_page(page, entry);
+		mem_cgroup_uncharge(page);
 		goto fail_unlock;
 	}
 
-	if (mem_cgroup_charge(page, NULL, gfp_mask)) {
-		delete_from_swap_cache(page);
-		goto fail_unlock;
-	}
+	mem_cgroup_finish_swapin_page(page, entry);
 
 	if (shadow)
 		workingset_refault(page, shadow);
@@ -517,6 +517,7 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	return page;
 
 fail_unlock:
+	put_swap_page(page, entry);
 	unlock_page(page);
 	put_page(page);
 	return NULL;