diff mbox series

[v1,05/12] mm: thp: introduce folio_split_queue_lock{_irqsave}()

Message ID 20210814052519.86679-6-songmuchun@bytedance.com (mailing list archive)
State New
Headers show
Series Use obj_cgroup APIs to charge the LRU pages | expand

Commit Message

Muchun Song Aug. 14, 2021, 5:25 a.m. UTC
We should make thp deferred split queue lock safe when LRU pages
are reparented. Similar to folio_lruvec_lock{_irqsave, _irq}(), we
introduce folio_split_queue_lock{_irqsave}() to make the deferred
split queue lock easier to be reparented.

And in the next patch, we can use a similar approach (just like
lruvec lock does) to make thp deferred split queue lock safe when
the LRU pages reparented.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 mm/huge_memory.c | 93 +++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 69 insertions(+), 24 deletions(-)

Comments

kernel test robot Aug. 14, 2021, 8:22 a.m. UTC | #1
Hi Muchun,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on next-20210813]
[cannot apply to hnaz-linux-mm/master cgroup/for-next linus/master v5.14-rc5 v5.14-rc4 v5.14-rc3 v5.14-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Muchun-Song/Use-obj_cgroup-APIs-to-charge-the-LRU-pages/20210814-132844
base:    4b358aabb93a2c654cd1dcab1a25a589f6e2b153
config: s390-randconfig-r044-20210814 (attached as .config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 1f7b25ea76a925aca690da28de9d78db7ca99d0c)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install s390 cross compiling tool for clang build
        # apt-get install binutils-s390x-linux-gnu
        # https://github.com/0day-ci/linux/commit/3460bcf13b968edf6f4621c0e0dcde46500957e5
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Muchun-Song/Use-obj_cgroup-APIs-to-charge-the-LRU-pages/20210814-132844
        git checkout 3460bcf13b968edf6f4621c0e0dcde46500957e5
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=s390 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from mm/huge_memory.c:16:
   In file included from include/linux/rmap.h:12:
   In file included from include/linux/memcontrol.h:22:
   In file included from include/linux/writeback.h:14:
   In file included from include/linux/blk-cgroup.h:23:
   In file included from include/linux/blkdev.h:23:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/s390/include/asm/io.h:75:
   include/asm-generic/io.h:464:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __raw_readb(PCI_IOBASE + addr);
                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:477:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:36:59: note: expanded from macro '__le16_to_cpu'
   #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
                                                             ^
   include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
   #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
                                                        ^
   In file included from mm/huge_memory.c:16:
   In file included from include/linux/rmap.h:12:
   In file included from include/linux/memcontrol.h:22:
   In file included from include/linux/writeback.h:14:
   In file included from include/linux/blk-cgroup.h:23:
   In file included from include/linux/blkdev.h:23:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/s390/include/asm/io.h:75:
   include/asm-generic/io.h:490:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:34:59: note: expanded from macro '__le32_to_cpu'
   #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
                                                             ^
   include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
   #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
                                                        ^
   In file included from mm/huge_memory.c:16:
   In file included from include/linux/rmap.h:12:
   In file included from include/linux/memcontrol.h:22:
   In file included from include/linux/writeback.h:14:
   In file included from include/linux/blk-cgroup.h:23:
   In file included from include/linux/blkdev.h:23:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/s390/include/asm/io.h:75:
   include/asm-generic/io.h:501:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writeb(value, PCI_IOBASE + addr);
                               ~~~~~~~~~~ ^
   include/asm-generic/io.h:511:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:521:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:609:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsb(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:617:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsw(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:625:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsl(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:634:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesb(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
   include/asm-generic/io.h:643:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesw(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
   include/asm-generic/io.h:652:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesl(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
>> mm/huge_memory.c:2797:21: warning: variable 'memcg' set but not used [-Wunused-but-set-variable]
           struct mem_cgroup *memcg;
                              ^
   13 warnings generated.


vim +/memcg +2797 mm/huge_memory.c

  2793	
  2794	void deferred_split_huge_page(struct page *page)
  2795	{
  2796		struct deferred_split *ds_queue;
> 2797		struct mem_cgroup *memcg;
  2798		unsigned long flags;
  2799	
  2800		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
  2801	
  2802		/*
  2803		 * The try_to_unmap() in page reclaim path might reach here too,
  2804		 * this may cause a race condition to corrupt deferred split queue.
  2805		 * And, if page reclaim is already handling the same page, it is
  2806		 * unnecessary to handle it again in shrinker.
  2807		 *
  2808		 * Check PageSwapCache to determine if the page is being
  2809		 * handled by page reclaim since THP swap would add the page into
  2810		 * swap cache before calling try_to_unmap().
  2811		 */
  2812		if (PageSwapCache(page))
  2813			return;
  2814	
  2815		ds_queue = folio_split_queue_lock_irqsave(page_folio(page), &flags);
  2816		memcg = split_queue_memcg(ds_queue);
  2817		if (list_empty(page_deferred_list(page))) {
  2818			count_vm_event(THP_DEFERRED_SPLIT_PAGE);
  2819			list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
  2820			ds_queue->split_queue_len++;
  2821	#ifdef CONFIG_MEMCG
  2822			if (memcg)
  2823				set_shrinker_bit(memcg, page_to_nid(page),
  2824						 deferred_split_shrinker.id);
  2825	#endif
  2826		}
  2827		split_queue_unlock_irqrestore(ds_queue, flags);
  2828	}
  2829	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot Aug. 14, 2021, 10:38 a.m. UTC | #2
Hi Muchun,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on next-20210813]
[cannot apply to hnaz-linux-mm/master cgroup/for-next linus/master v5.14-rc5 v5.14-rc4 v5.14-rc3 v5.14-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Muchun-Song/Use-obj_cgroup-APIs-to-charge-the-LRU-pages/20210814-132844
base:    4b358aabb93a2c654cd1dcab1a25a589f6e2b153
config: sparc64-randconfig-p002-20210814 (attached as .config)
compiler: sparc64-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/3460bcf13b968edf6f4621c0e0dcde46500957e5
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Muchun-Song/Use-obj_cgroup-APIs-to-charge-the-LRU-pages/20210814-132844
        git checkout 3460bcf13b968edf6f4621c0e0dcde46500957e5
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross ARCH=sparc64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   mm/huge_memory.c: In function 'deferred_split_huge_page':
>> mm/huge_memory.c:2797:28: warning: variable 'memcg' set but not used [-Wunused-but-set-variable]
    2797 |         struct mem_cgroup *memcg;
         |                            ^~~~~


vim +/memcg +2797 mm/huge_memory.c

  2793	
  2794	void deferred_split_huge_page(struct page *page)
  2795	{
  2796		struct deferred_split *ds_queue;
> 2797		struct mem_cgroup *memcg;
  2798		unsigned long flags;
  2799	
  2800		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
  2801	
  2802		/*
  2803		 * The try_to_unmap() in page reclaim path might reach here too,
  2804		 * this may cause a race condition to corrupt deferred split queue.
  2805		 * And, if page reclaim is already handling the same page, it is
  2806		 * unnecessary to handle it again in shrinker.
  2807		 *
  2808		 * Check PageSwapCache to determine if the page is being
  2809		 * handled by page reclaim since THP swap would add the page into
  2810		 * swap cache before calling try_to_unmap().
  2811		 */
  2812		if (PageSwapCache(page))
  2813			return;
  2814	
  2815		ds_queue = folio_split_queue_lock_irqsave(page_folio(page), &flags);
  2816		memcg = split_queue_memcg(ds_queue);
  2817		if (list_empty(page_deferred_list(page))) {
  2818			count_vm_event(THP_DEFERRED_SPLIT_PAGE);
  2819			list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
  2820			ds_queue->split_queue_len++;
  2821	#ifdef CONFIG_MEMCG
  2822			if (memcg)
  2823				set_shrinker_bit(memcg, page_to_nid(page),
  2824						 deferred_split_shrinker.id);
  2825	#endif
  2826		}
  2827		split_queue_unlock_irqrestore(ds_queue, flags);
  2828	}
  2829	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ade81c123d87..c49ef28e48c1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -499,25 +499,70 @@  pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 }
 
 #ifdef CONFIG_MEMCG
-static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+static inline struct mem_cgroup *split_queue_memcg(struct deferred_split *queue)
 {
-	struct mem_cgroup *memcg = page_memcg(compound_head(page));
-	struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+	if (mem_cgroup_disabled())
+		return NULL;
+	return container_of(queue, struct mem_cgroup, deferred_split_queue);
+}
 
-	if (memcg)
-		return &memcg->deferred_split_queue;
-	else
-		return &pgdat->deferred_split_queue;
+static inline struct deferred_split *folio_memcg_split_queue(struct folio *folio)
+{
+	struct mem_cgroup *memcg = folio_memcg(folio);
+
+	return memcg ? &memcg->deferred_split_queue : NULL;
 }
 #else
-static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+static inline struct mem_cgroup *split_queue_memcg(struct deferred_split *queue)
 {
-	struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+	return NULL;
+}
 
-	return &pgdat->deferred_split_queue;
+static inline struct deferred_split *folio_memcg_split_queue(struct folio *folio)
+{
+	return NULL;
 }
 #endif
 
+static struct deferred_split *folio_split_queue(struct folio *folio)
+{
+	struct deferred_split *queue = folio_memcg_split_queue(folio);
+
+	return queue ? : &NODE_DATA(folio_nid(folio))->deferred_split_queue;
+}
+
+static struct deferred_split *folio_split_queue_lock(struct folio *folio)
+{
+	struct deferred_split *queue;
+
+	queue = folio_split_queue(folio);
+	spin_lock(&queue->split_queue_lock);
+
+	return queue;
+}
+
+static struct deferred_split *
+folio_split_queue_lock_irqsave(struct folio *folio, unsigned long *flags)
+{
+	struct deferred_split *queue;
+
+	queue = folio_split_queue(folio);
+	spin_lock_irqsave(&queue->split_queue_lock, *flags);
+
+	return queue;
+}
+
+static inline void split_queue_unlock(struct deferred_split *queue)
+{
+	spin_unlock(&queue->split_queue_lock);
+}
+
+static inline void split_queue_unlock_irqrestore(struct deferred_split *queue,
+						 unsigned long flags)
+{
+	spin_unlock_irqrestore(&queue->split_queue_lock, flags);
+}
+
 void prep_transhuge_page(struct page *page)
 {
 	/*
@@ -2610,8 +2655,9 @@  bool can_split_huge_page(struct page *page, int *pextra_pins)
  */
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
-	struct page *head = compound_head(page);
-	struct deferred_split *ds_queue = get_deferred_split_queue(head);
+	struct folio *folio = page_folio(page);
+	struct page *head = &folio->page;
+	struct deferred_split *ds_queue;
 	struct anon_vma *anon_vma = NULL;
 	struct address_space *mapping = NULL;
 	int extra_pins, ret;
@@ -2689,13 +2735,13 @@  int split_huge_page_to_list(struct page *page, struct list_head *list)
 	}
 
 	/* Prevent deferred_split_scan() touching ->_refcount */
-	spin_lock(&ds_queue->split_queue_lock);
+	ds_queue = folio_split_queue_lock(folio);
 	if (page_ref_freeze(head, 1 + extra_pins)) {
 		if (!list_empty(page_deferred_list(head))) {
 			ds_queue->split_queue_len--;
 			list_del(page_deferred_list(head));
 		}
-		spin_unlock(&ds_queue->split_queue_lock);
+		split_queue_unlock(ds_queue);
 		if (mapping) {
 			int nr = thp_nr_pages(head);
 
@@ -2710,7 +2756,7 @@  int split_huge_page_to_list(struct page *page, struct list_head *list)
 		__split_huge_page(page, list, end);
 		ret = 0;
 	} else {
-		spin_unlock(&ds_queue->split_queue_lock);
+		split_queue_unlock(ds_queue);
 fail:
 		if (mapping)
 			xa_unlock(&mapping->i_pages);
@@ -2733,24 +2779,22 @@  int split_huge_page_to_list(struct page *page, struct list_head *list)
 
 void free_transhuge_page(struct page *page)
 {
-	struct deferred_split *ds_queue = get_deferred_split_queue(page);
+	struct deferred_split *ds_queue;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+	ds_queue = folio_split_queue_lock_irqsave(page_folio(page), &flags);
 	if (!list_empty(page_deferred_list(page))) {
 		ds_queue->split_queue_len--;
 		list_del(page_deferred_list(page));
 	}
-	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+	split_queue_unlock_irqrestore(ds_queue, flags);
 	free_compound_page(page);
 }
 
 void deferred_split_huge_page(struct page *page)
 {
-	struct deferred_split *ds_queue = get_deferred_split_queue(page);
-#ifdef CONFIG_MEMCG
-	struct mem_cgroup *memcg = page_memcg(compound_head(page));
-#endif
+	struct deferred_split *ds_queue;
+	struct mem_cgroup *memcg;
 	unsigned long flags;
 
 	VM_BUG_ON_PAGE(!PageTransHuge(page), page);
@@ -2768,7 +2812,8 @@  void deferred_split_huge_page(struct page *page)
 	if (PageSwapCache(page))
 		return;
 
-	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+	ds_queue = folio_split_queue_lock_irqsave(page_folio(page), &flags);
+	memcg = split_queue_memcg(ds_queue);
 	if (list_empty(page_deferred_list(page))) {
 		count_vm_event(THP_DEFERRED_SPLIT_PAGE);
 		list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
@@ -2779,7 +2824,7 @@  void deferred_split_huge_page(struct page *page)
 					 deferred_split_shrinker.id);
 #endif
 	}
-	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+	split_queue_unlock_irqrestore(ds_queue, flags);
 }
 
 static unsigned long deferred_split_count(struct shrinker *shrink,