[7/9] mm: Free up PG_slab

Message ID: 20240321142448.1645400-8-willy@infradead.org
State: New
Series: Various significant MM patches

Commit Message

Matthew Wilcox (Oracle) March 21, 2024, 2:24 p.m. UTC
Reclaim the Slab page flag by using a spare bit in PageType.  We are
perennially short of page flags for various purposes, and now that
the original SLAB allocator has been retired, SLUB does not use the
mapcount/page_type field.  This lets us remove a number of special cases
for ignoring mapcount on Slab pages.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/page-flags.h     | 21 +++++++++++++++++----
 include/trace/events/mmflags.h |  2 +-
 mm/memory-failure.c            |  9 ---------
 mm/slab.h                      |  2 +-
 4 files changed, 19 insertions(+), 15 deletions(-)
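
For readers unfamiliar with page types, here is a minimal sketch of the inverted encoding this patch relies on. The constants match include/linux/page-flags.h as of this series; the helper itself is illustrative, not kernel code:

#define PAGE_TYPE_BASE	0xf0000000
#define PG_slab		0x00001000	/* the bit this patch assigns */

/*
 * page_type starts life as -1 (all bits set).  "Setting" a type
 * CLEARS its bit, so a test checks that the base bits are intact
 * and the type bit is clear; this is the same shape as the
 * PageType() macro in the diff below.
 */
static inline bool page_type_is_slab(unsigned int page_type)
{
	return (page_type & (PAGE_TYPE_BASE | PG_slab)) == PAGE_TYPE_BASE;
}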

Comments

Miaohe Lin March 22, 2024, 9:20 a.m. UTC | #1
On 2024/3/21 22:24, Matthew Wilcox (Oracle) wrote:
> Reclaim the Slab page flag by using a spare bit in PageType.  We are
> perennially short of page flags for various purposes, and now that
> the original SLAB allocator has been retired, SLUB does not use the
> mapcount/page_type field.  This lets us remove a number of special cases
> for ignoring mapcount on Slab pages.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---

[...]

> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 9349948f1abf..1cb41ba7870c 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1239,7 +1239,6 @@ static int me_huge_page(struct page_state *ps, struct page *p)
>  #define mlock		(1UL << PG_mlocked)
>  #define lru		(1UL << PG_lru)
>  #define head		(1UL << PG_head)
> -#define slab		(1UL << PG_slab)
>  #define reserved	(1UL << PG_reserved)
>  
>  static struct page_state error_states[] = {
> @@ -1249,13 +1248,6 @@ static struct page_state error_states[] = {
>  	 * PG_buddy pages only make a small fraction of all free pages.
>  	 */
>  
> -	/*
> -	 * Could in theory check if slab page is free or if we can drop
> -	 * currently unused objects without touching them. But just
> -	 * treat it as standard kernel for now.
> -	 */
> -	{ slab,		slab,		MF_MSG_SLAB,	me_kernel },

Would it be better to leave the above slab case here to catch possible unhandled obscure races with
slab? Though it looks like slab pages shouldn't reach this point.

Thanks.
Vlastimil Babka March 22, 2024, 10:41 a.m. UTC | #2
On 3/22/24 10:20, Miaohe Lin wrote:
> On 2024/3/21 22:24, Matthew Wilcox (Oracle) wrote:
>> Reclaim the Slab page flag by using a spare bit in PageType.  We are
>> perennially short of page flags for various purposes, and now that
>> the original SLAB allocator has been retired, SLUB does not use the
>> mapcount/page_type field.  This lets us remove a number of special cases
>> for ignoring mapcount on Slab pages.
>> 
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Acked-by: Vlastimil Babka <vbabka@suse.cz>

>> ---

[...]

>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>> index 9349948f1abf..1cb41ba7870c 100644
>> --- a/mm/memory-failure.c
>> +++ b/mm/memory-failure.c
>> @@ -1239,7 +1239,6 @@ static int me_huge_page(struct page_state *ps, struct page *p)
>>  #define mlock		(1UL << PG_mlocked)
>>  #define lru		(1UL << PG_lru)
>>  #define head		(1UL << PG_head)
>> -#define slab		(1UL << PG_slab)
>>  #define reserved	(1UL << PG_reserved)
>>  
>>  static struct page_state error_states[] = {
>> @@ -1249,13 +1248,6 @@ static struct page_state error_states[] = {
>>  	 * PG_buddy pages only make a small fraction of all free pages.
>>  	 */
>>  
>> -	/*
>> -	 * Could in theory check if slab page is free or if we can drop
>> -	 * currently unused objects without touching them. But just
>> -	 * treat it as standard kernel for now.
>> -	 */
>> -	{ slab,		slab,		MF_MSG_SLAB,	me_kernel },
> 
> Would it be better to leave the above slab case here to catch possible unhandled obscure races with
> slab? Though it looks like slab pages shouldn't reach this point.

The code would need to handle page types, as it's no longer a page flag. I
guess that's your decision? If it's not necessary, then I guess MF_MSG_SLAB
itself could also be removed, along with a bunch more code referencing it.

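(For illustration of the point above: error_states[] entries can only match page flags, so keeping the slab case would require an explicit page-type check ahead of the table walk. A hedged sketch with a hypothetical helper name; the removed entry simply fell through to me_kernel(), which returns MF_IGNORED:)

/*
 * Hypothetical, not actual kernel code: short-circuit slab pages
 * before the flag-based error_states[] walk, since PG_slab is now
 * a page type rather than a page flag.
 */
static int try_handle_slab_page(struct page *p, unsigned long pfn)
{
	if (!PageSlab(p))	/* a page_type test after this patch */
		return -EAGAIN;	/* fall through to error_states[] */

	/* Treat it as a standard kernel page, as the removed entry did. */
	action_result(pfn, MF_MSG_SLAB, MF_IGNORED);
	return 0;
}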
David Hildenbrand March 22, 2024, 3:09 p.m. UTC | #3
On 21.03.24 15:24, Matthew Wilcox (Oracle) wrote:
> Reclaim the Slab page flag by using a spare bit in PageType.  We are
> perennially short of page flags for various purposes, and now that
> the original SLAB allocator has been retired, SLUB does not use the
> mapcount/page_type field.  This lets us remove a number of special cases
> for ignoring mapcount on Slab pages.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---


Acked-by: David Hildenbrand <david@redhat.com>
Matthew Wilcox (Oracle) March 25, 2024, 3:19 p.m. UTC | #4
On Thu, Mar 21, 2024 at 02:24:45PM +0000, Matthew Wilcox (Oracle) wrote:
> Reclaim the Slab page flag by using a spare bit in PageType.  We are
> perennially short of page flags for various purposes, and now that
> the original SLAB allocator has been retired, SLUB does not use the
> mapcount/page_type field.  This lets us remove a number of special cases
> for ignoring mapcount on Slab pages.

Update vmcoreinfo:

diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index 23c125c2e243..1d5eadd9dd61 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -198,7 +198,8 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_NUMBER(PG_private);
 	VMCOREINFO_NUMBER(PG_swapcache);
 	VMCOREINFO_NUMBER(PG_swapbacked);
-	VMCOREINFO_NUMBER(PG_slab);
+#define PAGE_SLAB_MAPCOUNT_VALUE	(~PG_slab)
+	VMCOREINFO_NUMBER(PAGE_SLAB_MAPCOUNT_VALUE);
 #ifdef CONFIG_MEMORY_FAILURE
 	VMCOREINFO_NUMBER(PG_hwpoison);
 #endif
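
(A hedged sketch of the consumer side: because page_type is initialised to -1 and setting the slab type clears its bit, a slab page's page_type equals ~PG_slab, which is exactly the value the export above records. The memcg_slabinfo.py fix later in this thread makes the equivalent comparison; the helper name here is hypothetical:)

/* Dump-tool side check, sketch only: */
static bool dump_page_is_slab(unsigned int page_type)
{
	return page_type == PAGE_SLAB_MAPCOUNT_VALUE;	/* i.e. ~PG_slab */
}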
kernel test robot March 31, 2024, 3:11 p.m. UTC | #5
Hello,

kernel test robot noticed "UBSAN:shift-out-of-bounds_in_fs/proc/page.c" on:

commit: 30e5296811312a13938b83956a55839ac1e3aa40 ("[PATCH 7/9] mm: Free up PG_slab")
url: https://github.com/intel-lab-lkp/linux/commits/Matthew-Wilcox-Oracle/mm-Always-initialise-folio-_deferred_list/20240321-222800
base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git 23956900041d968f9ad0f30db6dede4daccd7aa9
patch link: https://lore.kernel.org/all/20240321142448.1645400-8-willy@infradead.org/
patch subject: [PATCH 7/9] mm: Free up PG_slab

in testcase: ltp
version: ltp-x86_64-14c1f76-1_20240323
with following parameters:

	disk: 1HDD
	fs: ext4
	test: fs-00



compiler: gcc-12
test machine: 4 threads 1 sockets Intel(R) Core(TM) i3-3220 CPU @ 3.30GHz (Ivy Bridge) with 8G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202403312344.c0d273ab-oliver.sang@intel.com


kern  :warn  : [  528.627387] ------------[ cut here ]------------
kern  :err   : [  528.627589] UBSAN: shift-out-of-bounds in fs/proc/page.c:107:18
kern  :err   : [  528.627884] shift exponent 4096 is too large for 64-bit type 'long long unsigned int'
kern  :warn  : [  528.628200] CPU: 0 PID: 4703 Comm: proc01 Tainted: G S                 6.8.0-11774-g30e529681131 #1
kern  :warn  : [  528.628446] Hardware name: Hewlett-Packard p6-1451cx/2ADA, BIOS 8.15 02/05/2013
kern  :warn  : [  528.628659] Call Trace:
kern  :warn  : [  528.628814]  <TASK>
kern :warn : [  528.628960] dump_stack_lvl (lib/dump_stack.c:117 (discriminator 1)) 
kern :warn : [  528.629134] __ubsan_handle_shift_out_of_bounds (lib/ubsan.c:218 lib/ubsan.c:454) 
kern :warn : [  528.629360] stable_page_flags.part.0.cold (include/linux/page-flags.h:284 fs/proc/page.c:184) 
kern :warn : [  528.629506] kpageflags_read (fs/proc/page.c:238 fs/proc/page.c:250) 
kern :warn : [  528.629623] vfs_read (fs/read_write.c:474) 
kern :warn : [  528.629737] ? do_sys_openat2 (fs/open.c:1415) 
kern :warn : [  528.629898] ? kmem_cache_free (mm/slub.c:4280 mm/slub.c:4344) 
kern :warn : [  528.630063] ? __pfx_vfs_read (fs/read_write.c:457) 
kern :warn : [  528.630225] ? do_sys_openat2 (fs/open.c:1415) 
kern :warn : [  528.630388] ? __pfx_do_sys_openat2 (fs/open.c:1392) 
kern :warn : [  528.630552] ? __do_sys_newfstatat (fs/stat.c:464) 
kern :warn : [  528.630717] ? __fget_light (include/linux/atomic/atomic-arch-fallback.h:479 include/linux/atomic/atomic-instrumented.h:50 fs/file.c:1145) 
kern :warn : [  528.630888] ksys_read (fs/read_write.c:619) 
kern :warn : [  528.631051] ? __pfx_ksys_read (fs/read_write.c:609) 
kern :warn : [  528.631216] ? kmem_cache_free (mm/slub.c:4280 mm/slub.c:4344) 
kern :warn : [  528.631415] do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) 
kern :warn : [  528.631555] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) 
kern  :warn  : [  528.631756] RIP: 0033:0x7f90bf2ba19d
kern :warn : [ 528.631913] Code: 31 c0 e9 c6 fe ff ff 50 48 8d 3d 66 54 0a 00 e8 49 ff 01 00 66 0f 1f 84 00 00 00 00 00 80 3d 41 24 0e 00 00 74 17 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 5b c3 66 2e 0f 1f 84 00 00 00 00 00 48 83 ec
All code
========
   0:	31 c0                	xor    %eax,%eax
   2:	e9 c6 fe ff ff       	jmpq   0xfffffffffffffecd
   7:	50                   	push   %rax
   8:	48 8d 3d 66 54 0a 00 	lea    0xa5466(%rip),%rdi        # 0xa5475
   f:	e8 49 ff 01 00       	callq  0x1ff5d
  14:	66 0f 1f 84 00 00 00 	nopw   0x0(%rax,%rax,1)
  1b:	00 00 
  1d:	80 3d 41 24 0e 00 00 	cmpb   $0x0,0xe2441(%rip)        # 0xe2465
  24:	74 17                	je     0x3d
  26:	31 c0                	xor    %eax,%eax
  28:	0f 05                	syscall 
  2a:*	48 3d 00 f0 ff ff    	cmp    $0xfffffffffffff000,%rax		<-- trapping instruction
  30:	77 5b                	ja     0x8d
  32:	c3                   	retq   
  33:	66 2e 0f 1f 84 00 00 	nopw   %cs:0x0(%rax,%rax,1)
  3a:	00 00 00 
  3d:	48                   	rex.W
  3e:	83                   	.byte 0x83
  3f:	ec                   	in     (%dx),%al

Code starting with the faulting instruction
===========================================
   0:	48 3d 00 f0 ff ff    	cmp    $0xfffffffffffff000,%rax
   6:	77 5b                	ja     0x63
   8:	c3                   	retq   
   9:	66 2e 0f 1f 84 00 00 	nopw   %cs:0x0(%rax,%rax,1)
  10:	00 00 00 
  13:	48                   	rex.W
  14:	83                   	.byte 0x83
  15:	ec                   	in     (%dx),%al
kern  :warn  : [  528.632309] RSP: 002b:00007ffe2eb3c008 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
kern  :warn  : [  528.632540] RAX: ffffffffffffffda RBX: 00007ffe2eb3d1b0 RCX: 00007f90bf2ba19d
kern  :warn  : [  528.632757] RDX: 0000000000000400 RSI: 000055e284e68c40 RDI: 0000000000000005
kern  :warn  : [  528.632960] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000073
kern  :warn  : [  528.633156] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000005
kern  :warn  : [  528.633399] R13: 000055e284e68c40 R14: 000055e2a975f8cb R15: 00007ffe2eb3d1b0
kern  :warn  : [  528.633645]  </TASK>
kern  :warn  : [  528.633813] ---[ end trace ]---



The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20240331/202403312344.c0d273ab-oliver.sang@intel.com
Miaohe Lin April 1, 2024, 3:38 a.m. UTC | #6
On 2024/3/22 18:41, Vlastimil Babka wrote:
> On 3/22/24 10:20, Miaohe Lin wrote:
>> On 2024/3/21 22:24, Matthew Wilcox (Oracle) wrote:
>>> Reclaim the Slab page flag by using a spare bit in PageType.  We are
>>> perennially short of page flags for various purposes, and now that
>>> the original SLAB allocator has been retired, SLUB does not use the
>>> mapcount/page_type field.  This lets us remove a number of special cases
>>> for ignoring mapcount on Slab pages.
>>>
>>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> 
> Acked-by: Vlastimil Babka <vbabka@suse.cz>
> 
>>> ---

[...]

>>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>>> index 9349948f1abf..1cb41ba7870c 100644
>>> --- a/mm/memory-failure.c
>>> +++ b/mm/memory-failure.c
>>> @@ -1239,7 +1239,6 @@ static int me_huge_page(struct page_state *ps, struct page *p)
>>>  #define mlock		(1UL << PG_mlocked)
>>>  #define lru		(1UL << PG_lru)
>>>  #define head		(1UL << PG_head)
>>> -#define slab		(1UL << PG_slab)
>>>  #define reserved	(1UL << PG_reserved)
>>>  
>>>  static struct page_state error_states[] = {
>>> @@ -1249,13 +1248,6 @@ static struct page_state error_states[] = {
>>>  	 * PG_buddy pages only make a small fraction of all free pages.
>>>  	 */
>>>  
>>> -	/*
>>> -	 * Could in theory check if slab page is free or if we can drop
>>> -	 * currently unused objects without touching them. But just
>>> -	 * treat it as standard kernel for now.
>>> -	 */
>>> -	{ slab,		slab,		MF_MSG_SLAB,	me_kernel },
>>
>> Would it be better to leave the above slab case here to catch possible unhandled obscure races with
>> slab? Though it looks like slab pages shouldn't reach this point.
> 

Sorry for the late reply; I was in intensive off-the-job training last week.

> The code would need to handle page types, as it's no longer a page flag. I
> guess that's your decision? If it's not necessary, then I guess MF_MSG_SLAB
> itself could also be removed, along with a bunch more code referencing it.

It might be overkill to add code to handle page types just for slab. We only intend to handle
huge pages, LRU pages and free buddy pages anyway. As the code changes, I suspect MF_MSG_SLAB and MF_MSG_KERNEL
are obsolete now, but it might be better to leave them alone. There might be some unhandled obscure races,
and some buggy kernel code might lead to something unexpected, e.g. slab pages with LRU flags?

Thanks.

Matthew Wilcox (Oracle) April 2, 2024, 5:26 a.m. UTC | #7
On Sun, Mar 31, 2024 at 11:11:10PM +0800, kernel test robot wrote:
> kernel test robot noticed "UBSAN:shift-out-of-bounds_in_fs/proc/page.c" on:
> 
> commit: 30e5296811312a13938b83956a55839ac1e3aa40 ("[PATCH 7/9] mm: Free up PG_slab")

Quite right.  Spotted another one while I was at it.  Not able to test
right now, but this should do the trick:

diff --git a/fs/proc/page.c b/fs/proc/page.c
index 5bc82828c6aa..55b01535eb22 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -175,6 +175,8 @@ u64 stable_page_flags(const struct page *page)
 		u |= 1 << KPF_OFFLINE;
 	if (PageTable(page))
 		u |= 1 << KPF_PGTABLE;
+	if (folio_test_slab(folio))
+		u |= 1 << KPF_SLAB;
 
 #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
 	u |= kpf_copy_bit(k, KPF_IDLE,          PG_idle);
@@ -184,7 +186,6 @@ u64 stable_page_flags(const struct page *page)
 #endif
 
 	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked);
-	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab);
 	u |= kpf_copy_bit(k, KPF_ERROR,		PG_error);
 	u |= kpf_copy_bit(k, KPF_DIRTY,		PG_dirty);
 	u |= kpf_copy_bit(k, KPF_UPTODATE,	PG_uptodate);
diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py
index 1d3a90d93fe2..270c28a0d098 100644
--- a/tools/cgroup/memcg_slabinfo.py
+++ b/tools/cgroup/memcg_slabinfo.py
@@ -146,12 +146,11 @@ def detect_kernel_config():
 
 
 def for_each_slab(prog):
-    PGSlab = 1 << prog.constant('PG_slab')
-    PGHead = 1 << prog.constant('PG_head')
+    PGSlab = ~prog.constant('PG_slab')
 
     for page in for_each_page(prog):
         try:
-            if page.flags.value_() & PGSlab:
+            if page.page_type.value_() == PGSlab:
                 yield cast('struct slab *', page)
         except FaultError:
             pass
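
For context on the splat itself: kpf_copy_bit() shifts the kernel flags word right by a bit number, but PG_slab is now a bit mask (0x00001000, i.e. 4096), hence the out-of-range shift at fs/proc/page.c:107; the fix above tests the folio's page type directly instead. The helper, reproduced here from fs/proc/page.c for illustration:

static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
{
	return ((kflags >> kbit) & 1) << ubit;
}

/* Before the fix, this shifted by 4096, triggering UBSAN: */
/* u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); */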

Patch

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 94eb8a11a321..73e0b17c7728 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -109,7 +109,6 @@  enum pageflags {
 	PG_active,
 	PG_workingset,
 	PG_error,
-	PG_slab,
 	PG_owner_priv_1,	/* Owner use. If pagecache, fs may use*/
 	PG_arch_1,
 	PG_reserved,
@@ -524,7 +523,6 @@  PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
 	TESTCLEARFLAG(Active, active, PF_HEAD)
 PAGEFLAG(Workingset, workingset, PF_HEAD)
 	TESTCLEARFLAG(Workingset, workingset, PF_HEAD)
-__PAGEFLAG(Slab, slab, PF_NO_TAIL)
 PAGEFLAG(Checked, checked, PF_NO_COMPOUND)	   /* Used by some filesystems */
 
 /* Xen */
@@ -931,7 +929,7 @@  PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
 #endif
 
 /*
- * For pages that are never mapped to userspace (and aren't PageSlab),
+ * For pages that are never mapped to userspace,
  * page_type may be used.  Because it is initialised to -1, we invert the
  * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
  * __ClearPageFoo *sets* the bit used for PageFoo.  We reserve a few high and
@@ -947,6 +945,7 @@  PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
 #define PG_table	0x00000200
 #define PG_guard	0x00000400
 #define PG_hugetlb	0x00000800
+#define PG_slab		0x00001000
 
 #define PageType(page, flag)						\
 	((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
@@ -1041,6 +1040,20 @@  PAGE_TYPE_OPS(Table, table, pgtable)
  */
 PAGE_TYPE_OPS(Guard, guard, guard)
 
+FOLIO_TYPE_OPS(slab, slab)
+
+/**
+ * PageSlab - Determine if the page belongs to the slab allocator
+ * @page: The page to test.
+ *
+ * Context: Any context.
+ * Return: True for slab pages, false for any other kind of page.
+ */
+static inline bool PageSlab(const struct page *page)
+{
+	return folio_test_slab(page_folio(page));
+}
+
 #ifdef CONFIG_HUGETLB_PAGE
 FOLIO_TYPE_OPS(hugetlb, hugetlb)
 #else
@@ -1121,7 +1134,7 @@  static __always_inline void __ClearPageAnonExclusive(struct page *page)
 	(1UL << PG_lru		| 1UL << PG_locked	|	\
 	 1UL << PG_private	| 1UL << PG_private_2	|	\
 	 1UL << PG_writeback	| 1UL << PG_reserved	|	\
-	 1UL << PG_slab		| 1UL << PG_active 	|	\
+	 1UL << PG_active 	|				\
 	 1UL << PG_unevictable	| __PG_MLOCKED | LRU_GEN_MASK)
 
 /*
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index d55e53ac91bd..e46d6e82765e 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -107,7 +107,6 @@ 
 	DEF_PAGEFLAG_NAME(lru),						\
 	DEF_PAGEFLAG_NAME(active),					\
 	DEF_PAGEFLAG_NAME(workingset),					\
-	DEF_PAGEFLAG_NAME(slab),					\
 	DEF_PAGEFLAG_NAME(owner_priv_1),				\
 	DEF_PAGEFLAG_NAME(arch_1),					\
 	DEF_PAGEFLAG_NAME(reserved),					\
@@ -135,6 +134,7 @@  IF_HAVE_PG_ARCH_X(arch_3)
 #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) }
 
 #define __def_pagetype_names						\
+	DEF_PAGETYPE_NAME(slab),					\
 	DEF_PAGETYPE_NAME(hugetlb),					\
 	DEF_PAGETYPE_NAME(offline),					\
 	DEF_PAGETYPE_NAME(guard),					\
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9349948f1abf..1cb41ba7870c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1239,7 +1239,6 @@  static int me_huge_page(struct page_state *ps, struct page *p)
 #define mlock		(1UL << PG_mlocked)
 #define lru		(1UL << PG_lru)
 #define head		(1UL << PG_head)
-#define slab		(1UL << PG_slab)
 #define reserved	(1UL << PG_reserved)
 
 static struct page_state error_states[] = {
@@ -1249,13 +1248,6 @@  static struct page_state error_states[] = {
 	 * PG_buddy pages only make a small fraction of all free pages.
 	 */
 
-	/*
-	 * Could in theory check if slab page is free or if we can drop
-	 * currently unused objects without touching them. But just
-	 * treat it as standard kernel for now.
-	 */
-	{ slab,		slab,		MF_MSG_SLAB,	me_kernel },
-
 	{ head,		head,		MF_MSG_HUGE,		me_huge_page },
 
 	{ sc|dirty,	sc|dirty,	MF_MSG_DIRTY_SWAPCACHE,	me_swapcache_dirty },
@@ -1282,7 +1274,6 @@  static struct page_state error_states[] = {
 #undef mlock
 #undef lru
 #undef head
-#undef slab
 #undef reserved
 
 static void update_per_node_mf_stats(unsigned long pfn,
diff --git a/mm/slab.h b/mm/slab.h
index d2bc9b191222..457b15da2a6b 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -84,8 +84,8 @@  struct slab {
 		};
 		struct rcu_head rcu_head;
 	};
-	unsigned int __unused;
 
+	unsigned int __page_type;
 	atomic_t __page_refcount;
 #ifdef CONFIG_MEMCG
 	unsigned long memcg_data;
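
The mm/slab.h change works because struct slab overlays struct page, so the renamed field keeps page_type addressable from the slab side at the same offset. mm/slab.h pins that correspondence with offset assertions of roughly this shape (a sketch; the exact list of asserted fields may differ):

#define SLAB_MATCH(pg, sl)						\
	static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(page_type, __page_type);
SLAB_MATCH(_refcount, __page_refcount);
#undef SLAB_MATCH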