diff mbox series

mm: introduce statistic for inode's gen&tier

Message ID 1687857438-29142-1-git-send-email-zhaoyang.huang@unisoc.com (mailing list archive)
State New
Headers show
Series mm: introduce statistic for inode's gen&tier | expand

Commit Message

zhaoyang.huang June 27, 2023, 9:17 a.m. UTC
From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>

As MGLRU measures a page's activity more precisely than before, I would like to
introduce statistics over these two properties (gen and tier) across all pages of
the inode, which could help some mechanisms, e.g. madvise, judge the inode's activity.

Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
---
 fs/proc/task_mmu.c        |  9 +++++++++
 include/linux/fs.h        |  2 ++
 include/linux/mm_inline.h | 14 ++++++++++++++
 mm/filemap.c              | 11 +++++++++++
 mm/swap.c                 |  1 +
 5 files changed, 37 insertions(+)

Comments

Christoph Hellwig June 28, 2023, 6:11 a.m. UTC | #1
On Tue, Jun 27, 2023 at 05:17:18PM +0800, zhaoyang.huang wrote:
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -434,6 +434,8 @@ struct address_space {
>  	struct rb_root_cached	i_mmap;
>  	struct rw_semaphore	i_mmap_rwsem;
>  	unsigned long		nrpages;
> +	atomic_long_t		gen;
> +	atomic_long_t		tier;

This increases the size of the inode by 16 bytes, and you had better have really
good data supporting it, as increases of the inode size impact a lot
of workloads.
kernel test robot June 28, 2023, 2:49 p.m. UTC | #2
Hello,

kernel test robot noticed "BUG:kernel_NULL_pointer_dereference,address" on:

commit: b22a2ae5b4ed0cc0b7f66a6b0b359563c18b63e9 ("[PATCH] mm: introduce statistic for inode's gen&tier")
url: https://github.com/intel-lab-lkp/linux/commits/zhaoyang-huang/mm-introduce-statistic-for-inode-s-gen-tier/20230627-172036
base: https://git.kernel.org/cgit/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/all/1687857438-29142-1-git-send-email-zhaoyang.huang@unisoc.com/
patch subject: [PATCH] mm: introduce statistic for inode's gen&tier

in testcase: boot

compiler: gcc-12
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)


+---------------------------------------------+------------+------------+
|                                             | 1e8d64bb8c | b22a2ae5b4 |
+---------------------------------------------+------------+------------+
| boot_successes                              | 8          | 0          |
| boot_failures                               | 0          | 7          |
| BUG:kernel_NULL_pointer_dereference,address | 0          | 7          |
| Oops:#[##]                                  | 0          | 7          |
| EIP:folio_mark_accessed                     | 0          | 7          |
| Kernel_panic-not_syncing:Fatal_exception    | 0          | 7          |
+---------------------------------------------+------------+------------+


If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202306282225.53fbc6e9-oliver.sang@intel.com



[   11.939571][   T71] BUG: kernel NULL pointer dereference, address: 000000e8
[   11.940485][   T71] #PF: supervisor write access in kernel mode
[   11.941239][   T71] #PF: error_code(0x0002) - not-present page
[   11.941910][   T71] *pde = 00000000
[   11.942425][   T71] Oops: 0002 [#1] PREEMPT SMP
[   11.943029][   T71] CPU: 1 PID: 71 Comm: init Not tainted 6.4.0-rc4-00532-gb22a2ae5b4ed #10
[   11.944053][   T71] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[   11.945294][   T71] EIP: folio_mark_accessed+0x67/0x129
[   11.945967][   T71] Code: 00 8d 8a 00 00 00 02 89 c2 81 e2 ff ff ff f9 09 ca f0 0f b1 13 74 10 89 c2 81 e2 00 00 00 06 81 fa 00 00 00 06 75 da 8b 43 0c <f0> ff 80 e8 00 00 00 e9 b2 00 00 00 a8 02 75 06 f0 80 0b 02 eb 7d
[   11.948103][   T71] EAX: 00000000 EBX: e8a4b7d0 ECX: 02000000 EDX: 42001042
[   11.948953][   T71] ESI: c5677da8 EDI: 00000000 EBP: c5677c70 ESP: c5677c68
[   11.949750][   T71] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010246
[   11.950693][   T71] CR0: 80050033 CR2: 000000e8 CR3: 2df60000 CR4: 000406d0
[   11.951426][   T71] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[   11.952130][   T71] DR6: fffe0ff0 DR7: 00000400
[   11.952659][   T71] Call Trace:
[   11.953146][   T71]  ? show_regs+0x60/0x66
[   11.953625][   T71]  ? __die_body+0x13/0x4b
[   11.954068][   T71]  ? __die+0x22/0x24
[   11.954516][   T71]  ? page_fault_oops+0x4a/0x73
[   11.955128][   T71]  ? kernelmode_fixup_or_oops+0x98/0xa0
[   11.955916][   T71]  ? __bad_area_nosemaphore+0x41/0x1ca
[   11.956642][   T71]  ? __lock_acquire+0x66c/0x89b
[   11.957257][   T71]  ? bad_area_nosemaphore+0xa/0xd
[   11.957889][   T71]  ? do_user_addr_fault+0xed/0x41f
[   11.958554][   T71]  ? exc_page_fault+0x13b/0x15b
[   11.959176][   T71]  ? pvclock_clocksource_read_nowd+0x172/0x172
[   11.959903][   T71]  ? handle_exception+0x14d/0x14d
[   11.960548][   T71]  ? get_user_pages_unlocked+0x100/0x1e2
[   11.961197][   T71]  ? pvclock_clocksource_read_nowd+0x172/0x172
[   11.961887][   T71]  ? folio_mark_accessed+0x67/0x129
[   11.962507][   T71]  ? get_user_pages_unlocked+0x100/0x1e2
[   11.963143][   T71]  ? pvclock_clocksource_read_nowd+0x172/0x172
[   11.963727][   T71]  ? folio_mark_accessed+0x67/0x129
[   11.964242][   T71]  mark_page_accessed+0xd/0xf
[   11.964727][   T71]  zap_pte_range+0x173/0x336
[   11.965213][   T71]  unmap_page_range+0xb2/0x125
[   11.965816][   T71]  unmap_single_vma+0x9e/0xa8
[   11.966409][   T71]  unmap_vmas+0x6d/0x9d
[   11.966927][   T71]  exit_mmap+0xb3/0x22a
[   11.967479][   T71]  __mmput+0x19/0x81
[   11.968046][   T71]  mmput+0x27/0x2a
[   11.968572][   T71]  exec_mmap+0x210/0x225
[   11.969131][   T71]  begin_new_exec+0xb9/0x2d0
[   11.969726][   T71]  load_elf_binary+0x204/0x808
[   11.970322][   T71]  ? search_binary_handler+0x8e/0x195
[   11.970978][   T71]  ? search_binary_handler+0x8e/0x195
[   11.971632][   T71]  ? lock_release+0x99/0xc0
[   11.972154][   T71]  search_binary_handler+0x93/0x195
[   11.972817][   T71]  exec_binprm+0x91/0x18f
[   11.973376][   T71]  bprm_execve+0x144/0x1cf
[   11.974002][   T71]  bprm_execve+0x5d/0x6d
[   11.974562][   T71]  do_execveat_common+0x150/0x16c
[   11.975205][   T71]  __ia32_sys_execve+0x23/0x2a
[   11.975809][   T71]  __do_fast_syscall_32+0x94/0xb3
[   11.976437][   T71]  do_fast_syscall_32+0x29/0x5b
[   11.977054][   T71]  do_SYSENTER_32+0x15/0x17
[   11.977640][   T71]  entry_SYSENTER_32+0xa2/0xfb
[   11.978246][   T71] EIP: 0xb7faa579
[   11.978743][   T71] Code: Unable to access opcode bytes at 0xb7faa54f.
[   11.979529][   T71] EAX: ffffffda EBX: 00468928 ECX: 00467778 EDX: 00451f20
[   11.980351][   T71] ESI: 00451f20 EDI: b7f27ff4 EBP: bfcc8438 ESP: bfcc83c4
[   11.981188][   T71] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 007b EFLAGS: 00000292
[   11.982140][   T71] Modules linked in:
[   11.982661][   T71] CR2: 00000000000000e8
[   11.983208][   T71] ---[ end trace 0000000000000000 ]---
[   11.983883][   T71] EIP: folio_mark_accessed+0x67/0x129
[   11.984569][   T71] Code: 00 8d 8a 00 00 00 02 89 c2 81 e2 ff ff ff f9 09 ca f0 0f b1 13 74 10 89 c2 81 e2 00 00 00 06 81 fa 00 00 00 06 75 da 8b 43 0c <f0> ff 80 e8 00 00 00 e9 b2 00 00 00 a8 02 75 06 f0 80 0b 02 eb 7d
[   11.986726][   T71] EAX: 00000000 EBX: e8a4b7d0 ECX: 02000000 EDX: 42001042
[   11.987547][   T71] ESI: c5677da8 EDI: 00000000 EBP: c5677c70 ESP: c5677c68
[   11.988372][   T71] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010246
[   11.989334][   T71] CR0: 80050033 CR2: b7faa54f CR3: 2df60000 CR4: 000406d0
[   12.000570][   T71] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[   12.001448][   T71] DR6: fffe0ff0 DR7: 00000400
[   12.002053][   T71] Kernel panic - not syncing: Fatal exception
[   12.003103][   T71] Kernel Offset: disabled



To reproduce:

        # build kernel
	cd linux
	cp config-6.4.0-rc4-00532-gb22a2ae5b4ed .config
	make HOSTCC=gcc-12 CC=gcc-12 ARCH=i386 olddefconfig prepare modules_prepare bzImage modules
	make HOSTCC=gcc-12 CC=gcc-12 ARCH=i386 INSTALL_MOD_PATH=<mod-install-dir> modules_install
	cd <mod-install-dir>
	find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz


        git clone https://github.com/intel/lkp-tests.git
        cd lkp-tests
        bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email

        # if you come across any failure that blocks the test,
        # please remove ~/.lkp and /lkp dir to run from a clean state.
diff mbox series

Patch

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e35a039..3ed30ef 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -283,17 +283,26 @@  static void show_vma_header_prefix(struct seq_file *m,
 	unsigned long start, end;
 	dev_t dev = 0;
 	const char *name = NULL;
+	long nrpages = 0, gen = 0, tier = 0;
 
 	if (file) {
 		struct inode *inode = file_inode(vma->vm_file);
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
+		nrpages = inode->i_mapping->nrpages;
+		gen = atomic_long_read(&inode->i_mapping->gen);
+		tier = atomic_long_read(&inode->i_mapping->tier);
 	}
 
 	start = vma->vm_start;
 	end = vma->vm_end;
 	show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
+
+	seq_put_hex_ll(m, NULL, nrpages, 8);
+	seq_put_hex_ll(m, ":", gen, 8);
+	seq_put_hex_ll(m, ":", tier, 8);
+
 	if (mm)
 		anon_name = anon_vma_name(vma);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c1769a2..4f4c3a2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -434,6 +434,8 @@  struct address_space {
 	struct rb_root_cached	i_mmap;
 	struct rw_semaphore	i_mmap_rwsem;
 	unsigned long		nrpages;
+	atomic_long_t		gen;
+	atomic_long_t		tier;
 	pgoff_t			writeback_index;
 	const struct address_space_operations *a_ops;
 	unsigned long		flags;
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index ff3f3f2..f68bd06 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -307,6 +307,20 @@  static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio,
 	return false;
 }
 
+static inline int lru_tier_from_refs(int refs)
+{
+	return 0;
+}
+
+static inline int folio_lru_refs(struct folio *folio)
+{
+	return 0;
+}
+
+static inline int folio_lru_gen(struct folio *folio)
+{
+	return 0;
+}
 #endif /* CONFIG_LRU_GEN */
 
 static __always_inline
diff --git a/mm/filemap.c b/mm/filemap.c
index c4d4ace..a1c68a9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -45,6 +45,7 @@ 
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
+#include <linux/mm_inline.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/filemap.h>
@@ -126,6 +127,9 @@  static void page_cache_delete(struct address_space *mapping,
 {
 	XA_STATE(xas, &mapping->i_pages, folio->index);
 	long nr = 1;
+	int refs = folio_lru_refs(folio);
+	int tier = lru_tier_from_refs(refs);
+	int gen = folio_lru_gen(folio);
 
 	mapping_set_update(&xas, mapping);
 
@@ -143,6 +147,8 @@  static void page_cache_delete(struct address_space *mapping,
 	folio->mapping = NULL;
 	/* Leave page->index set: truncation lookup relies upon it */
 	mapping->nrpages -= nr;
+	atomic_long_sub(gen, &mapping->gen);
+	atomic_long_sub(tier, &mapping->tier);
 }
 
 static void filemap_unaccount_folio(struct address_space *mapping,
@@ -844,6 +850,9 @@  noinline int __filemap_add_folio(struct address_space *mapping,
 	int huge = folio_test_hugetlb(folio);
 	bool charged = false;
 	long nr = 1;
+	int refs = folio_lru_refs(folio);
+	int tier = lru_tier_from_refs(refs);
+	int gen = folio_lru_gen(folio);
 
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
@@ -898,6 +907,8 @@  noinline int __filemap_add_folio(struct address_space *mapping,
 			goto unlock;
 
 		mapping->nrpages += nr;
+		atomic_long_add(gen, &mapping->gen);
+		atomic_long_add(tier, &mapping->tier);
 
 		/* hugetlb pages do not participate in page cache accounting */
 		if (!huge) {
diff --git a/mm/swap.c b/mm/swap.c
index 70e2063..6322c1c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -468,6 +468,7 @@  static void folio_inc_refs(struct folio *folio)
 		new_flags += BIT(LRU_REFS_PGOFF);
 		new_flags |= old_flags & ~LRU_REFS_MASK;
 	} while (!try_cmpxchg(&folio->flags, &old_flags, new_flags));
+	atomic_long_inc(&folio->mapping->tier);
 }
 #else
 static void folio_inc_refs(struct folio *folio)