Message ID | 20250420055159.55851-1-lance.yang@linux.dev (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [1/1] mm/rmap: optimize MM-ID mapcount handling with union | expand |
> /* Note: mapcounts start at -1. */
> atomic_set(&folio->_large_mapcount, mapcount - 1);
> diff --git a/mm/internal.h b/mm/internal.h
> index 838f840ded83..1505174178f4 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -772,8 +772,13 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
> atomic_set(&folio->_nr_pages_mapped, 0);
> if (IS_ENABLED(CONFIG_MM_ID)) {
> folio->_mm_ids = 0;
> - folio->_mm_id_mapcount[0] = -1;
> - folio->_mm_id_mapcount[1] = -1;
> + /*
> + * One-shot initialization of both mapcount slots to -1.
> + * Using 'unsigned long' ensures cross-arch compatibility:
> + * - 32-bit: Fills both short slots (0xFFFF each)
> + * - 64-bit: Fills both int slots (0xFFFFFFFF each)
> + */
> + folio->_mm_id_mapcounts = -1UL;

Are we sure the compiler cannot optimize that itself?

On x86-64 I get with gcc 14.2.1:

; folio->_mm_id_mapcount[0] = -1;
3f2f: 48 c7 42 60 ff ff ff ff	movq	$-0x1, 0x60(%rdx)

Which should be a quadword (64bit) setting, so exactly what you want to achieve.
April 20, 2025 at 3:12 PM, "David Hildenbrand" <david@redhat.com> wrote:

> > > > > /* Note: mapcounts start at -1. */
> > > > atomic_set(&folio->_large_mapcount, mapcount - 1);
> > > > diff --git a/mm/internal.h b/mm/internal.h
> > > > index 838f840ded83..1505174178f4 100644
> > > > --- a/mm/internal.h
> > > > +++ b/mm/internal.h
> > > > @@ -772,8 +772,13 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
> > > > atomic_set(&folio->_nr_pages_mapped, 0);
> > > > if (IS_ENABLED(CONFIG_MM_ID)) {
> > > > folio->_mm_ids = 0;
> > > > - folio->_mm_id_mapcount[0] = -1;
> > > > - folio->_mm_id_mapcount[1] = -1;
> > > > + /*
> > > > + * One-shot initialization of both mapcount slots to -1.
> > > > + * Using 'unsigned long' ensures cross-arch compatibility:
> > > > + * - 32-bit: Fills both short slots (0xFFFF each)
> > > > + * - 64-bit: Fills both int slots (0xFFFFFFFF each)
> > > > + */
> > > > + folio->_mm_id_mapcounts = -1UL;
> >
> > Are we sure the compiler cannot optimize that itself?
> >
> > On x86-64 I get with gcc 14.2.1:
> >
> > ; folio->_mm_id_mapcount[0] = -1;
> > 3f2f: 48 c7 42 60 ff ff ff ff movq $-0x1, 0x60(%rdx)
> >
> > Which should be a quadword (64bit) setting, so exactly what you want to achieve.

Yeah, the compiler should be as smart as we expect it to be. However, it seems
that gcc 4.8.5 doesn't behave as expected with the -O2 optimization level on
the x86-64 test machine.

struct folio_array {
	int _mm_id_mapcount[2];
};

void init_array(struct folio_array *f)
{
	f->_mm_id_mapcount[0] = -1;
	f->_mm_id_mapcount[1] = -1;
}

0000000000000000 <init_array>:
   0:	c7 07 ff ff ff ff    	movl   $0xffffffff,(%rdi)
   6:	c7 47 04 ff ff ff ff 	movl   $0xffffffff,0x4(%rdi)
   d:	c3                   	retq

---

struct folio_union {
	union {
		int _mm_id_mapcount[2];
		unsigned long _mm_id_mapcounts;
	};
};

void init_union(struct folio_union *f)
{
	f->_mm_id_mapcounts = -1UL;
}

0000000000000010 <init_union>:
  10:	48 c7 07 ff ff ff ff 	movq   $0xffffffffffffffff,(%rdi)
  17:	c3                   	retq

Hmm...
I'm not sure whether it's worthwhile to accommodate compilers that are not
very new, though.

Thanks,
Lance

> >
> > --
> > Cheers,
> >
> > David / dhildenb
> >
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 56d07edd01f9..0ac80eaa775e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -363,6 +363,7 @@ typedef unsigned short mm_id_t;
  * @_mm_id: Do not use outside of rmap code.
  * @_mm_ids: Do not use outside of rmap code.
  * @_mm_id_mapcount: Do not use outside of rmap code.
+ * @_mm_id_mapcounts: Do not use outside of rmap code.
  * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h.
  * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h.
  * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h.
@@ -435,7 +436,10 @@ struct folio {
 			atomic_t _entire_mapcount;
 			atomic_t _pincount;
 #endif /* CONFIG_64BIT */
-			mm_id_mapcount_t _mm_id_mapcount[2];
+			union {
+				mm_id_mapcount_t _mm_id_mapcount[2];
+				unsigned long _mm_id_mapcounts;
+			};
 			union {
 				mm_id_t _mm_id[2];
 				unsigned long _mm_ids;
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 6b82b618846e..99c0518c1ad8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -231,8 +231,7 @@ static __always_inline void folio_set_large_mapcount(struct folio *folio,
 {
 	__folio_large_mapcount_sanity_checks(folio, mapcount, vma->vm_mm->mm_id);
 
-	VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY);
-	VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY);
+	VM_WARN_ON_ONCE(folio->_mm_ids != MM_ID_DUMMY);
 
 	/* Note: mapcounts start at -1. */
 	atomic_set(&folio->_large_mapcount, mapcount - 1);
diff --git a/mm/internal.h b/mm/internal.h
index 838f840ded83..1505174178f4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -772,8 +772,13 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
 	atomic_set(&folio->_nr_pages_mapped, 0);
 	if (IS_ENABLED(CONFIG_MM_ID)) {
 		folio->_mm_ids = 0;
-		folio->_mm_id_mapcount[0] = -1;
-		folio->_mm_id_mapcount[1] = -1;
+		/*
+		 * One-shot initialization of both mapcount slots to -1.
+		 * Using 'unsigned long' ensures cross-arch compatibility:
+		 * - 32-bit: Fills both short slots (0xFFFF each)
+		 * - 64-bit: Fills both int slots (0xFFFFFFFF each)
+		 */
+		folio->_mm_id_mapcounts = -1UL;
 	}
 	if (IS_ENABLED(CONFIG_64BIT) || order > 1) {
 		atomic_set(&folio->_pincount, 0);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bbfd8e4ce0df..f5c5829c4bfa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -976,12 +976,11 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
 			goto out;
 		}
 		if (IS_ENABLED(CONFIG_MM_ID)) {
-			if (unlikely(folio->_mm_id_mapcount[0] != -1)) {
-				bad_page(page, "nonzero mm mapcount 0");
-				goto out;
-			}
-			if (unlikely(folio->_mm_id_mapcount[1] != -1)) {
-				bad_page(page, "nonzero mm mapcount 1");
+			if (unlikely(folio->_mm_id_mapcounts != -1UL)) {
+				if (folio->_mm_id_mapcount[0] != -1)
+					bad_page(page, "nonzero mm mapcount 0");
+				if (folio->_mm_id_mapcount[1] != -1)
+					bad_page(page, "nonzero mm mapcount 1");
 				goto out;
 			}
 		}