@@ -67,7 +67,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
*/
ppage = pfn_to_online_page(pfn);
- if (!ppage || PageSlab(ppage) || page_has_type(ppage))
+ if (!ppage || page_has_type(ppage))
pcount = 0;
else
pcount = page_mapcount(ppage);
@@ -124,11 +124,8 @@ u64 stable_page_flags(struct page *page)
/*
* pseudo flags for the well known (anonymous) memory mapped pages
- *
- * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
- * simple test in page_mapped() is not enough.
*/
- if (!PageSlab(page) && page_mapped(page))
+ if (page_mapped(page))
u |= 1 << KPF_MMAP;
if (PageAnon(page))
u |= 1 << KPF_ANON;
@@ -178,16 +175,14 @@ u64 stable_page_flags(struct page *page)
u |= 1 << KPF_OFFLINE;
if (PageTable(page))
u |= 1 << KPF_PGTABLE;
+ if (PageSlab(page))
+ u |= 1 << KPF_SLAB;
if (page_is_idle(page))
u |= 1 << KPF_IDLE;
u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
- u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
- if (PageTail(page) && PageSlab(compound_head(page)))
- u |= 1 << KPF_SLAB;
-
u |= kpf_copy_bit(k, KPF_ERROR, PG_error);
u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate);
@@ -196,10 +196,13 @@ struct page {
atomic_t _mapcount;
/*
- * If the page is neither PageSlab nor mappable to userspace,
- * the value stored here may help determine what this page
- * is used for. See page-flags.h for a list of page types
- * which are currently stored here.
+ * If the page is not mappable to userspace, the value
+ * stored here may help determine what this page is used for.
+ * See page-flags.h for a list of page types which are currently
+ * stored here.
+ *
+ * Note that only the upper half is used for page types and the
+ * lower half is reserved for SLAB.
*/
unsigned int page_type;
};
@@ -107,7 +107,6 @@ enum pageflags {
PG_workingset,
PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
PG_error,
- PG_slab,
PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/
PG_arch_1,
PG_reserved,
@@ -484,7 +483,6 @@ PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
TESTCLEARFLAG(Active, active, PF_HEAD)
PAGEFLAG(Workingset, workingset, PF_HEAD)
TESTCLEARFLAG(Workingset, workingset, PF_HEAD)
-__PAGEFLAG(Slab, slab, PF_NO_TAIL)
__PAGEFLAG(SlobFree, slob_free, PF_NO_TAIL)
PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */
@@ -926,42 +924,72 @@ static inline bool is_page_hwpoison(struct page *page)
}
/*
- * For pages that are never mapped to userspace (and aren't PageSlab),
- * page_type may be used. Because it is initialised to -1, we invert the
- * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
- * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and
- * low bits so that an underflow or overflow of page_mapcount() won't be
- * mistaken for a page type value.
+ * For pages that are never mapped to userspace, page_type may be used.
+ * Because it is initialised to -1, we invert the sense of the bit,
+ * so __SetPageFoo *clears* the bit used for PageFoo, and __ClearPageFoo
+ * *sets* the bit used for PageFoo. We reserve a few high and low bits
+ * so that an underflow or overflow of page_mapcount() won't be mistaken
+ * for a page type value.
*/
#define PAGE_TYPE_BASE 0xf0000000
-/* Reserve 0x0000007f to catch underflows of page_mapcount */
-#define PAGE_MAPCOUNT_RESERVE -128
-#define PG_buddy 0x00000080
-#define PG_offline 0x00000100
-#define PG_table 0x00000200
-#define PG_guard 0x00000400
+#define PG_buddy 0x00010000
+#define PG_offline 0x00020000
+#define PG_table 0x00040000
+#define PG_guard 0x00080000
+#define PG_slab 0x00100000
#define PageType(page, flag) \
((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
-static inline int page_has_type(struct page *page)
+#define PAGE_TYPE_MASK 0xffff0000
+
+static inline bool page_type_has_type(unsigned int page_type)
{
- return (int)page->page_type < PAGE_MAPCOUNT_RESERVE;
+ return ((int)page_type < (int)PAGE_TYPE_MASK);
}
-#define PAGE_TYPE_OPS(uname, lname) \
+static inline bool page_has_type(struct page *page)
+{
+ return page_type_has_type(page->page_type);
+}
+
+
+#define PAGE_TYPE_OPS(uname, lname, policy) \
static __always_inline int Page##uname(struct page *page) \
{ \
+ page = policy(page, 0); \
+ return PageType(page, PG_##lname); \
+} \
+static __always_inline int folio_test_##lname(struct folio *folio) \
+{ \
+ struct page *page = &folio->page; \
+ \
return PageType(page, PG_##lname); \
} \
static __always_inline void __SetPage##uname(struct page *page) \
{ \
+ page = policy(page, 1); \
+ VM_BUG_ON_PAGE(!PageType(page, 0), page); \
+ page->page_type &= ~PG_##lname; \
+} \
+static __always_inline void __folio_set_##lname(struct folio *folio) \
+{ \
+ struct page *page = &folio->page; \
+ \
VM_BUG_ON_PAGE(!PageType(page, 0), page); \
page->page_type &= ~PG_##lname; \
} \
static __always_inline void __ClearPage##uname(struct page *page) \
{ \
+ page = policy(page, 1); \
+ VM_BUG_ON_PAGE(!Page##uname(page), page); \
+ page->page_type |= PG_##lname; \
+} \
+static __always_inline void __folio_clear_##lname(struct folio *folio) \
+{ \
+ struct page *page = &folio->page; \
+ \
VM_BUG_ON_PAGE(!Page##uname(page), page); \
page->page_type |= PG_##lname; \
}
@@ -970,7 +998,7 @@ static __always_inline void __ClearPage##uname(struct page *page) \
* PageBuddy() indicates that the page is free and in the buddy system
* (see mm/page_alloc.c).
*/
-PAGE_TYPE_OPS(Buddy, buddy)
+PAGE_TYPE_OPS(Buddy, buddy, PF_ANY)
/*
* PageOffline() indicates that the page is logically offline although the
@@ -994,7 +1022,10 @@ PAGE_TYPE_OPS(Buddy, buddy)
* pages should check PageOffline() and synchronize with such drivers using
* page_offline_freeze()/page_offline_thaw().
*/
-PAGE_TYPE_OPS(Offline, offline)
+PAGE_TYPE_OPS(Offline, offline, PF_ANY)
+
+/* PageSlab() indicates that the page is used by the slab subsystem. */
+PAGE_TYPE_OPS(Slab, slab, PF_NO_TAIL)
extern void page_offline_freeze(void);
extern void page_offline_thaw(void);
@@ -1004,12 +1035,12 @@ extern void page_offline_end(void);
/*
* Marks pages in use as page tables.
*/
-PAGE_TYPE_OPS(Table, table)
+PAGE_TYPE_OPS(Table, table, PF_ANY)
/*
* Marks guardpages used with debug_pagealloc.
*/
-PAGE_TYPE_OPS(Guard, guard)
+PAGE_TYPE_OPS(Guard, guard, PF_ANY)
extern bool is_free_buddy_page(struct page *page);
@@ -1057,8 +1088,8 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
(1UL << PG_lru | 1UL << PG_locked | \
1UL << PG_private | 1UL << PG_private_2 | \
1UL << PG_writeback | 1UL << PG_reserved | \
- 1UL << PG_slab | 1UL << PG_active | \
- 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK)
+ 1UL << PG_active | 1UL << PG_unevictable | \
+ __PG_MLOCKED | LRU_GEN_MASK)
/*
* Flags checked when a page is prepped for return by the page allocator.
@@ -112,7 +112,6 @@
{1UL << PG_lru, "lru" }, \
{1UL << PG_active, "active" }, \
{1UL << PG_workingset, "workingset" }, \
- {1UL << PG_slab, "slab" }, \
{1UL << PG_owner_priv_1, "owner_priv_1" }, \
{1UL << PG_arch_1, "arch_1" }, \
{1UL << PG_reserved, "reserved" }, \
@@ -479,13 +479,14 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_NUMBER(PG_private);
VMCOREINFO_NUMBER(PG_swapcache);
VMCOREINFO_NUMBER(PG_swapbacked);
- VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
VMCOREINFO_NUMBER(PG_hwpoison);
#endif
VMCOREINFO_NUMBER(PG_head_mask);
#define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy)
VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
+#define PAGE_SLAB_MAPCOUNT_VALUE (~PG_slab)
+ VMCOREINFO_NUMBER(PAGE_SLAB_MAPCOUNT_VALUE);
#ifdef CONFIG_HUGETLB_PAGE
VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
#define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline)
@@ -1145,7 +1145,6 @@ static int me_huge_page(struct page_state *ps, struct page *p)
#define mlock (1UL << PG_mlocked)
#define lru (1UL << PG_lru)
#define head (1UL << PG_head)
-#define slab (1UL << PG_slab)
#define reserved (1UL << PG_reserved)
static struct page_state error_states[] = {
@@ -1155,13 +1154,6 @@ static struct page_state error_states[] = {
* PG_buddy pages only make a small fraction of all free pages.
*/
- /*
- * Could in theory check if slab page is free or if we can drop
- * currently unused objects without touching them. But just
- * treat it as standard kernel for now.
- */
- { slab, slab, MF_MSG_SLAB, me_kernel },
-
{ head, head, MF_MSG_HUGE, me_huge_page },
{ sc|dirty, sc|dirty, MF_MSG_DIRTY_SWAPCACHE, me_swapcache_dirty },
@@ -2265,6 +2265,21 @@ void __kmem_cache_release(struct kmem_cache *cachep)
}
}
+static inline unsigned int slab_get_active(struct slab *slab)
+{
+ return ~(slab->page_type | PG_slab);
+}
+
+static inline void slab_inc_active(struct slab *slab)
+{
+ slab->page_type--;
+}
+
+static inline void slab_dec_active(struct slab *slab)
+{
+ slab->page_type++;
+}
+
/*
* Get the memory for a slab management obj.
*
@@ -2287,7 +2302,6 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
void *addr = slab_address(slab);
slab->s_mem = addr + colour_off;
- slab->active = 0;
if (OBJFREELIST_SLAB(cachep))
freelist = NULL;
@@ -2506,8 +2520,8 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slab)
{
void *objp;
- objp = index_to_obj(cachep, slab, get_free_obj(slab, slab->active));
- slab->active++;
+ objp = index_to_obj(cachep, slab, get_free_obj(slab, slab_get_active(slab)));
+ slab_inc_active(slab);
return objp;
}
@@ -2520,7 +2534,7 @@ static void slab_put_obj(struct kmem_cache *cachep,
unsigned int i;
/* Verify double free bug */
- for (i = slab->active; i < cachep->num; i++) {
+ for (i = slab_get_active(slab); i < cachep->num; i++) {
if (get_free_obj(slab, i) == objnr) {
pr_err("slab: double free detected in cache '%s', objp %px\n",
cachep->name, objp);
@@ -2528,11 +2542,11 @@ static void slab_put_obj(struct kmem_cache *cachep,
}
}
#endif
- slab->active--;
+ slab_dec_active(slab);
if (!slab->freelist)
slab->freelist = objp + obj_offset(cachep);
- set_free_obj(slab, slab->active, objnr);
+ set_free_obj(slab, slab_get_active(slab), objnr);
}
/*
@@ -2631,14 +2645,14 @@ static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
spin_lock(&n->list_lock);
n->total_slabs++;
- if (!slab->active) {
+ if (!slab_get_active(slab)) {
list_add_tail(&slab->slab_list, &n->slabs_free);
n->free_slabs++;
} else
fixup_slab_list(cachep, n, slab, &list);
STATS_INC_GROWN(cachep);
- n->free_objects += cachep->num - slab->active;
+ n->free_objects += cachep->num - slab_get_active(slab);
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
@@ -2740,7 +2754,7 @@ static inline void fixup_slab_list(struct kmem_cache *cachep,
{
/* move slabp to correct slabp list: */
list_del(&slab->slab_list);
- if (slab->active == cachep->num) {
+ if (slab_get_active(slab) == cachep->num) {
list_add(&slab->slab_list, &n->slabs_full);
if (OBJFREELIST_SLAB(cachep)) {
#if DEBUG
@@ -2779,7 +2793,7 @@ static noinline struct slab *get_valid_first_slab(struct kmem_cache_node *n,
/* Move pfmemalloc slab to the end of list to speed up next search */
list_del(&slab->slab_list);
- if (!slab->active) {
+ if (!slab_get_active(slab)) {
list_add_tail(&slab->slab_list, &n->slabs_free);
n->free_slabs++;
} else
@@ -2861,9 +2875,9 @@ static __always_inline int alloc_block(struct kmem_cache *cachep,
* There must be at least one object available for
* allocation.
*/
- BUG_ON(slab->active >= cachep->num);
+ BUG_ON(slab_get_active(slab) >= cachep->num);
- while (slab->active < cachep->num && batchcount--) {
+ while (slab_get_active(slab) < cachep->num && batchcount--) {
STATS_INC_ALLOCED(cachep);
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
@@ -3158,7 +3172,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
- BUG_ON(slab->active == cachep->num);
+ BUG_ON(slab_get_active(slab) == cachep->num);
obj = slab_get_obj(cachep, slab);
n->free_objects--;
@@ -3292,7 +3306,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
STATS_DEC_ACTIVE(cachep);
/* fixup slab chains */
- if (slab->active == 0) {
+ if (slab_get_active(slab) == 0) {
list_add(&slab->slab_list, &n->slabs_free);
n->free_slabs++;
} else {
@@ -3347,7 +3361,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
struct slab *slab;
list_for_each_entry(slab, &n->slabs_free, slab_list) {
- BUG_ON(slab->active);
+ BUG_ON(slab_get_active(slab));
i++;
}
@@ -18,7 +18,8 @@ struct slab {
struct kmem_cache *slab_cache;
void *freelist; /* array of free object indexes */
void *s_mem; /* first object */
- unsigned int active;
+ /* the lower half of page_type is used as the active objects counter */
+ unsigned int page_type;
#elif defined(CONFIG_SLUB)
For now, only SLAB uses the _mapcount field as a counter of active objects in a slab; the other slab allocators do not use it. As 16 bits are enough for that counter, use the remaining 16 bits of _mapcount as page_type even when SLAB is used, and then move the PG_slab flag to page_type.

As suggested by Matthew, store the number of active objects in negative form and use helpers when accessing or modifying it. Note that page_type is always placed in the upper 16 bits of _mapcount to avoid confusing a normal _mapcount value with a page type. As an underflow or overflow of page_mapcount() is not a concern anymore, more of the lower bits can be used.

Add folio helpers to PAGE_TYPE_OPS() so as not to break the existing slab implementations.

Remove the PG_slab check from PAGE_FLAGS_CHECK_AT_FREE; the buddy allocator still checks that _mapcount is properly set at free time. Exclude PG_slab from hwpoison handling and from show_page_flags() for now.

Note that with this patch, page_mapped() and folio_mapped() always return false for slab pages.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
---
 fs/proc/page.c                 | 13 ++----
 include/linux/mm_types.h       | 11 +++--
 include/linux/page-flags.h     | 77 ++++++++++++++++++++++++----------
 include/trace/events/mmflags.h |  1 -
 kernel/crash_core.c            |  3 +-
 mm/memory-failure.c            |  8 ----
 mm/slab.c                      | 44 ++++++++++++-------
 mm/slab.h                      |  3 +-
 8 files changed, 98 insertions(+), 62 deletions(-)
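
For readers less familiar with the inverted page_type encoding, here is a minimal stand-alone C sketch (user-space, not kernel code) that mimics how the patch keeps the type flags in the upper 16 bits while storing the SLAB active-object count in negative form in the lower 16 bits. The constants mirror the values introduced above; the helper names and the main() walk-through are illustrative only.

	#include <assert.h>
	#include <stdio.h>

	/* Constants mirroring the values introduced by this patch. */
	#define PAGE_TYPE_BASE	0xf0000000u
	#define PAGE_TYPE_MASK	0xffff0000u
	#define PG_slab		0x00100000u

	/* page_type starts life as -1 (all bits set), like page->_mapcount. */
	static unsigned int page_type = 0xffffffffu;

	/* The sense is inverted: "set" clears the bit, "clear" sets it back. */
	static void set_page_slab(void)   { page_type &= ~PG_slab; }
	static void clear_page_slab(void) { page_type |= PG_slab; }

	/* PageType(): base bits still set and the flag bit cleared. */
	static int page_is_slab(void)
	{
		return (page_type & (PAGE_TYPE_BASE | PG_slab)) == PAGE_TYPE_BASE;
	}

	/* Active objects live, negated, in the lower 16 bits. */
	static unsigned int slab_get_active(void) { return ~(page_type | PG_slab); }
	static void slab_inc_active(void) { page_type--; }
	static void slab_dec_active(void) { page_type++; }

	int main(void)
	{
		set_page_slab();
		assert(page_is_slab());
		assert(slab_get_active() == 0);

		slab_inc_active();
		slab_inc_active();
		assert(slab_get_active() == 2);	/* counter in the low 16 bits */
		assert(page_is_slab());		/* type bits in the high half untouched */

		slab_dec_active();
		slab_dec_active();
		assert(slab_get_active() == 0);

		clear_page_slab();
		assert(!page_is_slab());
		assert(page_type == 0xffffffffu);	/* back to the -1 "no type" pattern */

		printf("page_type encoding behaves as described\n");
		return 0;
	}

Because decrementing page_type only ever touches the low 16 bits for any realistic object count, the type flags in the upper half stay intact, which is why slab_get_active()/slab_inc_active()/slab_dec_active() can replace the old slab->active field without a separate counter.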