@@ -142,6 +142,15 @@ enum pageflags {
PG_readahead = PG_reclaim,
+ /*
+ * Depending on the way an anonymous folio can be mapped into a page
+ * table (e.g., single PMD/PUD/CONT of the head page vs. PTE-mapped
+ * THP), PG_anon_exclusive may be set only for the head page or for
+ * tail pages of an anonymous folio. For now, we only expect it to be
+ * set on tail pages for PTE-mapped THP.
+ */
+ PG_anon_exclusive = PG_mappedtodisk,
+
/* Filesystems */
PG_checked = PG_owner_priv_1,
@@ -176,7 +185,7 @@ enum pageflags {
* Indicates that at least one subpage is hwpoisoned in the
* THP.
*/
- PG_has_hwpoisoned = PG_mappedtodisk,
+ PG_has_hwpoisoned = PG_error,
#endif
/* non-lru isolated movable page */
@@ -1002,6 +1011,34 @@ extern bool is_free_buddy_page(struct page *page);
PAGEFLAG(Isolated, isolated, PF_ANY);
+static __always_inline int PageAnonExclusive(struct page *page)
+{
+ VM_BUG_ON_PGFLAGS(!PageAnon(page), page);
+ VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
+ return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags);
+}
+
+static __always_inline void SetPageAnonExclusive(struct page *page)
+{
+ VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page);
+ VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
+ set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags);
+}
+
+static __always_inline void ClearPageAnonExclusive(struct page *page)
+{
+ VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page);
+ VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
+ clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags);
+}
+
+static __always_inline void __ClearPageAnonExclusive(struct page *page)
+{
+ VM_BUG_ON_PGFLAGS(!PageAnon(page), page);
+ VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
+ __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags);
+}
+
#ifdef CONFIG_MMU
#define __PG_MLOCKED (1UL << PG_mlocked)
#else
@@ -1672,6 +1672,8 @@ void free_huge_page(struct page *page)
VM_BUG_ON_PAGE(page_mapcount(page), page);
hugetlb_set_page_subpool(page, NULL);
+ if (PageAnon(page))
+ __ClearPageAnonExclusive(page);
page->mapping = NULL;
restore_reserve = HPageRestoreReserve(page);
ClearHPageRestoreReserve(page);
@@ -3663,6 +3663,17 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
goto out_nomap;
}
+ /*
+ * PG_anon_exclusive reuses PG_mappedtodisk for anon pages. A swap pte
+ * must never point at an anonymous page in the swapcache that is
+ * PG_anon_exclusive. Sanity check that this holds and especially, that
+ * no filesystem set PG_mappedtodisk on a page in the swapcache. Sanity
+ * check after taking the PT lock and making sure that nobody
+ * concurrently faulted in this page and set PG_anon_exclusive.
+ */
+ BUG_ON(!PageAnon(page) && PageMappedToDisk(page));
+ BUG_ON(PageAnon(page) && PageAnonExclusive(page));
+
/*
* Remove the swap entry and conditionally try to free up the swapcache.
* We're already holding a reference on the page but haven't mapped it
@@ -458,6 +458,15 @@ void free_zone_device_page(struct page *page)
mem_cgroup_uncharge(page_folio(page));
+ /*
+ * Note: we don't expect anonymous compound pages yet. Once supported
+ * and we could PTE-map them similar to THP, we'd have to clear
+ * PG_anon_exclusive on all tail pages.
+ */
+ VM_BUG_ON_PAGE(PageAnon(page) && PageCompound(page), page);
+ if (PageAnon(page))
+ __ClearPageAnonExclusive(page);
+
/*
* When a device managed page is freed, the page->mapping field
* may still contain a (stale) mapping value. For example, the
@@ -1796,6 +1796,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
goto out;
}
+ /* See do_swap_page() */
+ BUG_ON(!PageAnon(page) && PageMappedToDisk(page));
+ BUG_ON(PageAnon(page) && PageAnonExclusive(page));
+
dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
get_page(page);
@@ -80,9 +80,10 @@
#define KPF_SOFTDIRTY 40
#define KPF_ARCH_2 41
-/* [48-] take some arbitrary free slots for expanding overloaded flags
+/* [47-] take some arbitrary free slots for expanding overloaded flags
* not part of kernel API
*/
+#define KPF_ANON_EXCLUSIVE 47
#define KPF_READAHEAD 48
#define KPF_SLOB_FREE 49
#define KPF_SLUB_FROZEN 50
@@ -138,6 +139,7 @@ static const char * const page_flag_names[] = {
[KPF_SOFTDIRTY] = "f:softdirty",
[KPF_ARCH_2] = "H:arch_2",
+ [KPF_ANON_EXCLUSIVE] = "d:anon_exclusive",
[KPF_READAHEAD] = "I:readahead",
[KPF_SLOB_FREE] = "P:slob_free",
[KPF_SLUB_FROZEN] = "A:slub_frozen",
@@ -472,6 +474,10 @@ static int bit_mask_ok(uint64_t flags)
static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme)
{
+ /* Anonymous pages overload PG_mappedtodisk */
+ if ((flags & BIT(ANON)) && (flags & BIT(MAPPEDTODISK)))
+ flags ^= BIT(MAPPEDTODISK) | BIT(ANON_EXCLUSIVE);
+
/* SLOB/SLUB overload several page flags */
if (flags & BIT(SLAB)) {
if (flags & BIT(PRIVATE))