@@ -1141,6 +1141,8 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long
return native_pmdp_get_and_clear(pmdp);
}
+#define mk_pud(page, pgprot) pfn_pud(page_to_pfn(page), (pgprot))
+
#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
@@ -428,6 +428,7 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d SUnreclaim: %8lu kB\n"
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
"Node %d AnonHugePages: %8lu kB\n"
+ "Node %d AnonHugePUDPages: %8lu kB\n"
"Node %d ShmemHugePages: %8lu kB\n"
"Node %d ShmemPmdMapped: %8lu kB\n"
"Node %d FileHugePages: %8lu kB\n"
@@ -457,6 +458,8 @@ static ssize_t node_read_meminfo(struct device *dev,
,
nid, K(node_page_state(pgdat, NR_ANON_THPS) *
HPAGE_PMD_NR),
+ nid, K(node_page_state(pgdat, NR_ANON_THPS_PUD) *
+ HPAGE_PUD_NR),
nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
HPAGE_PMD_NR),
nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
@@ -130,6 +130,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
show_val_kb(m, "AnonHugePages: ",
global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR);
+ show_val_kb(m, "AnonHugePUDPages: ",
+ global_node_page_state(NR_ANON_THPS_PUD) * HPAGE_PUD_NR);
show_val_kb(m, "ShmemHugePages: ",
global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
show_val_kb(m, "ShmemPmdMapped: ",
@@ -18,10 +18,15 @@ extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
+extern int do_huge_pud_anonymous_page(struct vm_fault *vmf);
#else
static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
{
}
+static inline int do_huge_pud_anonymous_page(struct vm_fault *vmf)
+{
+	return VM_FAULT_FALLBACK;
+}
#endif
extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
@@ -323,6 +328,7 @@ struct page *mm_get_huge_zero_page(struct mm_struct *mm);
void mm_put_huge_zero_page(struct mm_struct *mm);
#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot))
+#define mk_huge_pud(page, prot) pud_mkhuge(mk_pud(page, prot))
static inline bool thp_migration_supported(void)
{
@@ -196,6 +196,7 @@ enum node_stat_item {
NR_FILE_THPS,
NR_FILE_PMDMAPPED,
NR_ANON_THPS,
+ NR_ANON_THPS_PUD,
NR_VMSCAN_WRITE,
NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
NR_DIRTIED, /* page dirtyings since bootup */
@@ -93,6 +93,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
THP_DEFERRED_SPLIT_PAGE,
THP_SPLIT_PMD,
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+ THP_FAULT_ALLOC_PUD,
+ THP_FAULT_FALLBACK_PUD,
+ THP_FAULT_FALLBACK_PUD_CHARGE,
THP_SPLIT_PUD,
#endif
THP_ZERO_PAGE_ALLOC,
@@ -933,6 +933,111 @@ vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn,
return VM_FAULT_NOPAGE;
}
EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud_prot);
+
+static int __do_huge_pud_anonymous_page(struct vm_fault *vmf, struct page *page,
+		gfp_t gfp)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	pmd_t *pmd_pgtable;
+	unsigned long haddr = vmf->address & HPAGE_PUD_MASK;
+	int ret = 0;
+
+	VM_BUG_ON_PAGE(!PageCompound(page), page);
+
+	if (mem_cgroup_charge(page, vma->vm_mm, gfp)) {
+		put_page(page);
+		count_vm_event(THP_FAULT_FALLBACK_PUD);
+		count_vm_event(THP_FAULT_FALLBACK_PUD_CHARGE);
+		return VM_FAULT_FALLBACK;
+	}
+	cgroup_throttle_swaprate(page, gfp);
+
+	pmd_pgtable = pmd_alloc_one_page_with_ptes(vma->vm_mm, haddr);
+	if (unlikely(!pmd_pgtable)) {
+		ret = VM_FAULT_OOM;
+		goto release;
+	}
+
+	clear_huge_page(page, vmf->address, HPAGE_PUD_NR);
+	/*
+	 * The memory barrier inside __SetPageUptodate makes sure that
+	 * clear_huge_page writes become visible before the set_pmd_at()
+	 * write.
+	 */
+	__SetPageUptodate(page);
+
+	vmf->ptl = pud_lock(vma->vm_mm, vmf->pud);
+	if (unlikely(!pud_none(*vmf->pud))) {
+		goto unlock_release;
+	} else {
+		pud_t entry;
+		int i;
+
+		ret = check_stable_address_space(vma->vm_mm);
+		if (ret)
+			goto unlock_release;
+
+		/* Deliver the page fault to userland */
+		if (userfaultfd_missing(vma)) {
+			vm_fault_t ret2;
+
+			spin_unlock(vmf->ptl);
+			put_page(page);
+			pmd_free_page_with_ptes(vma->vm_mm, pmd_pgtable);
+			ret2 = handle_userfault(vmf, VM_UFFD_MISSING);
+			VM_BUG_ON(ret2 & VM_FAULT_FALLBACK);
+			return ret2;
+		}
+
+		entry = mk_huge_pud(page, vma->vm_page_prot);
+		entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma);
+		page_add_new_anon_rmap(page, vma, haddr, true);
+		lru_cache_add_inactive_or_unevictable(page, vma);
+		pgtable_trans_huge_pud_deposit(vma->vm_mm, vmf->pud,
+				virt_to_page(pmd_pgtable));
+		set_pud_at(vma->vm_mm, haddr, vmf->pud, entry);
+		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PUD_NR);
+		mm_inc_nr_pmds(vma->vm_mm);
+		for (i = 0; i < (1<<(HPAGE_PUD_ORDER - HPAGE_PMD_ORDER)); i++)
+			mm_inc_nr_ptes(vma->vm_mm);
+		spin_unlock(vmf->ptl);
+		count_vm_event(THP_FAULT_ALLOC_PUD);
+	}
+
+	return 0;
+unlock_release:
+	spin_unlock(vmf->ptl);
+release:
+	if (pmd_pgtable)
+		pmd_free_page_with_ptes(vma->vm_mm, pmd_pgtable);
+	put_page(page);
+	return ret;
+
+}
+
+int do_huge_pud_anonymous_page(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ gfp_t gfp;
+ struct page *page;
+ unsigned long haddr = vmf->address & HPAGE_PUD_MASK;
+
+ if (haddr < vma->vm_start || haddr + HPAGE_PUD_SIZE > vma->vm_end)
+ return VM_FAULT_FALLBACK;
+ if (unlikely(anon_vma_prepare(vma)))
+ return VM_FAULT_OOM;
+ /* no khugepaged_enter, since PUD THP is not supported by khugepaged */
+
+ gfp = alloc_hugepage_direct_gfpmask(vma);
+ page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PUD_ORDER);
+ if (unlikely(!page)) {
+ count_vm_event(THP_FAULT_FALLBACK_PUD);
+ return VM_FAULT_FALLBACK;
+ }
+ prep_transhuge_page(page);
+ return __do_huge_pud_anonymous_page(vmf, page, gfp);
+}
+
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -5434,7 +5434,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR),
K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)
* HPAGE_PMD_NR),
- K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
+ K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR +
+ node_page_state(pgdat, NR_ANON_THPS_PUD) * HPAGE_PUD_NR),
#endif
K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
node_page_state(pgdat, NR_KERNEL_STACK_KB),
@@ -726,6 +726,7 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
+ pud_t pude;
pmd_t *pmd = NULL;
pmd_t pmde;
@@ -738,7 +739,10 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
goto out;
pud = pud_offset(p4d, address);
- if (!pud_present(*pud))
+
+ pude = *pud;
+ barrier();
+ if (!pud_present(pude) || pud_trans_huge(pude))
goto out;
pmd = pmd_offset(pud, address);
@@ -1137,8 +1141,12 @@ void do_page_add_anon_rmap(struct page *page,
* pte lock(a spinlock) is held, which implies preemption
* disabled.
*/
- if (compound)
- __inc_lruvec_page_state(page, NR_ANON_THPS);
+ if (compound) {
+ if (nr == HPAGE_PMD_NR)
+ __inc_lruvec_page_state(page, NR_ANON_THPS);
+ else
+ __inc_lruvec_page_state(page, NR_ANON_THPS_PUD);
+ }
__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
}
@@ -1180,7 +1188,10 @@ void page_add_new_anon_rmap(struct page *page,
if (hpage_pincount_available(page))
atomic_set(compound_pincount_ptr(page), 0);
- __inc_lruvec_page_state(page, NR_ANON_THPS);
+ if (nr == HPAGE_PMD_NR)
+ __inc_lruvec_page_state(page, NR_ANON_THPS);
+ else
+ __inc_lruvec_page_state(page, NR_ANON_THPS_PUD);
} else {
/* Anon THP always mapped first with PMD */
VM_BUG_ON_PAGE(PageTransCompound(page), page);
@@ -1286,7 +1297,10 @@ static void page_remove_anon_compound_rmap(struct page *page)
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
return;
- __dec_lruvec_page_state(page, NR_ANON_THPS);
+ if (thp_nr_pages(page) == HPAGE_PMD_NR)
+ __dec_lruvec_page_state(page, NR_ANON_THPS);
+ else
+ __dec_lruvec_page_state(page, NR_ANON_THPS_PUD);
if (TestClearPageDoubleMap(page)) {
/*
@@ -1209,6 +1209,7 @@ const char * const vmstat_text[] = {
"nr_file_hugepages",
"nr_file_pmdmapped",
"nr_anon_transparent_hugepages",
+ "nr_anon_transparent_pud_hugepages",
"nr_vmscan_write",
"nr_vmscan_immediate_reclaim",
"nr_dirtied",
@@ -1326,6 +1327,9 @@ const char * const vmstat_text[] = {
"thp_deferred_split_page",
"thp_split_pmd",
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+ "thp_fault_alloc_pud",
+ "thp_fault_fallback_pud",
+ "thp_fault_fallback_pud_charge",
"thp_split_pud",
#endif
"thp_zero_page_alloc",