@@ -3302,7 +3302,8 @@ enum mf_action_page_type {
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
extern void clear_huge_page(struct page *page,
unsigned long addr_hint,
- unsigned int pages_per_huge_page);
+ unsigned int pages_per_huge_page,
+ bool non_cached);
extern void copy_user_huge_page(struct page *dst, struct page *src,
unsigned long addr_hint,
struct vm_area_struct *vma,
@@ -594,6 +594,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
pgtable_t pgtable;
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
vm_fault_t ret = 0;
+ bool non_cached = false;
VM_BUG_ON_PAGE(!PageCompound(page), page);
@@ -611,7 +612,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
goto release;
}
- clear_huge_page(page, vmf->address, HPAGE_PMD_NR);
+ clear_huge_page(page, vmf->address, HPAGE_PMD_NR, non_cached);
/*
* The memory barrier inside __SetPageUptodate makes sure that
* clear_huge_page writes become visible before the set_pmd_at()
@@ -5481,6 +5481,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
spinlock_t *ptl;
unsigned long haddr = address & huge_page_mask(h);
bool new_page, new_pagecache_page = false;
+ bool non_cached = false;
/*
* Currently, we are forced to kill the process in the event the
@@ -5536,7 +5537,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
spin_unlock(ptl);
goto out;
}
- clear_huge_page(page, address, pages_per_huge_page(h));
+ clear_huge_page(page, address, pages_per_huge_page(h), non_cached);
__SetPageUptodate(page);
new_page = true;
@@ -5606,11 +5606,18 @@ bool clear_page_prefer_non_caching(unsigned long extent)
*
* With ARCH_MAX_CLEAR_PAGES == 1, clear_user_highpages() drops down
* to page-at-a-time mode. Or, funnels through to clear_user_pages().
+ *
+ * With coherent == false, we use incoherent stores and the caller is
+ * responsible for making the region coherent again by calling
+ * clear_page_make_coherent().
*/
static void clear_user_extent(struct page *start_page, unsigned long vaddr,
- unsigned int npages)
+ unsigned int npages, bool coherent)
{
- clear_user_highpages(start_page, vaddr, npages);
+ if (coherent)
+ clear_user_highpages(start_page, vaddr, npages);
+ else
+ clear_user_highpages_incoherent(start_page, vaddr, npages);
}
struct subpage_arg {
@@ -5709,6 +5716,13 @@ static void clear_gigantic_page(struct page *page,
{
int i;
struct page *p = page;
+ bool coherent;
+
+ /*
+ * Gigantic pages are large enough that there are no cache
+ * expectations. Use the incoherent path.
+ */
+ coherent = false;
might_sleep();
for (i = 0; i < pages_per_huge_page;
@@ -5718,9 +5732,16 @@ static void clear_gigantic_page(struct page *page,
* guarantees that p[0] and p[clear_page_unit-1]
* never straddle a mem_map discontiguity.
*/
- clear_user_extent(p, base_addr + i * PAGE_SIZE, clear_page_unit);
+ clear_user_extent(p, base_addr + i * PAGE_SIZE,
+ clear_page_unit, coherent);
cond_resched();
}
+
+ /*
+ * We need to make sure that writes above are ordered before
+ * updating the PTE and marking SetPageUptodate().
+ */
+ clear_page_make_coherent();
}
static void clear_subpages(struct subpage_arg *sa,
@@ -5736,15 +5757,16 @@ static void clear_subpages(struct subpage_arg *sa,
n = min(clear_page_unit, remaining);
- clear_user_extent(page + i, base_addr + i * PAGE_SIZE, n);
+ clear_user_extent(page + i, base_addr + i * PAGE_SIZE,
+ n, true);
i += n;
cond_resched();
}
}
-void clear_huge_page(struct page *page,
- unsigned long addr_hint, unsigned int pages_per_huge_page)
+void clear_huge_page(struct page *page, unsigned long addr_hint,
+ unsigned int pages_per_huge_page, bool non_cached)
{
unsigned long addr = addr_hint &
~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
@@ -5755,7 +5777,21 @@ void clear_huge_page(struct page *page,
.page_unit = clear_page_unit,
};
- if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
+ /*
+ * The non-caching path is typically slower for small extents so use
+ * it only if the caller explicitly hints it or if the extent is
+ * large enough that there are no cache expectations.
+ *
+ * We let the gigantic page path handle the details.
+ */
+ non_cached |=
+ clear_page_prefer_non_caching(pages_per_huge_page * PAGE_SIZE);
+
+ if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES || non_cached)) {
+ /*
+ * Gigantic page clearing always uses incoherent clearing
+ * internally.
+ */
clear_gigantic_page(page, addr, pages_per_huge_page);
return;
}
Non-caching stores are suitable for circumstances where the destination region is unlikely to be read again soon, or is large enough that there's no expectation that we will find the data in the cache. Add a new parameter to clear_user_extent(), which handles the non-caching clearing path for huge and gigantic pages. This needs a final clear_page_make_coherent() operation since non-cached clearing typically involves weakly ordered stores that are incoherent with respect to other operations in the memory hierarchy. This path is always invoked for gigantic pages; for huge pages it is invoked only if pages_per_huge_page is greater than an architectural threshold, or if the caller gives an explicit hint (for instance, if this call is part of a larger clearing operation). Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com> --- include/linux/mm.h | 3 ++- mm/huge_memory.c | 3 ++- mm/hugetlb.c | 3 ++- mm/memory.c | 50 +++++++++++++++++++++++++++++++++++++++------- 4 files changed, 49 insertions(+), 10 deletions(-)