@@ -26,6 +26,8 @@ void __tlb_remove_table(void *_table);
static inline void tlb_flush(struct mmu_gather *tlb);
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct page *page, bool delay_rmap, int page_size);
+static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
+ struct page *page, unsigned int nr_pages, bool delay_rmap);
#define tlb_flush tlb_flush
#define pte_free_tlb pte_free_tlb
@@ -52,6 +54,21 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
return false;
}
+static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
+ struct page *page, unsigned int nr_pages, bool delay_rmap)
+{
+ struct encoded_page *encoded_pages[] = {
+ encode_page(page, ENCODED_PAGE_BIT_NR_PAGES_NEXT),
+ encode_nr_pages(nr_pages),
+ };
+
+ VM_WARN_ON_ONCE(delay_rmap);
+ VM_WARN_ON_ONCE(page_folio(page) != page_folio(page + nr_pages - 1));
+
+ free_pages_and_swap_cache(encoded_pages, ARRAY_SIZE(encoded_pages));
+ return false;
+}
+
static inline void tlb_flush(struct mmu_gather *tlb)
{
__tlb_flush_mm_lazy(tlb->mm);
@@ -69,6 +69,7 @@
*
* - tlb_remove_page() / __tlb_remove_page()
* - tlb_remove_page_size() / __tlb_remove_page_size()
+ * - __tlb_remove_folio_pages()
*
* __tlb_remove_page_size() is the basic primitive that queues a page for
* freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a
@@ -78,6 +79,11 @@
* tlb_remove_page() and tlb_remove_page_size() imply the call to
* tlb_flush_mmu() when required and has no return value.
*
+ * __tlb_remove_folio_pages() is similar to __tlb_remove_page(), however,
+ * instead of removing a single page, remove the given number of consecutive
+ * pages that are all part of the same (large) folio: just like calling
+ * __tlb_remove_page() on each page individually.
+ *
* - tlb_change_page_size()
*
* call before __tlb_remove_page*() to set the current page-size; implies a
@@ -262,6 +268,8 @@ struct mmu_gather_batch {
extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
bool delay_rmap, int page_size);
+bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page,
+ unsigned int nr_pages, bool delay_rmap);
#ifdef CONFIG_SMP
/*
@@ -226,6 +226,15 @@ struct encoded_page;
/* Perform rmap removal after we have flushed the TLB. */
#define ENCODED_PAGE_BIT_DELAY_RMAP 1ul
+/*
+ * The next item in an encoded_page array is the "nr_pages" argument, specifying
+ * the number of consecutive pages starting from this page, that all belong to
+ * the same folio. For example, "nr_pages" corresponds to the number of folio
+ * references that must be dropped. If this bit is not set, "nr_pages" is
+ * implicitly 1.
+ */
+#define ENCODED_PAGE_BIT_NR_PAGES_NEXT 2ul
+
static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags)
{
BUILD_BUG_ON(flags > ENCODED_PAGE_BITS);
@@ -242,6 +251,17 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page)
return (struct page *)(~ENCODED_PAGE_BITS & (unsigned long)page);
}
+static __always_inline struct encoded_page *encode_nr_pages(unsigned long nr)
+{
+ VM_WARN_ON_ONCE((nr << 2) >> 2 != nr);
+ return (struct encoded_page *)(nr << 2);
+}
+
+static __always_inline unsigned long encoded_nr_pages(struct encoded_page *page)
+{
+ return ((unsigned long)page) >> 2;
+}
+
/*
* A swap entry has to fit into a "unsigned long", as the entry is hidden
* in the "index" field of the swapper address space.
@@ -50,12 +50,21 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
#ifdef CONFIG_SMP
static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_struct *vma)
{
+ struct encoded_page **pages = batch->encoded_pages;
+
for (int i = 0; i < batch->nr; i++) {
- struct encoded_page *enc = batch->encoded_pages[i];
+ struct encoded_page *enc = pages[i];
if (encoded_page_flags(enc) & ENCODED_PAGE_BIT_DELAY_RMAP) {
struct page *page = encoded_page_ptr(enc);
- folio_remove_rmap_pte(page_folio(page), page, vma);
+ unsigned int nr_pages = 1;
+
+ if (unlikely(encoded_page_flags(enc) &
+ ENCODED_PAGE_BIT_NR_PAGES_NEXT))
+ nr_pages = encoded_nr_pages(pages[++i]);
+
+ folio_remove_rmap_ptes(page_folio(page), page, nr_pages,
+ vma);
}
}
}
@@ -89,18 +98,26 @@ static void tlb_batch_pages_flush(struct mmu_gather *tlb)
for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
struct encoded_page **pages = batch->encoded_pages;
- do {
+ while (batch->nr) {
/*
* limit free batch count when PAGE_SIZE > 4K
*/
unsigned int nr = min(512U, batch->nr);
+ /*
+ * Make sure we cover page + nr_pages, and don't leave
+ * nr_pages behind when capping the number of entries.
+ */
+ if (unlikely(encoded_page_flags(pages[nr - 1]) &
+ ENCODED_PAGE_BIT_NR_PAGES_NEXT))
+ nr++;
+
free_pages_and_swap_cache(pages, nr);
pages += nr;
batch->nr -= nr;
cond_resched();
- } while (batch->nr);
+ }
}
tlb->active = &tlb->local;
}
@@ -116,8 +133,9 @@ static void tlb_batch_list_free(struct mmu_gather *tlb)
tlb->local.next = NULL;
}
-bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
- bool delay_rmap, int page_size)
+static bool __tlb_remove_folio_pages_size(struct mmu_gather *tlb,
+ struct page *page, unsigned int nr_pages, bool delay_rmap,
+ int page_size)
{
int flags = delay_rmap ? ENCODED_PAGE_BIT_DELAY_RMAP : 0;
struct mmu_gather_batch *batch;
@@ -126,6 +144,8 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
VM_WARN_ON(tlb->page_size != page_size);
+ VM_WARN_ON_ONCE(nr_pages != 1 && page_size != PAGE_SIZE);
+ VM_WARN_ON_ONCE(page_folio(page) != page_folio(page + nr_pages - 1));
#endif
batch = tlb->active;
@@ -133,17 +153,40 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
* Add the page and check if we are full. If so
* force a flush.
*/
- batch->encoded_pages[batch->nr++] = encode_page(page, flags);
- if (batch->nr == batch->max) {
+ if (likely(nr_pages == 1)) {
+ batch->encoded_pages[batch->nr++] = encode_page(page, flags);
+ } else {
+ flags |= ENCODED_PAGE_BIT_NR_PAGES_NEXT;
+ batch->encoded_pages[batch->nr++] = encode_page(page, flags);
+ batch->encoded_pages[batch->nr++] = encode_nr_pages(nr_pages);
+ }
+ /*
+ * Make sure that we can always add another "page" + "nr_pages",
+ * requiring two entries instead of only a single one.
+ */
+ if (batch->nr >= batch->max - 1) {
if (!tlb_next_batch(tlb))
return true;
batch = tlb->active;
}
- VM_BUG_ON_PAGE(batch->nr > batch->max, page);
+ VM_BUG_ON_PAGE(batch->nr > batch->max - 1, page);
return false;
}
+bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page,
+ unsigned int nr_pages, bool delay_rmap)
+{
+ return __tlb_remove_folio_pages_size(tlb, page, nr_pages, delay_rmap,
+ PAGE_SIZE);
+}
+
+bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
+ bool delay_rmap, int page_size)
+{
+ return __tlb_remove_folio_pages_size(tlb, page, 1, delay_rmap, page_size);
+}
+
#endif /* MMU_GATHER_NO_GATHER */
#ifdef CONFIG_MMU_GATHER_TABLE_FREE
@@ -967,11 +967,17 @@ void release_pages(release_pages_arg arg, int nr)
unsigned int lock_batch;
for (i = 0; i < nr; i++) {
+ unsigned int nr_refs = 1;
struct folio *folio;
/* Turn any of the argument types into a folio */
folio = page_folio(encoded_page_ptr(encoded[i]));
+ /* Is our next entry actually "nr_pages" -> "nr_refs" ? */
+ if (unlikely(encoded_page_flags(encoded[i]) &
+ ENCODED_PAGE_BIT_NR_PAGES_NEXT))
+ nr_refs = encoded_nr_pages(encoded[++i]);
+
/*
* Make sure the IRQ-safe lock-holding time does not get
* excessive with a continuous string of pages from the
@@ -990,14 +996,14 @@ void release_pages(release_pages_arg arg, int nr)
unlock_page_lruvec_irqrestore(lruvec, flags);
lruvec = NULL;
}
- if (put_devmap_managed_page(&folio->page))
+ if (put_devmap_managed_page_refs(&folio->page, nr_refs))
continue;
- if (folio_put_testzero(folio))
+ if (folio_ref_sub_and_test(folio, nr_refs))
free_zone_device_page(&folio->page);
continue;
}
- if (!folio_put_testzero(folio))
+ if (!folio_ref_sub_and_test(folio, nr_refs))
continue;
if (folio_test_large(folio)) {
@@ -311,8 +311,19 @@ void free_page_and_swap_cache(struct page *page)
void free_pages_and_swap_cache(struct encoded_page **pages, int nr)
{
lru_add_drain();
- for (int i = 0; i < nr; i++)
- free_swap_cache(encoded_page_ptr(pages[i]));
+ for (int i = 0; i < nr; i++) {
+ struct page *page = encoded_page_ptr(pages[i]);
+
+ /*
+ * Skip over the "nr_pages" entry. It's sufficient to call
+ * free_swap_cache() only once per folio.
+ */
+ if (unlikely(encoded_page_flags(pages[i]) &
+ ENCODED_PAGE_BIT_NR_PAGES_NEXT))
+ i++;
+
+ free_swap_cache(page);
+ }
release_pages(pages, nr);
}