diff mbox series

mm: reduce tlb flush range when changing vma protection

Message ID 20220309025721.3051365-1-maobibo@loongson.cn (mailing list archive)
State New
Headers show
Series mm: reduce tlb flush range when changing vma protection | expand

Commit Message

bibo mao March 9, 2022, 2:57 a.m. UTC
The NUMA worker periodically changes VMA protection to PROT_NONE; by
default it scans 256M of VMA memory, stepping one PMD at a time.
Even if only a few pages are changed to PROT_NONE, the TLB flush still
covers a PMD-sized range. This patch calculates the flush range from the
pages whose PTE protection actually changed, which reduces the size of
the TLB flush.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
---
 mm/mprotect.c | 39 +++++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)

Comments

Nadav Amit March 9, 2022, 4:14 a.m. UTC | #1
> On Mar 8, 2022, at 6:57 PM, Bibo Mao <maobibo@loongson.cn> wrote:
> 
> numa worker will periodically change vma prot with PROT_NONE, by
> default it will scan 256M vma memory size with pmd stepping size.
> If there are fewer pages changed with PROT_NONE, tlb flush is called
> with pmd size. This patch will calculate flush range for those
> pages with pte prot changed, it will reduce size for tlb flush.
> 
> Signed-off-by: Bibo Mao <maobibo@loongson.cn>

Hi Bibo,

I finally managed to make v3 of a patchset, which I think does
something similar to what you are looking for (without introducing
yet another TLB batching mechanism).

Have a look at [1] and let me know if that would satisfy you.


[1] https://lore.kernel.org/linux-mm/20220309041043.302261-4-namit@vmware.com/T/#u
bibo mao March 9, 2022, 5:13 a.m. UTC | #2
Yes, your patch is more general and better than mine, and it solves the issue.
Please drop my patch.

regards
bibo, mao

On 03/09/2022 12:14 PM, Nadav Amit wrote:
> 
> 
>> On Mar 8, 2022, at 6:57 PM, Bibo Mao <maobibo@loongson.cn> wrote:
>>
>> numa worker will periodically change vma prot with PROT_NONE, by
>> default it will scan 256M vma memory size with pmd stepping size.
>> If there are fewer pages changed with PROT_NONE, tlb flush is called
>> with pmd size. This patch will calculate flush range for those
>> pages with pte prot changed, it will reduce size for tlb flush.
>>
>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
> 
> Hi Bibo,
> 
> I finally managed to make v3 of a patchset, which I think does
> something similar to what you are looking for (without introducing
> yet another TLB batching mechanism).
> 
> Have a look at [1] and let me know if that would satisfy you.
> 
> 
> [1] https://lore.kernel.org/linux-mm/20220309041043.302261-4-namit@vmware.com/T/#u
>
diff mbox series

Patch

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 2887644fd150..a9f51a998dc8 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -35,9 +35,23 @@ 
 
 #include "internal.h"
 
+typedef struct {
+	unsigned long start;
+	unsigned long end;
+} tlb_range;
+
+static inline void add_tlb_range(tlb_range *range, unsigned long start,
+		unsigned long end)
+{
+	if (start < range->start)
+		range->start = start;
+	if (end > range->end)
+		range->end = end;
+}
+
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		unsigned long cp_flags)
+		unsigned long cp_flags, tlb_range *range)
 {
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
@@ -138,6 +152,7 @@  static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				ptent = pte_mkwrite(ptent);
 			}
 			ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
+			add_tlb_range(range, addr, addr + PAGE_SIZE);
 			pages++;
 		} else if (is_swap_pte(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -184,6 +199,7 @@  static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 			if (!pte_same(oldpte, newpte)) {
 				set_pte_at(vma->vm_mm, addr, pte, newpte);
+				add_tlb_range(range, addr, addr + PAGE_SIZE);
 				pages++;
 			}
 		}
@@ -221,7 +237,7 @@  static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
 
 static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		pud_t *pud, unsigned long addr, unsigned long end,
-		pgprot_t newprot, unsigned long cp_flags)
+		pgprot_t newprot, unsigned long cp_flags, tlb_range *tlb)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -267,6 +283,7 @@  static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 				if (nr_ptes) {
 					if (nr_ptes == HPAGE_PMD_NR) {
 						pages += HPAGE_PMD_NR;
+						add_tlb_range(tlb, addr, next);
 						nr_huge_updates++;
 					}
 
@@ -277,7 +294,7 @@  static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 			/* fall through, the trans huge pmd just split */
 		}
 		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
-					      cp_flags);
+					      cp_flags, tlb);
 		pages += this_pages;
 next:
 		cond_resched();
@@ -293,7 +310,7 @@  static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 
 static inline unsigned long change_pud_range(struct vm_area_struct *vma,
 		p4d_t *p4d, unsigned long addr, unsigned long end,
-		pgprot_t newprot, unsigned long cp_flags)
+		pgprot_t newprot, unsigned long cp_flags, tlb_range *range)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -305,7 +322,7 @@  static inline unsigned long change_pud_range(struct vm_area_struct *vma,
 		if (pud_none_or_clear_bad(pud))
 			continue;
 		pages += change_pmd_range(vma, pud, addr, next, newprot,
-					  cp_flags);
+					  cp_flags, range);
 	} while (pud++, addr = next, addr != end);
 
 	return pages;
@@ -313,7 +330,7 @@  static inline unsigned long change_pud_range(struct vm_area_struct *vma,
 
 static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
 		pgd_t *pgd, unsigned long addr, unsigned long end,
-		pgprot_t newprot, unsigned long cp_flags)
+		pgprot_t newprot, unsigned long cp_flags, tlb_range *range)
 {
 	p4d_t *p4d;
 	unsigned long next;
@@ -325,7 +342,7 @@  static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
 		if (p4d_none_or_clear_bad(p4d))
 			continue;
 		pages += change_pud_range(vma, p4d, addr, next, newprot,
-					  cp_flags);
+					  cp_flags, range);
 	} while (p4d++, addr = next, addr != end);
 
 	return pages;
@@ -338,24 +355,26 @@  static unsigned long change_protection_range(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
 	unsigned long next;
-	unsigned long start = addr;
 	unsigned long pages = 0;
+	tlb_range range;
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
 	flush_cache_range(vma, addr, end);
+	range.start = end;
+	range.end = addr;
 	inc_tlb_flush_pending(mm);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
 		pages += change_p4d_range(vma, pgd, addr, next, newprot,
-					  cp_flags);
+					  cp_flags, &range);
 	} while (pgd++, addr = next, addr != end);
 
 	/* Only flush the TLB if we actually modified any entries: */
 	if (pages)
-		flush_tlb_range(vma, start, end);
+		flush_tlb_range(vma, range.start, range.end);
 	dec_tlb_flush_pending(mm);
 
 	return pages;