Message ID: c15e7d09dfe3dfdb9947d39ed0ddd6573ff86dbf.1554248002.git.khalid.aziz@oracle.com
State: New, archived
Series: Add support for eXclusive Page Frame Ownership
On Wed, Apr 03, 2019 at 11:34:05AM -0600, Khalid Aziz wrote:
> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
> index 2779ace16d23..5c0e1581fa56 100644
> --- a/arch/x86/include/asm/pgtable.h
> +++ b/arch/x86/include/asm/pgtable.h
> @@ -1437,6 +1437,32 @@ static inline bool arch_has_pfn_modify_check(void)
>  	return boot_cpu_has_bug(X86_BUG_L1TF);
>  }
>  
> +/*
> + * The current flushing context - we pass it instead of 5 arguments:
> + */
> +struct cpa_data {
> +	unsigned long	*vaddr;
> +	pgd_t		*pgd;
> +	pgprot_t	mask_set;
> +	pgprot_t	mask_clr;
> +	unsigned long	numpages;
> +	unsigned long	curpage;
> +	unsigned long	pfn;
> +	unsigned int	flags;
> +	unsigned int	force_split		: 1,
> +			force_static_prot	: 1;
> +	struct page	**pages;
> +};
> +
> +
> +int
> +should_split_large_page(pte_t *kpte, unsigned long address,
> +			struct cpa_data *cpa);
> +extern spinlock_t cpa_lock;
> +int
> +__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
> +		   struct page *base);
> +

I really hate exposing all that.

>  #include <asm-generic/pgtable.h>
>  #endif /* __ASSEMBLY__ */
> 
> diff --git a/arch/x86/mm/xpfo.c b/arch/x86/mm/xpfo.c
> new file mode 100644
> index 000000000000..3045bb7e4659
> --- /dev/null
> +++ b/arch/x86/mm/xpfo.c
> @@ -0,0 +1,123 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2017 Hewlett Packard Enterprise Development, L.P.
> + * Copyright (C) 2016 Brown University. All rights reserved.
> + *
> + * Authors:
> + * Juerg Haefliger <juerg.haefliger@hpe.com>
> + * Vasileios P. Kemerlis <vpk@cs.brown.edu>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + */
> +
> +#include <linux/mm.h>
> +
> +#include <asm/tlbflush.h>
> +
> +extern spinlock_t cpa_lock;
> +
> +/* Update a single kernel page table entry */
> +inline void set_kpte(void *kaddr, struct page *page, pgprot_t prot)
> +{
> +	unsigned int level;
> +	pgprot_t msk_clr;
> +	pte_t *pte = lookup_address((unsigned long)kaddr, &level);
> +
> +	if (unlikely(!pte)) {
> +		WARN(1, "xpfo: invalid address %p\n", kaddr);
> +		return;
> +	}
> +
> +	switch (level) {
> +	case PG_LEVEL_4K:
> +		set_pte_atomic(pte, pfn_pte(page_to_pfn(page),
> +					    canon_pgprot(prot)));

(sorry, do we also need a nikon_pgprot() ? :-)

> +		break;
> +	case PG_LEVEL_2M:
> +	case PG_LEVEL_1G: {
> +		struct cpa_data cpa = { };
> +		int do_split;
> +
> +		if (level == PG_LEVEL_2M)
> +			msk_clr = pmd_pgprot(*(pmd_t *)pte);
> +		else
> +			msk_clr = pud_pgprot(*(pud_t *)pte);
> +
> +		cpa.vaddr = kaddr;
> +		cpa.pages = &page;
> +		cpa.mask_set = prot;
> +		cpa.mask_clr = msk_clr;
> +		cpa.numpages = 1;
> +		cpa.flags = 0;
> +		cpa.curpage = 0;
> +		cpa.force_split = 0;
> +
> +
> +		do_split = should_split_large_page(pte, (unsigned long)kaddr,
> +						   &cpa);
> +		if (do_split) {
> +			struct page *base;
> +
> +			base = alloc_pages(GFP_ATOMIC, 0);
> +			if (!base) {
> +				WARN(1, "xpfo: failed to split large page\n");

You have to be fcking kidding right? A WARN when a GFP_ATOMIC allocation
fails?!

> +				break;
> +			}
> +
> +			if (!debug_pagealloc_enabled())
> +				spin_lock(&cpa_lock);
> +			if (__split_large_page(&cpa, pte, (unsigned long)kaddr,
> +					       base) < 0) {
> +				__free_page(base);
> +				WARN(1, "xpfo: failed to split large page\n");
> +			}
> +			if (!debug_pagealloc_enabled())
> +				spin_unlock(&cpa_lock);
> +		}
> +
> +		break;

Ever heard of helper functions?

> +	}
> +	case PG_LEVEL_512G:
> +		/* fallthrough, splitting infrastructure doesn't
> +		 * support 512G pages.
> +		 */

Broken comment style.

> +	default:
> +		WARN(1, "xpfo: unsupported page level %x\n", level);
> +	}
> +
> +}
> +EXPORT_SYMBOL_GPL(set_kpte);
> +
> +inline void xpfo_flush_kernel_tlb(struct page *page, int order)
> +{
> +	int level;
> +	unsigned long size, kaddr;
> +
> +	kaddr = (unsigned long)page_address(page);
> +
> +	if (unlikely(!lookup_address(kaddr, &level))) {
> +		WARN(1, "xpfo: invalid address to flush %lx %d\n", kaddr,
> +		     level);
> +		return;
> +	}
> +
> +	switch (level) {
> +	case PG_LEVEL_4K:
> +		size = PAGE_SIZE;
> +		break;
> +	case PG_LEVEL_2M:
> +		size = PMD_SIZE;
> +		break;
> +	case PG_LEVEL_1G:
> +		size = PUD_SIZE;
> +		break;
> +	default:
> +		WARN(1, "xpfo: unsupported page level %x\n", level);
> +		return;
> +	}
> +
> +	flush_tlb_kernel_range(kaddr, kaddr + (1 << order) * size);
> +}

You call this from IRQ/IRQ-disabled context... that _CANNOT_ be right.
[trimmed To: and Cc: lists; they were too large]

On 4/4/19 1:52 AM, Peter Zijlstra wrote:
> On Wed, Apr 03, 2019 at 11:34:05AM -0600, Khalid Aziz wrote:
>> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
>> index 2779ace16d23..5c0e1581fa56 100644
>> --- a/arch/x86/include/asm/pgtable.h
>> +++ b/arch/x86/include/asm/pgtable.h
>> @@ -1437,6 +1437,32 @@ static inline bool arch_has_pfn_modify_check(void)
>>  	return boot_cpu_has_bug(X86_BUG_L1TF);
>>  }
>>  
>> +/*
>> + * The current flushing context - we pass it instead of 5 arguments:
>> + */
>> +struct cpa_data {
>> +	unsigned long	*vaddr;
>> +	pgd_t		*pgd;
>> +	pgprot_t	mask_set;
>> +	pgprot_t	mask_clr;
>> +	unsigned long	numpages;
>> +	unsigned long	curpage;
>> +	unsigned long	pfn;
>> +	unsigned int	flags;
>> +	unsigned int	force_split		: 1,
>> +			force_static_prot	: 1;
>> +	struct page	**pages;
>> +};
>> +
>> +
>> +int
>> +should_split_large_page(pte_t *kpte, unsigned long address,
>> +			struct cpa_data *cpa);
>> +extern spinlock_t cpa_lock;
>> +int
>> +__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
>> +		   struct page *base);
>> +
> 
> I really hate exposing all that.

I believe this was done so set_kpte() could split large pages if needed.
I will look into creating a helper function instead so this does not
have to be exposed.

> 
>>  #include <asm-generic/pgtable.h>
>>  #endif /* __ASSEMBLY__ */
>> 
> 
>> diff --git a/arch/x86/mm/xpfo.c b/arch/x86/mm/xpfo.c
>> new file mode 100644
>> index 000000000000..3045bb7e4659
>> --- /dev/null
>> +++ b/arch/x86/mm/xpfo.c
>> @@ -0,0 +1,123 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2017 Hewlett Packard Enterprise Development, L.P.
>> + * Copyright (C) 2016 Brown University. All rights reserved.
>> + *
>> + * Authors:
>> + * Juerg Haefliger <juerg.haefliger@hpe.com>
>> + * Vasileios P. Kemerlis <vpk@cs.brown.edu>
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License version 2 as published by
>> + * the Free Software Foundation.
>> + */
>> +
>> +#include <linux/mm.h>
>> +
>> +#include <asm/tlbflush.h>
>> +
>> +extern spinlock_t cpa_lock;
>> +
>> +/* Update a single kernel page table entry */
>> +inline void set_kpte(void *kaddr, struct page *page, pgprot_t prot)
>> +{
>> +	unsigned int level;
>> +	pgprot_t msk_clr;
>> +	pte_t *pte = lookup_address((unsigned long)kaddr, &level);
>> +
>> +	if (unlikely(!pte)) {
>> +		WARN(1, "xpfo: invalid address %p\n", kaddr);
>> +		return;
>> +	}
>> +
>> +	switch (level) {
>> +	case PG_LEVEL_4K:
>> +		set_pte_atomic(pte, pfn_pte(page_to_pfn(page),
>> +					    canon_pgprot(prot)));
> 
> (sorry, do we also need a nikon_pgprot() ? :-)

Are we trying to encourage nikon as well to sponsor this patch :)

> 
>> +		break;
>> +	case PG_LEVEL_2M:
>> +	case PG_LEVEL_1G: {
>> +		struct cpa_data cpa = { };
>> +		int do_split;
>> +
>> +		if (level == PG_LEVEL_2M)
>> +			msk_clr = pmd_pgprot(*(pmd_t *)pte);
>> +		else
>> +			msk_clr = pud_pgprot(*(pud_t *)pte);
>> +
>> +		cpa.vaddr = kaddr;
>> +		cpa.pages = &page;
>> +		cpa.mask_set = prot;
>> +		cpa.mask_clr = msk_clr;
>> +		cpa.numpages = 1;
>> +		cpa.flags = 0;
>> +		cpa.curpage = 0;
>> +		cpa.force_split = 0;
>> +
>> +
>> +		do_split = should_split_large_page(pte, (unsigned long)kaddr,
>> +						   &cpa);
>> +		if (do_split) {
>> +			struct page *base;
>> +
>> +			base = alloc_pages(GFP_ATOMIC, 0);
>> +			if (!base) {
>> +				WARN(1, "xpfo: failed to split large page\n");
> 
> You have to be fcking kidding right? A WARN when a GFP_ATOMIC allocation
> fails?!
> 

I am not sure what the reasoning was for this WARN in the original
patch, but I think it was meant to flag the failure to split the large
page (and hence to unmap just this one page), not the allocation failure
itself. Nevertheless, this could be done better.

>> +				break;
>> +			}
>> +
>> +			if (!debug_pagealloc_enabled())
>> +				spin_lock(&cpa_lock);
>> +			if (__split_large_page(&cpa, pte, (unsigned long)kaddr,
>> +					       base) < 0) {
>> +				__free_page(base);
>> +				WARN(1, "xpfo: failed to split large page\n");
>> +			}
>> +			if (!debug_pagealloc_enabled())
>> +				spin_unlock(&cpa_lock);
>> +		}
>> +
>> +		break;
> 
> Ever heard of helper functions?

Good idea. I will see if all of this can be done in a helper function
instead (a rough sketch follows at the end of this mail).

> 
>> +	}
>> +	case PG_LEVEL_512G:
>> +		/* fallthrough, splitting infrastructure doesn't
>> +		 * support 512G pages.
>> +		 */
> 
> Broken comment style.

Slipped by me. Thanks, I will fix that.

> 
>> +	default:
>> +		WARN(1, "xpfo: unsupported page level %x\n", level);
>> +	}
>> +
>> +}
>> +EXPORT_SYMBOL_GPL(set_kpte);
>> +
>> +inline void xpfo_flush_kernel_tlb(struct page *page, int order)
>> +{
>> +	int level;
>> +	unsigned long size, kaddr;
>> +
>> +	kaddr = (unsigned long)page_address(page);
>> +
>> +	if (unlikely(!lookup_address(kaddr, &level))) {
>> +		WARN(1, "xpfo: invalid address to flush %lx %d\n", kaddr,
>> +		     level);
>> +		return;
>> +	}
>> +
>> +	switch (level) {
>> +	case PG_LEVEL_4K:
>> +		size = PAGE_SIZE;
>> +		break;
>> +	case PG_LEVEL_2M:
>> +		size = PMD_SIZE;
>> +		break;
>> +	case PG_LEVEL_1G:
>> +		size = PUD_SIZE;
>> +		break;
>> +	default:
>> +		WARN(1, "xpfo: unsupported page level %x\n", level);
>> +		return;
>> +	}
>> +
>> +	flush_tlb_kernel_range(kaddr, kaddr + (1 << order) * size);
>> +}
> 
> You call this from IRQ/IRQ-disabled context... that _CANNOT_ be right.
> 

Another reason why the current implementation of xpfo_kmap()/xpfo_kunmap()
does not look right. I am leaning more and more towards rewriting this to
be similar to kmap_high(), while taking into account your input on using
fixmap for kmap_atomic().

Side note: jsteckli@amazon.de is bouncing. Julian wrote quite a bit of
code in these patches. If anyone has Julian's current email, it would be
appreciated. Getting his feedback on these discussions will be useful.

Thanks,
Khalid
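P.S.: For discussion, here is a rough and untested sketch of the helper
I have in mind. It would live in arch/x86/mm/pageattr.c next to the rest
of the cpa machinery, so struct cpa_data, cpa_lock,
should_split_large_page() and __split_large_page() could all stay
private to that file; only this one function would need a declaration.
The name xpfo_split_large_page() and its exact signature are just
placeholders, not part of the posted patch:

/* Untested sketch; would sit in arch/x86/mm/pageattr.c. */
int xpfo_split_large_page(pte_t *kpte, unsigned long address,
			  struct page *page, pgprot_t mask_set,
			  pgprot_t mask_clr)
{
	struct cpa_data cpa = { };
	struct page *base;
	int ret;

	/* cpa.vaddr is a pointer to the address being changed */
	cpa.vaddr = &address;
	cpa.pages = &page;
	cpa.mask_set = mask_set;
	cpa.mask_clr = mask_clr;
	cpa.numpages = 1;

	/* Nothing to do if the large entry does not need to be split */
	if (!should_split_large_page(kpte, address, &cpa))
		return 0;

	/* Callers may hold spinlocks, hence GFP_ATOMIC */
	base = alloc_pages(GFP_ATOMIC, 0);
	if (!base)
		return -ENOMEM;

	if (!debug_pagealloc_enabled())
		spin_lock(&cpa_lock);
	ret = __split_large_page(&cpa, kpte, address, base);
	if (ret < 0)
		__free_page(base);
	if (!debug_pagealloc_enabled())
		spin_unlock(&cpa_lock);

	return ret;
}

With something along these lines, the PG_LEVEL_2M/PG_LEVEL_1G case in
set_kpte() reduces to computing msk_clr and calling the helper, and the
declarations this patch adds to asm/pgtable.h go away. How the caller
should report a failure (instead of the WARN) is still an open question.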
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9b36da94760e..e65e3bc1efe0 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2997,11 +2997,11 @@
 
 	nox2apic	[X86-64,APIC] Do not enable x2APIC mode.
 
-	noxpfo		[XPFO] Disable eXclusive Page Frame Ownership (XPFO)
-			when CONFIG_XPFO is on. Physical pages mapped into
-			user applications will also be mapped in the
-			kernel's address space as if CONFIG_XPFO was not
-			enabled.
+	noxpfo		[XPFO,X86-64] Disable eXclusive Page Frame
+			Ownership (XPFO) when CONFIG_XPFO is on. Physical
+			pages mapped into user applications will also be
+			mapped in the kernel's address space as if
+			CONFIG_XPFO was not enabled.
 
 	cpu0_hotplug	[X86] Turn on CPU0 hotplug feature when
 			CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 68261430fe6e..122786604252 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -209,6 +209,7 @@ config X86
 	select USER_STACKTRACE_SUPPORT
 	select VIRT_TO_BUS
 	select X86_FEATURE_NAMES	if PROC_FS
+	select ARCH_SUPPORTS_XPFO	if X86_64
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2779ace16d23..5c0e1581fa56 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1437,6 +1437,32 @@ static inline bool arch_has_pfn_modify_check(void)
 	return boot_cpu_has_bug(X86_BUG_L1TF);
 }
 
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ */
+struct cpa_data {
+	unsigned long	*vaddr;
+	pgd_t		*pgd;
+	pgprot_t	mask_set;
+	pgprot_t	mask_clr;
+	unsigned long	numpages;
+	unsigned long	curpage;
+	unsigned long	pfn;
+	unsigned int	flags;
+	unsigned int	force_split		: 1,
+			force_static_prot	: 1;
+	struct page	**pages;
+};
+
+
+int
+should_split_large_page(pte_t *kpte, unsigned long address,
+			struct cpa_data *cpa);
+extern spinlock_t cpa_lock;
+int
+__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
+		   struct page *base);
+
 #include <asm-generic/pgtable.h>
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 4b101dd6e52f..93b0fdaf4a99 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -53,3 +53,5 @@ obj-$(CONFIG_PAGE_TABLE_ISOLATION)	+= pti.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_identity.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_boot.o
+
+obj-$(CONFIG_XPFO)		+= xpfo.o
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 14e6119838a6..530b5df0617e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -28,23 +28,6 @@
 
 #include "mm_internal.h"
 
-/*
- * The current flushing context - we pass it instead of 5 arguments:
- */
-struct cpa_data {
-	unsigned long	*vaddr;
-	pgd_t		*pgd;
-	pgprot_t	mask_set;
-	pgprot_t	mask_clr;
-	unsigned long	numpages;
-	unsigned long	curpage;
-	unsigned long	pfn;
-	unsigned int	flags;
-	unsigned int	force_split		: 1,
-			force_static_prot	: 1;
-	struct page	**pages;
-};
-
 enum cpa_warn {
 	CPA_CONFLICT,
 	CPA_PROTECT,
@@ -59,7 +42,7 @@ static const int cpa_warn_level = CPA_PROTECT;
  * entries change the page attribute in parallel to some other cpu
  * splitting a large page entry along with changing the attribute.
  */
-static DEFINE_SPINLOCK(cpa_lock);
+DEFINE_SPINLOCK(cpa_lock);
 
 #define CPA_FLUSHTLB 1
 #define CPA_ARRAY 2
@@ -876,7 +859,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
 	return 0;
 }
 
-static int should_split_large_page(pte_t *kpte, unsigned long address,
+int should_split_large_page(pte_t *kpte, unsigned long address,
 				   struct cpa_data *cpa)
 {
 	int do_split;
@@ -926,7 +909,7 @@ static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
 	set_pte(pte, pfn_pte(pfn, ref_prot));
 }
 
-static int
+int
 __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 		   struct page *base)
 {
diff --git a/arch/x86/mm/xpfo.c b/arch/x86/mm/xpfo.c
new file mode 100644
index 000000000000..3045bb7e4659
--- /dev/null
+++ b/arch/x86/mm/xpfo.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 Hewlett Packard Enterprise Development, L.P.
+ * Copyright (C) 2016 Brown University. All rights reserved.
+ *
+ * Authors:
+ * Juerg Haefliger <juerg.haefliger@hpe.com>
+ * Vasileios P. Kemerlis <vpk@cs.brown.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/mm.h>
+
+#include <asm/tlbflush.h>
+
+extern spinlock_t cpa_lock;
+
+/* Update a single kernel page table entry */
+inline void set_kpte(void *kaddr, struct page *page, pgprot_t prot)
+{
+	unsigned int level;
+	pgprot_t msk_clr;
+	pte_t *pte = lookup_address((unsigned long)kaddr, &level);
+
+	if (unlikely(!pte)) {
+		WARN(1, "xpfo: invalid address %p\n", kaddr);
+		return;
+	}
+
+	switch (level) {
+	case PG_LEVEL_4K:
+		set_pte_atomic(pte, pfn_pte(page_to_pfn(page),
+					    canon_pgprot(prot)));
+		break;
+	case PG_LEVEL_2M:
+	case PG_LEVEL_1G: {
+		struct cpa_data cpa = { };
+		int do_split;
+
+		if (level == PG_LEVEL_2M)
+			msk_clr = pmd_pgprot(*(pmd_t *)pte);
+		else
+			msk_clr = pud_pgprot(*(pud_t *)pte);
+
+		cpa.vaddr = kaddr;
+		cpa.pages = &page;
+		cpa.mask_set = prot;
+		cpa.mask_clr = msk_clr;
+		cpa.numpages = 1;
+		cpa.flags = 0;
+		cpa.curpage = 0;
+		cpa.force_split = 0;
+
+
+		do_split = should_split_large_page(pte, (unsigned long)kaddr,
+						   &cpa);
+		if (do_split) {
+			struct page *base;
+
+			base = alloc_pages(GFP_ATOMIC, 0);
+			if (!base) {
+				WARN(1, "xpfo: failed to split large page\n");
+				break;
+			}
+
+			if (!debug_pagealloc_enabled())
+				spin_lock(&cpa_lock);
+			if (__split_large_page(&cpa, pte, (unsigned long)kaddr,
+					       base) < 0) {
+				__free_page(base);
+				WARN(1, "xpfo: failed to split large page\n");
+			}
+			if (!debug_pagealloc_enabled())
+				spin_unlock(&cpa_lock);
+		}
+
+		break;
+	}
+	case PG_LEVEL_512G:
+		/* fallthrough, splitting infrastructure doesn't
+		 * support 512G pages.
+		 */
+	default:
+		WARN(1, "xpfo: unsupported page level %x\n", level);
+	}
+
+}
+EXPORT_SYMBOL_GPL(set_kpte);
+
+inline void xpfo_flush_kernel_tlb(struct page *page, int order)
+{
+	int level;
+	unsigned long size, kaddr;
+
+	kaddr = (unsigned long)page_address(page);
+
+	if (unlikely(!lookup_address(kaddr, &level))) {
+		WARN(1, "xpfo: invalid address to flush %lx %d\n", kaddr,
+		     level);
+		return;
+	}
+
+	switch (level) {
+	case PG_LEVEL_4K:
+		size = PAGE_SIZE;
+		break;
+	case PG_LEVEL_2M:
+		size = PMD_SIZE;
+		break;
+	case PG_LEVEL_1G:
+		size = PUD_SIZE;
+		break;
+	default:
+		WARN(1, "xpfo: unsupported page level %x\n", level);
+		return;
+	}
+
+	flush_tlb_kernel_range(kaddr, kaddr + (1 << order) * size);
+}
+EXPORT_SYMBOL_GPL(xpfo_flush_kernel_tlb);
diff --git a/include/linux/xpfo.h b/include/linux/xpfo.h
index 93a1b5aceca3..c1d232da7ee0 100644
--- a/include/linux/xpfo.h
+++ b/include/linux/xpfo.h
@@ -25,6 +25,8 @@ struct page;
 
 #ifdef CONFIG_XPFO
 
+#include <linux/dma-mapping.h>
+
 DECLARE_STATIC_KEY_TRUE(xpfo_inited);
 
 /* Architecture specific implementations */