Message ID | 20220405194747.2386619-3-jane.chu@oracle.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | DAX poison recovery | expand |
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
I notice that none of the folks from "X86 MM" are on the cc, added. On Tue, Apr 5, 2022 at 12:49 PM Jane Chu <jane.chu@oracle.com> wrote: > > Relocate the twin mce functions to arch/x86/mm/pat/set_memory.c > file where they belong. > > Signed-off-by: Jane Chu <jane.chu@oracle.com> > --- > arch/x86/include/asm/set_memory.h | 52 ------------------------------- > arch/x86/mm/pat/set_memory.c | 47 ++++++++++++++++++++++++++++ > include/linux/set_memory.h | 9 +++--- > 3 files changed, 52 insertions(+), 56 deletions(-) > > diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h > index 78ca53512486..b45c4d27fd46 100644 > --- a/arch/x86/include/asm/set_memory.h > +++ b/arch/x86/include/asm/set_memory.h > @@ -86,56 +86,4 @@ bool kernel_page_present(struct page *page); > > extern int kernel_set_to_readonly; > > -#ifdef CONFIG_X86_64 > -/* > - * Prevent speculative access to the page by either unmapping > - * it (if we do not require access to any part of the page) or > - * marking it uncacheable (if we want to try to retrieve data > - * from non-poisoned lines in the page). > - */ > -static inline int set_mce_nospec(unsigned long pfn, bool unmap) > -{ > - unsigned long decoy_addr; > - int rc; > - > - /* SGX pages are not in the 1:1 map */ > - if (arch_is_platform_page(pfn << PAGE_SHIFT)) > - return 0; > - /* > - * We would like to just call: > - * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); > - * but doing that would radically increase the odds of a > - * speculative access to the poison page because we'd have > - * the virtual address of the kernel 1:1 mapping sitting > - * around in registers. > - * Instead we get tricky. We create a non-canonical address > - * that looks just like the one we want, but has bit 63 flipped. > - * This relies on set_memory_XX() properly sanitizing any __pa() > - * results with __PHYSICAL_MASK or PTE_PFN_MASK. 
> - */ > - decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); > - > - if (unmap) > - rc = set_memory_np(decoy_addr, 1); > - else > - rc = set_memory_uc(decoy_addr, 1); > - if (rc) > - pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); > - return rc; > -} > -#define set_mce_nospec set_mce_nospec > - > -/* Restore full speculative operation to the pfn. */ > -static inline int clear_mce_nospec(unsigned long pfn) > -{ > - return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1); > -} > -#define clear_mce_nospec clear_mce_nospec > -#else > -/* > - * Few people would run a 32-bit kernel on a machine that supports > - * recoverable errors because they have too much memory to boot 32-bit. > - */ > -#endif > - > #endif /* _ASM_X86_SET_MEMORY_H */ > diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c > index 38af155aaba9..93dde949f224 100644 > --- a/arch/x86/mm/pat/set_memory.c > +++ b/arch/x86/mm/pat/set_memory.c > @@ -1925,6 +1925,53 @@ int set_memory_wb(unsigned long addr, int numpages) > } > EXPORT_SYMBOL(set_memory_wb); > > +#ifdef CONFIG_X86_64 It seems like the only X86_64 dependency in this routine is the address bit 63 usage, so how about: if (!IS_ENABLED(CONFIG_64BIT)) return 0; ...and drop the ifdef? Other than that you can add: Reviewed-by: Dan Williams <dan.j.williams@intel.com>
On 4/11/2022 3:20 PM, Dan Williams wrote: > I notice that none of the folks from "X86 MM" are on the cc, added. > Noted, thanks! > On Tue, Apr 5, 2022 at 12:49 PM Jane Chu <jane.chu@oracle.com> wrote: >> >> Relocate the twin mce functions to arch/x86/mm/pat/set_memory.c >> file where they belong. >> >> Signed-off-by: Jane Chu <jane.chu@oracle.com> >> --- >> arch/x86/include/asm/set_memory.h | 52 ------------------------------- >> arch/x86/mm/pat/set_memory.c | 47 ++++++++++++++++++++++++++++ >> include/linux/set_memory.h | 9 +++--- >> 3 files changed, 52 insertions(+), 56 deletions(-) >> >> diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h >> index 78ca53512486..b45c4d27fd46 100644 >> --- a/arch/x86/include/asm/set_memory.h >> +++ b/arch/x86/include/asm/set_memory.h >> @@ -86,56 +86,4 @@ bool kernel_page_present(struct page *page); >> >> extern int kernel_set_to_readonly; >> >> -#ifdef CONFIG_X86_64 >> -/* >> - * Prevent speculative access to the page by either unmapping >> - * it (if we do not require access to any part of the page) or >> - * marking it uncacheable (if we want to try to retrieve data >> - * from non-poisoned lines in the page). >> - */ >> -static inline int set_mce_nospec(unsigned long pfn, bool unmap) >> -{ >> - unsigned long decoy_addr; >> - int rc; >> - >> - /* SGX pages are not in the 1:1 map */ >> - if (arch_is_platform_page(pfn << PAGE_SHIFT)) >> - return 0; >> - /* >> - * We would like to just call: >> - * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); >> - * but doing that would radically increase the odds of a >> - * speculative access to the poison page because we'd have >> - * the virtual address of the kernel 1:1 mapping sitting >> - * around in registers. >> - * Instead we get tricky. We create a non-canonical address >> - * that looks just like the one we want, but has bit 63 flipped. 
>> - * This relies on set_memory_XX() properly sanitizing any __pa() >> - * results with __PHYSICAL_MASK or PTE_PFN_MASK. >> - */ >> - decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); >> - >> - if (unmap) >> - rc = set_memory_np(decoy_addr, 1); >> - else >> - rc = set_memory_uc(decoy_addr, 1); >> - if (rc) >> - pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); >> - return rc; >> -} >> -#define set_mce_nospec set_mce_nospec >> - >> -/* Restore full speculative operation to the pfn. */ >> -static inline int clear_mce_nospec(unsigned long pfn) >> -{ >> - return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1); >> -} >> -#define clear_mce_nospec clear_mce_nospec >> -#else >> -/* >> - * Few people would run a 32-bit kernel on a machine that supports >> - * recoverable errors because they have too much memory to boot 32-bit. >> - */ >> -#endif >> - >> #endif /* _ASM_X86_SET_MEMORY_H */ >> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c >> index 38af155aaba9..93dde949f224 100644 >> --- a/arch/x86/mm/pat/set_memory.c >> +++ b/arch/x86/mm/pat/set_memory.c >> @@ -1925,6 +1925,53 @@ int set_memory_wb(unsigned long addr, int numpages) >> } >> EXPORT_SYMBOL(set_memory_wb); >> >> +#ifdef CONFIG_X86_64 > > It seems like the only X86_64 dependency in this routine is the > address bit 63 usage, so how about: > > if (!IS_ENABLED(CONFIG_64BIT)) > return 0; > > ...and drop the ifdef? Sure. > > Other than that you can add: > > Reviewed-by: Dan Williams <dan.j.williams@intel.com> Thanks! -jane
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 78ca53512486..b45c4d27fd46 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -86,56 +86,4 @@ bool kernel_page_present(struct page *page); extern int kernel_set_to_readonly; -#ifdef CONFIG_X86_64 -/* - * Prevent speculative access to the page by either unmapping - * it (if we do not require access to any part of the page) or - * marking it uncacheable (if we want to try to retrieve data - * from non-poisoned lines in the page). - */ -static inline int set_mce_nospec(unsigned long pfn, bool unmap) -{ - unsigned long decoy_addr; - int rc; - - /* SGX pages are not in the 1:1 map */ - if (arch_is_platform_page(pfn << PAGE_SHIFT)) - return 0; - /* - * We would like to just call: - * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); - * but doing that would radically increase the odds of a - * speculative access to the poison page because we'd have - * the virtual address of the kernel 1:1 mapping sitting - * around in registers. - * Instead we get tricky. We create a non-canonical address - * that looks just like the one we want, but has bit 63 flipped. - * This relies on set_memory_XX() properly sanitizing any __pa() - * results with __PHYSICAL_MASK or PTE_PFN_MASK. - */ - decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); - - if (unmap) - rc = set_memory_np(decoy_addr, 1); - else - rc = set_memory_uc(decoy_addr, 1); - if (rc) - pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); - return rc; -} -#define set_mce_nospec set_mce_nospec - -/* Restore full speculative operation to the pfn. */ -static inline int clear_mce_nospec(unsigned long pfn) -{ - return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1); -} -#define clear_mce_nospec clear_mce_nospec -#else -/* - * Few people would run a 32-bit kernel on a machine that supports - * recoverable errors because they have too much memory to boot 32-bit. 
- */ -#endif - #endif /* _ASM_X86_SET_MEMORY_H */ diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 38af155aaba9..93dde949f224 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1925,6 +1925,53 @@ int set_memory_wb(unsigned long addr, int numpages) } EXPORT_SYMBOL(set_memory_wb); +#ifdef CONFIG_X86_64 +/* + * Prevent speculative access to the page by either unmapping + * it (if we do not require access to any part of the page) or + * marking it uncacheable (if we want to try to retrieve data + * from non-poisoned lines in the page). + */ +int set_mce_nospec(unsigned long pfn, bool unmap) +{ + unsigned long decoy_addr; + int rc; + + /* SGX pages are not in the 1:1 map */ + if (arch_is_platform_page(pfn << PAGE_SHIFT)) + return 0; + /* + * We would like to just call: + * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); + * but doing that would radically increase the odds of a + * speculative access to the poison page because we'd have + * the virtual address of the kernel 1:1 mapping sitting + * around in registers. + * Instead we get tricky. We create a non-canonical address + * that looks just like the one we want, but has bit 63 flipped. + * This relies on set_memory_XX() properly sanitizing any __pa() + * results with __PHYSICAL_MASK or PTE_PFN_MASK. + */ + decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); + + if (unmap) + rc = set_memory_np(decoy_addr, 1); + else + rc = set_memory_uc(decoy_addr, 1); + if (rc) + pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); + return rc; +} + +/* Restore full speculative operation to the pfn. 
*/ +int clear_mce_nospec(unsigned long pfn) +{ + return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1); +} +EXPORT_SYMBOL_GPL(clear_mce_nospec); + +#endif + int set_memory_x(unsigned long addr, int numpages) { if (!(__supported_pte_mask & _PAGE_NX)) diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index f36be5166c19..d6263d7afb55 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -42,20 +42,21 @@ static inline bool can_set_direct_map(void) #endif #endif /* CONFIG_ARCH_HAS_SET_DIRECT_MAP */ -#ifndef set_mce_nospec +#ifdef CONFIG_X86_64 +int set_mce_nospec(unsigned long pfn, bool unmap); +int clear_mce_nospec(unsigned long pfn); +#else static inline int set_mce_nospec(unsigned long pfn, bool unmap) { return 0; } -#endif - -#ifndef clear_mce_nospec static inline int clear_mce_nospec(unsigned long pfn) { return 0; } #endif + #ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT static inline int set_memory_encrypted(unsigned long addr, int numpages) {
Relocate the twin mce functions, set_mce_nospec() and clear_mce_nospec(), to the arch/x86/mm/pat/set_memory.c file where they belong. Signed-off-by: Jane Chu <jane.chu@oracle.com> --- arch/x86/include/asm/set_memory.h | 52 ------------------------------- arch/x86/mm/pat/set_memory.c | 47 ++++++++++++++++++++++++++++ include/linux/set_memory.h | 9 +++--- 3 files changed, 52 insertions(+), 56 deletions(-)