Message ID | 1357260531-11115-32-git-send-email-yinghai@kernel.org (mailing list archive) |
---|---|
State | Not Applicable, archived |
Headers | show |
On Thursday, January 03, 2013 04:48:51 PM Yinghai Lu wrote: > Make it only map range in pfn_mapped array. Can you please explain why that should be sufficient? Have you tested it? > and it has kernel mapping with EXEC. That's because it needs to execute code from one of those pages and it doesn't know in advance which one that's going to be. Thanks, Rafael > Signed-off-by: Yinghai Lu <yinghai@kernel.org> > Cc: Pavel Machek <pavel@ucw.cz> > Cc: Rafael J. Wysocki <rjw@sisk.pl> > Cc: linux-pm@vger.kernel.org > --- > arch/x86/power/hibernate_64.c | 66 ++++++++++++++--------------------------- > 1 file changed, 22 insertions(+), 44 deletions(-) > > diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c > index 460f314..a0fde91 100644 > --- a/arch/x86/power/hibernate_64.c > +++ b/arch/x86/power/hibernate_64.c > @@ -11,6 +11,8 @@ > #include <linux/gfp.h> > #include <linux/smp.h> > #include <linux/suspend.h> > + > +#include <asm/init.h> > #include <asm/proto.h> > #include <asm/page.h> > #include <asm/pgtable.h> > @@ -39,41 +41,21 @@ pgd_t *temp_level4_pgt; > > void *relocated_restore_code; > > -static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) > +static void *alloc_pgt_page(void *context) > { > - long i, j; > - > - i = pud_index(address); > - pud = pud + i; > - for (; i < PTRS_PER_PUD; pud++, i++) { > - unsigned long paddr; > - pmd_t *pmd; > - > - paddr = address + i*PUD_SIZE; > - if (paddr >= end) > - break; > - > - pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); > - if (!pmd) > - return -ENOMEM; > - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); > - for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { > - unsigned long pe; > - > - if (paddr >= end) > - break; > - pe = __PAGE_KERNEL_LARGE_EXEC | paddr; > - pe &= __supported_pte_mask; > - set_pmd(pmd, __pmd(pe)); > - } > - } > - return 0; > + return (void *)get_safe_page(GFP_ATOMIC); > } > > static int set_up_temporary_mappings(void) > { > - unsigned long start, end, next; > - int error; > + struct x86_mapping_info info = { > + .alloc_pgt_page = alloc_pgt_page, > + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, > + .kernel_mapping = true, > + }; > + unsigned long mstart, mend; > + int result; > + int i; > > temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); > if (!temp_level4_pgt) > @@ -84,21 +66,17 @@ static int set_up_temporary_mappings(void) > init_level4_pgt[pgd_index(__START_KERNEL_map)]); > > /* Set up the direct mapping from scratch */ > - start = (unsigned long)pfn_to_kaddr(0); > - end = (unsigned long)pfn_to_kaddr(max_pfn); > - > - for (; start < end; start = next) { > - pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); > - if (!pud) > - return -ENOMEM; > - next = start + PGDIR_SIZE; > - if (next > end) > - next = end; > - if ((error = res_phys_pud_init(pud, __pa(start), __pa(next)))) > - return error; > - set_pgd(temp_level4_pgt + pgd_index(start), > - mk_kernel_pgd(__pa(pud))); > + for (i = 0; i < nr_pfn_mapped; i++) { > + mstart = pfn_mapped[i].start << PAGE_SHIFT; > + mend = pfn_mapped[i].end << PAGE_SHIFT; > + > + result = kernel_ident_mapping_init(&info, temp_level4_pgt, > + mstart, mend); > + > + if (result) > + return result; > } > + > return 0; > } > >
On Fri, Jan 4, 2013 at 3:43 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote: > On Thursday, January 03, 2013 04:48:51 PM Yinghai Lu wrote: >> Make it only map range in pfn_mapped array. > > Can you please explain why that should be sufficient? It is needed. http://git.kernel.org/?p=linux/kernel/git/tip/tip.git;a=commitdiff;h=66520ebc2df3fe52eb4792f8101fac573b766baf Subject: [PATCH] x86, mm: Only direct map addresses that are marked as E820_RAM Currently direct mappings are created for [ 0 to max_low_pfn<<PAGE_SHIFT ) and [ 4GB to max_pfn<<PAGE_SHIFT ), which may include regions that are not backed by actual DRAM. This is fine for holes under 4GB which are covered by fixed and variable range MTRRs to be UC. However, we run into trouble on higher memory addresses which cannot be covered by MTRRs. Our system with 1TB of RAM has an e820 that looks like this: BIOS-e820: [mem 0x0000000000000000-0x00000000000983ff] usable BIOS-e820: [mem 0x0000000000098400-0x000000000009ffff] reserved BIOS-e820: [mem 0x00000000000d0000-0x00000000000fffff] reserved BIOS-e820: [mem 0x0000000000100000-0x00000000c7ebffff] usable BIOS-e820: [mem 0x00000000c7ec0000-0x00000000c7ed7fff] ACPI data BIOS-e820: [mem 0x00000000c7ed8000-0x00000000c7ed9fff] ACPI NVS BIOS-e820: [mem 0x00000000c7eda000-0x00000000c7ffffff] reserved BIOS-e820: [mem 0x00000000fec00000-0x00000000fec0ffff] reserved BIOS-e820: [mem 0x00000000fee00000-0x00000000fee00fff] reserved BIOS-e820: [mem 0x00000000fff00000-0x00000000ffffffff] reserved BIOS-e820: [mem 0x0000000100000000-0x000000e037ffffff] usable BIOS-e820: [mem 0x000000e038000000-0x000000fcffffffff] reserved BIOS-e820: [mem 0x0000010000000000-0x0000011ffeffffff] usable and so direct mappings are created for huge memory hole between 0x000000e038000000 to 0x0000010000000000. Even though the kernel never generates memory accesses in that region, since the page tables mark them incorrectly as being WB, our (AMD) processor ends up causing a MCE while doing some memory bookkeeping/optimizations around that area. This patch iterates through e820 and only direct maps ranges that are marked as E820_RAM, and keeps track of those pfn ranges. Depending on the alignment of E820 ranges, this may possibly result in using smaller size (i.e. 4K instead of 2M or 1G) page tables. > > Have you tested it? > No will update to Subject: [PATCH] x86, 64bit, mm: hibernate use generic mapping_init We should not set mapping for all under max_pfn. That causes same problem that is fixed by x86, mm: Only direct map addresses that are marked as E820_RAM Make it only map range in pfn_mapped array. -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Friday, January 04, 2013 01:59:33 PM Yinghai Lu wrote: > On Fri, Jan 4, 2013 at 3:43 AM, Rafael J. Wysocki <rjw@sisk.pl> wrote: > > On Thursday, January 03, 2013 04:48:51 PM Yinghai Lu wrote: > >> Make it only map range in pfn_mapped array. > > > > Can you please explain why that should be sufficient? > > It is needed. > > http://git.kernel.org/?p=linux/kernel/git/tip/tip.git;a=commitdiff;h=66520ebc2df3fe52eb4792f8101fac573b766baf > > Subject: [PATCH] x86, mm: Only direct map addresses that are marked as > E820_RAM > > Currently direct mappings are created for [ 0 to max_low_pfn<<PAGE_SHIFT ) > and [ 4GB to max_pfn<<PAGE_SHIFT ), which may include regions that are not > backed by actual DRAM. This is fine for holes under 4GB which are covered > by fixed and variable range MTRRs to be UC. However, we run into trouble > on higher memory addresses which cannot be covered by MTRRs. > > Our system with 1TB of RAM has an e820 that looks like this: > > BIOS-e820: [mem 0x0000000000000000-0x00000000000983ff] usable > BIOS-e820: [mem 0x0000000000098400-0x000000000009ffff] reserved > BIOS-e820: [mem 0x00000000000d0000-0x00000000000fffff] reserved > BIOS-e820: [mem 0x0000000000100000-0x00000000c7ebffff] usable > BIOS-e820: [mem 0x00000000c7ec0000-0x00000000c7ed7fff] ACPI data > BIOS-e820: [mem 0x00000000c7ed8000-0x00000000c7ed9fff] ACPI NVS > BIOS-e820: [mem 0x00000000c7eda000-0x00000000c7ffffff] reserved > BIOS-e820: [mem 0x00000000fec00000-0x00000000fec0ffff] reserved > BIOS-e820: [mem 0x00000000fee00000-0x00000000fee00fff] reserved > BIOS-e820: [mem 0x00000000fff00000-0x00000000ffffffff] reserved > BIOS-e820: [mem 0x0000000100000000-0x000000e037ffffff] usable > BIOS-e820: [mem 0x000000e038000000-0x000000fcffffffff] reserved > BIOS-e820: [mem 0x0000010000000000-0x0000011ffeffffff] usable > > and so direct mappings are created for huge memory hole between > 0x000000e038000000 to 0x0000010000000000. Even though the kernel never > generates memory accesses in that region, since the page tables mark > them incorrectly as being WB, our (AMD) processor ends up causing a MCE > while doing some memory bookkeeping/optimizations around that area. > > This patch iterates through e820 and only direct maps ranges that are > marked as E820_RAM, and keeps track of those pfn ranges. Depending on > the alignment of E820 ranges, this may possibly result in using smaller > size (i.e. 4K instead of 2M or 1G) page tables. > > > > > Have you tested it? > > > > No > > will update to > > Subject: [PATCH] x86, 64bit, mm: hibernate use generic mapping_init > > We should not set mapping for all under max_pfn. > That causes same problem that is fixed by > x86, mm: Only direct map addresses that are marked as E820_RAM > > Make it only map range in pfn_mapped array. OK Thanks, Rafael
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 460f314..a0fde91 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -11,6 +11,8 @@ #include <linux/gfp.h> #include <linux/smp.h> #include <linux/suspend.h> + +#include <asm/init.h> #include <asm/proto.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -39,41 +41,21 @@ pgd_t *temp_level4_pgt; void *relocated_restore_code; -static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) +static void *alloc_pgt_page(void *context) { - long i, j; - - i = pud_index(address); - pud = pud + i; - for (; i < PTRS_PER_PUD; pud++, i++) { - unsigned long paddr; - pmd_t *pmd; - - paddr = address + i*PUD_SIZE; - if (paddr >= end) - break; - - pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); - if (!pmd) - return -ENOMEM; - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); - for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { - unsigned long pe; - - if (paddr >= end) - break; - pe = __PAGE_KERNEL_LARGE_EXEC | paddr; - pe &= __supported_pte_mask; - set_pmd(pmd, __pmd(pe)); - } - } - return 0; + return (void *)get_safe_page(GFP_ATOMIC); } static int set_up_temporary_mappings(void) { - unsigned long start, end, next; - int error; + struct x86_mapping_info info = { + .alloc_pgt_page = alloc_pgt_page, + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, + .kernel_mapping = true, + }; + unsigned long mstart, mend; + int result; + int i; temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); if (!temp_level4_pgt) @@ -84,21 +66,17 @@ static int set_up_temporary_mappings(void) init_level4_pgt[pgd_index(__START_KERNEL_map)]); /* Set up the direct mapping from scratch */ - start = (unsigned long)pfn_to_kaddr(0); - end = (unsigned long)pfn_to_kaddr(max_pfn); - - for (; start < end; start = next) { - pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); - if (!pud) - return -ENOMEM; - next = start + PGDIR_SIZE; - if (next > end) - next = end; - if ((error = res_phys_pud_init(pud, __pa(start), __pa(next)))) - return error; - set_pgd(temp_level4_pgt + pgd_index(start), - mk_kernel_pgd(__pa(pud))); + for (i = 0; i < nr_pfn_mapped; i++) { + mstart = pfn_mapped[i].start << PAGE_SHIFT; + mend = pfn_mapped[i].end << PAGE_SHIFT; + + result = kernel_ident_mapping_init(&info, temp_level4_pgt, + mstart, mend); + + if (result) + return result; } + return 0; }
Make it only map range in pfn_mapped array. and it has kernel mapping with EXEC. Signed-off-by: Yinghai Lu <yinghai@kernel.org> Cc: Pavel Machek <pavel@ucw.cz> Cc: Rafael J. Wysocki <rjw@sisk.pl> Cc: linux-pm@vger.kernel.org --- arch/x86/power/hibernate_64.c | 66 ++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 44 deletions(-)