| Message ID | 1594004178-8861-2-git-send-email-anshuman.khandual@arm.com (mailing list archive) |
|---|---|
| State | Mainlined |
| Commit | 1d9cfee7535c213038a615f112c900c2d0ba8f54 |
| Series | arm64: Enable vmemmap mapping from device memory |
> 	return 0;
> @@ -1505,7 +1505,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
>  	int err;
>  
>  	if (end - start < PAGES_PER_SECTION * sizeof(struct page))
> -		err = vmemmap_populate_basepages(start, end, node);
> +		err = vmemmap_populate_basepages(start, end, node, NULL);
>  	else if (boot_cpu_has(X86_FEATURE_PSE))
>  		err = vmemmap_populate_hugepages(start, end, node, altmap);
>  	else if (altmap) {

It's somewhat weird that we don't allocate basepages from altmap on x86
(both for sub-sections and without PSE). I wonder if we can simply
unlock that with your change. Especially, also handle the
!X86_FEATURE_PSE case below properly with an altmap.

a) all hw with PMEM has PSE - except special QEMU setups, so nobody
cared to implement it. For the sub-section special case, nobody cared about
a handful of memmap not ending up on the altmap. (but it's still wasted
system memory IIRC).

b) the pagetable overhead for small pages is not negligible and might
result in similar issues as solved by the switch to altmap on very huge
PMEM (with small amount of system RAM).

I guess it is due to a).

[...]

> 
> -pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
> +pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
> +				       struct vmem_altmap *altmap)
>  {
>  	pte_t *pte = pte_offset_kernel(pmd, addr);
>  	if (pte_none(*pte)) {
>  		pte_t entry;
> -		void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
> +		void *p;
> +
> +		if (altmap)
> +			p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
> +		else
> +			p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
>  		if (!p)
>  			return NULL;

I was wondering if

if (altmap)
	p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
if (!p)
	p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
if (!p)
	return NULL

would make sense. But I guess this isn't really relevant in practice,
because the altmap is usually sized properly.

In general, LGTM.
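For context, the x86 change David is hinting at would roughly mean forwarding the altmap on the basepage paths as well, instead of passing NULL. A minimal, purely hypothetical sketch (not part of this series), assuming nothing else on x86 prevents the basepage memmap from being placed on the altmap:

```c
/*
 * Hypothetical sketch of x86 vmemmap_populate() if the basepage paths
 * (the sub-section case and the !X86_FEATURE_PSE fallback) were also
 * allowed to allocate the memmap from the altmap.  The pr_err_once()
 * branch for "altmap without PSE" would then no longer be needed.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	int err;

	if (end - start < PAGES_PER_SECTION * sizeof(struct page))
		err = vmemmap_populate_basepages(start, end, node, altmap);
	else if (boot_cpu_has(X86_FEATURE_PSE))
		err = vmemmap_populate_hugepages(start, end, node, altmap);
	else
		err = vmemmap_populate_basepages(start, end, node, altmap);

	if (!err)
		sync_global_pgds(start, end - 1);
	return err;
}
```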
On 07/06/2020 02:33 PM, David Hildenbrand wrote:
>>  	return 0;
>> @@ -1505,7 +1505,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
>>  	int err;
>>  
>>  	if (end - start < PAGES_PER_SECTION * sizeof(struct page))
>> -		err = vmemmap_populate_basepages(start, end, node);
>> +		err = vmemmap_populate_basepages(start, end, node, NULL);
>>  	else if (boot_cpu_has(X86_FEATURE_PSE))
>>  		err = vmemmap_populate_hugepages(start, end, node, altmap);
>>  	else if (altmap) {
> 
> It's somewhat weird that we don't allocate basepages from altmap on x86
> (both for sub-sections and without PSE). I wonder if we can simply
> unlock that with your change. Especially, also handle the
> !X86_FEATURE_PSE case below properly with an altmap.
> 
> a) all hw with PMEM has PSE - except special QEMU setups, so nobody
> cared to implement it. For the sub-section special case, nobody cared about
> a handful of memmap not ending up on the altmap. (but it's still wasted
> system memory IIRC).
> 
> b) the pagetable overhead for small pages is not negligible and might
> result in similar issues as solved by the switch to altmap on very huge
> PMEM (with small amount of system RAM).
> 
> I guess it is due to a).

Hmm, I assume these are some decisions that the x86 platform will have to
make going forward in a subsequent patch, as the third patch does for
the arm64 platform. But it is clearly beyond the scope of this patch,
which never intended to change existing behavior on a given platform.

> 
> [...]
> 
>> 
>> -pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
>> +pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
>> +				       struct vmem_altmap *altmap)
>>  {
>>  	pte_t *pte = pte_offset_kernel(pmd, addr);
>>  	if (pte_none(*pte)) {
>>  		pte_t entry;
>> -		void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
>> +		void *p;
>> +
>> +		if (altmap)
>> +			p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
>> +		else
>> +			p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
>>  		if (!p)
>>  			return NULL;
> 
> I was wondering if
> 
> if (altmap)
> 	p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
> if (!p)
> 	p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
> if (!p)
> 	return NULL
> 
> would make sense. But I guess this isn't really relevant in practice,
> because the altmap is usually sized properly.
> 
> In general, LGTM.

Okay, I assume that no further changes are required here.
> Hmm, I assume these are some decisions that the x86 platform will have to
> make going forward in a subsequent patch, as the third patch does for
> the arm64 platform. But it is clearly beyond the scope of this patch,
> which never intended to change existing behavior on a given platform.
> 

Yeah, I would be curious if my assumption is correct.

>> 
>> [...]
>> 
>>> 
>>> -pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
>>> +pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
>>> +				       struct vmem_altmap *altmap)
>>>  {
>>>  	pte_t *pte = pte_offset_kernel(pmd, addr);
>>>  	if (pte_none(*pte)) {
>>>  		pte_t entry;
>>> -		void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
>>> +		void *p;
>>> +
>>> +		if (altmap)
>>> +			p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
>>> +		else
>>> +			p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
>>>  		if (!p)
>>>  			return NULL;
>> 
>> I was wondering if
>> 
>> if (altmap)
>> 	p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
>> if (!p)
>> 	p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
>> if (!p)
>> 	return NULL
>> 
>> would make sense. But I guess this isn't really relevant in practice,
>> because the altmap is usually sized properly.
>> 
>> In general, LGTM.
> 
> Okay, I assume that no further changes are required here.
> 

Jep,

Reviewed-by: David Hildenbrand <david@redhat.com>
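If David's fallback idea were ever adopted, the allocation inside vmemmap_pte_populate() might look like the sketch below (hypothetical, not part of the applied patch). One detail the pseudocode above glosses over: p has to start out as NULL so that the !altmap case actually falls through to the regular allocator.

```c
/*
 * Hypothetical allocation path for vmemmap_pte_populate(): prefer the
 * altmap, fall back to a regular vmemmap block if the altmap is missing
 * or exhausted.  'p' is initialized to NULL so the fallback also runs
 * when no altmap was supplied.
 */
void *p = NULL;

if (altmap)
	p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
if (!p)
	p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
if (!p)
	return NULL;
```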
```diff
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 1df25f26571d..63b74fd56cd8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1069,7 +1069,7 @@ static void free_empty_tables(unsigned long addr, unsigned long end,
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap)
 {
-	return vmemmap_populate_basepages(start, end, node);
+	return vmemmap_populate_basepages(start, end, node, NULL);
 }
 #else	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index dd8284bcbf16..4d23c81f96d8 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -656,7 +656,7 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap)
 {
-	return vmemmap_populate_basepages(start, end, node);
+	return vmemmap_populate_basepages(start, end, node, NULL);
 }
 
 void vmemmap_free(unsigned long start, unsigned long end,
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index f4adb3684f3d..8101170f54ac 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -530,6 +530,6 @@ void __init paging_init(void)
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 			       struct vmem_altmap *altmap)
 {
-	return vmemmap_populate_basepages(start, end, node);
+	return vmemmap_populate_basepages(start, end, node, NULL);
 }
 #endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index dbae185511cd..19c0ed3271a3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1493,7 +1493,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
 			vmemmap_verify((pte_t *)pmd, node, addr, next);
 			continue;
 		}
-		if (vmemmap_populate_basepages(addr, next, node))
+		if (vmemmap_populate_basepages(addr, next, node, NULL))
 			return -ENOMEM;
 	}
 	return 0;
@@ -1505,7 +1505,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 	int err;
 
 	if (end - start < PAGES_PER_SECTION * sizeof(struct page))
-		err = vmemmap_populate_basepages(start, end, node);
+		err = vmemmap_populate_basepages(start, end, node, NULL);
 	else if (boot_cpu_has(X86_FEATURE_PSE))
 		err = vmemmap_populate_hugepages(start, end, node, altmap);
 	else if (altmap) {
@@ -1513,7 +1513,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 			__func__);
 		err = -ENOMEM;
 	} else
-		err = vmemmap_populate_basepages(start, end, node);
+		err = vmemmap_populate_basepages(start, end, node, NULL);
 	if (!err)
 		sync_global_pgds(start, end - 1);
 	return err;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b87310c10..e40ac543d248 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3011,14 +3011,15 @@ pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
 p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
 pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
 pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
-pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
+pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
+			    struct vmem_altmap *altmap);
 void *vmemmap_alloc_block(unsigned long size, int node);
 struct vmem_altmap;
 void *vmemmap_alloc_block_buf(unsigned long size, int node);
 void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap);
 void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
 int vmemmap_populate_basepages(unsigned long start, unsigned long end,
-			       int node);
+			       int node, struct vmem_altmap *altmap);
 int vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap);
 void vmemmap_populate_print_last(void);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 0db7738d76e9..ceed10dec31e 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -139,12 +139,18 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
 			start, end - 1);
 }
 
-pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
+pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
+				       struct vmem_altmap *altmap)
 {
 	pte_t *pte = pte_offset_kernel(pmd, addr);
 	if (pte_none(*pte)) {
 		pte_t entry;
-		void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
+		void *p;
+
+		if (altmap)
+			p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
+		else
+			p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
 		if (!p)
 			return NULL;
 		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
@@ -212,8 +218,8 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
 	return pgd;
 }
 
-int __meminit vmemmap_populate_basepages(unsigned long start,
-					 unsigned long end, int node)
+int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
+					 int node, struct vmem_altmap *altmap)
 {
 	unsigned long addr = start;
 	pgd_t *pgd;
@@ -235,7 +241,7 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
 		pmd = vmemmap_pmd_populate(pud, addr, node);
 		if (!pmd)
 			return -ENOMEM;
-		pte = vmemmap_pte_populate(pmd, addr, node);
+		pte = vmemmap_pte_populate(pmd, addr, node, altmap);
 		if (!pte)
 			return -ENOMEM;
 		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
```
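With the generic helper accepting an altmap, an architecture opts in simply by forwarding the altmap it receives instead of NULL. A minimal sketch of such a caller, roughly what the later arm64 patch in this series is intended to do:

```c
/*
 * Sketch of an arch-level vmemmap_populate() that passes its altmap
 * through, so every PTE-level memmap page is carved out of the device
 * memory described by the altmap rather than out of system RAM.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	return vmemmap_populate_basepages(start, end, node, altmap);
}
```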