Message ID | 50608D3D.2010106@ti.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Sep 24, 2012 at 05:41:33PM +0100, Cyril Chemparathy wrote: > On 9/24/2012 11:22 AM, Russell King - ARM Linux wrote: > > On Mon, Sep 24, 2012 at 11:09:50AM -0400, Cyril Chemparathy wrote: > >> On 9/24/2012 9:41 AM, Russell King - ARM Linux wrote: > >>> On Mon, Sep 24, 2012 at 02:29:42PM +0100, Catalin Marinas wrote: > >>>> This function also calls free_bootmem() which takes unsigned long. Are > >>>> patches sent separately for this or we just ignore holes in memmap? > >>>> There are other calls to free_bootmem() or reserve_bootmem(), do they > >>>> just work with the high phys addresses? > >>> > >>> Bootmem only deals with physical addresses which fit within the size > >>> of an 'unsigned long'. Unfortunately, the bootmem API is a mess of > >>> 'unsigned long' physical addresses and PFNs. > >>> > >>> Years ago there was a patch to make it use only PFNs but other changes > >>> resulted in that patch being thrown away. > >>> > >> > >> A separate patch has been posted for bootmem (see [1]). > >> > >> Tejun suggested that we'd be better off moving entirely to memblock > >> instead (see [2]). > > > > Yes we should, but that's not as easy as typing a few words in an email. > > When I tried it when I integrated memblock into our boot sequence, I > > found that sparsemem wouldn't work without bootmem being in place. > > > > It may be that things have now moved on, and we can just eliminate > > bootmem once and for all, but that's something I've not looked at for > > quite some time. > > > > It appears to be not that hard actually... Or maybe I'm totally missing > your point. Could it be that you last looked at this prior to the > nobootmem compatibility stuff being added in? I was just trying the same :) > +static unsigned long free_all_lowmem(void) I don't think that's needed. free_all_bootmem() in mm/nobootmem.c takes care of freeing lowmem but it has a different notion of max_low_pfn. So this hunk did the trick: @@ -420,8 +366,8 @@ void __init bootmem_init(void) * Note: max_low_pfn and max_pfn reflect the number of _pages_ in * the system, not the maximum PFN. */ - max_low_pfn = max_low - PHYS_PFN_OFFSET; - max_pfn = max_high - PHYS_PFN_OFFSET; + max_low_pfn = max_low; + max_pfn = max_high; } static inline int free_area(unsigned long pfn, unsigned long end, char *s)
On Mon, Sep 24, 2012 at 12:41:33PM -0400, Cyril Chemparathy wrote: > It appears to be not that hard actually... Or maybe I'm totally missing > your point. Could it be that you last looked at this prior to the > nobootmem compatibility stuff being added in? Quite possible - we converted over to memblock here: commit 2778f62056ada442414392d7ccd41188bb631619 Author: Russell King <rmk+kernel@arm.linux.org.uk> Date: Fri Jul 9 16:27:52 2010 +0100 It looks like nobootmem.c was created here: commit 0932587328d9bd5b500a640fbaff3290c8d4cabf Author: Yinghai Lu <yinghai@kernel.org> Date: Thu Feb 24 14:43:05 2011 +0100 So, when I was porting ARM to memblock, we had: commit 08677214e318297f228237be0042aac754f48f1d Author: Yinghai Lu <yinghai@kernel.org> Date: Wed Feb 10 01:20:20 2010 -0800 which causes a load of bootmem functions to panic() when bootmem is disabled. > The following patch appears to work just fine on a faked sparsemem system: Great. Don't forget to also check flatmem too, just to be sure. > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig > index 92f598a..e94fa4c 100644 > --- a/arch/arm/Kconfig > +++ b/arch/arm/Kconfig > @@ -49,6 +49,7 @@ config ARM > select GENERIC_STRNCPY_FROM_USER > select GENERIC_STRNLEN_USER > select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && > !CPU_BIG_ENDIAN > + select NO_BOOTMEM > help > The ARM series is a line of low-power-consumption RISC chip designs > licensed by ARM Ltd and targeted at embedded applications and > diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c > index 29560d8..0ca6c00 100644 > --- a/arch/arm/mm/init.c > +++ b/arch/arm/mm/init.c > @@ -150,58 +150,6 @@ static void __init find_limits(unsigned long *min, > unsigned long *max_low, > *max_high = bank_pfn_end(&mi->bank[mi->nr_banks - 1]); > } > > -static void __init arm_bootmem_init(unsigned long start_pfn, > - unsigned long end_pfn) > -{ > - struct memblock_region *reg; > - unsigned int boot_pages; > - phys_addr_t bitmap; > - pg_data_t *pgdat; > - > - /* > - * Allocate the bootmem bitmap page. This must be in a region > - * of memory which has already been mapped. > - */ > - boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn); > - bitmap = memblock_alloc_base(boot_pages << PAGE_SHIFT, L1_CACHE_BYTES, > - __pfn_to_phys(end_pfn)); > - > - /* > - * Initialise the bootmem allocator, handing the > - * memory banks over to bootmem. > - */ > - node_set_online(0); > - pgdat = NODE_DATA(0); > - init_bootmem_node(pgdat, __phys_to_pfn(bitmap), start_pfn, end_pfn); > - > - /* Free the lowmem regions from memblock into bootmem. */ > - for_each_memblock(memory, reg) { > - unsigned long start = memblock_region_memory_base_pfn(reg); > - unsigned long end = memblock_region_memory_end_pfn(reg); > - > - if (end >= end_pfn) > - end = end_pfn; > - if (start >= end) > - break; > - > - free_bootmem(__pfn_to_phys(start), (end - start) << PAGE_SHIFT); > - } > - > - /* Reserve the lowmem memblock reserved regions in bootmem. */ > - for_each_memblock(reserved, reg) { > - unsigned long start = memblock_region_reserved_base_pfn(reg); > - unsigned long end = memblock_region_reserved_end_pfn(reg); > - > - if (end >= end_pfn) > - end = end_pfn; > - if (start >= end) > - break; > - > - reserve_bootmem(__pfn_to_phys(start), > - (end - start) << PAGE_SHIFT, BOOTMEM_DEFAULT); > - } > -} > - > #ifdef CONFIG_ZONE_DMA > > unsigned long arm_dma_zone_size __read_mostly; > @@ -393,8 +341,6 @@ void __init bootmem_init(void) > > find_limits(&min, &max_low, &max_high); > > - arm_bootmem_init(min, max_low); > - > /* > * Sparsemem tries to allocate bootmem in memory_present(), > * so must be done after the fixed reservations > @@ -443,6 +389,54 @@ static inline int free_area(unsigned long pfn, > unsigned long end, char *s) > return pages; > } > > +static unsigned long free_all_lowmem(void) > +{ > + unsigned long max_low = max_low_pfn + PHYS_PFN_OFFSET; > + struct memblock_region *mem, *res; > + unsigned long total_pages = 0; > + > + /* set highmem page free */ > + for_each_memblock(memory, mem) { > + unsigned long start = memblock_region_memory_base_pfn(mem); > + unsigned long end = memblock_region_memory_end_pfn(mem); > + > + /* Ignore complete lowmem entries */ > + if (start > max_low) > + continue; > + > + /* Truncate partial highmem entries */ > + if (end > max_low) > + end = max_low; > + > + /* Find and exclude any reserved regions */ > + for_each_memblock(reserved, res) { > + unsigned long res_start, res_end; > + > + res_start = memblock_region_reserved_base_pfn(res); > + res_end = memblock_region_reserved_end_pfn(res); > + > + if (res_end < start) > + continue; > + if (res_start < start) > + res_start = start; > + if (res_start > end) > + res_start = end; > + if (res_end > end) > + res_end = end; > + if (res_start != start) > + total_pages += free_area(start, res_start, NULL); > + start = res_end; > + if (start == end) > + break; > + } > + > + /* And now free anything which remains */ > + if (start < end) > + total_pages += free_area(start, end, NULL); > + } > + return total_pages; > +} > + > /* > * Poison init memory with an undefined instruction (ARM) or a branch > to an > * undefined instruction (Thumb). > @@ -606,7 +600,7 @@ void __init mem_init(void) > /* this will put all unused low memory onto the freelists */ > free_unused_memmap(&meminfo); > > - totalram_pages += free_all_bootmem(); > + totalram_pages += free_all_lowmem(); > > #ifdef CONFIG_SA1111 > /* now that our DMA memory is actually so designated, we can free it */ > > -- > Thanks > - Cyril
On Mon, Sep 24, 2012 at 05:55:55PM +0100, Russell King - ARM Linux wrote: > On Mon, Sep 24, 2012 at 12:41:33PM -0400, Cyril Chemparathy wrote: > > It appears to be not that hard actually... Or maybe I'm totally missing > > your point. Could it be that you last looked at this prior to the > > nobootmem compatibility stuff being added in? > > Quite possible - we converted over to memblock here: > > commit 2778f62056ada442414392d7ccd41188bb631619 > Author: Russell King <rmk+kernel@arm.linux.org.uk> > Date: Fri Jul 9 16:27:52 2010 +0100 > > It looks like nobootmem.c was created here: > > commit 0932587328d9bd5b500a640fbaff3290c8d4cabf > Author: Yinghai Lu <yinghai@kernel.org> > Date: Thu Feb 24 14:43:05 2011 +0100 > > So, when I was porting ARM to memblock, we had: > > commit 08677214e318297f228237be0042aac754f48f1d > Author: Yinghai Lu <yinghai@kernel.org> > Date: Wed Feb 10 01:20:20 2010 -0800 > > which causes a load of bootmem functions to panic() when bootmem is > disabled. > > > The following patch appears to work just fine on a faked sparsemem system: > > Great. Don't forget to also check flatmem too, just to be sure. There is still a problem since nobootmem.c doesn't handle 64-bit physical addresses, so calling free_bootmem() still truncates the values. Simply replacing bootmem.c with nobootmem.c is not enough, we need to also replace bootmem calls with the corresponding memblock ones.
On 9/24/2012 12:51 PM, Catalin Marinas wrote: > On Mon, Sep 24, 2012 at 05:41:33PM +0100, Cyril Chemparathy wrote: >> On 9/24/2012 11:22 AM, Russell King - ARM Linux wrote: >>> On Mon, Sep 24, 2012 at 11:09:50AM -0400, Cyril Chemparathy wrote: >>>> On 9/24/2012 9:41 AM, Russell King - ARM Linux wrote: >>>>> On Mon, Sep 24, 2012 at 02:29:42PM +0100, Catalin Marinas wrote: >>>>>> This function also calls free_bootmem() which takes unsigned long. Are >>>>>> patches sent separately for this or we just ignore holes in memmap? >>>>>> There are other calls to free_bootmem() or reserve_bootmem(), do they >>>>>> just work with the high phys addresses? >>>>> >>>>> Bootmem only deals with physical addresses which fit within the size >>>>> of an 'unsigned long'. Unfortunately, the bootmem API is a mess of >>>>> 'unsigned long' physical addresses and PFNs. >>>>> >>>>> Years ago there was a patch to make it use only PFNs but other changes >>>>> resulted in that patch being thrown away. >>>>> >>>> >>>> A separate patch has been posted for bootmem (see [1]). >>>> >>>> Tejun suggested that we'd be better off moving entirely to memblock >>>> instead (see [2]). >>> >>> Yes we should, but that's not as easy as typing a few words in an email. >>> When I tried it when I integrated memblock into our boot sequence, I >>> found that sparsemem wouldn't work without bootmem being in place. >>> >>> It may be that things have now moved on, and we can just eliminate >>> bootmem once and for all, but that's something I've not looked at for >>> quite some time. >>> >> >> It appears to be not that hard actually... Or maybe I'm totally missing >> your point. Could it be that you last looked at this prior to the >> nobootmem compatibility stuff being added in? > > I was just trying the same :) > >> +static unsigned long free_all_lowmem(void) > > I don't think that's needed. free_all_bootmem() in mm/nobootmem.c takes > care of freeing lowmem but it has a different notion of max_low_pfn. So > this hunk did the trick: > Indeed. My intent behind cloning (and hacking) free_highpages(), was to unify these into a single step free-all-memory operation that operates directly on memblock. > @@ -420,8 +366,8 @@ void __init bootmem_init(void) > * Note: max_low_pfn and max_pfn reflect the number of _pages_ in > * the system, not the maximum PFN. > */ > - max_low_pfn = max_low - PHYS_PFN_OFFSET; > - max_pfn = max_high - PHYS_PFN_OFFSET; > + max_low_pfn = max_low; > + max_pfn = max_high; > } > > static inline int free_area(unsigned long pfn, unsigned long end, char *s) >
On Mon, Sep 24, 2012 at 05:51:46PM +0100, Catalin Marinas wrote: > I don't think that's needed. free_all_bootmem() in mm/nobootmem.c takes > care of freeing lowmem but it has a different notion of max_low_pfn. So > this hunk did the trick: > > @@ -420,8 +366,8 @@ void __init bootmem_init(void) > * Note: max_low_pfn and max_pfn reflect the number of _pages_ in > * the system, not the maximum PFN. > */ > - max_low_pfn = max_low - PHYS_PFN_OFFSET; > - max_pfn = max_high - PHYS_PFN_OFFSET; > + max_low_pfn = max_low; > + max_pfn = max_high; > } Did you actually look to see where that's used before you made the change. I don't think you did. The reason we have that there is that much of the kernel assumes memory always starts at physical zero, so the max*pfn variables can be used to generate bitmasks to cover the range of system memory addresses - iow, (1 << max_low_pfn) - 1. Eg, in the block code: blk_max_low_pfn = max_low_pfn - 1; blk_max_pfn = max_pfn - 1; ... unsigned long b_pfn = dma_mask >> PAGE_SHIFT; if (b_pfn < blk_max_low_pfn) dma = 1; Having a DMA mask for a peripheral which only has 24 bits wired (so 0x00ffffff) with a system memory offset of 0xc0000000 results in apparantly _all_ system memory being DMA-able according to this test unless max_low_pfn is defined as the _number_ of bits in the RAM address. In dma_get_required_mask(): u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); low_totalram = (1 << (fls(low_totalram) - 1)); low_totalram += low_totalram - 1; which results in (for a phys offset of 0xc0000000) low_totalram being 0xffffffff unconditionally no matter how much RAM you actually have. fs/proc/kcore.c can be ignored because that's not supported on ARM. So, for DMA masks to work correctly, max_pfn and max_low_pfn must be defined to cover the number of signiciant address bits in the RAM region, and not covering the number of significant bits of the last RAM address.
On Mon, Sep 24, 2012 at 06:14:25PM +0100, Russell King - ARM Linux wrote: > On Mon, Sep 24, 2012 at 05:51:46PM +0100, Catalin Marinas wrote: > > I don't think that's needed. free_all_bootmem() in mm/nobootmem.c takes > > care of freeing lowmem but it has a different notion of max_low_pfn. So > > this hunk did the trick: > > > > @@ -420,8 +366,8 @@ void __init bootmem_init(void) > > * Note: max_low_pfn and max_pfn reflect the number of _pages_ in > > * the system, not the maximum PFN. > > */ > > - max_low_pfn = max_low - PHYS_PFN_OFFSET; > > - max_pfn = max_high - PHYS_PFN_OFFSET; > > + max_low_pfn = max_low; > > + max_pfn = max_high; > > } > > Did you actually look to see where that's used before you made the change. > I don't think you did. > > The reason we have that there is that much of the kernel assumes memory > always starts at physical zero, so the max*pfn variables can be used to > generate bitmasks to cover the range of system memory addresses - iow, > (1 << max_low_pfn) - 1. > > Eg, in the block code: > > blk_max_low_pfn = max_low_pfn - 1; > blk_max_pfn = max_pfn - 1; > ... > unsigned long b_pfn = dma_mask >> PAGE_SHIFT; > > if (b_pfn < blk_max_low_pfn) > dma = 1; > > Having a DMA mask for a peripheral which only has 24 bits wired (so > 0x00ffffff) with a system memory offset of 0xc0000000 results in > apparantly _all_ system memory being DMA-able according to this test > unless max_low_pfn is defined as the _number_ of bits in the RAM > address. > > In dma_get_required_mask(): > > u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); > > low_totalram = (1 << (fls(low_totalram) - 1)); > low_totalram += low_totalram - 1; > > which results in (for a phys offset of 0xc0000000) low_totalram being > 0xffffffff unconditionally no matter how much RAM you actually have. And for those platforms the phys and bus (dma) addresses are different. So it's not about whether the physical RAM starts at 0 but whether the device has a different view of the RAM address space. The reverse could also be problematic if phys == bus and max_low_pfn corresponds to an address that's not actually accessible for the device (though in practice I don't expect this). There is no consistency in the kernel on whether max_low_pfn is absolute or relative (nobootmem.c considers it absolute). There is also a min_low_pfn which could be used to update the above functions or introduce a new min_dma_pfn.
On Tue, Sep 25, 2012 at 02:08:04PM +0100, Catalin Marinas wrote: > On Mon, Sep 24, 2012 at 06:14:25PM +0100, Russell King - ARM Linux wrote: > > On Mon, Sep 24, 2012 at 05:51:46PM +0100, Catalin Marinas wrote: > > > I don't think that's needed. free_all_bootmem() in mm/nobootmem.c takes > > > care of freeing lowmem but it has a different notion of max_low_pfn. So > > > this hunk did the trick: > > > > > > @@ -420,8 +366,8 @@ void __init bootmem_init(void) > > > * Note: max_low_pfn and max_pfn reflect the number of _pages_ in > > > * the system, not the maximum PFN. > > > */ > > > - max_low_pfn = max_low - PHYS_PFN_OFFSET; > > > - max_pfn = max_high - PHYS_PFN_OFFSET; > > > + max_low_pfn = max_low; > > > + max_pfn = max_high; > > > } > > > > Did you actually look to see where that's used before you made the change. > > I don't think you did. > > > > The reason we have that there is that much of the kernel assumes memory > > always starts at physical zero, so the max*pfn variables can be used to > > generate bitmasks to cover the range of system memory addresses - iow, > > (1 << max_low_pfn) - 1. > > > > Eg, in the block code: > > > > blk_max_low_pfn = max_low_pfn - 1; > > blk_max_pfn = max_pfn - 1; > > ... > > unsigned long b_pfn = dma_mask >> PAGE_SHIFT; > > > > if (b_pfn < blk_max_low_pfn) > > dma = 1; > > > > Having a DMA mask for a peripheral which only has 24 bits wired (so > > 0x00ffffff) with a system memory offset of 0xc0000000 results in > > apparantly _all_ system memory being DMA-able according to this test > > unless max_low_pfn is defined as the _number_ of bits in the RAM > > address. > > > > In dma_get_required_mask(): > > > > u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); > > > > low_totalram = (1 << (fls(low_totalram) - 1)); > > low_totalram += low_totalram - 1; > > > > which results in (for a phys offset of 0xc0000000) low_totalram being > > 0xffffffff unconditionally no matter how much RAM you actually have. > > And for those platforms the phys and bus (dma) addresses are different. That doesn't have anything to do with it actually. If you look at the above analysis, where the DMA addresses are has nothing to do with it. > So it's not about whether the physical RAM starts at 0 but whether the > device has a different view of the RAM address space. Yes it is. Much of the kernel assumes PFN0 equals physical address 0. That's a pretty sane assumption. max_pfn/max_low_pfn are PFNs, so the theoretical correct value for these should be max_physical_address_of_ram >> PAGE_SHIFT. But, as I say, that breaks _anything_ that has an offset of RAM _and_ a device which can't address all RAM. The reason for that is that DMA _masks_ are _masks_ and are not the upper limit of addressible RAM. Taking my example above, the DMA mask for a device with only 24-bits of addressing is 0x00ffffff. That may be connected to a system bridge which maps the low order bus space to system RAM, and system RAM may start at 0xc0000000, and end at 0xcfffffff. In that situation: blk_max_low_pfn = max_low_pfn - 1; unsigned long b_pfn = dma_mask >> PAGE_SHIFT; if (b_pfn < blk_max_low_pfn) dma = 1; _breaks_ if we define max_low_pfn to be 0xcfffffff >> PAGE_SHIFT, because now the above calculation says the whole of system memory is DMA-able. This isn't a question of setting a different DMA mask for the device. The DMA mask for the device is correct. What's wrong is that there's a mis-interpretation between different parts of the kernel caused by the assumption that system RAM always starts at PFN0, physical address zero. > The reverse could > also be problematic if phys == bus and max_low_pfn corresponds to an > address that's not actually accessible for the device (though in > practice I don't expect this). "In practice I don't expect this" - history has shown, for at least the last 12 years, that this isn't a problem because this has been how things have been done on ARM. What I have shown above is that there _are_ problems, particularly in the block layer, if we _don't_ do this.
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 92f598a..e94fa4c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -49,6 +49,7 @@ config ARM select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN + select NO_BOOTMEM help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 29560d8..0ca6c00 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -150,58 +150,6 @@ static void __init find_limits(unsigned long *min, unsigned long *max_low, *max_high = bank_pfn_end(&mi->bank[mi->nr_banks - 1]); } -static void __init arm_bootmem_init(unsigned long start_pfn, - unsigned long end_pfn) -{ - struct memblock_region *reg; - unsigned int boot_pages; - phys_addr_t bitmap; - pg_data_t *pgdat; - - /* - * Allocate the bootmem bitmap page. This must be in a region - * of memory which has already been mapped. - */ - boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bitmap = memblock_alloc_base(boot_pages << PAGE_SHIFT, L1_CACHE_BYTES, - __pfn_to_phys(end_pfn)); - - /* - * Initialise the bootmem allocator, handing the - * memory banks over to bootmem. - */ - node_set_online(0); - pgdat = NODE_DATA(0); - init_bootmem_node(pgdat, __phys_to_pfn(bitmap), start_pfn, end_pfn); - - /* Free the lowmem regions from memblock into bootmem. */ - for_each_memblock(memory, reg) { - unsigned long start = memblock_region_memory_base_pfn(reg); - unsigned long end = memblock_region_memory_end_pfn(reg); - - if (end >= end_pfn) - end = end_pfn; - if (start >= end) - break; - - free_bootmem(__pfn_to_phys(start), (end - start) << PAGE_SHIFT); - } - - /* Reserve the lowmem memblock reserved regions in bootmem. */ - for_each_memblock(reserved, reg) { - unsigned long start = memblock_region_reserved_base_pfn(reg); - unsigned long end = memblock_region_reserved_end_pfn(reg); - - if (end >= end_pfn) - end = end_pfn; - if (start >= end) - break; - - reserve_bootmem(__pfn_to_phys(start), - (end - start) << PAGE_SHIFT, BOOTMEM_DEFAULT); - } -} - #ifdef CONFIG_ZONE_DMA