Message ID | 20180912095232.2110-1-matthias.bgg@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | irqchip/gic-v3-its: Add early memory allocation errata | expand |
Friendly reminder, if anyone has any comment on the patch :) On 9/12/18 11:52 AM, matthias.bgg@kernel.org wrote: > From: Matthias Brugger <mbrugger@suse.com> > > Some hardware does not implement two-level page tables so that > the amount of contigious memory needed by the baser is bigger > then the zone order. This is a known problem on Cavium Thunderx > with 4K page size. > > We fix this by adding an errata which allocates the memory early > in the boot cycle, using the memblock allocator. > > Signed-off-by: Matthias Brugger <mbrugger@suse.com> > --- > arch/arm64/Kconfig | 12 ++++++++ > arch/arm64/include/asm/cpucaps.h | 3 +- > arch/arm64/kernel/cpu_errata.c | 33 +++++++++++++++++++++ > drivers/irqchip/irq-gic-v3-its.c | 50 ++++++++++++++++++++------------ > 4 files changed, 79 insertions(+), 19 deletions(-) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index 1b1a0e95c751..dfd9fe08f0b2 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -597,6 +597,18 @@ config QCOM_FALKOR_ERRATUM_E1041 > > If unsure, say Y. > > +config CAVIUM_ALLOC_ITS_TABLE_EARLY > + bool "Cavium Thunderx: Allocate the its table early" > + default y > + depends on ARM64_4K_PAGES && FORCE_MAX_ZONEORDER < 13 > + depends on ARM_GIC_V3_ITS > + help > + Cavium Thunderx needs to allocate 16MB of ITS translation table. > + This can be bigger as MAX_ZONE_ORDER and need therefore be done > + via the memblock allocator. > + > + If unsure, say Y. > + > endmenu > > > diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h > index ae1f70450fb2..c98be4809b7f 100644 > --- a/arch/arm64/include/asm/cpucaps.h > +++ b/arch/arm64/include/asm/cpucaps.h > @@ -51,7 +51,8 @@ > #define ARM64_SSBD 30 > #define ARM64_MISMATCHED_CACHE_TYPE 31 > #define ARM64_HAS_STAGE2_FWB 32 > +#define ARM64_WORKAROUND_CAVIUM_ITS_TABLE 33 > > -#define ARM64_NCAPS 33 > +#define ARM64_NCAPS 34 > > #endif /* __ASM_CPUCAPS_H */ > diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c > index dec10898d688..7908f8fa3ba8 100644 > --- a/arch/arm64/kernel/cpu_errata.c > +++ b/arch/arm64/kernel/cpu_errata.c > @@ -411,6 +411,29 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, > } > #endif /* CONFIG_ARM64_SSBD */ > > +#ifdef CONFIG_CAVIUM_ALLOC_ITS_TABLE_EARLY > +#include <linux/bootmem.h> > +extern void *its_base; > + > +/* > + * Hardware that doesn't use two-level page table and exceedes > + * the maximum order of pages that can be allocated by the buddy > + * allocator. Try to use the memblock allocator instead. > + * This has been observed on Cavium Thunderx machines with 4K > + * page size. > + */ > +static bool __init its_early_alloc(const struct arm64_cpu_capabilities *cap, > + int scope) > +{ > + /* We need to allocate the table only once */ > + if (scope & ARM64_CPUCAP_SCOPE_BOOT_CPU && !its_base) > + its_base = (void *)memblock_virt_alloc_nopanic(16 * SZ_1M, > + 64 * SZ_1K); > + > + return true; > +} > +#endif /* CONFIG_CAVIUM_ALLOC_ITS_TABLE_EARLY */ > + > #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ > .matches = is_affected_midr_range, \ > .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) > @@ -679,6 +702,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { > .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, > .matches = has_ssbd_mitigation, > }, > +#endif > +#ifdef CONFIG_CAVIUM_ALLOC_ITS_TABLE_EARLY > + { > + /* Cavium ThunderX, pass 1.x - 2.1 */ > + .desc = "Cavium alloc ITS table early", > + .capability = ARM64_WORKAROUND_CAVIUM_ITS_TABLE, > + .type = ARM64_CPUCAP_SCOPE_BOOT_CPU, > + .matches = its_early_alloc, > + .midr_range = MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), > + }, > #endif > { > } > diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c > index c2df341ff6fa..b78546740a0d 100644 > --- a/drivers/irqchip/irq-gic-v3-its.c > +++ b/drivers/irqchip/irq-gic-v3-its.c > @@ -87,6 +87,8 @@ struct its_baser { > u32 psz; > }; > > +void *its_base; > + > struct its_device; > > /* > @@ -1666,7 +1668,7 @@ static void its_write_baser(struct its_node *its, struct its_baser *baser, > baser->val = its_read_baser(its, baser); > } > > -static int its_setup_baser(struct its_node *its, struct its_baser *baser, > +static int __init its_setup_baser(struct its_node *its, struct its_baser *baser, > u64 cache, u64 shr, u32 psz, u32 order, > bool indirect) > { > @@ -1675,7 +1677,6 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, > u64 type = GITS_BASER_TYPE(val); > u64 baser_phys, tmp; > u32 alloc_pages; > - void *base; > > retry_alloc_baser: > alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz); > @@ -1687,11 +1688,22 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, > order = get_order(GITS_BASER_PAGES_MAX * psz); > } > > - base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); > - if (!base) > - return -ENOMEM; > + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_ITS_TABLE)) { > + if (!its_base) { > + pr_warn("ITS@%pa: %s Allocation using memblock failed %pS\n", > + &its->phys_base, its_base_type_string[type], > + its_base); > + return -ENOMEM; > + } > + > + } else { > + its_base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, > + order); > + if (!its_base) > + return -ENOMEM; > + } > > - baser_phys = virt_to_phys(base); > + baser_phys = virt_to_phys(its_base); > > /* Check if the physical address of the memory is above 48bits */ > if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && (baser_phys >> 48)) { > @@ -1699,7 +1711,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, > /* 52bit PA is supported only when PageSize=64K */ > if (psz != SZ_64K) { > pr_err("ITS: no 52bit PA support when psz=%d\n", psz); > - free_pages((unsigned long)base, order); > + free_pages((unsigned long)its_base, order); > return -ENXIO; > } > > @@ -1744,7 +1756,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, > shr = tmp & GITS_BASER_SHAREABILITY_MASK; > if (!shr) { > cache = GITS_BASER_nC; > - gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order)); > + gic_flush_dcache_to_poc(its_base, PAGE_ORDER_TO_SIZE(order)); > } > goto retry_baser; > } > @@ -1755,7 +1767,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, > * size and retry. If we reach 4K, then > * something is horribly wrong... > */ > - free_pages((unsigned long)base, order); > + free_pages((unsigned long)its_base, order); > baser->base = NULL; > > switch (psz) { > @@ -1772,19 +1784,19 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, > pr_err("ITS@%pa: %s doesn't stick: %llx %llx\n", > &its->phys_base, its_base_type_string[type], > val, tmp); > - free_pages((unsigned long)base, order); > + free_pages((unsigned long)its_base, order); > return -ENXIO; > } > > baser->order = order; > - baser->base = base; > + baser->base = its_base; > baser->psz = psz; > tmp = indirect ? GITS_LVL1_ENTRY_SIZE : esz; > > pr_info("ITS@%pa: allocated %d %s @%lx (%s, esz %d, psz %dK, shr %d)\n", > &its->phys_base, (int)(PAGE_ORDER_TO_SIZE(order) / (int)tmp), > its_base_type_string[type], > - (unsigned long)virt_to_phys(base), > + (unsigned long)virt_to_phys(its_base), > indirect ? "indirect" : "flat", (int)esz, > psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT); > > @@ -1832,12 +1844,14 @@ static bool its_parse_indirect_baser(struct its_node *its, > * feature is not supported by hardware. > */ > new_order = max_t(u32, get_order(esz << ids), new_order); > - if (new_order >= MAX_ORDER) { > - new_order = MAX_ORDER - 1; > - ids = ilog2(PAGE_ORDER_TO_SIZE(new_order) / (int)esz); > - pr_warn("ITS@%pa: %s Table too large, reduce ids %u->%u\n", > - &its->phys_base, its_base_type_string[type], > - its->device_ids, ids); > + if (!cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_ITS_TABLE)) { > + if (new_order >= MAX_ORDER) { > + new_order = MAX_ORDER - 1; > + ids = ilog2(PAGE_ORDER_TO_SIZE(new_order) / (int)esz); > + pr_warn("ITS@%pa: %s Table too large, reduce ids %u->%u\n", > + &its->phys_base, its_base_type_string[type], > + its->device_ids, ids); > + } > } > > *order = new_order; >
Hi Matthias, On 04/10/18 23:11, Matthias Brugger wrote: > Friendly reminder, if anyone has any comment on the patch :) > > On 9/12/18 11:52 AM, matthias.bgg@kernel.org wrote: >> From: Matthias Brugger <mbrugger@suse.com> >> >> Some hardware does not implement two-level page tables so that >> the amount of contigious memory needed by the baser is bigger >> then the zone order. This is a known problem on Cavium Thunderx >> with 4K page size. >> >> We fix this by adding an errata which allocates the memory early >> in the boot cycle, using the memblock allocator. >> >> Signed-off-by: Matthias Brugger <mbrugger@suse.com> >> --- >> arch/arm64/Kconfig | 12 ++++++++ >> arch/arm64/include/asm/cpucaps.h | 3 +- >> arch/arm64/kernel/cpu_errata.c | 33 +++++++++++++++++++++ >> drivers/irqchip/irq-gic-v3-its.c | 50 ++++++++++++++++++++------------ >> 4 files changed, 79 insertions(+), 19 deletions(-) My only comment would be to state how much I dislike both the HW and the patch... ;-) The idea that we have some erratum that depends on the page size doesn't feel good at all. >> >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >> index 1b1a0e95c751..dfd9fe08f0b2 100644 >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig >> @@ -597,6 +597,18 @@ config QCOM_FALKOR_ERRATUM_E1041 >> >> If unsure, say Y. >> >> +config CAVIUM_ALLOC_ITS_TABLE_EARLY >> + bool "Cavium Thunderx: Allocate the its table early" >> + default y >> + depends on ARM64_4K_PAGES && FORCE_MAX_ZONEORDER < 13 Here's a though: Why don't we ensure that FORCE_MAX_ZONEORDER is such as we could always allocate the same amount of memory, no matter what the page size is? That, or bump FORCE_MAX_ZONEORDER to 13 if the kernel includes support for TX1. Any of this of course requires buy-in from the arm64 maintainers, as this is quite a departure from the way things work so far. Thanks, M.
On 05/10/2018 12:55, Marc Zyngier wrote: > Hi Matthias, > > On 04/10/18 23:11, Matthias Brugger wrote: >> Friendly reminder, if anyone has any comment on the patch :) >> >> On 9/12/18 11:52 AM, matthias.bgg@kernel.org wrote: >>> From: Matthias Brugger <mbrugger@suse.com> >>> >>> Some hardware does not implement two-level page tables so that >>> the amount of contigious memory needed by the baser is bigger >>> then the zone order. This is a known problem on Cavium Thunderx >>> with 4K page size. >>> >>> We fix this by adding an errata which allocates the memory early >>> in the boot cycle, using the memblock allocator. >>> >>> Signed-off-by: Matthias Brugger <mbrugger@suse.com> >>> --- >>> arch/arm64/Kconfig | 12 ++++++++ >>> arch/arm64/include/asm/cpucaps.h | 3 +- >>> arch/arm64/kernel/cpu_errata.c | 33 +++++++++++++++++++++ >>> drivers/irqchip/irq-gic-v3-its.c | 50 ++++++++++++++++++++------------ >>> 4 files changed, 79 insertions(+), 19 deletions(-) > > My only comment would be to state how much I dislike both the HW and the > patch... ;-) The idea that we have some erratum that depends on the page size > doesn't feel good at all. > Well ugly HW needs ugly patches ;-) >>> >>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >>> index 1b1a0e95c751..dfd9fe08f0b2 100644 >>> --- a/arch/arm64/Kconfig >>> +++ b/arch/arm64/Kconfig >>> @@ -597,6 +597,18 @@ config QCOM_FALKOR_ERRATUM_E1041 >>> If unsure, say Y. >>> +config CAVIUM_ALLOC_ITS_TABLE_EARLY >>> + bool "Cavium Thunderx: Allocate the its table early" >>> + default y >>> + depends on ARM64_4K_PAGES && FORCE_MAX_ZONEORDER < 13 > > Here's a though: Why don't we ensure that FORCE_MAX_ZONEORDER is such as we > could always allocate the same amount of memory, no matter what the page size > is? That, or bump FORCE_MAX_ZONEORDER to 13 if the kernel includes support for TX1. > Bumping FORCE_MAX_ZONEORDER when TX1 is supported was proposed here: https://patchwork.kernel.org/patch/6322281/ To bring in some more history, the CMA approach ended with this discussion: https://patchwork.kernel.org/patch/9888041/ > Any of this of course requires buy-in from the arm64 maintainers, as this is > quite a departure from the way things work so far. > With my distribution head on, I would prefer a solution that does not change FORCE_MAX_ZONEORDER. That's how I came to the idea providing a third solution to the same problem :) Regards, Matthias
On 05/10/18 13:33, Matthias Brugger wrote: > > > On 05/10/2018 12:55, Marc Zyngier wrote: >> Hi Matthias, >> >> On 04/10/18 23:11, Matthias Brugger wrote: >>> Friendly reminder, if anyone has any comment on the patch :) >>> >>> On 9/12/18 11:52 AM, matthias.bgg@kernel.org wrote: >>>> From: Matthias Brugger <mbrugger@suse.com> >>>> >>>> Some hardware does not implement two-level page tables so that >>>> the amount of contigious memory needed by the baser is bigger >>>> then the zone order. This is a known problem on Cavium Thunderx >>>> with 4K page size. >>>> >>>> We fix this by adding an errata which allocates the memory early >>>> in the boot cycle, using the memblock allocator. >>>> >>>> Signed-off-by: Matthias Brugger <mbrugger@suse.com> >>>> --- >>>> arch/arm64/Kconfig | 12 ++++++++ >>>> arch/arm64/include/asm/cpucaps.h | 3 +- >>>> arch/arm64/kernel/cpu_errata.c | 33 +++++++++++++++++++++ >>>> drivers/irqchip/irq-gic-v3-its.c | 50 ++++++++++++++++++++------------ >>>> 4 files changed, 79 insertions(+), 19 deletions(-) >> >> My only comment would be to state how much I dislike both the HW and the >> patch... ;-) The idea that we have some erratum that depends on the page size >> doesn't feel good at all. >> > > Well ugly HW needs ugly patches ;-) > >>>> >>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >>>> index 1b1a0e95c751..dfd9fe08f0b2 100644 >>>> --- a/arch/arm64/Kconfig >>>> +++ b/arch/arm64/Kconfig >>>> @@ -597,6 +597,18 @@ config QCOM_FALKOR_ERRATUM_E1041 >>>> If unsure, say Y. >>>> +config CAVIUM_ALLOC_ITS_TABLE_EARLY >>>> + bool "Cavium Thunderx: Allocate the its table early" >>>> + default y >>>> + depends on ARM64_4K_PAGES && FORCE_MAX_ZONEORDER < 13 >> >> Here's a though: Why don't we ensure that FORCE_MAX_ZONEORDER is such as we >> could always allocate the same amount of memory, no matter what the page size >> is? That, or bump FORCE_MAX_ZONEORDER to 13 if the kernel includes support for TX1. >> > > Bumping FORCE_MAX_ZONEORDER when TX1 is supported was proposed here: > https://patchwork.kernel.org/patch/6322281/ > > To bring in some more history, the CMA approach ended with this discussion: > https://patchwork.kernel.org/patch/9888041/ > >> Any of this of course requires buy-in from the arm64 maintainers, as this is >> quite a departure from the way things work so far. >> > > With my distribution head on, I would prefer a solution that does not change > FORCE_MAX_ZONEORDER. That's how I came to the idea providing a third solution to > the same problem :) Why is that a problem? What impact does this have on your favourite distro? Thanks, M.
On 05/10/2018 15:42, Marc Zyngier wrote: > On 05/10/18 13:33, Matthias Brugger wrote: >> >> >> On 05/10/2018 12:55, Marc Zyngier wrote: >>> Hi Matthias, >>> >>> On 04/10/18 23:11, Matthias Brugger wrote: >>>> Friendly reminder, if anyone has any comment on the patch :) >>>> >>>> On 9/12/18 11:52 AM, matthias.bgg@kernel.org wrote: >>>>> From: Matthias Brugger <mbrugger@suse.com> >>>>> >>>>> Some hardware does not implement two-level page tables so that >>>>> the amount of contigious memory needed by the baser is bigger >>>>> then the zone order. This is a known problem on Cavium Thunderx >>>>> with 4K page size. >>>>> >>>>> We fix this by adding an errata which allocates the memory early >>>>> in the boot cycle, using the memblock allocator. >>>>> >>>>> Signed-off-by: Matthias Brugger <mbrugger@suse.com> >>>>> --- >>>>> arch/arm64/Kconfig | 12 ++++++++ >>>>> arch/arm64/include/asm/cpucaps.h | 3 +- >>>>> arch/arm64/kernel/cpu_errata.c | 33 +++++++++++++++++++++ >>>>> drivers/irqchip/irq-gic-v3-its.c | 50 ++++++++++++++++++++------------ >>>>> 4 files changed, 79 insertions(+), 19 deletions(-) >>> >>> My only comment would be to state how much I dislike both the HW and the >>> patch... ;-) The idea that we have some erratum that depends on the page size >>> doesn't feel good at all. >>> >> >> Well ugly HW needs ugly patches ;-) >> >>>>> >>>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >>>>> index 1b1a0e95c751..dfd9fe08f0b2 100644 >>>>> --- a/arch/arm64/Kconfig >>>>> +++ b/arch/arm64/Kconfig >>>>> @@ -597,6 +597,18 @@ config QCOM_FALKOR_ERRATUM_E1041 >>>>> If unsure, say Y. >>>>> +config CAVIUM_ALLOC_ITS_TABLE_EARLY >>>>> + bool "Cavium Thunderx: Allocate the its table early" >>>>> + default y >>>>> + depends on ARM64_4K_PAGES && FORCE_MAX_ZONEORDER < 13 >>> >>> Here's a though: Why don't we ensure that FORCE_MAX_ZONEORDER is such as we >>> could always allocate the same amount of memory, no matter what the page size >>> is? That, or bump FORCE_MAX_ZONEORDER to 13 if the kernel includes support >>> for TX1. >>> >> >> Bumping FORCE_MAX_ZONEORDER when TX1 is supported was proposed here: >> https://patchwork.kernel.org/patch/6322281/ >> >> To bring in some more history, the CMA approach ended with this discussion: >> https://patchwork.kernel.org/patch/9888041/ >> >>> Any of this of course requires buy-in from the arm64 maintainers, as this is >>> quite a departure from the way things work so far. >>> >> >> With my distribution head on, I would prefer a solution that does not change >> FORCE_MAX_ZONEORDER. That's how I came to the idea providing a third solution to >> the same problem :) > > Why is that a problem? What impact does this have on your favourite distro? > The impact is on changing FORCE_MAX_ZONEORDER on an already released kernel will break Kernel ABI and with that all external modules. I know that's nothing upstream cares too much about, but the distros do :)
On 05.10.18 16:13:48, Matthias Brugger wrote: > On 05/10/2018 15:42, Marc Zyngier wrote: > > On 05/10/18 13:33, Matthias Brugger wrote: > >> With my distribution head on, I would prefer a solution that does not change > >> FORCE_MAX_ZONEORDER. That's how I came to the idea providing a third solution to > >> the same problem :) > > > > Why is that a problem? What impact does this have on your favourite distro? > > > > The impact is on changing FORCE_MAX_ZONEORDER on an already released kernel will > break Kernel ABI and with that all external modules. I know that's nothing > upstream cares too much about, but the distros do :) Wrt upstream, there is no way to do this change without patching the kernel. Thus, it is impossible to run an unpatched 4k page size kernel on ThunderX. -Robert
On Fri, 05 Oct 2018 15:13:48 +0100, Matthias Brugger <matthias.bgg@gmail.com> wrote: > > > > On 05/10/2018 15:42, Marc Zyngier wrote: > > On 05/10/18 13:33, Matthias Brugger wrote: > >> > >> > >> On 05/10/2018 12:55, Marc Zyngier wrote: > >>> Hi Matthias, > >>> > >>> On 04/10/18 23:11, Matthias Brugger wrote: > >>>> Friendly reminder, if anyone has any comment on the patch :) > >>>> > >>>> On 9/12/18 11:52 AM, matthias.bgg@kernel.org wrote: > >>>>> From: Matthias Brugger <mbrugger@suse.com> > >>>>> > >>>>> Some hardware does not implement two-level page tables so that > >>>>> the amount of contigious memory needed by the baser is bigger > >>>>> then the zone order. This is a known problem on Cavium Thunderx > >>>>> with 4K page size. > >>>>> > >>>>> We fix this by adding an errata which allocates the memory early > >>>>> in the boot cycle, using the memblock allocator. > >>>>> > >>>>> Signed-off-by: Matthias Brugger <mbrugger@suse.com> > >>>>> --- > >>>>> arch/arm64/Kconfig | 12 ++++++++ > >>>>> arch/arm64/include/asm/cpucaps.h | 3 +- > >>>>> arch/arm64/kernel/cpu_errata.c | 33 +++++++++++++++++++++ > >>>>> drivers/irqchip/irq-gic-v3-its.c | 50 ++++++++++++++++++++------------ > >>>>> 4 files changed, 79 insertions(+), 19 deletions(-) > >>> > >>> My only comment would be to state how much I dislike both the HW and the > >>> patch... ;-) The idea that we have some erratum that depends on the page size > >>> doesn't feel good at all. > >>> > >> > >> Well ugly HW needs ugly patches ;-) > >> > >>>>> > >>>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > >>>>> index 1b1a0e95c751..dfd9fe08f0b2 100644 > >>>>> --- a/arch/arm64/Kconfig > >>>>> +++ b/arch/arm64/Kconfig > >>>>> @@ -597,6 +597,18 @@ config QCOM_FALKOR_ERRATUM_E1041 > >>>>> If unsure, say Y. > >>>>> +config CAVIUM_ALLOC_ITS_TABLE_EARLY > >>>>> + bool "Cavium Thunderx: Allocate the its table early" > >>>>> + default y > >>>>> + depends on ARM64_4K_PAGES && FORCE_MAX_ZONEORDER < 13 > >>> > >>> Here's a though: Why don't we ensure that FORCE_MAX_ZONEORDER is such as we > >>> could always allocate the same amount of memory, no matter what the page size > >>> is? That, or bump FORCE_MAX_ZONEORDER to 13 if the kernel includes support > >>> for TX1. > >>> > >> > >> Bumping FORCE_MAX_ZONEORDER when TX1 is supported was proposed here: > >> https://patchwork.kernel.org/patch/6322281/ > >> > >> To bring in some more history, the CMA approach ended with this discussion: > >> https://patchwork.kernel.org/patch/9888041/ > >> > >>> Any of this of course requires buy-in from the arm64 maintainers, as this is > >>> quite a departure from the way things work so far. > >>> > >> > >> With my distribution head on, I would prefer a solution that does not change > >> FORCE_MAX_ZONEORDER. That's how I came to the idea providing a third solution to > >> the same problem :) > > > > Why is that a problem? What impact does this have on your favourite distro? > > > > The impact is on changing FORCE_MAX_ZONEORDER on an already released > kernel will break Kernel ABI and with that all external modules. I > know that's nothing upstream cares too much about, but the distros > do :) Unfortunately, that's something you're bringing upon yourself, and I'm afraid I can't really take this into account. You could always bump that ABI if you really want to support this platform as, at the end of the day, this is something you're in control of. But I'd really like to hear what Catalin or Will think of this (Will wasn't massively impressed by this 3 years ago, and I wonder if his approach has changed since). Thanks, M.
On Fri, Oct 05, 2018 at 04:17:30PM +0100, Marc Zyngier wrote: > On Fri, 05 Oct 2018 15:13:48 +0100, > Matthias Brugger <matthias.bgg@gmail.com> wrote: > > > > > > > > On 05/10/2018 15:42, Marc Zyngier wrote: > > > On 05/10/18 13:33, Matthias Brugger wrote: > > >> > > >> > > >> On 05/10/2018 12:55, Marc Zyngier wrote: > > >>> Hi Matthias, > > >>> > > >>> On 04/10/18 23:11, Matthias Brugger wrote: > > >>>> Friendly reminder, if anyone has any comment on the patch :) > > >>>> > > >>>> On 9/12/18 11:52 AM, matthias.bgg@kernel.org wrote: > > >>>>> From: Matthias Brugger <mbrugger@suse.com> > > >>>>> > > >>>>> Some hardware does not implement two-level page tables so that > > >>>>> the amount of contigious memory needed by the baser is bigger > > >>>>> then the zone order. This is a known problem on Cavium Thunderx > > >>>>> with 4K page size. > > >>>>> > > >>>>> We fix this by adding an errata which allocates the memory early > > >>>>> in the boot cycle, using the memblock allocator. > > >>>>> > > >>>>> Signed-off-by: Matthias Brugger <mbrugger@suse.com> > > >>>>> --- > > >>>>> arch/arm64/Kconfig | 12 ++++++++ > > >>>>> arch/arm64/include/asm/cpucaps.h | 3 +- > > >>>>> arch/arm64/kernel/cpu_errata.c | 33 +++++++++++++++++++++ > > >>>>> drivers/irqchip/irq-gic-v3-its.c | 50 ++++++++++++++++++++------------ > > >>>>> 4 files changed, 79 insertions(+), 19 deletions(-) > > >>> > > >>> My only comment would be to state how much I dislike both the HW and the > > >>> patch... ;-) The idea that we have some erratum that depends on the page size > > >>> doesn't feel good at all. > > >>> > > >> > > >> Well ugly HW needs ugly patches ;-) > > >> > > >>>>> > > >>>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > > >>>>> index 1b1a0e95c751..dfd9fe08f0b2 100644 > > >>>>> --- a/arch/arm64/Kconfig > > >>>>> +++ b/arch/arm64/Kconfig > > >>>>> @@ -597,6 +597,18 @@ config QCOM_FALKOR_ERRATUM_E1041 > > >>>>> If unsure, say Y. > > >>>>> +config CAVIUM_ALLOC_ITS_TABLE_EARLY > > >>>>> + bool "Cavium Thunderx: Allocate the its table early" > > >>>>> + default y > > >>>>> + depends on ARM64_4K_PAGES && FORCE_MAX_ZONEORDER < 13 > > >>> > > >>> Here's a though: Why don't we ensure that FORCE_MAX_ZONEORDER is such as we > > >>> could always allocate the same amount of memory, no matter what the page size > > >>> is? That, or bump FORCE_MAX_ZONEORDER to 13 if the kernel includes support > > >>> for TX1. > > >>> > > >> > > >> Bumping FORCE_MAX_ZONEORDER when TX1 is supported was proposed here: > > >> https://patchwork.kernel.org/patch/6322281/ > > >> > > >> To bring in some more history, the CMA approach ended with this discussion: > > >> https://patchwork.kernel.org/patch/9888041/ > > >> > > >>> Any of this of course requires buy-in from the arm64 maintainers, as this is > > >>> quite a departure from the way things work so far. > > >>> > > >> > > >> With my distribution head on, I would prefer a solution that does not change > > >> FORCE_MAX_ZONEORDER. That's how I came to the idea providing a third solution to > > >> the same problem :) > > > > > > Why is that a problem? What impact does this have on your favourite distro? > > > > > > > The impact is on changing FORCE_MAX_ZONEORDER on an already released > > kernel will break Kernel ABI and with that all external modules. I > > know that's nothing upstream cares too much about, but the distros > > do :) > > Unfortunately, that's something you're bringing upon yourself, and I'm > afraid I can't really take this into account. You could always bump > that ABI if you really want to support this platform as, at the end of > the day, this is something you're in control of. > > But I'd really like to hear what Catalin or Will think of this (Will > wasn't massively impressed by this 3 years ago, and I wonder if his > approach has changed since). I don't see anything that changes my opinion here, and the reality is that bumping FORCE_MAX_ZONEORDER doesn't guarantee anything about the allocation succeeding. I'm also hesitant to punish other platforms (including TX2!) because of this TX1 "feature". One thing I'm unsure about is why the CMA approach failed; the link above is a complain about the use of subsys_initcall() afaict. Will
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1b1a0e95c751..dfd9fe08f0b2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -597,6 +597,18 @@ config QCOM_FALKOR_ERRATUM_E1041 If unsure, say Y. +config CAVIUM_ALLOC_ITS_TABLE_EARLY + bool "Cavium Thunderx: Allocate the its table early" + default y + depends on ARM64_4K_PAGES && FORCE_MAX_ZONEORDER < 13 + depends on ARM_GIC_V3_ITS + help + Cavium Thunderx needs to allocate 16MB of ITS translation table. + This can be bigger as MAX_ZONE_ORDER and need therefore be done + via the memblock allocator. + + If unsure, say Y. + endmenu diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index ae1f70450fb2..c98be4809b7f 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -51,7 +51,8 @@ #define ARM64_SSBD 30 #define ARM64_MISMATCHED_CACHE_TYPE 31 #define ARM64_HAS_STAGE2_FWB 32 +#define ARM64_WORKAROUND_CAVIUM_ITS_TABLE 33 -#define ARM64_NCAPS 33 +#define ARM64_NCAPS 34 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index dec10898d688..7908f8fa3ba8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -411,6 +411,29 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, } #endif /* CONFIG_ARM64_SSBD */ +#ifdef CONFIG_CAVIUM_ALLOC_ITS_TABLE_EARLY +#include <linux/bootmem.h> +extern void *its_base; + +/* + * Hardware that doesn't use two-level page table and exceedes + * the maximum order of pages that can be allocated by the buddy + * allocator. Try to use the memblock allocator instead. + * This has been observed on Cavium Thunderx machines with 4K + * page size. + */ +static bool __init its_early_alloc(const struct arm64_cpu_capabilities *cap, + int scope) +{ + /* We need to allocate the table only once */ + if (scope & ARM64_CPUCAP_SCOPE_BOOT_CPU && !its_base) + its_base = (void *)memblock_virt_alloc_nopanic(16 * SZ_1M, + 64 * SZ_1K); + + return true; +} +#endif /* CONFIG_CAVIUM_ALLOC_ITS_TABLE_EARLY */ + #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ .matches = is_affected_midr_range, \ .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) @@ -679,6 +702,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_ssbd_mitigation, }, +#endif +#ifdef CONFIG_CAVIUM_ALLOC_ITS_TABLE_EARLY + { + /* Cavium ThunderX, pass 1.x - 2.1 */ + .desc = "Cavium alloc ITS table early", + .capability = ARM64_WORKAROUND_CAVIUM_ITS_TABLE, + .type = ARM64_CPUCAP_SCOPE_BOOT_CPU, + .matches = its_early_alloc, + .midr_range = MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), + }, #endif { } diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index c2df341ff6fa..b78546740a0d 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -87,6 +87,8 @@ struct its_baser { u32 psz; }; +void *its_base; + struct its_device; /* @@ -1666,7 +1668,7 @@ static void its_write_baser(struct its_node *its, struct its_baser *baser, baser->val = its_read_baser(its, baser); } -static int its_setup_baser(struct its_node *its, struct its_baser *baser, +static int __init its_setup_baser(struct its_node *its, struct its_baser *baser, u64 cache, u64 shr, u32 psz, u32 order, bool indirect) { @@ -1675,7 +1677,6 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, u64 type = GITS_BASER_TYPE(val); u64 baser_phys, tmp; u32 alloc_pages; - void *base; retry_alloc_baser: alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz); @@ -1687,11 +1688,22 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, order = get_order(GITS_BASER_PAGES_MAX * psz); } - base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!base) - return -ENOMEM; + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_ITS_TABLE)) { + if (!its_base) { + pr_warn("ITS@%pa: %s Allocation using memblock failed %pS\n", + &its->phys_base, its_base_type_string[type], + its_base); + return -ENOMEM; + } + + } else { + its_base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + order); + if (!its_base) + return -ENOMEM; + } - baser_phys = virt_to_phys(base); + baser_phys = virt_to_phys(its_base); /* Check if the physical address of the memory is above 48bits */ if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && (baser_phys >> 48)) { @@ -1699,7 +1711,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, /* 52bit PA is supported only when PageSize=64K */ if (psz != SZ_64K) { pr_err("ITS: no 52bit PA support when psz=%d\n", psz); - free_pages((unsigned long)base, order); + free_pages((unsigned long)its_base, order); return -ENXIO; } @@ -1744,7 +1756,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, shr = tmp & GITS_BASER_SHAREABILITY_MASK; if (!shr) { cache = GITS_BASER_nC; - gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order)); + gic_flush_dcache_to_poc(its_base, PAGE_ORDER_TO_SIZE(order)); } goto retry_baser; } @@ -1755,7 +1767,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, * size and retry. If we reach 4K, then * something is horribly wrong... */ - free_pages((unsigned long)base, order); + free_pages((unsigned long)its_base, order); baser->base = NULL; switch (psz) { @@ -1772,19 +1784,19 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, pr_err("ITS@%pa: %s doesn't stick: %llx %llx\n", &its->phys_base, its_base_type_string[type], val, tmp); - free_pages((unsigned long)base, order); + free_pages((unsigned long)its_base, order); return -ENXIO; } baser->order = order; - baser->base = base; + baser->base = its_base; baser->psz = psz; tmp = indirect ? GITS_LVL1_ENTRY_SIZE : esz; pr_info("ITS@%pa: allocated %d %s @%lx (%s, esz %d, psz %dK, shr %d)\n", &its->phys_base, (int)(PAGE_ORDER_TO_SIZE(order) / (int)tmp), its_base_type_string[type], - (unsigned long)virt_to_phys(base), + (unsigned long)virt_to_phys(its_base), indirect ? "indirect" : "flat", (int)esz, psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT); @@ -1832,12 +1844,14 @@ static bool its_parse_indirect_baser(struct its_node *its, * feature is not supported by hardware. */ new_order = max_t(u32, get_order(esz << ids), new_order); - if (new_order >= MAX_ORDER) { - new_order = MAX_ORDER - 1; - ids = ilog2(PAGE_ORDER_TO_SIZE(new_order) / (int)esz); - pr_warn("ITS@%pa: %s Table too large, reduce ids %u->%u\n", - &its->phys_base, its_base_type_string[type], - its->device_ids, ids); + if (!cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_ITS_TABLE)) { + if (new_order >= MAX_ORDER) { + new_order = MAX_ORDER - 1; + ids = ilog2(PAGE_ORDER_TO_SIZE(new_order) / (int)esz); + pr_warn("ITS@%pa: %s Table too large, reduce ids %u->%u\n", + &its->phys_base, its_base_type_string[type], + its->device_ids, ids); + } } *order = new_order;