Message ID | 20240222094006.1030709-2-apatel@ventanamicro.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Linux RISC-V AIA Support | expand |
Hi Anup, On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <apatel@ventanamicro.com> wrote: > > The PLIC driver does not require very early initialization so convert > it into a platform driver. > > After conversion, the PLIC driver is probed after CPUs are brought-up > so setup cpuhp state after context handler of all online CPUs are > initialized otherwise PLIC driver crashes for platforms with multiple > PLIC instances. > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > --- > drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------ > 1 file changed, 61 insertions(+), 40 deletions(-) > This patch seems to have broken things on RZ/Five SoC, after reverting this patch I get to boot it back again on v6.9-rc2. Looks like there is some probe order issue after switching to platform driver? Cheers, Prabhakar > diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c > index 5b7bc4fd9517..7400a07fc479 100644 > --- a/drivers/irqchip/irq-sifive-plic.c > +++ b/drivers/irqchip/irq-sifive-plic.c > @@ -64,6 +64,7 @@ > #define PLIC_QUIRK_EDGE_INTERRUPT 0 > > struct plic_priv { > + struct device *dev; > struct cpumask lmask; > struct irq_domain *irqdomain; > void __iomem *regs; > @@ -406,30 +407,50 @@ static int plic_starting_cpu(unsigned int cpu) > return 0; > } > > -static int __init __plic_init(struct device_node *node, > - struct device_node *parent, > - unsigned long plic_quirks) > +static const struct of_device_id plic_match[] = { > + { .compatible = "sifive,plic-1.0.0" }, > + { .compatible = "riscv,plic0" }, > + { .compatible = "andestech,nceplic100", > + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) }, > + { .compatible = "thead,c900-plic", > + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) }, > + {} > +}; > + > +static int plic_probe(struct platform_device *pdev) > { > int error = 0, nr_contexts, nr_handlers = 0, i; > - u32 nr_irqs; > - struct plic_priv *priv; > + struct device *dev = &pdev->dev; > + unsigned long plic_quirks = 0; > struct plic_handler *handler; > + struct plic_priv *priv; > + bool cpuhp_setup; > unsigned int cpu; > + u32 nr_irqs; > + > + if (is_of_node(dev->fwnode)) { > + const struct of_device_id *id; > + > + id = of_match_node(plic_match, to_of_node(dev->fwnode)); > + if (id) > + plic_quirks = (unsigned long)id->data; > + } > > priv = kzalloc(sizeof(*priv), GFP_KERNEL); > if (!priv) > return -ENOMEM; > > + priv->dev = dev; > priv->plic_quirks = plic_quirks; > > - priv->regs = of_iomap(node, 0); > + priv->regs = of_iomap(to_of_node(dev->fwnode), 0); > if (WARN_ON(!priv->regs)) { > error = -EIO; > goto out_free_priv; > } > > error = -EINVAL; > - of_property_read_u32(node, "riscv,ndev", &nr_irqs); > + of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", &nr_irqs); > if (WARN_ON(!nr_irqs)) > goto out_iounmap; > > @@ -439,13 +460,13 @@ static int __init __plic_init(struct device_node *node, > if (!priv->prio_save) > goto out_free_priority_reg; > > - nr_contexts = of_irq_count(node); > + nr_contexts = of_irq_count(to_of_node(dev->fwnode)); > if (WARN_ON(!nr_contexts)) > goto out_free_priority_reg; > > error = -ENOMEM; > - priv->irqdomain = irq_domain_add_linear(node, nr_irqs + 1, > - &plic_irqdomain_ops, priv); > + priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1, > + &plic_irqdomain_ops, priv); > if (WARN_ON(!priv->irqdomain)) > goto out_free_priority_reg; > > @@ -455,7 +476,7 @@ static int __init __plic_init(struct device_node *node, > int cpu; > unsigned long hartid; > > - if (of_irq_parse_one(node, i, &parent)) { > + if (of_irq_parse_one(to_of_node(dev->fwnode), i, &parent)) { > pr_err("failed to parse parent for context %d.\n", i); > continue; > } > @@ -491,7 +512,7 @@ static int __init __plic_init(struct device_node *node, > > /* Find parent domain and register chained handler */ > if (!plic_parent_irq && irq_find_host(parent.np)) { > - plic_parent_irq = irq_of_parse_and_map(node, i); > + plic_parent_irq = irq_of_parse_and_map(to_of_node(dev->fwnode), i); > if (plic_parent_irq) > irq_set_chained_handler(plic_parent_irq, > plic_handle_irq); > @@ -533,20 +554,29 @@ static int __init __plic_init(struct device_node *node, > > /* > * We can have multiple PLIC instances so setup cpuhp state > - * and register syscore operations only when context handler > - * for current/boot CPU is present. > + * and register syscore operations only once after context > + * handlers of all online CPUs are initialized. > */ > - handler = this_cpu_ptr(&plic_handlers); > - if (handler->present && !plic_cpuhp_setup_done) { > - cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, > - "irqchip/sifive/plic:starting", > - plic_starting_cpu, plic_dying_cpu); > - register_syscore_ops(&plic_irq_syscore_ops); > - plic_cpuhp_setup_done = true; > + if (!plic_cpuhp_setup_done) { > + cpuhp_setup = true; > + for_each_online_cpu(cpu) { > + handler = per_cpu_ptr(&plic_handlers, cpu); > + if (!handler->present) { > + cpuhp_setup = false; > + break; > + } > + } > + if (cpuhp_setup) { > + cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, > + "irqchip/sifive/plic:starting", > + plic_starting_cpu, plic_dying_cpu); > + register_syscore_ops(&plic_irq_syscore_ops); > + plic_cpuhp_setup_done = true; > + } > } > > - pr_info("%pOFP: mapped %d interrupts with %d handlers for" > - " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts); > + pr_info("%pOFP: mapped %d interrupts with %d handlers for %d contexts.\n", > + to_of_node(dev->fwnode), nr_irqs, nr_handlers, nr_contexts); > return 0; > > out_free_enable_reg: > @@ -563,20 +593,11 @@ static int __init __plic_init(struct device_node *node, > return error; > } > > -static int __init plic_init(struct device_node *node, > - struct device_node *parent) > -{ > - return __plic_init(node, parent, 0); > -} > - > -IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init); > -IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */ > - > -static int __init plic_edge_init(struct device_node *node, > - struct device_node *parent) > -{ > - return __plic_init(node, parent, BIT(PLIC_QUIRK_EDGE_INTERRUPT)); > -} > - > -IRQCHIP_DECLARE(andestech_nceplic100, "andestech,nceplic100", plic_edge_init); > -IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_edge_init); > +static struct platform_driver plic_driver = { > + .driver = { > + .name = "riscv-plic", > + .of_match_table = plic_match, > + }, > + .probe = plic_probe, > +}; > +builtin_platform_driver(plic_driver); > -- > 2.34.1 > > > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
On Wed, Apr 3, 2024 at 2:01 PM Lad, Prabhakar <prabhakar.csengg@gmail.com> wrote: > > Hi Anup, > > On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <apatel@ventanamicro.com> wrote: > > > > The PLIC driver does not require very early initialization so convert > > it into a platform driver. > > > > After conversion, the PLIC driver is probed after CPUs are brought-up > > so setup cpuhp state after context handler of all online CPUs are > > initialized otherwise PLIC driver crashes for platforms with multiple > > PLIC instances. > > > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > --- > > drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------ > > 1 file changed, 61 insertions(+), 40 deletions(-) > > > This patch seems to have broken things on RZ/Five SoC, after reverting > this patch I get to boot it back again on v6.9-rc2. Looks like there > is some probe order issue after switching to platform driver? Yes, this is most likely related to probe ordering based on your DT. Can you share the failing boot log and DT ? Regards, Anup > > Cheers, > Prabhakar > > > diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c > > index 5b7bc4fd9517..7400a07fc479 100644 > > --- a/drivers/irqchip/irq-sifive-plic.c > > +++ b/drivers/irqchip/irq-sifive-plic.c > > @@ -64,6 +64,7 @@ > > #define PLIC_QUIRK_EDGE_INTERRUPT 0 > > > > struct plic_priv { > > + struct device *dev; > > struct cpumask lmask; > > struct irq_domain *irqdomain; > > void __iomem *regs; > > @@ -406,30 +407,50 @@ static int plic_starting_cpu(unsigned int cpu) > > return 0; > > } > > > > -static int __init __plic_init(struct device_node *node, > > - struct device_node *parent, > > - unsigned long plic_quirks) > > +static const struct of_device_id plic_match[] = { > > + { .compatible = "sifive,plic-1.0.0" }, > > + { .compatible = "riscv,plic0" }, > > + { .compatible = "andestech,nceplic100", > > + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) }, > > + { .compatible = "thead,c900-plic", > > + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) }, > > + {} > > +}; > > + > > +static int plic_probe(struct platform_device *pdev) > > { > > int error = 0, nr_contexts, nr_handlers = 0, i; > > - u32 nr_irqs; > > - struct plic_priv *priv; > > + struct device *dev = &pdev->dev; > > + unsigned long plic_quirks = 0; > > struct plic_handler *handler; > > + struct plic_priv *priv; > > + bool cpuhp_setup; > > unsigned int cpu; > > + u32 nr_irqs; > > + > > + if (is_of_node(dev->fwnode)) { > > + const struct of_device_id *id; > > + > > + id = of_match_node(plic_match, to_of_node(dev->fwnode)); > > + if (id) > > + plic_quirks = (unsigned long)id->data; > > + } > > > > priv = kzalloc(sizeof(*priv), GFP_KERNEL); > > if (!priv) > > return -ENOMEM; > > > > + priv->dev = dev; > > priv->plic_quirks = plic_quirks; > > > > - priv->regs = of_iomap(node, 0); > > + priv->regs = of_iomap(to_of_node(dev->fwnode), 0); > > if (WARN_ON(!priv->regs)) { > > error = -EIO; > > goto out_free_priv; > > } > > > > error = -EINVAL; > > - of_property_read_u32(node, "riscv,ndev", &nr_irqs); > > + of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", &nr_irqs); > > if (WARN_ON(!nr_irqs)) > > goto out_iounmap; > > > > @@ -439,13 +460,13 @@ static int __init __plic_init(struct device_node *node, > > if (!priv->prio_save) > > goto out_free_priority_reg; > > > > - nr_contexts = of_irq_count(node); > > + nr_contexts = of_irq_count(to_of_node(dev->fwnode)); > > if (WARN_ON(!nr_contexts)) > > goto out_free_priority_reg; > > > > error = -ENOMEM; > > - priv->irqdomain = irq_domain_add_linear(node, nr_irqs + 1, > > - &plic_irqdomain_ops, priv); > > + priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1, > > + &plic_irqdomain_ops, priv); > > if (WARN_ON(!priv->irqdomain)) > > goto out_free_priority_reg; > > > > @@ -455,7 +476,7 @@ static int __init __plic_init(struct device_node *node, > > int cpu; > > unsigned long hartid; > > > > - if (of_irq_parse_one(node, i, &parent)) { > > + if (of_irq_parse_one(to_of_node(dev->fwnode), i, &parent)) { > > pr_err("failed to parse parent for context %d.\n", i); > > continue; > > } > > @@ -491,7 +512,7 @@ static int __init __plic_init(struct device_node *node, > > > > /* Find parent domain and register chained handler */ > > if (!plic_parent_irq && irq_find_host(parent.np)) { > > - plic_parent_irq = irq_of_parse_and_map(node, i); > > + plic_parent_irq = irq_of_parse_and_map(to_of_node(dev->fwnode), i); > > if (plic_parent_irq) > > irq_set_chained_handler(plic_parent_irq, > > plic_handle_irq); > > @@ -533,20 +554,29 @@ static int __init __plic_init(struct device_node *node, > > > > /* > > * We can have multiple PLIC instances so setup cpuhp state > > - * and register syscore operations only when context handler > > - * for current/boot CPU is present. > > + * and register syscore operations only once after context > > + * handlers of all online CPUs are initialized. > > */ > > - handler = this_cpu_ptr(&plic_handlers); > > - if (handler->present && !plic_cpuhp_setup_done) { > > - cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, > > - "irqchip/sifive/plic:starting", > > - plic_starting_cpu, plic_dying_cpu); > > - register_syscore_ops(&plic_irq_syscore_ops); > > - plic_cpuhp_setup_done = true; > > + if (!plic_cpuhp_setup_done) { > > + cpuhp_setup = true; > > + for_each_online_cpu(cpu) { > > + handler = per_cpu_ptr(&plic_handlers, cpu); > > + if (!handler->present) { > > + cpuhp_setup = false; > > + break; > > + } > > + } > > + if (cpuhp_setup) { > > + cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, > > + "irqchip/sifive/plic:starting", > > + plic_starting_cpu, plic_dying_cpu); > > + register_syscore_ops(&plic_irq_syscore_ops); > > + plic_cpuhp_setup_done = true; > > + } > > } > > > > - pr_info("%pOFP: mapped %d interrupts with %d handlers for" > > - " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts); > > + pr_info("%pOFP: mapped %d interrupts with %d handlers for %d contexts.\n", > > + to_of_node(dev->fwnode), nr_irqs, nr_handlers, nr_contexts); > > return 0; > > > > out_free_enable_reg: > > @@ -563,20 +593,11 @@ static int __init __plic_init(struct device_node *node, > > return error; > > } > > > > -static int __init plic_init(struct device_node *node, > > - struct device_node *parent) > > -{ > > - return __plic_init(node, parent, 0); > > -} > > - > > -IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init); > > -IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */ > > - > > -static int __init plic_edge_init(struct device_node *node, > > - struct device_node *parent) > > -{ > > - return __plic_init(node, parent, BIT(PLIC_QUIRK_EDGE_INTERRUPT)); > > -} > > - > > -IRQCHIP_DECLARE(andestech_nceplic100, "andestech,nceplic100", plic_edge_init); > > -IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_edge_init); > > +static struct platform_driver plic_driver = { > > + .driver = { > > + .name = "riscv-plic", > > + .of_match_table = plic_match, > > + }, > > + .probe = plic_probe, > > +}; > > +builtin_platform_driver(plic_driver); > > -- > > 2.34.1 > > > > > > _______________________________________________ > > linux-arm-kernel mailing list > > linux-arm-kernel@lists.infradead.org > > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
On Wed, Apr 3, 2024 at 3:17 PM Anup Patel <apatel@ventanamicro.com> wrote: > > On Wed, Apr 3, 2024 at 2:01 PM Lad, Prabhakar > <prabhakar.csengg@gmail.com> wrote: > > > > Hi Anup, > > > > On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <apatel@ventanamicro.com> wrote: > > > > > > The PLIC driver does not require very early initialization so convert > > > it into a platform driver. > > > > > > After conversion, the PLIC driver is probed after CPUs are brought-up > > > so setup cpuhp state after context handler of all online CPUs are > > > initialized otherwise PLIC driver crashes for platforms with multiple > > > PLIC instances. > > > > > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > > --- > > > drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------ > > > 1 file changed, 61 insertions(+), 40 deletions(-) > > > > > This patch seems to have broken things on RZ/Five SoC, after reverting > > this patch I get to boot it back again on v6.9-rc2. Looks like there > > is some probe order issue after switching to platform driver? > > Yes, this is most likely related to probe ordering based on your DT. > > Can you share the failing boot log and DT ? non working case, https://paste.debian.net/1312947/ after reverting, https://paste.debian.net/1312948/ (attached is the DTB) Cheers, Prabhakar
Hi Prabhakar, On 2024-04-03 10:49 AM, Lad, Prabhakar wrote: > On Wed, Apr 3, 2024 at 3:17 PM Anup Patel <apatel@ventanamicro.com> wrote: >> >> On Wed, Apr 3, 2024 at 2:01 PM Lad, Prabhakar >> <prabhakar.csengg@gmail.com> wrote: >>> >>> Hi Anup, >>> >>> On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <apatel@ventanamicro.com> wrote: >>>> >>>> The PLIC driver does not require very early initialization so convert >>>> it into a platform driver. >>>> >>>> After conversion, the PLIC driver is probed after CPUs are brought-up >>>> so setup cpuhp state after context handler of all online CPUs are >>>> initialized otherwise PLIC driver crashes for platforms with multiple >>>> PLIC instances. >>>> >>>> Signed-off-by: Anup Patel <apatel@ventanamicro.com> >>>> --- >>>> drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------ >>>> 1 file changed, 61 insertions(+), 40 deletions(-) >>>> >>> This patch seems to have broken things on RZ/Five SoC, after reverting >>> this patch I get to boot it back again on v6.9-rc2. Looks like there >>> is some probe order issue after switching to platform driver? >> >> Yes, this is most likely related to probe ordering based on your DT. >> >> Can you share the failing boot log and DT ? > > non working case, https://paste.debian.net/1312947/ Looks like you need to add "keep_bootcon" to your kernel command line to get a full log here. > after reverting, https://paste.debian.net/1312948/ > (attached is the DTB) I don't see anything suspicious between the "riscv-intc" lines and the "Fixed dependency cycle(s)" lines that looks like it would depend on the PLIC IRQ domain. Maybe there is some driver that does not handle -EPROBE_DEFER? It's hard to tell without the full log from the failure case. Regards, Samuel
On Wed, Apr 3, 2024 at 9:19 PM Lad, Prabhakar <prabhakar.csengg@gmail.com> wrote: > > On Wed, Apr 3, 2024 at 3:17 PM Anup Patel <apatel@ventanamicro.com> wrote: > > > > On Wed, Apr 3, 2024 at 2:01 PM Lad, Prabhakar > > <prabhakar.csengg@gmail.com> wrote: > > > > > > Hi Anup, > > > > > > On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <apatel@ventanamicro.com> wrote: > > > > > > > > The PLIC driver does not require very early initialization so convert > > > > it into a platform driver. > > > > > > > > After conversion, the PLIC driver is probed after CPUs are brought-up > > > > so setup cpuhp state after context handler of all online CPUs are > > > > initialized otherwise PLIC driver crashes for platforms with multiple > > > > PLIC instances. > > > > > > > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > > > --- > > > > drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------ > > > > 1 file changed, 61 insertions(+), 40 deletions(-) > > > > > > > This patch seems to have broken things on RZ/Five SoC, after reverting > > > this patch I get to boot it back again on v6.9-rc2. Looks like there > > > is some probe order issue after switching to platform driver? > > > > Yes, this is most likely related to probe ordering based on your DT. > > > > Can you share the failing boot log and DT ? > > non working case, https://paste.debian.net/1312947/ > after reverting, https://paste.debian.net/1312948/ > (attached is the DTB) Can you add "console=ttySC0,115200" to kernel parameters and share updated boot logs ? Regards, Anup
On Wed, Apr 3, 2024 at 9:19 PM Lad, Prabhakar <prabhakar.csengg@gmail.com> wrote: > > On Wed, Apr 3, 2024 at 3:17 PM Anup Patel <apatel@ventanamicro.com> wrote: > > > > On Wed, Apr 3, 2024 at 2:01 PM Lad, Prabhakar > > <prabhakar.csengg@gmail.com> wrote: > > > > > > Hi Anup, > > > > > > On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <apatel@ventanamicro.com> wrote: > > > > > > > > The PLIC driver does not require very early initialization so convert > > > > it into a platform driver. > > > > > > > > After conversion, the PLIC driver is probed after CPUs are brought-up > > > > so setup cpuhp state after context handler of all online CPUs are > > > > initialized otherwise PLIC driver crashes for platforms with multiple > > > > PLIC instances. > > > > > > > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > > > --- > > > > drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------ > > > > 1 file changed, 61 insertions(+), 40 deletions(-) > > > > > > > This patch seems to have broken things on RZ/Five SoC, after reverting > > > this patch I get to boot it back again on v6.9-rc2. Looks like there > > > is some probe order issue after switching to platform driver? > > > > Yes, this is most likely related to probe ordering based on your DT. > > > > Can you share the failing boot log and DT ? > > non working case, https://paste.debian.net/1312947/ > after reverting, https://paste.debian.net/1312948/ > (attached is the DTB) One potential problem is that drivers/clocksource/renesas-ostm.c is probed early using TIMER_OF_DECLARE() but the timer interrupt is connected to PLIC which is probed late hence the timer probe will fail. We have two possible options: 1) Disable OSTM nodes 2) Improve the OSTM driver to probe like a regular platform device on RISC-V Regards, Anup
Hi Samuel and Anup, On Wed, Apr 3, 2024 at 5:28 PM Samuel Holland <samuel.holland@sifive.com> wrote: > > Hi Prabhakar, > > On 2024-04-03 10:49 AM, Lad, Prabhakar wrote: > > On Wed, Apr 3, 2024 at 3:17 PM Anup Patel <apatel@ventanamicro.com> wrote: > >> > >> On Wed, Apr 3, 2024 at 2:01 PM Lad, Prabhakar > >> <prabhakar.csengg@gmail.com> wrote: > >>> > >>> Hi Anup, > >>> > >>> On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <apatel@ventanamicro.com> wrote: > >>>> > >>>> The PLIC driver does not require very early initialization so convert > >>>> it into a platform driver. > >>>> > >>>> After conversion, the PLIC driver is probed after CPUs are brought-up > >>>> so setup cpuhp state after context handler of all online CPUs are > >>>> initialized otherwise PLIC driver crashes for platforms with multiple > >>>> PLIC instances. > >>>> > >>>> Signed-off-by: Anup Patel <apatel@ventanamicro.com> > >>>> --- > >>>> drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------ > >>>> 1 file changed, 61 insertions(+), 40 deletions(-) > >>>> > >>> This patch seems to have broken things on RZ/Five SoC, after reverting > >>> this patch I get to boot it back again on v6.9-rc2. Looks like there > >>> is some probe order issue after switching to platform driver? > >> > >> Yes, this is most likely related to probe ordering based on your DT. > >> > >> Can you share the failing boot log and DT ? > > > > non working case, https://paste.debian.net/1312947/ > > Looks like you need to add "keep_bootcon" to your kernel command line to get a > full log here. > Thanks for the pointer, that helped me to get to the root cause. > > after reverting, https://paste.debian.net/1312948/ > > (attached is the DTB) > > I don't see anything suspicious between the "riscv-intc" lines and the "Fixed > dependency cycle(s)" lines that looks like it would depend on the PLIC IRQ > domain. Maybe there is some driver that does not handle -EPROBE_DEFER? It's hard > to tell without the full log from the failure case. > The clock required for the PLIC wasnt available during the probe of this driver. This bug got hidden when the PLIC driver was probed earlier in boot where it used an incorrect clock source. Ive created a patch which adds a missing clock for the PLIC. Sorry for the noise! Cheers, Prabhakar
Hi Anup, On Thu, Feb 22, 2024 at 10:41 AM Anup Patel <apatel@ventanamicro.com> wrote: > The PLIC driver does not require very early initialization so convert > it into a platform driver. > > After conversion, the PLIC driver is probed after CPUs are brought-up > so setup cpuhp state after context handler of all online CPUs are > initialized otherwise PLIC driver crashes for platforms with multiple > PLIC instances. > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> Thanks for your patch, which is now commit 8ec99b033147ef3b ("irqchip/sifive-plic: Convert PLIC driver into a platform driver") in v6.9. It looks like this conversion is causing issues on BeagleV Starlight Beta. After updating esmil/visionfive to v6.10-rc1, the kernel usually fails to boot. Adding "earlycon keep_bootcon" reveals these differences: -riscv-plic c000000.interrupt-controller: mapped 133 interrupts with 2 handlers for 4 contexts. +------------[ cut here ]------------ +WARNING: CPU: 0 PID: 1 at drivers/irqchip/irq-sifive-plic.c:373 plic_handle_irq+0xf2/0xf6 +Modules linked in: +CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.10.0-rc1-starlight-02342-g0ba4c76ca0e8-dirty #323 +Hardware name: BeagleV Starlight Beta (DT) +epc : plic_handle_irq+0xf2/0xf6 + ra : generic_handle_domain_irq+0x1c/0x2a +epc : ffffffff8033f994 ra : ffffffff8006319a sp : ffffffc800003f50 + gp : ffffffff812d63f0 tp : ffffffd8800b8000 t0 : 0000000000000040 + t1 : 0000000000000000 t2 : 0000000000001000 s0 : ffffffc800003fa0 + s1 : 0000000000000009 a0 : ffffffd880183600 a1 : 0000000000000009 + a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 + a5 : 0000000000000000 a6 : ffffffd880400248 a7 : ffffffd8804002b8 + s2 : ffffffd9f8fac458 s3 : 0000000000000004 s4 : 0000000000000000 + s5 : ffffffff81293f58 s6 : ffffffd88014ac00 s7 : 0000000000000004 + s8 : ffffffc800013b2c s9 : ffffffc800013b34 s10: 0000000000000006 + s11: ffffffd9f8fc1458 t3 : 0000000000000002 t4 : 0000000000000402 + t5 : ffffffd8800610c0 t6 : ffffffd8800610e0 +status: 0000000200000100 badaddr: ffffffd9f8fac458 cause: 0000000000000003 +[<ffffffff8033f994>] plic_handle_irq+0xf2/0xf6 +[<ffffffff8006319a>] generic_handle_domain_irq+0x1c/0x2a +[<ffffffff8033d7aa>] riscv_intc_irq+0x26/0x60 +[<ffffffff806c92ee>] handle_riscv_irq+0x4a/0x74 +[<ffffffff806d2346>] call_on_irq_stack+0x32/0x40 +---[ end trace 0000000000000000 ]--- +Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004 +Oops [#1] +Modules linked in: +CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.10.0-rc1-starlight-02342-g0ba4c76ca0e8-dirty #323 +Hardware name: BeagleV Starlight Beta (DT) +epc : plic_handle_irq+0x66/0xf6 + ra : generic_handle_domain_irq+0x1c/0x2a +epc : ffffffff8033f908 ra : ffffffff8006319a sp : ffffffc800003f50 + gp : ffffffff812d63f0 tp : ffffffd8800b8000 t0 : 0000000000000040 + t1 : 0000000000000000 t2 : 0000000000001000 s0 : ffffffc800003fa0 + s1 : 0000000000000009 a0 : ffffffd880183600 a1 : 0000000000000009 + a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 + a5 : ffffffff8033d72a a6 : ffffffd880400248 a7 : ffffffd8804002b8 + s2 : ffffffd9f8fac458 s3 : 0000000000000004 s4 : ffffffd880183630 + s5 : ffffffff81293f58 s6 : ffffffff812948a0 s7 : ffffffff80c4e660 + s8 : ffffffff80d9eea0 s9 : ffffffc800013b34 s10: 0000000000000006 + s11: ffffffd9f8fc1458 t3 : 0000000000000002 t4 : 0000000000000402 + t5 : ffffffd8800610c0 t6 : ffffffd8800610e0 +status: 0000000200000100 badaddr: 0000000000000004 cause: 000000000000000d +[<ffffffff8033f908>] plic_handle_irq+0x66/0xf6 +[<ffffffff8006319a>] generic_handle_domain_irq+0x1c/0x2a +[<ffffffff8033d7aa>] riscv_intc_irq+0x26/0x60 +[<ffffffff806c92ee>] handle_riscv_irq+0x4a/0x74 +[<ffffffff806d2346>] call_on_irq_stack+0x32/0x40 +Code: 8b93 d70b 5b17 00f5 0b13 fa8b fc17 00a5 0c13 5a0c (a783) 0009 +---[ end trace 0000000000000000 ]--- +Kernel panic - not syncing: Fatal exception in interrupt +SMP: stopping secondary CPUs +---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- As "mapped 133 interrupts" is no longer printed, it looks like an unexpected early interrupt comes in while still in plic_probe(). Esmil suggested reverting all of: a7fb69ffd7ce438a irqchip/sifive-plic: Avoid explicit cpumask allocation on stack abb7205794900503 irqchip/sifive-plic: Improve locking safety by using irqsave/irqrestore 95652106478030f5 irqchip/sifive-plic: Parse number of interrupts and contexts early in plic_probe() a15587277a246c38 irqchip/sifive-plic: Cleanup PLIC contexts upon irqdomain creation failure 6c725f33d67b53f2 irqchip/sifive-plic: Use riscv_get_intc_hwnode() to get parent fwnode b68d0ff529a939a1 irqchip/sifive-plic: Use devm_xyz() for managed allocation 25d862e183d4efeb irqchip/sifive-plic: Use dev_xyz() in-place of pr_xyz() 8ec99b033147ef3b irqchip/sifive-plic: Convert PLIC driver into a platform driver After this, the PLIC is initialized earlier again, and this indeed seems to fix the issue for me. Before, the kernel booted fine in only ca. 1 out of 5 tries. After the reverts, it booted 5/5. Do you know what's going on? Is there a simpler fix? Thanks! Gr{oetje,eeting}s, Geert
Hi Geert, On 2024-05-29 9:22 AM, Geert Uytterhoeven wrote: > Hi Anup, > > On Thu, Feb 22, 2024 at 10:41 AM Anup Patel <apatel@ventanamicro.com> wrote: >> The PLIC driver does not require very early initialization so convert >> it into a platform driver. >> >> After conversion, the PLIC driver is probed after CPUs are brought-up >> so setup cpuhp state after context handler of all online CPUs are >> initialized otherwise PLIC driver crashes for platforms with multiple >> PLIC instances. >> >> Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > Thanks for your patch, which is now commit 8ec99b033147ef3b > ("irqchip/sifive-plic: Convert PLIC driver into a platform > driver") in v6.9. > > It looks like this conversion is causing issues on BeagleV Starlight > Beta. After updating esmil/visionfive to v6.10-rc1, the kernel usually > fails to boot. Adding "earlycon keep_bootcon" reveals these differences: > > -riscv-plic c000000.interrupt-controller: mapped 133 interrupts with 2 > handlers for 4 contexts. > +------------[ cut here ]------------ > +WARNING: CPU: 0 PID: 1 at drivers/irqchip/irq-sifive-plic.c:373 > plic_handle_irq+0xf2/0xf6 > +Modules linked in: > +CPU: 0 PID: 1 Comm: swapper/0 Not tainted > 6.10.0-rc1-starlight-02342-g0ba4c76ca0e8-dirty #323 > +Hardware name: BeagleV Starlight Beta (DT) > +epc : plic_handle_irq+0xf2/0xf6 > + ra : generic_handle_domain_irq+0x1c/0x2a > +epc : ffffffff8033f994 ra : ffffffff8006319a sp : ffffffc800003f50 > + gp : ffffffff812d63f0 tp : ffffffd8800b8000 t0 : 0000000000000040 > + t1 : 0000000000000000 t2 : 0000000000001000 s0 : ffffffc800003fa0 > + s1 : 0000000000000009 a0 : ffffffd880183600 a1 : 0000000000000009 > + a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 > + a5 : 0000000000000000 a6 : ffffffd880400248 a7 : ffffffd8804002b8 > + s2 : ffffffd9f8fac458 s3 : 0000000000000004 s4 : 0000000000000000 > + s5 : ffffffff81293f58 s6 : ffffffd88014ac00 s7 : 0000000000000004 > + s8 : ffffffc800013b2c s9 : ffffffc800013b34 s10: 0000000000000006 > + s11: ffffffd9f8fc1458 t3 : 0000000000000002 t4 : 0000000000000402 > + t5 : ffffffd8800610c0 t6 : ffffffd8800610e0 > +status: 0000000200000100 badaddr: ffffffd9f8fac458 cause: 0000000000000003 > +[<ffffffff8033f994>] plic_handle_irq+0xf2/0xf6 > +[<ffffffff8006319a>] generic_handle_domain_irq+0x1c/0x2a > +[<ffffffff8033d7aa>] riscv_intc_irq+0x26/0x60 > +[<ffffffff806c92ee>] handle_riscv_irq+0x4a/0x74 > +[<ffffffff806d2346>] call_on_irq_stack+0x32/0x40 > +---[ end trace 0000000000000000 ]--- > +Unable to handle kernel NULL pointer dereference at virtual address > 0000000000000004 > +Oops [#1] > +Modules linked in: > +CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W > 6.10.0-rc1-starlight-02342-g0ba4c76ca0e8-dirty #323 > +Hardware name: BeagleV Starlight Beta (DT) > +epc : plic_handle_irq+0x66/0xf6 > + ra : generic_handle_domain_irq+0x1c/0x2a > +epc : ffffffff8033f908 ra : ffffffff8006319a sp : ffffffc800003f50 > + gp : ffffffff812d63f0 tp : ffffffd8800b8000 t0 : 0000000000000040 > + t1 : 0000000000000000 t2 : 0000000000001000 s0 : ffffffc800003fa0 > + s1 : 0000000000000009 a0 : ffffffd880183600 a1 : 0000000000000009 > + a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 > + a5 : ffffffff8033d72a a6 : ffffffd880400248 a7 : ffffffd8804002b8 > + s2 : ffffffd9f8fac458 s3 : 0000000000000004 s4 : ffffffd880183630 > + s5 : ffffffff81293f58 s6 : ffffffff812948a0 s7 : ffffffff80c4e660 > + s8 : ffffffff80d9eea0 s9 : ffffffc800013b34 s10: 0000000000000006 > + s11: ffffffd9f8fc1458 t3 : 0000000000000002 t4 : 0000000000000402 > + t5 : ffffffd8800610c0 t6 : ffffffd8800610e0 > +status: 0000000200000100 badaddr: 0000000000000004 cause: 000000000000000d > +[<ffffffff8033f908>] plic_handle_irq+0x66/0xf6 > +[<ffffffff8006319a>] generic_handle_domain_irq+0x1c/0x2a > +[<ffffffff8033d7aa>] riscv_intc_irq+0x26/0x60 > +[<ffffffff806c92ee>] handle_riscv_irq+0x4a/0x74 > +[<ffffffff806d2346>] call_on_irq_stack+0x32/0x40 > +Code: 8b93 d70b 5b17 00f5 0b13 fa8b fc17 00a5 0c13 5a0c (a783) 0009 > +---[ end trace 0000000000000000 ]--- > +Kernel panic - not syncing: Fatal exception in interrupt > +SMP: stopping secondary CPUs > +---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- > > As "mapped 133 interrupts" is no longer printed, it looks like an > unexpected early interrupt comes in while still in plic_probe(). > > Esmil suggested reverting all of: > a7fb69ffd7ce438a irqchip/sifive-plic: Avoid explicit cpumask allocation on stack > abb7205794900503 irqchip/sifive-plic: Improve locking safety by using > irqsave/irqrestore > 95652106478030f5 irqchip/sifive-plic: Parse number of interrupts and > contexts early in plic_probe() > a15587277a246c38 irqchip/sifive-plic: Cleanup PLIC contexts upon > irqdomain creation failure > 6c725f33d67b53f2 irqchip/sifive-plic: Use riscv_get_intc_hwnode() to > get parent fwnode > b68d0ff529a939a1 irqchip/sifive-plic: Use devm_xyz() for managed allocation > 25d862e183d4efeb irqchip/sifive-plic: Use dev_xyz() in-place of pr_xyz() > 8ec99b033147ef3b irqchip/sifive-plic: Convert PLIC driver into a platform driver > > After this, the PLIC is initialized earlier again, and this indeed > seems to fix the issue for me. > Before, the kernel booted fine in only ca. 1 out of 5 tries. > After the reverts, it booted 5/5. > > Do you know what's going on? Is there a simpler fix? The fact that you hit the warning indicates that plic_handle_irq() was called before handler->present was set. Previously the PLIC driver was probed very early, so it is unlikely that some peripheral already had a pending interrupt. Now, while platform device drivers would not yet be able to request interrupts (because the irqdomain is not registered yet), they could have programmed the hardware in a way that generates an interrupt. If that interrupt was enabled at the PLIC (e.g. by the bootloader), then we could expect plic_handle_irq() to be called as soon as irq_set_chained_handler() is called. So the fix is to not call irq_set_chained_handler() until after the handlers are completely set up. I've sent a patch doing this: https://lore.kernel.org/linux-riscv/20240529215458.937817-1-samuel.holland@sifive.com/ Regards, Samuel
Hi Samuel, On Thu, May 30, 2024 at 12:04 AM Samuel Holland <samuel.holland@sifive.com> wrote: > On 2024-05-29 9:22 AM, Geert Uytterhoeven wrote: > > On Thu, Feb 22, 2024 at 10:41 AM Anup Patel <apatel@ventanamicro.com> wrote: > >> The PLIC driver does not require very early initialization so convert > >> it into a platform driver. > >> > >> After conversion, the PLIC driver is probed after CPUs are brought-up > >> so setup cpuhp state after context handler of all online CPUs are > >> initialized otherwise PLIC driver crashes for platforms with multiple > >> PLIC instances. > >> > >> Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > > > Thanks for your patch, which is now commit 8ec99b033147ef3b > > ("irqchip/sifive-plic: Convert PLIC driver into a platform > > driver") in v6.9. > > > > It looks like this conversion is causing issues on BeagleV Starlight > > Beta. After updating esmil/visionfive to v6.10-rc1, the kernel usually > > fails to boot. Adding "earlycon keep_bootcon" reveals these differences: > > > > -riscv-plic c000000.interrupt-controller: mapped 133 interrupts with 2 > > handlers for 4 contexts. > > +------------[ cut here ]------------ > > +WARNING: CPU: 0 PID: 1 at drivers/irqchip/irq-sifive-plic.c:373 > > +Unable to handle kernel NULL pointer dereference at virtual address > The fact that you hit the warning indicates that plic_handle_irq() was called > before handler->present was set. Previously the PLIC driver was probed very > early, so it is unlikely that some peripheral already had a pending interrupt. > Now, while platform device drivers would not yet be able to request interrupts > (because the irqdomain is not registered yet), they could have programmed the > hardware in a way that generates an interrupt. If that interrupt was enabled at > the PLIC (e.g. by the bootloader), then we could expect plic_handle_irq() to be > called as soon as irq_set_chained_handler() is called. > > So the fix is to not call irq_set_chained_handler() until after the handlers are > completely set up. > > I've sent a patch doing this: > https://lore.kernel.org/linux-riscv/20240529215458.937817-1-samuel.holland@sifive.com/ Thanks, that fixed the issue! Gr{oetje,eeting}s, Geert
Anup Patel wrote: > The PLIC driver does not require very early initialization so convert > it into a platform driver. > > After conversion, the PLIC driver is probed after CPUs are brought-up > so setup cpuhp state after context handler of all online CPUs are > initialized otherwise PLIC driver crashes for platforms with multiple > PLIC instances. > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> Hi Anup, Sorry for the late reply to the mailing list, but ever since 6.9 where this was applied my Allwinner D1 based boards no longer boot. This is the log of my LicheeRV Dock booting plain 6.10-rc4, locking up and then rebooting due to the the watchdog timing out: https://pastebin.com/raw/nsbzgEKW On 6.10-rc4 I can bring the same board to boot by reverting this patch and all patches building on it. Eg.: git revert e306a894bd51 a7fb69ffd7ce abb720579490 \ 956521064780 a15587277a24 6c725f33d67b \ b68d0ff529a9 25d862e183d4 8ec99b033147 After that it boots but run into this separate issue: https://lore.kernel.org/all/DM6PR01MB58047C810DDD5D0AE397CADFF7C22@DM6PR01MB5804.prod.exchangelabs.com/ Samuel: After the reverts above applying this also prevents my board from booting: https://lore.kernel.org/all/20240312192519.1602493-1-samuel.holland@sifive.com/ /Emil > --- > drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------ > 1 file changed, 61 insertions(+), 40 deletions(-) > > diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c > index 5b7bc4fd9517..7400a07fc479 100644 > --- a/drivers/irqchip/irq-sifive-plic.c > +++ b/drivers/irqchip/irq-sifive-plic.c > @@ -64,6 +64,7 @@ > #define PLIC_QUIRK_EDGE_INTERRUPT 0 > > struct plic_priv { > + struct device *dev; > struct cpumask lmask; > struct irq_domain *irqdomain; > void __iomem *regs; > @@ -406,30 +407,50 @@ static int plic_starting_cpu(unsigned int cpu) > return 0; > } > > -static int __init __plic_init(struct device_node *node, > - struct device_node *parent, > - unsigned long plic_quirks) > +static const struct of_device_id plic_match[] = { > + { .compatible = "sifive,plic-1.0.0" }, > + { .compatible = "riscv,plic0" }, > + { .compatible = "andestech,nceplic100", > + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) }, > + { .compatible = "thead,c900-plic", > + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) }, > + {} > +}; > + > +static int plic_probe(struct platform_device *pdev) > { > int error = 0, nr_contexts, nr_handlers = 0, i; > - u32 nr_irqs; > - struct plic_priv *priv; > + struct device *dev = &pdev->dev; > + unsigned long plic_quirks = 0; > struct plic_handler *handler; > + struct plic_priv *priv; > + bool cpuhp_setup; > unsigned int cpu; > + u32 nr_irqs; > + > + if (is_of_node(dev->fwnode)) { > + const struct of_device_id *id; > + > + id = of_match_node(plic_match, to_of_node(dev->fwnode)); > + if (id) > + plic_quirks = (unsigned long)id->data; > + } > > priv = kzalloc(sizeof(*priv), GFP_KERNEL); > if (!priv) > return -ENOMEM; > > + priv->dev = dev; > priv->plic_quirks = plic_quirks; > > - priv->regs = of_iomap(node, 0); > + priv->regs = of_iomap(to_of_node(dev->fwnode), 0); > if (WARN_ON(!priv->regs)) { > error = -EIO; > goto out_free_priv; > } > > error = -EINVAL; > - of_property_read_u32(node, "riscv,ndev", &nr_irqs); > + of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", &nr_irqs); > if (WARN_ON(!nr_irqs)) > goto out_iounmap; > > @@ -439,13 +460,13 @@ static int __init __plic_init(struct device_node *node, > if (!priv->prio_save) > goto out_free_priority_reg; > > - nr_contexts = of_irq_count(node); > + nr_contexts = of_irq_count(to_of_node(dev->fwnode)); > if (WARN_ON(!nr_contexts)) > goto out_free_priority_reg; > > error = -ENOMEM; > - priv->irqdomain = irq_domain_add_linear(node, nr_irqs + 1, > - &plic_irqdomain_ops, priv); > + priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1, > + &plic_irqdomain_ops, priv); > if (WARN_ON(!priv->irqdomain)) > goto out_free_priority_reg; > > @@ -455,7 +476,7 @@ static int __init __plic_init(struct device_node *node, > int cpu; > unsigned long hartid; > > - if (of_irq_parse_one(node, i, &parent)) { > + if (of_irq_parse_one(to_of_node(dev->fwnode), i, &parent)) { > pr_err("failed to parse parent for context %d.\n", i); > continue; > } > @@ -491,7 +512,7 @@ static int __init __plic_init(struct device_node *node, > > /* Find parent domain and register chained handler */ > if (!plic_parent_irq && irq_find_host(parent.np)) { > - plic_parent_irq = irq_of_parse_and_map(node, i); > + plic_parent_irq = irq_of_parse_and_map(to_of_node(dev->fwnode), i); > if (plic_parent_irq) > irq_set_chained_handler(plic_parent_irq, > plic_handle_irq); > @@ -533,20 +554,29 @@ static int __init __plic_init(struct device_node *node, > > /* > * We can have multiple PLIC instances so setup cpuhp state > - * and register syscore operations only when context handler > - * for current/boot CPU is present. > + * and register syscore operations only once after context > + * handlers of all online CPUs are initialized. > */ > - handler = this_cpu_ptr(&plic_handlers); > - if (handler->present && !plic_cpuhp_setup_done) { > - cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, > - "irqchip/sifive/plic:starting", > - plic_starting_cpu, plic_dying_cpu); > - register_syscore_ops(&plic_irq_syscore_ops); > - plic_cpuhp_setup_done = true; > + if (!plic_cpuhp_setup_done) { > + cpuhp_setup = true; > + for_each_online_cpu(cpu) { > + handler = per_cpu_ptr(&plic_handlers, cpu); > + if (!handler->present) { > + cpuhp_setup = false; > + break; > + } > + } > + if (cpuhp_setup) { > + cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, > + "irqchip/sifive/plic:starting", > + plic_starting_cpu, plic_dying_cpu); > + register_syscore_ops(&plic_irq_syscore_ops); > + plic_cpuhp_setup_done = true; > + } > } > > - pr_info("%pOFP: mapped %d interrupts with %d handlers for" > - " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts); > + pr_info("%pOFP: mapped %d interrupts with %d handlers for %d contexts.\n", > + to_of_node(dev->fwnode), nr_irqs, nr_handlers, nr_contexts); > return 0; > > out_free_enable_reg: > @@ -563,20 +593,11 @@ static int __init __plic_init(struct device_node *node, > return error; > } > > -static int __init plic_init(struct device_node *node, > - struct device_node *parent) > -{ > - return __plic_init(node, parent, 0); > -} > - > -IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init); > -IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */ > - > -static int __init plic_edge_init(struct device_node *node, > - struct device_node *parent) > -{ > - return __plic_init(node, parent, BIT(PLIC_QUIRK_EDGE_INTERRUPT)); > -} > - > -IRQCHIP_DECLARE(andestech_nceplic100, "andestech,nceplic100", plic_edge_init); > -IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_edge_init); > +static struct platform_driver plic_driver = { > + .driver = { > + .name = "riscv-plic", > + .of_match_table = plic_match, > + }, > + .probe = plic_probe, > +}; > +builtin_platform_driver(plic_driver); > -- > 2.34.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Tue, Jun 18, 2024 at 7:00 PM Emil Renner Berthing <emil.renner.berthing@canonical.com> wrote: > > Anup Patel wrote: > > The PLIC driver does not require very early initialization so convert > > it into a platform driver. > > > > After conversion, the PLIC driver is probed after CPUs are brought-up > > so setup cpuhp state after context handler of all online CPUs are > > initialized otherwise PLIC driver crashes for platforms with multiple > > PLIC instances. > > > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > Hi Anup, > > Sorry for the late reply to the mailing list, but ever since 6.9 where this was > applied my Allwinner D1 based boards no longer boot. This is the log of my > LicheeRV Dock booting plain 6.10-rc4, locking up and then rebooting due to the > the watchdog timing out: > > https://pastebin.com/raw/nsbzgEKW > > On 6.10-rc4 I can bring the same board to boot by reverting this patch and all > patches building on it. Eg.: > > git revert e306a894bd51 a7fb69ffd7ce abb720579490 \ > 956521064780 a15587277a24 6c725f33d67b \ > b68d0ff529a9 25d862e183d4 8ec99b033147 Does your board boot with only SBI timer driver enabled ? If yes then probing of Allwinner timer driver needs to be fixed since it depends on PLIC. Regards, Anup > > After that it boots but run into this separate issue: > https://lore.kernel.org/all/DM6PR01MB58047C810DDD5D0AE397CADFF7C22@DM6PR01MB5804.prod.exchangelabs.com/ > > Samuel: After the reverts above applying this also prevents my board from > booting: > https://lore.kernel.org/all/20240312192519.1602493-1-samuel.holland@sifive.com/ > > /Emil > > > --- > > drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------ > > 1 file changed, 61 insertions(+), 40 deletions(-) > > > > diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c > > index 5b7bc4fd9517..7400a07fc479 100644 > > --- a/drivers/irqchip/irq-sifive-plic.c > > +++ b/drivers/irqchip/irq-sifive-plic.c > > @@ -64,6 +64,7 @@ > > #define PLIC_QUIRK_EDGE_INTERRUPT 0 > > > > struct plic_priv { > > + struct device *dev; > > struct cpumask lmask; > > struct irq_domain *irqdomain; > > void __iomem *regs; > > @@ -406,30 +407,50 @@ static int plic_starting_cpu(unsigned int cpu) > > return 0; > > } > > > > -static int __init __plic_init(struct device_node *node, > > - struct device_node *parent, > > - unsigned long plic_quirks) > > +static const struct of_device_id plic_match[] = { > > + { .compatible = "sifive,plic-1.0.0" }, > > + { .compatible = "riscv,plic0" }, > > + { .compatible = "andestech,nceplic100", > > + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) }, > > + { .compatible = "thead,c900-plic", > > + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) }, > > + {} > > +}; > > + > > +static int plic_probe(struct platform_device *pdev) > > { > > int error = 0, nr_contexts, nr_handlers = 0, i; > > - u32 nr_irqs; > > - struct plic_priv *priv; > > + struct device *dev = &pdev->dev; > > + unsigned long plic_quirks = 0; > > struct plic_handler *handler; > > + struct plic_priv *priv; > > + bool cpuhp_setup; > > unsigned int cpu; > > + u32 nr_irqs; > > + > > + if (is_of_node(dev->fwnode)) { > > + const struct of_device_id *id; > > + > > + id = of_match_node(plic_match, to_of_node(dev->fwnode)); > > + if (id) > > + plic_quirks = (unsigned long)id->data; > > + } > > > > priv = kzalloc(sizeof(*priv), GFP_KERNEL); > > if (!priv) > > return -ENOMEM; > > > > + priv->dev = dev; > > priv->plic_quirks = plic_quirks; > > > > - priv->regs = of_iomap(node, 0); > > + priv->regs = of_iomap(to_of_node(dev->fwnode), 0); > > if (WARN_ON(!priv->regs)) { > > error = -EIO; > > goto out_free_priv; > > } > > > > error = -EINVAL; > > - of_property_read_u32(node, "riscv,ndev", &nr_irqs); > > + of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", &nr_irqs); > > if (WARN_ON(!nr_irqs)) > > goto out_iounmap; > > > > @@ -439,13 +460,13 @@ static int __init __plic_init(struct device_node *node, > > if (!priv->prio_save) > > goto out_free_priority_reg; > > > > - nr_contexts = of_irq_count(node); > > + nr_contexts = of_irq_count(to_of_node(dev->fwnode)); > > if (WARN_ON(!nr_contexts)) > > goto out_free_priority_reg; > > > > error = -ENOMEM; > > - priv->irqdomain = irq_domain_add_linear(node, nr_irqs + 1, > > - &plic_irqdomain_ops, priv); > > + priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1, > > + &plic_irqdomain_ops, priv); > > if (WARN_ON(!priv->irqdomain)) > > goto out_free_priority_reg; > > > > @@ -455,7 +476,7 @@ static int __init __plic_init(struct device_node *node, > > int cpu; > > unsigned long hartid; > > > > - if (of_irq_parse_one(node, i, &parent)) { > > + if (of_irq_parse_one(to_of_node(dev->fwnode), i, &parent)) { > > pr_err("failed to parse parent for context %d.\n", i); > > continue; > > } > > @@ -491,7 +512,7 @@ static int __init __plic_init(struct device_node *node, > > > > /* Find parent domain and register chained handler */ > > if (!plic_parent_irq && irq_find_host(parent.np)) { > > - plic_parent_irq = irq_of_parse_and_map(node, i); > > + plic_parent_irq = irq_of_parse_and_map(to_of_node(dev->fwnode), i); > > if (plic_parent_irq) > > irq_set_chained_handler(plic_parent_irq, > > plic_handle_irq); > > @@ -533,20 +554,29 @@ static int __init __plic_init(struct device_node *node, > > > > /* > > * We can have multiple PLIC instances so setup cpuhp state > > - * and register syscore operations only when context handler > > - * for current/boot CPU is present. > > + * and register syscore operations only once after context > > + * handlers of all online CPUs are initialized. > > */ > > - handler = this_cpu_ptr(&plic_handlers); > > - if (handler->present && !plic_cpuhp_setup_done) { > > - cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, > > - "irqchip/sifive/plic:starting", > > - plic_starting_cpu, plic_dying_cpu); > > - register_syscore_ops(&plic_irq_syscore_ops); > > - plic_cpuhp_setup_done = true; > > + if (!plic_cpuhp_setup_done) { > > + cpuhp_setup = true; > > + for_each_online_cpu(cpu) { > > + handler = per_cpu_ptr(&plic_handlers, cpu); > > + if (!handler->present) { > > + cpuhp_setup = false; > > + break; > > + } > > + } > > + if (cpuhp_setup) { > > + cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, > > + "irqchip/sifive/plic:starting", > > + plic_starting_cpu, plic_dying_cpu); > > + register_syscore_ops(&plic_irq_syscore_ops); > > + plic_cpuhp_setup_done = true; > > + } > > } > > > > - pr_info("%pOFP: mapped %d interrupts with %d handlers for" > > - " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts); > > + pr_info("%pOFP: mapped %d interrupts with %d handlers for %d contexts.\n", > > + to_of_node(dev->fwnode), nr_irqs, nr_handlers, nr_contexts); > > return 0; > > > > out_free_enable_reg: > > @@ -563,20 +593,11 @@ static int __init __plic_init(struct device_node *node, > > return error; > > } > > > > -static int __init plic_init(struct device_node *node, > > - struct device_node *parent) > > -{ > > - return __plic_init(node, parent, 0); > > -} > > - > > -IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init); > > -IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */ > > - > > -static int __init plic_edge_init(struct device_node *node, > > - struct device_node *parent) > > -{ > > - return __plic_init(node, parent, BIT(PLIC_QUIRK_EDGE_INTERRUPT)); > > -} > > - > > -IRQCHIP_DECLARE(andestech_nceplic100, "andestech,nceplic100", plic_edge_init); > > -IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_edge_init); > > +static struct platform_driver plic_driver = { > > + .driver = { > > + .name = "riscv-plic", > > + .of_match_table = plic_match, > > + }, > > + .probe = plic_probe, > > +}; > > +builtin_platform_driver(plic_driver); > > -- > > 2.34.1 > > > > > > _______________________________________________ > > linux-riscv mailing list > > linux-riscv@lists.infradead.org > > http://lists.infradead.org/mailman/listinfo/linux-riscv
Anup Patel wrote: > On Tue, Jun 18, 2024 at 7:00 PM Emil Renner Berthing > <emil.renner.berthing@canonical.com> wrote: > > > > Anup Patel wrote: > > > The PLIC driver does not require very early initialization so convert > > > it into a platform driver. > > > > > > After conversion, the PLIC driver is probed after CPUs are brought-up > > > so setup cpuhp state after context handler of all online CPUs are > > > initialized otherwise PLIC driver crashes for platforms with multiple > > > PLIC instances. > > > > > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > > > Hi Anup, > > > > Sorry for the late reply to the mailing list, but ever since 6.9 where this was > > applied my Allwinner D1 based boards no longer boot. This is the log of my > > LicheeRV Dock booting plain 6.10-rc4, locking up and then rebooting due to the > > the watchdog timing out: > > > > https://pastebin.com/raw/nsbzgEKW > > > > On 6.10-rc4 I can bring the same board to boot by reverting this patch and all > > patches building on it. Eg.: > > > > git revert e306a894bd51 a7fb69ffd7ce abb720579490 \ > > 956521064780 a15587277a24 6c725f33d67b \ > > b68d0ff529a9 25d862e183d4 8ec99b033147 > > Does your board boot with only SBI timer driver enabled ? I'm not 100% sure this is what you mean, but with this change I can disable CONFIG_SUN4I_TIMER: diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index f51bb24bc84c..0143545348eb 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -39,7 +39,6 @@ config ARCH_SUNXI bool "Allwinner sun20i SoCs" depends on MMU && !XIP_KERNEL select ERRATA_THEAD - select SUN4I_TIMER help This enables support for Allwinner sun20i platform hardware, including boards based on the D1 and D1s SoCs. But unfortunately the board still doesn't boot: https://pastebin.com/raw/AwRxcfeu /Emil
On Wed, Jun 19, 2024 at 11:16 PM Emil Renner Berthing <emil.renner.berthing@canonical.com> wrote: > > Anup Patel wrote: > > On Tue, Jun 18, 2024 at 7:00 PM Emil Renner Berthing > > <emil.renner.berthing@canonical.com> wrote: > > > > > > Anup Patel wrote: > > > > The PLIC driver does not require very early initialization so convert > > > > it into a platform driver. > > > > > > > > After conversion, the PLIC driver is probed after CPUs are brought-up > > > > so setup cpuhp state after context handler of all online CPUs are > > > > initialized otherwise PLIC driver crashes for platforms with multiple > > > > PLIC instances. > > > > > > > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > > > > > Hi Anup, > > > > > > Sorry for the late reply to the mailing list, but ever since 6.9 where this was > > > applied my Allwinner D1 based boards no longer boot. This is the log of my > > > LicheeRV Dock booting plain 6.10-rc4, locking up and then rebooting due to the > > > the watchdog timing out: > > > > > > https://pastebin.com/raw/nsbzgEKW > > > > > > On 6.10-rc4 I can bring the same board to boot by reverting this patch and all > > > patches building on it. Eg.: > > > > > > git revert e306a894bd51 a7fb69ffd7ce abb720579490 \ > > > 956521064780 a15587277a24 6c725f33d67b \ > > > b68d0ff529a9 25d862e183d4 8ec99b033147 > > > > Does your board boot with only SBI timer driver enabled ? > > I'm not 100% sure this is what you mean, but with this change I can disable > CONFIG_SUN4I_TIMER: > > diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs > index f51bb24bc84c..0143545348eb 100644 > --- a/arch/riscv/Kconfig.socs > +++ b/arch/riscv/Kconfig.socs > @@ -39,7 +39,6 @@ config ARCH_SUNXI > bool "Allwinner sun20i SoCs" > depends on MMU && !XIP_KERNEL > select ERRATA_THEAD > - select SUN4I_TIMER > help > This enables support for Allwinner sun20i platform hardware, > including boards based on the D1 and D1s SoCs. > > > But unfortunately the board still doesn't boot: > https://pastebin.com/raw/AwRxcfeu I think we should enable debug prints in DD core and see which device is not getting probed due to lack of a provider. Just add "#define DEBUG" at the top in drivers/base/core.c and boot again with "loglevel=8" kernel parameter (along with the above change). Regards, Anup
Anup Patel wrote: > On Wed, Jun 19, 2024 at 11:16 PM Emil Renner Berthing > <emil.renner.berthing@canonical.com> wrote: > > > > Anup Patel wrote: > > > On Tue, Jun 18, 2024 at 7:00 PM Emil Renner Berthing > > > <emil.renner.berthing@canonical.com> wrote: > > > > > > > > Anup Patel wrote: > > > > > The PLIC driver does not require very early initialization so convert > > > > > it into a platform driver. > > > > > > > > > > After conversion, the PLIC driver is probed after CPUs are brought-up > > > > > so setup cpuhp state after context handler of all online CPUs are > > > > > initialized otherwise PLIC driver crashes for platforms with multiple > > > > > PLIC instances. > > > > > > > > > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > > > > > > > Hi Anup, > > > > > > > > Sorry for the late reply to the mailing list, but ever since 6.9 where this was > > > > applied my Allwinner D1 based boards no longer boot. This is the log of my > > > > LicheeRV Dock booting plain 6.10-rc4, locking up and then rebooting due to the > > > > the watchdog timing out: > > > > > > > > https://pastebin.com/raw/nsbzgEKW > > > > > > > > On 6.10-rc4 I can bring the same board to boot by reverting this patch and all > > > > patches building on it. Eg.: > > > > > > > > git revert e306a894bd51 a7fb69ffd7ce abb720579490 \ > > > > 956521064780 a15587277a24 6c725f33d67b \ > > > > b68d0ff529a9 25d862e183d4 8ec99b033147 > > > > > > Does your board boot with only SBI timer driver enabled ? > > > > I'm not 100% sure this is what you mean, but with this change I can disable > > CONFIG_SUN4I_TIMER: > > > > diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs > > index f51bb24bc84c..0143545348eb 100644 > > --- a/arch/riscv/Kconfig.socs > > +++ b/arch/riscv/Kconfig.socs > > @@ -39,7 +39,6 @@ config ARCH_SUNXI > > bool "Allwinner sun20i SoCs" > > depends on MMU && !XIP_KERNEL > > select ERRATA_THEAD > > - select SUN4I_TIMER > > help > > This enables support for Allwinner sun20i platform hardware, > > including boards based on the D1 and D1s SoCs. > > > > > > But unfortunately the board still doesn't boot: > > https://pastebin.com/raw/AwRxcfeu > > I think we should enable debug prints in DD core and see > which device is not getting probed due to lack of a provider. > > Just add "#define DEBUG" at the top in drivers/base/core.c > and boot again with "loglevel=8" kernel parameter (along with > the above change). With the above changes this is what I get: https://pastebin.com/raw/JfRrEahT /Emil
On Thu, Jun 20, 2024 at 6:40 PM Emil Renner Berthing <emil.renner.berthing@canonical.com> wrote: > > Anup Patel wrote: > > On Wed, Jun 19, 2024 at 11:16 PM Emil Renner Berthing > > <emil.renner.berthing@canonical.com> wrote: > > > > > > Anup Patel wrote: > > > > On Tue, Jun 18, 2024 at 7:00 PM Emil Renner Berthing > > > > <emil.renner.berthing@canonical.com> wrote: > > > > > > > > > > Anup Patel wrote: > > > > > > The PLIC driver does not require very early initialization so convert > > > > > > it into a platform driver. > > > > > > > > > > > > After conversion, the PLIC driver is probed after CPUs are brought-up > > > > > > so setup cpuhp state after context handler of all online CPUs are > > > > > > initialized otherwise PLIC driver crashes for platforms with multiple > > > > > > PLIC instances. > > > > > > > > > > > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > > > > > > > > > Hi Anup, > > > > > > > > > > Sorry for the late reply to the mailing list, but ever since 6.9 where this was > > > > > applied my Allwinner D1 based boards no longer boot. This is the log of my > > > > > LicheeRV Dock booting plain 6.10-rc4, locking up and then rebooting due to the > > > > > the watchdog timing out: > > > > > > > > > > https://pastebin.com/raw/nsbzgEKW > > > > > > > > > > On 6.10-rc4 I can bring the same board to boot by reverting this patch and all > > > > > patches building on it. Eg.: > > > > > > > > > > git revert e306a894bd51 a7fb69ffd7ce abb720579490 \ > > > > > 956521064780 a15587277a24 6c725f33d67b \ > > > > > b68d0ff529a9 25d862e183d4 8ec99b033147 > > > > > > > > Does your board boot with only SBI timer driver enabled ? > > > > > > I'm not 100% sure this is what you mean, but with this change I can disable > > > CONFIG_SUN4I_TIMER: > > > > > > diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs > > > index f51bb24bc84c..0143545348eb 100644 > > > --- a/arch/riscv/Kconfig.socs > > > +++ b/arch/riscv/Kconfig.socs > > > @@ -39,7 +39,6 @@ config ARCH_SUNXI > > > bool "Allwinner sun20i SoCs" > > > depends on MMU && !XIP_KERNEL > > > select ERRATA_THEAD > > > - select SUN4I_TIMER > > > help > > > This enables support for Allwinner sun20i platform hardware, > > > including boards based on the D1 and D1s SoCs. > > > > > > > > > But unfortunately the board still doesn't boot: > > > https://pastebin.com/raw/AwRxcfeu > > > > I think we should enable debug prints in DD core and see > > which device is not getting probed due to lack of a provider. > > > > Just add "#define DEBUG" at the top in drivers/base/core.c > > and boot again with "loglevel=8" kernel parameter (along with > > the above change). > > With the above changes this is what I get: > https://pastebin.com/raw/JfRrEahT You should see prints like below which show producer consumer relation: [ 0.214589] /soc/rtc@101000 Linked as a fwnode consumer to /soc/plic@c000000 [ 0.214966] /soc/serial@10000000 Linked as a fwnode consumer to /soc/plic@c000000 [ 0.215443] /soc/virtio_mmio@10008000 Linked as a fwnode consumer to /soc/plic@c000000 [ 0.216041] /soc/virtio_mmio@10007000 Linked as a fwnode consumer to /soc/plic@c000000 [ 0.216482] /soc/virtio_mmio@10006000 Linked as a fwnode consumer to /soc/plic@c000000 [ 0.216868] /soc/virtio_mmio@10005000 Linked as a fwnode consumer to /soc/plic@c000000 [ 0.217477] /soc/virtio_mmio@10004000 Linked as a fwnode consumer to /soc/plic@c000000 [ 0.217949] /soc/virtio_mmio@10003000 Linked as a fwnode consumer to /soc/plic@c000000 [ 0.218595] /soc/virtio_mmio@10002000 Linked as a fwnode consumer to /soc/plic@c000000 [ 0.219280] /soc/virtio_mmio@10001000 Linked as a fwnode consumer to /soc/plic@c000000 [ 0.219908] /soc/plic@c000000 Linked as a fwnode consumer to /cpus/cpu@0/interrupt-controller [ 0.220800] /soc/plic@c000000 Linked as a fwnode consumer to /cpus/cpu@1/interrupt-controller [ 0.221323] /soc/plic@c000000 Linked as a fwnode consumer to /cpus/cpu@2/interrupt-controller [ 0.221838] /soc/plic@c000000 Linked as a fwnode consumer to /cpus/cpu@3/interrupt-controller [ 0.222347] /soc/clint@2000000 Linked as a fwnode consumer to /cpus/cpu@0/interrupt-controller [ 0.222769] /soc/clint@2000000 Linked as a fwnode consumer to /cpus/cpu@1/interrupt-controller [ 0.223864] /soc/clint@2000000 Linked as a fwnode consumer to /cpus/cpu@2/interrupt-controller [ 0.224370] /soc/clint@2000000 Linked as a fwnode consumer to /cpus/cpu@3/interrupt-controller [ 0.225217] /soc/pci@30000000 Linked as a fwnode consumer to /soc/plic@c000000 To get further prints, I suggest enabling SBI_HVC console and use "console=hvc0" as kernel parameter. Regards, Anup
On Thu, Jun 20, 2024 at 08:38:09PM +0530, Anup Patel wrote: > On Thu, Jun 20, 2024 at 6:40 PM Emil Renner Berthing > <emil.renner.berthing@canonical.com> wrote: > > > > Anup Patel wrote: > > > On Wed, Jun 19, 2024 at 11:16 PM Emil Renner Berthing > > > <emil.renner.berthing@canonical.com> wrote: > > > > > > > > Anup Patel wrote: > > > > > On Tue, Jun 18, 2024 at 7:00 PM Emil Renner Berthing > > > > > <emil.renner.berthing@canonical.com> wrote: > > > > > > > > > > > > Anup Patel wrote: > > > > > > > The PLIC driver does not require very early initialization so convert > > > > > > > it into a platform driver. > > > > > > > > > > > > > > After conversion, the PLIC driver is probed after CPUs are brought-up > > > > > > > so setup cpuhp state after context handler of all online CPUs are > > > > > > > initialized otherwise PLIC driver crashes for platforms with multiple > > > > > > > PLIC instances. > > > > > > > > > > > > > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > > > > > > > > > > > Hi Anup, > > > > > > > > > > > > Sorry for the late reply to the mailing list, but ever since 6.9 where this was > > > > > > applied my Allwinner D1 based boards no longer boot. This is the log of my > > > > > > LicheeRV Dock booting plain 6.10-rc4, locking up and then rebooting due to the > > > > > > the watchdog timing out: > > > > > > > > > > > > https://pastebin.com/raw/nsbzgEKW > > > > > > > > > > > > On 6.10-rc4 I can bring the same board to boot by reverting this patch and all > > > > > > patches building on it. Eg.: > > > > > > > > > > > > git revert e306a894bd51 a7fb69ffd7ce abb720579490 \ > > > > > > 956521064780 a15587277a24 6c725f33d67b \ > > > > > > b68d0ff529a9 25d862e183d4 8ec99b033147 > > > > > > > > > > Does your board boot with only SBI timer driver enabled ? > > > > > > > > I'm not 100% sure this is what you mean, but with this change I can disable > > > > CONFIG_SUN4I_TIMER: > > > > > > > > diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs > > > > index f51bb24bc84c..0143545348eb 100644 > > > > --- a/arch/riscv/Kconfig.socs > > > > +++ b/arch/riscv/Kconfig.socs > > > > @@ -39,7 +39,6 @@ config ARCH_SUNXI > > > > bool "Allwinner sun20i SoCs" > > > > depends on MMU && !XIP_KERNEL > > > > select ERRATA_THEAD > > > > - select SUN4I_TIMER > > > > help > > > > This enables support for Allwinner sun20i platform hardware, > > > > including boards based on the D1 and D1s SoCs. > > > > > > > > > > > > But unfortunately the board still doesn't boot: > > > > https://pastebin.com/raw/AwRxcfeu > > > > > > I think we should enable debug prints in DD core and see > > > which device is not getting probed due to lack of a provider. > > > > > > Just add "#define DEBUG" at the top in drivers/base/core.c > > > and boot again with "loglevel=8" kernel parameter (along with > > > the above change). > > > > With the above changes this is what I get: > > https://pastebin.com/raw/JfRrEahT > > You should see prints like below which show producer consumer > relation: > > [ 0.214589] /soc/rtc@101000 Linked as a fwnode consumer to /soc/plic@c000000 > [ 0.214966] /soc/serial@10000000 Linked as a fwnode consumer to > /soc/plic@c000000 > [ 0.215443] /soc/virtio_mmio@10008000 Linked as a fwnode consumer > to /soc/plic@c000000 > [ 0.216041] /soc/virtio_mmio@10007000 Linked as a fwnode consumer > to /soc/plic@c000000 > [ 0.216482] /soc/virtio_mmio@10006000 Linked as a fwnode consumer > to /soc/plic@c000000 > [ 0.216868] /soc/virtio_mmio@10005000 Linked as a fwnode consumer > to /soc/plic@c000000 > [ 0.217477] /soc/virtio_mmio@10004000 Linked as a fwnode consumer > to /soc/plic@c000000 > [ 0.217949] /soc/virtio_mmio@10003000 Linked as a fwnode consumer > to /soc/plic@c000000 > [ 0.218595] /soc/virtio_mmio@10002000 Linked as a fwnode consumer > to /soc/plic@c000000 > [ 0.219280] /soc/virtio_mmio@10001000 Linked as a fwnode consumer > to /soc/plic@c000000 > [ 0.219908] /soc/plic@c000000 Linked as a fwnode consumer to > /cpus/cpu@0/interrupt-controller > [ 0.220800] /soc/plic@c000000 Linked as a fwnode consumer to > /cpus/cpu@1/interrupt-controller > [ 0.221323] /soc/plic@c000000 Linked as a fwnode consumer to > /cpus/cpu@2/interrupt-controller > [ 0.221838] /soc/plic@c000000 Linked as a fwnode consumer to > /cpus/cpu@3/interrupt-controller > [ 0.222347] /soc/clint@2000000 Linked as a fwnode consumer to > /cpus/cpu@0/interrupt-controller > [ 0.222769] /soc/clint@2000000 Linked as a fwnode consumer to > /cpus/cpu@1/interrupt-controller > [ 0.223864] /soc/clint@2000000 Linked as a fwnode consumer to > /cpus/cpu@2/interrupt-controller > [ 0.224370] /soc/clint@2000000 Linked as a fwnode consumer to > /cpus/cpu@3/interrupt-controller > [ 0.225217] /soc/pci@30000000 Linked as a fwnode consumer to > /soc/plic@c000000 > > To get further prints, I suggest enabling SBI_HVC console and use > "console=hvc0" as kernel parameter. > > Regards, > Anup I did some follow-up research into this. The hanging after "cpuidle: using governor menu" is due to being stuck inside of check_unaligned_access(). Specifically, there is a check that appears to be waiting for jiffies to start ticking, but they never do: while ((now = jiffies) == start_jiffies) cpu_relax(); `jiffies` is fixed at 0xfffedb08, effectively making this a while(true) loop. This happens with and without SUN4I_TIMER. This hang unfortunately happens before the "Linked as a fwnode consumer" print statements start. After bypassing this with the configs CONFIG_NONPORTABLE=y CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS=y A new warning is tripped: [ 1.015134] No max_rate, ignoring min_rate of clock 9 - pll-video0 [ 1.021322] WARNING: CPU: 0 PID: 1 at drivers/clk/sunxi-ng/ccu_common.c:155 sunxi_ccu_probe+0x144/0x1a2 [ 1.021351] Modules linked in: [ 1.021360] CPU: 0 PID: 1 Comm: swapper Tainted: G W 6.10.0-rc6 #1 [ 1.021372] Hardware name: Allwinner D1 Nezha (changed) (DT) [ 1.021377] epc : sunxi_ccu_probe+0x144/0x1a2 [ 1.021386] ra : sunxi_ccu_probe+0x144/0x1a2 [ 1.021397] epc : ffffffff80405a50 ra : ffffffff80405a50 sp : ffffffc80000bb80 [ 1.021406] gp : ffffffff815f69c8 tp : ffffffd801df8000 t0 : 6100000000000000 [ 1.021414] t1 : 000000000000004e t2 : 61725f78616d206f s0 : ffffffc80000bbe0 [ 1.021422] s1 : ffffffff81537498 a0 : 0000000000000036 a1 : 000000000000054b [ 1.021430] a2 : 00000000ffffefff a3 : 0000000000000000 a4 : ffffffff8141f628 [ 1.021438] a5 : 0000000000000000 a6 : 0000000000000000 a7 : 000000004442434e [ 1.021446] s2 : 0000000000000009 s3 : 0000000000000000 s4 : ffffffd801dc9010 [ 1.021453] s5 : ffffffd802428a00 s6 : ffffffd83ffdcf20 s7 : ffffffc800015000 [ 1.021462] s8 : ffffffff80e55360 s9 : ffffffff81034598 s10: 0000000000000000 [ 1.021470] s11: 0000000000000000 t3 : ffffffff8160a257 t4 : ffffffff8160a257 [ 1.021478] t5 : ffffffff8160a258 t6 : ffffffc80000b990 [ 1.021485] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 [ 1.021493] [<ffffffff80405a50>] sunxi_ccu_probe+0x144/0x1a2 [ 1.021510] [<ffffffff80405af6>] devm_sunxi_ccu_probe+0x48/0x82 [ 1.021524] [<ffffffff80409020>] sun20i_d1_ccu_probe+0xba/0xfa [ 1.021546] [<ffffffff804a8b40>] platform_probe+0x4e/0xa6 [ 1.021562] [<ffffffff808d81ee>] really_probe+0x10a/0x2dc [ 1.021581] [<ffffffff808d8472>] __driver_probe_device.part.0+0xb2/0xe8 [ 1.021597] [<ffffffff804a67aa>] driver_probe_device+0x7a/0xca [ 1.021621] [<ffffffff804a6912>] __driver_attach+0x52/0x164 [ 1.021638] [<ffffffff804a4c7a>] bus_for_each_dev+0x56/0x8c [ 1.021656] [<ffffffff804a6382>] driver_attach+0x1a/0x22 [ 1.021673] [<ffffffff804a5c18>] bus_add_driver+0xea/0x1d8 [ 1.021690] [<ffffffff804a7852>] driver_register+0x3e/0xd8 [ 1.021709] [<ffffffff804a8826>] __platform_driver_register+0x1c/0x24 Emil[ 1.021725] [<ffffffff80a17488>] sun20i_d1_ccu_driver_init+0x1a/0x22 [ 1.021746] [<ffffffff800026ae>] do_one_initcall+0x46/0x1be [ 1.021762] [<ffffffff80a00ef2>] kernel_init_freeable+0x1c6/0x220 [ 1.021791] [<ffffffff808e0b46>] kernel_init+0x1e/0x112 Linked as a fwnode consumer[ 1.021807] [<ffffffff808e7632>] ret_from_fork+0xe/0x1c The warning is not fatal, so execution continues until hanging at [ 2.110919] printk: legacy console [ttyS0] disabled [ 2.136911] 2500000.serial: ttyS0 at MMIO 0x2500000 (irq = 205, base_baud = 1500000) is a 16550A�[ 2.145674] printk: legacy console [ttyS0] enabled [ 2.145674] printk: legacy console [ttyS0] enabled [ 2.155095] printk: legacy bootconsole [sbi0] disabled [ 2.155095] printk: legacy bootconsole [sbi0] disabled I have not been able to discover why it hangs here. The clock is somehow relying on the previous behavior of this PLIC driver. - Charlie > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Mon, Jul 08, 2024 at 07:15:51PM -0700, Charlie Jenkins wrote: > CONFIG_NONPORTABLE=y > CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS=y > > A new warning is tripped: > > [ 1.015134] No max_rate, ignoring min_rate of clock 9 - pll-video0 > [ 1.021322] WARNING: CPU: 0 PID: 1 at drivers/clk/sunxi-ng/ccu_common.c:155 sunxi_ccu_probe+0x144/0x1a2 > [ 1.021351] Modules linked in: > [ 1.021360] CPU: 0 PID: 1 Comm: swapper Tainted: G W 6.10.0-rc6 #1 > [ 1.021372] Hardware name: Allwinner D1 Nezha (changed) (DT) > [ 1.021377] epc : sunxi_ccu_probe+0x144/0x1a2 > [ 1.021386] ra : sunxi_ccu_probe+0x144/0x1a2 > [ 1.021397] epc : ffffffff80405a50 ra : ffffffff80405a50 sp : ffffffc80000bb80 > [ 1.021406] gp : ffffffff815f69c8 tp : ffffffd801df8000 t0 : 6100000000000000 > [ 1.021414] t1 : 000000000000004e t2 : 61725f78616d206f s0 : ffffffc80000bbe0 > [ 1.021422] s1 : ffffffff81537498 a0 : 0000000000000036 a1 : 000000000000054b > [ 1.021430] a2 : 00000000ffffefff a3 : 0000000000000000 a4 : ffffffff8141f628 > [ 1.021438] a5 : 0000000000000000 a6 : 0000000000000000 a7 : 000000004442434e > [ 1.021446] s2 : 0000000000000009 s3 : 0000000000000000 s4 : ffffffd801dc9010 > [ 1.021453] s5 : ffffffd802428a00 s6 : ffffffd83ffdcf20 s7 : ffffffc800015000 > [ 1.021462] s8 : ffffffff80e55360 s9 : ffffffff81034598 s10: 0000000000000000 > [ 1.021470] s11: 0000000000000000 t3 : ffffffff8160a257 t4 : ffffffff8160a257 > [ 1.021478] t5 : ffffffff8160a258 t6 : ffffffc80000b990 > [ 1.021485] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 > [ 1.021493] [<ffffffff80405a50>] sunxi_ccu_probe+0x144/0x1a2 > [ 1.021510] [<ffffffff80405af6>] devm_sunxi_ccu_probe+0x48/0x82 > [ 1.021524] [<ffffffff80409020>] sun20i_d1_ccu_probe+0xba/0xfa > [ 1.021546] [<ffffffff804a8b40>] platform_probe+0x4e/0xa6 > [ 1.021562] [<ffffffff808d81ee>] really_probe+0x10a/0x2dc > [ 1.021581] [<ffffffff808d8472>] __driver_probe_device.part.0+0xb2/0xe8 > [ 1.021597] [<ffffffff804a67aa>] driver_probe_device+0x7a/0xca > [ 1.021621] [<ffffffff804a6912>] __driver_attach+0x52/0x164 > [ 1.021638] [<ffffffff804a4c7a>] bus_for_each_dev+0x56/0x8c > [ 1.021656] [<ffffffff804a6382>] driver_attach+0x1a/0x22 > [ 1.021673] [<ffffffff804a5c18>] bus_add_driver+0xea/0x1d8 > [ 1.021690] [<ffffffff804a7852>] driver_register+0x3e/0xd8 > [ 1.021709] [<ffffffff804a8826>] __platform_driver_register+0x1c/0x24 > Emil[ 1.021725] [<ffffffff80a17488>] sun20i_d1_ccu_driver_init+0x1a/0x22 > [ 1.021746] [<ffffffff800026ae>] do_one_initcall+0x46/0x1be > [ 1.021762] [<ffffffff80a00ef2>] kernel_init_freeable+0x1c6/0x220 > [ 1.021791] [<ffffffff808e0b46>] kernel_init+0x1e/0x112 > Linked as a fwnode consumer[ 1.021807] [<ffffffff808e7632>] ret_from_fork+0xe/0x1c > > The warning is not fatal, so execution continues until hanging at > > [ 2.110919] printk: legacy console [ttyS0] disabled > [ 2.136911] 2500000.serial: ttyS0 at MMIO 0x2500000 (irq = 205, base_baud = 1500000) is a 16550A�[ 2.145674] printk: legacy console [ttyS0] enabled > [ 2.145674] printk: legacy console [ttyS0] enabled > [ 2.155095] printk: legacy bootconsole [sbi0] disabled > [ 2.155095] printk: legacy bootconsole [sbi0] disabled > > I have not been able to discover why it hangs here. FWIW, that's probably because the CCU is the clock driver providing the clock for the uart, so when the sbi console goes away you lose output cos the uart driver cannot get the right rate for its input. You'd probably get further if you set keep_bootcon in your cmdline - but realistically the clock driver failing to probe is gonna have a load of knock on effects that it's probably enough to just have the failure you link here.
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 5b7bc4fd9517..7400a07fc479 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -64,6 +64,7 @@ #define PLIC_QUIRK_EDGE_INTERRUPT 0 struct plic_priv { + struct device *dev; struct cpumask lmask; struct irq_domain *irqdomain; void __iomem *regs; @@ -406,30 +407,50 @@ static int plic_starting_cpu(unsigned int cpu) return 0; } -static int __init __plic_init(struct device_node *node, - struct device_node *parent, - unsigned long plic_quirks) +static const struct of_device_id plic_match[] = { + { .compatible = "sifive,plic-1.0.0" }, + { .compatible = "riscv,plic0" }, + { .compatible = "andestech,nceplic100", + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) }, + { .compatible = "thead,c900-plic", + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) }, + {} +}; + +static int plic_probe(struct platform_device *pdev) { int error = 0, nr_contexts, nr_handlers = 0, i; - u32 nr_irqs; - struct plic_priv *priv; + struct device *dev = &pdev->dev; + unsigned long plic_quirks = 0; struct plic_handler *handler; + struct plic_priv *priv; + bool cpuhp_setup; unsigned int cpu; + u32 nr_irqs; + + if (is_of_node(dev->fwnode)) { + const struct of_device_id *id; + + id = of_match_node(plic_match, to_of_node(dev->fwnode)); + if (id) + plic_quirks = (unsigned long)id->data; + } priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + priv->dev = dev; priv->plic_quirks = plic_quirks; - priv->regs = of_iomap(node, 0); + priv->regs = of_iomap(to_of_node(dev->fwnode), 0); if (WARN_ON(!priv->regs)) { error = -EIO; goto out_free_priv; } error = -EINVAL; - of_property_read_u32(node, "riscv,ndev", &nr_irqs); + of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", &nr_irqs); if (WARN_ON(!nr_irqs)) goto out_iounmap; @@ -439,13 +460,13 @@ static int __init __plic_init(struct device_node *node, if (!priv->prio_save) goto out_free_priority_reg; - nr_contexts = of_irq_count(node); + nr_contexts = of_irq_count(to_of_node(dev->fwnode)); if (WARN_ON(!nr_contexts)) goto out_free_priority_reg; error = -ENOMEM; - priv->irqdomain = irq_domain_add_linear(node, nr_irqs + 1, - &plic_irqdomain_ops, priv); + priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1, + &plic_irqdomain_ops, priv); if (WARN_ON(!priv->irqdomain)) goto out_free_priority_reg; @@ -455,7 +476,7 @@ static int __init __plic_init(struct device_node *node, int cpu; unsigned long hartid; - if (of_irq_parse_one(node, i, &parent)) { + if (of_irq_parse_one(to_of_node(dev->fwnode), i, &parent)) { pr_err("failed to parse parent for context %d.\n", i); continue; } @@ -491,7 +512,7 @@ static int __init __plic_init(struct device_node *node, /* Find parent domain and register chained handler */ if (!plic_parent_irq && irq_find_host(parent.np)) { - plic_parent_irq = irq_of_parse_and_map(node, i); + plic_parent_irq = irq_of_parse_and_map(to_of_node(dev->fwnode), i); if (plic_parent_irq) irq_set_chained_handler(plic_parent_irq, plic_handle_irq); @@ -533,20 +554,29 @@ static int __init __plic_init(struct device_node *node, /* * We can have multiple PLIC instances so setup cpuhp state - * and register syscore operations only when context handler - * for current/boot CPU is present. + * and register syscore operations only once after context + * handlers of all online CPUs are initialized. */ - handler = this_cpu_ptr(&plic_handlers); - if (handler->present && !plic_cpuhp_setup_done) { - cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, - "irqchip/sifive/plic:starting", - plic_starting_cpu, plic_dying_cpu); - register_syscore_ops(&plic_irq_syscore_ops); - plic_cpuhp_setup_done = true; + if (!plic_cpuhp_setup_done) { + cpuhp_setup = true; + for_each_online_cpu(cpu) { + handler = per_cpu_ptr(&plic_handlers, cpu); + if (!handler->present) { + cpuhp_setup = false; + break; + } + } + if (cpuhp_setup) { + cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, + "irqchip/sifive/plic:starting", + plic_starting_cpu, plic_dying_cpu); + register_syscore_ops(&plic_irq_syscore_ops); + plic_cpuhp_setup_done = true; + } } - pr_info("%pOFP: mapped %d interrupts with %d handlers for" - " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts); + pr_info("%pOFP: mapped %d interrupts with %d handlers for %d contexts.\n", + to_of_node(dev->fwnode), nr_irqs, nr_handlers, nr_contexts); return 0; out_free_enable_reg: @@ -563,20 +593,11 @@ static int __init __plic_init(struct device_node *node, return error; } -static int __init plic_init(struct device_node *node, - struct device_node *parent) -{ - return __plic_init(node, parent, 0); -} - -IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init); -IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */ - -static int __init plic_edge_init(struct device_node *node, - struct device_node *parent) -{ - return __plic_init(node, parent, BIT(PLIC_QUIRK_EDGE_INTERRUPT)); -} - -IRQCHIP_DECLARE(andestech_nceplic100, "andestech,nceplic100", plic_edge_init); -IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_edge_init); +static struct platform_driver plic_driver = { + .driver = { + .name = "riscv-plic", + .of_match_table = plic_match, + }, + .probe = plic_probe, +}; +builtin_platform_driver(plic_driver);
The PLIC driver does not require very early initialization so convert it into a platform driver. After conversion, the PLIC driver is probed after CPUs are brought-up so setup cpuhp state after context handler of all online CPUs are initialized otherwise PLIC driver crashes for platforms with multiple PLIC instances. Signed-off-by: Anup Patel <apatel@ventanamicro.com> --- drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------ 1 file changed, 61 insertions(+), 40 deletions(-)