Message ID | 08d84ce3-5345-dc69-bbc3-37372a3df22b@sigmadesigns.com (mailing list archive) |
---|---|
State | New, archived |
Delegated to: | Bjorn Helgaas |
Headers | show |
On 14/06/2017 11:00, Marc Gonzalez wrote: > The MSI controller in Tango supports 256 message-signaled interrupts, > and a single doorbell address. > > Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com> > --- > Changes from v6 to v7 > o Call spin_lock() not spin_lock_irqsave() in the ISR > --- > drivers/pci/host/pcie-tango.c | 225 ++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 225 insertions(+) Someone on IRC suggested testing the driver with LOCKDEP. If I understand the warning below correctly, I am not supposed to call irq_domain_set_info() while holding used_msi_lock? NB: in probe, my driver calls add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); This should not break LOCKDEP analysis, right? Regards. [ 0.685615] ====================================================== [ 0.685621] [ INFO: possible circular locking dependency detected ] [ 0.685632] 4.9.20-1-rc3 #2 Tainted: G I [ 0.685638] ------------------------------------------------------- [ 0.685644] swapper/0/1 is trying to acquire lock: [ 0.685650] (&(&pcie->used_msi_lock)->rlock){......}, at: [<c0352e5c>] update_msi_enable+0x2c/0x58 [ 0.685692] but task is already holding lock: [ 0.685698] (&irq_desc_lock_class){......}, at: [<c017586c>] __setup_irq+0xa4/0x5f0 [ 0.685718] which lock already depends on the new lock. [ 0.685718] [ 0.685725] [ 0.685725] the existing dependency chain (in reverse order) is: [ 0.685731] [ 0.685731] -> #1 (&irq_desc_lock_class){......}: [ 0.685758] __irq_get_desc_lock+0x54/0x94 [ 0.685768] __irq_set_handler+0x24/0x54 [ 0.685778] irq_domain_set_info+0x38/0x4c [ 0.685786] tango_irq_domain_alloc+0x98/0xbc [ 0.685795] msi_domain_alloc+0x68/0x130 [ 0.685802] __irq_domain_alloc_irqs+0x11c/0x2ac [ 0.685809] msi_domain_alloc_irqs+0x78/0x188 [ 0.685816] __pci_enable_msi_range+0x200/0x37c [ 0.685824] pcie_port_device_register+0x3cc/0x494 [ 0.685831] pcie_portdrv_probe+0x2c/0xa4 [ 0.685844] pci_device_probe+0x8c/0xe8 [ 0.685852] driver_probe_device+0x1c8/0x2ac [ 0.685863] bus_for_each_drv+0x60/0x94 [ 0.685870] __device_attach+0xb4/0x118 [ 0.685883] pci_bus_add_device+0x44/0x90 [ 0.685891] pci_bus_add_devices+0x3c/0x80 [ 0.685898] pci_host_common_probe+0x100/0x314 [ 0.685906] platform_drv_probe+0x50/0xb0 [ 0.685912] driver_probe_device+0x21c/0x2ac [ 0.685918] __driver_attach+0xc0/0xc4 [ 0.685926] bus_for_each_dev+0x68/0x9c [ 0.685933] bus_add_driver+0x108/0x214 [ 0.685939] driver_register+0x78/0xf4 [ 0.685947] do_one_initcall+0x44/0x174 [ 0.685961] kernel_init_freeable+0x158/0x1e8 [ 0.685971] kernel_init+0x8/0x10c [ 0.685980] ret_from_fork+0x14/0x24 [ 0.685985] [ 0.685985] -> #0 (&(&pcie->used_msi_lock)->rlock){......}: [ 0.686003] _raw_spin_lock_irqsave+0x44/0x58 [ 0.686012] update_msi_enable+0x2c/0x58 [ 0.686019] irq_enable+0x30/0x44 [ 0.686025] irq_startup+0x80/0x84 [ 0.686031] __setup_irq+0x558/0x5f0 [ 0.686038] request_threaded_irq+0xe4/0x184 [ 0.686044] pcie_pme_probe+0xc4/0x1f0 [ 0.686051] pcie_port_probe_service+0x34/0x70 [ 0.686057] driver_probe_device+0x21c/0x2ac [ 0.686065] bus_for_each_drv+0x60/0x94 [ 0.686071] __device_attach+0xb4/0x118 [ 0.686079] bus_probe_device+0x88/0x90 [ 0.686085] device_add+0x3f4/0x584 [ 0.686092] pcie_port_device_register+0x228/0x494 [ 0.686099] pcie_portdrv_probe+0x2c/0xa4 [ 0.686106] pci_device_probe+0x8c/0xe8 [ 0.686113] driver_probe_device+0x1c8/0x2ac [ 0.686120] bus_for_each_drv+0x60/0x94 [ 0.686126] __device_attach+0xb4/0x118 [ 0.686134] pci_bus_add_device+0x44/0x90 [ 0.686141] pci_bus_add_devices+0x3c/0x80 [ 0.686149] pci_host_common_probe+0x100/0x314 [ 0.686155] platform_drv_probe+0x50/0xb0 [ 0.686161] driver_probe_device+0x21c/0x2ac [ 0.686167] __driver_attach+0xc0/0xc4 [ 0.686175] bus_for_each_dev+0x68/0x9c [ 0.686182] bus_add_driver+0x108/0x214 [ 0.686188] driver_register+0x78/0xf4 [ 0.686194] do_one_initcall+0x44/0x174 [ 0.686203] kernel_init_freeable+0x158/0x1e8 [ 0.686210] kernel_init+0x8/0x10c [ 0.686218] ret_from_fork+0x14/0x24 [ 0.686222] [ 0.686222] other info that might help us debug this: [ 0.686222] [ 0.686229] Possible unsafe locking scenario: [ 0.686229] [ 0.686235] CPU0 CPU1 [ 0.686239] ---- ---- [ 0.686243] lock(&irq_desc_lock_class); [ 0.686251] lock(&(&pcie->used_msi_lock)->rlock); [ 0.686260] lock(&irq_desc_lock_class); [ 0.686267] lock(&(&pcie->used_msi_lock)->rlock); [ 0.686275] [ 0.686275] *** DEADLOCK *** [ 0.686275] [ 0.686283] 5 locks held by swapper/0/1: [ 0.686288] #0: (&dev->mutex){......}, at: [<c03939e4>] __driver_attach+0x50/0xc4 [ 0.686303] #1: (&dev->mutex){......}, at: [<c03939f4>] __driver_attach+0x60/0xc4 [ 0.686318] #2: (&dev->mutex){......}, at: [<c0393574>] __device_attach+0x20/0x118 [ 0.686333] #3: (&dev->mutex){......}, at: [<c0393574>] __device_attach+0x20/0x118 [ 0.686348] #4: (&irq_desc_lock_class){......}, at: [<c017586c>] __setup_irq+0xa4/0x5f0 [ 0.686363] [ 0.686363] stack backtrace: [ 0.686373] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G I 4.9.20-1-rc3 #2 [ 0.686379] Hardware name: Sigma Tango DT [ 0.686398] [<c010f8a0>] (unwind_backtrace) from [<c010b540>] (show_stack+0x10/0x14) [ 0.686411] [<c010b540>] (show_stack) from [<c0309f40>] (dump_stack+0x98/0xc4) [ 0.686423] [<c0309f40>] (dump_stack) from [<c0165ce4>] (print_circular_bug+0x214/0x334) [ 0.686432] [<c0165ce4>] (print_circular_bug) from [<c016934c>] (__lock_acquire+0x171c/0x1a28) [ 0.686441] [<c016934c>] (__lock_acquire) from [<c0169a28>] (lock_acquire+0x6c/0x88) [ 0.686452] [<c0169a28>] (lock_acquire) from [<c0533004>] (_raw_spin_lock_irqsave+0x44/0x58) [ 0.686464] [<c0533004>] (_raw_spin_lock_irqsave) from [<c0352e5c>] (update_msi_enable+0x2c/0x58) [ 0.686475] [<c0352e5c>] (update_msi_enable) from [<c0177614>] (irq_enable+0x30/0x44) [ 0.686484] [<c0177614>] (irq_enable) from [<c01776a8>] (irq_startup+0x80/0x84) [ 0.686493] [<c01776a8>] (irq_startup) from [<c0175d20>] (__setup_irq+0x558/0x5f0) [ 0.686502] [<c0175d20>] (__setup_irq) from [<c0175f60>] (request_threaded_irq+0xe4/0x184) [ 0.686511] [<c0175f60>] (request_threaded_irq) from [<c034fed0>] (pcie_pme_probe+0xc4/0x1f0) [ 0.686520] [<c034fed0>] (pcie_pme_probe) from [<c034d8a8>] (pcie_port_probe_service+0x34/0x70) [ 0.686530] [<c034d8a8>] (pcie_port_probe_service) from [<c0393904>] (driver_probe_device+0x21c/0x2ac) [ 0.686540] [<c0393904>] (driver_probe_device) from [<c0391d18>] (bus_for_each_drv+0x60/0x94) [ 0.686550] [<c0391d18>] (bus_for_each_drv) from [<c0393608>] (__device_attach+0xb4/0x118) [ 0.686560] [<c0393608>] (__device_attach) from [<c0392b14>] (bus_probe_device+0x88/0x90) [ 0.686570] [<c0392b14>] (bus_probe_device) from [<c0390ef0>] (device_add+0x3f4/0x584) [ 0.686580] [<c0390ef0>] (device_add) from [<c034dba4>] (pcie_port_device_register+0x228/0x494) [ 0.686589] [<c034dba4>] (pcie_port_device_register) from [<c034e26c>] (pcie_portdrv_probe+0x2c/0xa4) [ 0.686600] [<c034e26c>] (pcie_portdrv_probe) from [<c0341ff0>] (pci_device_probe+0x8c/0xe8) [ 0.686611] [<c0341ff0>] (pci_device_probe) from [<c03938b0>] (driver_probe_device+0x1c8/0x2ac) [ 0.686620] [<c03938b0>] (driver_probe_device) from [<c0391d18>] (bus_for_each_drv+0x60/0x94) [ 0.686630] [<c0391d18>] (bus_for_each_drv) from [<c0393608>] (__device_attach+0xb4/0x118) [ 0.686641] [<c0393608>] (__device_attach) from [<c03381fc>] (pci_bus_add_device+0x44/0x90) [ 0.686651] [<c03381fc>] (pci_bus_add_device) from [<c0338284>] (pci_bus_add_devices+0x3c/0x80) [ 0.686662] [<c0338284>] (pci_bus_add_devices) from [<c0352bf4>] (pci_host_common_probe+0x100/0x314) [ 0.686673] [<c0352bf4>] (pci_host_common_probe) from [<c03950dc>] (platform_drv_probe+0x50/0xb0) [ 0.686682] [<c03950dc>] (platform_drv_probe) from [<c0393904>] (driver_probe_device+0x21c/0x2ac) [ 0.686690] [<c0393904>] (driver_probe_device) from [<c0393a54>] (__driver_attach+0xc0/0xc4) [ 0.686700] [<c0393a54>] (__driver_attach) from [<c0391c70>] (bus_for_each_dev+0x68/0x9c) [ 0.686711] [<c0391c70>] (bus_for_each_dev) from [<c0392d2c>] (bus_add_driver+0x108/0x214) [ 0.686721] [<c0392d2c>] (bus_add_driver) from [<c0394170>] (driver_register+0x78/0xf4) [ 0.686730] [<c0394170>] (driver_register) from [<c01017dc>] (do_one_initcall+0x44/0x174) [ 0.686742] [<c01017dc>] (do_one_initcall) from [<c0800dc0>] (kernel_init_freeable+0x158/0x1e8) [ 0.686754] [<c0800dc0>] (kernel_init_freeable) from [<c052c918>] (kernel_init+0x8/0x10c) [ 0.686765] [<c052c918>] (kernel_init) from [<c01077d0>] (ret_from_fork+0x14/0x24) [ 0.686824] pcieport 0000:00:00.0: Signaling PME through PCIe PME interrupt [ 0.686835] pci 0000:01:00.0: Signaling PME through PCIe PME interrupt [ 0.686849] pcie_pme 0000:00:00.0:pcie001: service driver pcie_pme loaded [ 0.687049] aer 0000:00:00.0:pcie002: service driver aer loaded [ 0.687276] pci 0000:01:00.0: enabling device (0140 -> 0142)
On 14/06/17 10:19, Marc Gonzalez wrote: > On 14/06/2017 11:00, Marc Gonzalez wrote: > >> The MSI controller in Tango supports 256 message-signaled interrupts, >> and a single doorbell address. >> >> Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com> >> --- >> Changes from v6 to v7 >> o Call spin_lock() not spin_lock_irqsave() in the ISR >> --- >> drivers/pci/host/pcie-tango.c | 225 ++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 225 insertions(+) > > Someone on IRC suggested testing the driver with LOCKDEP. > > If I understand the warning below correctly, I am not supposed > to call irq_domain_set_info() while holding used_msi_lock? Indeed. This creates an AB/BA situation, which will eventually deadlock. Once again, lockdep saves the day. > NB: in probe, my driver calls > > add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); > > This should not break LOCKDEP analysis, right? It doesn't. The code is provably wrong, and lockdep proved that it is wrong. M.
diff --git a/drivers/pci/host/pcie-tango.c b/drivers/pci/host/pcie-tango.c index 67aaadcc1c5e..d4b3520b5a03 100644 --- a/drivers/pci/host/pcie-tango.c +++ b/drivers/pci/host/pcie-tango.c @@ -1,16 +1,228 @@ +#include <linux/irqchip/chained_irq.h> +#include <linux/irqdomain.h> #include <linux/pci-ecam.h> #include <linux/delay.h> +#include <linux/msi.h> #include <linux/of.h> #define MSI_MAX 256 #define SMP8759_MUX 0x48 #define SMP8759_TEST_OUT 0x74 +#define SMP8759_STATUS 0x80 +#define SMP8759_ENABLE 0xa0 +#define SMP8759_DOORBELL 0xa002e07c struct tango_pcie { + DECLARE_BITMAP(used_msi, MSI_MAX); + spinlock_t used_msi_lock; void __iomem *mux; + void __iomem *msi_status; + void __iomem *msi_enable; + phys_addr_t msi_doorbell; + struct irq_domain *irq_dom; + struct irq_domain *msi_dom; + int irq; }; +/*** MSI CONTROLLER SUPPORT ***/ + +static void tango_msi_isr(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct tango_pcie *pcie = irq_desc_get_handler_data(desc); + unsigned long status, base, virq, idx, pos = 0; + + chained_irq_enter(chip, desc); + spin_lock(&pcie->used_msi_lock); + + while ((pos = find_next_bit(pcie->used_msi, MSI_MAX, pos)) < MSI_MAX) { + base = round_down(pos, 32); + status = readl_relaxed(pcie->msi_status + base / 8); + for_each_set_bit(idx, &status, 32) { + virq = irq_find_mapping(pcie->irq_dom, base + idx); + generic_handle_irq(virq); + } + pos = base + 32; + } + + spin_unlock(&pcie->used_msi_lock); + chained_irq_exit(chip, desc); +} + +static void tango_ack(struct irq_data *d) +{ + struct tango_pcie *pcie = d->chip_data; + u32 offset = (d->hwirq / 32) * 4; + u32 bit = BIT(d->hwirq % 32); + + writel_relaxed(bit, pcie->msi_status + offset); +} + +static void update_msi_enable(struct irq_data *d, bool unmask) +{ + unsigned long flags; + struct tango_pcie *pcie = d->chip_data; + u32 offset = (d->hwirq / 32) * 4; + u32 bit = BIT(d->hwirq % 32); + u32 val; + + spin_lock_irqsave(&pcie->used_msi_lock, flags); + val = readl_relaxed(pcie->msi_enable + offset); + val = unmask ? val | bit : val & ~bit; + writel_relaxed(val, pcie->msi_enable + offset); + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); +} + +static void tango_mask(struct irq_data *d) +{ + update_msi_enable(d, false); +} + +static void tango_unmask(struct irq_data *d) +{ + update_msi_enable(d, true); +} + +static int tango_set_affinity(struct irq_data *d, + const struct cpumask *mask, bool force) +{ + return -EINVAL; +} + +static void tango_compose_msi_msg(struct irq_data *d, struct msi_msg *msg) +{ + struct tango_pcie *pcie = d->chip_data; + msg->address_lo = lower_32_bits(pcie->msi_doorbell); + msg->address_hi = upper_32_bits(pcie->msi_doorbell); + msg->data = d->hwirq; +} + +static struct irq_chip tango_chip = { + .irq_ack = tango_ack, + .irq_mask = tango_mask, + .irq_unmask = tango_unmask, + .irq_set_affinity = tango_set_affinity, + .irq_compose_msi_msg = tango_compose_msi_msg, +}; + +static void msi_ack(struct irq_data *d) +{ + irq_chip_ack_parent(d); +} + +static void msi_mask(struct irq_data *d) +{ + pci_msi_mask_irq(d); + irq_chip_mask_parent(d); +} + +static void msi_unmask(struct irq_data *d) +{ + pci_msi_unmask_irq(d); + irq_chip_unmask_parent(d); +} + +static struct irq_chip msi_chip = { + .name = "MSI", + .irq_ack = msi_ack, + .irq_mask = msi_mask, + .irq_unmask = msi_unmask, +}; + +static struct msi_domain_info msi_dom_info = { + .flags = MSI_FLAG_PCI_MSIX + | MSI_FLAG_USE_DEF_DOM_OPS + | MSI_FLAG_USE_DEF_CHIP_OPS, + .chip = &msi_chip, +}; + +static int tango_irq_domain_alloc(struct irq_domain *dom, + unsigned int virq, unsigned int nr_irqs, void *args) +{ + unsigned long flags; + int pos, err = -ENOSPC; + struct tango_pcie *pcie = dom->host_data; + + spin_lock_irqsave(&pcie->used_msi_lock, flags); + pos = find_first_zero_bit(pcie->used_msi, MSI_MAX); + if (pos < MSI_MAX) { + err = 0; + __set_bit(pos, pcie->used_msi); + irq_domain_set_info(dom, virq, pos, + &tango_chip, pcie, handle_edge_irq, NULL, NULL); + } + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); + + return err; +} + +static void tango_irq_domain_free(struct irq_domain *dom, + unsigned int virq, unsigned int nr_irqs) +{ + unsigned long flags; + struct irq_data *d = irq_domain_get_irq_data(dom, virq); + struct tango_pcie *pcie = d->chip_data; + + spin_lock_irqsave(&pcie->used_msi_lock, flags); + __clear_bit(d->hwirq, pcie->used_msi); + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); +} + +static const struct irq_domain_ops irq_dom_ops = { + .alloc = tango_irq_domain_alloc, + .free = tango_irq_domain_free, +}; + +static int tango_msi_remove(struct platform_device *pdev) +{ + struct tango_pcie *pcie = platform_get_drvdata(pdev); + + irq_set_chained_handler_and_data(pcie->irq, NULL, NULL); + irq_domain_remove(pcie->msi_dom); + irq_domain_remove(pcie->irq_dom); + + return 0; +} + +static int tango_msi_probe(struct platform_device *pdev, struct tango_pcie *pcie) +{ + int i, virq; + struct irq_domain *msi_dom, *irq_dom; + struct fwnode_handle *fwnode = of_node_to_fwnode(pdev->dev.of_node); + + spin_lock_init(&pcie->used_msi_lock); + for (i = 0; i < MSI_MAX / 32; ++i) + writel_relaxed(0, pcie->msi_enable + i * 4); + + virq = platform_get_irq(pdev, 1); + if (virq <= 0) { + pr_err("Failed to map IRQ\n"); + return -ENXIO; + } + + irq_dom = irq_domain_create_linear(fwnode, MSI_MAX, &irq_dom_ops, pcie); + if (!irq_dom) { + pr_err("Failed to create IRQ domain\n"); + return -ENOMEM; + } + + msi_dom = pci_msi_create_irq_domain(fwnode, &msi_dom_info, irq_dom); + if (!msi_dom) { + pr_err("Failed to create MSI domain\n"); + irq_domain_remove(irq_dom); + return -ENOMEM; + } + + pcie->irq_dom = irq_dom; + pcie->msi_dom = msi_dom; + pcie->irq = virq; + + irq_set_chained_handler_and_data(virq, tango_msi_isr, pcie); + + return 0; +} + /*** HOST BRIDGE SUPPORT ***/ static int smp8759_config_read(struct pci_bus *bus, @@ -88,6 +300,9 @@ static int tango_check_pcie_link(void __iomem *test_out) static int smp8759_init(struct tango_pcie *pcie, void __iomem *base) { pcie->mux = base + SMP8759_MUX; + pcie->msi_status = base + SMP8759_STATUS; + pcie->msi_enable = base + SMP8759_ENABLE; + pcie->msi_doorbell = SMP8759_DOORBELL; return tango_check_pcie_link(base + SMP8759_TEST_OUT); } @@ -121,11 +336,21 @@ static int tango_pcie_probe(struct platform_device *pdev) if (ret) return ret; + ret = tango_msi_probe(pdev, pcie); + if (ret) + return ret; + return pci_host_common_probe(pdev, &smp8759_ecam_ops); } +static int tango_pcie_remove(struct platform_device *pdev) +{ + return tango_msi_remove(pdev); +} + static struct platform_driver tango_pcie_driver = { .probe = tango_pcie_probe, + .remove = tango_pcie_remove, .driver = { .name = KBUILD_MODNAME, .of_match_table = tango_pcie_ids,
The MSI controller in Tango supports 256 message-signaled interrupts, and a single doorbell address. Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com> --- Changes from v6 to v7 o Call spin_lock() not spin_lock_irqsave() in the ISR --- drivers/pci/host/pcie-tango.c | 225 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+)