Message ID | 7923ef6ffd60a7c3de3c8c04fe746bbd3c15b5ac.1428564115.git.dhdang@apm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Apr 09, 2015 at 10:05:03AM -0700, Duc Dang wrote: > X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into > 16 HW IRQ lines. > > Signed-off-by: Duc Dang <dhdang@apm.com> > Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> > ... > --- /dev/null > +++ b/drivers/pci/host/pci-xgene-msi.c > @@ -0,0 +1,407 @@ > ... > +static void xgene_irq_domain_free(struct irq_domain *domain, > + unsigned int virq, unsigned int nr_irqs) > +{ > + struct irq_data *d = irq_domain_get_irq_data(domain, virq); > + struct xgene_msi *msi = irq_data_get_irq_chip_data(d); > + > + mutex_lock(&msi->bitmap_lock); > + > + if (!test_bit(d->hwirq, msi->bitmap)) > + pr_err("trying to free unused MSI#%lu\n", d->hwirq); I'll let Marc comment on the substance of this patch, but I'd sure like to see more information in this and the other pr_errs below. The text "failed to create parent MSI domain" in the dmesg log is not enough. Ideally it would identify the relevant host bridge. > + else > + clear_bit(d->hwirq, msi->bitmap); > + > + mutex_unlock(&msi->bitmap_lock); > + > + irq_domain_free_irqs_parent(domain, virq, nr_irqs); > +} > + > +static const struct irq_domain_ops msi_domain_ops = { > + .alloc = xgene_irq_domain_alloc, > + .free = xgene_irq_domain_free, > +}; > + > +static int xgene_allocate_domains(struct xgene_msi *msi) > +{ > + msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec, > + &msi_domain_ops, msi); > + > + if (!msi->domain) { > + pr_err("failed to create parent MSI domain\n"); > + return -ENOMEM; > + } > + > + msi->mchip.of_node = msi->node; > + msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node, > + &xgene_msi_domain_info, > + msi->domain); > + > + if (!msi->mchip.domain) { > + pr_err("failed to create MSI domain\n"); > + irq_domain_remove(msi->domain); > + return -ENOMEM; > + } > + > + return 0; > +} > + > +static void xgene_free_domains(struct xgene_msi *msi) > +{ > + if (msi->mchip.domain) > + irq_domain_remove(msi->mchip.domain); > + 
if (msi->domain) > + irq_domain_remove(msi->domain); > +} > + > +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi) > +{ > + u32 msi_irq_count = xgene_msi->settings->nr_msi_vec; > + u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs; > + int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long); > + > + xgene_msi->bitmap = kzalloc(size, GFP_KERNEL); > + if (!xgene_msi->bitmap) > + return -ENOMEM; > + > + mutex_init(&xgene_msi->bitmap_lock); > + > + xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL); > + if (!xgene_msi->msi_virqs) > + return -ENOMEM; > + > + return 0; > +} > + > +static irqreturn_t xgene_msi_isr(int irq, void *data) > +{ > + struct xgene_msi *xgene_msi = (struct xgene_msi *) data; > + unsigned int virq; > + int msir_index, msir_reg, msir_val, hw_irq; > + u32 intr_index, grp_select, msi_grp, processed = 0; > + u32 nr_hw_irqs, irqs_per_index, index_per_group; > + > + msi_grp = irq - xgene_msi->msi_virqs[0]; > + if (msi_grp >= xgene_msi->settings->nr_hw_irqs) { > + pr_err("invalid msi received\n"); > + return IRQ_NONE; > + } Bjorn
On Thu, Apr 9, 2015 at 1:11 PM, Bjorn Helgaas <bhelgaas@google.com> wrote: > On Thu, Apr 09, 2015 at 10:05:03AM -0700, Duc Dang wrote: >> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into >> 16 HW IRQ lines. >> >> Signed-off-by: Duc Dang <dhdang@apm.com> >> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> >> ... > >> --- /dev/null >> +++ b/drivers/pci/host/pci-xgene-msi.c >> @@ -0,0 +1,407 @@ >> ... > >> +static void xgene_irq_domain_free(struct irq_domain *domain, >> + unsigned int virq, unsigned int nr_irqs) >> +{ >> + struct irq_data *d = irq_domain_get_irq_data(domain, virq); >> + struct xgene_msi *msi = irq_data_get_irq_chip_data(d); >> + >> + mutex_lock(&msi->bitmap_lock); >> + >> + if (!test_bit(d->hwirq, msi->bitmap)) >> + pr_err("trying to free unused MSI#%lu\n", d->hwirq); > > I'll let Marc comment on the substance of this patch, but I'd sure like to > see more information in this and the other pr_errs below. The text "failed > to create parent MSI domain" in the dmesg log is not enough. Ideally it > would identify the relevant host bridge. > Hi Bjorn, X-Gene has single MSI block that serves 5 PCIe ports. so in this implementation, 5 X-Gene PCIe host bridges share the same MSI domain. I can add X-Gene 1 MSI driver name and device tree node name into these messages, but the host bridge information is not available. As I understand, in case of error, the upper layer MSI core and driver code will check and report the error with additional device information (bus number, function number, etc)? Regards, Duc Dang. 
>> + else >> + clear_bit(d->hwirq, msi->bitmap); >> + >> + mutex_unlock(&msi->bitmap_lock); >> + >> + irq_domain_free_irqs_parent(domain, virq, nr_irqs); >> +} >> + >> +static const struct irq_domain_ops msi_domain_ops = { >> + .alloc = xgene_irq_domain_alloc, >> + .free = xgene_irq_domain_free, >> +}; >> + >> +static int xgene_allocate_domains(struct xgene_msi *msi) >> +{ >> + msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec, >> + &msi_domain_ops, msi); >> + >> + if (!msi->domain) { >> + pr_err("failed to create parent MSI domain\n"); >> + return -ENOMEM; >> + } >> + >> + msi->mchip.of_node = msi->node; >> + msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node, >> + &xgene_msi_domain_info, >> + msi->domain); >> + >> + if (!msi->mchip.domain) { >> + pr_err("failed to create MSI domain\n"); >> + irq_domain_remove(msi->domain); >> + return -ENOMEM; >> + } >> + >> + return 0; >> +} >> + >> +static void xgene_free_domains(struct xgene_msi *msi) >> +{ >> + if (msi->mchip.domain) >> + irq_domain_remove(msi->mchip.domain); >> + if (msi->domain) >> + irq_domain_remove(msi->domain); >> +} >> + >> +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi) >> +{ >> + u32 msi_irq_count = xgene_msi->settings->nr_msi_vec; >> + u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs; >> + int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long); >> + >> + xgene_msi->bitmap = kzalloc(size, GFP_KERNEL); >> + if (!xgene_msi->bitmap) >> + return -ENOMEM; >> + >> + mutex_init(&xgene_msi->bitmap_lock); >> + >> + xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL); >> + if (!xgene_msi->msi_virqs) >> + return -ENOMEM; >> + >> + return 0; >> +} >> + >> +static irqreturn_t xgene_msi_isr(int irq, void *data) >> +{ >> + struct xgene_msi *xgene_msi = (struct xgene_msi *) data; >> + unsigned int virq; >> + int msir_index, msir_reg, msir_val, hw_irq; >> + u32 intr_index, grp_select, msi_grp, processed = 0; >> + u32 nr_hw_irqs, irqs_per_index, 
index_per_group; >> + >> + msi_grp = irq - xgene_msi->msi_virqs[0]; >> + if (msi_grp >= xgene_msi->settings->nr_hw_irqs) { >> + pr_err("invalid msi received\n"); >> + return IRQ_NONE; >> + } > > Bjorn
On Thu, Apr 9, 2015 at 4:52 PM, Duc Dang <dhdang@apm.com> wrote: > On Thu, Apr 9, 2015 at 1:11 PM, Bjorn Helgaas <bhelgaas@google.com> wrote: >> On Thu, Apr 09, 2015 at 10:05:03AM -0700, Duc Dang wrote: >>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into >>> 16 HW IRQ lines. >>> >>> Signed-off-by: Duc Dang <dhdang@apm.com> >>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> >>> ... >> >>> --- /dev/null >>> +++ b/drivers/pci/host/pci-xgene-msi.c >>> @@ -0,0 +1,407 @@ >>> ... >> >>> +static void xgene_irq_domain_free(struct irq_domain *domain, >>> + unsigned int virq, unsigned int nr_irqs) >>> +{ >>> + struct irq_data *d = irq_domain_get_irq_data(domain, virq); >>> + struct xgene_msi *msi = irq_data_get_irq_chip_data(d); >>> + >>> + mutex_lock(&msi->bitmap_lock); >>> + >>> + if (!test_bit(d->hwirq, msi->bitmap)) >>> + pr_err("trying to free unused MSI#%lu\n", d->hwirq); >> >> I'll let Marc comment on the substance of this patch, but I'd sure like to >> see more information in this and the other pr_errs below. The text "failed >> to create parent MSI domain" in the dmesg log is not enough. Ideally it >> would identify the relevant host bridge. >> > Hi Bjorn, > > X-Gene has single MSI block that serves 5 PCIe ports. so in this > implementation, 5 X-Gene PCIe host bridges share the same MSI domain. > > I can add X-Gene 1 MSI driver name and device tree node name into > these messages, but the host bridge information is not available. > > As I understand, in case of error, the upper layer MSI core and driver > code will check and report the error with additional device > information (bus number, function number, etc)? If you think the upper layers already report the error, maybe you could just drop the output from the X-Gene code. I just think it's pointless to print messages that are constant strings with no hook for the user to associate them with a device or event. 
>>> + else >>> + clear_bit(d->hwirq, msi->bitmap); >>> + >>> + mutex_unlock(&msi->bitmap_lock); >>> + >>> + irq_domain_free_irqs_parent(domain, virq, nr_irqs); >>> +} >>> + >>> +static const struct irq_domain_ops msi_domain_ops = { >>> + .alloc = xgene_irq_domain_alloc, >>> + .free = xgene_irq_domain_free, >>> +}; >>> + >>> +static int xgene_allocate_domains(struct xgene_msi *msi) >>> +{ >>> + msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec, >>> + &msi_domain_ops, msi); >>> + >>> + if (!msi->domain) { >>> + pr_err("failed to create parent MSI domain\n"); >>> + return -ENOMEM; >>> + } >>> + >>> + msi->mchip.of_node = msi->node; >>> + msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node, >>> + &xgene_msi_domain_info, >>> + msi->domain); >>> + >>> + if (!msi->mchip.domain) { >>> + pr_err("failed to create MSI domain\n"); >>> + irq_domain_remove(msi->domain); >>> + return -ENOMEM; >>> + } >>> + >>> + return 0; >>> +} >>> + >>> +static void xgene_free_domains(struct xgene_msi *msi) >>> +{ >>> + if (msi->mchip.domain) >>> + irq_domain_remove(msi->mchip.domain); >>> + if (msi->domain) >>> + irq_domain_remove(msi->domain); >>> +} >>> + >>> +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi) >>> +{ >>> + u32 msi_irq_count = xgene_msi->settings->nr_msi_vec; >>> + u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs; >>> + int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long); >>> + >>> + xgene_msi->bitmap = kzalloc(size, GFP_KERNEL); >>> + if (!xgene_msi->bitmap) >>> + return -ENOMEM; >>> + >>> + mutex_init(&xgene_msi->bitmap_lock); >>> + >>> + xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL); >>> + if (!xgene_msi->msi_virqs) >>> + return -ENOMEM; >>> + >>> + return 0; >>> +} >>> + >>> +static irqreturn_t xgene_msi_isr(int irq, void *data) >>> +{ >>> + struct xgene_msi *xgene_msi = (struct xgene_msi *) data; >>> + unsigned int virq; >>> + int msir_index, msir_reg, msir_val, hw_irq; >>> + u32 intr_index, 
grp_select, msi_grp, processed = 0; >>> + u32 nr_hw_irqs, irqs_per_index, index_per_group; >>> + >>> + msi_grp = irq - xgene_msi->msi_virqs[0]; >>> + if (msi_grp >= xgene_msi->settings->nr_hw_irqs) { >>> + pr_err("invalid msi received\n"); >>> + return IRQ_NONE; >>> + } >> >> Bjorn > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/
On Thu, Apr 9, 2015 at 3:39 PM, Bjorn Helgaas <bhelgaas@google.com> wrote: > On Thu, Apr 9, 2015 at 4:52 PM, Duc Dang <dhdang@apm.com> wrote: >> On Thu, Apr 9, 2015 at 1:11 PM, Bjorn Helgaas <bhelgaas@google.com> wrote: >>> On Thu, Apr 09, 2015 at 10:05:03AM -0700, Duc Dang wrote: >>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into >>>> 16 HW IRQ lines. >>>> >>>> Signed-off-by: Duc Dang <dhdang@apm.com> >>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> >>>> ... >>> >>>> --- /dev/null >>>> +++ b/drivers/pci/host/pci-xgene-msi.c >>>> @@ -0,0 +1,407 @@ >>>> ... >>> >>>> +static void xgene_irq_domain_free(struct irq_domain *domain, >>>> + unsigned int virq, unsigned int nr_irqs) >>>> +{ >>>> + struct irq_data *d = irq_domain_get_irq_data(domain, virq); >>>> + struct xgene_msi *msi = irq_data_get_irq_chip_data(d); >>>> + >>>> + mutex_lock(&msi->bitmap_lock); >>>> + >>>> + if (!test_bit(d->hwirq, msi->bitmap)) >>>> + pr_err("trying to free unused MSI#%lu\n", d->hwirq); >>> >>> I'll let Marc comment on the substance of this patch, but I'd sure like to >>> see more information in this and the other pr_errs below. The text "failed >>> to create parent MSI domain" in the dmesg log is not enough. Ideally it >>> would identify the relevant host bridge. >>> >> Hi Bjorn, >> >> X-Gene has single MSI block that serves 5 PCIe ports. so in this >> implementation, 5 X-Gene PCIe host bridges share the same MSI domain. >> >> I can add X-Gene 1 MSI driver name and device tree node name into >> these messages, but the host bridge information is not available. >> >> As I understand, in case of error, the upper layer MSI core and driver >> code will check and report the error with additional device >> information (bus number, function number, etc)? > > If you think the upper layers already report the error, maybe you > could just drop the output from the X-Gene code. 
> > I just think it's pointless to print messages that are constant > strings with no hook for the user to associate them with a device or > event. Hi Bjorn, Yes, I will do so. Regards, Duc Dang. > >>>> + else >>>> + clear_bit(d->hwirq, msi->bitmap); >>>> + >>>> + mutex_unlock(&msi->bitmap_lock); >>>> + >>>> + irq_domain_free_irqs_parent(domain, virq, nr_irqs); >>>> +} >>>> + >>>> +static const struct irq_domain_ops msi_domain_ops = { >>>> + .alloc = xgene_irq_domain_alloc, >>>> + .free = xgene_irq_domain_free, >>>> +}; >>>> + >>>> +static int xgene_allocate_domains(struct xgene_msi *msi) >>>> +{ >>>> + msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec, >>>> + &msi_domain_ops, msi); >>>> + >>>> + if (!msi->domain) { >>>> + pr_err("failed to create parent MSI domain\n"); >>>> + return -ENOMEM; >>>> + } >>>> + >>>> + msi->mchip.of_node = msi->node; >>>> + msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node, >>>> + &xgene_msi_domain_info, >>>> + msi->domain); >>>> + >>>> + if (!msi->mchip.domain) { >>>> + pr_err("failed to create MSI domain\n"); >>>> + irq_domain_remove(msi->domain); >>>> + return -ENOMEM; >>>> + } >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +static void xgene_free_domains(struct xgene_msi *msi) >>>> +{ >>>> + if (msi->mchip.domain) >>>> + irq_domain_remove(msi->mchip.domain); >>>> + if (msi->domain) >>>> + irq_domain_remove(msi->domain); >>>> +} >>>> + >>>> +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi) >>>> +{ >>>> + u32 msi_irq_count = xgene_msi->settings->nr_msi_vec; >>>> + u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs; >>>> + int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long); >>>> + >>>> + xgene_msi->bitmap = kzalloc(size, GFP_KERNEL); >>>> + if (!xgene_msi->bitmap) >>>> + return -ENOMEM; >>>> + >>>> + mutex_init(&xgene_msi->bitmap_lock); >>>> + >>>> + xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL); >>>> + if (!xgene_msi->msi_virqs) >>>> + return -ENOMEM; >>>> + 
>>>> + return 0; >>>> +} >>>> + >>>> +static irqreturn_t xgene_msi_isr(int irq, void *data) >>>> +{ >>>> + struct xgene_msi *xgene_msi = (struct xgene_msi *) data; >>>> + unsigned int virq; >>>> + int msir_index, msir_reg, msir_val, hw_irq; >>>> + u32 intr_index, grp_select, msi_grp, processed = 0; >>>> + u32 nr_hw_irqs, irqs_per_index, index_per_group; >>>> + >>>> + msi_grp = irq - xgene_msi->msi_virqs[0]; >>>> + if (msi_grp >= xgene_msi->settings->nr_hw_irqs) { >>>> + pr_err("invalid msi received\n"); >>>> + return IRQ_NONE; >>>> + } >>> >>> Bjorn >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html >> Please read the FAQ at http://www.tux.org/lkml/
On 09/04/15 18:05, Duc Dang wrote: > X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into > 16 HW IRQ lines. > > Signed-off-by: Duc Dang <dhdang@apm.com> > Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> > --- > drivers/pci/host/Kconfig | 6 + > drivers/pci/host/Makefile | 1 + > drivers/pci/host/pci-xgene-msi.c | 407 +++++++++++++++++++++++++++++++++++++++ > drivers/pci/host/pci-xgene.c | 21 ++ > 4 files changed, 435 insertions(+) > create mode 100644 drivers/pci/host/pci-xgene-msi.c > > diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig > index 7b892a9..c9b61fa 100644 > --- a/drivers/pci/host/Kconfig > +++ b/drivers/pci/host/Kconfig > @@ -89,11 +89,17 @@ config PCI_XGENE > depends on ARCH_XGENE > depends on OF > select PCIEPORTBUS > + select PCI_MSI_IRQ_DOMAIN if PCI_MSI > + select PCI_XGENE_MSI if PCI_MSI > help > Say Y here if you want internal PCI support on APM X-Gene SoC. > There are 5 internal PCIe ports available. Each port is GEN3 capable > and have varied lanes from x1 to x8. 
> > +config PCI_XGENE_MSI > + bool "X-Gene v1 PCIe MSI feature" > + depends on PCI_XGENE && PCI_MSI > + > config PCI_LAYERSCAPE > bool "Freescale Layerscape PCIe controller" > depends on OF && ARM > diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile > index e61d91c..f39bde3 100644 > --- a/drivers/pci/host/Makefile > +++ b/drivers/pci/host/Makefile > @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o > obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o > obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o > obj-$(CONFIG_PCI_XGENE) += pci-xgene.o > +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o > obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o > obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o > diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c > new file mode 100644 > index 0000000..4f0ff42 > --- /dev/null > +++ b/drivers/pci/host/pci-xgene-msi.c > @@ -0,0 +1,407 @@ > +/* > + * APM X-Gene MSI Driver > + * > + * Copyright (c) 2014, Applied Micro Circuits Corporation > + * Author: Tanmay Inamdar <tinamdar@apm.com> > + * Duc Dang <dhdang@apm.com> > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms of the GNU General Public License as published by the > + * Free Software Foundation; either version 2 of the License, or (at your > + * option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. 
> + */ > +#include <linux/interrupt.h> > +#include <linux/module.h> > +#include <linux/msi.h> > +#include <linux/of_irq.h> > +#include <linux/pci.h> > +#include <linux/platform_device.h> > +#include <linux/of_pci.h> > + > +#define MSI_INDEX0 0x000000 > +#define MSI_INT0 0x800000 > + > +struct xgene_msi_settings { > + u32 index_per_group; > + u32 irqs_per_index; > + u32 nr_msi_vec; > + u32 nr_hw_irqs; > +}; > + > +struct xgene_msi { > + struct device_node *node; > + struct msi_controller mchip; > + struct irq_domain *domain; > + struct xgene_msi_settings *settings; > + u32 msi_addr_lo; > + u32 msi_addr_hi; I'd rather see the mailbox address directly, and only do the split when assigning it to the message (you seem to play all kinds of tricks on the address anyway). > + void __iomem *msi_regs; > + unsigned long *bitmap; > + struct mutex bitmap_lock; > + int *msi_virqs; > +}; > + > +struct xgene_msi_settings storm_msi_settings = { > + .index_per_group = 8, > + .irqs_per_index = 21, > + .nr_msi_vec = 2688, > + .nr_hw_irqs = 16, > +}; It would really help to understand how index, group and hw irq lines are structured. nr_msi_vec is obviously the product of these three numbers, so maybe you can lose it (we have computers, remember... ;-). Do you expect more than this single "storm" implementation? If so, maybe they should be described in the DT. If not, why do we need a separate structure with an indirection if we know these are constants? 
> + > +typedef int (*xgene_msi_initcall_t)(struct xgene_msi *); > + > +static int xgene_msi_init_storm_settings(struct xgene_msi *xgene_msi) > +{ > + xgene_msi->settings = &storm_msi_settings; > + return 0; > +} > + > +static struct irq_chip xgene_msi_top_irq_chip = { > + .name = "X-Gene1 MSI", > + .irq_enable = pci_msi_unmask_irq, > + .irq_disable = pci_msi_mask_irq, > + .irq_mask = pci_msi_mask_irq, > + .irq_unmask = pci_msi_unmask_irq, > +}; > + > +static struct msi_domain_info xgene_msi_domain_info = { > + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | > + MSI_FLAG_PCI_MSIX), > + .chip = &xgene_msi_top_irq_chip, > +}; > + > +static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) > +{ > + struct xgene_msi *msi = irq_data_get_irq_chip_data(data); > + u32 nr_hw_irqs = msi->settings->nr_hw_irqs; > + u32 irqs_per_index = msi->settings->irqs_per_index; > + u32 reg_set = data->hwirq / (nr_hw_irqs * irqs_per_index); > + u32 group = data->hwirq % nr_hw_irqs; > + > + msg->address_hi = msi->msi_addr_hi; > + msg->address_lo = msi->msi_addr_lo + > + (((8 * group) + reg_set) << 16); > + msg->data = (data->hwirq / nr_hw_irqs) % irqs_per_index; ... (sound of head exploding...). I hate it when I have to reverse engineer the hardware by looking at the code... Please give us some clue on how interrupts are spread across the various pages, how the various indexes are combined to give an actual address. > +} > + > +static int xgene_msi_set_affinity(struct irq_data *irq_data, > + const struct cpumask *mask, bool force) > +{ > + struct xgene_msi *msi = irq_data_get_irq_chip_data(irq_data); > + unsigned int gic_irq; > + int ret; > + > + gic_irq = msi->msi_virqs[irq_data->hwirq % msi->settings->nr_hw_irqs]; > + ret = irq_set_affinity(gic_irq, mask); Erm... This has the effect of moving *all* the MSIs hanging off this interrupt to another CPU. I'm not sure that's an acceptable effect... What if another MSI requires a different affinity? 
> + if (ret == IRQ_SET_MASK_OK) > + ret = IRQ_SET_MASK_OK_DONE; > + > + return ret; > +} > + > +static struct irq_chip xgene_msi_bottom_irq_chip = { > + .name = "MSI", > + .irq_set_affinity = xgene_msi_set_affinity, > + .irq_compose_msi_msg = xgene_compose_msi_msg, > +}; > + > +static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, > + unsigned int nr_irqs, void *args) > +{ > + > + struct xgene_msi *msi = domain->host_data; > + u32 msi_irq_count = msi->settings->nr_msi_vec; > + int msi_irq; > + > + mutex_lock(&msi->bitmap_lock); > + > + msi_irq = find_first_zero_bit(msi->bitmap, msi_irq_count); > + if (msi_irq < msi_irq_count) > + set_bit(msi_irq, msi->bitmap); > + else > + msi_irq = -ENOSPC; > + > + mutex_unlock(&msi->bitmap_lock); > + > + if (msi_irq < 0) > + return msi_irq; > + > + irq_domain_set_info(domain, virq, msi_irq, &xgene_msi_bottom_irq_chip, > + domain->host_data, handle_simple_irq, NULL, NULL); > + set_irq_flags(virq, IRQF_VALID); > + > + return 0; > +} > + > +static void xgene_irq_domain_free(struct irq_domain *domain, > + unsigned int virq, unsigned int nr_irqs) > +{ > + struct irq_data *d = irq_domain_get_irq_data(domain, virq); > + struct xgene_msi *msi = irq_data_get_irq_chip_data(d); > + > + mutex_lock(&msi->bitmap_lock); > + > + if (!test_bit(d->hwirq, msi->bitmap)) > + pr_err("trying to free unused MSI#%lu\n", d->hwirq); > + else > + clear_bit(d->hwirq, msi->bitmap); > + > + mutex_unlock(&msi->bitmap_lock); > + > + irq_domain_free_irqs_parent(domain, virq, nr_irqs); > +} > + > +static const struct irq_domain_ops msi_domain_ops = { > + .alloc = xgene_irq_domain_alloc, > + .free = xgene_irq_domain_free, > +}; > + > +static int xgene_allocate_domains(struct xgene_msi *msi) > +{ > + msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec, > + &msi_domain_ops, msi); > + > + if (!msi->domain) { > + pr_err("failed to create parent MSI domain\n"); > + return -ENOMEM; > + } > + > + msi->mchip.of_node = msi->node; > 
+ msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node, > + &xgene_msi_domain_info, > + msi->domain); > + > + if (!msi->mchip.domain) { > + pr_err("failed to create MSI domain\n"); > + irq_domain_remove(msi->domain); > + return -ENOMEM; > + } > + > + return 0; > +} > + > +static void xgene_free_domains(struct xgene_msi *msi) > +{ > + if (msi->mchip.domain) > + irq_domain_remove(msi->mchip.domain); > + if (msi->domain) > + irq_domain_remove(msi->domain); > +} > + > +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi) > +{ > + u32 msi_irq_count = xgene_msi->settings->nr_msi_vec; > + u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs; > + int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long); > + > + xgene_msi->bitmap = kzalloc(size, GFP_KERNEL); > + if (!xgene_msi->bitmap) > + return -ENOMEM; > + > + mutex_init(&xgene_msi->bitmap_lock); > + > + xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL); > + if (!xgene_msi->msi_virqs) > + return -ENOMEM; > + > + return 0; > +} > + > +static irqreturn_t xgene_msi_isr(int irq, void *data) This is not a "normal" interrupt handler. This is really a chained interrupt controller. Please use the proper infrastructure for this (irq_set_chained_handler, chained_irq_enter, chained_irq_exit). Otherwise, your usage of irq_find_mapping is illegal wrt RCU. 
> +{ > + struct xgene_msi *xgene_msi = (struct xgene_msi *) data; > + unsigned int virq; > + int msir_index, msir_reg, msir_val, hw_irq; > + u32 intr_index, grp_select, msi_grp, processed = 0; > + u32 nr_hw_irqs, irqs_per_index, index_per_group; > + > + msi_grp = irq - xgene_msi->msi_virqs[0]; > + if (msi_grp >= xgene_msi->settings->nr_hw_irqs) { > + pr_err("invalid msi received\n"); > + return IRQ_NONE; > + } > + > + nr_hw_irqs = xgene_msi->settings->nr_hw_irqs; > + irqs_per_index = xgene_msi->settings->irqs_per_index; > + index_per_group = xgene_msi->settings->index_per_group; > + > + grp_select = readl(xgene_msi->msi_regs + MSI_INT0 + (msi_grp << 16)); > + while (grp_select) { > + msir_index = ffs(grp_select) - 1; > + msir_reg = (msi_grp << 19) + (msir_index << 16); > + msir_val = readl(xgene_msi->msi_regs + MSI_INDEX0 + msir_reg); > + while (msir_val) { > + intr_index = ffs(msir_val) - 1; > + hw_irq = (((msir_index * irqs_per_index) + intr_index) * > + nr_hw_irqs) + msi_grp; Same thing here. This requires some explanation of how the HW is organised. > + virq = irq_find_mapping(xgene_msi->domain, hw_irq); > + if (virq != 0) > + generic_handle_irq(virq); > + msir_val &= ~(1 << intr_index); > + processed++; > + } > + grp_select &= ~(1 << msir_index); > + } > + > + return processed > 0 ? 
IRQ_HANDLED : IRQ_NONE; > +} > + > +static int xgene_msi_remove(struct platform_device *pdev) > +{ > + int virq, i; > + struct xgene_msi *msi = platform_get_drvdata(pdev); > + u32 nr_hw_irqs = msi->settings->nr_hw_irqs; > + > + for (i = 0; i < nr_hw_irqs; i++) { > + virq = msi->msi_virqs[i]; > + if (virq != 0) > + free_irq(virq, msi); > + } > + > + kfree(msi->bitmap); > + msi->bitmap = NULL; > + > + xgene_free_domains(msi); > + > + return 0; > +} > + > +static int xgene_msi_setup_hwirq(struct xgene_msi *msi, > + struct platform_device *pdev, > + int irq_index) > +{ > + int virt_msir; > + cpumask_var_t mask; > + int err; > + > + virt_msir = platform_get_irq(pdev, irq_index); > + if (virt_msir < 0) { > + dev_err(&pdev->dev, "Cannot translate IRQ index %d\n", > + irq_index); > + return -EINVAL; > + } > + > + err = request_irq(virt_msir, xgene_msi_isr, 0, > + xgene_msi_top_irq_chip.name, msi); This is where you should use irq_set_chained_handler. > + if (err) { > + dev_err(&pdev->dev, "request irq failed\n"); > + return err; > + } > + > + if (alloc_cpumask_var(&mask, GFP_KERNEL)) { > + cpumask_setall(mask); > + irq_set_affinity(virt_msir, mask); > + free_cpumask_var(mask); > + } > + > + msi->msi_virqs[irq_index] = virt_msir; > + > + return 0; > +} > + > +static const struct of_device_id xgene_msi_match_table[] = { > + {.compatible = "apm,xgene1-msi", > + .data = xgene_msi_init_storm_settings}, > + {}, > +}; > + > +static int xgene_msi_probe(struct platform_device *pdev) > +{ > + struct resource *res; > + int rc, irq_index; > + struct device_node *np; > + const struct of_device_id *matched_np; > + struct xgene_msi *xgene_msi; > + xgene_msi_initcall_t init_fn; > + u32 nr_hw_irqs, nr_msi_vecs; > + > + np = of_find_matching_node_and_match(NULL, > + xgene_msi_match_table, &matched_np); > + if (!np) > + return -ENODEV; > + > + xgene_msi = kzalloc(sizeof(struct xgene_msi), GFP_KERNEL); > + if (!xgene_msi) { > + dev_err(&pdev->dev, "failed to allocate X-Gene MSI data\n"); > + 
return -ENOMEM; > + } > + > + xgene_msi->node = np; > + > + init_fn = (xgene_msi_initcall_t) matched_np->data; > + rc = init_fn(xgene_msi); > + if (rc) > + return rc; > + > + platform_set_drvdata(pdev, xgene_msi); > + > + nr_msi_vecs = xgene_msi->settings->nr_msi_vec; > + > + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + xgene_msi->msi_regs = devm_ioremap_resource(&pdev->dev, res); > + if (IS_ERR(xgene_msi->msi_regs)) { > + dev_err(&pdev->dev, "no reg space\n"); > + rc = -EINVAL; > + goto error; > + } > + > + xgene_msi->msi_addr_hi = upper_32_bits(res->start); > + xgene_msi->msi_addr_lo = lower_32_bits(res->start); > + > + rc = xgene_msi_init_allocator(xgene_msi); > + if (rc) { > + dev_err(&pdev->dev, "Error allocating MSI bitmap\n"); > + goto error; > + } > + > + rc = xgene_allocate_domains(xgene_msi); > + if (rc) { > + dev_err(&pdev->dev, "Failed to allocate MSI domain\n"); > + goto error; > + } > + > + nr_hw_irqs = xgene_msi->settings->nr_hw_irqs; > + for (irq_index = 0; irq_index < nr_hw_irqs; irq_index++) { > + rc = xgene_msi_setup_hwirq(xgene_msi, pdev, irq_index); > + if (rc) > + goto error; > + } > + > + rc = of_pci_msi_chip_add(&xgene_msi->mchip); > + if (rc) { > + dev_err(&pdev->dev, "failed to add MSI controller chip\n"); > + goto error; > + } > + > + dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n"); > + > + return 0; > +error: > + xgene_msi_remove(pdev); > + return rc; > +} > + > +static struct platform_driver xgene_msi_driver = { > + .driver = { > + .name = "xgene-msi", > + .owner = THIS_MODULE, > + .of_match_table = xgene_msi_match_table, > + }, > + .probe = xgene_msi_probe, > + .remove = xgene_msi_remove, > +}; > + > +static int __init xgene_pcie_msi_init(void) > +{ > + return platform_driver_register(&xgene_msi_driver); > +} > +subsys_initcall(xgene_pcie_msi_init); > + > +MODULE_AUTHOR("Duc Dang <dhdang@apm.com>"); > +MODULE_DESCRIPTION("APM X-Gene PCIe MSI driver"); > +MODULE_LICENSE("GPL v2"); > diff --git 
a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c > index 22751ed..3e6faa1 100644 > --- a/drivers/pci/host/pci-xgene.c > +++ b/drivers/pci/host/pci-xgene.c > @@ -468,6 +468,23 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port, > return 0; > } > > +static int xgene_pcie_msi_enable(struct pci_bus *bus) > +{ > + struct device_node *msi_node; > + > + msi_node = of_parse_phandle(bus->dev.of_node, > + "msi-parent", 0); > + if (!msi_node) > + return -ENODEV; > + > + bus->msi = of_pci_find_msi_chip_by_node(msi_node); > + if (bus->msi) > + bus->msi->dev = &bus->dev; > + else > + return -ENODEV; > + return 0; > +} > + > static int xgene_pcie_probe_bridge(struct platform_device *pdev) > { > struct device_node *dn = pdev->dev.of_node; > @@ -504,6 +521,10 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev) > if (!bus) > return -ENOMEM; > > + if (IS_ENABLED(CONFIG_PCI_MSI)) > + if (xgene_pcie_msi_enable(bus)) > + dev_info(port->dev, "failed to enable MSI\n"); > + > pci_scan_child_bus(bus); > pci_assign_unassigned_bus_resources(bus); > pci_bus_add_devices(bus); > -- > 1.9.1 > Thanks, M.
Just a nit about a license mismatch. On Thu, 2015-04-09 at 10:05 -0700, Duc Dang wrote: > --- a/drivers/pci/host/Kconfig > +++ b/drivers/pci/host/Kconfig > +config PCI_XGENE_MSI > + bool "X-Gene v1 PCIe MSI feature" > + depends on PCI_XGENE && PCI_MSI > --- a/drivers/pci/host/Makefile > +++ b/drivers/pci/host/Makefile > +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o > --- /dev/null > +++ b/drivers/pci/host/pci-xgene-msi.c > + * This program is free software; you can redistribute it and/or modify it > + * under the terms of the GNU General Public License as published by the > + * Free Software Foundation; either version 2 of the License, or (at your > + * option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. This states the license of this file is GPL v2 or later. > +MODULE_LICENSE("GPL v2"); And, according to include/linux/module.h, this states the license of this driver is GPL v2. So either the comment at the top of this file or the license ident used in this macro needs to change. (Now, as far as I can tell, this driver is built-in only. So I think this macro - and all other module specific code in this file - is actually unneeded. But I already know Bjorn's position here, so I won't bother you about that.) Thanks, Paul Bolle
On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: > On 09/04/15 18:05, Duc Dang wrote: >> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into >> 16 HW IRQ lines. >> >> Signed-off-by: Duc Dang <dhdang@apm.com> >> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> >> --- >> drivers/pci/host/Kconfig | 6 + >> drivers/pci/host/Makefile | 1 + >> drivers/pci/host/pci-xgene-msi.c | 407 +++++++++++++++++++++++++++++++++++++++ >> drivers/pci/host/pci-xgene.c | 21 ++ >> 4 files changed, 435 insertions(+) >> create mode 100644 drivers/pci/host/pci-xgene-msi.c >> >> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig >> index 7b892a9..c9b61fa 100644 >> --- a/drivers/pci/host/Kconfig >> +++ b/drivers/pci/host/Kconfig >> @@ -89,11 +89,17 @@ config PCI_XGENE >> depends on ARCH_XGENE >> depends on OF >> select PCIEPORTBUS >> + select PCI_MSI_IRQ_DOMAIN if PCI_MSI >> + select PCI_XGENE_MSI if PCI_MSI >> help >> Say Y here if you want internal PCI support on APM X-Gene SoC. >> There are 5 internal PCIe ports available. Each port is GEN3 capable >> and have varied lanes from x1 to x8. 
>> >> +config PCI_XGENE_MSI >> + bool "X-Gene v1 PCIe MSI feature" >> + depends on PCI_XGENE && PCI_MSI >> + >> config PCI_LAYERSCAPE >> bool "Freescale Layerscape PCIe controller" >> depends on OF && ARM >> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile >> index e61d91c..f39bde3 100644 >> --- a/drivers/pci/host/Makefile >> +++ b/drivers/pci/host/Makefile >> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o >> obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o >> obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o >> obj-$(CONFIG_PCI_XGENE) += pci-xgene.o >> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o >> obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o >> obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o >> diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c >> new file mode 100644 >> index 0000000..4f0ff42 >> --- /dev/null >> +++ b/drivers/pci/host/pci-xgene-msi.c >> @@ -0,0 +1,407 @@ >> +/* >> + * APM X-Gene MSI Driver >> + * >> + * Copyright (c) 2014, Applied Micro Circuits Corporation >> + * Author: Tanmay Inamdar <tinamdar@apm.com> >> + * Duc Dang <dhdang@apm.com> >> + * >> + * This program is free software; you can redistribute it and/or modify it >> + * under the terms of the GNU General Public License as published by the >> + * Free Software Foundation; either version 2 of the License, or (at your >> + * option) any later version. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. 
>> + */ >> +#include <linux/interrupt.h> >> +#include <linux/module.h> >> +#include <linux/msi.h> >> +#include <linux/of_irq.h> >> +#include <linux/pci.h> >> +#include <linux/platform_device.h> >> +#include <linux/of_pci.h> >> + >> +#define MSI_INDEX0 0x000000 >> +#define MSI_INT0 0x800000 >> + >> +struct xgene_msi_settings { >> + u32 index_per_group; >> + u32 irqs_per_index; >> + u32 nr_msi_vec; >> + u32 nr_hw_irqs; >> +}; >> + >> +struct xgene_msi { >> + struct device_node *node; >> + struct msi_controller mchip; >> + struct irq_domain *domain; >> + struct xgene_msi_settings *settings; >> + u32 msi_addr_lo; >> + u32 msi_addr_hi; > > I'd rather see the mailbox address directly, and only do the split when > assigning it to the message (you seem to play all kind of tricks on the > address anyway). msi_addr_lo and msi_addr_hi store the physical base address of the MSI controller registers. I will add a comment to clarify this. > >> + void __iomem *msi_regs; >> + unsigned long *bitmap; >> + struct mutex bitmap_lock; >> + int *msi_virqs; >> +}; >> + >> +struct xgene_msi_settings storm_msi_settings = { >> + .index_per_group = 8, >> + .irqs_per_index = 21, >> + .nr_msi_vec = 2688, >> + .nr_hw_irqs = 16, >> +}; > > It would really help understanding how index, group and hw irq lines are > structured. nr_msi_vec is obviously the product of these three numbers, > so maybe you can loose it (we have computers, remember... ;-). > > Do you expect more than this single "storm" implementation? If so, maybe > they should be described in the DT. If not, why do we need a separate > structure with an indirection if we know these are constants? > Yes, I will add a description of the index, group, and hw irq line structure and also replace the fields in this storm_msi_settings structure with constant definitions. 
>> + >> +typedef int (*xgene_msi_initcall_t)(struct xgene_msi *); >> + >> +static int xgene_msi_init_storm_settings(struct xgene_msi *xgene_msi) >> +{ >> + xgene_msi->settings = &storm_msi_settings; >> + return 0; >> +} >> + >> +static struct irq_chip xgene_msi_top_irq_chip = { >> + .name = "X-Gene1 MSI", >> + .irq_enable = pci_msi_unmask_irq, >> + .irq_disable = pci_msi_mask_irq, >> + .irq_mask = pci_msi_mask_irq, >> + .irq_unmask = pci_msi_unmask_irq, >> +}; >> + >> +static struct msi_domain_info xgene_msi_domain_info = { >> + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | >> + MSI_FLAG_PCI_MSIX), >> + .chip = &xgene_msi_top_irq_chip, >> +}; >> + >> +static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) >> +{ >> + struct xgene_msi *msi = irq_data_get_irq_chip_data(data); >> + u32 nr_hw_irqs = msi->settings->nr_hw_irqs; >> + u32 irqs_per_index = msi->settings->irqs_per_index; >> + u32 reg_set = data->hwirq / (nr_hw_irqs * irqs_per_index); >> + u32 group = data->hwirq % nr_hw_irqs; >> + >> + msg->address_hi = msi->msi_addr_hi; >> + msg->address_lo = msi->msi_addr_lo + >> + (((8 * group) + reg_set) << 16); >> + msg->data = (data->hwirq / nr_hw_irqs) % irqs_per_index; > > ... (sound of head exploding...). I hate it when I have to reverse > engineer the hardware by looking at the code... > > Please give us some clue on how interrupts are spread across the various > pages, how the various indexes are combined to give an actual address. I will add description about this. > >> +} >> + >> +static int xgene_msi_set_affinity(struct irq_data *irq_data, >> + const struct cpumask *mask, bool force) >> +{ >> + struct xgene_msi *msi = irq_data_get_irq_chip_data(irq_data); >> + unsigned int gic_irq; >> + int ret; >> + >> + gic_irq = msi->msi_virqs[irq_data->hwirq % msi->settings->nr_hw_irqs]; >> + ret = irq_set_affinity(gic_irq, mask); > > Erm... This as the effect of moving *all* the MSIs hanging off this > interrupt to another CPU. 
I'm not sure that's an acceptable effect... > What if another MSI requires a different affinity? We have 16 'real' hardware IRQs. Each of these has multiple MSIs attached to it. So this will move all MSIs hanging off this interrupt to another CPU; and we don't support different affinity settings for different MSIs that are attached to the same hardware IRQ. > >> + if (ret == IRQ_SET_MASK_OK) >> + ret = IRQ_SET_MASK_OK_DONE; >> + >> + return ret; >> +} >> + >> +static struct irq_chip xgene_msi_bottom_irq_chip = { >> + .name = "MSI", >> + .irq_set_affinity = xgene_msi_set_affinity, >> + .irq_compose_msi_msg = xgene_compose_msi_msg, >> +}; >> + >> +static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, >> + unsigned int nr_irqs, void *args) >> +{ >> + >> + struct xgene_msi *msi = domain->host_data; >> + u32 msi_irq_count = msi->settings->nr_msi_vec; >> + int msi_irq; >> + >> + mutex_lock(&msi->bitmap_lock); >> + >> + msi_irq = find_first_zero_bit(msi->bitmap, msi_irq_count); >> + if (msi_irq < msi_irq_count) >> + set_bit(msi_irq, msi->bitmap); >> + else >> + msi_irq = -ENOSPC; >> + >> + mutex_unlock(&msi->bitmap_lock); >> + >> + if (msi_irq < 0) >> + return msi_irq; >> + >> + irq_domain_set_info(domain, virq, msi_irq, &xgene_msi_bottom_irq_chip, >> + domain->host_data, handle_simple_irq, NULL, NULL); >> + set_irq_flags(virq, IRQF_VALID); >> + >> + return 0; >> +} >> + >> +static void xgene_irq_domain_free(struct irq_domain *domain, >> + unsigned int virq, unsigned int nr_irqs) >> +{ >> + struct irq_data *d = irq_domain_get_irq_data(domain, virq); >> + struct xgene_msi *msi = irq_data_get_irq_chip_data(d); >> + >> + mutex_lock(&msi->bitmap_lock); >> + >> + if (!test_bit(d->hwirq, msi->bitmap)) >> + pr_err("trying to free unused MSI#%lu\n", d->hwirq); >> + else >> + clear_bit(d->hwirq, msi->bitmap); >> + >> + mutex_unlock(&msi->bitmap_lock); >> + >> + irq_domain_free_irqs_parent(domain, virq, nr_irqs); >> +} >> + >> +static const struct 
irq_domain_ops msi_domain_ops = { >> + .alloc = xgene_irq_domain_alloc, >> + .free = xgene_irq_domain_free, >> +}; >> + >> +static int xgene_allocate_domains(struct xgene_msi *msi) >> +{ >> + msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec, >> + &msi_domain_ops, msi); >> + >> + if (!msi->domain) { >> + pr_err("failed to create parent MSI domain\n"); >> + return -ENOMEM; >> + } >> + >> + msi->mchip.of_node = msi->node; >> + msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node, >> + &xgene_msi_domain_info, >> + msi->domain); >> + >> + if (!msi->mchip.domain) { >> + pr_err("failed to create MSI domain\n"); >> + irq_domain_remove(msi->domain); >> + return -ENOMEM; >> + } >> + >> + return 0; >> +} >> + >> +static void xgene_free_domains(struct xgene_msi *msi) >> +{ >> + if (msi->mchip.domain) >> + irq_domain_remove(msi->mchip.domain); >> + if (msi->domain) >> + irq_domain_remove(msi->domain); >> +} >> + >> +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi) >> +{ >> + u32 msi_irq_count = xgene_msi->settings->nr_msi_vec; >> + u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs; >> + int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long); >> + >> + xgene_msi->bitmap = kzalloc(size, GFP_KERNEL); >> + if (!xgene_msi->bitmap) >> + return -ENOMEM; >> + >> + mutex_init(&xgene_msi->bitmap_lock); >> + >> + xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL); >> + if (!xgene_msi->msi_virqs) >> + return -ENOMEM; >> + >> + return 0; >> +} >> + >> +static irqreturn_t xgene_msi_isr(int irq, void *data) > > This is not a "normal" interrupt handler. This is really a chained > interrupt controller. Please use the proper infrastructure for this > (irq_set_chained_handler, chained_irq_enter, chained_irq_exit). > Otherwise, your usage of irq_find_mapping is illegal wrt RCU. 
> Yes, I will change this function to protect it with chained_irq_enter and chained_irq_exit. >> +{ >> + struct xgene_msi *xgene_msi = (struct xgene_msi *) data; >> + unsigned int virq; >> + int msir_index, msir_reg, msir_val, hw_irq; >> + u32 intr_index, grp_select, msi_grp, processed = 0; >> + u32 nr_hw_irqs, irqs_per_index, index_per_group; >> + >> + msi_grp = irq - xgene_msi->msi_virqs[0]; >> + if (msi_grp >= xgene_msi->settings->nr_hw_irqs) { >> + pr_err("invalid msi received\n"); >> + return IRQ_NONE; >> + } >> + >> + nr_hw_irqs = xgene_msi->settings->nr_hw_irqs; >> + irqs_per_index = xgene_msi->settings->irqs_per_index; >> + index_per_group = xgene_msi->settings->index_per_group; >> + >> + grp_select = readl(xgene_msi->msi_regs + MSI_INT0 + (msi_grp << 16)); >> + while (grp_select) { >> + msir_index = ffs(grp_select) - 1; >> + msir_reg = (msi_grp << 19) + (msir_index << 16); >> + msir_val = readl(xgene_msi->msi_regs + MSI_INDEX0 + msir_reg); >> + while (msir_val) { >> + intr_index = ffs(msir_val) - 1; >> + hw_irq = (((msir_index * irqs_per_index) + intr_index) * >> + nr_hw_irqs) + msi_grp; > > Same thing here. This requires some explaination on how the HW is organised. I will add a description of this. > >> + virq = irq_find_mapping(xgene_msi->domain, hw_irq); >> + if (virq != 0) >> + generic_handle_irq(virq); >> + msir_val &= ~(1 << intr_index); >> + processed++; >> + } >> + grp_select &= ~(1 << msir_index); >> + } >> + >> + return processed > 0 ? 
IRQ_HANDLED : IRQ_NONE; >> +} >> + >> +static int xgene_msi_remove(struct platform_device *pdev) >> +{ >> + int virq, i; >> + struct xgene_msi *msi = platform_get_drvdata(pdev); >> + u32 nr_hw_irqs = msi->settings->nr_hw_irqs; >> + >> + for (i = 0; i < nr_hw_irqs; i++) { >> + virq = msi->msi_virqs[i]; >> + if (virq != 0) >> + free_irq(virq, msi); >> + } >> + >> + kfree(msi->bitmap); >> + msi->bitmap = NULL; >> + >> + xgene_free_domains(msi); >> + >> + return 0; >> +} >> + >> +static int xgene_msi_setup_hwirq(struct xgene_msi *msi, >> + struct platform_device *pdev, >> + int irq_index) >> +{ >> + int virt_msir; >> + cpumask_var_t mask; >> + int err; >> + >> + virt_msir = platform_get_irq(pdev, irq_index); >> + if (virt_msir < 0) { >> + dev_err(&pdev->dev, "Cannot translate IRQ index %d\n", >> + irq_index); >> + return -EINVAL; >> + } >> + >> + err = request_irq(virt_msir, xgene_msi_isr, 0, >> + xgene_msi_top_irq_chip.name, msi); > > This is where you should use irq_set_chained_handler. I will replace request_irq with irq_set_chained_handler and irq_set_handler_data > >> + if (err) { >> + dev_err(&pdev->dev, "request irq failed\n"); >> + return err; >> + } >> + >> + if (alloc_cpumask_var(&mask, GFP_KERNEL)) { >> + cpumask_setall(mask); >> + irq_set_affinity(virt_msir, mask); >> + free_cpumask_var(mask); >> + } >> + >> + msi->msi_virqs[irq_index] = virt_msir; >> + >> + return 0; >> +} >> + >> +static const struct of_device_id xgene_msi_match_table[] = { >> + {.compatible = "apm,xgene1-msi", >> + .data = xgene_msi_init_storm_settings}, >> + {}, >> +}; >> + >> +static int xgene_msi_probe(struct platform_device *pdev) >> +{ >> + struct resource *res; >> + int rc, irq_index; >> + struct device_node *np; >> + const struct of_device_id *matched_np; >> + struct xgene_msi *xgene_msi; >> + xgene_msi_initcall_t init_fn; >> + u32 nr_hw_irqs, nr_msi_vecs; >> + >> + np = of_find_matching_node_and_match(NULL, >> + xgene_msi_match_table, &matched_np); >> + if (!np) >> + return 
-ENODEV; >> + >> + xgene_msi = kzalloc(sizeof(struct xgene_msi), GFP_KERNEL); >> + if (!xgene_msi) { >> + dev_err(&pdev->dev, "failed to allocate X-Gene MSI data\n"); >> + return -ENOMEM; >> + } >> + >> + xgene_msi->node = np; >> + >> + init_fn = (xgene_msi_initcall_t) matched_np->data; >> + rc = init_fn(xgene_msi); >> + if (rc) >> + return rc; >> + >> + platform_set_drvdata(pdev, xgene_msi); >> + >> + nr_msi_vecs = xgene_msi->settings->nr_msi_vec; >> + >> + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); >> + xgene_msi->msi_regs = devm_ioremap_resource(&pdev->dev, res); >> + if (IS_ERR(xgene_msi->msi_regs)) { >> + dev_err(&pdev->dev, "no reg space\n"); >> + rc = -EINVAL; >> + goto error; >> + } >> + >> + xgene_msi->msi_addr_hi = upper_32_bits(res->start); >> + xgene_msi->msi_addr_lo = lower_32_bits(res->start); >> + >> + rc = xgene_msi_init_allocator(xgene_msi); >> + if (rc) { >> + dev_err(&pdev->dev, "Error allocating MSI bitmap\n"); >> + goto error; >> + } >> + >> + rc = xgene_allocate_domains(xgene_msi); >> + if (rc) { >> + dev_err(&pdev->dev, "Failed to allocate MSI domain\n"); >> + goto error; >> + } >> + >> + nr_hw_irqs = xgene_msi->settings->nr_hw_irqs; >> + for (irq_index = 0; irq_index < nr_hw_irqs; irq_index++) { >> + rc = xgene_msi_setup_hwirq(xgene_msi, pdev, irq_index); >> + if (rc) >> + goto error; >> + } >> + >> + rc = of_pci_msi_chip_add(&xgene_msi->mchip); >> + if (rc) { >> + dev_err(&pdev->dev, "failed to add MSI controller chip\n"); >> + goto error; >> + } >> + >> + dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n"); >> + >> + return 0; >> +error: >> + xgene_msi_remove(pdev); >> + return rc; >> +} >> + >> +static struct platform_driver xgene_msi_driver = { >> + .driver = { >> + .name = "xgene-msi", >> + .owner = THIS_MODULE, >> + .of_match_table = xgene_msi_match_table, >> + }, >> + .probe = xgene_msi_probe, >> + .remove = xgene_msi_remove, >> +}; >> + >> +static int __init xgene_pcie_msi_init(void) >> +{ >> + return 
platform_driver_register(&xgene_msi_driver); >> +} >> +subsys_initcall(xgene_pcie_msi_init); >> + >> +MODULE_AUTHOR("Duc Dang <dhdang@apm.com>"); >> +MODULE_DESCRIPTION("APM X-Gene PCIe MSI driver"); >> +MODULE_LICENSE("GPL v2"); >> diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c >> index 22751ed..3e6faa1 100644 >> --- a/drivers/pci/host/pci-xgene.c >> +++ b/drivers/pci/host/pci-xgene.c >> @@ -468,6 +468,23 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port, >> return 0; >> } >> >> +static int xgene_pcie_msi_enable(struct pci_bus *bus) >> +{ >> + struct device_node *msi_node; >> + >> + msi_node = of_parse_phandle(bus->dev.of_node, >> + "msi-parent", 0); >> + if (!msi_node) >> + return -ENODEV; >> + >> + bus->msi = of_pci_find_msi_chip_by_node(msi_node); >> + if (bus->msi) >> + bus->msi->dev = &bus->dev; >> + else >> + return -ENODEV; >> + return 0; >> +} >> + >> static int xgene_pcie_probe_bridge(struct platform_device *pdev) >> { >> struct device_node *dn = pdev->dev.of_node; >> @@ -504,6 +521,10 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev) >> if (!bus) >> return -ENOMEM; >> >> + if (IS_ENABLED(CONFIG_PCI_MSI)) >> + if (xgene_pcie_msi_enable(bus)) >> + dev_info(port->dev, "failed to enable MSI\n"); >> + >> pci_scan_child_bus(bus); >> pci_assign_unassigned_bus_resources(bus); >> pci_bus_add_devices(bus); >> -- >> 1.9.1 >> > > Thanks, > > M. > -- > Jazz is not dead. It just smells funny... Regards, Duc Dang.
On Fri, Apr 10, 2015 at 11:13 AM, Paul Bolle <pebolle@tiscali.nl> wrote: > Just a nit about a license mismatch. > > On Thu, 2015-04-09 at 10:05 -0700, Duc Dang wrote: >> --- a/drivers/pci/host/Kconfig >> +++ b/drivers/pci/host/Kconfig > >> +config PCI_XGENE_MSI >> + bool "X-Gene v1 PCIe MSI feature" >> + depends on PCI_XGENE && PCI_MSI > >> --- a/drivers/pci/host/Makefile >> +++ b/drivers/pci/host/Makefile > >> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o > >> --- /dev/null >> +++ b/drivers/pci/host/pci-xgene-msi.c > >> + * This program is free software; you can redistribute it and/or modify it >> + * under the terms of the GNU General Public License as published by the >> + * Free Software Foundation; either version 2 of the License, or (at your >> + * option) any later version. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. > > This states the license of this file is GPL v2 or later. > >> +MODULE_LICENSE("GPL v2"); > > And, according to include/linux/module.h, this states the license of > this driver is GPL v2. So either the comment at the top of this file or > the license ident used in this macro needs to change. > Thanks, I will fix this. > (Now, as far as I can tell, this driver is built-in only. So I think > this macro - and all other module specific code in this file - is > actually unneeded. But I already know Bjorn's position here, so I won't > bother you about that.) > > Thanks, > > > Paul Bolle > Regards, Duc Dang.
Hi Marc: Are there any plans to support ACPI for GICv2m MSI? Both this driver and GICv2m seem to support the OF model of discovery for the MSI controller. X-Gene1 uses this driver and X-Gene2 uses GICv2m, so there needs to be a way to associate the MSI controller with the PCIe bus. I haven't found a standard way of finding "msi-parent" for ACPI. Do you have any suggestions? Sorry for top posting. On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: > On 09/04/15 18:05, Duc Dang wrote: >> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into >> 16 HW IRQ lines. >> >> Signed-off-by: Duc Dang <dhdang@apm.com> >> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> >> --- >> drivers/pci/host/Kconfig | 6 + >> drivers/pci/host/Makefile | 1 + >> drivers/pci/host/pci-xgene-msi.c | 407 +++++++++++++++++++++++++++++++++++++++ >> drivers/pci/host/pci-xgene.c | 21 ++ >> 4 files changed, 435 insertions(+) >> create mode 100644 drivers/pci/host/pci-xgene-msi.c >> >> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig >> index 7b892a9..c9b61fa 100644 >> --- a/drivers/pci/host/Kconfig >> +++ b/drivers/pci/host/Kconfig >> @@ -89,11 +89,17 @@ config PCI_XGENE >> depends on ARCH_XGENE >> depends on OF >> select PCIEPORTBUS >> + select PCI_MSI_IRQ_DOMAIN if PCI_MSI >> + select PCI_XGENE_MSI if PCI_MSI >> help >> Say Y here if you want internal PCI support on APM X-Gene SoC. >> There are 5 internal PCIe ports available. Each port is GEN3 capable >> and have varied lanes from x1 to x8. 
>> >> +config PCI_XGENE_MSI >> + bool "X-Gene v1 PCIe MSI feature" >> + depends on PCI_XGENE && PCI_MSI >> + >> config PCI_LAYERSCAPE >> bool "Freescale Layerscape PCIe controller" >> depends on OF && ARM >> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile >> index e61d91c..f39bde3 100644 >> --- a/drivers/pci/host/Makefile >> +++ b/drivers/pci/host/Makefile >> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o >> obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o >> obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o >> obj-$(CONFIG_PCI_XGENE) += pci-xgene.o >> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o >> obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o >> obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o >> diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c >> new file mode 100644 >> index 0000000..4f0ff42 >> --- /dev/null >> +++ b/drivers/pci/host/pci-xgene-msi.c >> @@ -0,0 +1,407 @@ >> +/* >> + * APM X-Gene MSI Driver >> + * >> + * Copyright (c) 2014, Applied Micro Circuits Corporation >> + * Author: Tanmay Inamdar <tinamdar@apm.com> >> + * Duc Dang <dhdang@apm.com> >> + * >> + * This program is free software; you can redistribute it and/or modify it >> + * under the terms of the GNU General Public License as published by the >> + * Free Software Foundation; either version 2 of the License, or (at your >> + * option) any later version. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. 
>> + */ >> +#include <linux/interrupt.h> >> +#include <linux/module.h> >> +#include <linux/msi.h> >> +#include <linux/of_irq.h> >> +#include <linux/pci.h> >> +#include <linux/platform_device.h> >> +#include <linux/of_pci.h> >> + >> +#define MSI_INDEX0 0x000000 >> +#define MSI_INT0 0x800000 >> + >> +struct xgene_msi_settings { >> + u32 index_per_group; >> + u32 irqs_per_index; >> + u32 nr_msi_vec; >> + u32 nr_hw_irqs; >> +}; >> + >> +struct xgene_msi { >> + struct device_node *node; >> + struct msi_controller mchip; >> + struct irq_domain *domain; >> + struct xgene_msi_settings *settings; >> + u32 msi_addr_lo; >> + u32 msi_addr_hi; > > I'd rather see the mailbox address directly, and only do the split when > assigning it to the message (you seem to play all kind of tricks on the > address anyway). > >> + void __iomem *msi_regs; >> + unsigned long *bitmap; >> + struct mutex bitmap_lock; >> + int *msi_virqs; >> +}; >> + >> +struct xgene_msi_settings storm_msi_settings = { >> + .index_per_group = 8, >> + .irqs_per_index = 21, >> + .nr_msi_vec = 2688, >> + .nr_hw_irqs = 16, >> +}; > > It would really help understanding how index, group and hw irq lines are > structured. nr_msi_vec is obviously the product of these three numbers, > so maybe you can loose it (we have computers, remember... ;-). > > Do you expect more than this single "storm" implementation? If so, maybe > they should be described in the DT. If not, why do we need a separate > structure with an indirection if we know these are constants? 
> >> + >> +typedef int (*xgene_msi_initcall_t)(struct xgene_msi *); >> + >> +static int xgene_msi_init_storm_settings(struct xgene_msi *xgene_msi) >> +{ >> + xgene_msi->settings = &storm_msi_settings; >> + return 0; >> +} >> + >> +static struct irq_chip xgene_msi_top_irq_chip = { >> + .name = "X-Gene1 MSI", >> + .irq_enable = pci_msi_unmask_irq, >> + .irq_disable = pci_msi_mask_irq, >> + .irq_mask = pci_msi_mask_irq, >> + .irq_unmask = pci_msi_unmask_irq, >> +}; >> + >> +static struct msi_domain_info xgene_msi_domain_info = { >> + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | >> + MSI_FLAG_PCI_MSIX), >> + .chip = &xgene_msi_top_irq_chip, >> +}; >> + >> +static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) >> +{ >> + struct xgene_msi *msi = irq_data_get_irq_chip_data(data); >> + u32 nr_hw_irqs = msi->settings->nr_hw_irqs; >> + u32 irqs_per_index = msi->settings->irqs_per_index; >> + u32 reg_set = data->hwirq / (nr_hw_irqs * irqs_per_index); >> + u32 group = data->hwirq % nr_hw_irqs; >> + >> + msg->address_hi = msi->msi_addr_hi; >> + msg->address_lo = msi->msi_addr_lo + >> + (((8 * group) + reg_set) << 16); >> + msg->data = (data->hwirq / nr_hw_irqs) % irqs_per_index; > > ... (sound of head exploding...). I hate it when I have to reverse > engineer the hardware by looking at the code... > > Please give us some clue on how interrupts are spread across the various > pages, how the various indexes are combined to give an actual address. > >> +} >> + >> +static int xgene_msi_set_affinity(struct irq_data *irq_data, >> + const struct cpumask *mask, bool force) >> +{ >> + struct xgene_msi *msi = irq_data_get_irq_chip_data(irq_data); >> + unsigned int gic_irq; >> + int ret; >> + >> + gic_irq = msi->msi_virqs[irq_data->hwirq % msi->settings->nr_hw_irqs]; >> + ret = irq_set_affinity(gic_irq, mask); > > Erm... This as the effect of moving *all* the MSIs hanging off this > interrupt to another CPU. 
I'm not sure that's an acceptable effect... > What if another MSI requires a different affinity? > >> + if (ret == IRQ_SET_MASK_OK) >> + ret = IRQ_SET_MASK_OK_DONE; >> + >> + return ret; >> +} >> + >> +static struct irq_chip xgene_msi_bottom_irq_chip = { >> + .name = "MSI", >> + .irq_set_affinity = xgene_msi_set_affinity, >> + .irq_compose_msi_msg = xgene_compose_msi_msg, >> +}; >> + >> +static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, >> + unsigned int nr_irqs, void *args) >> +{ >> + >> + struct xgene_msi *msi = domain->host_data; >> + u32 msi_irq_count = msi->settings->nr_msi_vec; >> + int msi_irq; >> + >> + mutex_lock(&msi->bitmap_lock); >> + >> + msi_irq = find_first_zero_bit(msi->bitmap, msi_irq_count); >> + if (msi_irq < msi_irq_count) >> + set_bit(msi_irq, msi->bitmap); >> + else >> + msi_irq = -ENOSPC; >> + >> + mutex_unlock(&msi->bitmap_lock); >> + >> + if (msi_irq < 0) >> + return msi_irq; >> + >> + irq_domain_set_info(domain, virq, msi_irq, &xgene_msi_bottom_irq_chip, >> + domain->host_data, handle_simple_irq, NULL, NULL); >> + set_irq_flags(virq, IRQF_VALID); >> + >> + return 0; >> +} >> + >> +static void xgene_irq_domain_free(struct irq_domain *domain, >> + unsigned int virq, unsigned int nr_irqs) >> +{ >> + struct irq_data *d = irq_domain_get_irq_data(domain, virq); >> + struct xgene_msi *msi = irq_data_get_irq_chip_data(d); >> + >> + mutex_lock(&msi->bitmap_lock); >> + >> + if (!test_bit(d->hwirq, msi->bitmap)) >> + pr_err("trying to free unused MSI#%lu\n", d->hwirq); >> + else >> + clear_bit(d->hwirq, msi->bitmap); >> + >> + mutex_unlock(&msi->bitmap_lock); >> + >> + irq_domain_free_irqs_parent(domain, virq, nr_irqs); >> +} >> + >> +static const struct irq_domain_ops msi_domain_ops = { >> + .alloc = xgene_irq_domain_alloc, >> + .free = xgene_irq_domain_free, >> +}; >> + >> +static int xgene_allocate_domains(struct xgene_msi *msi) >> +{ >> + msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec, >> + 
&msi_domain_ops, msi); >> + >> + if (!msi->domain) { >> + pr_err("failed to create parent MSI domain\n"); >> + return -ENOMEM; >> + } >> + >> + msi->mchip.of_node = msi->node; >> + msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node, >> + &xgene_msi_domain_info, >> + msi->domain); >> + >> + if (!msi->mchip.domain) { >> + pr_err("failed to create MSI domain\n"); >> + irq_domain_remove(msi->domain); >> + return -ENOMEM; >> + } >> + >> + return 0; >> +} >> + >> +static void xgene_free_domains(struct xgene_msi *msi) >> +{ >> + if (msi->mchip.domain) >> + irq_domain_remove(msi->mchip.domain); >> + if (msi->domain) >> + irq_domain_remove(msi->domain); >> +} >> + >> +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi) >> +{ >> + u32 msi_irq_count = xgene_msi->settings->nr_msi_vec; >> + u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs; >> + int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long); >> + >> + xgene_msi->bitmap = kzalloc(size, GFP_KERNEL); >> + if (!xgene_msi->bitmap) >> + return -ENOMEM; >> + >> + mutex_init(&xgene_msi->bitmap_lock); >> + >> + xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL); >> + if (!xgene_msi->msi_virqs) >> + return -ENOMEM; >> + >> + return 0; >> +} >> + >> +static irqreturn_t xgene_msi_isr(int irq, void *data) > > This is not a "normal" interrupt handler. This is really a chained > interrupt controller. Please use the proper infrastructure for this > (irq_set_chained_handler, chained_irq_enter, chained_irq_exit). > Otherwise, your usage of irq_find_mapping is illegal wrt RCU. 
> >> +{ >> + struct xgene_msi *xgene_msi = (struct xgene_msi *) data; >> + unsigned int virq; >> + int msir_index, msir_reg, msir_val, hw_irq; >> + u32 intr_index, grp_select, msi_grp, processed = 0; >> + u32 nr_hw_irqs, irqs_per_index, index_per_group; >> + >> + msi_grp = irq - xgene_msi->msi_virqs[0]; >> + if (msi_grp >= xgene_msi->settings->nr_hw_irqs) { >> + pr_err("invalid msi received\n"); >> + return IRQ_NONE; >> + } >> + >> + nr_hw_irqs = xgene_msi->settings->nr_hw_irqs; >> + irqs_per_index = xgene_msi->settings->irqs_per_index; >> + index_per_group = xgene_msi->settings->index_per_group; >> + >> + grp_select = readl(xgene_msi->msi_regs + MSI_INT0 + (msi_grp << 16)); >> + while (grp_select) { >> + msir_index = ffs(grp_select) - 1; >> + msir_reg = (msi_grp << 19) + (msir_index << 16); >> + msir_val = readl(xgene_msi->msi_regs + MSI_INDEX0 + msir_reg); >> + while (msir_val) { >> + intr_index = ffs(msir_val) - 1; >> + hw_irq = (((msir_index * irqs_per_index) + intr_index) * >> + nr_hw_irqs) + msi_grp; > > Same thing here. This requires some explaination on how the HW is organised. > >> + virq = irq_find_mapping(xgene_msi->domain, hw_irq); >> + if (virq != 0) >> + generic_handle_irq(virq); >> + msir_val &= ~(1 << intr_index); >> + processed++; >> + } >> + grp_select &= ~(1 << msir_index); >> + } >> + >> + return processed > 0 ? 
IRQ_HANDLED : IRQ_NONE; >> +} >> + >> +static int xgene_msi_remove(struct platform_device *pdev) >> +{ >> + int virq, i; >> + struct xgene_msi *msi = platform_get_drvdata(pdev); >> + u32 nr_hw_irqs = msi->settings->nr_hw_irqs; >> + >> + for (i = 0; i < nr_hw_irqs; i++) { >> + virq = msi->msi_virqs[i]; >> + if (virq != 0) >> + free_irq(virq, msi); >> + } >> + >> + kfree(msi->bitmap); >> + msi->bitmap = NULL; >> + >> + xgene_free_domains(msi); >> + >> + return 0; >> +} >> + >> +static int xgene_msi_setup_hwirq(struct xgene_msi *msi, >> + struct platform_device *pdev, >> + int irq_index) >> +{ >> + int virt_msir; >> + cpumask_var_t mask; >> + int err; >> + >> + virt_msir = platform_get_irq(pdev, irq_index); >> + if (virt_msir < 0) { >> + dev_err(&pdev->dev, "Cannot translate IRQ index %d\n", >> + irq_index); >> + return -EINVAL; >> + } >> + >> + err = request_irq(virt_msir, xgene_msi_isr, 0, >> + xgene_msi_top_irq_chip.name, msi); > > This is where you should use irq_set_chained_handler. > >> + if (err) { >> + dev_err(&pdev->dev, "request irq failed\n"); >> + return err; >> + } >> + >> + if (alloc_cpumask_var(&mask, GFP_KERNEL)) { >> + cpumask_setall(mask); >> + irq_set_affinity(virt_msir, mask); >> + free_cpumask_var(mask); >> + } >> + >> + msi->msi_virqs[irq_index] = virt_msir; >> + >> + return 0; >> +} >> + >> +static const struct of_device_id xgene_msi_match_table[] = { >> + {.compatible = "apm,xgene1-msi", >> + .data = xgene_msi_init_storm_settings}, >> + {}, >> +}; >> + >> +static int xgene_msi_probe(struct platform_device *pdev) >> +{ >> + struct resource *res; >> + int rc, irq_index; >> + struct device_node *np; >> + const struct of_device_id *matched_np; >> + struct xgene_msi *xgene_msi; >> + xgene_msi_initcall_t init_fn; >> + u32 nr_hw_irqs, nr_msi_vecs; >> + >> + np = of_find_matching_node_and_match(NULL, >> + xgene_msi_match_table, &matched_np); >> + if (!np) >> + return -ENODEV; >> + >> + xgene_msi = kzalloc(sizeof(struct xgene_msi), GFP_KERNEL); >> + if 
(!xgene_msi) { >> + dev_err(&pdev->dev, "failed to allocate X-Gene MSI data\n"); >> + return -ENOMEM; >> + } >> + >> + xgene_msi->node = np; >> + >> + init_fn = (xgene_msi_initcall_t) matched_np->data; >> + rc = init_fn(xgene_msi); >> + if (rc) >> + return rc; >> + >> + platform_set_drvdata(pdev, xgene_msi); >> + >> + nr_msi_vecs = xgene_msi->settings->nr_msi_vec; >> + >> + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); >> + xgene_msi->msi_regs = devm_ioremap_resource(&pdev->dev, res); >> + if (IS_ERR(xgene_msi->msi_regs)) { >> + dev_err(&pdev->dev, "no reg space\n"); >> + rc = -EINVAL; >> + goto error; >> + } >> + >> + xgene_msi->msi_addr_hi = upper_32_bits(res->start); >> + xgene_msi->msi_addr_lo = lower_32_bits(res->start); >> + >> + rc = xgene_msi_init_allocator(xgene_msi); >> + if (rc) { >> + dev_err(&pdev->dev, "Error allocating MSI bitmap\n"); >> + goto error; >> + } >> + >> + rc = xgene_allocate_domains(xgene_msi); >> + if (rc) { >> + dev_err(&pdev->dev, "Failed to allocate MSI domain\n"); >> + goto error; >> + } >> + >> + nr_hw_irqs = xgene_msi->settings->nr_hw_irqs; >> + for (irq_index = 0; irq_index < nr_hw_irqs; irq_index++) { >> + rc = xgene_msi_setup_hwirq(xgene_msi, pdev, irq_index); >> + if (rc) >> + goto error; >> + } >> + >> + rc = of_pci_msi_chip_add(&xgene_msi->mchip); >> + if (rc) { >> + dev_err(&pdev->dev, "failed to add MSI controller chip\n"); >> + goto error; >> + } >> + >> + dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n"); >> + >> + return 0; >> +error: >> + xgene_msi_remove(pdev); >> + return rc; >> +} >> + >> +static struct platform_driver xgene_msi_driver = { >> + .driver = { >> + .name = "xgene-msi", >> + .owner = THIS_MODULE, >> + .of_match_table = xgene_msi_match_table, >> + }, >> + .probe = xgene_msi_probe, >> + .remove = xgene_msi_remove, >> +}; >> + >> +static int __init xgene_pcie_msi_init(void) >> +{ >> + return platform_driver_register(&xgene_msi_driver); >> +} >> +subsys_initcall(xgene_pcie_msi_init); >> + 
>> +MODULE_AUTHOR("Duc Dang <dhdang@apm.com>"); >> +MODULE_DESCRIPTION("APM X-Gene PCIe MSI driver"); >> +MODULE_LICENSE("GPL v2"); >> diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c >> index 22751ed..3e6faa1 100644 >> --- a/drivers/pci/host/pci-xgene.c >> +++ b/drivers/pci/host/pci-xgene.c >> @@ -468,6 +468,23 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port, >> return 0; >> } >> >> +static int xgene_pcie_msi_enable(struct pci_bus *bus) >> +{ >> + struct device_node *msi_node; >> + >> + msi_node = of_parse_phandle(bus->dev.of_node, >> + "msi-parent", 0); >> + if (!msi_node) >> + return -ENODEV; >> + >> + bus->msi = of_pci_find_msi_chip_by_node(msi_node); >> + if (bus->msi) >> + bus->msi->dev = &bus->dev; >> + else >> + return -ENODEV; >> + return 0; >> +} >> + >> static int xgene_pcie_probe_bridge(struct platform_device *pdev) >> { >> struct device_node *dn = pdev->dev.of_node; >> @@ -504,6 +521,10 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev) >> if (!bus) >> return -ENOMEM; >> >> + if (IS_ENABLED(CONFIG_PCI_MSI)) >> + if (xgene_pcie_msi_enable(bus)) >> + dev_info(port->dev, "failed to enable MSI\n"); >> + >> pci_scan_child_bus(bus); >> pci_assign_unassigned_bus_resources(bus); >> pci_bus_add_devices(bus); >> -- >> 1.9.1 >> > > Thanks, > > M. > -- > Jazz is not dead. It just smells funny... > -- > To unsubscribe from this list: send the line "unsubscribe linux-pci" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015-04-11 00:42, Duc Dang wrote: > On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier <marc.zyngier@arm.com> > wrote: >> On 09/04/15 18:05, Duc Dang wrote: >>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into >>> 16 HW IRQ lines. >>> >>> Signed-off-by: Duc Dang <dhdang@apm.com> >>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> >>> --- >>> drivers/pci/host/Kconfig | 6 + >>> drivers/pci/host/Makefile | 1 + >>> drivers/pci/host/pci-xgene-msi.c | 407 >>> +++++++++++++++++++++++++++++++++++++++ >>> drivers/pci/host/pci-xgene.c | 21 ++ >>> 4 files changed, 435 insertions(+) >>> create mode 100644 drivers/pci/host/pci-xgene-msi.c >>> >>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig >>> index 7b892a9..c9b61fa 100644 >>> --- a/drivers/pci/host/Kconfig >>> +++ b/drivers/pci/host/Kconfig >>> @@ -89,11 +89,17 @@ config PCI_XGENE >>> depends on ARCH_XGENE >>> depends on OF >>> select PCIEPORTBUS >>> + select PCI_MSI_IRQ_DOMAIN if PCI_MSI >>> + select PCI_XGENE_MSI if PCI_MSI >>> help >>> Say Y here if you want internal PCI support on APM X-Gene >>> SoC. >>> There are 5 internal PCIe ports available. Each port is >>> GEN3 capable >>> and have varied lanes from x1 to x8. 
>>> >>> +config PCI_XGENE_MSI >>> + bool "X-Gene v1 PCIe MSI feature" >>> + depends on PCI_XGENE && PCI_MSI >>> + >>> config PCI_LAYERSCAPE >>> bool "Freescale Layerscape PCIe controller" >>> depends on OF && ARM >>> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile >>> index e61d91c..f39bde3 100644 >>> --- a/drivers/pci/host/Makefile >>> +++ b/drivers/pci/host/Makefile >>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o >>> obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o >>> obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o >>> obj-$(CONFIG_PCI_XGENE) += pci-xgene.o >>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o >>> obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o >>> obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o >>> diff --git a/drivers/pci/host/pci-xgene-msi.c >>> b/drivers/pci/host/pci-xgene-msi.c >>> new file mode 100644 >>> index 0000000..4f0ff42 >>> --- /dev/null >>> +++ b/drivers/pci/host/pci-xgene-msi.c >>> @@ -0,0 +1,407 @@ >>> +/* >>> + * APM X-Gene MSI Driver >>> + * >>> + * Copyright (c) 2014, Applied Micro Circuits Corporation >>> + * Author: Tanmay Inamdar <tinamdar@apm.com> >>> + * Duc Dang <dhdang@apm.com> >>> + * >>> + * This program is free software; you can redistribute it and/or >>> modify it >>> + * under the terms of the GNU General Public License as >>> published by the >>> + * Free Software Foundation; either version 2 of the License, or >>> (at your >>> + * option) any later version. >>> + * >>> + * This program is distributed in the hope that it will be useful, >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >>> + * GNU General Public License for more details. 
>>> + */ >>> +#include <linux/interrupt.h> >>> +#include <linux/module.h> >>> +#include <linux/msi.h> >>> +#include <linux/of_irq.h> >>> +#include <linux/pci.h> >>> +#include <linux/platform_device.h> >>> +#include <linux/of_pci.h> >>> + >>> +#define MSI_INDEX0 0x000000 >>> +#define MSI_INT0 0x800000 >>> + >>> +struct xgene_msi_settings { >>> + u32 index_per_group; >>> + u32 irqs_per_index; >>> + u32 nr_msi_vec; >>> + u32 nr_hw_irqs; >>> +}; >>> + >>> +struct xgene_msi { >>> + struct device_node *node; >>> + struct msi_controller mchip; >>> + struct irq_domain *domain; >>> + struct xgene_msi_settings *settings; >>> + u32 msi_addr_lo; >>> + u32 msi_addr_hi; >> >> I'd rather see the mailbox address directly, and only do the split >> when >> assigning it to the message (you seem to play all kind of tricks on >> the >> address anyway). > > msi_addr_lo and msi_addr_hi store the physical base address of MSI > controller registers. I will add comment to clarify this. What I mean is that there is no point in keeping this around as a pair of 32bit variables. You'd better keep it as a single 64bit, and do the split when assigning it the the MSI message. [...] >>> +static int xgene_msi_set_affinity(struct irq_data *irq_data, >>> + const struct cpumask *mask, bool >>> force) >>> +{ >>> + struct xgene_msi *msi = >>> irq_data_get_irq_chip_data(irq_data); >>> + unsigned int gic_irq; >>> + int ret; >>> + >>> + gic_irq = msi->msi_virqs[irq_data->hwirq % >>> msi->settings->nr_hw_irqs]; >>> + ret = irq_set_affinity(gic_irq, mask); >> >> Erm... This as the effect of moving *all* the MSIs hanging off this >> interrupt to another CPU. I'm not sure that's an acceptable >> effect... >> What if another MSI requires a different affinity? > > We have 16 'real' hardware IRQs. Each of these has multiple MSIs > attached to it. 
> So this will move all MSIs handing off this interrupt to another CPU; > and we don't support different affinity settings for different MSIs > that are attached to the same hardware IRQ. Well, that's a significant departure from the expected behaviour. In other words, I wonder how useful this is. Could you instead reconfigure the MSI itself to hit the right CPU (assuming you don't have more than 16 CPUs and if that's only for XGene-1, this will only be 8 at most)? This would reduce your number of possible MSIs, but at least the semantics of set_affinity would be preserved. Thanks, M.
Hi Feng, On 2015-04-11 01:16, Feng Kan wrote: > Is there any plans to support ACPI for GICv2m MSI? Both this driver > and the > GICv2m seems to support OF model of discovery for msi controller. > X-Gene1 > uses this driver and X-Gene2 uses GICv2m, there needs to be a way to > associate msi controller with the PCIe bus. I haven't > found a standard way of doing finding "msi-parent" for ACPI. Do you > have > any suggestion. This is remarkably off topic! ;-) ACPI only provides information for GICv2m and GICv3. No other interrupt controller, MSI widget or fairy dust provider will be supported on arm64 with ACPI. So this driver will be DT only. GICv2m, being described in the ACPI spec, is growing some support. My understanding is that this is done by AMD (Suravee Suthikulpanit) using my irq/msi-domain branch as a base (I know that Suravee is looking at expanding the irqdomain matching code to accept a pointer to an ACPI table instead of a device node to match the target domain). Hope this helps, M.
On Friday 10 April 2015 17:16:32 Feng Kan wrote: > Hi Marc: > > Is there any plans to support ACPI for GICv2m MSI? Both this driver and the > GICv2m seems to support OF model of discovery for msi controller. X-Gene1 > uses this driver and X-Gene2 uses GICv2m, there needs to be a way to > associate msi controller with the PCIe bus. I haven't > found a standard way of doing finding "msi-parent" for ACPI. Do you have > any suggestion. > > Sorry for top posting. Doing gicv2m in ACPI should be straightforward, and the ACPI code can simply assume that there is only one GIC instance (whichever version) in the system and use that for all standard PCI hosts. Arnd
On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: > On 2015-04-11 00:42, Duc Dang wrote: >> >> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier <marc.zyngier@arm.com> >> wrote: >>> >>> On 09/04/15 18:05, Duc Dang wrote: >>>> >>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into >>>> 16 HW IRQ lines. >>>> >>>> Signed-off-by: Duc Dang <dhdang@apm.com> >>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> >>>> --- >>>> drivers/pci/host/Kconfig | 6 + >>>> drivers/pci/host/Makefile | 1 + >>>> drivers/pci/host/pci-xgene-msi.c | 407 >>>> +++++++++++++++++++++++++++++++++++++++ >>>> drivers/pci/host/pci-xgene.c | 21 ++ >>>> 4 files changed, 435 insertions(+) >>>> create mode 100644 drivers/pci/host/pci-xgene-msi.c >>>> >>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig >>>> index 7b892a9..c9b61fa 100644 >>>> --- a/drivers/pci/host/Kconfig >>>> +++ b/drivers/pci/host/Kconfig >>>> @@ -89,11 +89,17 @@ config PCI_XGENE >>>> depends on ARCH_XGENE >>>> depends on OF >>>> select PCIEPORTBUS >>>> + select PCI_MSI_IRQ_DOMAIN if PCI_MSI >>>> + select PCI_XGENE_MSI if PCI_MSI >>>> help >>>> Say Y here if you want internal PCI support on APM X-Gene SoC. >>>> There are 5 internal PCIe ports available. Each port is GEN3 >>>> capable >>>> and have varied lanes from x1 to x8. 
>>>> >>>> +config PCI_XGENE_MSI >>>> + bool "X-Gene v1 PCIe MSI feature" >>>> + depends on PCI_XGENE && PCI_MSI >>>> + >>>> config PCI_LAYERSCAPE >>>> bool "Freescale Layerscape PCIe controller" >>>> depends on OF && ARM >>>> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile >>>> index e61d91c..f39bde3 100644 >>>> --- a/drivers/pci/host/Makefile >>>> +++ b/drivers/pci/host/Makefile >>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o >>>> obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o >>>> obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o >>>> obj-$(CONFIG_PCI_XGENE) += pci-xgene.o >>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o >>>> obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o >>>> obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o >>>> diff --git a/drivers/pci/host/pci-xgene-msi.c >>>> b/drivers/pci/host/pci-xgene-msi.c >>>> new file mode 100644 >>>> index 0000000..4f0ff42 >>>> --- /dev/null >>>> +++ b/drivers/pci/host/pci-xgene-msi.c >>>> @@ -0,0 +1,407 @@ >>>> +/* >>>> + * APM X-Gene MSI Driver >>>> + * >>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation >>>> + * Author: Tanmay Inamdar <tinamdar@apm.com> >>>> + * Duc Dang <dhdang@apm.com> >>>> + * >>>> + * This program is free software; you can redistribute it and/or >>>> modify it >>>> + * under the terms of the GNU General Public License as published by >>>> the >>>> + * Free Software Foundation; either version 2 of the License, or (at >>>> your >>>> + * option) any later version. >>>> + * >>>> + * This program is distributed in the hope that it will be useful, >>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >>>> + * GNU General Public License for more details. 
>>>> + */ >>>> +#include <linux/interrupt.h> >>>> +#include <linux/module.h> >>>> +#include <linux/msi.h> >>>> +#include <linux/of_irq.h> >>>> +#include <linux/pci.h> >>>> +#include <linux/platform_device.h> >>>> +#include <linux/of_pci.h> >>>> + >>>> +#define MSI_INDEX0 0x000000 >>>> +#define MSI_INT0 0x800000 >>>> + >>>> +struct xgene_msi_settings { >>>> + u32 index_per_group; >>>> + u32 irqs_per_index; >>>> + u32 nr_msi_vec; >>>> + u32 nr_hw_irqs; >>>> +}; >>>> + >>>> +struct xgene_msi { >>>> + struct device_node *node; >>>> + struct msi_controller mchip; >>>> + struct irq_domain *domain; >>>> + struct xgene_msi_settings *settings; >>>> + u32 msi_addr_lo; >>>> + u32 msi_addr_hi; >>> >>> >>> I'd rather see the mailbox address directly, and only do the split when >>> assigning it to the message (you seem to play all kind of tricks on the >>> address anyway). >> >> >> msi_addr_lo and msi_addr_hi store the physical base address of MSI >> controller registers. I will add comment to clarify this. > > > What I mean is that there is no point in keeping this around as a pair > of 32bit variables. You'd better keep it as a single 64bit, and do the > split when assigning it the the MSI message. Hi Marc, These came from device-tree (which describes 64-bit address number as 2 32-bit words). If I store them this way, I don't need CPU cycles to do the split every time assigning them to the MSI message. Please let me know what do you think about it. > > [...] > >>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data, >>>> + const struct cpumask *mask, bool >>>> force) >>>> +{ >>>> + struct xgene_msi *msi = irq_data_get_irq_chip_data(irq_data); >>>> + unsigned int gic_irq; >>>> + int ret; >>>> + >>>> + gic_irq = msi->msi_virqs[irq_data->hwirq % >>>> msi->settings->nr_hw_irqs]; >>>> + ret = irq_set_affinity(gic_irq, mask); >>> >>> >>> Erm... This as the effect of moving *all* the MSIs hanging off this >>> interrupt to another CPU. 
I'm not sure that's an acceptable effect... >>> What if another MSI requires a different affinity? >> >> >> We have 16 'real' hardware IRQs. Each of these has multiple MSIs >> attached to it. >> So this will move all MSIs handing off this interrupt to another CPU; >> and we don't support different affinity settings for different MSIs >> that are attached to the same hardware IRQ. > > > Well, that's a significant departure from the expected behaviour. In other > words, I wonder how useful this is. Could you instead reconfigure the MSI > itself to hit the right CPU (assuming you don't have more than 16 CPUs and > if > that's only for XGene-1, this will only be 8 at most)? This would reduce > your number of possible MSIs, but at least the semantics of set_afinity > would be preserved. X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have 16 hardware GIC IRQs for 2688 MSIs). Setting affinity of single MSI to deliver it to a target CPU will move all the MSIs mapped to the same GIC IRQ to that CPU as well. This is not a standard behavior, but limiting the total number of MSIs will cause a lot of devices to fall back to INTx (with huge performance penalty) or even fail to load their driver as these devices request more than 16 MSIs during driver initialization. I can document the limitation in affinity setting of X-Gene-1 MSI in the driver to hopefully not make people surprise and hope to keep the total number of supported MSI as 2688 so that we can support as many cards that require MSI/MSI-X as possible. Please let me know your opinion. > > Thanks, > > M. > -- > Fast, cheap, reliable. Pick two.
On Tue, 14 Apr 2015 19:20:19 +0100 Duc Dang <dhdang@apm.com> wrote: > On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com> > wrote: > > On 2015-04-11 00:42, Duc Dang wrote: > >> > >> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier > >> <marc.zyngier@arm.com> wrote: > >>> > >>> On 09/04/15 18:05, Duc Dang wrote: > >>>> > >>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into > >>>> 16 HW IRQ lines. > >>>> > >>>> Signed-off-by: Duc Dang <dhdang@apm.com> > >>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> > >>>> --- > >>>> drivers/pci/host/Kconfig | 6 + > >>>> drivers/pci/host/Makefile | 1 + > >>>> drivers/pci/host/pci-xgene-msi.c | 407 > >>>> +++++++++++++++++++++++++++++++++++++++ > >>>> drivers/pci/host/pci-xgene.c | 21 ++ > >>>> 4 files changed, 435 insertions(+) > >>>> create mode 100644 drivers/pci/host/pci-xgene-msi.c > >>>> > >>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig > >>>> index 7b892a9..c9b61fa 100644 > >>>> --- a/drivers/pci/host/Kconfig > >>>> +++ b/drivers/pci/host/Kconfig > >>>> @@ -89,11 +89,17 @@ config PCI_XGENE > >>>> depends on ARCH_XGENE > >>>> depends on OF > >>>> select PCIEPORTBUS > >>>> + select PCI_MSI_IRQ_DOMAIN if PCI_MSI > >>>> + select PCI_XGENE_MSI if PCI_MSI > >>>> help > >>>> Say Y here if you want internal PCI support on APM > >>>> X-Gene SoC. There are 5 internal PCIe ports available. Each port > >>>> is GEN3 capable > >>>> and have varied lanes from x1 to x8. 
> >>>> > >>>> +config PCI_XGENE_MSI > >>>> + bool "X-Gene v1 PCIe MSI feature" > >>>> + depends on PCI_XGENE && PCI_MSI > >>>> + > >>>> config PCI_LAYERSCAPE > >>>> bool "Freescale Layerscape PCIe controller" > >>>> depends on OF && ARM > >>>> diff --git a/drivers/pci/host/Makefile > >>>> b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644 > >>>> --- a/drivers/pci/host/Makefile > >>>> +++ b/drivers/pci/host/Makefile > >>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += > >>>> pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o > >>>> pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o > >>>> obj-$(CONFIG_PCI_XGENE) += pci-xgene.o > >>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o > >>>> obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o > >>>> obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o > >>>> diff --git a/drivers/pci/host/pci-xgene-msi.c > >>>> b/drivers/pci/host/pci-xgene-msi.c > >>>> new file mode 100644 > >>>> index 0000000..4f0ff42 > >>>> --- /dev/null > >>>> +++ b/drivers/pci/host/pci-xgene-msi.c > >>>> @@ -0,0 +1,407 @@ > >>>> +/* > >>>> + * APM X-Gene MSI Driver > >>>> + * > >>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation > >>>> + * Author: Tanmay Inamdar <tinamdar@apm.com> > >>>> + * Duc Dang <dhdang@apm.com> > >>>> + * > >>>> + * This program is free software; you can redistribute it > >>>> and/or modify it > >>>> + * under the terms of the GNU General Public License as > >>>> published by the > >>>> + * Free Software Foundation; either version 2 of the License, > >>>> or (at your > >>>> + * option) any later version. > >>>> + * > >>>> + * This program is distributed in the hope that it will be > >>>> useful, > >>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty > >>>> of > >>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > >>>> + * GNU General Public License for more details. 
> >>>> + */ > >>>> +#include <linux/interrupt.h> > >>>> +#include <linux/module.h> > >>>> +#include <linux/msi.h> > >>>> +#include <linux/of_irq.h> > >>>> +#include <linux/pci.h> > >>>> +#include <linux/platform_device.h> > >>>> +#include <linux/of_pci.h> > >>>> + > >>>> +#define MSI_INDEX0 0x000000 > >>>> +#define MSI_INT0 0x800000 > >>>> + > >>>> +struct xgene_msi_settings { > >>>> + u32 index_per_group; > >>>> + u32 irqs_per_index; > >>>> + u32 nr_msi_vec; > >>>> + u32 nr_hw_irqs; > >>>> +}; > >>>> + > >>>> +struct xgene_msi { > >>>> + struct device_node *node; > >>>> + struct msi_controller mchip; > >>>> + struct irq_domain *domain; > >>>> + struct xgene_msi_settings *settings; > >>>> + u32 msi_addr_lo; > >>>> + u32 msi_addr_hi; > >>> > >>> > >>> I'd rather see the mailbox address directly, and only do the > >>> split when assigning it to the message (you seem to play all kind > >>> of tricks on the address anyway). > >> > >> > >> msi_addr_lo and msi_addr_hi store the physical base address of MSI > >> controller registers. I will add comment to clarify this. > > > > > > What I mean is that there is no point in keeping this around as a > > pair of 32bit variables. You'd better keep it as a single 64bit, > > and do the split when assigning it the the MSI message. > > Hi Marc, > > These came from device-tree (which describes 64-bit address number as > 2 32-bit words). ... and converted to a resource as a 64bit word, on which you apply {upper,lower}_32_bit(). So much for DT... > If I store them this way, I don't need CPU cycles to do the split > every time assigning them to the MSI message. Please let me know what > do you think about it. This is getting absolutely silly. How many cycles does it take to execute "lsr x1, x0, #32" on X-Gene? If it takes so long that it is considered to be a bottleneck, I suggest you go and design a better CPU (hint: the answer is probably 1 cycle absolutely everywhere). 
How often are you configuring MSIs in the face of what is happening in the rest of the kernel? Almost never! So, given that "never" times 1 is still never, I'll consider that readability of the code trumps it anytime (I can't believe we're having that kind of conversation...). > > > > [...] > > > >>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data, > >>>> + const struct cpumask *mask, > >>>> bool force) > >>>> +{ > >>>> + struct xgene_msi *msi = > >>>> irq_data_get_irq_chip_data(irq_data); > >>>> + unsigned int gic_irq; > >>>> + int ret; > >>>> + > >>>> + gic_irq = msi->msi_virqs[irq_data->hwirq % > >>>> msi->settings->nr_hw_irqs]; > >>>> + ret = irq_set_affinity(gic_irq, mask); > >>> > >>> > >>> Erm... This as the effect of moving *all* the MSIs hanging off > >>> this interrupt to another CPU. I'm not sure that's an acceptable > >>> effect... What if another MSI requires a different affinity? > >> > >> > >> We have 16 'real' hardware IRQs. Each of these has multiple MSIs > >> attached to it. > >> So this will move all MSIs handing off this interrupt to another > >> CPU; and we don't support different affinity settings for > >> different MSIs that are attached to the same hardware IRQ. > > > > > > Well, that's a significant departure from the expected behaviour. > > In other words, I wonder how useful this is. Could you instead > > reconfigure the MSI itself to hit the right CPU (assuming you don't > > have more than 16 CPUs and if > > that's only for XGene-1, this will only be 8 at most)? This would > > reduce your number of possible MSIs, but at least the semantics of > > set_afinity would be preserved. > > X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each > group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have > 16 hardware GIC IRQs for 2688 MSIs). We've already established that. > Setting affinity of single MSI to deliver it to a target CPU will move > all the MSIs mapped to the same GIC IRQ to that CPU as well. 
This is > not a standard behavior, but limiting the total number of MSIs will > cause a lot of devices to fall back to INTx (with huge performance > penalty) or even fail to load their driver as these devices request > more than 16 MSIs during driver initialization. No, I think you got it wrong. If you have 168 MSIs per GIC IRQ, and provided that you have 8 CPUs (XGene-1), you end up with 336 MSIs per CPU (having statically assigned 2 IRQs per CPU). Assuming you adopt my scheme, you still have a grand total of 336 MSIs that can be freely moved around without breaking any userspace expectations. I think that 336 MSIs is a fair number (nowhere near the 16 you claim). Most platforms are doing quite well with that kind of numbers. Also, you don't have to allocate all the MSIs a device can possibly claim (up to 2048 MSI-X per device), as they are all perfectly capable of using less MSI without having to fallback to INTx). > I can document the limitation in affinity setting of X-Gene-1 MSI in > the driver to hopefully not make people surprise and hope to keep the > total number of supported MSI as 2688 so that we can support as many > cards that require MSI/MSI-X as possible. I don't think this is a valid approach. This breaks userspace (think of things like irqbalance), and breaks the SMP affinity model that Linux uses. No amount of documentation is going to solve it, so I think you just have to admit that the HW is mis-designed and do the best you can to make it work like Linux expect it to work. The alternative would to disable affinity setting altogether instead of introducing these horrible side effects. Thanks, M.
This patch set adds MSI/MSIX termination driver support for APM X-Gene v1 SoC. APM X-Gene v1 SoC supports its own implementation of MSI, which is not compliant with the GIC V2M specification for MSI termination. There is a single MSI block in the X-Gene v1 SoC, which serves all 5 PCIe ports. This MSI block supports 2048 MSI termination ports coalesced into 16 physical HW IRQ lines and shared across all 5 PCIe ports. v4 changes: 1. Remove affinity setting for each MSI 2. Add description of register layout, MSI termination address and data 3. Correct total number of MSI vectors to 2048 4. Clean up error messages 5. Remove unused module code v3 changes: 1. Implement MSI support using PCI MSI IRQ domain 2. Only use msi_controller to store IRQ domain v2 changes: 1. Use msi_controller structure 2. Remove arch hooks arch_teardown_msi_irqs and arch_setup_msi_irqs .../devicetree/bindings/pci/xgene-pci-msi.txt | 63 ++++ MAINTAINERS | 8 + arch/arm64/boot/dts/apm/apm-storm.dtsi | 27 ++ drivers/pci/host/Kconfig | 6 + drivers/pci/host/Makefile | 1 + drivers/pci/host/pci-xgene-msi.c | 410 +++++++++++++++++++++ drivers/pci/host/pci-xgene.c | 21 ++ 7 files changed, 536 insertions(+) create mode 100644 Documentation/devicetree/bindings/pci/xgene-pci-msi.txt create mode 100644 drivers/pci/host/pci-xgene-msi.c
On Wed, Apr 15, 2015 at 1:16 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: > On Tue, 14 Apr 2015 19:20:19 +0100 > Duc Dang <dhdang@apm.com> wrote: > >> On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com> >> wrote: >> > On 2015-04-11 00:42, Duc Dang wrote: >> >> >> >> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier >> >> <marc.zyngier@arm.com> wrote: >> >>> >> >>> On 09/04/15 18:05, Duc Dang wrote: >> >>>> >> >>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into >> >>>> 16 HW IRQ lines. >> >>>> >> >>>> Signed-off-by: Duc Dang <dhdang@apm.com> >> >>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> >> >>>> --- >> >>>> drivers/pci/host/Kconfig | 6 + >> >>>> drivers/pci/host/Makefile | 1 + >> >>>> drivers/pci/host/pci-xgene-msi.c | 407 >> >>>> +++++++++++++++++++++++++++++++++++++++ >> >>>> drivers/pci/host/pci-xgene.c | 21 ++ >> >>>> 4 files changed, 435 insertions(+) >> >>>> create mode 100644 drivers/pci/host/pci-xgene-msi.c >> >>>> >> >>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig >> >>>> index 7b892a9..c9b61fa 100644 >> >>>> --- a/drivers/pci/host/Kconfig >> >>>> +++ b/drivers/pci/host/Kconfig >> >>>> @@ -89,11 +89,17 @@ config PCI_XGENE >> >>>> depends on ARCH_XGENE >> >>>> depends on OF >> >>>> select PCIEPORTBUS >> >>>> + select PCI_MSI_IRQ_DOMAIN if PCI_MSI >> >>>> + select PCI_XGENE_MSI if PCI_MSI >> >>>> help >> >>>> Say Y here if you want internal PCI support on APM >> >>>> X-Gene SoC. There are 5 internal PCIe ports available. Each port >> >>>> is GEN3 capable >> >>>> and have varied lanes from x1 to x8. 
>> >>>> >> >>>> +config PCI_XGENE_MSI >> >>>> + bool "X-Gene v1 PCIe MSI feature" >> >>>> + depends on PCI_XGENE && PCI_MSI >> >>>> + >> >>>> config PCI_LAYERSCAPE >> >>>> bool "Freescale Layerscape PCIe controller" >> >>>> depends on OF && ARM >> >>>> diff --git a/drivers/pci/host/Makefile >> >>>> b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644 >> >>>> --- a/drivers/pci/host/Makefile >> >>>> +++ b/drivers/pci/host/Makefile >> >>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += >> >>>> pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o >> >>>> pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o >> >>>> obj-$(CONFIG_PCI_XGENE) += pci-xgene.o >> >>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o >> >>>> obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o >> >>>> obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o >> >>>> diff --git a/drivers/pci/host/pci-xgene-msi.c >> >>>> b/drivers/pci/host/pci-xgene-msi.c >> >>>> new file mode 100644 >> >>>> index 0000000..4f0ff42 >> >>>> --- /dev/null >> >>>> +++ b/drivers/pci/host/pci-xgene-msi.c >> >>>> @@ -0,0 +1,407 @@ >> >>>> +/* >> >>>> + * APM X-Gene MSI Driver >> >>>> + * >> >>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation >> >>>> + * Author: Tanmay Inamdar <tinamdar@apm.com> >> >>>> + * Duc Dang <dhdang@apm.com> >> >>>> + * >> >>>> + * This program is free software; you can redistribute it >> >>>> and/or modify it >> >>>> + * under the terms of the GNU General Public License as >> >>>> published by the >> >>>> + * Free Software Foundation; either version 2 of the License, >> >>>> or (at your >> >>>> + * option) any later version. >> >>>> + * >> >>>> + * This program is distributed in the hope that it will be >> >>>> useful, >> >>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty >> >>>> of >> >>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> >>>> + * GNU General Public License for more details. 
>> >>>> + */ >> >>>> +#include <linux/interrupt.h> >> >>>> +#include <linux/module.h> >> >>>> +#include <linux/msi.h> >> >>>> +#include <linux/of_irq.h> >> >>>> +#include <linux/pci.h> >> >>>> +#include <linux/platform_device.h> >> >>>> +#include <linux/of_pci.h> >> >>>> + >> >>>> +#define MSI_INDEX0 0x000000 >> >>>> +#define MSI_INT0 0x800000 >> >>>> + >> >>>> +struct xgene_msi_settings { >> >>>> + u32 index_per_group; >> >>>> + u32 irqs_per_index; >> >>>> + u32 nr_msi_vec; >> >>>> + u32 nr_hw_irqs; >> >>>> +}; >> >>>> + >> >>>> +struct xgene_msi { >> >>>> + struct device_node *node; >> >>>> + struct msi_controller mchip; >> >>>> + struct irq_domain *domain; >> >>>> + struct xgene_msi_settings *settings; >> >>>> + u32 msi_addr_lo; >> >>>> + u32 msi_addr_hi; >> >>> >> >>> >> >>> I'd rather see the mailbox address directly, and only do the >> >>> split when assigning it to the message (you seem to play all kind >> >>> of tricks on the address anyway). >> >> >> >> >> >> msi_addr_lo and msi_addr_hi store the physical base address of MSI >> >> controller registers. I will add comment to clarify this. >> > >> > >> > What I mean is that there is no point in keeping this around as a >> > pair of 32bit variables. You'd better keep it as a single 64bit, >> > and do the split when assigning it the the MSI message. >> >> Hi Marc, >> >> These came from device-tree (which describes 64-bit address number as >> 2 32-bit words). > > ... and converted to a resource as a 64bit word, on which you apply > {upper,lower}_32_bit(). So much for DT... > >> If I store them this way, I don't need CPU cycles to do the split >> every time assigning them to the MSI message. Please let me know what >> do you think about it. > > This is getting absolutely silly. > > How many cycles does it take to execute "lsr x1, x0, #32" on X-Gene? 
If > it takes so long that it is considered to be a bottleneck, I suggest > you go and design a better CPU (hint: the answer is probably 1 cycle > absolutely everywhere). > > How often are you configuring MSIs in the face of what is happening in > the rest of the kernel? Almost never! > > So, given that "never" times 1 is still never, I'll consider that > readability of the code trumps it anytime (I can't believe we're having > that kind of conversation...). > I changed to use u64 for msi_addr and split it when composing MSI messages. The change is in v4 of the patch set that I just posted. >> > >> > [...] >> > >> >>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data, >> >>>> + const struct cpumask *mask, >> >>>> bool force) >> >>>> +{ >> >>>> + struct xgene_msi *msi = >> >>>> irq_data_get_irq_chip_data(irq_data); >> >>>> + unsigned int gic_irq; >> >>>> + int ret; >> >>>> + >> >>>> + gic_irq = msi->msi_virqs[irq_data->hwirq % >> >>>> msi->settings->nr_hw_irqs]; >> >>>> + ret = irq_set_affinity(gic_irq, mask); >> >>> >> >>> >> >>> Erm... This has the effect of moving *all* the MSIs hanging off >> >>> this interrupt to another CPU. I'm not sure that's an acceptable >> >>> effect... What if another MSI requires a different affinity? >> >> >> >> >> >> We have 16 'real' hardware IRQs. Each of these has multiple MSIs >> >> attached to it. >> >> So this will move all MSIs hanging off this interrupt to another >> >> CPU; and we don't support different affinity settings for >> >> different MSIs that are attached to the same hardware IRQ. >> > >> > >> > Well, that's a significant departure from the expected behaviour. >> > In other words, I wonder how useful this is. Could you instead >> > reconfigure the MSI itself to hit the right CPU (assuming you don't >> > have more than 16 CPUs and if >> > that's only for XGene-1, this will only be 8 at most)?
This would >> > reduce your number of possible MSIs, but at least the semantics of >> > set_affinity would be preserved. >> >> X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each >> group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have >> 16 hardware GIC IRQs for 2688 MSIs). > > We've already established that. > >> Setting affinity of a single MSI to deliver it to a target CPU will move >> all the MSIs mapped to the same GIC IRQ to that CPU as well. This is >> not a standard behavior, but limiting the total number of MSIs will >> cause a lot of devices to fall back to INTx (with huge performance >> penalty) or even fail to load their driver as these devices request >> more than 16 MSIs during driver initialization. > > No, I think you got it wrong. If you have 168 MSIs per GIC IRQ, and > provided that you have 8 CPUs (XGene-1), you end up with 336 MSIs per > CPU (having statically assigned 2 IRQs per CPU). > > Assuming you adopt my scheme, you still have a grand total of 336 MSIs > that can be freely moved around without breaking any userspace > expectations. > Thanks Marc. This is a very good idea. But to move MSIs around, I need to change MSI termination address and data and write them to device configuration space. This may cause problems if the device fires an interrupt at the same time when I do the config write? What is your opinion here? > I think that 336 MSIs is a fair number (nowhere near the 16 you claim). > Most platforms are doing quite well with that kind of numbers. Also, > you don't have to allocate all the MSIs a device can possibly claim (up > to 2048 MSI-X per device), as they are all perfectly capable of using > fewer MSIs without having to fall back to INTx. > >> I can document the limitation in affinity setting of X-Gene-1 MSI in >> the driver to hopefully not surprise people and hope to keep the >> total number of supported MSI as 2688 so that we can support as many >> cards that require MSI/MSI-X as possible.
> > I don't think this is a valid approach. This breaks userspace (think of > things like irqbalance), and breaks the SMP affinity model that Linux > uses. No amount of documentation is going to solve it, so I think you > just have to admit that the HW is mis-designed and do the best you can > to make it work like Linux expects it to work. > > The alternative would be to disable affinity setting altogether instead of > introducing these horrible side effects. > I have it disabled (set_affinity does nothing) in my v4 patch. > Thanks, > > M. > -- > Jazz is not dead. It just smells funny.
On 17/04/15 11:00, Duc Dang wrote: > On Wed, Apr 15, 2015 at 1:16 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: >> On Tue, 14 Apr 2015 19:20:19 +0100 >> Duc Dang <dhdang@apm.com> wrote: >> >>> On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com> >>> wrote: >>>> On 2015-04-11 00:42, Duc Dang wrote: >>>>> >>>>> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier >>>>> <marc.zyngier@arm.com> wrote: >>>>>> >>>>>> On 09/04/15 18:05, Duc Dang wrote: >>>>>>> >>>>>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into >>>>>>> 16 HW IRQ lines. >>>>>>> >>>>>>> Signed-off-by: Duc Dang <dhdang@apm.com> >>>>>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> >>>>>>> --- >>>>>>> drivers/pci/host/Kconfig | 6 + >>>>>>> drivers/pci/host/Makefile | 1 + >>>>>>> drivers/pci/host/pci-xgene-msi.c | 407 >>>>>>> +++++++++++++++++++++++++++++++++++++++ >>>>>>> drivers/pci/host/pci-xgene.c | 21 ++ >>>>>>> 4 files changed, 435 insertions(+) >>>>>>> create mode 100644 drivers/pci/host/pci-xgene-msi.c >>>>>>> >>>>>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig >>>>>>> index 7b892a9..c9b61fa 100644 >>>>>>> --- a/drivers/pci/host/Kconfig >>>>>>> +++ b/drivers/pci/host/Kconfig >>>>>>> @@ -89,11 +89,17 @@ config PCI_XGENE >>>>>>> depends on ARCH_XGENE >>>>>>> depends on OF >>>>>>> select PCIEPORTBUS >>>>>>> + select PCI_MSI_IRQ_DOMAIN if PCI_MSI >>>>>>> + select PCI_XGENE_MSI if PCI_MSI >>>>>>> help >>>>>>> Say Y here if you want internal PCI support on APM >>>>>>> X-Gene SoC. There are 5 internal PCIe ports available. Each port >>>>>>> is GEN3 capable >>>>>>> and have varied lanes from x1 to x8. 
>>>>>>> >>>>>>> +config PCI_XGENE_MSI >>>>>>> + bool "X-Gene v1 PCIe MSI feature" >>>>>>> + depends on PCI_XGENE && PCI_MSI >>>>>>> + >>>>>>> config PCI_LAYERSCAPE >>>>>>> bool "Freescale Layerscape PCIe controller" >>>>>>> depends on OF && ARM >>>>>>> diff --git a/drivers/pci/host/Makefile >>>>>>> b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644 >>>>>>> --- a/drivers/pci/host/Makefile >>>>>>> +++ b/drivers/pci/host/Makefile >>>>>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += >>>>>>> pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o >>>>>>> pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o >>>>>>> obj-$(CONFIG_PCI_XGENE) += pci-xgene.o >>>>>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o >>>>>>> obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o >>>>>>> obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o >>>>>>> diff --git a/drivers/pci/host/pci-xgene-msi.c >>>>>>> b/drivers/pci/host/pci-xgene-msi.c >>>>>>> new file mode 100644 >>>>>>> index 0000000..4f0ff42 >>>>>>> --- /dev/null >>>>>>> +++ b/drivers/pci/host/pci-xgene-msi.c >>>>>>> @@ -0,0 +1,407 @@ >>>>>>> +/* >>>>>>> + * APM X-Gene MSI Driver >>>>>>> + * >>>>>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation >>>>>>> + * Author: Tanmay Inamdar <tinamdar@apm.com> >>>>>>> + * Duc Dang <dhdang@apm.com> >>>>>>> + * >>>>>>> + * This program is free software; you can redistribute it >>>>>>> and/or modify it >>>>>>> + * under the terms of the GNU General Public License as >>>>>>> published by the >>>>>>> + * Free Software Foundation; either version 2 of the License, >>>>>>> or (at your >>>>>>> + * option) any later version. >>>>>>> + * >>>>>>> + * This program is distributed in the hope that it will be >>>>>>> useful, >>>>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty >>>>>>> of >>>>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >>>>>>> + * GNU General Public License for more details. 
>>>>>>> + */ >>>>>>> +#include <linux/interrupt.h> >>>>>>> +#include <linux/module.h> >>>>>>> +#include <linux/msi.h> >>>>>>> +#include <linux/of_irq.h> >>>>>>> +#include <linux/pci.h> >>>>>>> +#include <linux/platform_device.h> >>>>>>> +#include <linux/of_pci.h> >>>>>>> + >>>>>>> +#define MSI_INDEX0 0x000000 >>>>>>> +#define MSI_INT0 0x800000 >>>>>>> + >>>>>>> +struct xgene_msi_settings { >>>>>>> + u32 index_per_group; >>>>>>> + u32 irqs_per_index; >>>>>>> + u32 nr_msi_vec; >>>>>>> + u32 nr_hw_irqs; >>>>>>> +}; >>>>>>> + >>>>>>> +struct xgene_msi { >>>>>>> + struct device_node *node; >>>>>>> + struct msi_controller mchip; >>>>>>> + struct irq_domain *domain; >>>>>>> + struct xgene_msi_settings *settings; >>>>>>> + u32 msi_addr_lo; >>>>>>> + u32 msi_addr_hi; >>>>>> >>>>>> >>>>>> I'd rather see the mailbox address directly, and only do the >>>>>> split when assigning it to the message (you seem to play all kind >>>>>> of tricks on the address anyway). >>>>> >>>>> >>>>> msi_addr_lo and msi_addr_hi store the physical base address of MSI >>>>> controller registers. I will add comment to clarify this. >>>> >>>> >>>> What I mean is that there is no point in keeping this around as a >>>> pair of 32bit variables. You'd better keep it as a single 64bit, >>>> and do the split when assigning it the the MSI message. >>> >>> Hi Marc, >>> >>> These came from device-tree (which describes 64-bit address number as >>> 2 32-bit words). >> >> ... and converted to a resource as a 64bit word, on which you apply >> {upper,lower}_32_bit(). So much for DT... >> >>> If I store them this way, I don't need CPU cycles to do the split >>> every time assigning them to the MSI message. Please let me know what >>> do you think about it. >> >> This is getting absolutely silly. >> >> How many cycles does it take to execute "lsr x1, x0, #32" on X-Gene? 
If >> it takes so long that it is considered to be a bottleneck, I suggest >> you go and design a better CPU (hint: the answer is probably 1 cycle >> absolutely everywhere). >> >> How often are you configuring MSIs in the face of what is happening in >> the rest of the kernel? Almost never! >> >> So, given that "never" times 1 is still never, I'll consider that >> readability of the code trumps it anytime (I can't believe we're having >> that kind of conversation...). >> > I changed to use u64 for msi_addr and split it when composing MSI messages. > The change is in v4 of the patch set that I just posted. >>>> >>>> [...] >>>> >>>>>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data, >>>>>>> + const struct cpumask *mask, >>>>>>> bool force) >>>>>>> +{ >>>>>>> + struct xgene_msi *msi = >>>>>>> irq_data_get_irq_chip_data(irq_data); >>>>>>> + unsigned int gic_irq; >>>>>>> + int ret; >>>>>>> + >>>>>>> + gic_irq = msi->msi_virqs[irq_data->hwirq % >>>>>>> msi->settings->nr_hw_irqs]; >>>>>>> + ret = irq_set_affinity(gic_irq, mask); >>>>>> >>>>>> >>>>>> Erm... This has the effect of moving *all* the MSIs hanging off >>>>>> this interrupt to another CPU. I'm not sure that's an acceptable >>>>>> effect... What if another MSI requires a different affinity? >>>>> >>>>> >>>>> We have 16 'real' hardware IRQs. Each of these has multiple MSIs >>>>> attached to it. >>>>> So this will move all MSIs hanging off this interrupt to another >>>>> CPU; and we don't support different affinity settings for >>>>> different MSIs that are attached to the same hardware IRQ. >>>> >>>> >>>> Well, that's a significant departure from the expected behaviour. >>>> In other words, I wonder how useful this is. Could you instead >>>> reconfigure the MSI itself to hit the right CPU (assuming you don't >>>> have more than 16 CPUs and if >>>> that's only for XGene-1, this will only be 8 at most)?
This would >>>> reduce your number of possible MSIs, but at least the semantics of >>>> set_affinity would be preserved. >>> >>> X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each >>> group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have >>> 16 hardware GIC IRQs for 2688 MSIs). >> >> We've already established that. >> >>> Setting affinity of a single MSI to deliver it to a target CPU will move >>> all the MSIs mapped to the same GIC IRQ to that CPU as well. This is >>> not a standard behavior, but limiting the total number of MSIs will >>> cause a lot of devices to fall back to INTx (with huge performance >>> penalty) or even fail to load their driver as these devices request >>> more than 16 MSIs during driver initialization. >> >> No, I think you got it wrong. If you have 168 MSIs per GIC IRQ, and >> provided that you have 8 CPUs (XGene-1), you end up with 336 MSIs per >> CPU (having statically assigned 2 IRQs per CPU). >> >> Assuming you adopt my scheme, you still have a grand total of 336 MSIs >> that can be freely moved around without breaking any userspace >> expectations. >> > Thanks Marc. This is a very good idea. > > But to move MSIs around, I need to change MSI termination address and data > and write them to device configuration space. This may cause problems > if the device > fires an interrupt at the same time when I do the config write? > > What is your opinion here? There is an inherent race when changing the affinity of any interrupt, whether that's an MSI or not. The kernel is perfectly prepared to handle such a situation (to be honest, the kernel doesn't really care). The write to the config space shouldn't be a concern. You will either hit the old *or* the new CPU, but that race is only during the write itself (you can read back from the config space if you're really paranoid). By the time you return from this read/write, the device will be reconfigured.
>> I think that 336 MSIs is a fair number (nowhere near the 16 you claim). >> Most platforms are doing quite well with that kind of numbers. Also, >> you don't have to allocate all the MSIs a device can possibly claim (up >> to 2048 MSI-X per device), as they are all perfectly capable of using >> fewer MSIs without having to fall back to INTx. >> >>> I can document the limitation in affinity setting of X-Gene-1 MSI in >>> the driver to hopefully not surprise people and hope to keep the >>> total number of supported MSI as 2688 so that we can support as many >>> cards that require MSI/MSI-X as possible. >> >> I don't think this is a valid approach. This breaks userspace (think of >> things like irqbalance), and breaks the SMP affinity model that Linux >> uses. No amount of documentation is going to solve it, so I think you >> just have to admit that the HW is mis-designed and do the best you can >> to make it work like Linux expects it to work. >> >> The alternative would be to disable affinity setting altogether instead of >> introducing these horrible side effects. >> > I have it disabled (set_affinity does nothing) in my v4 patch. It would be good if you could try the above approach. It shouldn't be hard to write, and it would be a lot better than just ignoring the problem. I'll try to review the patches soon(-ish)... M.
On Fri, Apr 17, 2015 at 3:17 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: > On 17/04/15 11:00, Duc Dang wrote: >> On Wed, Apr 15, 2015 at 1:16 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: >>> On Tue, 14 Apr 2015 19:20:19 +0100 >>> Duc Dang <dhdang@apm.com> wrote: >>> >>>> On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com> >>>> wrote: >>>>> On 2015-04-11 00:42, Duc Dang wrote: >>>>>> >>>>>> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier >>>>>> <marc.zyngier@arm.com> wrote: >>>>>>> >>>>>>> On 09/04/15 18:05, Duc Dang wrote: >>>>>>>> >>>>>>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into >>>>>>>> 16 HW IRQ lines. >>>>>>>> >>>>>>>> Signed-off-by: Duc Dang <dhdang@apm.com> >>>>>>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> >>>>>>>> --- >>>>>>>> drivers/pci/host/Kconfig | 6 + >>>>>>>> drivers/pci/host/Makefile | 1 + >>>>>>>> drivers/pci/host/pci-xgene-msi.c | 407 >>>>>>>> +++++++++++++++++++++++++++++++++++++++ >>>>>>>> drivers/pci/host/pci-xgene.c | 21 ++ >>>>>>>> 4 files changed, 435 insertions(+) >>>>>>>> create mode 100644 drivers/pci/host/pci-xgene-msi.c >>>>>>>> >>>>>>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig >>>>>>>> index 7b892a9..c9b61fa 100644 >>>>>>>> --- a/drivers/pci/host/Kconfig >>>>>>>> +++ b/drivers/pci/host/Kconfig >>>>>>>> @@ -89,11 +89,17 @@ config PCI_XGENE >>>>>>>> depends on ARCH_XGENE >>>>>>>> depends on OF >>>>>>>> select PCIEPORTBUS >>>>>>>> + select PCI_MSI_IRQ_DOMAIN if PCI_MSI >>>>>>>> + select PCI_XGENE_MSI if PCI_MSI >>>>>>>> help >>>>>>>> Say Y here if you want internal PCI support on APM >>>>>>>> X-Gene SoC. There are 5 internal PCIe ports available. Each port >>>>>>>> is GEN3 capable >>>>>>>> and have varied lanes from x1 to x8. 
>>>>>>>> >>>>>>>> +config PCI_XGENE_MSI >>>>>>>> + bool "X-Gene v1 PCIe MSI feature" >>>>>>>> + depends on PCI_XGENE && PCI_MSI >>>>>>>> + >>>>>>>> config PCI_LAYERSCAPE >>>>>>>> bool "Freescale Layerscape PCIe controller" >>>>>>>> depends on OF && ARM >>>>>>>> diff --git a/drivers/pci/host/Makefile >>>>>>>> b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644 >>>>>>>> --- a/drivers/pci/host/Makefile >>>>>>>> +++ b/drivers/pci/host/Makefile >>>>>>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += >>>>>>>> pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o >>>>>>>> pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o >>>>>>>> obj-$(CONFIG_PCI_XGENE) += pci-xgene.o >>>>>>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o >>>>>>>> obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o >>>>>>>> obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o >>>>>>>> diff --git a/drivers/pci/host/pci-xgene-msi.c >>>>>>>> b/drivers/pci/host/pci-xgene-msi.c >>>>>>>> new file mode 100644 >>>>>>>> index 0000000..4f0ff42 >>>>>>>> --- /dev/null >>>>>>>> +++ b/drivers/pci/host/pci-xgene-msi.c >>>>>>>> @@ -0,0 +1,407 @@ >>>>>>>> +/* >>>>>>>> + * APM X-Gene MSI Driver >>>>>>>> + * >>>>>>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation >>>>>>>> + * Author: Tanmay Inamdar <tinamdar@apm.com> >>>>>>>> + * Duc Dang <dhdang@apm.com> >>>>>>>> + * >>>>>>>> + * This program is free software; you can redistribute it >>>>>>>> and/or modify it >>>>>>>> + * under the terms of the GNU General Public License as >>>>>>>> published by the >>>>>>>> + * Free Software Foundation; either version 2 of the License, >>>>>>>> or (at your >>>>>>>> + * option) any later version. >>>>>>>> + * >>>>>>>> + * This program is distributed in the hope that it will be >>>>>>>> useful, >>>>>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty >>>>>>>> of >>>>>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >>>>>>>> + * GNU General Public License for more details. 
>>>>>>>> + */ >>>>>>>> +#include <linux/interrupt.h> >>>>>>>> +#include <linux/module.h> >>>>>>>> +#include <linux/msi.h> >>>>>>>> +#include <linux/of_irq.h> >>>>>>>> +#include <linux/pci.h> >>>>>>>> +#include <linux/platform_device.h> >>>>>>>> +#include <linux/of_pci.h> >>>>>>>> + >>>>>>>> +#define MSI_INDEX0 0x000000 >>>>>>>> +#define MSI_INT0 0x800000 >>>>>>>> + >>>>>>>> +struct xgene_msi_settings { >>>>>>>> + u32 index_per_group; >>>>>>>> + u32 irqs_per_index; >>>>>>>> + u32 nr_msi_vec; >>>>>>>> + u32 nr_hw_irqs; >>>>>>>> +}; >>>>>>>> + >>>>>>>> +struct xgene_msi { >>>>>>>> + struct device_node *node; >>>>>>>> + struct msi_controller mchip; >>>>>>>> + struct irq_domain *domain; >>>>>>>> + struct xgene_msi_settings *settings; >>>>>>>> + u32 msi_addr_lo; >>>>>>>> + u32 msi_addr_hi; >>>>>>> >>>>>>> >>>>>>> I'd rather see the mailbox address directly, and only do the >>>>>>> split when assigning it to the message (you seem to play all kind >>>>>>> of tricks on the address anyway). >>>>>> >>>>>> >>>>>> msi_addr_lo and msi_addr_hi store the physical base address of MSI >>>>>> controller registers. I will add comment to clarify this. >>>>> >>>>> >>>>> What I mean is that there is no point in keeping this around as a >>>>> pair of 32bit variables. You'd better keep it as a single 64bit, >>>>> and do the split when assigning it the the MSI message. >>>> >>>> Hi Marc, >>>> >>>> These came from device-tree (which describes 64-bit address number as >>>> 2 32-bit words). >>> >>> ... and converted to a resource as a 64bit word, on which you apply >>> {upper,lower}_32_bit(). So much for DT... >>> >>>> If I store them this way, I don't need CPU cycles to do the split >>>> every time assigning them to the MSI message. Please let me know what >>>> do you think about it. >>> >>> This is getting absolutely silly. >>> >>> How many cycles does it take to execute "lsr x1, x0, #32" on X-Gene? 
If >>> it takes so long that it is considered to be a bottleneck, I suggest >>> you go and design a better CPU (hint: the answer is probably 1 cycle >>> absolutely everywhere). >>> >>> How often are you configuring MSIs in the face of what is happening in >>> the rest of the kernel? Almost never! >>> >>> So, given that "never" times 1 is still never, I'll consider that >>> readability of the code trumps it anytime (I can't believe we're having >>> that kind of conversation...). >>> >> I changed to use u64 for msi_addr and split it when composing MSI messages. >> The change is in v4 of the patch set that I just posted. >>>>> >>>>> [...] >>>>> >>>>>>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data, >>>>>>>> + const struct cpumask *mask, >>>>>>>> bool force) >>>>>>>> +{ >>>>>>>> + struct xgene_msi *msi = >>>>>>>> irq_data_get_irq_chip_data(irq_data); >>>>>>>> + unsigned int gic_irq; >>>>>>>> + int ret; >>>>>>>> + >>>>>>>> + gic_irq = msi->msi_virqs[irq_data->hwirq % >>>>>>>> msi->settings->nr_hw_irqs]; >>>>>>>> + ret = irq_set_affinity(gic_irq, mask); >>>>>>> >>>>>>> >>>>>>> Erm... This as the effect of moving *all* the MSIs hanging off >>>>>>> this interrupt to another CPU. I'm not sure that's an acceptable >>>>>>> effect... What if another MSI requires a different affinity? >>>>>> >>>>>> >>>>>> We have 16 'real' hardware IRQs. Each of these has multiple MSIs >>>>>> attached to it. >>>>>> So this will move all MSIs handing off this interrupt to another >>>>>> CPU; and we don't support different affinity settings for >>>>>> different MSIs that are attached to the same hardware IRQ. >>>>> >>>>> >>>>> Well, that's a significant departure from the expected behaviour. >>>>> In other words, I wonder how useful this is. Could you instead >>>>> reconfigure the MSI itself to hit the right CPU (assuming you don't >>>>> have more than 16 CPUs and if >>>>> that's only for XGene-1, this will only be 8 at most)? 
This would >>>>> reduce your number of possible MSIs, but at least the semantics of >>>>> set_afinity would be preserved. >>>> >>>> X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each >>>> group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have >>>> 16 hardware GIC IRQs for 2688 MSIs). >>> >>> We've already established that. >>> >>>> Setting affinity of single MSI to deliver it to a target CPU will move >>>> all the MSIs mapped to the same GIC IRQ to that CPU as well. This is >>>> not a standard behavior, but limiting the total number of MSIs will >>>> cause a lot of devices to fall back to INTx (with huge performance >>>> penalty) or even fail to load their driver as these devices request >>>> more than 16 MSIs during driver initialization. >>> >>> No, I think you got it wrong. If you have 168 MSIs per GIC IRQ, and >>> provided that you have 8 CPUs (XGene-1), you end up with 336 MSIs per >>> CPU (having statically assigned 2 IRQs per CPU). >>> >>> Assuming you adopt my scheme, you still have a grand total of 336 MSIs >>> that can be freely moved around without breaking any userspace >>> expectations. >>> >> Thanks Marc. This is a very good idea. >> >> But to move MSIs around, I need to change MSI termination address and data >> and write them to device configuration space. This may cause problems >> if the device >> fires an interrupt at the same time when I do the config write? >> >> What is your opinion here? > > There is an inherent race when changing the affinity of any interrupt, > whether that's an MSI or not. The kernel is perfectly prepared to handle > such a situation (to be honest, the kernel doesn't really care). > > The write to the config space shouldn't be a concern. You will either > hit the old *or* the new CPU, but that race is only during the write > itself (you can read back from the config space if you're really > paranoid). By the time you return from this read/write, the device will > be reconfigured. 
> >>> I think that 336 MSIs is a fair number (nowhere near the 16 you claim). >>> Most platforms are doing quite well with that kind of numbers. Also, >>> you don't have to allocate all the MSIs a device can possibly claim (up >>> to 2048 MSI-X per device), as they are all perfectly capable of using >>> less MSI without having to fallback to INTx). >>> >>>> I can document the limitation in affinity setting of X-Gene-1 MSI in >>>> the driver to hopefully not make people surprise and hope to keep the >>>> total number of supported MSI as 2688 so that we can support as many >>>> cards that require MSI/MSI-X as possible. >>> >>> I don't think this is a valid approach. This breaks userspace (think of >>> things like irqbalance), and breaks the SMP affinity model that Linux >>> uses. No amount of documentation is going to solve it, so I think you >>> just have to admit that the HW is mis-designed and do the best you can >>> to make it work like Linux expect it to work. >>> >>> The alternative would to disable affinity setting altogether instead of >>> introducing these horrible side effects. >>> >> I have it disabled (set_affinity does nothing) in my v4 patch. > > It would be good if you could try the above approach. It shouldn't be > hard to write, and it would be a lot better than just ignoring the problem. > Yes. I am working on this change. > I'll try to review the patches soon(-ish)... > > M. > -- > Jazz is not dead. It just smells funny... Regards, Duc Dang.
On 17/04/15 13:37, Duc Dang wrote: > On Fri, Apr 17, 2015 at 3:17 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: >> On 17/04/15 11:00, Duc Dang wrote: >>> On Wed, Apr 15, 2015 at 1:16 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: >>>> On Tue, 14 Apr 2015 19:20:19 +0100 >>>> Duc Dang <dhdang@apm.com> wrote: >>>> >>>>> On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com> >>>>> wrote: >>>>>> On 2015-04-11 00:42, Duc Dang wrote: >>>>>>> >>>>>>> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier >>>>>>> <marc.zyngier@arm.com> wrote: >>>>>>>> >>>>>>>> On 09/04/15 18:05, Duc Dang wrote: >>>>>>>>> >>>>>>>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into >>>>>>>>> 16 HW IRQ lines. >>>>>>>>> >>>>>>>>> Signed-off-by: Duc Dang <dhdang@apm.com> >>>>>>>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> >>>>>>>>> --- >>>>>>>>> drivers/pci/host/Kconfig | 6 + >>>>>>>>> drivers/pci/host/Makefile | 1 + >>>>>>>>> drivers/pci/host/pci-xgene-msi.c | 407 >>>>>>>>> +++++++++++++++++++++++++++++++++++++++ >>>>>>>>> drivers/pci/host/pci-xgene.c | 21 ++ >>>>>>>>> 4 files changed, 435 insertions(+) >>>>>>>>> create mode 100644 drivers/pci/host/pci-xgene-msi.c >>>>>>>>> >>>>>>>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig >>>>>>>>> index 7b892a9..c9b61fa 100644 >>>>>>>>> --- a/drivers/pci/host/Kconfig >>>>>>>>> +++ b/drivers/pci/host/Kconfig >>>>>>>>> @@ -89,11 +89,17 @@ config PCI_XGENE >>>>>>>>> depends on ARCH_XGENE >>>>>>>>> depends on OF >>>>>>>>> select PCIEPORTBUS >>>>>>>>> + select PCI_MSI_IRQ_DOMAIN if PCI_MSI >>>>>>>>> + select PCI_XGENE_MSI if PCI_MSI >>>>>>>>> help >>>>>>>>> Say Y here if you want internal PCI support on APM >>>>>>>>> X-Gene SoC. There are 5 internal PCIe ports available. Each port >>>>>>>>> is GEN3 capable >>>>>>>>> and have varied lanes from x1 to x8. 
>>>>>>>>> >>>>>>>>> +config PCI_XGENE_MSI >>>>>>>>> + bool "X-Gene v1 PCIe MSI feature" >>>>>>>>> + depends on PCI_XGENE && PCI_MSI >>>>>>>>> + >>>>>>>>> config PCI_LAYERSCAPE >>>>>>>>> bool "Freescale Layerscape PCIe controller" >>>>>>>>> depends on OF && ARM >>>>>>>>> diff --git a/drivers/pci/host/Makefile >>>>>>>>> b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644 >>>>>>>>> --- a/drivers/pci/host/Makefile >>>>>>>>> +++ b/drivers/pci/host/Makefile >>>>>>>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += >>>>>>>>> pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o >>>>>>>>> pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o >>>>>>>>> obj-$(CONFIG_PCI_XGENE) += pci-xgene.o >>>>>>>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o >>>>>>>>> obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o >>>>>>>>> obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o >>>>>>>>> diff --git a/drivers/pci/host/pci-xgene-msi.c >>>>>>>>> b/drivers/pci/host/pci-xgene-msi.c >>>>>>>>> new file mode 100644 >>>>>>>>> index 0000000..4f0ff42 >>>>>>>>> --- /dev/null >>>>>>>>> +++ b/drivers/pci/host/pci-xgene-msi.c >>>>>>>>> @@ -0,0 +1,407 @@ >>>>>>>>> +/* >>>>>>>>> + * APM X-Gene MSI Driver >>>>>>>>> + * >>>>>>>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation >>>>>>>>> + * Author: Tanmay Inamdar <tinamdar@apm.com> >>>>>>>>> + * Duc Dang <dhdang@apm.com> >>>>>>>>> + * >>>>>>>>> + * This program is free software; you can redistribute it >>>>>>>>> and/or modify it >>>>>>>>> + * under the terms of the GNU General Public License as >>>>>>>>> published by the >>>>>>>>> + * Free Software Foundation; either version 2 of the License, >>>>>>>>> or (at your >>>>>>>>> + * option) any later version. >>>>>>>>> + * >>>>>>>>> + * This program is distributed in the hope that it will be >>>>>>>>> useful, >>>>>>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty >>>>>>>>> of >>>>>>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the >>>>>>>>> + * GNU General Public License for more details. >>>>>>>>> + */ >>>>>>>>> +#include <linux/interrupt.h> >>>>>>>>> +#include <linux/module.h> >>>>>>>>> +#include <linux/msi.h> >>>>>>>>> +#include <linux/of_irq.h> >>>>>>>>> +#include <linux/pci.h> >>>>>>>>> +#include <linux/platform_device.h> >>>>>>>>> +#include <linux/of_pci.h> >>>>>>>>> + >>>>>>>>> +#define MSI_INDEX0 0x000000 >>>>>>>>> +#define MSI_INT0 0x800000 >>>>>>>>> + >>>>>>>>> +struct xgene_msi_settings { >>>>>>>>> + u32 index_per_group; >>>>>>>>> + u32 irqs_per_index; >>>>>>>>> + u32 nr_msi_vec; >>>>>>>>> + u32 nr_hw_irqs; >>>>>>>>> +}; >>>>>>>>> + >>>>>>>>> +struct xgene_msi { >>>>>>>>> + struct device_node *node; >>>>>>>>> + struct msi_controller mchip; >>>>>>>>> + struct irq_domain *domain; >>>>>>>>> + struct xgene_msi_settings *settings; >>>>>>>>> + u32 msi_addr_lo; >>>>>>>>> + u32 msi_addr_hi; >>>>>>>> >>>>>>>> >>>>>>>> I'd rather see the mailbox address directly, and only do the >>>>>>>> split when assigning it to the message (you seem to play all kind >>>>>>>> of tricks on the address anyway). >>>>>>> >>>>>>> >>>>>>> msi_addr_lo and msi_addr_hi store the physical base address of MSI >>>>>>> controller registers. I will add comment to clarify this. >>>>>> >>>>>> >>>>>> What I mean is that there is no point in keeping this around as a >>>>>> pair of 32bit variables. You'd better keep it as a single 64bit, >>>>>> and do the split when assigning it the the MSI message. >>>>> >>>>> Hi Marc, >>>>> >>>>> These came from device-tree (which describes 64-bit address number as >>>>> 2 32-bit words). >>>> >>>> ... and converted to a resource as a 64bit word, on which you apply >>>> {upper,lower}_32_bit(). So much for DT... >>>> >>>>> If I store them this way, I don't need CPU cycles to do the split >>>>> every time assigning them to the MSI message. Please let me know what >>>>> do you think about it. >>>> >>>> This is getting absolutely silly. 
>>>> >>>> How many cycles does it take to execute "lsr x1, x0, #32" on X-Gene? If >>>> it takes so long that it is considered to be a bottleneck, I suggest >>>> you go and design a better CPU (hint: the answer is probably 1 cycle >>>> absolutely everywhere). >>>> >>>> How often are you configuring MSIs in the face of what is happening in >>>> the rest of the kernel? Almost never! >>>> >>>> So, given that "never" times 1 is still never, I'll consider that >>>> readability of the code trumps it anytime (I can't believe we're having >>>> that kind of conversation...). >>>> >>> I changed to use u64 for msi_addr and split it when composing MSI messages. >>> The change is in v4 of the patch set that I just posted. >>>>>> >>>>>> [...] >>>>>> >>>>>>>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data, >>>>>>>>> + const struct cpumask *mask, >>>>>>>>> bool force) >>>>>>>>> +{ >>>>>>>>> + struct xgene_msi *msi = >>>>>>>>> irq_data_get_irq_chip_data(irq_data); >>>>>>>>> + unsigned int gic_irq; >>>>>>>>> + int ret; >>>>>>>>> + >>>>>>>>> + gic_irq = msi->msi_virqs[irq_data->hwirq % >>>>>>>>> msi->settings->nr_hw_irqs]; >>>>>>>>> + ret = irq_set_affinity(gic_irq, mask); >>>>>>>> >>>>>>>> >>>>>>>> Erm... This as the effect of moving *all* the MSIs hanging off >>>>>>>> this interrupt to another CPU. I'm not sure that's an acceptable >>>>>>>> effect... What if another MSI requires a different affinity? >>>>>>> >>>>>>> >>>>>>> We have 16 'real' hardware IRQs. Each of these has multiple MSIs >>>>>>> attached to it. >>>>>>> So this will move all MSIs handing off this interrupt to another >>>>>>> CPU; and we don't support different affinity settings for >>>>>>> different MSIs that are attached to the same hardware IRQ. >>>>>> >>>>>> >>>>>> Well, that's a significant departure from the expected behaviour. >>>>>> In other words, I wonder how useful this is. 
Could you instead >>>>>> reconfigure the MSI itself to hit the right CPU (assuming you don't >>>>>> have more than 16 CPUs and if >>>>>> that's only for XGene-1, this will only be 8 at most)? This would >>>>>> reduce your number of possible MSIs, but at least the semantics of >>>>>> set_afinity would be preserved. >>>>> >>>>> X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each >>>>> group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have >>>>> 16 hardware GIC IRQs for 2688 MSIs). >>>> >>>> We've already established that. >>>> >>>>> Setting affinity of single MSI to deliver it to a target CPU will move >>>>> all the MSIs mapped to the same GIC IRQ to that CPU as well. This is >>>>> not a standard behavior, but limiting the total number of MSIs will >>>>> cause a lot of devices to fall back to INTx (with huge performance >>>>> penalty) or even fail to load their driver as these devices request >>>>> more than 16 MSIs during driver initialization. >>>> >>>> No, I think you got it wrong. If you have 168 MSIs per GIC IRQ, and >>>> provided that you have 8 CPUs (XGene-1), you end up with 336 MSIs per >>>> CPU (having statically assigned 2 IRQs per CPU). >>>> >>>> Assuming you adopt my scheme, you still have a grand total of 336 MSIs >>>> that can be freely moved around without breaking any userspace >>>> expectations. >>>> >>> Thanks Marc. This is a very good idea. >>> >>> But to move MSIs around, I need to change MSI termination address and data >>> and write them to device configuration space. This may cause problems >>> if the device >>> fires an interrupt at the same time when I do the config write? >>> >>> What is your opinion here? >> >> There is an inherent race when changing the affinity of any interrupt, >> whether that's an MSI or not. The kernel is perfectly prepared to handle >> such a situation (to be honest, the kernel doesn't really care). >> >> The write to the config space shouldn't be a concern. 
You will either >> hit the old *or* the new CPU, but that race is only during the write >> itself (you can read back from the config space if you're really >> paranoid). By the time you return from this read/write, the device will >> be reconfigured. >> >>>> I think that 336 MSIs is a fair number (nowhere near the 16 you claim). >>>> Most platforms are doing quite well with that kind of numbers. Also, >>>> you don't have to allocate all the MSIs a device can possibly claim (up >>>> to 2048 MSI-X per device), as they are all perfectly capable of using >>>> less MSI without having to fallback to INTx). >>>> >>>>> I can document the limitation in affinity setting of X-Gene-1 MSI in >>>>> the driver to hopefully not make people surprise and hope to keep the >>>>> total number of supported MSI as 2688 so that we can support as many >>>>> cards that require MSI/MSI-X as possible. >>>> >>>> I don't think this is a valid approach. This breaks userspace (think of >>>> things like irqbalance), and breaks the SMP affinity model that Linux >>>> uses. No amount of documentation is going to solve it, so I think you >>>> just have to admit that the HW is mis-designed and do the best you can >>>> to make it work like Linux expect it to work. >>>> >>>> The alternative would to disable affinity setting altogether instead of >>>> introducing these horrible side effects. >>>> >>> I have it disabled (set_affinity does nothing) in my v4 patch. >> >> It would be good if you could try the above approach. It shouldn't be >> hard to write, and it would be a lot better than just ignoring the problem. >> > Yes. I am working on this change. In which case, I'll wait for a v5. No need to spend time on something that is going to change anyway. Thanks, M.
On Fri, Apr 17, 2015 at 5:45 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: > On 17/04/15 13:37, Duc Dang wrote: >> On Fri, Apr 17, 2015 at 3:17 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: >>> On 17/04/15 11:00, Duc Dang wrote: >>>> On Wed, Apr 15, 2015 at 1:16 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: >>>>> On Tue, 14 Apr 2015 19:20:19 +0100 >>>>> Duc Dang <dhdang@apm.com> wrote: >>>>> >>>>>> On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com> >>>>>> wrote: >>>>>>> On 2015-04-11 00:42, Duc Dang wrote: >>>>>>>> >>>>>>>> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier >>>>>>>> <marc.zyngier@arm.com> wrote: >>>>>>>>> >>>>>>>>> On 09/04/15 18:05, Duc Dang wrote: >>>>>>>>>> >>>>>>>>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into >>>>>>>>>> 16 HW IRQ lines. >>>>>>>>>> >>>>>>>>>> Signed-off-by: Duc Dang <dhdang@apm.com> >>>>>>>>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com> >>>>>>>>>> --- >>>>>>>>>> drivers/pci/host/Kconfig | 6 + >>>>>>>>>> drivers/pci/host/Makefile | 1 + >>>>>>>>>> drivers/pci/host/pci-xgene-msi.c | 407 >>>>>>>>>> +++++++++++++++++++++++++++++++++++++++ >>>>>>>>>> drivers/pci/host/pci-xgene.c | 21 ++ >>>>>>>>>> 4 files changed, 435 insertions(+) >>>>>>>>>> create mode 100644 drivers/pci/host/pci-xgene-msi.c >>>>>>>>>> >>>>>>>>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig >>>>>>>>>> index 7b892a9..c9b61fa 100644 >>>>>>>>>> --- a/drivers/pci/host/Kconfig >>>>>>>>>> +++ b/drivers/pci/host/Kconfig >>>>>>>>>> @@ -89,11 +89,17 @@ config PCI_XGENE >>>>>>>>>> depends on ARCH_XGENE >>>>>>>>>> depends on OF >>>>>>>>>> select PCIEPORTBUS >>>>>>>>>> + select PCI_MSI_IRQ_DOMAIN if PCI_MSI >>>>>>>>>> + select PCI_XGENE_MSI if PCI_MSI >>>>>>>>>> help >>>>>>>>>> Say Y here if you want internal PCI support on APM >>>>>>>>>> X-Gene SoC. There are 5 internal PCIe ports available. Each port >>>>>>>>>> is GEN3 capable >>>>>>>>>> and have varied lanes from x1 to x8. 
>>>>>>>>>> >>>>>>>>>> +config PCI_XGENE_MSI >>>>>>>>>> + bool "X-Gene v1 PCIe MSI feature" >>>>>>>>>> + depends on PCI_XGENE && PCI_MSI >>>>>>>>>> + >>>>>>>>>> config PCI_LAYERSCAPE >>>>>>>>>> bool "Freescale Layerscape PCIe controller" >>>>>>>>>> depends on OF && ARM >>>>>>>>>> diff --git a/drivers/pci/host/Makefile >>>>>>>>>> b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644 >>>>>>>>>> --- a/drivers/pci/host/Makefile >>>>>>>>>> +++ b/drivers/pci/host/Makefile >>>>>>>>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += >>>>>>>>>> pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o >>>>>>>>>> pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o >>>>>>>>>> obj-$(CONFIG_PCI_XGENE) += pci-xgene.o >>>>>>>>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o >>>>>>>>>> obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o >>>>>>>>>> obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o >>>>>>>>>> diff --git a/drivers/pci/host/pci-xgene-msi.c >>>>>>>>>> b/drivers/pci/host/pci-xgene-msi.c >>>>>>>>>> new file mode 100644 >>>>>>>>>> index 0000000..4f0ff42 >>>>>>>>>> --- /dev/null >>>>>>>>>> +++ b/drivers/pci/host/pci-xgene-msi.c >>>>>>>>>> @@ -0,0 +1,407 @@ >>>>>>>>>> +/* >>>>>>>>>> + * APM X-Gene MSI Driver >>>>>>>>>> + * >>>>>>>>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation >>>>>>>>>> + * Author: Tanmay Inamdar <tinamdar@apm.com> >>>>>>>>>> + * Duc Dang <dhdang@apm.com> >>>>>>>>>> + * >>>>>>>>>> + * This program is free software; you can redistribute it >>>>>>>>>> and/or modify it >>>>>>>>>> + * under the terms of the GNU General Public License as >>>>>>>>>> published by the >>>>>>>>>> + * Free Software Foundation; either version 2 of the License, >>>>>>>>>> or (at your >>>>>>>>>> + * option) any later version. 
>>>>>>>>>> + * >>>>>>>>>> + * This program is distributed in the hope that it will be >>>>>>>>>> useful, >>>>>>>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty >>>>>>>>>> of >>>>>>>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >>>>>>>>>> + * GNU General Public License for more details. >>>>>>>>>> + */ >>>>>>>>>> +#include <linux/interrupt.h> >>>>>>>>>> +#include <linux/module.h> >>>>>>>>>> +#include <linux/msi.h> >>>>>>>>>> +#include <linux/of_irq.h> >>>>>>>>>> +#include <linux/pci.h> >>>>>>>>>> +#include <linux/platform_device.h> >>>>>>>>>> +#include <linux/of_pci.h> >>>>>>>>>> + >>>>>>>>>> +#define MSI_INDEX0 0x000000 >>>>>>>>>> +#define MSI_INT0 0x800000 >>>>>>>>>> + >>>>>>>>>> +struct xgene_msi_settings { >>>>>>>>>> + u32 index_per_group; >>>>>>>>>> + u32 irqs_per_index; >>>>>>>>>> + u32 nr_msi_vec; >>>>>>>>>> + u32 nr_hw_irqs; >>>>>>>>>> +}; >>>>>>>>>> + >>>>>>>>>> +struct xgene_msi { >>>>>>>>>> + struct device_node *node; >>>>>>>>>> + struct msi_controller mchip; >>>>>>>>>> + struct irq_domain *domain; >>>>>>>>>> + struct xgene_msi_settings *settings; >>>>>>>>>> + u32 msi_addr_lo; >>>>>>>>>> + u32 msi_addr_hi; >>>>>>>>> >>>>>>>>> >>>>>>>>> I'd rather see the mailbox address directly, and only do the >>>>>>>>> split when assigning it to the message (you seem to play all kind >>>>>>>>> of tricks on the address anyway). >>>>>>>> >>>>>>>> >>>>>>>> msi_addr_lo and msi_addr_hi store the physical base address of MSI >>>>>>>> controller registers. I will add comment to clarify this. >>>>>>> >>>>>>> >>>>>>> What I mean is that there is no point in keeping this around as a >>>>>>> pair of 32bit variables. You'd better keep it as a single 64bit, >>>>>>> and do the split when assigning it the the MSI message. >>>>>> >>>>>> Hi Marc, >>>>>> >>>>>> These came from device-tree (which describes 64-bit address number as >>>>>> 2 32-bit words). >>>>> >>>>> ... 
and converted to a resource as a 64bit word, on which you apply >>>>> {upper,lower}_32_bit(). So much for DT... >>>>> >>>>>> If I store them this way, I don't need CPU cycles to do the split >>>>>> every time assigning them to the MSI message. Please let me know what >>>>>> do you think about it. >>>>> >>>>> This is getting absolutely silly. >>>>> >>>>> How many cycles does it take to execute "lsr x1, x0, #32" on X-Gene? If >>>>> it takes so long that it is considered to be a bottleneck, I suggest >>>>> you go and design a better CPU (hint: the answer is probably 1 cycle >>>>> absolutely everywhere). >>>>> >>>>> How often are you configuring MSIs in the face of what is happening in >>>>> the rest of the kernel? Almost never! >>>>> >>>>> So, given that "never" times 1 is still never, I'll consider that >>>>> readability of the code trumps it anytime (I can't believe we're having >>>>> that kind of conversation...). >>>>> >>>> I changed to use u64 for msi_addr and split it when composing MSI messages. >>>> The change is in v4 of the patch set that I just posted. >>>>>>> >>>>>>> [...] >>>>>>> >>>>>>>>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data, >>>>>>>>>> + const struct cpumask *mask, >>>>>>>>>> bool force) >>>>>>>>>> +{ >>>>>>>>>> + struct xgene_msi *msi = >>>>>>>>>> irq_data_get_irq_chip_data(irq_data); >>>>>>>>>> + unsigned int gic_irq; >>>>>>>>>> + int ret; >>>>>>>>>> + >>>>>>>>>> + gic_irq = msi->msi_virqs[irq_data->hwirq % >>>>>>>>>> msi->settings->nr_hw_irqs]; >>>>>>>>>> + ret = irq_set_affinity(gic_irq, mask); >>>>>>>>> >>>>>>>>> >>>>>>>>> Erm... This as the effect of moving *all* the MSIs hanging off >>>>>>>>> this interrupt to another CPU. I'm not sure that's an acceptable >>>>>>>>> effect... What if another MSI requires a different affinity? >>>>>>>> >>>>>>>> >>>>>>>> We have 16 'real' hardware IRQs. Each of these has multiple MSIs >>>>>>>> attached to it. 
>>>>>>>> So this will move all MSIs handing off this interrupt to another >>>>>>>> CPU; and we don't support different affinity settings for >>>>>>>> different MSIs that are attached to the same hardware IRQ. >>>>>>> >>>>>>> >>>>>>> Well, that's a significant departure from the expected behaviour. >>>>>>> In other words, I wonder how useful this is. Could you instead >>>>>>> reconfigure the MSI itself to hit the right CPU (assuming you don't >>>>>>> have more than 16 CPUs and if >>>>>>> that's only for XGene-1, this will only be 8 at most)? This would >>>>>>> reduce your number of possible MSIs, but at least the semantics of >>>>>>> set_afinity would be preserved. >>>>>> >>>>>> X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each >>>>>> group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have >>>>>> 16 hardware GIC IRQs for 2688 MSIs). >>>>> >>>>> We've already established that. >>>>> >>>>>> Setting affinity of single MSI to deliver it to a target CPU will move >>>>>> all the MSIs mapped to the same GIC IRQ to that CPU as well. This is >>>>>> not a standard behavior, but limiting the total number of MSIs will >>>>>> cause a lot of devices to fall back to INTx (with huge performance >>>>>> penalty) or even fail to load their driver as these devices request >>>>>> more than 16 MSIs during driver initialization. >>>>> >>>>> No, I think you got it wrong. If you have 168 MSIs per GIC IRQ, and >>>>> provided that you have 8 CPUs (XGene-1), you end up with 336 MSIs per >>>>> CPU (having statically assigned 2 IRQs per CPU). >>>>> >>>>> Assuming you adopt my scheme, you still have a grand total of 336 MSIs >>>>> that can be freely moved around without breaking any userspace >>>>> expectations. >>>>> >>>> Thanks Marc. This is a very good idea. >>>> >>>> But to move MSIs around, I need to change MSI termination address and data >>>> and write them to device configuration space. 
This may cause problems >>>> if the device >>>> fires an interrupt at the same time when I do the config write? >>>> >>>> What is your opinion here? >>> >>> There is an inherent race when changing the affinity of any interrupt, >>> whether that's an MSI or not. The kernel is perfectly prepared to handle >>> such a situation (to be honest, the kernel doesn't really care). >>> >>> The write to the config space shouldn't be a concern. You will either >>> hit the old *or* the new CPU, but that race is only during the write >>> itself (you can read back from the config space if you're really >>> paranoid). By the time you return from this read/write, the device will >>> be reconfigured. >>> >>>>> I think that 336 MSIs is a fair number (nowhere near the 16 you claim). >>>>> Most platforms are doing quite well with that kind of numbers. Also, >>>>> you don't have to allocate all the MSIs a device can possibly claim (up >>>>> to 2048 MSI-X per device), as they are all perfectly capable of using >>>>> less MSI without having to fallback to INTx). >>>>> >>>>>> I can document the limitation in affinity setting of X-Gene-1 MSI in >>>>>> the driver to hopefully not make people surprise and hope to keep the >>>>>> total number of supported MSI as 2688 so that we can support as many >>>>>> cards that require MSI/MSI-X as possible. >>>>> >>>>> I don't think this is a valid approach. This breaks userspace (think of >>>>> things like irqbalance), and breaks the SMP affinity model that Linux >>>>> uses. No amount of documentation is going to solve it, so I think you >>>>> just have to admit that the HW is mis-designed and do the best you can >>>>> to make it work like Linux expect it to work. >>>>> >>>>> The alternative would to disable affinity setting altogether instead of >>>>> introducing these horrible side effects. >>>>> >>>> I have it disabled (set_affinity does nothing) in my v4 patch. >>> >>> It would be good if you could try the above approach. 
It shouldn't be >>> hard to write, and it would be a lot better than just ignoring the problem. >>> >> Yes. I am working on this change. > > In which case, I'll wait for a v5. No need to spend time on something > that is going to change anyway. Marc: Is it possible to use the pci_msi_create_default_irq_domain for the X-Gene MSI driver. Or is this going to be removed in the future. Thanks > > Thanks, > > M. > -- > Jazz is not dead. It just smells funny...
This patch set adds MSI/MSIX termination driver support for APM X-Gene v1 SoC. APM X-Gene v1 SoC supports its own implementation of MSI, which is not compliant with the GIC V2M specification for MSI Termination. There is a single MSI block in the X-Gene v1 SoC which serves all 5 PCIe ports. This MSI block supports 2048 MSI termination ports coalesced into 16 physical HW IRQ lines and shared across all 5 PCIe ports. As of version 5 of this patch set, the total number of MSI vectors this driver supports is reduced to 256 to maintain the correct set_affinity behavior for each MSI. v5 changes: 1. Implement set_affinity for each MSI by statically allocating 2 MSI GIC IRQs for each X-Gene CPU core and moving MSI vectors around these GIC IRQs to steer them to the target CPU core. As a consequence, the total number of MSI vectors that X-Gene v1 supports is reduced to 256. v4 changes: 1. Remove affinity setting for each MSI 2. Add description about register layout, MSI termination address and data 3. Correct total number of MSI vectors to 2048 4. Clean up error messages 5. Remove unused module code v3 changes: 1. Implement MSI support using PCI MSI IRQ domain 2. Only use msi_controller to store IRQ domain v2 changes: 1. Use msi_controller structure 2. Remove arch hooks arch_teardown_msi_irqs and arch_setup_msi_irqs .../devicetree/bindings/pci/xgene-pci-msi.txt | 63 +++ MAINTAINERS | 8 + arch/arm64/boot/dts/apm/apm-storm.dtsi | 27 ++ drivers/pci/host/Kconfig | 6 + drivers/pci/host/Makefile | 1 + drivers/pci/host/pci-xgene-msi.c | 477 +++++++++++++++++++++ drivers/pci/host/pci-xgene.c | 21 + 7 files changed, 603 insertions(+) create mode 100644 Documentation/devicetree/bindings/pci/xgene-pci-msi.txt create mode 100644 drivers/pci/host/pci-xgene-msi.c
On 20/04/15 19:51, Feng Kan wrote: > Is it possible to use the pci_msi_create_default_irq_domain for the X-Gene MSI > driver. Or is this going to be removed in the future. Thanks This is not the preferred way. pci_msi_create_default_irq_domain is only there for legacy purposes, so that we can look up an MSI without specifying the domain. New code shouldn't have to rely on it. Thanks, M.
On 04/21/2015 12:04 AM, Duc Dang wrote: > This patch set adds MSI/MSIX termination driver support for APM X-Gene v1 SoC. > APM X-Gene v1 SoC supports its own implementation of MSI, which is not compliant > to GIC V2M specification for MSI Termination. > > There is single MSI block in X-Gene v1 SOC which serves all 5 PCIe ports. This MSI > block supports 2048 MSI termination ports coalesced into 16 physical HW IRQ lines > and shared across all 5 PCIe ports. As the version 5 of this patch, the total MSI > vectors this driver supports is reduced to 256 to maintain the correct set_affinity > behavior for each MSI. Quick note that I have tested the previous several versions of this patch series on several platforms and will test v6 once available with the pending feedback addressed. Jon.
On 04/21/2015 12:04 AM, Duc Dang wrote: > This patch set adds MSI/MSIX termination driver support for APM X-Gene v1 SoC. > APM X-Gene v1 SoC supports its own implementation of MSI, which is not compliant > to GIC V2M specification for MSI Termination. > > There is single MSI block in X-Gene v1 SOC which serves all 5 PCIe ports. This MSI > block supports 2048 MSI termination ports coalesced into 16 physical HW IRQ lines > and shared across all 5 PCIe ports. As the version 5 of this patch, the total MSI > vectors this driver supports is reduced to 256 to maintain the correct set_affinity > behavior for each MSI. Quick note that I have tested the previous several versions of this patch series on several platforms and will test v6 once available with the pending feedback addressed. Jon.
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig index 7b892a9..c9b61fa 100644 --- a/drivers/pci/host/Kconfig +++ b/drivers/pci/host/Kconfig @@ -89,11 +89,17 @@ config PCI_XGENE depends on ARCH_XGENE depends on OF select PCIEPORTBUS + select PCI_MSI_IRQ_DOMAIN if PCI_MSI + select PCI_XGENE_MSI if PCI_MSI help Say Y here if you want internal PCI support on APM X-Gene SoC. There are 5 internal PCIe ports available. Each port is GEN3 capable and have varied lanes from x1 to x8. +config PCI_XGENE_MSI + bool "X-Gene v1 PCIe MSI feature" + depends on PCI_XGENE && PCI_MSI + config PCI_LAYERSCAPE bool "Freescale Layerscape PCIe controller" depends on OF && ARM diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644 --- a/drivers/pci/host/Makefile +++ b/drivers/pci/host/Makefile @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o obj-$(CONFIG_PCI_XGENE) += pci-xgene.o +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c new file mode 100644 index 0000000..4f0ff42 --- /dev/null +++ b/drivers/pci/host/pci-xgene-msi.c @@ -0,0 +1,407 @@ +/* + * APM X-Gene MSI Driver + * + * Copyright (c) 2014, Applied Micro Circuits Corporation + * Author: Tanmay Inamdar <tinamdar@apm.com> + * Duc Dang <dhdang@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/msi.h> +#include <linux/of_irq.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/of_pci.h> + +#define MSI_INDEX0 0x000000 +#define MSI_INT0 0x800000 + +struct xgene_msi_settings { + u32 index_per_group; + u32 irqs_per_index; + u32 nr_msi_vec; + u32 nr_hw_irqs; +}; + +struct xgene_msi { + struct device_node *node; + struct msi_controller mchip; + struct irq_domain *domain; + struct xgene_msi_settings *settings; + u32 msi_addr_lo; + u32 msi_addr_hi; + void __iomem *msi_regs; + unsigned long *bitmap; + struct mutex bitmap_lock; + int *msi_virqs; +}; + +struct xgene_msi_settings storm_msi_settings = { + .index_per_group = 8, + .irqs_per_index = 21, + .nr_msi_vec = 2688, + .nr_hw_irqs = 16, +}; + +typedef int (*xgene_msi_initcall_t)(struct xgene_msi *); + +static int xgene_msi_init_storm_settings(struct xgene_msi *xgene_msi) +{ + xgene_msi->settings = &storm_msi_settings; + return 0; +} + +static struct irq_chip xgene_msi_top_irq_chip = { + .name = "X-Gene1 MSI", + .irq_enable = pci_msi_unmask_irq, + .irq_disable = pci_msi_mask_irq, + .irq_mask = pci_msi_mask_irq, + .irq_unmask = pci_msi_unmask_irq, +}; + +static struct msi_domain_info xgene_msi_domain_info = { + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_PCI_MSIX), + .chip = &xgene_msi_top_irq_chip, +}; + +static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct xgene_msi *msi = irq_data_get_irq_chip_data(data); + u32 nr_hw_irqs = msi->settings->nr_hw_irqs; + u32 irqs_per_index = msi->settings->irqs_per_index; + u32 reg_set = data->hwirq / (nr_hw_irqs * irqs_per_index); + u32 group = data->hwirq % 
nr_hw_irqs; + + msg->address_hi = msi->msi_addr_hi; + msg->address_lo = msi->msi_addr_lo + + (((8 * group) + reg_set) << 16); + msg->data = (data->hwirq / nr_hw_irqs) % irqs_per_index; +} + +static int xgene_msi_set_affinity(struct irq_data *irq_data, + const struct cpumask *mask, bool force) +{ + struct xgene_msi *msi = irq_data_get_irq_chip_data(irq_data); + unsigned int gic_irq; + int ret; + + gic_irq = msi->msi_virqs[irq_data->hwirq % msi->settings->nr_hw_irqs]; + ret = irq_set_affinity(gic_irq, mask); + if (ret == IRQ_SET_MASK_OK) + ret = IRQ_SET_MASK_OK_DONE; + + return ret; +} + +static struct irq_chip xgene_msi_bottom_irq_chip = { + .name = "MSI", + .irq_set_affinity = xgene_msi_set_affinity, + .irq_compose_msi_msg = xgene_compose_msi_msg, +}; + +static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *args) +{ + + struct xgene_msi *msi = domain->host_data; + u32 msi_irq_count = msi->settings->nr_msi_vec; + int msi_irq; + + mutex_lock(&msi->bitmap_lock); + + msi_irq = find_first_zero_bit(msi->bitmap, msi_irq_count); + if (msi_irq < msi_irq_count) + set_bit(msi_irq, msi->bitmap); + else + msi_irq = -ENOSPC; + + mutex_unlock(&msi->bitmap_lock); + + if (msi_irq < 0) + return msi_irq; + + irq_domain_set_info(domain, virq, msi_irq, &xgene_msi_bottom_irq_chip, + domain->host_data, handle_simple_irq, NULL, NULL); + set_irq_flags(virq, IRQF_VALID); + + return 0; +} + +static void xgene_irq_domain_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct xgene_msi *msi = irq_data_get_irq_chip_data(d); + + mutex_lock(&msi->bitmap_lock); + + if (!test_bit(d->hwirq, msi->bitmap)) + pr_err("trying to free unused MSI#%lu\n", d->hwirq); + else + clear_bit(d->hwirq, msi->bitmap); + + mutex_unlock(&msi->bitmap_lock); + + irq_domain_free_irqs_parent(domain, virq, nr_irqs); +} + +static const struct irq_domain_ops msi_domain_ops = { + 
.alloc = xgene_irq_domain_alloc, + .free = xgene_irq_domain_free, +}; + +static int xgene_allocate_domains(struct xgene_msi *msi) +{ + msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec, + &msi_domain_ops, msi); + + if (!msi->domain) { + pr_err("failed to create parent MSI domain\n"); + return -ENOMEM; + } + + msi->mchip.of_node = msi->node; + msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node, + &xgene_msi_domain_info, + msi->domain); + + if (!msi->mchip.domain) { + pr_err("failed to create MSI domain\n"); + irq_domain_remove(msi->domain); + return -ENOMEM; + } + + return 0; +} + +static void xgene_free_domains(struct xgene_msi *msi) +{ + if (msi->mchip.domain) + irq_domain_remove(msi->mchip.domain); + if (msi->domain) + irq_domain_remove(msi->domain); +} + +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi) +{ + u32 msi_irq_count = xgene_msi->settings->nr_msi_vec; + u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs; + int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long); + + xgene_msi->bitmap = kzalloc(size, GFP_KERNEL); + if (!xgene_msi->bitmap) + return -ENOMEM; + + mutex_init(&xgene_msi->bitmap_lock); + + xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL); + if (!xgene_msi->msi_virqs) + return -ENOMEM; + + return 0; +} + +static irqreturn_t xgene_msi_isr(int irq, void *data) +{ + struct xgene_msi *xgene_msi = (struct xgene_msi *) data; + unsigned int virq; + int msir_index, msir_reg, msir_val, hw_irq; + u32 intr_index, grp_select, msi_grp, processed = 0; + u32 nr_hw_irqs, irqs_per_index, index_per_group; + + msi_grp = irq - xgene_msi->msi_virqs[0]; + if (msi_grp >= xgene_msi->settings->nr_hw_irqs) { + pr_err("invalid msi received\n"); + return IRQ_NONE; + } + + nr_hw_irqs = xgene_msi->settings->nr_hw_irqs; + irqs_per_index = xgene_msi->settings->irqs_per_index; + index_per_group = xgene_msi->settings->index_per_group; + + grp_select = readl(xgene_msi->msi_regs + MSI_INT0 + (msi_grp << 16)); + 
while (grp_select) { + msir_index = ffs(grp_select) - 1; + msir_reg = (msi_grp << 19) + (msir_index << 16); + msir_val = readl(xgene_msi->msi_regs + MSI_INDEX0 + msir_reg); + while (msir_val) { + intr_index = ffs(msir_val) - 1; + hw_irq = (((msir_index * irqs_per_index) + intr_index) * + nr_hw_irqs) + msi_grp; + virq = irq_find_mapping(xgene_msi->domain, hw_irq); + if (virq != 0) + generic_handle_irq(virq); + msir_val &= ~(1 << intr_index); + processed++; + } + grp_select &= ~(1 << msir_index); + } + + return processed > 0 ? IRQ_HANDLED : IRQ_NONE; +} + +static int xgene_msi_remove(struct platform_device *pdev) +{ + int virq, i; + struct xgene_msi *msi = platform_get_drvdata(pdev); + u32 nr_hw_irqs = msi->settings->nr_hw_irqs; + + for (i = 0; i < nr_hw_irqs; i++) { + virq = msi->msi_virqs[i]; + if (virq != 0) + free_irq(virq, msi); + } + + kfree(msi->bitmap); + msi->bitmap = NULL; + + xgene_free_domains(msi); + + return 0; +} + +static int xgene_msi_setup_hwirq(struct xgene_msi *msi, + struct platform_device *pdev, + int irq_index) +{ + int virt_msir; + cpumask_var_t mask; + int err; + + virt_msir = platform_get_irq(pdev, irq_index); + if (virt_msir < 0) { + dev_err(&pdev->dev, "Cannot translate IRQ index %d\n", + irq_index); + return -EINVAL; + } + + err = request_irq(virt_msir, xgene_msi_isr, 0, + xgene_msi_top_irq_chip.name, msi); + if (err) { + dev_err(&pdev->dev, "request irq failed\n"); + return err; + } + + if (alloc_cpumask_var(&mask, GFP_KERNEL)) { + cpumask_setall(mask); + irq_set_affinity(virt_msir, mask); + free_cpumask_var(mask); + } + + msi->msi_virqs[irq_index] = virt_msir; + + return 0; +} + +static const struct of_device_id xgene_msi_match_table[] = { + {.compatible = "apm,xgene1-msi", + .data = xgene_msi_init_storm_settings}, + {}, +}; + +static int xgene_msi_probe(struct platform_device *pdev) +{ + struct resource *res; + int rc, irq_index; + struct device_node *np; + const struct of_device_id *matched_np; + struct xgene_msi *xgene_msi; + 
xgene_msi_initcall_t init_fn; + u32 nr_hw_irqs, nr_msi_vecs; + + np = of_find_matching_node_and_match(NULL, + xgene_msi_match_table, &matched_np); + if (!np) + return -ENODEV; + + xgene_msi = kzalloc(sizeof(struct xgene_msi), GFP_KERNEL); + if (!xgene_msi) { + dev_err(&pdev->dev, "failed to allocate X-Gene MSI data\n"); + return -ENOMEM; + } + + xgene_msi->node = np; + + init_fn = (xgene_msi_initcall_t) matched_np->data; + rc = init_fn(xgene_msi); + if (rc) + return rc; + + platform_set_drvdata(pdev, xgene_msi); + + nr_msi_vecs = xgene_msi->settings->nr_msi_vec; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + xgene_msi->msi_regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(xgene_msi->msi_regs)) { + dev_err(&pdev->dev, "no reg space\n"); + rc = -EINVAL; + goto error; + } + + xgene_msi->msi_addr_hi = upper_32_bits(res->start); + xgene_msi->msi_addr_lo = lower_32_bits(res->start); + + rc = xgene_msi_init_allocator(xgene_msi); + if (rc) { + dev_err(&pdev->dev, "Error allocating MSI bitmap\n"); + goto error; + } + + rc = xgene_allocate_domains(xgene_msi); + if (rc) { + dev_err(&pdev->dev, "Failed to allocate MSI domain\n"); + goto error; + } + + nr_hw_irqs = xgene_msi->settings->nr_hw_irqs; + for (irq_index = 0; irq_index < nr_hw_irqs; irq_index++) { + rc = xgene_msi_setup_hwirq(xgene_msi, pdev, irq_index); + if (rc) + goto error; + } + + rc = of_pci_msi_chip_add(&xgene_msi->mchip); + if (rc) { + dev_err(&pdev->dev, "failed to add MSI controller chip\n"); + goto error; + } + + dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n"); + + return 0; +error: + xgene_msi_remove(pdev); + return rc; +} + +static struct platform_driver xgene_msi_driver = { + .driver = { + .name = "xgene-msi", + .owner = THIS_MODULE, + .of_match_table = xgene_msi_match_table, + }, + .probe = xgene_msi_probe, + .remove = xgene_msi_remove, +}; + +static int __init xgene_pcie_msi_init(void) +{ + return platform_driver_register(&xgene_msi_driver); +} 
+subsys_initcall(xgene_pcie_msi_init); + +MODULE_AUTHOR("Duc Dang <dhdang@apm.com>"); +MODULE_DESCRIPTION("APM X-Gene PCIe MSI driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c index 22751ed..3e6faa1 100644 --- a/drivers/pci/host/pci-xgene.c +++ b/drivers/pci/host/pci-xgene.c @@ -468,6 +468,23 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port, return 0; } +static int xgene_pcie_msi_enable(struct pci_bus *bus) +{ + struct device_node *msi_node; + + msi_node = of_parse_phandle(bus->dev.of_node, + "msi-parent", 0); + if (!msi_node) + return -ENODEV; + + bus->msi = of_pci_find_msi_chip_by_node(msi_node); + if (bus->msi) + bus->msi->dev = &bus->dev; + else + return -ENODEV; + return 0; +} + static int xgene_pcie_probe_bridge(struct platform_device *pdev) { struct device_node *dn = pdev->dev.of_node; @@ -504,6 +521,10 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev) if (!bus) return -ENOMEM; + if (IS_ENABLED(CONFIG_PCI_MSI)) + if (xgene_pcie_msi_enable(bus)) + dev_info(port->dev, "failed to enable MSI\n"); + pci_scan_child_bus(bus); pci_assign_unassigned_bus_resources(bus); pci_bus_add_devices(bus);