Message ID | 1462457096-19795-2-git-send-email-hch@lst.de (mailing list archive) |
---|---|
State | New, archived |
Delegated to: | Bjorn Helgaas |
Headers | show |
On Thu, May 05, 2016 at 04:04:55PM +0200, Christoph Hellwig wrote: > Add a new pci_alloc_irq_vectors helper that allocates MSI-X or multi-MSI > vectors for PCI device while isolating the driver from the arcane details. > > This include handling both MSI-X, MSI and legacy interrupt fallbacks > transparently, automatic capping to the available vectors as well as storing > the information needed for request_irq in the PCI device itself so that > a lot of boiler plate code in the driver can be removed. > > In the future this will also allow us to automatically set up spreading > for interrupt vectors without having to duplicate it in all the drivers. I like this a lot. One relatively minor comment below. > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > drivers/pci/msi.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > include/linux/pci.h | 19 +++++++++ > 2 files changed, 127 insertions(+) > > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c > index a080f44..a510484 100644 > --- a/drivers/pci/msi.c > +++ b/drivers/pci/msi.c > @@ -4,6 +4,7 @@ > * > * Copyright (C) 2003-2004 Intel > * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) > + * Copyright (c) 2016 Christoph Hellwig. > */ > > #include <linux/err.h> > @@ -1120,6 +1121,113 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, > } > EXPORT_SYMBOL(pci_enable_msix_range); > > +static int __pci_enable_msix(struct pci_dev *dev, int nr_vecs) > +{ > + struct msix_entry *msix_entries; > + int ret, i; > + > + msix_entries = kcalloc(nr_vecs, sizeof(struct msix_entry), GFP_KERNEL); > + if (!msix_entries) > + return -ENOMEM; > + > + for (i = 0; i < nr_vecs; i++) > + msix_entries[i].entry = i; > + > + ret = msix_capability_init(dev, msix_entries, nr_vecs); > + if (ret == 0) { > + for (i = 0; i < nr_vecs; i++) > + dev->irqs[i] = msix_entries[i].vector; > + } > + > + kfree(msix_entries); > + return ret; > +} > + > +static int __pci_enable_msi(struct pci_dev *dev, int nr_vecs) > +{ > + int ret, i; > + > + ret = msi_capability_init(dev, nr_vecs); > + if (ret == 0) { > + for (i = 0; i < nr_vecs; i++) > + dev->irqs[i] = dev->irq + i; > + } > + > + return ret; > +} > + > +/** > + * pci_alloc_irq_vectors - allocate multiple IRQs for a device > + * @dev: PCI device to operate on > + * @nr_vecs: number of vectors to operate on > + * @flags: flags or quirks for the allocation > + * > + * Allocate @nr_vecs interrupt vectors for @dev, using MSI-X or MSI > + * vectors if available, and fall back to a single legacy vector > + * if neither is available. Return the number of vectors allocated > + * (which might be smaller than @nr_vecs) if successful, or a negative > + * error code on error. The Linux irq numbers for the allocated > + * vectors are stored in pdev->irqs. I think the "flags" argument penalizes working devices unnecessarily. Everybody that implements MSI-X correctly has to pass that zero argument: pci_alloc_irq_vectors(pdev, nr_io_queues, 0); instead of putting the burden on the broken folks like this: pdev->msix_broken = 1; pci_alloc_irq_vectors(pdev, nr_io_queues); If we remember this via a bit in struct pci_dev, we can also make sure pci_enable_msix(), pci_enable_msix_range(), etc. fail as they should. > + */ > +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int nr_vecs, > + unsigned int flags) > +{ > + unsigned int ret; > + > + if (WARN_ON_ONCE(dev->msi_enabled || dev->msix_enabled)) > + return -EINVAL; > + > + if (!pci_msi_supported(dev, 1)) > + goto use_legacy_irq; > + > + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) > + nr_vecs = min_t(unsigned int, nr_vecs, pci_msix_vec_count(dev)); > + else if (dev->msi_cap) > + nr_vecs = min_t(unsigned int, nr_vecs, pci_msi_vec_count(dev)); > + else > + goto use_legacy_irq; > + > + dev->irqs = kcalloc(nr_vecs, sizeof(u32), GFP_KERNEL); > + if (!dev->irqs) > + return -ENOMEM; > + > + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) > + ret = __pci_enable_msix(dev, nr_vecs); > + else > + ret = __pci_enable_msi(dev, nr_vecs); > + if (ret) > + goto out_free_irqs; > + > + return 0; > + > +out_free_irqs: > + kfree(dev->irqs); > +use_legacy_irq: > + dev->irqs = &dev->irq; > + return 1; > +} > +EXPORT_SYMBOL(pci_alloc_irq_vectors); > + > +/** > + * pci_free_irq_vectors - free previously allocated IRQs for a device > + * @dev: PCI device to operate on > + * > + * Undoes the allocations and enabling in pci_alloc_irq_vectors(). > + */ > +void pci_free_irq_vectors(struct pci_dev *dev) > +{ > + if (dev->msix_enabled) > + pci_disable_msix(dev); > + else if (dev->msi_enabled) > + pci_disable_msi(dev); > + > + if (dev->irqs != &dev->irq) > + kfree(dev->irqs); > + dev->irqs = NULL; > +} > +EXPORT_SYMBOL(pci_free_irq_vectors); > + > + > struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) > { > return to_pci_dev(desc->dev); > diff --git a/include/linux/pci.h b/include/linux/pci.h > index 932ec74..e201d0d 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -322,6 +322,7 @@ struct pci_dev { > * directly, use the values stored here. They might be different! > */ > unsigned int irq; > + unsigned int *irqs; > struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ > > bool match_driver; /* Skip attaching driver */ > @@ -1255,6 +1256,8 @@ struct msix_entry { > u16 entry; /* driver uses to specify entry, OS writes */ > }; > > +#define PCI_IRQ_NOMSIX (1 << 0) /* don't try to use MSI-X interrupts */ > + > #ifdef CONFIG_PCI_MSI > int pci_msi_vec_count(struct pci_dev *dev); > void pci_msi_shutdown(struct pci_dev *dev); > @@ -1283,6 +1286,10 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev, > return rc; > return 0; > } > + > +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int nr_vecs, > + unsigned int flags); > +void pci_free_irq_vectors(struct pci_dev *dev); > #else > static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } > static inline void pci_msi_shutdown(struct pci_dev *dev) { } > @@ -1306,6 +1313,18 @@ static inline int pci_enable_msix_range(struct pci_dev *dev, > static inline int pci_enable_msix_exact(struct pci_dev *dev, > struct msix_entry *entries, int nvec) > { return -ENOSYS; } > + > +static inline int pci_alloc_irq_vectors(struct pci_dev *dev, > + unsigned int nr_vecs, unsigned int flags) > +{ > + dev->irqs = &dev->irq; > + return 1; > +} > + > +static inline void pci_free_irq_vectors(struct pci_dev *dev) > +{ > + dev->irqs = NULL; > +} > #endif > > #ifdef CONFIG_PCIEPORTBUS > -- > 2.1.4 > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 06, 2016 at 11:04:08AM -0500, Bjorn Helgaas wrote:
> I like this a lot. One relatively minor comment below.
Do you want this addressed? The problem is that I probably will also
need something like this flags argument for introducing the irq spreading
as we can't simply do it for all callers directly. If you take
the patch as-is I'd be happy to do another pass later to remove it
and look into different no-MSI quirking once we're done with the
MSI-X spreading work.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 06, 2016 at 06:28:23PM +0200, Christoph Hellwig wrote: > On Fri, May 06, 2016 at 11:04:08AM -0500, Bjorn Helgaas wrote: > > I like this a lot. One relatively minor comment below. > > Do you want this addressed? The problem is that I probably will also > need something like this flags argument for introducing the irq spreading > as we can't simply do it for all callers directly. If you take > the patch as-is I'd be happy to do another pass later to remove it > and look into different no-MSI quirking once we're done with the > MSI-X spreading work. I don't know what sort of clues you'll need for IRQ spreading, but that sounds like a good reason for the argument. So if you will need the flags argument for something else, then I think it's OK as-is. Bjorn -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 06, 2016 at 11:35:10AM -0500, Bjorn Helgaas wrote: > I don't know what sort of clues you'll need for IRQ spreading, but > that sounds like a good reason for the argument. So if you will need > the flags argument for something else, then I think it's OK as-is. The basic idea is that we want to spread the MSI(-X) vectors around properly right when allocating them. See the series around this patch when I last posted it. For for now various drivers use different ways of spreading them, so we'll need to make it opt-in in the beginning. In the long run we'll hopefully have everyone converted to the common algorithm and can get rid of the flag, but that might take a while. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, May 05, 2016 at 04:04:55PM +0200, Christoph Hellwig wrote: > Add a new pci_alloc_irq_vectors helper that allocates MSI-X or multi-MSI > vectors for PCI device while isolating the driver from the arcane details. > > This include handling both MSI-X, MSI and legacy interrupt fallbacks > transparently, automatic capping to the available vectors as well as storing > the information needed for request_irq in the PCI device itself so that > a lot of boiler plate code in the driver can be removed. > > In the future this will also allow us to automatically set up spreading > for interrupt vectors without having to duplicate it in all the drivers. > > Signed-off-by: Christoph Hellwig <hch@lst.de> Thanks, I applied this to pci/msi for v4.7. I'd be happy to apply the nvme change, too, given the appropriate acks. > --- > drivers/pci/msi.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > include/linux/pci.h | 19 +++++++++ > 2 files changed, 127 insertions(+) > > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c > index a080f44..a510484 100644 > --- a/drivers/pci/msi.c > +++ b/drivers/pci/msi.c > @@ -4,6 +4,7 @@ > * > * Copyright (C) 2003-2004 Intel > * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) > + * Copyright (c) 2016 Christoph Hellwig. > */ > > #include <linux/err.h> > @@ -1120,6 +1121,113 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, > } > EXPORT_SYMBOL(pci_enable_msix_range); > > +static int __pci_enable_msix(struct pci_dev *dev, int nr_vecs) > +{ > + struct msix_entry *msix_entries; > + int ret, i; > + > + msix_entries = kcalloc(nr_vecs, sizeof(struct msix_entry), GFP_KERNEL); > + if (!msix_entries) > + return -ENOMEM; > + > + for (i = 0; i < nr_vecs; i++) > + msix_entries[i].entry = i; > + > + ret = msix_capability_init(dev, msix_entries, nr_vecs); > + if (ret == 0) { > + for (i = 0; i < nr_vecs; i++) > + dev->irqs[i] = msix_entries[i].vector; > + } > + > + kfree(msix_entries); > + return ret; > +} > + > +static int __pci_enable_msi(struct pci_dev *dev, int nr_vecs) > +{ > + int ret, i; > + > + ret = msi_capability_init(dev, nr_vecs); > + if (ret == 0) { > + for (i = 0; i < nr_vecs; i++) > + dev->irqs[i] = dev->irq + i; > + } > + > + return ret; > +} > + > +/** > + * pci_alloc_irq_vectors - allocate multiple IRQs for a device > + * @dev: PCI device to operate on > + * @nr_vecs: number of vectors to operate on > + * @flags: flags or quirks for the allocation > + * > + * Allocate @nr_vecs interrupt vectors for @dev, using MSI-X or MSI > + * vectors if available, and fall back to a single legacy vector > + * if neither is available. Return the number of vectors allocated > + * (which might be smaller than @nr_vecs) if successful, or a negative > + * error code on error. The Linux irq numbers for the allocated > + * vectors are stored in pdev->irqs. > + */ > +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int nr_vecs, > + unsigned int flags) > +{ > + unsigned int ret; > + > + if (WARN_ON_ONCE(dev->msi_enabled || dev->msix_enabled)) > + return -EINVAL; > + > + if (!pci_msi_supported(dev, 1)) > + goto use_legacy_irq; > + > + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) > + nr_vecs = min_t(unsigned int, nr_vecs, pci_msix_vec_count(dev)); > + else if (dev->msi_cap) > + nr_vecs = min_t(unsigned int, nr_vecs, pci_msi_vec_count(dev)); > + else > + goto use_legacy_irq; > + > + dev->irqs = kcalloc(nr_vecs, sizeof(u32), GFP_KERNEL); > + if (!dev->irqs) > + return -ENOMEM; > + > + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) > + ret = __pci_enable_msix(dev, nr_vecs); > + else > + ret = __pci_enable_msi(dev, nr_vecs); > + if (ret) > + goto out_free_irqs; > + > + return 0; > + > +out_free_irqs: > + kfree(dev->irqs); > +use_legacy_irq: > + dev->irqs = &dev->irq; > + return 1; > +} > +EXPORT_SYMBOL(pci_alloc_irq_vectors); > + > +/** > + * pci_free_irq_vectors - free previously allocated IRQs for a device > + * @dev: PCI device to operate on > + * > + * Undoes the allocations and enabling in pci_alloc_irq_vectors(). > + */ > +void pci_free_irq_vectors(struct pci_dev *dev) > +{ > + if (dev->msix_enabled) > + pci_disable_msix(dev); > + else if (dev->msi_enabled) > + pci_disable_msi(dev); > + > + if (dev->irqs != &dev->irq) > + kfree(dev->irqs); > + dev->irqs = NULL; > +} > +EXPORT_SYMBOL(pci_free_irq_vectors); > + > + > struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) > { > return to_pci_dev(desc->dev); > diff --git a/include/linux/pci.h b/include/linux/pci.h > index 932ec74..e201d0d 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -322,6 +322,7 @@ struct pci_dev { > * directly, use the values stored here. They might be different! > */ > unsigned int irq; > + unsigned int *irqs; > struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ > > bool match_driver; /* Skip attaching driver */ > @@ -1255,6 +1256,8 @@ struct msix_entry { > u16 entry; /* driver uses to specify entry, OS writes */ > }; > > +#define PCI_IRQ_NOMSIX (1 << 0) /* don't try to use MSI-X interrupts */ > + > #ifdef CONFIG_PCI_MSI > int pci_msi_vec_count(struct pci_dev *dev); > void pci_msi_shutdown(struct pci_dev *dev); > @@ -1283,6 +1286,10 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev, > return rc; > return 0; > } > + > +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int nr_vecs, > + unsigned int flags); > +void pci_free_irq_vectors(struct pci_dev *dev); > #else > static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } > static inline void pci_msi_shutdown(struct pci_dev *dev) { } > @@ -1306,6 +1313,18 @@ static inline int pci_enable_msix_range(struct pci_dev *dev, > static inline int pci_enable_msix_exact(struct pci_dev *dev, > struct msix_entry *entries, int nvec) > { return -ENOSYS; } > + > +static inline int pci_alloc_irq_vectors(struct pci_dev *dev, > + unsigned int nr_vecs, unsigned int flags) > +{ > + dev->irqs = &dev->irq; > + return 1; > +} > + > +static inline void pci_free_irq_vectors(struct pci_dev *dev) > +{ > + dev->irqs = NULL; > +} > #endif > > #ifdef CONFIG_PCIEPORTBUS > -- > 2.1.4 > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, May 05, 2016 at 04:04:55PM +0200, Christoph Hellwig wrote: > Add a new pci_alloc_irq_vectors helper that allocates MSI-X or multi-MSI > vectors for PCI device while isolating the driver from the arcane details. > > This include handling both MSI-X, MSI and legacy interrupt fallbacks > transparently, automatic capping to the available vectors as well as storing > the information needed for request_irq in the PCI device itself so that > a lot of boiler plate code in the driver can be removed. > > In the future this will also allow us to automatically set up spreading > for interrupt vectors without having to duplicate it in all the drivers. Hi Christoph, Sorry for jumping in that late. The MSI/MSI-X enabling pattern you introduce seems quite common so I think it worth a separate function. However, it is not exactly what is stated in the changelog above, since it does not really do any fallbacks (see [1] below). Moreover, patch 2/2 not only removed pci_enable_msi[x]_range() internal fallback logic (from desired number of interrupts to available number of interrupts), but it also removed MSI-X to MSI fallback. May be it is what NVMe driver needs, but (once again) it is not what the changelog claims. > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > drivers/pci/msi.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > include/linux/pci.h | 19 +++++++++ > 2 files changed, 127 insertions(+) > > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c > index a080f44..a510484 100644 > --- a/drivers/pci/msi.c > +++ b/drivers/pci/msi.c > @@ -4,6 +4,7 @@ > * > * Copyright (C) 2003-2004 Intel > * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) > + * Copyright (c) 2016 Christoph Hellwig. > */ > > #include <linux/err.h> > @@ -1120,6 +1121,113 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, > } > EXPORT_SYMBOL(pci_enable_msix_range); > > +static int __pci_enable_msix(struct pci_dev *dev, int nr_vecs) > +{ > + struct msix_entry *msix_entries; > + int ret, i; > + > + msix_entries = kcalloc(nr_vecs, sizeof(struct msix_entry), GFP_KERNEL); > + if (!msix_entries) > + return -ENOMEM; > + > + for (i = 0; i < nr_vecs; i++) > + msix_entries[i].entry = i; > + > + ret = msix_capability_init(dev, msix_entries, nr_vecs); > + if (ret == 0) { > + for (i = 0; i < nr_vecs; i++) > + dev->irqs[i] = msix_entries[i].vector; > + } > + > + kfree(msix_entries); > + return ret; > +} > + > +static int __pci_enable_msi(struct pci_dev *dev, int nr_vecs) > +{ > + int ret, i; > + > + ret = msi_capability_init(dev, nr_vecs); > + if (ret == 0) { > + for (i = 0; i < nr_vecs; i++) > + dev->irqs[i] = dev->irq + i; > + } > + > + return ret; > +} > + > +/** > + * pci_alloc_irq_vectors - allocate multiple IRQs for a device > + * @dev: PCI device to operate on > + * @nr_vecs: number of vectors to operate on > + * @flags: flags or quirks for the allocation > + * > + * Allocate @nr_vecs interrupt vectors for @dev, using MSI-X or MSI > + * vectors if available, and fall back to a single legacy vector > + * if neither is available. Return the number of vectors allocated > + * (which might be smaller than @nr_vecs) if successful, or a negative > + * error code on error. The Linux irq numbers for the allocated > + * vectors are stored in pdev->irqs. > + */ > +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int nr_vecs, > + unsigned int flags) > +{ > + unsigned int ret; > + > + if (WARN_ON_ONCE(dev->msi_enabled || dev->msix_enabled)) > + return -EINVAL; > + > + if (!pci_msi_supported(dev, 1)) > + goto use_legacy_irq; > + > + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) > + nr_vecs = min_t(unsigned int, nr_vecs, pci_msix_vec_count(dev)); > + else if (dev->msi_cap) > + nr_vecs = min_t(unsigned int, nr_vecs, pci_msi_vec_count(dev)); > + else > + goto use_legacy_irq; > + > + dev->irqs = kcalloc(nr_vecs, sizeof(u32), GFP_KERNEL); > + if (!dev->irqs) > + return -ENOMEM; > + > + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) > + ret = __pci_enable_msix(dev, nr_vecs); > + else > + ret = __pci_enable_msi(dev, nr_vecs); 1. No fallbacks. > + if (ret) > + goto out_free_irqs; > + > + return 0; > + > +out_free_irqs: > + kfree(dev->irqs); > +use_legacy_irq: > + dev->irqs = &dev->irq; > + return 1; > +} > +EXPORT_SYMBOL(pci_alloc_irq_vectors); > + > +/** > + * pci_free_irq_vectors - free previously allocated IRQs for a device > + * @dev: PCI device to operate on > + * > + * Undoes the allocations and enabling in pci_alloc_irq_vectors(). > + */ > +void pci_free_irq_vectors(struct pci_dev *dev) > +{ > + if (dev->msix_enabled) > + pci_disable_msix(dev); > + else if (dev->msi_enabled) > + pci_disable_msi(dev); > + > + if (dev->irqs != &dev->irq) > + kfree(dev->irqs); > + dev->irqs = NULL; > +} > +EXPORT_SYMBOL(pci_free_irq_vectors); > + > + > struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) > { > return to_pci_dev(desc->dev); > diff --git a/include/linux/pci.h b/include/linux/pci.h > index 932ec74..e201d0d 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -322,6 +322,7 @@ struct pci_dev { > * directly, use the values stored here. They might be different! > */ > unsigned int irq; > + unsigned int *irqs; > struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ > > bool match_driver; /* Skip attaching driver */ > @@ -1255,6 +1256,8 @@ struct msix_entry { > u16 entry; /* driver uses to specify entry, OS writes */ > }; > > +#define PCI_IRQ_NOMSIX (1 << 0) /* don't try to use MSI-X interrupts */ > + > #ifdef CONFIG_PCI_MSI > int pci_msi_vec_count(struct pci_dev *dev); > void pci_msi_shutdown(struct pci_dev *dev); > @@ -1283,6 +1286,10 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev, > return rc; > return 0; > } > + > +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int nr_vecs, > + unsigned int flags); > +void pci_free_irq_vectors(struct pci_dev *dev); > #else > static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } > static inline void pci_msi_shutdown(struct pci_dev *dev) { } > @@ -1306,6 +1313,18 @@ static inline int pci_enable_msix_range(struct pci_dev *dev, > static inline int pci_enable_msix_exact(struct pci_dev *dev, > struct msix_entry *entries, int nvec) > { return -ENOSYS; } > + > +static inline int pci_alloc_irq_vectors(struct pci_dev *dev, > + unsigned int nr_vecs, unsigned int flags) > +{ > + dev->irqs = &dev->irq; > + return 1; > +} > + > +static inline void pci_free_irq_vectors(struct pci_dev *dev) > +{ > + dev->irqs = NULL; > +} > #endif > > #ifdef CONFIG_PCIEPORTBUS > -- > 2.1.4 > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Alexander, > Moreover, patch 2/2 not only removed pci_enable_msi[x]_range() > internal fallback logic (from desired number of interrupts to > available number of interrupts), but it also removed MSI-X to > MSI fallback. That is done in the very begining of the function, see the quoted part of the patch just below: > > + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) > > + nr_vecs = min_t(unsigned int, nr_vecs, pci_msix_vec_count(dev)); > > + else if (dev->msi_cap) > > + nr_vecs = min_t(unsigned int, nr_vecs, pci_msi_vec_count(dev)); > > + else > > + goto use_legacy_irq; > > + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) > > + ret = __pci_enable_msix(dev, nr_vecs); > > + else > > + ret = __pci_enable_msi(dev, nr_vecs); > > 1. No fallbacks. I read through the code in msi.c in detail and could not find a legitimate case where we have msix_cap but actually fail the MSI-X allocation. If that is a valid case I can happily add the fallback here as well. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, May 09, 2016 at 05:46:31PM -0500, Bjorn Helgaas wrote: > Thanks, I applied this to pci/msi for v4.7. I'd be happy to apply the > nvme change, too, given the appropriate acks. Given the merge conflict that Keith pointed out I suspect it'll be better to defer it until the next merge window. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, May 11, 2016 at 10:50:49AM +0200, Christoph Hellwig wrote: > Hi Alexander, > > > Moreover, patch 2/2 not only removed pci_enable_msi[x]_range() > > internal fallback logic (from desired number of interrupts to > > available number of interrupts), but it also removed MSI-X to > > MSI fallback. > > That is done in the very begining of the function, see the quoted > part of the patch just below: Nope. This check verifies MSI info, but fail to call arch-specific arch_setup_msi_irqs() function - that is where an arch actually does MSI interrupt allocation. I.e. some PPC platforms are known for its run-time quota which fails to allocate as many vectors as device MSI header reports. Such conditions are circumvented with a cycle in range family functions: do { /* pci_enable_msix() calls arch_setup_msi_irqs() */ rc = pci_enable_msix(dev, entries, nvec); if (rc < 0) { return rc; } else if (rc > 0) { if (rc < minvec) return -ENOSPC; nvec = rc; } } while (rc); > > > + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) > > > + nr_vecs = min_t(unsigned int, nr_vecs, pci_msix_vec_count(dev)); > > > + else if (dev->msi_cap) > > > + nr_vecs = min_t(unsigned int, nr_vecs, pci_msi_vec_count(dev)); > > > + else > > > + goto use_legacy_irq; So above and below you choose either MSI-X or MSI. That is not a fallback from MSI-X to MSI, nor most possible number of whatever type of MSI vectors obtained from the underlying architecture. By contrast, the current NVMe driver implementation does it: vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues); if (vecs < 0) { vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32)); if (vecs < 0) { vecs = 1; } else { for (i = 0; i < vecs; i++) dev->entry[i].vector = i + pdev->irq; } } Again, I do not know what change to NVMe driver is needed. I am just pointing out that the changlog for pci_alloc_irq_vectors() function is not entirely correct and the change to NVMe driver might be undesirable. > > > + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) > > > + ret = __pci_enable_msix(dev, nr_vecs); > > > + else > > > + ret = __pci_enable_msi(dev, nr_vecs); > > > > 1. No fallbacks. > > I read through the code in msi.c in detail and could not find a legitimate > case where we have msix_cap but actually fail the MSI-X allocation. If that > is a valid case I can happily add the fallback here as well. > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, May 12, 2016 at 01:04:58PM +0200, Alexander Gordeev wrote: > Because MSI vectors are allocated sequentially we do not really > need pdev->irqs[] for MSI. It is 32 items at most, but still. The allocator itself doesn't "need" it. Any driver that doesn't want to special case MSI vs MSI-X all through the code needs it, though. (oh, and please don't full quote the whole email, thank you!) -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, May 05, 2016 at 04:04:55PM +0200, Christoph Hellwig wrote: > Add a new pci_alloc_irq_vectors helper that allocates MSI-X or multi-MSI > vectors for PCI device while isolating the driver from the arcane details. > > This include handling both MSI-X, MSI and legacy interrupt fallbacks > transparently, automatic capping to the available vectors as well as storing > the information needed for request_irq in the PCI device itself so that > a lot of boiler plate code in the driver can be removed. > > In the future this will also allow us to automatically set up spreading > for interrupt vectors without having to duplicate it in all the drivers. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > drivers/pci/msi.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > include/linux/pci.h | 19 +++++++++ > 2 files changed, 127 insertions(+) > > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c > index a080f44..a510484 100644 > --- a/drivers/pci/msi.c > +++ b/drivers/pci/msi.c > @@ -4,6 +4,7 @@ > * > * Copyright (C) 2003-2004 Intel > * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) > + * Copyright (c) 2016 Christoph Hellwig. > */ > > #include <linux/err.h> > @@ -1120,6 +1121,113 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, > } > EXPORT_SYMBOL(pci_enable_msix_range); > > +static int __pci_enable_msix(struct pci_dev *dev, int nr_vecs) > +{ > + struct msix_entry *msix_entries; > + int ret, i; > + > + msix_entries = kcalloc(nr_vecs, sizeof(struct msix_entry), GFP_KERNEL); > + if (!msix_entries) > + return -ENOMEM; > + > + for (i = 0; i < nr_vecs; i++) > + msix_entries[i].entry = i; > + > + ret = msix_capability_init(dev, msix_entries, nr_vecs); > + if (ret == 0) { > + for (i = 0; i < nr_vecs; i++) > + dev->irqs[i] = msix_entries[i].vector; > + } > + > + kfree(msix_entries); > + return ret; > +} > + > +static int __pci_enable_msi(struct pci_dev *dev, int nr_vecs) > +{ > + int ret, i; > + > + ret = msi_capability_init(dev, nr_vecs); > + if (ret == 0) { > + for (i = 0; i < nr_vecs; i++) > + dev->irqs[i] = dev->irq + i; > + } > + > + return ret; > +} > + > +/** > + * pci_alloc_irq_vectors - allocate multiple IRQs for a device > + * @dev: PCI device to operate on > + * @nr_vecs: number of vectors to operate on > + * @flags: flags or quirks for the allocation > + * > + * Allocate @nr_vecs interrupt vectors for @dev, using MSI-X or MSI > + * vectors if available, and fall back to a single legacy vector > + * if neither is available. Return the number of vectors allocated > + * (which might be smaller than @nr_vecs) if successful, or a negative > + * error code on error. The Linux irq numbers for the allocated > + * vectors are stored in pdev->irqs. Because MSI vectors are allocated sequentially we do not really need pdev->irqs[] for MSI. It is 32 items at most, but still. > + */ > +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int nr_vecs, > + unsigned int flags) > +{ > + unsigned int ret; > + > + if (WARN_ON_ONCE(dev->msi_enabled || dev->msix_enabled)) > + return -EINVAL; > + > + if (!pci_msi_supported(dev, 1)) > + goto use_legacy_irq; > + > + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) > + nr_vecs = min_t(unsigned int, nr_vecs, pci_msix_vec_count(dev)); > + else if (dev->msi_cap) > + nr_vecs = min_t(unsigned int, nr_vecs, pci_msi_vec_count(dev)); > + else > + goto use_legacy_irq; > + > + dev->irqs = kcalloc(nr_vecs, sizeof(u32), GFP_KERNEL); > + if (!dev->irqs) > + return -ENOMEM; > + > + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) > + ret = __pci_enable_msix(dev, nr_vecs); > + else > + ret = __pci_enable_msi(dev, nr_vecs); > + if (ret) > + goto out_free_irqs; > + > + return 0; > + > +out_free_irqs: > + kfree(dev->irqs); > +use_legacy_irq: > + dev->irqs = &dev->irq; > + return 1; > +} > +EXPORT_SYMBOL(pci_alloc_irq_vectors); > + > +/** > + * pci_free_irq_vectors - free previously allocated IRQs for a device > + * @dev: PCI device to operate on > + * > + * Undoes the allocations and enabling in pci_alloc_irq_vectors(). > + */ > +void pci_free_irq_vectors(struct pci_dev *dev) > +{ > + if (dev->msix_enabled) > + pci_disable_msix(dev); > + else if (dev->msi_enabled) > + pci_disable_msi(dev); > + > + if (dev->irqs != &dev->irq) > + kfree(dev->irqs); > + dev->irqs = NULL; > +} > +EXPORT_SYMBOL(pci_free_irq_vectors); > + > + > struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) > { > return to_pci_dev(desc->dev); > diff --git a/include/linux/pci.h b/include/linux/pci.h > index 932ec74..e201d0d 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -322,6 +322,7 @@ struct pci_dev { > * directly, use the values stored here. They might be different! > */ > unsigned int irq; > + unsigned int *irqs; > struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ > > bool match_driver; /* Skip attaching driver */ > @@ -1255,6 +1256,8 @@ struct msix_entry { > u16 entry; /* driver uses to specify entry, OS writes */ > }; > > +#define PCI_IRQ_NOMSIX (1 << 0) /* don't try to use MSI-X interrupts */ > + > #ifdef CONFIG_PCI_MSI > int pci_msi_vec_count(struct pci_dev *dev); > void pci_msi_shutdown(struct pci_dev *dev); > @@ -1283,6 +1286,10 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev, > return rc; > return 0; > } > + > +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int nr_vecs, > + unsigned int flags); > +void pci_free_irq_vectors(struct pci_dev *dev); > #else > static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } > static inline void pci_msi_shutdown(struct pci_dev *dev) { } > @@ -1306,6 +1313,18 @@ static inline int pci_enable_msix_range(struct pci_dev *dev, > static inline int pci_enable_msix_exact(struct pci_dev *dev, > struct msix_entry *entries, int nvec) > { return -ENOSYS; } > + > +static inline int pci_alloc_irq_vectors(struct pci_dev *dev, > + unsigned int nr_vecs, unsigned int flags) > +{ > + dev->irqs = &dev->irq; > + return 1; > +} > + > +static inline void pci_free_irq_vectors(struct pci_dev *dev) > +{ > + dev->irqs = NULL; > +} > #endif > > #ifdef CONFIG_PCIEPORTBUS > -- > 2.1.4 > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, May 09, 2016 at 05:46:31PM -0500, Bjorn Helgaas wrote: > On Thu, May 05, 2016 at 04:04:55PM +0200, Christoph Hellwig wrote: > > Add a new pci_alloc_irq_vectors helper that allocates MSI-X or multi-MSI > > vectors for PCI device while isolating the driver from the arcane details. > > > > This include handling both MSI-X, MSI and legacy interrupt fallbacks > > transparently, automatic capping to the available vectors as well as storing > > the information needed for request_irq in the PCI device itself so that > > a lot of boiler plate code in the driver can be removed. > > > > In the future this will also allow us to automatically set up spreading > > for interrupt vectors without having to duplicate it in all the drivers. > > > > Signed-off-by: Christoph Hellwig <hch@lst.de> > > Thanks, I applied this to pci/msi for v4.7. Given the subsequent discussion, it seems a little premature to merge this for v4.7, so I'm dropping this for now. Bjorn -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Ok, I'll have an updated version ready early for the next merge window. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index a080f44..a510484 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -4,6 +4,7 @@ * * Copyright (C) 2003-2004 Intel * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) + * Copyright (c) 2016 Christoph Hellwig. */ #include <linux/err.h> @@ -1120,6 +1121,113 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, } EXPORT_SYMBOL(pci_enable_msix_range); +static int __pci_enable_msix(struct pci_dev *dev, int nr_vecs) +{ + struct msix_entry *msix_entries; + int ret, i; + + msix_entries = kcalloc(nr_vecs, sizeof(struct msix_entry), GFP_KERNEL); + if (!msix_entries) + return -ENOMEM; + + for (i = 0; i < nr_vecs; i++) + msix_entries[i].entry = i; + + ret = msix_capability_init(dev, msix_entries, nr_vecs); + if (ret == 0) { + for (i = 0; i < nr_vecs; i++) + dev->irqs[i] = msix_entries[i].vector; + } + + kfree(msix_entries); + return ret; +} + +static int __pci_enable_msi(struct pci_dev *dev, int nr_vecs) +{ + int ret, i; + + ret = msi_capability_init(dev, nr_vecs); + if (ret == 0) { + for (i = 0; i < nr_vecs; i++) + dev->irqs[i] = dev->irq + i; + } + + return ret; +} + +/** + * pci_alloc_irq_vectors - allocate multiple IRQs for a device + * @dev: PCI device to operate on + * @nr_vecs: number of vectors to operate on + * @flags: flags or quirks for the allocation + * + * Allocate @nr_vecs interrupt vectors for @dev, using MSI-X or MSI + * vectors if available, and fall back to a single legacy vector + * if neither is available. Return the number of vectors allocated + * (which might be smaller than @nr_vecs) if successful, or a negative + * error code on error. The Linux irq numbers for the allocated + * vectors are stored in pdev->irqs. + */ +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int nr_vecs, + unsigned int flags) +{ + unsigned int ret; + + if (WARN_ON_ONCE(dev->msi_enabled || dev->msix_enabled)) + return -EINVAL; + + if (!pci_msi_supported(dev, 1)) + goto use_legacy_irq; + + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) + nr_vecs = min_t(unsigned int, nr_vecs, pci_msix_vec_count(dev)); + else if (dev->msi_cap) + nr_vecs = min_t(unsigned int, nr_vecs, pci_msi_vec_count(dev)); + else + goto use_legacy_irq; + + dev->irqs = kcalloc(nr_vecs, sizeof(u32), GFP_KERNEL); + if (!dev->irqs) + return -ENOMEM; + + if (dev->msix_cap && !(flags & PCI_IRQ_NOMSIX)) + ret = __pci_enable_msix(dev, nr_vecs); + else + ret = __pci_enable_msi(dev, nr_vecs); + if (ret) + goto out_free_irqs; + + return 0; + +out_free_irqs: + kfree(dev->irqs); +use_legacy_irq: + dev->irqs = &dev->irq; + return 1; +} +EXPORT_SYMBOL(pci_alloc_irq_vectors); + +/** + * pci_free_irq_vectors - free previously allocated IRQs for a device + * @dev: PCI device to operate on + * + * Undoes the allocations and enabling in pci_alloc_irq_vectors(). + */ +void pci_free_irq_vectors(struct pci_dev *dev) +{ + if (dev->msix_enabled) + pci_disable_msix(dev); + else if (dev->msi_enabled) + pci_disable_msi(dev); + + if (dev->irqs != &dev->irq) + kfree(dev->irqs); + dev->irqs = NULL; +} +EXPORT_SYMBOL(pci_free_irq_vectors); + + struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) { return to_pci_dev(desc->dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 932ec74..e201d0d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -322,6 +322,7 @@ struct pci_dev { * directly, use the values stored here. They might be different! */ unsigned int irq; + unsigned int *irqs; struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ bool match_driver; /* Skip attaching driver */ @@ -1255,6 +1256,8 @@ struct msix_entry { u16 entry; /* driver uses to specify entry, OS writes */ }; +#define PCI_IRQ_NOMSIX (1 << 0) /* don't try to use MSI-X interrupts */ + #ifdef CONFIG_PCI_MSI int pci_msi_vec_count(struct pci_dev *dev); void pci_msi_shutdown(struct pci_dev *dev); @@ -1283,6 +1286,10 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev, return rc; return 0; } + +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int nr_vecs, + unsigned int flags); +void pci_free_irq_vectors(struct pci_dev *dev); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } static inline void pci_msi_shutdown(struct pci_dev *dev) { } @@ -1306,6 +1313,18 @@ static inline int pci_enable_msix_range(struct pci_dev *dev, static inline int pci_enable_msix_exact(struct pci_dev *dev, struct msix_entry *entries, int nvec) { return -ENOSYS; } + +static inline int pci_alloc_irq_vectors(struct pci_dev *dev, + unsigned int nr_vecs, unsigned int flags) +{ + dev->irqs = &dev->irq; + return 1; +} + +static inline void pci_free_irq_vectors(struct pci_dev *dev) +{ + dev->irqs = NULL; +} #endif #ifdef CONFIG_PCIEPORTBUS
Add a new pci_alloc_irq_vectors helper that allocates MSI-X or multi-MSI vectors for PCI device while isolating the driver from the arcane details. This include handling both MSI-X, MSI and legacy interrupt fallbacks transparently, automatic capping to the available vectors as well as storing the information needed for request_irq in the PCI device itself so that a lot of boiler plate code in the driver can be removed. In the future this will also allow us to automatically set up spreading for interrupt vectors without having to duplicate it in all the drivers. Signed-off-by: Christoph Hellwig <hch@lst.de> --- drivers/pci/msi.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 19 +++++++++ 2 files changed, 127 insertions(+)