Message ID | 20190213105041.13537-2-ming.lei@redhat.com (mailing list archive)
---|---
State | Superseded, archived
Series | genirq/affinity: add .calc_sets for improving IRQ allocation & spread
On Wed, Feb 13, 2019 at 06:50:37PM +0800, Ming Lei wrote:
> Currently all parameters in 'affd' are read-only, so 'affd' is marked
> as const in both pci_alloc_irq_vectors_affinity() and irq_create_affinity_masks().

s/all parameters in 'affd'/the contents of '*affd'/

> We have to ask driver to re-caculate set vectors after the whole IRQ
> vectors are allocated later, and the result needs to be stored in 'affd'.
> Also both the two interfaces are core APIs, which should be trusted.

s/re-caculate/recalculate/
s/stored in 'affd'/stored in '*affd'/
s/both the two/both/

This is a little confusing because you're talking about both "IRQ
vectors" and these other "set vectors", which I think are different
things.  I assume the "set vectors" are cpumasks showing the affinity
of the IRQ vectors with some CPUs?

AFAICT, *this* patch doesn't add anything that writes to *affd.  I
think the removal of "const" should be in the same patch that makes
the removal necessary.

> So don't mark 'affd' as const both pci_alloc_irq_vectors_affinity() and
> irq_create_affinity_masks().
>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  drivers/pci/msi.c         | 18 +++++++++---------
>  include/linux/interrupt.h |  2 +-
>  include/linux/pci.h       |  4 ++--
>  kernel/irq/affinity.c     |  2 +-
>  4 files changed, 13 insertions(+), 13 deletions(-)
On Wed, 13 Feb 2019, Bjorn Helgaas wrote:
> On Wed, Feb 13, 2019 at 06:50:37PM +0800, Ming Lei wrote:
> > Currently all parameters in 'affd' are read-only, so 'affd' is marked
> > as const in both pci_alloc_irq_vectors_affinity() and irq_create_affinity_masks().
>
> s/all parameters in 'affd'/the contents of '*affd'/
>
> > We have to ask driver to re-caculate set vectors after the whole IRQ
> > vectors are allocated later, and the result needs to be stored in 'affd'.
> > Also both the two interfaces are core APIs, which should be trusted.
>
> s/re-caculate/recalculate/
> s/stored in 'affd'/stored in '*affd'/
> s/both the two/both/
>
> This is a little confusing because you're talking about both "IRQ
> vectors" and these other "set vectors", which I think are different
> things.  I assume the "set vectors" are cpumasks showing the affinity
> of the IRQ vectors with some CPUs?

I think we should drop the whole vector wording completely.

The driver does not care about vectors, it only cares about a block of
interrupt numbers. These numbers are kernel managed and the interrupts
just happen to have a CPU vector assigned at some point. Depending on
the CPU architecture the underlying mechanism might not even be named
vector.

> AFAICT, *this* patch doesn't add anything that writes to *affd.  I
> think the removal of "const" should be in the same patch that makes
> the removal necessary.

So this should be:

The interrupt affinity spreading mechanism supports to spread out
affinities for one or more interrupt sets. An interrupt set contains one
or more interrupts. Each set is mapped to a specific functionality of a
device, e.g. general I/O queues and read I/O queues of multiqueue block
devices.

The number of interrupts per set is defined by the driver. It depends on
the total number of available interrupts for the device, which is
determined by the PCI capabilities and the availability of underlying CPU
resources, and the number of queues which the device provides and the
driver wants to instantiate.

The driver passes initial configuration for the interrupt allocation via
a pointer to struct affinity_desc.

Right now the allocation mechanism is complex as it requires to have a
loop in the driver to determine the maximum number of interrupts which
are provided by the PCI capabilities and the underlying CPU resources.
This loop would have to be replicated in every driver which wants to
utilize this mechanism. That's unwanted code duplication and error prone.

In order to move this into generic facilities it is required to have a
mechanism, which allows the recalculation of the interrupt sets and their
size, in the core code. As the core code does not have any knowledge
about the underlying device, a driver specific callback will be added to
struct affinity_desc, which will be invoked by the core code. The
callback will get the number of available interrupts as an argument, so
the driver can calculate the corresponding number and size of interrupt
sets.

To support this, two modifications for the handling of struct
affinity_desc are required:

1) The (optional) interrupt sets size information is contained in a
   separate array of integers and struct affinity_desc contains a
   pointer to it. This is cumbersome and as the maximum number of
   interrupt sets is small, there is no reason to have separate storage.
   Moving the size array into struct affinity_desc avoids indirections
   and makes the code simpler.

2) At the moment the struct affinity_desc pointer which is handed in
   from the driver and passed through to several core functions is
   marked 'const'. With the upcoming callback to recalculate the number
   and size of interrupt sets, it's necessary to remove the 'const'
   qualifier. Otherwise the callback would not be able to update the
   data.

Move the set size array into struct affinity_desc as a first preparatory
step. The removal of the 'const' qualifier will be done when adding the
callback.

IOW, the first patch moves the set array into the struct itself. The
second patch introduces the callback and removes the 'const' qualifier.
I wouldn't mind to have the same changelog duplicated (+/- the last two
paragraphs which need some update of course).

Thanks,

	tglx
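For orientation, here is a rough sketch of the direction described above:
the set size array embedded in the affinity descriptor plus a driver
callback for recalculation. Apart from .calc_sets, which is named in the
series title, the field names, the set-count limit and the priv pointer
are assumptions made here for illustration, not the interface the series
actually defines.

/*
 * Illustrative sketch only: IRQ_AFFINITY_MAX_SETS, the field names and
 * the priv pointer are assumptions; only the .calc_sets callback is
 * implied by the series title.
 */
#define IRQ_AFFINITY_MAX_SETS	4

struct irq_affinity {
	unsigned int	pre_vectors;	/* reserved, no affinity spreading */
	unsigned int	post_vectors;	/* reserved, no affinity spreading */
	unsigned int	nr_sets;	/* number of interrupt sets */
	/* Set sizes embedded here instead of a pointer to external storage. */
	unsigned int	set_size[IRQ_AFFINITY_MAX_SETS];
	/*
	 * Invoked by the core once the number of available interrupts is
	 * known, so the driver can recompute nr_sets and set_size[].
	 * This write access is why 'affd' cannot stay const.
	 */
	void		(*calc_sets)(struct irq_affinity *affd,
				     unsigned int nvecs);
	void		*priv;		/* driver data for calc_sets */
};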
On Wed, Feb 13, 2019 at 09:56:36PM +0100, Thomas Gleixner wrote:
> On Wed, 13 Feb 2019, Bjorn Helgaas wrote:
> > On Wed, Feb 13, 2019 at 06:50:37PM +0800, Ming Lei wrote:
> > > We have to ask driver to re-caculate set vectors after the whole IRQ
> > > vectors are allocated later, and the result needs to be stored in 'affd'.
> > > Also both the two interfaces are core APIs, which should be trusted.
> >
> > s/re-caculate/recalculate/
> > s/stored in 'affd'/stored in '*affd'/
> > s/both the two/both/
> >
> > This is a little confusing because you're talking about both "IRQ
> > vectors" and these other "set vectors", which I think are different
> > things.  I assume the "set vectors" are cpumasks showing the affinity
> > of the IRQ vectors with some CPUs?
>
> I think we should drop the whole vector wording completely.
>
> The driver does not care about vectors, it only cares about a block of
> interrupt numbers. These numbers are kernel managed and the interrupts
> just happen to have a CPU vector assigned at some point. Depending on
> the CPU architecture the underlying mechanism might not even be named
> vector.

Perhaps longer term we could move affinity mask creation from the irq
subsystem into a more generic library. Interrupts aren't the only
resource that wants to spread across CPUs. For example, blk-mq has its
own implementation for polled queues, so I think a non-irq specific
implementation would be a nice addition to the kernel lib.
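To make that suggestion concrete, below is a minimal sketch of what such
a generic, non-irq spreading helper could look like. The function name
and placement are hypothetical and the round-robin policy is deliberately
simplified; the real irq spreading code is NUMA-aware and distinguishes
present from possible CPUs.

#include <linux/cpumask.h>
#include <linux/slab.h>

/*
 * Hypothetical library helper, not an existing kernel API: distribute
 * the possible CPUs over 'numgrps' groups. A caller such as blk-mq could
 * use the resulting masks for polled queues without involving the irq
 * core at all.
 */
static struct cpumask *lib_group_cpus_simple(unsigned int numgrps)
{
	struct cpumask *masks;
	unsigned int cpu, grp = 0;

	masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return NULL;

	/* Plain round-robin; a real version would group by NUMA node. */
	for_each_possible_cpu(cpu) {
		cpumask_set_cpu(cpu, &masks[grp]);
		grp = (grp + 1) % numgrps;
	}
	return masks;
}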
On Wed, 13 Feb 2019, Keith Busch wrote:
> On Wed, Feb 13, 2019 at 09:56:36PM +0100, Thomas Gleixner wrote:
> > On Wed, 13 Feb 2019, Bjorn Helgaas wrote:
> > > On Wed, Feb 13, 2019 at 06:50:37PM +0800, Ming Lei wrote:
> > > > We have to ask driver to re-caculate set vectors after the whole IRQ
> > > > vectors are allocated later, and the result needs to be stored in 'affd'.
> > > > Also both the two interfaces are core APIs, which should be trusted.
> > >
> > > s/re-caculate/recalculate/
> > > s/stored in 'affd'/stored in '*affd'/
> > > s/both the two/both/
> > >
> > > This is a little confusing because you're talking about both "IRQ
> > > vectors" and these other "set vectors", which I think are different
> > > things.  I assume the "set vectors" are cpumasks showing the affinity
> > > of the IRQ vectors with some CPUs?
> >
> > I think we should drop the whole vector wording completely.
> >
> > The driver does not care about vectors, it only cares about a block of
> > interrupt numbers. These numbers are kernel managed and the interrupts
> > just happen to have a CPU vector assigned at some point. Depending on
> > the CPU architecture the underlying mechanism might not even be named
> > vector.
>
> Perhaps longer term we could move affinity mask creation from the irq
> subsystem into a more generic library. Interrupts aren't the only
> resource that wants to spread across CPUs. For example, blk-mq has its
> own implementation for polled queues, so I think a non-irq specific
> implementation would be a nice addition to the kernel lib.

Agreed. There is nothing interrupt specific in that code aside of some
name choices.

Btw, while I have your attention. There popped up an issue recently
related to that affinity logic.

The current implementation fails when:

	/*
	 * If there aren't any vectors left after applying the pre/post
	 * vectors don't bother with assigning affinity.
	 */
	if (nvecs == affd->pre_vectors + affd->post_vectors)
		return NULL;

Now the discussion arose that in that case the affinity sets are not
allocated and filled in for the pre/post vectors, but somehow the
underlying device still works and later on triggers the warning in the
blk-mq code because the MSI entries do not have affinity information
attached.

Sure, we could make that work, but there are several issues:

 1) irq_create_affinity_masks() has another reason to return NULL:
    memory allocation fails.

 2) Does it make sense at all?

Right now the PCI allocator ignores the NULL return and proceeds without
setting any affinities. As a consequence nothing is managed and
everything happens to work.

But that it happens to work is more by chance than by design, and the
warning is bogus if this is an expected mode of operation.

We should address these points in some way.

Thanks,

	tglx
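For reference, a simplified sketch of the behaviour described above,
i.e. how the PCI/MSI setup path ends up tolerating a NULL result. The
helper name is invented here purely for illustration; the real logic
lives in drivers/pci/msi.c and is more involved.

/*
 * Sketch only, hypothetical helper name: it mirrors the pattern in the
 * MSI setup path where a NULL return from irq_create_affinity_masks()
 * is not treated as an error. The MSI descriptors are then created
 * without affinity information, so nothing ends up being managed.
 */
static struct irq_affinity_desc *
msi_affinity_masks_or_null(int nvec, struct irq_affinity *affd)
{
	struct irq_affinity_desc *masks = NULL;

	if (affd)
		masks = irq_create_affinity_masks(nvec, affd);

	/*
	 * NULL here can mean either "no vectors left after pre/post" or
	 * "memory allocation failed"; the caller cannot distinguish the
	 * two cases and silently proceeds without managed interrupts.
	 */
	return masks;
}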
On Wed, Feb 13, 2019 at 10:41:55PM +0100, Thomas Gleixner wrote:
> Btw, while I have your attention. There popped up an issue recently
> related to that affinity logic.
>
> The current implementation fails when:
>
>	/*
>	 * If there aren't any vectors left after applying the pre/post
>	 * vectors don't bother with assigning affinity.
>	 */
>	if (nvecs == affd->pre_vectors + affd->post_vectors)
>		return NULL;
>
> Now the discussion arose that in that case the affinity sets are not
> allocated and filled in for the pre/post vectors, but somehow the
> underlying device still works and later on triggers the warning in the
> blk-mq code because the MSI entries do not have affinity information
> attached.
>
> Sure, we could make that work, but there are several issues:
>
>  1) irq_create_affinity_masks() has another reason to return NULL:
>     memory allocation fails.
>
>  2) Does it make sense at all?
>
> Right now the PCI allocator ignores the NULL return and proceeds without
> setting any affinities. As a consequence nothing is managed and
> everything happens to work.
>
> But that it happens to work is more by chance than by design, and the
> warning is bogus if this is an expected mode of operation.
>
> We should address these points in some way.

Ah, yes, that's a mistake in the nvme driver. It is assuming IO queues
are always on managed interrupts, but that's not true when only 1 vector
could be allocated. This should be an appropriate fix to the warning:

---
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 022ea1ee63f8..f2ccebe1c926 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -506,7 +506,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
 		 * affinity), so use the regular blk-mq cpu mapping
 		 */
 		map->queue_offset = qoff;
-		if (i != HCTX_TYPE_POLL)
+		if (i != HCTX_TYPE_POLL && dev->num_vecs > 1)
 			blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
 		else
 			blk_mq_map_queues(map);
--
On Wed, 13 Feb 2019, Keith Busch wrote:

Cc+ Huacai Chen

> Ah, yes, that's a mistake in the nvme driver. It is assuming IO queues
> are always on managed interrupts, but that's not true when only 1 vector
> could be allocated. This should be an appropriate fix to the warning:

Looks correct. Chen, can you please test that?

> ---
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 022ea1ee63f8..f2ccebe1c926 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -506,7 +506,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
>  		 * affinity), so use the regular blk-mq cpu mapping
>  		 */
>  		map->queue_offset = qoff;
> -		if (i != HCTX_TYPE_POLL)
> +		if (i != HCTX_TYPE_POLL && dev->num_vecs > 1)
>  			blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
>  		else
>  			blk_mq_map_queues(map);
> --
I'll test next week, but 4.19 has the same problem, how to fix that for
4.19?

Huacai
On Thu, 14 Feb 2019, 陈华才 wrote:
Please do not top post....
> I'll test next week, but 4.19 has the same problem, how to fix that for 4.19?
By applying the very same patch perhaps?
Thanks,
tglx
My lemote.com email is too weak, I'll stop top post because this time
I've cc-ed my gmail.

I've tested, this patch can fix the nvme problem, but it can't be
applied to 4.19 because of different context. And, I still think my
original solution (genirq/affinity: Assign default affinity to pre/post
vectors) is correct. There may be similar problems except nvme.

Huacai
On Tue, 19 Feb 2019, 陈华才 wrote:
> I've tested, this patch can fix the nvme problem, but it can't be
> applied to 4.19 because of different context. And, I still think my
> original solution (genirq/affinity: Assign default affinity to pre/post
> vectors) is correct. There may be similar problems except nvme.

As I explained to you in great length, it is not correct because it's
just papering over the underlying problem. Please stop advertising
semantically broken duct tape solutions.

Thanks,

	tglx
On Mon, Feb 18, 2019 at 04:42:27PM -0800, 陈华才 wrote:
> I've tested, this patch can fix the nvme problem, but it can't be
> applied to 4.19 because of different context. And, I still think my
> original solution (genirq/affinity: Assign default affinity to pre/post
> vectors) is correct. There may be similar problems except nvme.

I'll send a proper patch to fix the warning. It's always a driver bug if
they try to extract affinity from a non-managed irq, but I don't think
this particular nvme instance is very urgent as the end result is the
same, just without the warning.
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 4c0b47867258..96978459e2a0 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -532,7 +532,7 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
 }
 
 static struct msi_desc *
-msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd)
+msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
 {
 	struct irq_affinity_desc *masks = NULL;
 	struct msi_desc *entry;
@@ -597,7 +597,7 @@ static int msi_verify_entries(struct pci_dev *dev)
  * which could have been allocated.
  */
 static int msi_capability_init(struct pci_dev *dev, int nvec,
-			       const struct irq_affinity *affd)
+			       struct irq_affinity *affd)
 {
 	struct msi_desc *entry;
 	int ret;
@@ -669,7 +669,7 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
 
 static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 			      struct msix_entry *entries, int nvec,
-			      const struct irq_affinity *affd)
+			      struct irq_affinity *affd)
 {
 	struct irq_affinity_desc *curmsk, *masks = NULL;
 	struct msi_desc *entry;
@@ -736,7 +736,7 @@ static void msix_program_entries(struct pci_dev *dev,
  * requested MSI-X entries with allocated irqs or non-zero for otherwise.
 **/
 static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
-				int nvec, const struct irq_affinity *affd)
+				int nvec, struct irq_affinity *affd)
 {
 	int ret;
 	u16 control;
@@ -932,7 +932,7 @@ int pci_msix_vec_count(struct pci_dev *dev)
 EXPORT_SYMBOL(pci_msix_vec_count);
 
 static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
-			     int nvec, const struct irq_affinity *affd)
+			     int nvec, struct irq_affinity *affd)
 {
 	int nr_entries;
 	int i, j;
@@ -1018,7 +1018,7 @@ int pci_msi_enabled(void)
 EXPORT_SYMBOL(pci_msi_enabled);
 
 static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
-				  const struct irq_affinity *affd)
+				  struct irq_affinity *affd)
 {
 	int nvec;
 	int rc;
@@ -1086,7 +1086,7 @@ EXPORT_SYMBOL(pci_enable_msi);
 
 static int __pci_enable_msix_range(struct pci_dev *dev,
 				   struct msix_entry *entries, int minvec,
-				   int maxvec, const struct irq_affinity *affd)
+				   int maxvec, struct irq_affinity *affd)
 {
 	int rc, nvec = maxvec;
 
@@ -1165,9 +1165,9 @@ EXPORT_SYMBOL(pci_enable_msix_range);
  */
 int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 				   unsigned int max_vecs, unsigned int flags,
-				   const struct irq_affinity *affd)
+				   struct irq_affinity *affd)
 {
-	static const struct irq_affinity msi_default_affd;
+	struct irq_affinity msi_default_affd = {0};
 	int msix_vecs = -ENOSPC;
 	int msi_vecs = -ENOSPC;
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 7c9434652f36..1ed1014c9684 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -332,7 +332,7 @@ extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
 struct irq_affinity_desc *
-irq_create_affinity_masks(int nvec, const struct irq_affinity *affd);
+irq_create_affinity_masks(int nvec, struct irq_affinity *affd);
 
 int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 40b327b814aa..4eca42cf611b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1396,7 +1396,7 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev,
 }
 int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 				   unsigned int max_vecs, unsigned int flags,
-				   const struct irq_affinity *affd);
+				   struct irq_affinity *affd);
 
 void pci_free_irq_vectors(struct pci_dev *dev);
 int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
@@ -1422,7 +1422,7 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev,
 static inline int
 pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 			       unsigned int max_vecs, unsigned int flags,
-			       const struct irq_affinity *aff_desc)
+			       struct irq_affinity *aff_desc)
 {
 	if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq)
 		return 1;
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 118b66d64a53..9200d3b26f7d 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -239,7 +239,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
  * Returns the irq_affinity_desc pointer or NULL if allocation failed.
  */
 struct irq_affinity_desc *
-irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
+irq_create_affinity_masks(int nvecs, struct irq_affinity *affd)
 {
 	int affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
 	int curvec, usedvecs;
Currently all parameters in 'affd' are read-only, so 'affd' is marked
as const in both pci_alloc_irq_vectors_affinity() and
irq_create_affinity_masks().

We have to ask driver to re-caculate set vectors after the whole IRQ
vectors are allocated later, and the result needs to be stored in
'affd'. Also both the two interfaces are core APIs, which should be
trusted.

So don't mark 'affd' as const both pci_alloc_irq_vectors_affinity() and
irq_create_affinity_masks().

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/pci/msi.c         | 18 +++++++++---------
 include/linux/interrupt.h |  2 +-
 include/linux/pci.h       |  4 ++--
 kernel/irq/affinity.c     |  2 +-
 4 files changed, 13 insertions(+), 13 deletions(-)