| Message ID | 20210211101616.13788-4-jgross@suse.com (mailing list archive) |
|---|---|
| State | Superseded |
| Series | xen/events: bug fixes and some diagnostic aids |
Hi Juergen,

On 11/02/2021 10:16, Juergen Gross wrote:
> When changing the cpu affinity of an event it can happen today that
> (with some unlucky timing) the same event will be handled on the old
> and the new cpu at the same time.
>
> Avoid that by adding an "event active" flag to the per-event data and
> call the handler only if this flag isn't set.
>
> Reported-by: Julien Grall <julien@xen.org>
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> V2:
> - new patch
> ---
>  drivers/xen/events/events_base.c | 19 +++++++++++++++----
>  1 file changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
> index e157e7506830..f7e22330dcef 100644
> --- a/drivers/xen/events/events_base.c
> +++ b/drivers/xen/events/events_base.c
> @@ -102,6 +102,7 @@ struct irq_info {
>  #define EVT_MASK_REASON_EXPLICIT	0x01
>  #define EVT_MASK_REASON_TEMPORARY	0x02
>  #define EVT_MASK_REASON_EOI_PENDING	0x04
> +	u8 is_active;		/* Is event just being handled? */
>  	unsigned irq;
>  	evtchn_port_t evtchn;	/* event channel */
>  	unsigned short cpu;	/* cpu bound */
> @@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
>  	}
>
>  	info->eoi_time = 0;
> +	smp_store_release(&info->is_active, 0);
>  	do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
>  }
>
> @@ -809,13 +811,15 @@ static void pirq_query_unmask(int irq)
>
>  static void eoi_pirq(struct irq_data *data)
>  {
> -	evtchn_port_t evtchn = evtchn_from_irq(data->irq);
> +	struct irq_info *info = info_for_irq(data->irq);
> +	evtchn_port_t evtchn = info ? info->evtchn : 0;
>  	struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
>  	int rc = 0;
>
>  	if (!VALID_EVTCHN(evtchn))
>  		return;
>
> +	smp_store_release(&info->is_active, 0);

Would you mind explaining why you are using release semantics?

It is also not clear to me if there is any expected ordering between
clearing is_active and clearing the pending bit.

>  	clear_evtchn(evtchn);

The two lines here seem to be a common pattern in this patch, so I would
suggest creating a new helper.

>
>  	if (pirq_needs_eoi(data->irq)) {
> @@ -1640,6 +1644,8 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
>  	}
>
>  	info = info_for_irq(irq);
> +	if (xchg_acquire(&info->is_active, 1))
> +		return;
>
>  	if (ctrl->defer_eoi) {
>  		info->eoi_cpu = smp_processor_id();
> @@ -1823,11 +1829,13 @@ static void disable_dynirq(struct irq_data *data)
>
>  static void ack_dynirq(struct irq_data *data)
>  {
> -	evtchn_port_t evtchn = evtchn_from_irq(data->irq);
> +	struct irq_info *info = info_for_irq(data->irq);
> +	evtchn_port_t evtchn = info ? info->evtchn : 0;
>
>  	if (!VALID_EVTCHN(evtchn))
>  		return;
>
> +	smp_store_release(&info->is_active, 0);
>  	clear_evtchn(evtchn);
>  }
>
> @@ -1969,10 +1977,13 @@ static void restore_cpu_ipis(unsigned int cpu)
>  /* Clear an irq's pending state, in preparation for polling on it */
>  void xen_clear_irq_pending(int irq)
>  {
> -	evtchn_port_t evtchn = evtchn_from_irq(irq);
> +	struct irq_info *info = info_for_irq(irq);
> +	evtchn_port_t evtchn = info ? info->evtchn : 0;
>
> -	if (VALID_EVTCHN(evtchn))
> +	if (VALID_EVTCHN(evtchn)) {
> +		smp_store_release(&info->is_active, 0);
>  		clear_evtchn(evtchn);
> +	}
>  }
>  EXPORT_SYMBOL(xen_clear_irq_pending);
>  void xen_set_irq_pending(int irq)
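For reference, the helper Julien suggests for the repeated two-line pattern could be as small as the sketch below. It simply mirrors the exit sequence used in eoi_pirq(), ack_dynirq() and xen_clear_irq_pending(); the name event_handler_exit() is only illustrative and is not taken from the posted series.

```c
/*
 * Sketch of the suggested helper (name is a placeholder, not from the
 * posted series): drop the is_active guard, then clear the pending bit.
 */
static void event_handler_exit(struct irq_info *info)
{
	/* Release: the handler's stores are visible before the flag clears. */
	smp_store_release(&info->is_active, 0);
	clear_evtchn(info->evtchn);
}
```

Callers would then replace the open-coded pair with a single event_handler_exit(info) call after the VALID_EVTCHN() check.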
On 14.02.21 22:34, Julien Grall wrote:
> Hi Juergen,
>
> On 11/02/2021 10:16, Juergen Gross wrote:
>> When changing the cpu affinity of an event it can happen today that
>> (with some unlucky timing) the same event will be handled on the old
>> and the new cpu at the same time.
>>
>> Avoid that by adding an "event active" flag to the per-event data and
>> call the handler only if this flag isn't set.
>>
>> Reported-by: Julien Grall <julien@xen.org>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> ---
>> V2:
>> - new patch
>> ---
>>  drivers/xen/events/events_base.c | 19 +++++++++++++++----
>>  1 file changed, 15 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
>> index e157e7506830..f7e22330dcef 100644
>> --- a/drivers/xen/events/events_base.c
>> +++ b/drivers/xen/events/events_base.c
>> @@ -102,6 +102,7 @@ struct irq_info {
>>  #define EVT_MASK_REASON_EXPLICIT	0x01
>>  #define EVT_MASK_REASON_TEMPORARY	0x02
>>  #define EVT_MASK_REASON_EOI_PENDING	0x04
>> +	u8 is_active;		/* Is event just being handled? */
>>  	unsigned irq;
>>  	evtchn_port_t evtchn;	/* event channel */
>>  	unsigned short cpu;	/* cpu bound */
>> @@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
>>  	}
>>  	info->eoi_time = 0;
>> +	smp_store_release(&info->is_active, 0);
>>  	do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
>>  }
>> @@ -809,13 +811,15 @@ static void pirq_query_unmask(int irq)
>>  static void eoi_pirq(struct irq_data *data)
>>  {
>> -	evtchn_port_t evtchn = evtchn_from_irq(data->irq);
>> +	struct irq_info *info = info_for_irq(data->irq);
>> +	evtchn_port_t evtchn = info ? info->evtchn : 0;
>>  	struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
>>  	int rc = 0;
>>  	if (!VALID_EVTCHN(evtchn))
>>  		return;
>> +	smp_store_release(&info->is_active, 0);
>
> Would you mind explaining why you are using release semantics?

It is basically releasing a lock, so release semantics seem to be
appropriate.

> It is also not clear to me if there is any expected ordering between
> clearing is_active and clearing the pending bit.

No, I don't think there is a specific ordering required. is_active is
just guarding against two simultaneous IRQ handler calls for the same
event being active. Clearing the pending bit is not part of the guarded
section.

>
>>  	clear_evtchn(evtchn);
>
> The two lines here seem to be a common pattern in this patch, so I
> would suggest creating a new helper.

Okay.


Juergen
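To make the "basically releasing a lock" picture concrete, these are the two halves of the protocol taken from the hunks above, with explanatory comments added (the comments are editorial; the code lines come from the patch):

```c
/*
 * Entry side, in handle_irq_for_port(): try to take the per-event
 * "lock".  xchg_acquire() returns the previous value, so a non-zero
 * result means another CPU is already handling this event and this
 * CPU simply backs off.
 */
if (xchg_acquire(&info->is_active, 1))
	return;

/* ... the IRQ handler runs here; this is the guarded section ... */

/*
 * Exit side, e.g. in ack_dynirq(): drop the "lock" with release
 * semantics so the handler's stores are visible to the next CPU that
 * observes is_active == 0.  Clearing the pending bit sits outside the
 * guarded section, hence no specific ordering against it is required.
 */
smp_store_release(&info->is_active, 0);
clear_evtchn(evtchn);
```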
On 2/11/21 5:16 AM, Juergen Gross wrote:
> @@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
>  	}
>
>  	info->eoi_time = 0;
> +	smp_store_release(&info->is_active, 0);


Can this be done in lateeoi_ack_dynirq()/lateeoi_mask_ack_dynirq(), after
we've masked the channel? Then it will be consistent with how other chips
do it, especially with the new helper.


-boris


>  	do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
>  }
>
On 15.02.21 22:35, Boris Ostrovsky wrote:
>
> On 2/11/21 5:16 AM, Juergen Gross wrote:
>
>> @@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
>>  	}
>>
>>  	info->eoi_time = 0;
>> +	smp_store_release(&info->is_active, 0);
>
> Can this be done in lateeoi_ack_dynirq()/lateeoi_mask_ack_dynirq(),
> after we've masked the channel? Then it will be consistent with how
> other chips do it, especially with the new helper.

Yes, I think this should work.


Juergen
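A rough sketch of what Boris's suggestion might look like, under the assumptions that event_handler_exit() is the hypothetical helper discussed earlier and that do_mask() is the masking counterpart of the do_unmask() visible in the diff; this is not the posted code:

```c
/*
 * Sketch only: move the is_active/pending clearing into the late-EOI
 * ack callback, after the channel has been masked.  Assumes the
 * hypothetical event_handler_exit() helper and a do_mask() counterpart
 * to the do_unmask() seen in the patch.
 */
static void lateeoi_ack_dynirq(struct irq_data *data)
{
	struct irq_info *info = info_for_irq(data->irq);
	evtchn_port_t evtchn = info ? info->evtchn : 0;

	if (VALID_EVTCHN(evtchn)) {
		/* Mask first, so a re-raised event cannot fire early ... */
		do_mask(info, EVT_MASK_REASON_EOI_PENDING);
		/* ... then drop is_active and clear the pending bit. */
		event_handler_exit(info);
	}
}
```

lateeoi_mask_ack_dynirq() would follow the same shape with its own mask reason.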
```diff
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index e157e7506830..f7e22330dcef 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -102,6 +102,7 @@ struct irq_info {
 #define EVT_MASK_REASON_EXPLICIT	0x01
 #define EVT_MASK_REASON_TEMPORARY	0x02
 #define EVT_MASK_REASON_EOI_PENDING	0x04
+	u8 is_active;		/* Is event just being handled? */
 	unsigned irq;
 	evtchn_port_t evtchn;	/* event channel */
 	unsigned short cpu;	/* cpu bound */
@@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
 	}
 
 	info->eoi_time = 0;
+	smp_store_release(&info->is_active, 0);
 	do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
 }
 
@@ -809,13 +811,15 @@ static void pirq_query_unmask(int irq)
 
 static void eoi_pirq(struct irq_data *data)
 {
-	evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+	struct irq_info *info = info_for_irq(data->irq);
+	evtchn_port_t evtchn = info ? info->evtchn : 0;
 	struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
 	int rc = 0;
 
 	if (!VALID_EVTCHN(evtchn))
 		return;
 
+	smp_store_release(&info->is_active, 0);
 	clear_evtchn(evtchn);
 
 	if (pirq_needs_eoi(data->irq)) {
@@ -1640,6 +1644,8 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
 	}
 
 	info = info_for_irq(irq);
+	if (xchg_acquire(&info->is_active, 1))
+		return;
 
 	if (ctrl->defer_eoi) {
 		info->eoi_cpu = smp_processor_id();
@@ -1823,11 +1829,13 @@ static void disable_dynirq(struct irq_data *data)
 
 static void ack_dynirq(struct irq_data *data)
 {
-	evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+	struct irq_info *info = info_for_irq(data->irq);
+	evtchn_port_t evtchn = info ? info->evtchn : 0;
 
 	if (!VALID_EVTCHN(evtchn))
 		return;
 
+	smp_store_release(&info->is_active, 0);
 	clear_evtchn(evtchn);
 }
 
@@ -1969,10 +1977,13 @@ static void restore_cpu_ipis(unsigned int cpu)
 /* Clear an irq's pending state, in preparation for polling on it */
 void xen_clear_irq_pending(int irq)
 {
-	evtchn_port_t evtchn = evtchn_from_irq(irq);
+	struct irq_info *info = info_for_irq(irq);
+	evtchn_port_t evtchn = info ? info->evtchn : 0;
 
-	if (VALID_EVTCHN(evtchn))
+	if (VALID_EVTCHN(evtchn)) {
+		smp_store_release(&info->is_active, 0);
 		clear_evtchn(evtchn);
+	}
 }
 EXPORT_SYMBOL(xen_clear_irq_pending);
 void xen_set_irq_pending(int irq)
```
When changing the cpu affinity of an event it can happen today that
(with some unlucky timing) the same event will be handled on the old
and the new cpu at the same time.

Avoid that by adding an "event active" flag to the per-event data and
call the handler only if this flag isn't set.

Reported-by: Julien Grall <julien@xen.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- new patch
---
 drivers/xen/events/events_base.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)
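To illustrate the race the commit message describes, here is a small stand-alone user-space model (not the Xen code; the C11/pthread scaffolding and all names are editorial). Two threads play the old and the new cpu, both of which see the event pending after the affinity change; the exchange/store pair mirroring the patch ensures the "handler" never runs on both threads at the same time.

```c
/*
 * User-space model of the race (not the Xen code): two threads act as
 * the old and the new cpu, both seeing the same event pending.  The
 * exchange/store pair mirroring the patch keeps the "handler" from
 * running on both threads concurrently.  Build with: cc -pthread
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_uchar pending = 1;	/* event pending on both "CPUs" */
static atomic_uchar is_active;		/* the guard added by the patch */
static atomic_int   in_handler;		/* threads currently in the handler */
static atomic_int   max_seen;		/* worst concurrency observed */

static void *cpu(void *arg)
{
	(void)arg;

	if (!atomic_load_explicit(&pending, memory_order_acquire))
		return NULL;

	/* Take the per-event "lock"; back off if someone else holds it. */
	if (atomic_exchange_explicit(&is_active, 1, memory_order_acquire))
		return NULL;

	int now = atomic_fetch_add(&in_handler, 1) + 1;
	if (now > atomic_load(&max_seen))
		atomic_store(&max_seen, now);
	usleep(1000);			/* pretend the handler does work */
	atomic_fetch_sub(&in_handler, 1);

	atomic_store_explicit(&pending, 0, memory_order_relaxed);
	/* Drop the "lock"; the handler's work is visible before this. */
	atomic_store_explicit(&is_active, 0, memory_order_release);
	return NULL;
}

int main(void)
{
	pthread_t old_cpu, new_cpu;

	pthread_create(&old_cpu, NULL, cpu, NULL);
	pthread_create(&new_cpu, NULL, cpu, NULL);
	pthread_join(old_cpu, NULL);
	pthread_join(new_cpu, NULL);

	/* Never exceeds 1 with the guard; could reach 2 without it. */
	printf("max handler concurrency: %d\n", atomic_load(&max_seen));
	return 0;
}
```

Deleting the atomic_exchange_explicit() check reproduces the original problem: both threads can be inside the handler at once, which is exactly the simultaneous handling on the old and new cpu that the patch prevents.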