diff mbox series

[v2,3/8] xen/events: avoid handling the same event on two cpus at the same time

Message ID 20210211101616.13788-4-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series xen/events: bug fixes and some diagnostic aids | expand

Commit Message

Jürgen Groß Feb. 11, 2021, 10:16 a.m. UTC
When changing the cpu affinity of an event it can happen today that
(with some unlucky timing) the same event will be handled on the old
and the new cpu at the same time.

Avoid that by adding an "event active" flag to the per-event data and
call the handler only if this flag isn't set.

Reported-by: Julien Grall <julien@xen.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- new patch
---
 drivers/xen/events/events_base.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

Comments

Julien Grall Feb. 14, 2021, 9:34 p.m. UTC | #1
Hi Juergen,

On 11/02/2021 10:16, Juergen Gross wrote:
> When changing the cpu affinity of an event it can happen today that
> (with some unlucky timing) the same event will be handled on the old
> and the new cpu at the same time.
> 
> Avoid that by adding an "event active" flag to the per-event data and
> call the handler only if this flag isn't set.
> 
> Reported-by: Julien Grall <julien@xen.org>
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> V2:
> - new patch
> ---
>   drivers/xen/events/events_base.c | 19 +++++++++++++++----
>   1 file changed, 15 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
> index e157e7506830..f7e22330dcef 100644
> --- a/drivers/xen/events/events_base.c
> +++ b/drivers/xen/events/events_base.c
> @@ -102,6 +102,7 @@ struct irq_info {
>   #define EVT_MASK_REASON_EXPLICIT	0x01
>   #define EVT_MASK_REASON_TEMPORARY	0x02
>   #define EVT_MASK_REASON_EOI_PENDING	0x04
> +	u8 is_active;		/* Is event just being handled? */
>   	unsigned irq;
>   	evtchn_port_t evtchn;   /* event channel */
>   	unsigned short cpu;     /* cpu bound */
> @@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
>   	}
>   
>   	info->eoi_time = 0;
> +	smp_store_release(&info->is_active, 0);
>   	do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
>   }
>   
> @@ -809,13 +811,15 @@ static void pirq_query_unmask(int irq)
>   
>   static void eoi_pirq(struct irq_data *data)
>   {
> -	evtchn_port_t evtchn = evtchn_from_irq(data->irq);
> +	struct irq_info *info = info_for_irq(data->irq);
> +	evtchn_port_t evtchn = info ? info->evtchn : 0;
>   	struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
>   	int rc = 0;
>   
>   	if (!VALID_EVTCHN(evtchn))
>   		return;
>   
> +	smp_store_release(&info->is_active, 0);

Would you mind to explain why you are using the release semantics?

It is also not clear to me if there are any expected ordering between 
clearing is_active and clearing the pending bit.

>   	clear_evtchn(evtchn);


The 2 lines here seems to be a common pattern in this patch. So I would 
suggest to create a new helper.

>   
>   	if (pirq_needs_eoi(data->irq)) {
> @@ -1640,6 +1644,8 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
>   	}
>   
>   	info = info_for_irq(irq);
> +	if (xchg_acquire(&info->is_active, 1))
> +		return;
>   
>   	if (ctrl->defer_eoi) {
>   		info->eoi_cpu = smp_processor_id();
> @@ -1823,11 +1829,13 @@ static void disable_dynirq(struct irq_data *data)
>   
>   static void ack_dynirq(struct irq_data *data)
>   {
> -	evtchn_port_t evtchn = evtchn_from_irq(data->irq);
> +	struct irq_info *info = info_for_irq(data->irq);
> +	evtchn_port_t evtchn = info ? info->evtchn : 0;
>   
>   	if (!VALID_EVTCHN(evtchn))
>   		return;
>   
> +	smp_store_release(&info->is_active, 0);
>   	clear_evtchn(evtchn);
>   }
>   
> @@ -1969,10 +1977,13 @@ static void restore_cpu_ipis(unsigned int cpu)
>   /* Clear an irq's pending state, in preparation for polling on it */
>   void xen_clear_irq_pending(int irq)
>   {
> -	evtchn_port_t evtchn = evtchn_from_irq(irq);
> +	struct irq_info *info = info_for_irq(irq);
> +	evtchn_port_t evtchn = info ? info->evtchn : 0;
>   
> -	if (VALID_EVTCHN(evtchn))
> +	if (VALID_EVTCHN(evtchn)) {
> +		smp_store_release(&info->is_active, 0);
>   		clear_evtchn(evtchn);
> +	}
>   }
>   EXPORT_SYMBOL(xen_clear_irq_pending);
>   void xen_set_irq_pending(int irq)
>
Jürgen Groß Feb. 15, 2021, 6:55 a.m. UTC | #2
On 14.02.21 22:34, Julien Grall wrote:
> Hi Juergen,
> 
> On 11/02/2021 10:16, Juergen Gross wrote:
>> When changing the cpu affinity of an event it can happen today that
>> (with some unlucky timing) the same event will be handled on the old
>> and the new cpu at the same time.
>>
>> Avoid that by adding an "event active" flag to the per-event data and
>> call the handler only if this flag isn't set.
>>
>> Reported-by: Julien Grall <julien@xen.org>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> ---
>> V2:
>> - new patch
>> ---
>>   drivers/xen/events/events_base.c | 19 +++++++++++++++----
>>   1 file changed, 15 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/xen/events/events_base.c 
>> b/drivers/xen/events/events_base.c
>> index e157e7506830..f7e22330dcef 100644
>> --- a/drivers/xen/events/events_base.c
>> +++ b/drivers/xen/events/events_base.c
>> @@ -102,6 +102,7 @@ struct irq_info {
>>   #define EVT_MASK_REASON_EXPLICIT    0x01
>>   #define EVT_MASK_REASON_TEMPORARY    0x02
>>   #define EVT_MASK_REASON_EOI_PENDING    0x04
>> +    u8 is_active;        /* Is event just being handled? */
>>       unsigned irq;
>>       evtchn_port_t evtchn;   /* event channel */
>>       unsigned short cpu;     /* cpu bound */
>> @@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info 
>> *info, bool spurious)
>>       }
>>       info->eoi_time = 0;
>> +    smp_store_release(&info->is_active, 0);
>>       do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
>>   }
>> @@ -809,13 +811,15 @@ static void pirq_query_unmask(int irq)
>>   static void eoi_pirq(struct irq_data *data)
>>   {
>> -    evtchn_port_t evtchn = evtchn_from_irq(data->irq);
>> +    struct irq_info *info = info_for_irq(data->irq);
>> +    evtchn_port_t evtchn = info ? info->evtchn : 0;
>>       struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
>>       int rc = 0;
>>       if (!VALID_EVTCHN(evtchn))
>>           return;
>> +    smp_store_release(&info->is_active, 0);
> 
> Would you mind to explain why you are using the release semantics?

It is basically releasing a lock. So release semantics seem to be
appropriate.

> It is also not clear to me if there are any expected ordering between 
> clearing is_active and clearing the pending bit.

No, I don't think there is a specific ordering required. is_active is
just guarding against two simultaneous IRQ handler calls for the same
event being active. Clearing the pending bit is not part of the guarded
section.

> 
>>       clear_evtchn(evtchn);
> 
> 
> The 2 lines here seems to be a common pattern in this patch. So I would 
> suggest to create a new helper.

Okay.


Juergen
Boris Ostrovsky Feb. 15, 2021, 9:35 p.m. UTC | #3
On 2/11/21 5:16 AM, Juergen Gross wrote:

> @@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
>  	}
>  
>  	info->eoi_time = 0;
> +	smp_store_release(&info->is_active, 0);


Can this be done in lateeoi_ack_dynirq()/lateeoi_mask_ack_dynirq(), after we've masked the channel? Then it will be consistent with how how other chips do it, especially with the new helper.


-boris


>  	do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
>  }
>
Jürgen Groß Feb. 18, 2021, 11:47 a.m. UTC | #4
On 15.02.21 22:35, Boris Ostrovsky wrote:
> 
> On 2/11/21 5:16 AM, Juergen Gross wrote:
> 
>> @@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
>>   	}
>>   
>>   	info->eoi_time = 0;
>> +	smp_store_release(&info->is_active, 0);
> 
> 
> Can this be done in lateeoi_ack_dynirq()/lateeoi_mask_ack_dynirq(), after we've masked the channel? Then it will be consistent with how how other chips do it, especially with the new helper.

Yes, I think this should work.


Juergen
diff mbox series

Patch

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index e157e7506830..f7e22330dcef 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -102,6 +102,7 @@  struct irq_info {
 #define EVT_MASK_REASON_EXPLICIT	0x01
 #define EVT_MASK_REASON_TEMPORARY	0x02
 #define EVT_MASK_REASON_EOI_PENDING	0x04
+	u8 is_active;		/* Is event just being handled? */
 	unsigned irq;
 	evtchn_port_t evtchn;   /* event channel */
 	unsigned short cpu;     /* cpu bound */
@@ -622,6 +623,7 @@  static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
 	}
 
 	info->eoi_time = 0;
+	smp_store_release(&info->is_active, 0);
 	do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
 }
 
@@ -809,13 +811,15 @@  static void pirq_query_unmask(int irq)
 
 static void eoi_pirq(struct irq_data *data)
 {
-	evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+	struct irq_info *info = info_for_irq(data->irq);
+	evtchn_port_t evtchn = info ? info->evtchn : 0;
 	struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
 	int rc = 0;
 
 	if (!VALID_EVTCHN(evtchn))
 		return;
 
+	smp_store_release(&info->is_active, 0);
 	clear_evtchn(evtchn);
 
 	if (pirq_needs_eoi(data->irq)) {
@@ -1640,6 +1644,8 @@  void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
 	}
 
 	info = info_for_irq(irq);
+	if (xchg_acquire(&info->is_active, 1))
+		return;
 
 	if (ctrl->defer_eoi) {
 		info->eoi_cpu = smp_processor_id();
@@ -1823,11 +1829,13 @@  static void disable_dynirq(struct irq_data *data)
 
 static void ack_dynirq(struct irq_data *data)
 {
-	evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+	struct irq_info *info = info_for_irq(data->irq);
+	evtchn_port_t evtchn = info ? info->evtchn : 0;
 
 	if (!VALID_EVTCHN(evtchn))
 		return;
 
+	smp_store_release(&info->is_active, 0);
 	clear_evtchn(evtchn);
 }
 
@@ -1969,10 +1977,13 @@  static void restore_cpu_ipis(unsigned int cpu)
 /* Clear an irq's pending state, in preparation for polling on it */
 void xen_clear_irq_pending(int irq)
 {
-	evtchn_port_t evtchn = evtchn_from_irq(irq);
+	struct irq_info *info = info_for_irq(irq);
+	evtchn_port_t evtchn = info ? info->evtchn : 0;
 
-	if (VALID_EVTCHN(evtchn))
+	if (VALID_EVTCHN(evtchn)) {
+		smp_store_release(&info->is_active, 0);
 		clear_evtchn(evtchn);
+	}
 }
 EXPORT_SYMBOL(xen_clear_irq_pending);
 void xen_set_irq_pending(int irq)