Message ID | 20201012092740.1617-2-jgross@suse.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | XSA-343 followup patches | expand |
> -----Original Message----- > From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of Juergen Gross > Sent: 12 October 2020 10:28 > To: xen-devel@lists.xenproject.org > Cc: Juergen Gross <jgross@suse.com>; Andrew Cooper <andrew.cooper3@citrix.com>; George Dunlap > <george.dunlap@citrix.com>; Ian Jackson <iwj@xenproject.org>; Jan Beulich <jbeulich@suse.com>; Julien > Grall <julien@xen.org>; Stefano Stabellini <sstabellini@kernel.org>; Wei Liu <wl@xen.org> > Subject: [PATCH v2 1/2] xen/events: access last_priority and last_vcpu_id together > > The queue for a fifo event is depending on the vcpu_id and the > priority of the event. When sending an event it might happen the > event needs to change queues and the old queue needs to be kept for > keeping the links between queue elements intact. For this purpose > the event channel contains last_priority and last_vcpu_id values > elements for being able to identify the old queue. > > In order to avoid races always access last_priority and last_vcpu_id > with a single atomic operation avoiding any inconsistencies. > > Signed-off-by: Juergen Gross <jgross@suse.com> > --- > xen/common/event_fifo.c | 25 +++++++++++++++++++------ > xen/include/xen/sched.h | 3 +-- > 2 files changed, 20 insertions(+), 8 deletions(-) > > diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c > index fc189152e1..fffbd409c8 100644 > --- a/xen/common/event_fifo.c > +++ b/xen/common/event_fifo.c > @@ -42,6 +42,14 @@ struct evtchn_fifo_domain { > unsigned int num_evtchns; > }; > > +union evtchn_fifo_lastq { > + u32 raw; > + struct { > + u8 last_priority; > + u16 last_vcpu_id; > + }; > +}; I guess you want to s/u32/uint32_t, etc. above. 
> + > static inline event_word_t *evtchn_fifo_word_from_port(const struct domain *d, > unsigned int port) > { > @@ -86,16 +94,18 @@ static struct evtchn_fifo_queue *lock_old_queue(const struct domain *d, > struct vcpu *v; > struct evtchn_fifo_queue *q, *old_q; > unsigned int try; > + union evtchn_fifo_lastq lastq; > > for ( try = 0; try < 3; try++ ) > { > - v = d->vcpu[evtchn->last_vcpu_id]; > - old_q = &v->evtchn_fifo->queue[evtchn->last_priority]; > + lastq.raw = read_atomic(&evtchn->fifo_lastq); > + v = d->vcpu[lastq.last_vcpu_id]; > + old_q = &v->evtchn_fifo->queue[lastq.last_priority]; > > spin_lock_irqsave(&old_q->lock, *flags); > > - v = d->vcpu[evtchn->last_vcpu_id]; > - q = &v->evtchn_fifo->queue[evtchn->last_priority]; > + v = d->vcpu[lastq.last_vcpu_id]; > + q = &v->evtchn_fifo->queue[lastq.last_priority]; > > if ( old_q == q ) > return old_q; > @@ -246,8 +256,11 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) > /* Moved to a different queue? */ > if ( old_q != q ) > { > - evtchn->last_vcpu_id = v->vcpu_id; > - evtchn->last_priority = q->priority; > + union evtchn_fifo_lastq lastq; > + > + lastq.last_vcpu_id = v->vcpu_id; > + lastq.last_priority = q->priority; > + write_atomic(&evtchn->fifo_lastq, lastq.raw); > You're going to leak some stack here I think. Perhaps add a 'pad' field between 'last_priority' and 'last_vcpu_id' and zero it? Paul > spin_unlock_irqrestore(&old_q->lock, flags); > spin_lock_irqsave(&q->lock, flags); > diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h > index d8ed83f869..a298ff4df8 100644 > --- a/xen/include/xen/sched.h > +++ b/xen/include/xen/sched.h > @@ -114,8 +114,7 @@ struct evtchn > u16 virq; /* state == ECS_VIRQ */ > } u; > u8 priority; > - u8 last_priority; > - u16 last_vcpu_id; > + u32 fifo_lastq; /* Data for fifo events identifying last queue. */ > #ifdef CONFIG_XSM > union { > #ifdef XSM_NEED_GENERIC_EVTCHN_SSID > -- > 2.26.2 >
On 12.10.20 11:48, Paul Durrant wrote: >> -----Original Message----- >> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of Juergen Gross >> Sent: 12 October 2020 10:28 >> To: xen-devel@lists.xenproject.org >> Cc: Juergen Gross <jgross@suse.com>; Andrew Cooper <andrew.cooper3@citrix.com>; George Dunlap >> <george.dunlap@citrix.com>; Ian Jackson <iwj@xenproject.org>; Jan Beulich <jbeulich@suse.com>; Julien >> Grall <julien@xen.org>; Stefano Stabellini <sstabellini@kernel.org>; Wei Liu <wl@xen.org> >> Subject: [PATCH v2 1/2] xen/events: access last_priority and last_vcpu_id together >> >> The queue for a fifo event is depending on the vcpu_id and the >> priority of the event. When sending an event it might happen the >> event needs to change queues and the old queue needs to be kept for >> keeping the links between queue elements intact. For this purpose >> the event channel contains last_priority and last_vcpu_id values >> elements for being able to identify the old queue. >> >> In order to avoid races always access last_priority and last_vcpu_id >> with a single atomic operation avoiding any inconsistencies. >> >> Signed-off-by: Juergen Gross <jgross@suse.com> >> --- >> xen/common/event_fifo.c | 25 +++++++++++++++++++------ >> xen/include/xen/sched.h | 3 +-- >> 2 files changed, 20 insertions(+), 8 deletions(-) >> >> diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c >> index fc189152e1..fffbd409c8 100644 >> --- a/xen/common/event_fifo.c >> +++ b/xen/common/event_fifo.c >> @@ -42,6 +42,14 @@ struct evtchn_fifo_domain { >> unsigned int num_evtchns; >> }; >> >> +union evtchn_fifo_lastq { >> + u32 raw; >> + struct { >> + u8 last_priority; >> + u16 last_vcpu_id; >> + }; >> +}; > > I guess you want to s/u32/uint32_t, etc. above. Hmm, yes, probably. 
> >> + >> static inline event_word_t *evtchn_fifo_word_from_port(const struct domain *d, >> unsigned int port) >> { >> @@ -86,16 +94,18 @@ static struct evtchn_fifo_queue *lock_old_queue(const struct domain *d, >> struct vcpu *v; >> struct evtchn_fifo_queue *q, *old_q; >> unsigned int try; >> + union evtchn_fifo_lastq lastq; >> >> for ( try = 0; try < 3; try++ ) >> { >> - v = d->vcpu[evtchn->last_vcpu_id]; >> - old_q = &v->evtchn_fifo->queue[evtchn->last_priority]; >> + lastq.raw = read_atomic(&evtchn->fifo_lastq); >> + v = d->vcpu[lastq.last_vcpu_id]; >> + old_q = &v->evtchn_fifo->queue[lastq.last_priority]; >> >> spin_lock_irqsave(&old_q->lock, *flags); >> >> - v = d->vcpu[evtchn->last_vcpu_id]; >> - q = &v->evtchn_fifo->queue[evtchn->last_priority]; >> + v = d->vcpu[lastq.last_vcpu_id]; >> + q = &v->evtchn_fifo->queue[lastq.last_priority]; >> >> if ( old_q == q ) >> return old_q; >> @@ -246,8 +256,11 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) >> /* Moved to a different queue? */ >> if ( old_q != q ) >> { >> - evtchn->last_vcpu_id = v->vcpu_id; >> - evtchn->last_priority = q->priority; >> + union evtchn_fifo_lastq lastq; >> + >> + lastq.last_vcpu_id = v->vcpu_id; >> + lastq.last_priority = q->priority; >> + write_atomic(&evtchn->fifo_lastq, lastq.raw); >> > > You're going to leak some stack here I think. Perhaps add a 'pad' field between 'last_priority' and 'last_vcpu_id' and zero it? I can do that, but why? This is nothing a guest is supposed to see at any time. Juergen
> -----Original Message----- > From: Jürgen Groß <jgross@suse.com> > Sent: 12 October 2020 10:56 > To: paul@xen.org; xen-devel@lists.xenproject.org > Cc: 'Andrew Cooper' <andrew.cooper3@citrix.com>; 'George Dunlap' <george.dunlap@citrix.com>; 'Ian > Jackson' <iwj@xenproject.org>; 'Jan Beulich' <jbeulich@suse.com>; 'Julien Grall' <julien@xen.org>; > 'Stefano Stabellini' <sstabellini@kernel.org>; 'Wei Liu' <wl@xen.org> > Subject: Re: [PATCH v2 1/2] xen/events: access last_priority and last_vcpu_id together > > On 12.10.20 11:48, Paul Durrant wrote: > >> -----Original Message----- > >> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of Juergen Gross > >> Sent: 12 October 2020 10:28 > >> To: xen-devel@lists.xenproject.org > >> Cc: Juergen Gross <jgross@suse.com>; Andrew Cooper <andrew.cooper3@citrix.com>; George Dunlap > >> <george.dunlap@citrix.com>; Ian Jackson <iwj@xenproject.org>; Jan Beulich <jbeulich@suse.com>; > Julien > >> Grall <julien@xen.org>; Stefano Stabellini <sstabellini@kernel.org>; Wei Liu <wl@xen.org> > >> Subject: [PATCH v2 1/2] xen/events: access last_priority and last_vcpu_id together > >> > >> The queue for a fifo event is depending on the vcpu_id and the > >> priority of the event. When sending an event it might happen the > >> event needs to change queues and the old queue needs to be kept for > >> keeping the links between queue elements intact. For this purpose > >> the event channel contains last_priority and last_vcpu_id values > >> elements for being able to identify the old queue. > >> > >> In order to avoid races always access last_priority and last_vcpu_id > >> with a single atomic operation avoiding any inconsistencies. 
> >> > >> Signed-off-by: Juergen Gross <jgross@suse.com> > >> --- > >> xen/common/event_fifo.c | 25 +++++++++++++++++++------ > >> xen/include/xen/sched.h | 3 +-- > >> 2 files changed, 20 insertions(+), 8 deletions(-) > >> > >> diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c > >> index fc189152e1..fffbd409c8 100644 > >> --- a/xen/common/event_fifo.c > >> +++ b/xen/common/event_fifo.c > >> @@ -42,6 +42,14 @@ struct evtchn_fifo_domain { > >> unsigned int num_evtchns; > >> }; > >> > >> +union evtchn_fifo_lastq { > >> + u32 raw; > >> + struct { > >> + u8 last_priority; > >> + u16 last_vcpu_id; > >> + }; > >> +}; > > > > I guess you want to s/u32/uint32_t, etc. above. > > Hmm, yes, probably. > > > > >> + > >> static inline event_word_t *evtchn_fifo_word_from_port(const struct domain *d, > >> unsigned int port) > >> { > >> @@ -86,16 +94,18 @@ static struct evtchn_fifo_queue *lock_old_queue(const struct domain *d, > >> struct vcpu *v; > >> struct evtchn_fifo_queue *q, *old_q; > >> unsigned int try; > >> + union evtchn_fifo_lastq lastq; > >> > >> for ( try = 0; try < 3; try++ ) > >> { > >> - v = d->vcpu[evtchn->last_vcpu_id]; > >> - old_q = &v->evtchn_fifo->queue[evtchn->last_priority]; > >> + lastq.raw = read_atomic(&evtchn->fifo_lastq); > >> + v = d->vcpu[lastq.last_vcpu_id]; > >> + old_q = &v->evtchn_fifo->queue[lastq.last_priority]; > >> > >> spin_lock_irqsave(&old_q->lock, *flags); > >> > >> - v = d->vcpu[evtchn->last_vcpu_id]; > >> - q = &v->evtchn_fifo->queue[evtchn->last_priority]; > >> + v = d->vcpu[lastq.last_vcpu_id]; > >> + q = &v->evtchn_fifo->queue[lastq.last_priority]; > >> > >> if ( old_q == q ) > >> return old_q; > >> @@ -246,8 +256,11 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) > >> /* Moved to a different queue? 
*/ > >> if ( old_q != q ) > >> { > >> - evtchn->last_vcpu_id = v->vcpu_id; > >> - evtchn->last_priority = q->priority; > >> + union evtchn_fifo_lastq lastq; > >> + > >> + lastq.last_vcpu_id = v->vcpu_id; > >> + lastq.last_priority = q->priority; > >> + write_atomic(&evtchn->fifo_lastq, lastq.raw); > >> > > > > You're going to leak some stack here I think. Perhaps add a 'pad' field between 'last_priority' and > 'last_vcpu_id' and zero it? > > I can do that, but why? This is nothing a guest is supposed to see at > any time. True, but it would also be nice if the value of 'raw' was at least predictable. I guess just adding '= {}' to the declaration would actually be easiest. Paul > > > Juergen
On 12.10.2020 11:27, Juergen Gross wrote: > The queue for a fifo event is depending on the vcpu_id and the > priority of the event. When sending an event it might happen the > event needs to change queues and the old queue needs to be kept for > keeping the links between queue elements intact. For this purpose > the event channel contains last_priority and last_vcpu_id values > elements for being able to identify the old queue. > > In order to avoid races always access last_priority and last_vcpu_id > with a single atomic operation avoiding any inconsistencies. > > Signed-off-by: Juergen Gross <jgross@suse.com> I seem to vaguely recall that at the time this seemingly racy access was done on purpose by David. Did you go look at the old commits to understand whether there really is a race which can't be tolerated within the spec? > --- a/xen/include/xen/sched.h > +++ b/xen/include/xen/sched.h > @@ -114,8 +114,7 @@ struct evtchn > u16 virq; /* state == ECS_VIRQ */ > } u; > u8 priority; > - u8 last_priority; > - u16 last_vcpu_id; > + u32 fifo_lastq; /* Data for fifo events identifying last queue. */ This grows struct evtchn's size on at least 32-bit Arm. I'd like to suggest including "priority" in the union, and call the new field simply "fifo" or some such. Jan
On 13.10.20 15:58, Jan Beulich wrote: > On 12.10.2020 11:27, Juergen Gross wrote: >> The queue for a fifo event is depending on the vcpu_id and the >> priority of the event. When sending an event it might happen the >> event needs to change queues and the old queue needs to be kept for >> keeping the links between queue elements intact. For this purpose >> the event channel contains last_priority and last_vcpu_id values >> elements for being able to identify the old queue. >> >> In order to avoid races always access last_priority and last_vcpu_id >> with a single atomic operation avoiding any inconsistencies. >> >> Signed-off-by: Juergen Gross <jgross@suse.com> > > I seem to vaguely recall that at the time this seemingly racy > access was done on purpose by David. Did you go look at the > old commits to understand whether there really is a race which > can't be tolerated within the spec? At least the comments in the code tell us that the race regarding the writing of priority (not last_priority) is acceptable. Especially Julien was rather worried by the current situation. In case you can convince him the current handling is fine, we can easily drop this patch. > >> --- a/xen/include/xen/sched.h >> +++ b/xen/include/xen/sched.h >> @@ -114,8 +114,7 @@ struct evtchn >> u16 virq; /* state == ECS_VIRQ */ >> } u; >> u8 priority; >> - u8 last_priority; >> - u16 last_vcpu_id; >> + u32 fifo_lastq; /* Data for fifo events identifying last queue. */ > > This grows struct evtchn's size on at least 32-bit Arm. I'd > like to suggest including "priority" in the union, and call the > new field simply "fifo" or some such. This will add quite some complexity as suddenly all writes to the union will need to be made through a cmpxchg() scheme. Regarding the growth: struct evtchn is aligned to 64 bytes. So there is no growth at all, as the size will not be larger than those 64 bytes. Juergen
On 13.10.2020 16:20, Jürgen Groß wrote: > On 13.10.20 15:58, Jan Beulich wrote: >> On 12.10.2020 11:27, Juergen Gross wrote: >>> The queue for a fifo event is depending on the vcpu_id and the >>> priority of the event. When sending an event it might happen the >>> event needs to change queues and the old queue needs to be kept for >>> keeping the links between queue elements intact. For this purpose >>> the event channel contains last_priority and last_vcpu_id values >>> elements for being able to identify the old queue. >>> >>> In order to avoid races always access last_priority and last_vcpu_id >>> with a single atomic operation avoiding any inconsistencies. >>> >>> Signed-off-by: Juergen Gross <jgross@suse.com> >> >> I seem to vaguely recall that at the time this seemingly racy >> access was done on purpose by David. Did you go look at the >> old commits to understand whether there really is a race which >> can't be tolerated within the spec? > > At least the comments in the code tell us that the race regarding > the writing of priority (not last_priority) is acceptable. Ah, then it was comments. I knew I read something to this effect somewhere, recently. > Especially Julien was rather worried by the current situation. In > case you can convince him the current handling is fine, we can > easily drop this patch. Julien, in the light of the above - can you clarify the specific concerns you (still) have? >>> --- a/xen/include/xen/sched.h >>> +++ b/xen/include/xen/sched.h >>> @@ -114,8 +114,7 @@ struct evtchn >>> u16 virq; /* state == ECS_VIRQ */ >>> } u; >>> u8 priority; >>> - u8 last_priority; >>> - u16 last_vcpu_id; >>> + u32 fifo_lastq; /* Data for fifo events identifying last queue. */ >> >> This grows struct evtchn's size on at least 32-bit Arm. I'd >> like to suggest including "priority" in the union, and call the >> new field simply "fifo" or some such. 
> > This will add quite some complexity as suddenly all writes to the > union will need to be made through a cmpxchg() scheme. > > Regarding the growth: struct evtchn is aligned to 64 bytes. So there > is no growth at all, as the size will not be larger than those 64 > bytes. Oh, I didn't spot this attribute, which I consider at least suspicious. Without XSM I'm getting the impression that on 32-bit Arm the structure's size would be 32 bytes or less without it, so it looks as if it shouldn't be there unconditionally. Jan
Hi Jan, On 13/10/2020 15:26, Jan Beulich wrote: > On 13.10.2020 16:20, Jürgen Groß wrote: >> On 13.10.20 15:58, Jan Beulich wrote: >>> On 12.10.2020 11:27, Juergen Gross wrote: >>>> The queue for a fifo event is depending on the vcpu_id and the >>>> priority of the event. When sending an event it might happen the >>>> event needs to change queues and the old queue needs to be kept for >>>> keeping the links between queue elements intact. For this purpose >>>> the event channel contains last_priority and last_vcpu_id values >>>> elements for being able to identify the old queue. >>>> >>>> In order to avoid races always access last_priority and last_vcpu_id >>>> with a single atomic operation avoiding any inconsistencies. >>>> >>>> Signed-off-by: Juergen Gross <jgross@suse.com> >>> >>> I seem to vaguely recall that at the time this seemingly racy >>> access was done on purpose by David. Did you go look at the >>> old commits to understand whether there really is a race which >>> can't be tolerated within the spec? >> >> At least the comments in the code tell us that the race regarding >> the writing of priority (not last_priority) is acceptable. > > Ah, then it was comments. I knew I read something to this effect > somewhere, recently. > >> Especially Julien was rather worried by the current situation. In >> case you can convince him the current handling is fine, we can >> easily drop this patch. > > Julien, in the light of the above - can you clarify the specific > concerns you (still) have? Let me start with that the assumption if evtchn->lock is not held when evtchn_fifo_set_pending() is called. If it is held, then my comment is moot. From my understanding, the goal of lock_old_queue() is to return the old queue used. last_priority and last_vcpu_id may be updated separately and I could not convince myself that it would not be possible to return a queue that is neither the current one nor the old one. 
The following could happen if evtchn->priority and evtchn->notify_vcpu_id keep changing between calls. pCPU0 | pCPU1 | evtchn_fifo_set_pending(v0,...) | | evtchn_fifo_set_pending(v1, ...) [...] | /* Queue has changed */ | evtchn->last_vcpu_id = v0 | | -> evtchn_old_queue() | v = d->vcpu[evtchn->last_vcpu_id]; | old_q = ... | spin_lock(old_q->...) | v = ... | q = ... | /* q and old_q would be the same */ | evtchn->last_priority = priority| If my diagram is correct, then pCPU1 would return a queue that is neither the current nor old one. In which case, I think it would at least be possible to corrupt the queue. From evtchn_fifo_set_pending(): /* * If this event was a tail, the old queue is now empty and * its tail must be invalidated to prevent adding an event to * the old queue from corrupting the new queue. */ if ( old_q->tail == port ) old_q->tail = 0; Did I miss anything? Cheers,
On 14.10.2020 13:40, Julien Grall wrote: > Hi Jan, > > On 13/10/2020 15:26, Jan Beulich wrote: >> On 13.10.2020 16:20, Jürgen Groß wrote: >>> On 13.10.20 15:58, Jan Beulich wrote: >>>> On 12.10.2020 11:27, Juergen Gross wrote: >>>>> The queue for a fifo event is depending on the vcpu_id and the >>>>> priority of the event. When sending an event it might happen the >>>>> event needs to change queues and the old queue needs to be kept for >>>>> keeping the links between queue elements intact. For this purpose >>>>> the event channel contains last_priority and last_vcpu_id values >>>>> elements for being able to identify the old queue. >>>>> >>>>> In order to avoid races always access last_priority and last_vcpu_id >>>>> with a single atomic operation avoiding any inconsistencies. >>>>> >>>>> Signed-off-by: Juergen Gross <jgross@suse.com> >>>> >>>> I seem to vaguely recall that at the time this seemingly racy >>>> access was done on purpose by David. Did you go look at the >>>> old commits to understand whether there really is a race which >>>> can't be tolerated within the spec? >>> >>> At least the comments in the code tell us that the race regarding >>> the writing of priority (not last_priority) is acceptable. >> >> Ah, then it was comments. I knew I read something to this effect >> somewhere, recently. >> >>> Especially Julien was rather worried by the current situation. In >>> case you can convince him the current handling is fine, we can >>> easily drop this patch. >> >> Julien, in the light of the above - can you clarify the specific >> concerns you (still) have? > > Let me start with that the assumption if evtchn->lock is not held when > evtchn_fifo_set_pending() is called. If it is held, then my comment is moot. But this isn't interesting - we know there are paths where it is held, and ones (interdomain sending) where it's the remote port's lock instead which is held. 
What's important here is that a _consistent_ lock be held (but it doesn't need to be evtchn's). > From my understanding, the goal of lock_old_queue() is to return the > old queue used. > > last_priority and last_vcpu_id may be updated separately and I could not > convince myself that it would not be possible to return a queue that is > neither the current one nor the old one. > > The following could happen if evtchn->priority and > evtchn->notify_vcpu_id keeps changing between calls. > > pCPU0 | pCPU1 > | > evtchn_fifo_set_pending(v0,...) | > | evtchn_fifo_set_pending(v1, ...) > [...] | > /* Queue has changed */ | > evtchn->last_vcpu_id = v0 | > | -> evtchn_old_queue() > | v = d->vcpu[evtchn->last_vcpu_id]; > | old_q = ... > | spin_lock(old_q->...) > | v = ... > | q = ... > | /* q and old_q would be the same */ > | > evtchn->las_priority = priority| > > If my diagram is correct, then pCPU1 would return a queue that is > neither the current nor old one. I think I agree. > In which case, I think it would at least be possible to corrupt the > queue. From evtchn_fifo_set_pending(): > > /* > * If this event was a tail, the old queue is now empty and > * its tail must be invalidated to prevent adding an event to > * the old queue from corrupting the new queue. > */ > if ( old_q->tail == port ) > old_q->tail = 0; > > Did I miss anything? I don't think you did. The important point though is that a consistent lock is being held whenever we come here, so two racing set_pending() aren't possible for one and the same evtchn. As a result I don't think the patch here is actually needed. If I take this further, then I think I can reason why it wasn't necessary to add further locking to send_guest_{global,vcpu}_virq(): The virq_lock is the "consistent lock" protecting ECS_VIRQ ports. The spin_barrier() while closing the port guards that side against the port changing to a different ECS_* behind the sending functions' backs. 
And binding such ports sets ->virq_to_evtchn[] last, with a suitable barrier (the unlock). Which leaves send_guest_pirq() before we can drop the IRQ-safe locking again. I guess we would need to work towards using the underlying irq_desc's lock as consistent lock here, but this certainly isn't the case just yet, and I'm not really certain this can be achieved. Jan
On 15.10.20 14:07, Jan Beulich wrote: > On 14.10.2020 13:40, Julien Grall wrote: >> Hi Jan, >> >> On 13/10/2020 15:26, Jan Beulich wrote: >>> On 13.10.2020 16:20, Jürgen Groß wrote: >>>> On 13.10.20 15:58, Jan Beulich wrote: >>>>> On 12.10.2020 11:27, Juergen Gross wrote: >>>>>> The queue for a fifo event is depending on the vcpu_id and the >>>>>> priority of the event. When sending an event it might happen the >>>>>> event needs to change queues and the old queue needs to be kept for >>>>>> keeping the links between queue elements intact. For this purpose >>>>>> the event channel contains last_priority and last_vcpu_id values >>>>>> elements for being able to identify the old queue. >>>>>> >>>>>> In order to avoid races always access last_priority and last_vcpu_id >>>>>> with a single atomic operation avoiding any inconsistencies. >>>>>> >>>>>> Signed-off-by: Juergen Gross <jgross@suse.com> >>>>> >>>>> I seem to vaguely recall that at the time this seemingly racy >>>>> access was done on purpose by David. Did you go look at the >>>>> old commits to understand whether there really is a race which >>>>> can't be tolerated within the spec? >>>> >>>> At least the comments in the code tell us that the race regarding >>>> the writing of priority (not last_priority) is acceptable. >>> >>> Ah, then it was comments. I knew I read something to this effect >>> somewhere, recently. >>> >>>> Especially Julien was rather worried by the current situation. In >>>> case you can convince him the current handling is fine, we can >>>> easily drop this patch. >>> >>> Julien, in the light of the above - can you clarify the specific >>> concerns you (still) have? >> >> Let me start with that the assumption if evtchn->lock is not held when >> evtchn_fifo_set_pending() is called. If it is held, then my comment is moot. > > But this isn't interesting - we know there are paths where it is > held, and ones (interdomain sending) where it's the remote port's > lock instead which is held. 
What's important here is that a > _consistent_ lock be held (but it doesn't need to be evtchn's). > >> From my understanding, the goal of lock_old_queue() is to return the >> old queue used. >> >> last_priority and last_vcpu_id may be updated separately and I could not >> convince myself that it would not be possible to return a queue that is >> neither the current one nor the old one. >> >> The following could happen if evtchn->priority and >> evtchn->notify_vcpu_id keeps changing between calls. >> >> pCPU0 | pCPU1 >> | >> evtchn_fifo_set_pending(v0,...) | >> | evtchn_fifo_set_pending(v1, ...) >> [...] | >> /* Queue has changed */ | >> evtchn->last_vcpu_id = v0 | >> | -> evtchn_old_queue() >> | v = d->vcpu[evtchn->last_vcpu_id]; >> | old_q = ... >> | spin_lock(old_q->...) >> | v = ... >> | q = ... >> | /* q and old_q would be the same */ >> | >> evtchn->las_priority = priority| >> >> If my diagram is correct, then pCPU1 would return a queue that is >> neither the current nor old one. > > I think I agree. > >> In which case, I think it would at least be possible to corrupt the >> queue. From evtchn_fifo_set_pending(): >> >> /* >> * If this event was a tail, the old queue is now empty and >> * its tail must be invalidated to prevent adding an event to >> * the old queue from corrupting the new queue. >> */ >> if ( old_q->tail == port ) >> old_q->tail = 0; >> >> Did I miss anything? > > I don't think you did. The important point though is that a consistent > lock is being held whenever we come here, so two racing set_pending() > aren't possible for one and the same evtchn. As a result I don't think > the patch here is actually needed. Julien, do you agree? Can i drop this patch? Juergen
On 15/10/2020 13:07, Jan Beulich wrote: > On 14.10.2020 13:40, Julien Grall wrote: >> Hi Jan, >> >> On 13/10/2020 15:26, Jan Beulich wrote: >>> On 13.10.2020 16:20, Jürgen Groß wrote: >>>> On 13.10.20 15:58, Jan Beulich wrote: >>>>> On 12.10.2020 11:27, Juergen Gross wrote: >>>>>> The queue for a fifo event is depending on the vcpu_id and the >>>>>> priority of the event. When sending an event it might happen the >>>>>> event needs to change queues and the old queue needs to be kept for >>>>>> keeping the links between queue elements intact. For this purpose >>>>>> the event channel contains last_priority and last_vcpu_id values >>>>>> elements for being able to identify the old queue. >>>>>> >>>>>> In order to avoid races always access last_priority and last_vcpu_id >>>>>> with a single atomic operation avoiding any inconsistencies. >>>>>> >>>>>> Signed-off-by: Juergen Gross <jgross@suse.com> >>>>> >>>>> I seem to vaguely recall that at the time this seemingly racy >>>>> access was done on purpose by David. Did you go look at the >>>>> old commits to understand whether there really is a race which >>>>> can't be tolerated within the spec? >>>> >>>> At least the comments in the code tell us that the race regarding >>>> the writing of priority (not last_priority) is acceptable. >>> >>> Ah, then it was comments. I knew I read something to this effect >>> somewhere, recently. >>> >>>> Especially Julien was rather worried by the current situation. In >>>> case you can convince him the current handling is fine, we can >>>> easily drop this patch. >>> >>> Julien, in the light of the above - can you clarify the specific >>> concerns you (still) have? >> >> Let me start with that the assumption if evtchn->lock is not held when >> evtchn_fifo_set_pending() is called. If it is held, then my comment is moot. > > But this isn't interesting - we know there are paths where it is > held, and ones (interdomain sending) where it's the remote port's > lock instead which is held. 
What's important here is that a > _consistent_ lock be held (but it doesn't need to be evtchn's). Yes, a _consistent_ lock *should* be sufficient. But it is better to use the same lock everywhere so it is easier to reason (see more below). > >> From my understanding, the goal of lock_old_queue() is to return the >> old queue used. >> >> last_priority and last_vcpu_id may be updated separately and I could not >> convince myself that it would not be possible to return a queue that is >> neither the current one nor the old one. >> >> The following could happen if evtchn->priority and >> evtchn->notify_vcpu_id keeps changing between calls. >> >> pCPU0 | pCPU1 >> | >> evtchn_fifo_set_pending(v0,...) | >> | evtchn_fifo_set_pending(v1, ...) >> [...] | >> /* Queue has changed */ | >> evtchn->last_vcpu_id = v0 | >> | -> evtchn_old_queue() >> | v = d->vcpu[evtchn->last_vcpu_id]; >> | old_q = ... >> | spin_lock(old_q->...) >> | v = ... >> | q = ... >> | /* q and old_q would be the same */ >> | >> evtchn->las_priority = priority| >> >> If my diagram is correct, then pCPU1 would return a queue that is >> neither the current nor old one. > > I think I agree. > >> In which case, I think it would at least be possible to corrupt the >> queue. From evtchn_fifo_set_pending(): >> >> /* >> * If this event was a tail, the old queue is now empty and >> * its tail must be invalidated to prevent adding an event to >> * the old queue from corrupting the new queue. >> */ >> if ( old_q->tail == port ) >> old_q->tail = 0; >> >> Did I miss anything? > > I don't think you did. The important point though is that a consistent > lock is being held whenever we come here, so two racing set_pending() > aren't possible for one and the same evtchn. As a result I don't think > the patch here is actually needed. I haven't yet read in full details the rest of the patches to say whether this is necessary or not. However, at a first glance, I think this is not a sane to rely on different lock to protect us. 
And don't get me started on the lack of documentation... Furthermore, the implementation of lock_old_queue() suggests that the code was planned to be lockless. Why would you need the loop otherwise? Therefore, regardless of the rest of the discussion, I think this patch would be useful to have for our peace of mind. > > If I take this further, then I think I can reason why it wasn't > necessary to add further locking to send_guest_{global,vcpu}_virq(): > The virq_lock is the "consistent lock" protecting ECS_VIRQ ports. The > spin_barrier() while closing the port guards that side against the > port changing to a different ECS_* behind the sending functions' backs. > And binding such ports sets ->virq_to_evtchn[] last, with a suitable > barrier (the unlock). This makes sense. > > Which leaves send_guest_pirq() before we can drop the IRQ-safe locking > again. I guess we would need to work towards using the underlying > irq_desc's lock as consistent lock here, but this certainly isn't the > case just yet, and I'm not really certain this can be achieved. I can't comment on the PIRQ code but I think this is a risky approach (see more above). Cheers,
On 16.10.2020 11:36, Julien Grall wrote: > On 15/10/2020 13:07, Jan Beulich wrote: >> On 14.10.2020 13:40, Julien Grall wrote: >>> On 13/10/2020 15:26, Jan Beulich wrote: >>>> On 13.10.2020 16:20, Jürgen Groß wrote: >>>>> Especially Julien was rather worried by the current situation. In >>>>> case you can convince him the current handling is fine, we can >>>>> easily drop this patch. >>>> >>>> Julien, in the light of the above - can you clarify the specific >>>> concerns you (still) have? >>> >>> Let me start with that the assumption if evtchn->lock is not held when >>> evtchn_fifo_set_pending() is called. If it is held, then my comment is moot. >> >> But this isn't interesting - we know there are paths where it is >> held, and ones (interdomain sending) where it's the remote port's >> lock instead which is held. What's important here is that a >> _consistent_ lock be held (but it doesn't need to be evtchn's). > > Yes, a _consistent_ lock *should* be sufficient. But it is better to use > the same lock everywhere so it is easier to reason (see more below). But that's already not the case, due to the way interdomain channels have events sent. You did suggest acquiring both locks, but as indicated at the time I think this goes too far. As far as the doc aspect - we can improve the situation. Iirc it was you who made me add the respective comment ahead of struct evtchn_port_ops. >>> From my understanding, the goal of lock_old_queue() is to return the >>> old queue used. >>> >>> last_priority and last_vcpu_id may be updated separately and I could not >>> convince myself that it would not be possible to return a queue that is >>> neither the current one nor the old one. >>> >>> The following could happen if evtchn->priority and >>> evtchn->notify_vcpu_id keeps changing between calls. >>> >>> pCPU0 | pCPU1 >>> | >>> evtchn_fifo_set_pending(v0,...) | >>> | evtchn_fifo_set_pending(v1, ...) >>> [...] 
| >>> /* Queue has changed */ | >>> evtchn->last_vcpu_id = v0 | >>> | -> evtchn_old_queue() >>> | v = d->vcpu[evtchn->last_vcpu_id]; >>> | old_q = ... >>> | spin_lock(old_q->...) >>> | v = ... >>> | q = ... >>> | /* q and old_q would be the same */ >>> | >>> evtchn->las_priority = priority| >>> >>> If my diagram is correct, then pCPU1 would return a queue that is >>> neither the current nor old one. >> >> I think I agree. >> >>> In which case, I think it would at least be possible to corrupt the >>> queue. From evtchn_fifo_set_pending(): >>> >>> /* >>> * If this event was a tail, the old queue is now empty and >>> * its tail must be invalidated to prevent adding an event to >>> * the old queue from corrupting the new queue. >>> */ >>> if ( old_q->tail == port ) >>> old_q->tail = 0; >>> >>> Did I miss anything? >> >> I don't think you did. The important point though is that a consistent >> lock is being held whenever we come here, so two racing set_pending() >> aren't possible for one and the same evtchn. As a result I don't think >> the patch here is actually needed. > > I haven't yet read in full details the rest of the patches to say > whether this is necessary or not. However, at a first glance, I think > this is not a sane to rely on different lock to protect us. And don't > get me started on the lack of documentation... > > Furthermore, the implementation of old_lock_queue() suggests that the > code was planned to be lockless. Why would you need the loop otherwise? The lock-less aspect of this affects multiple accesses to e.g. the same queue, I think. I'm unconvinced it was really considered whether racing sending on the same channel is also safe this way. > Therefore, regardless the rest of the discussion, I think this patch > would be useful to have for our peace of mind. That's a fair position to take. My counterargument is mainly that readability (and hence maintainability) suffers with those changes. 
>> If I take this further, then I think I can reason why it wasn't >> necessary to add further locking to send_guest_{global,vcpu}_virq(): >> The virq_lock is the "consistent lock" protecting ECS_VIRQ ports. The >> spin_barrier() while closing the port guards that side against the >> port changing to a different ECS_* behind the sending functions' backs. >> And binding such ports sets ->virq_to_evtchn[] last, with a suitable >> barrier (the unlock). > > This makes sense. > >> >> Which leaves send_guest_pirq() before we can drop the IRQ-safe locking >> again. I guess we would need to work towards using the underlying >> irq_desc's lock as consistent lock here, but this certainly isn't the >> case just yet, and I'm not really certain this can be achieved. > I can't comment on the PIRQ code but I think this is a risky approach > (see more above). It may be; one would only know how risky it is once it is being tried. For the moment, with your apparent agreement above, I'll see whether I can put together a relaxation patch for the vIRQ sending. Main question is going to be whether in the process I wouldn't find a reason why this isn't a safe thing to do. Jan
Hi Jan, On 16/10/2020 13:09, Jan Beulich wrote: > On 16.10.2020 11:36, Julien Grall wrote: >> On 15/10/2020 13:07, Jan Beulich wrote: >>> On 14.10.2020 13:40, Julien Grall wrote: >>>> On 13/10/2020 15:26, Jan Beulich wrote: >>>>> On 13.10.2020 16:20, Jürgen Groß wrote: >>>>>> Especially Julien was rather worried by the current situation. In >>>>>> case you can convince him the current handling is fine, we can >>>>>> easily drop this patch. >>>>> >>>>> Julien, in the light of the above - can you clarify the specific >>>>> concerns you (still) have? >>>> >>>> Let me start with that the assumption if evtchn->lock is not held when >>>> evtchn_fifo_set_pending() is called. If it is held, then my comment is moot. >>> >>> But this isn't interesting - we know there are paths where it is >>> held, and ones (interdomain sending) where it's the remote port's >>> lock instead which is held. What's important here is that a >>> _consistent_ lock be held (but it doesn't need to be evtchn's). >> >> Yes, a _consistent_ lock *should* be sufficient. But it is better to use >> the same lock everywhere so it is easier to reason (see more below). > > But that's already not the case, due to the way interdomain channels > have events sent. You did suggest acquiring both locks, but as > indicated at the time I think this goes too far. As far as the doc > aspect - we can improve the situation. Iirc it was you who made me > add the respective comment ahead of struct evtchn_port_ops. > >>>> From my understanding, the goal of lock_old_queue() is to return the >>>> old queue used. >>>> >>>> last_priority and last_vcpu_id may be updated separately and I could not >>>> convince myself that it would not be possible to return a queue that is >>>> neither the current one nor the old one. >>>> >>>> The following could happen if evtchn->priority and >>>> evtchn->notify_vcpu_id keeps changing between calls. >>>> >>>> pCPU0 | pCPU1 >>>> | >>>> evtchn_fifo_set_pending(v0,...) 
| >>>> | evtchn_fifo_set_pending(v1, ...) >>>> [...] | >>>> /* Queue has changed */ | >>>> evtchn->last_vcpu_id = v0 | >>>> | -> evtchn_old_queue() >>>> | v = d->vcpu[evtchn->last_vcpu_id]; >>>> | old_q = ... >>>> | spin_lock(old_q->...) >>>> | v = ... >>>> | q = ... >>>> | /* q and old_q would be the same */ >>>> | >>>> evtchn->las_priority = priority| >>>> >>>> If my diagram is correct, then pCPU1 would return a queue that is >>>> neither the current nor old one. >>> >>> I think I agree. >>> >>>> In which case, I think it would at least be possible to corrupt the >>>> queue. From evtchn_fifo_set_pending(): >>>> >>>> /* >>>> * If this event was a tail, the old queue is now empty and >>>> * its tail must be invalidated to prevent adding an event to >>>> * the old queue from corrupting the new queue. >>>> */ >>>> if ( old_q->tail == port ) >>>> old_q->tail = 0; >>>> >>>> Did I miss anything? >>> >>> I don't think you did. The important point though is that a consistent >>> lock is being held whenever we come here, so two racing set_pending() >>> aren't possible for one and the same evtchn. As a result I don't think >>> the patch here is actually needed. >> >> I haven't yet read in full details the rest of the patches to say >> whether this is necessary or not. However, at a first glance, I think >> this is not a sane to rely on different lock to protect us. And don't >> get me started on the lack of documentation... >> >> Furthermore, the implementation of old_lock_queue() suggests that the >> code was planned to be lockless. Why would you need the loop otherwise? > > The lock-less aspect of this affects multiple accesses to e.g. > the same queue, I think. I don't think we are talking about the same thing. 
What I was referring to is the following code: static struct evtchn_fifo_queue *lock_old_queue(const struct domain *d, struct evtchn *evtchn, unsigned long *flags) { struct vcpu *v; struct evtchn_fifo_queue *q, *old_q; unsigned int try; for ( try = 0; try < 3; try++ ) { v = d->vcpu[evtchn->last_vcpu_id]; old_q = &v->evtchn_fifo->queue[evtchn->last_priority]; spin_lock_irqsave(&old_q->lock, *flags); v = d->vcpu[evtchn->last_vcpu_id]; q = &v->evtchn_fifo->queue[evtchn->last_priority]; if ( old_q == q ) return old_q; spin_unlock_irqrestore(&old_q->lock, *flags); } gprintk(XENLOG_WARNING, "dom%d port %d lost event (too many queue changes)\n", d->domain_id, evtchn->port); return NULL; } Given that evtchn->last_vcpu_id and evtchn->last_priority can only be modified in evtchn_fifo_set_pending(), this suggests that it is expected for the function to be called concurrently multiple times on the same event channel. > I'm unconvinced it was really considered > whether racing sending on the same channel is also safe this way. How would you explain the 3 tries in lock_old_queue() then? > >> Therefore, regardless the rest of the discussion, I think this patch >> would be useful to have for our peace of mind. > > That's a fair position to take. My counterargument is mainly > that readability (and hence maintainability) suffers with those > changes. We surely have different opinions... I don't particularly care about the approach as long as it is *properly* documented. Cheers,
On 20.10.2020 11:25, Julien Grall wrote: > Hi Jan, > > On 16/10/2020 13:09, Jan Beulich wrote: >> On 16.10.2020 11:36, Julien Grall wrote: >>> On 15/10/2020 13:07, Jan Beulich wrote: >>>> On 14.10.2020 13:40, Julien Grall wrote: >>>>> On 13/10/2020 15:26, Jan Beulich wrote: >>>>>> On 13.10.2020 16:20, Jürgen Groß wrote: >>>>>>> Especially Julien was rather worried by the current situation. In >>>>>>> case you can convince him the current handling is fine, we can >>>>>>> easily drop this patch. >>>>>> >>>>>> Julien, in the light of the above - can you clarify the specific >>>>>> concerns you (still) have? >>>>> >>>>> Let me start with that the assumption if evtchn->lock is not held when >>>>> evtchn_fifo_set_pending() is called. If it is held, then my comment is moot. >>>> >>>> But this isn't interesting - we know there are paths where it is >>>> held, and ones (interdomain sending) where it's the remote port's >>>> lock instead which is held. What's important here is that a >>>> _consistent_ lock be held (but it doesn't need to be evtchn's). >>> >>> Yes, a _consistent_ lock *should* be sufficient. But it is better to use >>> the same lock everywhere so it is easier to reason (see more below). >> >> But that's already not the case, due to the way interdomain channels >> have events sent. You did suggest acquiring both locks, but as >> indicated at the time I think this goes too far. As far as the doc >> aspect - we can improve the situation. Iirc it was you who made me >> add the respective comment ahead of struct evtchn_port_ops. >> >>>>> From my understanding, the goal of lock_old_queue() is to return the >>>>> old queue used. >>>>> >>>>> last_priority and last_vcpu_id may be updated separately and I could not >>>>> convince myself that it would not be possible to return a queue that is >>>>> neither the current one nor the old one. >>>>> >>>>> The following could happen if evtchn->priority and >>>>> evtchn->notify_vcpu_id keeps changing between calls. 
>>>>> >>>>> pCPU0 | pCPU1 >>>>> | >>>>> evtchn_fifo_set_pending(v0,...) | >>>>> | evtchn_fifo_set_pending(v1, ...) >>>>> [...] | >>>>> /* Queue has changed */ | >>>>> evtchn->last_vcpu_id = v0 | >>>>> | -> evtchn_old_queue() >>>>> | v = d->vcpu[evtchn->last_vcpu_id]; >>>>> | old_q = ... >>>>> | spin_lock(old_q->...) >>>>> | v = ... >>>>> | q = ... >>>>> | /* q and old_q would be the same */ >>>>> | >>>>> evtchn->las_priority = priority| >>>>> >>>>> If my diagram is correct, then pCPU1 would return a queue that is >>>>> neither the current nor old one. >>>> >>>> I think I agree. >>>> >>>>> In which case, I think it would at least be possible to corrupt the >>>>> queue. From evtchn_fifo_set_pending(): >>>>> >>>>> /* >>>>> * If this event was a tail, the old queue is now empty and >>>>> * its tail must be invalidated to prevent adding an event to >>>>> * the old queue from corrupting the new queue. >>>>> */ >>>>> if ( old_q->tail == port ) >>>>> old_q->tail = 0; >>>>> >>>>> Did I miss anything? >>>> >>>> I don't think you did. The important point though is that a consistent >>>> lock is being held whenever we come here, so two racing set_pending() >>>> aren't possible for one and the same evtchn. As a result I don't think >>>> the patch here is actually needed. >>> >>> I haven't yet read in full details the rest of the patches to say >>> whether this is necessary or not. However, at a first glance, I think >>> this is not a sane to rely on different lock to protect us. And don't >>> get me started on the lack of documentation... >>> >>> Furthermore, the implementation of old_lock_queue() suggests that the >>> code was planned to be lockless. Why would you need the loop otherwise? >> >> The lock-less aspect of this affects multiple accesses to e.g. >> the same queue, I think. > I don't think we are talking about the same thing. 
What I was referring > to is the following code: > > static struct evtchn_fifo_queue *lock_old_queue(const struct domain *d, > struct evtchn *evtchn, > unsigned long *flags) > { > struct vcpu *v; > struct evtchn_fifo_queue *q, *old_q; > unsigned int try; > > for ( try = 0; try < 3; try++ ) > { > v = d->vcpu[evtchn->last_vcpu_id]; > old_q = &v->evtchn_fifo->queue[evtchn->last_priority]; > > spin_lock_irqsave(&old_q->lock, *flags); > > v = d->vcpu[evtchn->last_vcpu_id]; > q = &v->evtchn_fifo->queue[evtchn->last_priority]; > > if ( old_q == q ) > return old_q; > > spin_unlock_irqrestore(&old_q->lock, *flags); > } > > gprintk(XENLOG_WARNING, > "dom%d port %d lost event (too many queue changes)\n", > d->domain_id, evtchn->port); > return NULL; > } > > Given that evtchn->last_vcpu_id and evtchn->last_priority can only be > modified in evtchn_fifo_set_pending(), this suggests that it is expected > for the function to multiple called concurrently on the same event channel. > >> I'm unconvinced it was really considered >> whether racing sending on the same channel is also safe this way. > > How would you explain the 3 try in lock_old_queue then? Queue changes (as said by the gprintk()) can't result from sending alone, but require re-binding to a different vCPU or altering the priority. I'm simply unconvinced that the code indeed fully reflects the original intentions. IOW I'm unsure whether we talk about the same thing ... Jan
On 20/10/2020 10:34, Jan Beulich wrote: > On 20.10.2020 11:25, Julien Grall wrote: >> Given that evtchn->last_vcpu_id and evtchn->last_priority can only be >> modified in evtchn_fifo_set_pending(), this suggests that it is expected >> for the function to multiple called concurrently on the same event channel. >> >>> I'm unconvinced it was really considered >>> whether racing sending on the same channel is also safe this way. >> >> How would you explain the 3 try in lock_old_queue then? > > Queue changes (as said by the gprintk()) can't result from sending > alone, but require re-binding to a different vCPU or altering the > priority. I agree with that. However, this doesn't change the fact that updates to evtchn->last_priority and evtchn->last_vcpu_id can only happen when calling evtchn_fifo_set_pending(). If evtchn_fifo_set_pending() cannot be called concurrently for the same event, then there is *no* way for evtchn->last_{priority, vcpu_id} to be updated concurrently. > I'm simply unconvinced that the code indeed fully reflects > the original intentions. Do you mind (re-)sharing what the original intentions were? Cheers,
On 20.10.2020 12:01, Julien Grall wrote: > > > On 20/10/2020 10:34, Jan Beulich wrote: >> On 20.10.2020 11:25, Julien Grall wrote: >>> Given that evtchn->last_vcpu_id and evtchn->last_priority can only be >>> modified in evtchn_fifo_set_pending(), this suggests that it is expected >>> for the function to multiple called concurrently on the same event channel. >>> >>>> I'm unconvinced it was really considered >>>> whether racing sending on the same channel is also safe this way. >>> >>> How would you explain the 3 try in lock_old_queue then? >> >> Queue changes (as said by the gprintk()) can't result from sending >> alone, but require re-binding to a different vCPU or altering the >> priority. > > I agree with that. However, this doesn't change the fact that update to > evtchn->last_priority and evtchn->last_vcpu can only happen when calling > evtchn_fifo_set_pending(). > > If evtchn_fifo_set_pending() cannot be called concurrently for the same > event, then there is *no* way for evtchn->last_{priority, vcpu} to be > updated concurrently. > >> I'm simply unconvinced that the code indeed fully reflects >> the original intentions. > > Do you mind (re-)sharing what was the original intentions? If only I knew, I would. Jan
diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c index fc189152e1..fffbd409c8 100644 --- a/xen/common/event_fifo.c +++ b/xen/common/event_fifo.c @@ -42,6 +42,14 @@ struct evtchn_fifo_domain { unsigned int num_evtchns; }; +union evtchn_fifo_lastq { + u32 raw; + struct { + u8 last_priority; + u16 last_vcpu_id; + }; +}; + static inline event_word_t *evtchn_fifo_word_from_port(const struct domain *d, unsigned int port) { @@ -86,16 +94,18 @@ static struct evtchn_fifo_queue *lock_old_queue(const struct domain *d, struct vcpu *v; struct evtchn_fifo_queue *q, *old_q; unsigned int try; + union evtchn_fifo_lastq lastq; for ( try = 0; try < 3; try++ ) { - v = d->vcpu[evtchn->last_vcpu_id]; - old_q = &v->evtchn_fifo->queue[evtchn->last_priority]; + lastq.raw = read_atomic(&evtchn->fifo_lastq); + v = d->vcpu[lastq.last_vcpu_id]; + old_q = &v->evtchn_fifo->queue[lastq.last_priority]; spin_lock_irqsave(&old_q->lock, *flags); - v = d->vcpu[evtchn->last_vcpu_id]; - q = &v->evtchn_fifo->queue[evtchn->last_priority]; + v = d->vcpu[lastq.last_vcpu_id]; + q = &v->evtchn_fifo->queue[lastq.last_priority]; if ( old_q == q ) return old_q; @@ -246,8 +256,11 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn) /* Moved to a different queue? */ if ( old_q != q ) { - evtchn->last_vcpu_id = v->vcpu_id; - evtchn->last_priority = q->priority; + union evtchn_fifo_lastq lastq; + + lastq.last_vcpu_id = v->vcpu_id; + lastq.last_priority = q->priority; + write_atomic(&evtchn->fifo_lastq, lastq.raw); spin_unlock_irqrestore(&old_q->lock, flags); spin_lock_irqsave(&q->lock, flags); diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index d8ed83f869..a298ff4df8 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -114,8 +114,7 @@ struct evtchn u16 virq; /* state == ECS_VIRQ */ } u; u8 priority; - u8 last_priority; - u16 last_vcpu_id; + u32 fifo_lastq; /* Data for fifo events identifying last queue. 
*/ #ifdef CONFIG_XSM union { #ifdef XSM_NEED_GENERIC_EVTCHN_SSID
The queue for a fifo event depends on the vcpu_id and the priority of the event. When sending an event it might happen that the event needs to change queues, and the old queue needs to be kept for keeping the links between queue elements intact. For this purpose the event channel contains last_priority and last_vcpu_id elements for being able to identify the old queue. In order to avoid races always access last_priority and last_vcpu_id with a single atomic operation, avoiding any inconsistencies. Signed-off-by: Juergen Gross <jgross@suse.com> --- xen/common/event_fifo.c | 25 +++++++++++++++++++------ xen/include/xen/sched.h | 3 +-- 2 files changed, 20 insertions(+), 8 deletions(-)