Message ID | 56FA779702000078000E0D0B@prv-mh.provo.novell.com (mailing list archive) |
---|---|
State | New, archived |
> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 29 March 2016 11:40
> To: xen-devel
> Cc: Andrew Cooper; Paul Durrant; Chang Jianzhong; Keir (Xen.org)
> Subject: [PATCH v2] x86/HVM: fix forwarding of internally cached requests
>
> Forwarding entire batches to the device model when an individual
> iteration of them got rejected by internal device emulation handlers
> with X86EMUL_UNHANDLEABLE is wrong: The device model would then handle
> all iterations, without the internal handler getting to see any past
> the one it returned failure for. This causes misbehavior in at least
> the MSI-X and VGA code, which want to see all such requests for
> internal tracking/caching purposes. But note that this does not apply
> to buffered I/O requests.
>
> This in turn means that the condition in hvm_process_io_intercept() of
> when to crash the domain was wrong: Since X86EMUL_UNHANDLEABLE can
> validly be returned by the individual device handlers, we mustn't
> blindly crash the domain if such occurs on other than the initial
> iteration. Instead we need to distinguish hvm_copy_*_guest_phys()
> failures from device specific ones, and then the former need to always
> be fatal to the domain (i.e. also on the first iteration), since
> otherwise we again would end up forwarding a request to qemu which the
> internal handler didn't get to see.
>
> The adjustment should be okay even for stdvga's MMIO handling:
> - if it is not caching then the accept function would have failed so we
>   won't get into hvm_process_io_intercept(),
> - if it issued the buffered ioreq then we only get to the p->count
>   reduction if hvm_send_ioreq() actually encountered an error (in which
>   we don't care about the request getting split up).
>
> Also commit 4faffc41d ("x86/hvm: limit reps to avoid the need to handle
> retry") went too far in removing code from hvm_process_io_intercept():
> When there were successfully handled iterations, the function should
> continue to return success with a clipped repeat count.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> Cc: Chang Jianzhong <changjzh@gmail.com>
> ---
> v2: Make hvm_process_io_intercept() always return a valid X86EMUL_*
>     code by moving the domain_crash() invocations up. Add a stdvga
>     related paragraph to the commit message.
> ---
> I assume this also addresses the issue which
> http://lists.xenproject.org/archives/html/xen-devel/2016-01/msg03189.html
> attempted to deal with in a not really acceptable way.
>
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -95,7 +95,7 @@ static const struct hvm_io_handler null_
>  };
>
>  static int hvmemul_do_io(
> -    bool_t is_mmio, paddr_t addr, unsigned long reps, unsigned int size,
> +    bool_t is_mmio, paddr_t addr, unsigned long *reps, unsigned int size,
>      uint8_t dir, bool_t df, bool_t data_is_addr, uintptr_t data)
>  {
>      struct vcpu *curr = current;
> @@ -104,7 +104,7 @@ static int hvmemul_do_io(
>          .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO,
>          .addr = addr,
>          .size = size,
> -        .count = reps,
> +        .count = *reps,
>          .dir = dir,
>          .df = df,
>          .data = data,
> @@ -136,7 +136,7 @@ static int hvmemul_do_io(
>          if ( (p.type != is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO) ||
>               (p.addr != addr) ||
>               (p.size != size) ||
> -             (p.count != reps) ||
> +             (p.count != *reps) ||
>               (p.dir != dir) ||
>               (p.df != df) ||
>               (p.data_is_ptr != data_is_addr) )
> @@ -214,7 +214,7 @@ static int hvmemul_do_io_buffer(
>
>      BUG_ON(buffer == NULL);
>
> -    rc = hvmemul_do_io(is_mmio, addr, *reps, size, dir, df, 0,
> +    rc = hvmemul_do_io(is_mmio, addr, reps, size, dir, df, 0,
>                         (uintptr_t)buffer);
>      if ( rc == X86EMUL_UNHANDLEABLE && dir == IOREQ_READ )
>          memset(buffer, 0xff, size);
> @@ -305,13 +305,13 @@ static int hvmemul_do_io_addr(
>          count = 1;
>      }
>
> -    rc = hvmemul_do_io(is_mmio, addr, count, size, dir, df, 1,
> +    rc = hvmemul_do_io(is_mmio, addr, &count, size, dir, df, 1,
>                         ram_gpa);
> +
>      if ( rc == X86EMUL_OKAY )
> -    {
>          v->arch.hvm_vcpu.hvm_io.mmio_retry = (count < *reps);
> -        *reps = count;
> -    }
> +
> +    *reps = count;
>
>   out:
>      while ( nr_pages )
> --- a/xen/arch/x86/hvm/intercept.c
> +++ b/xen/arch/x86/hvm/intercept.c
> @@ -148,8 +148,8 @@ int hvm_process_io_intercept(const struc
>              ASSERT_UNREACHABLE();
>              /* fall through */
>          default:
> -            rc = X86EMUL_UNHANDLEABLE;
> -            break;
> +            domain_crash(current->domain);
> +            return X86EMUL_UNHANDLEABLE;
>          }
>          if ( rc != X86EMUL_OKAY )
>              break;
> @@ -178,8 +178,8 @@ int hvm_process_io_intercept(const struc
>              ASSERT_UNREACHABLE();
>              /* fall through */
>          default:
> -            rc = X86EMUL_UNHANDLEABLE;
> -            break;
> +            domain_crash(current->domain);
> +            return X86EMUL_UNHANDLEABLE;
>          }
>          if ( rc != X86EMUL_OKAY )
>              break;
> @@ -196,8 +196,20 @@ int hvm_process_io_intercept(const struc
>          }
>      }
>
> -    if ( i != 0 && rc == X86EMUL_UNHANDLEABLE )
> -        domain_crash(current->domain);
> +    if ( i )
> +    {
> +        p->count = i;
> +        rc = X86EMUL_OKAY;
> +    }
> +    else if ( rc == X86EMUL_UNHANDLEABLE )
> +    {
> +        /*
> +         * Don't forward entire batches to the device model: This would
> +         * prevent the internal handlers to see subsequent iterations of
> +         * the request.
> +         */
> +        p->count = 1;
> +    }
>
>      return rc;
>  }
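The behavioural core of the patch is the new tail of hvm_process_io_intercept(): a batch that was partially handled internally now returns success with the repeat count clipped to the handled iterations, while a batch whose very first iteration was rejected is forwarded to the device model one iteration at a time. The following is a minimal, self-contained sketch of just that clipping logic; the types, the fake_handler() callback, and the numeric scenario are invented stand-ins for illustration, not the actual Xen code.

/* Sketch of the repeat-count clipping added to hvm_process_io_intercept(). */
#include <stdio.h>

#define X86EMUL_OKAY          0
#define X86EMUL_UNHANDLEABLE  1

typedef struct {
    unsigned long count;   /* repeat count of the (rep) I/O request */
} ioreq_t;

/* Stand-in for an internal device emulation handler: it accepts the
 * first three iterations and rejects the rest. */
static int fake_handler(unsigned long i)
{
    return (i < 3) ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
}

static int process_intercept(ioreq_t *p)
{
    unsigned long i;
    int rc = X86EMUL_OKAY;

    for ( i = 0; i < p->count; i++ )
    {
        rc = fake_handler(i);
        if ( rc != X86EMUL_OKAY )
            break;
    }

    if ( i )
    {
        /* Some iterations succeeded: report success for just those, so
         * the caller re-issues the remainder as a fresh request. */
        p->count = i;
        rc = X86EMUL_OKAY;
    }
    else if ( rc == X86EMUL_UNHANDLEABLE )
    {
        /* Nothing succeeded: forward only a single iteration to the
         * device model, so the internal handler still gets to see the
         * later iterations. */
        p->count = 1;
    }

    return rc;
}

int main(void)
{
    ioreq_t p = { .count = 8 };
    int rc = process_intercept(&p);

    printf("rc=%d count=%lu\n", rc, p.count);   /* prints rc=0 count=3 */
    return 0;
}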
2016-03-29 18:39 GMT+08:00 Jan Beulich <JBeulich@suse.com>:
> Forwarding entire batches to the device model when an individual
> iteration of them got rejected by internal device emulation handlers
> with X86EMUL_UNHANDLEABLE is wrong: The device model would then handle
> all iterations, without the internal handler getting to see any past
> the one it returned failure for. This causes misbehavior in at least
> the MSI-X and VGA code, which want to see all such requests for
> internal tracking/caching purposes. But note that this does not apply
> to buffered I/O requests.
>
> This in turn means that the condition in hvm_process_io_intercept() of
> when to crash the domain was wrong: Since X86EMUL_UNHANDLEABLE can
> validly be returned by the individual device handlers, we mustn't
> blindly crash the domain if such occurs on other than the initial
> iteration. Instead we need to distinguish hvm_copy_*_guest_phys()
> failures from device specific ones, and then the former need to always
> be fatal to the domain (i.e. also on the first iteration), since
> otherwise we again would end up forwarding a request to qemu which the
> internal handler didn't get to see.
>
> The adjustment should be okay even for stdvga's MMIO handling:
> - if it is not caching then the accept function would have failed so we
>   won't get into hvm_process_io_intercept(),
> - if it issued the buffered ioreq then we only get to the p->count
>   reduction if hvm_send_ioreq() actually encountered an error (in which
>   we don't care about the request getting split up).
>
> Also commit 4faffc41d ("x86/hvm: limit reps to avoid the need to handle
> retry") went too far in removing code from hvm_process_io_intercept():
> When there were successfully handled iterations, the function should
> continue to return success with a clipped repeat count.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Cc: Chang Jianzhong <changjzh@gmail.com>
> ---
> v2: Make hvm_process_io_intercept() always return a valid X86EMUL_*
>     code by moving the domain_crash() invocations up. Add a stdvga
>     related paragraph to the commit message.
> ---
> I assume this also addresses the issue which
> http://lists.xenproject.org/archives/html/xen-devel/2016-01/msg03189.html
> attempted to deal with in a not really acceptable way.
>
> I hope this issue is resolved, but it still exists.
>
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -95,7 +95,7 @@ static const struct hvm_io_handler null_
>  };
>
>  static int hvmemul_do_io(
> -    bool_t is_mmio, paddr_t addr, unsigned long reps, unsigned int size,
> +    bool_t is_mmio, paddr_t addr, unsigned long *reps, unsigned int size,
>      uint8_t dir, bool_t df, bool_t data_is_addr, uintptr_t data)
>  {
>      struct vcpu *curr = current;
> @@ -104,7 +104,7 @@ static int hvmemul_do_io(
>          .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO,
>          .addr = addr,
>          .size = size,
> -        .count = reps,
> +        .count = *reps,
>          .dir = dir,
>          .df = df,
>          .data = data,
> @@ -136,7 +136,7 @@ static int hvmemul_do_io(
>          if ( (p.type != is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO) ||
>               (p.addr != addr) ||
>               (p.size != size) ||
> -             (p.count != reps) ||
> +             (p.count != *reps) ||
>               (p.dir != dir) ||
>               (p.df != df) ||
>               (p.data_is_ptr != data_is_addr) )
> @@ -214,7 +214,7 @@ static int hvmemul_do_io_buffer(
>
>      BUG_ON(buffer == NULL);
>
> -    rc = hvmemul_do_io(is_mmio, addr, *reps, size, dir, df, 0,
> +    rc = hvmemul_do_io(is_mmio, addr, reps, size, dir, df, 0,
>                         (uintptr_t)buffer);
>      if ( rc == X86EMUL_UNHANDLEABLE && dir == IOREQ_READ )
>          memset(buffer, 0xff, size);
> @@ -305,13 +305,13 @@ static int hvmemul_do_io_addr(
>          count = 1;
>      }
>
> -    rc = hvmemul_do_io(is_mmio, addr, count, size, dir, df, 1,
> +    rc = hvmemul_do_io(is_mmio, addr, &count, size, dir, df, 1,
>                         ram_gpa);
> +
>      if ( rc == X86EMUL_OKAY )
> -    {
>          v->arch.hvm_vcpu.hvm_io.mmio_retry = (count < *reps);
> -        *reps = count;
> -    }
> +
> +    *reps = count;
>
>   out:
>      while ( nr_pages )
> --- a/xen/arch/x86/hvm/intercept.c
> +++ b/xen/arch/x86/hvm/intercept.c
> @@ -148,8 +148,8 @@ int hvm_process_io_intercept(const struc
>              ASSERT_UNREACHABLE();
>              /* fall through */
>          default:
> -            rc = X86EMUL_UNHANDLEABLE;
> -            break;
> +            domain_crash(current->domain);
> +            return X86EMUL_UNHANDLEABLE;
>          }
>          if ( rc != X86EMUL_OKAY )
>              break;
> @@ -178,8 +178,8 @@ int hvm_process_io_intercept(const struc
>              ASSERT_UNREACHABLE();
>              /* fall through */
>          default:
> -            rc = X86EMUL_UNHANDLEABLE;
> -            break;
> +            domain_crash(current->domain);
> +            return X86EMUL_UNHANDLEABLE;
>          }
>          if ( rc != X86EMUL_OKAY )
>              break;
> @@ -196,8 +196,20 @@ int hvm_process_io_intercept(const struc
>          }
>      }
>
> -    if ( i != 0 && rc == X86EMUL_UNHANDLEABLE )
> -        domain_crash(current->domain);
> +    if ( i )
> +    {
> +        p->count = i;
> +        rc = X86EMUL_OKAY;
> +    }
> +    else if ( rc == X86EMUL_UNHANDLEABLE )
> +    {
> +        /*
> +         * Don't forward entire batches to the device model: This would
> +         * prevent the internal handlers to see subsequent iterations of
> +         * the request.
> +         */
> +        p->count = 1;
> +    }
>
>      return rc;
>  }
>>> On 30.03.16 at 09:28, <changjzh@gmail.com> wrote:
> 2016-03-29 18:39 GMT+08:00 Jan Beulich <JBeulich@suse.com>:
>> Forwarding entire batches to the device model when an individual
>> iteration of them got rejected by internal device emulation handlers
>> with X86EMUL_UNHANDLEABLE is wrong: The device model would then handle
>> all iterations, without the internal handler getting to see any past
>> the one it returned failure for. This causes misbehavior in at least
>> the MSI-X and VGA code, which want to see all such requests for
>> internal tracking/caching purposes. But note that this does not apply
>> to buffered I/O requests.
>>
>> This in turn means that the condition in hvm_process_io_intercept() of
>> when to crash the domain was wrong: Since X86EMUL_UNHANDLEABLE can
>> validly be returned by the individual device handlers, we mustn't
>> blindly crash the domain if such occurs on other than the initial
>> iteration. Instead we need to distinguish hvm_copy_*_guest_phys()
>> failures from device specific ones, and then the former need to always
>> be fatal to the domain (i.e. also on the first iteration), since
>> otherwise we again would end up forwarding a request to qemu which the
>> internal handler didn't get to see.
>>
>> The adjustment should be okay even for stdvga's MMIO handling:
>> - if it is not caching then the accept function would have failed so we
>>   won't get into hvm_process_io_intercept(),
>> - if it issued the buffered ioreq then we only get to the p->count
>>   reduction if hvm_send_ioreq() actually encountered an error (in which
>>   we don't care about the request getting split up).
>>
>> Also commit 4faffc41d ("x86/hvm: limit reps to avoid the need to handle
>> retry") went too far in removing code from hvm_process_io_intercept():
>> When there were successfully handled iterations, the function should
>> continue to return success with a clipped repeat count.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> Cc: Chang Jianzhong <changjzh@gmail.com>
>> ---
>> v2: Make hvm_process_io_intercept() always return a valid X86EMUL_*
>>     code by moving the domain_crash() invocations up. Add a stdvga
>>     related paragraph to the commit message.
>> ---
>> I assume this also addresses the issue which
>> http://lists.xenproject.org/archives/html/xen-devel/2016-01/msg03189.html
>> attempted to deal with in a not really acceptable way.
>>
>> I hope this issue is resolved, but it still exists.

Coming back to this: Have you made any progress with the analysis?

Jan
(re-adding xen-devel)

>>> On 21.04.16 at 09:48, <changjzh@gmail.com> wrote:
> 2016-04-21 14:24 GMT+08:00 Jan Beulich <JBeulich@suse.com>:
>
>> >>> On 30.03.16 at 09:28, <changjzh@gmail.com> wrote:
>> > 2016-03-29 18:39 GMT+08:00 Jan Beulich <JBeulich@suse.com>:
>> >> Forwarding entire batches to the device model when an individual
>> >> iteration of them got rejected by internal device emulation handlers
>> >> with X86EMUL_UNHANDLEABLE is wrong: The device model would then handle
>> >> all iterations, without the internal handler getting to see any past
>> >> the one it returned failure for. This causes misbehavior in at least
>> >> the MSI-X and VGA code, which want to see all such requests for
>> >> internal tracking/caching purposes. But note that this does not apply
>> >> to buffered I/O requests.
>> >>
>> >> This in turn means that the condition in hvm_process_io_intercept() of
>> >> when to crash the domain was wrong: Since X86EMUL_UNHANDLEABLE can
>> >> validly be returned by the individual device handlers, we mustn't
>> >> blindly crash the domain if such occurs on other than the initial
>> >> iteration. Instead we need to distinguish hvm_copy_*_guest_phys()
>> >> failures from device specific ones, and then the former need to always
>> >> be fatal to the domain (i.e. also on the first iteration), since
>> >> otherwise we again would end up forwarding a request to qemu which the
>> >> internal handler didn't get to see.
>> >>
>> >> The adjustment should be okay even for stdvga's MMIO handling:
>> >> - if it is not caching then the accept function would have failed so we
>> >>   won't get into hvm_process_io_intercept(),
>> >> - if it issued the buffered ioreq then we only get to the p->count
>> >>   reduction if hvm_send_ioreq() actually encountered an error (in which
>> >>   we don't care about the request getting split up).
>> >>
>> >> Also commit 4faffc41d ("x86/hvm: limit reps to avoid the need to handle
>> >> retry") went too far in removing code from hvm_process_io_intercept():
>> >> When there were successfully handled iterations, the function should
>> >> continue to return success with a clipped repeat count.
>> >>
>> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> >> Cc: Chang Jianzhong <changjzh@gmail.com>
>> >> ---
>> >> v2: Make hvm_process_io_intercept() always return a valid X86EMUL_*
>> >>     code by moving the domain_crash() invocations up. Add a stdvga
>> >>     related paragraph to the commit message.
>> >> ---
>> >> I assume this also addresses the issue which
>> >> http://lists.xenproject.org/archives/html/xen-devel/2016-01/msg03189.html
>> >> attempted to deal with in a not really acceptable way.
>> >>
>> >> I hope this issue is resolved, but it still exists.
>>
>> Coming back to this: Have you made any progress with the analysis?
>>
> Xen log messages from the Windows guest show:
>
>     hvmemul_do_io() {
>         ...
>         rc = hvm_io_intercept(&p);   // rc == 1 is printed in log
>         ...
>     }
>
>     int hvm_io_intercept(ioreq_t *p) {
>         ...
>         handler = hvm_find_io_handler(p);   // handler == NULL is printed in log
>
>         if ( handler == NULL ) {
>             return X86EMUL_UNHANDLEABLE;
>         }
>
>         rc = hvm_process_io_intercept(handler, p);   // This function is not called.
>         ...
>     }
>
>     hvm_find_io_handler(ioreq_t *p)
>       -> hvm_mmio_accept(...)
>         -> msixtbl_range(...)
>              The msixtbl_entry cannot be found.
>
> The Windows guest tries to write the MSI-X table before it is registered
> (msixtbl_pt_register()), and does not write it again after the table has
> been registered. This situation may be the root cause.

Ah, but that's again an issue a patch was posted for already:
http://lists.xenproject.org/archives/html/xen-devel/2016-04/msg00041.html
(albeit, as said there, not taking care of all possible cases yet). And
it's a pre-existing issue, not one introduced by any half way recent
change.

Jan
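To illustrate the failure mode discussed above: the MSI-X intercept is range based, so a guest write that arrives before the table range has been registered finds no internal handler, the lookup returns NULL, and the access is forwarded to the device model. The sketch below models only that ordering effect; the struct and function names are invented stand-ins for hvm_find_io_handler()/msixtbl_range()/msixtbl_pt_register(), and the address is arbitrary, not the actual Xen code or layout.

/* Sketch: a range-based intercept only matches after registration. */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct range_handler {
    uint64_t start, end;      /* [start, end) handled internally */
    bool     registered;
};

static struct range_handler msix_table = { 0, 0, false };

/* Stand-in for the accept path: return the handler only if the address
 * falls inside an already-registered range. */
static struct range_handler *find_handler(uint64_t addr)
{
    if ( msix_table.registered &&
         addr >= msix_table.start && addr < msix_table.end )
        return &msix_table;
    return NULL;   /* no handler -> X86EMUL_UNHANDLEABLE -> device model */
}

/* Stand-in for the registration step. */
static void register_range(uint64_t start, uint64_t len)
{
    msix_table.start = start;
    msix_table.end = start + len;
    msix_table.registered = true;
}

int main(void)
{
    uint64_t addr = 0xf2001000;   /* arbitrary example table address */

    printf("before registration: handler=%p\n", (void *)find_handler(addr));
    register_range(0xf2001000, 0x1000);
    printf("after registration:  handler=%p\n", (void *)find_handler(addr));
    return 0;
}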
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -95,7 +95,7 @@ static const struct hvm_io_handler null_
 };
 
 static int hvmemul_do_io(
-    bool_t is_mmio, paddr_t addr, unsigned long reps, unsigned int size,
+    bool_t is_mmio, paddr_t addr, unsigned long *reps, unsigned int size,
     uint8_t dir, bool_t df, bool_t data_is_addr, uintptr_t data)
 {
     struct vcpu *curr = current;
@@ -104,7 +104,7 @@ static int hvmemul_do_io(
         .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO,
         .addr = addr,
         .size = size,
-        .count = reps,
+        .count = *reps,
         .dir = dir,
         .df = df,
         .data = data,
@@ -136,7 +136,7 @@ static int hvmemul_do_io(
         if ( (p.type != is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO) ||
              (p.addr != addr) ||
              (p.size != size) ||
-             (p.count != reps) ||
+             (p.count != *reps) ||
              (p.dir != dir) ||
              (p.df != df) ||
              (p.data_is_ptr != data_is_addr) )
@@ -214,7 +214,7 @@ static int hvmemul_do_io_buffer(
 
     BUG_ON(buffer == NULL);
 
-    rc = hvmemul_do_io(is_mmio, addr, *reps, size, dir, df, 0,
+    rc = hvmemul_do_io(is_mmio, addr, reps, size, dir, df, 0,
                        (uintptr_t)buffer);
     if ( rc == X86EMUL_UNHANDLEABLE && dir == IOREQ_READ )
         memset(buffer, 0xff, size);
@@ -305,13 +305,13 @@ static int hvmemul_do_io_addr(
         count = 1;
     }
 
-    rc = hvmemul_do_io(is_mmio, addr, count, size, dir, df, 1,
+    rc = hvmemul_do_io(is_mmio, addr, &count, size, dir, df, 1,
                        ram_gpa);
+
     if ( rc == X86EMUL_OKAY )
-    {
         v->arch.hvm_vcpu.hvm_io.mmio_retry = (count < *reps);
-        *reps = count;
-    }
+
+    *reps = count;
 
  out:
     while ( nr_pages )
--- a/xen/arch/x86/hvm/intercept.c
+++ b/xen/arch/x86/hvm/intercept.c
@@ -148,8 +148,8 @@ int hvm_process_io_intercept(const struc
             ASSERT_UNREACHABLE();
             /* fall through */
         default:
-            rc = X86EMUL_UNHANDLEABLE;
-            break;
+            domain_crash(current->domain);
+            return X86EMUL_UNHANDLEABLE;
         }
         if ( rc != X86EMUL_OKAY )
             break;
@@ -178,8 +178,8 @@ int hvm_process_io_intercept(const struc
             ASSERT_UNREACHABLE();
             /* fall through */
         default:
-            rc = X86EMUL_UNHANDLEABLE;
-            break;
+            domain_crash(current->domain);
+            return X86EMUL_UNHANDLEABLE;
         }
         if ( rc != X86EMUL_OKAY )
             break;
@@ -196,8 +196,20 @@ int hvm_process_io_intercept(const struc
         }
     }
 
-    if ( i != 0 && rc == X86EMUL_UNHANDLEABLE )
-        domain_crash(current->domain);
+    if ( i )
+    {
+        p->count = i;
+        rc = X86EMUL_OKAY;
+    }
+    else if ( rc == X86EMUL_UNHANDLEABLE )
+    {
+        /*
+         * Don't forward entire batches to the device model: This would
+         * prevent the internal handlers to see subsequent iterations of
+         * the request.
+         */
+        p->count = 1;
+    }
 
     return rc;
 }
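On the emulate.c side, the reason reps is now passed by pointer is that the intercept layer may clip the repeat count, and the caller needs to see the clipped value so it can report how much of the batch actually completed and flag a retry for the remainder (cf. the mmio_retry assignment in hvmemul_do_io_addr()). The sketch below shows only that plumbing; the function names and the "handle at most 3 iterations" behaviour are stand-ins for illustration, not the actual Xen code.

/* Sketch of the reps-by-pointer plumbing between caller and callee. */
#include <stdio.h>

#define X86EMUL_OKAY 0

/* Stand-in for the intercept layer: pretends only 3 of the requested
 * iterations could be handled internally this time around. */
static int io_intercept(unsigned long *count)
{
    if ( *count > 3 )
        *count = 3;
    return X86EMUL_OKAY;
}

/* Stand-in for hvmemul_do_io(): takes reps by pointer so the clipped
 * count becomes visible to its caller. */
static int do_io(unsigned long *reps)
{
    unsigned long count = *reps;
    int rc = io_intercept(&count);

    *reps = count;   /* always report how many iterations were done */
    return rc;
}

int main(void)
{
    unsigned long reps = 8, requested = reps;
    int rc = do_io(&reps);
    int mmio_retry = (reps < requested);   /* cf. hvm_io.mmio_retry */

    printf("rc=%d done=%lu retry=%d\n", rc, reps, mmio_retry);
    return 0;
}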