Message ID: 1468314129-28465-4-git-send-email-yu.c.zhang@linux.intel.com (mailing list archive)
State: New, archived
>>> On 12.07.16 at 11:02, <yu.c.zhang@linux.intel.com> wrote: > @@ -178,8 +179,34 @@ static int hvmemul_do_io( > break; > case X86EMUL_UNHANDLEABLE: > { > - struct hvm_ioreq_server *s = > - hvm_select_ioreq_server(curr->domain, &p); > + struct hvm_ioreq_server *s; > + > + if ( is_mmio ) > + { > + unsigned long gmfn = paddr_to_pfn(addr); > + p2m_type_t p2mt; > + > + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); > + > + if ( p2mt == p2m_ioreq_server ) > + { > + unsigned int flags; > + > + if ( dir != IOREQ_WRITE ) > + s = NULL; > + else > + { > + s = p2m_get_ioreq_server(currd, &flags); > + > + if ( !(flags & P2M_IOREQ_HANDLE_WRITE_ACCESS) ) > + s = NULL; > + } > + } > + else > + s = hvm_select_ioreq_server(currd, &p); > + } > + else > + s = hvm_select_ioreq_server(currd, &p); Wouldn't it both be more natural and make the logic even easier to follow if s got set to NULL up front, all the "else"-s dropped, and a simple if ( !s ) s = hvm_select_ioreq_server(currd, &p); be done in the end? > @@ -5447,6 +5452,21 @@ static int hvmop_set_mem_type( > if ( !is_hvm_domain(d) ) > goto out; > > + if ( a.hvmmem_type == HVMMEM_ioreq_server ) > + { > + unsigned int flags; > + struct hvm_ioreq_server *s; > + > + /* HVMMEM_ioreq_server is only supported for HAP enabled hvm. */ > + if ( !hap_enabled(d) ) > + goto out; > + > + /* Do not change to HVMMEM_ioreq_server if no ioreq server mapped. */ > + s = p2m_get_ioreq_server(d, &flags); > + if ( s == NULL ) > + goto out; Either drop s as an intermediate variable altogether (preferred), or constify it properly. > +int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t id, > + uint32_t type, uint32_t flags) > +{ > + struct hvm_ioreq_server *s; > + int rc; > + > + /* For now, only HVMMEM_ioreq_server is supported. */ > + if ( type != HVMMEM_ioreq_server ) > + return -EINVAL; > + > + /* For now, only write emulation is supported. */ > + if ( flags & ~(XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE) ) > + return -EINVAL; > + > + spin_lock(&d->arch.hvm_domain.ioreq_server.lock); This lock did get converted to a recursive one a little while back. > + rc = -ENOENT; > + list_for_each_entry ( s, > + &d->arch.hvm_domain.ioreq_server.list, > + list_entry ) > + { > + if ( s == d->arch.hvm_domain.default_ioreq_server ) > + continue; > + > + if ( s->id == id ) > + { > + rc = p2m_set_ioreq_server(d, flags, s); > + if ( rc == 0 ) > + dprintk(XENLOG_DEBUG, "%u %s type HVMMEM_ioreq_server.\n", > + s->id, (flags != 0) ? "mapped to" : "unmapped from"); Is this really useful? > --- a/xen/arch/x86/mm/p2m-ept.c > +++ b/xen/arch/x86/mm/p2m-ept.c > @@ -132,6 +132,13 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry, > entry->r = entry->w = entry->x = 1; > entry->a = entry->d = !!cpu_has_vmx_ept_ad; > break; > + case p2m_ioreq_server: > + entry->r = 1; > + entry->w = !(p2m->ioreq.flags & P2M_IOREQ_HANDLE_WRITE_ACCESS); > + entry->x = 0; > + entry->a = !!cpu_has_vmx_ept_ad; > + entry->d = entry->w && cpu_has_vmx_ept_ad; For self-consistency, could this become entry->d = entry->w && entry->a; ? > --- a/xen/arch/x86/mm/shadow/multi.c > +++ b/xen/arch/x86/mm/shadow/multi.c > @@ -3225,8 +3225,7 @@ static int sh_page_fault(struct vcpu *v, > } > > /* Need to hand off device-model MMIO to the device model */ > - if ( p2mt == p2m_mmio_dm > - || (p2mt == p2m_ioreq_server && ft == ft_demand_write) ) > + if ( p2mt == p2m_mmio_dm ) Could you remind me again what the code being removed here gets replaced by, or why it doesn't need any replacement? 
> @@ -336,6 +336,23 @@ struct p2m_domain { > struct ept_data ept; > /* NPT-equivalent structure could be added here. */ > }; > + > + struct { > + spinlock_t lock; > + /* > + * ioreq server who's responsible for the emulation of > + * gfns with specific p2m type(for now, p2m_ioreq_server). > + */ > + struct hvm_ioreq_server *server; > + /* > + * flags specifies whether read, write or both operations > + * are to be emulated by an ioreq server. > + */ > + unsigned int flags; > + > +#define P2M_IOREQ_HANDLE_WRITE_ACCESS XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE > +#define P2M_IOREQ_HANDLE_READ_ACCESS XEN_HVMOP_IOREQ_MEM_ACCESS_READ I think I did say so on a previous iteration already: I can't see the value of these two defines, or in fact I can see these being actively dangerous: The rest of your patch assume that each pair shares their values (as there's no translation between them, but also no BUILD_BUG_ON() ensuring they're identical). > --- a/xen/include/public/hvm/hvm_op.h > +++ b/xen/include/public/hvm/hvm_op.h > @@ -89,7 +89,9 @@ typedef enum { > HVMMEM_unused, /* Placeholder; setting memory to this type > will fail for code after 4.7.0 */ > #endif > - HVMMEM_ioreq_server > + HVMMEM_ioreq_server /* Memory type claimed by an ioreq server; type > + changes to this value are only allowed after > + an ioreq server has claimed its ownership. */ Wouldn't it be worth also noting in the comment that only changes to/from rw are permitted? Jan
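(Concretely, the compile-time tie Jan asks for — assuming the P2M_IOREQ_* aliases are kept at all rather than dropped — could be as small as the following sketch; putting it in p2m_set_ioreq_server() is an assumption about placement:)

    /* Sketch: keep the internal aliases pinned to the public values they mirror. */
    BUILD_BUG_ON(P2M_IOREQ_HANDLE_WRITE_ACCESS != XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE);
    BUILD_BUG_ON(P2M_IOREQ_HANDLE_READ_ACCESS != XEN_HVMOP_IOREQ_MEM_ACCESS_READ);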
On 8/8/2016 11:40 PM, Jan Beulich wrote: >>>> On 12.07.16 at 11:02, <yu.c.zhang@linux.intel.com> wrote: >> @@ -178,8 +179,34 @@ static int hvmemul_do_io( >> break; >> case X86EMUL_UNHANDLEABLE: >> { >> - struct hvm_ioreq_server *s = >> - hvm_select_ioreq_server(curr->domain, &p); >> + struct hvm_ioreq_server *s; >> + >> + if ( is_mmio ) >> + { >> + unsigned long gmfn = paddr_to_pfn(addr); >> + p2m_type_t p2mt; >> + >> + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); >> + >> + if ( p2mt == p2m_ioreq_server ) >> + { >> + unsigned int flags; >> + >> + if ( dir != IOREQ_WRITE ) >> + s = NULL; >> + else >> + { >> + s = p2m_get_ioreq_server(currd, &flags); >> + >> + if ( !(flags & P2M_IOREQ_HANDLE_WRITE_ACCESS) ) >> + s = NULL; >> + } >> + } >> + else >> + s = hvm_select_ioreq_server(currd, &p); >> + } >> + else >> + s = hvm_select_ioreq_server(currd, &p); > Wouldn't it both be more natural and make the logic even easier > to follow if s got set to NULL up front, all the "else"-s dropped, > and a simple > > if ( !s ) > s = hvm_select_ioreq_server(currd, &p); > > be done in the end? Thanks, Jan. I'll have a try. >> @@ -5447,6 +5452,21 @@ static int hvmop_set_mem_type( >> if ( !is_hvm_domain(d) ) >> goto out; >> >> + if ( a.hvmmem_type == HVMMEM_ioreq_server ) >> + { >> + unsigned int flags; >> + struct hvm_ioreq_server *s; >> + >> + /* HVMMEM_ioreq_server is only supported for HAP enabled hvm. */ >> + if ( !hap_enabled(d) ) >> + goto out; >> + >> + /* Do not change to HVMMEM_ioreq_server if no ioreq server mapped. */ >> + s = p2m_get_ioreq_server(d, &flags); >> + if ( s == NULL ) >> + goto out; > Either drop s as an intermediate variable altogether (preferred), or > constify it properly. Got it. >> +int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t id, >> + uint32_t type, uint32_t flags) >> +{ >> + struct hvm_ioreq_server *s; >> + int rc; >> + >> + /* For now, only HVMMEM_ioreq_server is supported. */ >> + if ( type != HVMMEM_ioreq_server ) >> + return -EINVAL; >> + >> + /* For now, only write emulation is supported. */ >> + if ( flags & ~(XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE) ) >> + return -EINVAL; >> + >> + spin_lock(&d->arch.hvm_domain.ioreq_server.lock); > This lock did get converted to a recursive one a little while back. Oh. I see, should use the spin_lock_recursive/spin_unlock_recursive pair. Thanks. >> + rc = -ENOENT; >> + list_for_each_entry ( s, >> + &d->arch.hvm_domain.ioreq_server.list, >> + list_entry ) >> + { >> + if ( s == d->arch.hvm_domain.default_ioreq_server ) >> + continue; >> + >> + if ( s->id == id ) >> + { >> + rc = p2m_set_ioreq_server(d, flags, s); >> + if ( rc == 0 ) >> + dprintk(XENLOG_DEBUG, "%u %s type HVMMEM_ioreq_server.\n", >> + s->id, (flags != 0) ? "mapped to" : "unmapped from"); > Is this really useful? Sorry, are you referring to this debug message? I believe it helps, especially when multiple ioreq servers are claiming/disclaiming their ownership of the HVMMEM_ioreq_server. :) >> --- a/xen/arch/x86/mm/p2m-ept.c >> +++ b/xen/arch/x86/mm/p2m-ept.c >> @@ -132,6 +132,13 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry, >> entry->r = entry->w = entry->x = 1; >> entry->a = entry->d = !!cpu_has_vmx_ept_ad; >> break; >> + case p2m_ioreq_server: >> + entry->r = 1; >> + entry->w = !(p2m->ioreq.flags & P2M_IOREQ_HANDLE_WRITE_ACCESS); >> + entry->x = 0; >> + entry->a = !!cpu_has_vmx_ept_ad; >> + entry->d = entry->w && cpu_has_vmx_ept_ad; > For self-consistency, could this become > > entry->d = entry->w && entry->a; > > ? Yep. 
Thanks. :) >> --- a/xen/arch/x86/mm/shadow/multi.c >> +++ b/xen/arch/x86/mm/shadow/multi.c >> @@ -3225,8 +3225,7 @@ static int sh_page_fault(struct vcpu *v, >> } >> >> /* Need to hand off device-model MMIO to the device model */ >> - if ( p2mt == p2m_mmio_dm >> - || (p2mt == p2m_ioreq_server && ft == ft_demand_write) ) >> + if ( p2mt == p2m_mmio_dm ) > Could you remind me again what the code being removed here gets > replaced by, or why it doesn't need any replacement? HVMMEM_ioreq_server is now only supported for HAP enabled hvm, so shadow code does not need to handle this p2m type. >> @@ -336,6 +336,23 @@ struct p2m_domain { >> struct ept_data ept; >> /* NPT-equivalent structure could be added here. */ >> }; >> + >> + struct { >> + spinlock_t lock; >> + /* >> + * ioreq server who's responsible for the emulation of >> + * gfns with specific p2m type(for now, p2m_ioreq_server). >> + */ >> + struct hvm_ioreq_server *server; >> + /* >> + * flags specifies whether read, write or both operations >> + * are to be emulated by an ioreq server. >> + */ >> + unsigned int flags; >> + >> +#define P2M_IOREQ_HANDLE_WRITE_ACCESS XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE >> +#define P2M_IOREQ_HANDLE_READ_ACCESS XEN_HVMOP_IOREQ_MEM_ACCESS_READ > I think I did say so on a previous iteration already: I can't see the > value of these two defines, or in fact I can see these being actively > dangerous: The rest of your patch assume that each pair shares > their values (as there's no translation between them, but also no > BUILD_BUG_ON() ensuring they're identical). Oh. I thought it was a coding style issue we were discussing - previously, there was a #define using the <underscore><uppercase> pattern, which should be deprecated. For the P2M_IOREQ_HANDLE_WRITE/READ_ACCESS definition, I agree they can be removed. > >> --- a/xen/include/public/hvm/hvm_op.h >> +++ b/xen/include/public/hvm/hvm_op.h >> @@ -89,7 +89,9 @@ typedef enum { >> HVMMEM_unused, /* Placeholder; setting memory to this type >> will fail for code after 4.7.0 */ >> #endif >> - HVMMEM_ioreq_server >> + HVMMEM_ioreq_server /* Memory type claimed by an ioreq server; type >> + changes to this value are only allowed after >> + an ioreq server has claimed its ownership. */ > Wouldn't it be worth also noting in the comment that only changes > to/from rw are permitted? > OK. Will add this comment in next version. Yu
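(For reference, the locking change acknowledged above would just swap the plain lock calls in hvm_map_mem_type_to_ioreq_server() for their recursive variants — a minimal sketch, with the body of the loop unchanged from the patch:)

    spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
    /* ... walk ioreq_server.list and call p2m_set_ioreq_server() as in the patch ... */
    spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock);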
>>> On 09.08.16 at 09:39, <yu.c.zhang@linux.intel.com> wrote: > On 8/8/2016 11:40 PM, Jan Beulich wrote: >>>>> On 12.07.16 at 11:02, <yu.c.zhang@linux.intel.com> wrote: >>> + rc = -ENOENT; >>> + list_for_each_entry ( s, >>> + &d->arch.hvm_domain.ioreq_server.list, >>> + list_entry ) >>> + { >>> + if ( s == d->arch.hvm_domain.default_ioreq_server ) >>> + continue; >>> + >>> + if ( s->id == id ) >>> + { >>> + rc = p2m_set_ioreq_server(d, flags, s); >>> + if ( rc == 0 ) >>> + dprintk(XENLOG_DEBUG, "%u %s type HVMMEM_ioreq_server.\n", >>> + s->id, (flags != 0) ? "mapped to" : "unmapped from"); >> Is this really useful? > > Sorry, are you referring to this debug message? > I believe it helps, especially when multiple ioreq servers are > claiming/disclaiming their ownership of > the HVMMEM_ioreq_server. :) Well, that's a configuration bug anyway right now, so I'm not really with you. But in the end it'll be Paul to judge ... Jan
> -----Original Message----- > From: Jan Beulich [mailto:JBeulich@suse.com] > Sent: 09 August 2016 09:11 > To: Paul Durrant; Yu Zhang > Cc: Andrew Cooper; George Dunlap; Jun Nakajima; Kevin Tian; > zhiyuan.lv@intel.com; xen-devel@lists.xen.org; Tim (Xen.org) > Subject: Re: [Xen-devel] [PATCH v5 3/4] x86/ioreq server: Add HVMOP to > map guest ram with p2m_ioreq_server to an ioreq server. > > >>> On 09.08.16 at 09:39, <yu.c.zhang@linux.intel.com> wrote: > > On 8/8/2016 11:40 PM, Jan Beulich wrote: > >>>>> On 12.07.16 at 11:02, <yu.c.zhang@linux.intel.com> wrote: > >>> + rc = -ENOENT; > >>> + list_for_each_entry ( s, > >>> + &d->arch.hvm_domain.ioreq_server.list, > >>> + list_entry ) > >>> + { > >>> + if ( s == d->arch.hvm_domain.default_ioreq_server ) > >>> + continue; > >>> + > >>> + if ( s->id == id ) > >>> + { > >>> + rc = p2m_set_ioreq_server(d, flags, s); > >>> + if ( rc == 0 ) > >>> + dprintk(XENLOG_DEBUG, "%u %s type > HVMMEM_ioreq_server.\n", > >>> + s->id, (flags != 0) ? "mapped to" : "unmapped from"); > >> Is this really useful? > > > > Sorry, are you referring to this debug message? > > I believe it helps, especially when multiple ioreq servers are > > claiming/disclaiming their ownership of > > the HVMMEM_ioreq_server. :) > > Well, that's a configuration bug anyway right now, so I'm not really > with you. But in the end it'll be Paul to judge ... > Indeed, I don't think the message has a massive amount of value. More useful would be to add a call into keyhandler.c:dump_domains() to display the current claim. Paul > Jan
On 8/9/2016 4:20 PM, Paul Durrant wrote: >> -----Original Message----- >> From: Jan Beulich [mailto:JBeulich@suse.com] >> Sent: 09 August 2016 09:11 >> To: Paul Durrant; Yu Zhang >> Cc: Andrew Cooper; George Dunlap; Jun Nakajima; Kevin Tian; >> zhiyuan.lv@intel.com; xen-devel@lists.xen.org; Tim (Xen.org) >> Subject: Re: [Xen-devel] [PATCH v5 3/4] x86/ioreq server: Add HVMOP to >> map guest ram with p2m_ioreq_server to an ioreq server. >> >>>>> On 09.08.16 at 09:39, <yu.c.zhang@linux.intel.com> wrote: >>> On 8/8/2016 11:40 PM, Jan Beulich wrote: >>>>>>> On 12.07.16 at 11:02, <yu.c.zhang@linux.intel.com> wrote: >>>>> + rc = -ENOENT; >>>>> + list_for_each_entry ( s, >>>>> + &d->arch.hvm_domain.ioreq_server.list, >>>>> + list_entry ) >>>>> + { >>>>> + if ( s == d->arch.hvm_domain.default_ioreq_server ) >>>>> + continue; >>>>> + >>>>> + if ( s->id == id ) >>>>> + { >>>>> + rc = p2m_set_ioreq_server(d, flags, s); >>>>> + if ( rc == 0 ) >>>>> + dprintk(XENLOG_DEBUG, "%u %s type >> HVMMEM_ioreq_server.\n", >>>>> + s->id, (flags != 0) ? "mapped to" : "unmapped from"); >>>> Is this really useful? >>> Sorry, are you referring to this debug message? >>> I believe it helps, especially when multiple ioreq servers are >>> claiming/disclaiming their ownership of >>> the HVMMEM_ioreq_server. :) >> Well, that's a configuration bug anyway right now, so I'm not really >> with you. But in the end it'll be Paul to judge ... >> > Indeed, I don't the message has a massive amount of value. More useful would be to add a call into keyhandler.c:dump_domains() to display the current claim. > All right. Let's remove this debug message. The debug key handler can be updated in a separate patch, is this OK? :-) Yu
> -----Original Message----- > From: Yu Zhang [mailto:yu.c.zhang@linux.intel.com] > Sent: 09 August 2016 09:51 > To: Paul Durrant; Jan Beulich > Cc: Andrew Cooper; George Dunlap; Jun Nakajima; Kevin Tian; > zhiyuan.lv@intel.com; xen-devel@lists.xen.org; Tim (Xen.org) > Subject: Re: [Xen-devel] [PATCH v5 3/4] x86/ioreq server: Add HVMOP to > map guest ram with p2m_ioreq_server to an ioreq server. > > > > On 8/9/2016 4:20 PM, Paul Durrant wrote: > >> -----Original Message----- > >> From: Jan Beulich [mailto:JBeulich@suse.com] > >> Sent: 09 August 2016 09:11 > >> To: Paul Durrant; Yu Zhang > >> Cc: Andrew Cooper; George Dunlap; Jun Nakajima; Kevin Tian; > >> zhiyuan.lv@intel.com; xen-devel@lists.xen.org; Tim (Xen.org) > >> Subject: Re: [Xen-devel] [PATCH v5 3/4] x86/ioreq server: Add HVMOP to > >> map guest ram with p2m_ioreq_server to an ioreq server. > >> > >>>>> On 09.08.16 at 09:39, <yu.c.zhang@linux.intel.com> wrote: > >>> On 8/8/2016 11:40 PM, Jan Beulich wrote: > >>>>>>> On 12.07.16 at 11:02, <yu.c.zhang@linux.intel.com> wrote: > >>>>> + rc = -ENOENT; > >>>>> + list_for_each_entry ( s, > >>>>> + &d->arch.hvm_domain.ioreq_server.list, > >>>>> + list_entry ) > >>>>> + { > >>>>> + if ( s == d->arch.hvm_domain.default_ioreq_server ) > >>>>> + continue; > >>>>> + > >>>>> + if ( s->id == id ) > >>>>> + { > >>>>> + rc = p2m_set_ioreq_server(d, flags, s); > >>>>> + if ( rc == 0 ) > >>>>> + dprintk(XENLOG_DEBUG, "%u %s type > >> HVMMEM_ioreq_server.\n", > >>>>> + s->id, (flags != 0) ? "mapped to" : "unmapped from"); > >>>> Is this really useful? > >>> Sorry, are you referring to this debug message? > >>> I believe it helps, especially when multiple ioreq servers are > >>> claiming/disclaiming their ownership of > >>> the HVMMEM_ioreq_server. :) > >> Well, that's a configuration bug anyway right now, so I'm not really > >> with you. But in the end it'll be Paul to judge ... > >> > > Indeed, I don't the message has a massive amount of value. More useful > would be to add a call into keyhandler.c:dump_domains() to display the > current claim. > > > > All right. Let's remove this debug message. The debug key handler can be > updated in a separate patch, is this OK? :-) > OK with me. Paul > Yu
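(A hypothetical sketch of what such a follow-up patch might add to the domain dump path. The exact hook — most likely an arch-specific helper called from dump_domains() rather than keyhandler.c itself, since p2m_get_ioreq_server() is x86-only — and the message format are assumptions:)

    if ( is_hvm_domain(d) )
    {
        unsigned int flags;
        struct hvm_ioreq_server *s = p2m_get_ioreq_server(d, &flags);

        if ( s != NULL )
            printk("    HVMMEM_ioreq_server claimed by ioreq server %u (flags %#x)\n",
                   s->id, flags);
    }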
On 8/8/2016 11:40 PM, Jan Beulich wrote: >>>> On 12.07.16 at 11:02, <yu.c.zhang@linux.intel.com> wrote: >> @@ -178,8 +179,34 @@ static int hvmemul_do_io( >> break; >> case X86EMUL_UNHANDLEABLE: >> { >> - struct hvm_ioreq_server *s = >> - hvm_select_ioreq_server(curr->domain, &p); >> + struct hvm_ioreq_server *s; >> + >> + if ( is_mmio ) >> + { >> + unsigned long gmfn = paddr_to_pfn(addr); >> + p2m_type_t p2mt; >> + >> + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); >> + >> + if ( p2mt == p2m_ioreq_server ) >> + { >> + unsigned int flags; >> + >> + if ( dir != IOREQ_WRITE ) >> + s = NULL; >> + else >> + { >> + s = p2m_get_ioreq_server(currd, &flags); >> + >> + if ( !(flags & P2M_IOREQ_HANDLE_WRITE_ACCESS) ) >> + s = NULL; >> + } >> + } >> + else >> + s = hvm_select_ioreq_server(currd, &p); >> + } >> + else >> + s = hvm_select_ioreq_server(currd, &p); > Wouldn't it both be more natural and make the logic even easier > to follow if s got set to NULL up front, all the "else"-s dropped, > and a simple > > if ( !s ) > s = hvm_select_ioreq_server(currd, &p); > > be done in the end? > Sorry, Jan. I tried to simplify above code, but found the new code is still not very clean, because in some cases the s is supposed to return NULL instead of to be set from the hvm_select_ioreq_server(). To keep the same logic, the simplified code looks like this: case X86EMUL_UNHANDLEABLE: { - struct hvm_ioreq_server *s = - hvm_select_ioreq_server(curr->domain, &p); + struct hvm_ioreq_server *s = NULL; + p2m_type_t p2mt = p2m_invalid; + + if ( is_mmio && dir == IOREQ_WRITE ) + { + unsigned long gmfn = paddr_to_pfn(addr); + + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); + + if ( p2mt == p2m_ioreq_server ) + { + unsigned int flags; + + s = p2m_get_ioreq_server(currd, &flags); + if ( !(flags & XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE) ) + s = NULL; + } + } + + if ( !s && p2mt != p2m_ioreq_server ) + s = hvm_select_ioreq_server(currd, &p); /* If there is no suitable backing DM, just ignore accesses */ if ( !s ) As you can see, definition of p2mt is moved outside the if ( is_mmio ) judgement, and is checked against p2m_ioreq_server before we search the ioreq server's rangeset in hvm_select_ioreq_server(). So I am not quite satisfied with this simplification. Any suggestions? [snip] Yu
>>> On 10.08.16 at 10:09, <yu.c.zhang@linux.intel.com> wrote: > > On 8/8/2016 11:40 PM, Jan Beulich wrote: >>>>> On 12.07.16 at 11:02, <yu.c.zhang@linux.intel.com> wrote: >>> @@ -178,8 +179,34 @@ static int hvmemul_do_io( >>> break; >>> case X86EMUL_UNHANDLEABLE: >>> { >>> - struct hvm_ioreq_server *s = >>> - hvm_select_ioreq_server(curr->domain, &p); >>> + struct hvm_ioreq_server *s; >>> + >>> + if ( is_mmio ) >>> + { >>> + unsigned long gmfn = paddr_to_pfn(addr); >>> + p2m_type_t p2mt; >>> + >>> + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); >>> + >>> + if ( p2mt == p2m_ioreq_server ) >>> + { >>> + unsigned int flags; >>> + >>> + if ( dir != IOREQ_WRITE ) >>> + s = NULL; >>> + else >>> + { >>> + s = p2m_get_ioreq_server(currd, &flags); >>> + >>> + if ( !(flags & P2M_IOREQ_HANDLE_WRITE_ACCESS) ) >>> + s = NULL; >>> + } >>> + } >>> + else >>> + s = hvm_select_ioreq_server(currd, &p); >>> + } >>> + else >>> + s = hvm_select_ioreq_server(currd, &p); >> Wouldn't it both be more natural and make the logic even easier >> to follow if s got set to NULL up front, all the "else"-s dropped, >> and a simple >> >> if ( !s ) >> s = hvm_select_ioreq_server(currd, &p); >> >> be done in the end? >> > > Sorry, Jan. I tried to simplify above code, but found the new code is > still not very > clean, because in some cases the s is supposed to return NULL instead > of to be > set from the hvm_select_ioreq_server(). > To keep the same logic, the simplified code looks like this: > > case X86EMUL_UNHANDLEABLE: > { > - struct hvm_ioreq_server *s = > - hvm_select_ioreq_server(curr->domain, &p); > + struct hvm_ioreq_server *s = NULL; > + p2m_type_t p2mt = p2m_invalid; > + > + if ( is_mmio && dir == IOREQ_WRITE ) > + { > + unsigned long gmfn = paddr_to_pfn(addr); > + > + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); > + > + if ( p2mt == p2m_ioreq_server ) > + { > + unsigned int flags; > + > + s = p2m_get_ioreq_server(currd, &flags); > + if ( !(flags & XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE) ) > + s = NULL; > + } > + } > + > + if ( !s && p2mt != p2m_ioreq_server ) > + s = hvm_select_ioreq_server(currd, &p); > > /* If there is no suitable backing DM, just ignore accesses */ > if ( !s ) > > As you can see, definition of p2mt is moved outside the if ( is_mmio ) > judgement, > and is checked against p2m_ioreq_server before we search the ioreq > server's rangeset > in hvm_select_ioreq_server(). So I am not quite satisfied with this > simplification. > Any suggestions? I think it's better than the code was before, but an implicit part of my suggestion was that I'm not really convinced the " && p2mt != p2m_ioreq_server" part of your new conditional is really needed: Would it indeed be wrong to hand such a request to the "normal" ioreq server, instead of terminating it right away? (I guess that's a question to you as much as to Paul.) Jan
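(Spelled out, the shape Jan's question points at — valid only if falling back to the "normal" ioreq server selection is indeed acceptable for unclaimed p2m_ioreq_server accesses, which is exactly the open question here — would be roughly this sketch:)

    case X86EMUL_UNHANDLEABLE:
    {
        struct hvm_ioreq_server *s = NULL;

        if ( is_mmio && dir == IOREQ_WRITE )
        {
            unsigned long gmfn = paddr_to_pfn(addr);
            p2m_type_t p2mt;

            (void) get_gfn_query_unlocked(currd, gmfn, &p2mt);

            if ( p2mt == p2m_ioreq_server )
            {
                unsigned int flags;

                s = p2m_get_ioreq_server(currd, &flags);
                if ( !(flags & XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE) )
                    s = NULL;
            }
        }

        if ( !s )
            s = hvm_select_ioreq_server(currd, &p);

        /* ... remainder as in the existing code ... */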
> -----Original Message----- > From: Jan Beulich [mailto:JBeulich@suse.com] > Sent: 10 August 2016 11:33 > To: Paul Durrant; Yu Zhang > Cc: Andrew Cooper; George Dunlap; Jun Nakajima; Kevin Tian; > zhiyuan.lv@intel.com; xen-devel@lists.xen.org; Tim (Xen.org) > Subject: Re: [PATCH v5 3/4] x86/ioreq server: Add HVMOP to map guest ram > with p2m_ioreq_server to an ioreq server. > > >>> On 10.08.16 at 10:09, <yu.c.zhang@linux.intel.com> wrote: > > > > > On 8/8/2016 11:40 PM, Jan Beulich wrote: > >>>>> On 12.07.16 at 11:02, <yu.c.zhang@linux.intel.com> wrote: > >>> @@ -178,8 +179,34 @@ static int hvmemul_do_io( > >>> break; > >>> case X86EMUL_UNHANDLEABLE: > >>> { > >>> - struct hvm_ioreq_server *s = > >>> - hvm_select_ioreq_server(curr->domain, &p); > >>> + struct hvm_ioreq_server *s; > >>> + > >>> + if ( is_mmio ) > >>> + { > >>> + unsigned long gmfn = paddr_to_pfn(addr); > >>> + p2m_type_t p2mt; > >>> + > >>> + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); > >>> + > >>> + if ( p2mt == p2m_ioreq_server ) > >>> + { > >>> + unsigned int flags; > >>> + > >>> + if ( dir != IOREQ_WRITE ) > >>> + s = NULL; > >>> + else > >>> + { > >>> + s = p2m_get_ioreq_server(currd, &flags); > >>> + > >>> + if ( !(flags & P2M_IOREQ_HANDLE_WRITE_ACCESS) ) > >>> + s = NULL; > >>> + } > >>> + } > >>> + else > >>> + s = hvm_select_ioreq_server(currd, &p); > >>> + } > >>> + else > >>> + s = hvm_select_ioreq_server(currd, &p); > >> Wouldn't it both be more natural and make the logic even easier > >> to follow if s got set to NULL up front, all the "else"-s dropped, > >> and a simple > >> > >> if ( !s ) > >> s = hvm_select_ioreq_server(currd, &p); > >> > >> be done in the end? > >> > > > > Sorry, Jan. I tried to simplify above code, but found the new code is > > still not very > > clean, because in some cases the s is supposed to return NULL instead > > of to be > > set from the hvm_select_ioreq_server(). > > To keep the same logic, the simplified code looks like this: > > > > case X86EMUL_UNHANDLEABLE: > > { > > - struct hvm_ioreq_server *s = > > - hvm_select_ioreq_server(curr->domain, &p); > > + struct hvm_ioreq_server *s = NULL; > > + p2m_type_t p2mt = p2m_invalid; > > + > > + if ( is_mmio && dir == IOREQ_WRITE ) > > + { > > + unsigned long gmfn = paddr_to_pfn(addr); > > + > > + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); > > + > > + if ( p2mt == p2m_ioreq_server ) > > + { > > + unsigned int flags; > > + > > + s = p2m_get_ioreq_server(currd, &flags); > > + if ( !(flags & XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE) ) > > + s = NULL; > > + } > > + } > > + > > + if ( !s && p2mt != p2m_ioreq_server ) > > + s = hvm_select_ioreq_server(currd, &p); > > > > /* If there is no suitable backing DM, just ignore accesses */ > > if ( !s ) > > > > As you can see, definition of p2mt is moved outside the if ( is_mmio ) > > judgement, > > and is checked against p2m_ioreq_server before we search the ioreq > > server's rangeset > > in hvm_select_ioreq_server(). So I am not quite satisfied with this > > simplification. > > Any suggestions? > > I think it's better than the code was before, but an implicit part of > my suggestion was that I'm not really convinced the > " && p2mt != p2m_ioreq_server" part of your new conditional is > really needed: Would it indeed be wrong to hand such a request > to the "normal" ioreq server, instead of terminating it right away? > (I guess that's a question to you as much as to Paul.) > Well, I thought the correct semantics for p2m_ioreq_server without any interception are the same as p2m_ram_rw. 
In which case, if we find an ioreq server but it does not want to emulate writes, then we should complete the I/O by writing to guest RAM. But how are we ever going to hit this code path without some form of race with EPT configuration? Paul > Jan
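(A hedged sketch of the "behave like p2m_ram_rw" idea above, inside the X86EMUL_UNHANDLEABLE case of hvmemul_do_io(): a write that no ioreq server wants to emulate is simply completed against guest RAM. The placement, the !data_is_addr restriction and the in-scope p2mt variable are assumptions for illustration, not something taken from the series:)

    if ( !s && p2mt == p2m_ioreq_server && dir == IOREQ_WRITE && !data_is_addr )
    {
        /* No emulator wants the write: treat the page as ordinary RAM. */
        rc = ( hvm_copy_to_guest_phys(addr, &data, size) == HVMCOPY_okay )
             ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
        break;
    }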
On 8/10/2016 6:33 PM, Jan Beulich wrote: >>>> On 10.08.16 at 10:09, <yu.c.zhang@linux.intel.com> wrote: >> On 8/8/2016 11:40 PM, Jan Beulich wrote: >>>>>> On 12.07.16 at 11:02, <yu.c.zhang@linux.intel.com> wrote: >>>> @@ -178,8 +179,34 @@ static int hvmemul_do_io( >>>> break; >>>> case X86EMUL_UNHANDLEABLE: >>>> { >>>> - struct hvm_ioreq_server *s = >>>> - hvm_select_ioreq_server(curr->domain, &p); >>>> + struct hvm_ioreq_server *s; >>>> + >>>> + if ( is_mmio ) >>>> + { >>>> + unsigned long gmfn = paddr_to_pfn(addr); >>>> + p2m_type_t p2mt; >>>> + >>>> + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); >>>> + >>>> + if ( p2mt == p2m_ioreq_server ) >>>> + { >>>> + unsigned int flags; >>>> + >>>> + if ( dir != IOREQ_WRITE ) >>>> + s = NULL; >>>> + else >>>> + { >>>> + s = p2m_get_ioreq_server(currd, &flags); >>>> + >>>> + if ( !(flags & P2M_IOREQ_HANDLE_WRITE_ACCESS) ) >>>> + s = NULL; >>>> + } >>>> + } >>>> + else >>>> + s = hvm_select_ioreq_server(currd, &p); >>>> + } >>>> + else >>>> + s = hvm_select_ioreq_server(currd, &p); >>> Wouldn't it both be more natural and make the logic even easier >>> to follow if s got set to NULL up front, all the "else"-s dropped, >>> and a simple >>> >>> if ( !s ) >>> s = hvm_select_ioreq_server(currd, &p); >>> >>> be done in the end? >>> >> Sorry, Jan. I tried to simplify above code, but found the new code is >> still not very >> clean, because in some cases the s is supposed to return NULL instead >> of to be >> set from the hvm_select_ioreq_server(). >> To keep the same logic, the simplified code looks like this: >> >> case X86EMUL_UNHANDLEABLE: >> { >> - struct hvm_ioreq_server *s = >> - hvm_select_ioreq_server(curr->domain, &p); >> + struct hvm_ioreq_server *s = NULL; >> + p2m_type_t p2mt = p2m_invalid; >> + >> + if ( is_mmio && dir == IOREQ_WRITE ) >> + { >> + unsigned long gmfn = paddr_to_pfn(addr); >> + >> + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); >> + >> + if ( p2mt == p2m_ioreq_server ) >> + { >> + unsigned int flags; >> + >> + s = p2m_get_ioreq_server(currd, &flags); >> + if ( !(flags & XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE) ) >> + s = NULL; >> + } >> + } >> + >> + if ( !s && p2mt != p2m_ioreq_server ) >> + s = hvm_select_ioreq_server(currd, &p); >> >> /* If there is no suitable backing DM, just ignore accesses */ >> if ( !s ) >> >> As you can see, definition of p2mt is moved outside the if ( is_mmio ) >> judgement, >> and is checked against p2m_ioreq_server before we search the ioreq >> server's rangeset >> in hvm_select_ioreq_server(). So I am not quite satisfied with this >> simplification. >> Any suggestions? > I think it's better than the code was before, but an implicit part of > my suggestion was that I'm not really convinced the > " && p2mt != p2m_ioreq_server" part of your new conditional is > really needed: Would it indeed be wrong to hand such a request > to the "normal" ioreq server, instead of terminating it right away? > (I guess that's a question to you as much as to Paul.) > Thanks for your reply, Jan. For " && p2mt != p2m_ioreq_server" condition, it is just to guarantee that if a write operation is trapped, and at the same period, device model changed the status of ioreq server, it should be discarded. A second thought is, I am now more worried about the " && dir == IOREQ_WRITE" condition, which we used previously to set s to NULL if it is not a write operation. 
However, if the HVM guest uses a read-modify-write instruction to operate on a write-protected address, it will be treated as both a read and a write access in ept_handle_violation(). In such a situation, we need to emulate the read access first (by just returning the value being fetched, either in the hypervisor or in the device model), instead of discarding the read access. Thanks Yu
On 8/10/2016 6:43 PM, Paul Durrant wrote: >> -----Original Message----- >> From: Jan Beulich [mailto:JBeulich@suse.com] >> Sent: 10 August 2016 11:33 >> To: Paul Durrant; Yu Zhang >> Cc: Andrew Cooper; George Dunlap; Jun Nakajima; Kevin Tian; >> zhiyuan.lv@intel.com; xen-devel@lists.xen.org; Tim (Xen.org) >> Subject: Re: [PATCH v5 3/4] x86/ioreq server: Add HVMOP to map guest ram >> with p2m_ioreq_server to an ioreq server. >> >>>>> On 10.08.16 at 10:09, <yu.c.zhang@linux.intel.com> wrote: >>> On 8/8/2016 11:40 PM, Jan Beulich wrote: >>>>>>> On 12.07.16 at 11:02, <yu.c.zhang@linux.intel.com> wrote: >>>>> @@ -178,8 +179,34 @@ static int hvmemul_do_io( >>>>> break; >>>>> case X86EMUL_UNHANDLEABLE: >>>>> { >>>>> - struct hvm_ioreq_server *s = >>>>> - hvm_select_ioreq_server(curr->domain, &p); >>>>> + struct hvm_ioreq_server *s; >>>>> + >>>>> + if ( is_mmio ) >>>>> + { >>>>> + unsigned long gmfn = paddr_to_pfn(addr); >>>>> + p2m_type_t p2mt; >>>>> + >>>>> + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); >>>>> + >>>>> + if ( p2mt == p2m_ioreq_server ) >>>>> + { >>>>> + unsigned int flags; >>>>> + >>>>> + if ( dir != IOREQ_WRITE ) >>>>> + s = NULL; >>>>> + else >>>>> + { >>>>> + s = p2m_get_ioreq_server(currd, &flags); >>>>> + >>>>> + if ( !(flags & P2M_IOREQ_HANDLE_WRITE_ACCESS) ) >>>>> + s = NULL; >>>>> + } >>>>> + } >>>>> + else >>>>> + s = hvm_select_ioreq_server(currd, &p); >>>>> + } >>>>> + else >>>>> + s = hvm_select_ioreq_server(currd, &p); >>>> Wouldn't it both be more natural and make the logic even easier >>>> to follow if s got set to NULL up front, all the "else"-s dropped, >>>> and a simple >>>> >>>> if ( !s ) >>>> s = hvm_select_ioreq_server(currd, &p); >>>> >>>> be done in the end? >>>> >>> Sorry, Jan. I tried to simplify above code, but found the new code is >>> still not very >>> clean, because in some cases the s is supposed to return NULL instead >>> of to be >>> set from the hvm_select_ioreq_server(). >>> To keep the same logic, the simplified code looks like this: >>> >>> case X86EMUL_UNHANDLEABLE: >>> { >>> - struct hvm_ioreq_server *s = >>> - hvm_select_ioreq_server(curr->domain, &p); >>> + struct hvm_ioreq_server *s = NULL; >>> + p2m_type_t p2mt = p2m_invalid; >>> + >>> + if ( is_mmio && dir == IOREQ_WRITE ) >>> + { >>> + unsigned long gmfn = paddr_to_pfn(addr); >>> + >>> + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); >>> + >>> + if ( p2mt == p2m_ioreq_server ) >>> + { >>> + unsigned int flags; >>> + >>> + s = p2m_get_ioreq_server(currd, &flags); >>> + if ( !(flags & XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE) ) >>> + s = NULL; >>> + } >>> + } >>> + >>> + if ( !s && p2mt != p2m_ioreq_server ) >>> + s = hvm_select_ioreq_server(currd, &p); >>> >>> /* If there is no suitable backing DM, just ignore accesses */ >>> if ( !s ) >>> >>> As you can see, definition of p2mt is moved outside the if ( is_mmio ) >>> judgement, >>> and is checked against p2m_ioreq_server before we search the ioreq >>> server's rangeset >>> in hvm_select_ioreq_server(). So I am not quite satisfied with this >>> simplification. >>> Any suggestions? >> I think it's better than the code was before, but an implicit part of >> my suggestion was that I'm not really convinced the >> " && p2mt != p2m_ioreq_server" part of your new conditional is >> really needed: Would it indeed be wrong to hand such a request >> to the "normal" ioreq server, instead of terminating it right away? >> (I guess that's a question to you as much as to Paul.) 
>> > Well, I thought the correct semantics for p2m_ioreq_server without any interception are the same as p2m_ram_rw. In which case, if we find an ioreq server but it does not want to emulate writes, then we should complete the I/O by writing to guest RAM. But how are we ever going to hit this code path without some form of race with EPT configuration? > Thanks Paul. I think the p2m type change race condition can be avoided in hvm_hap_nested_page_fault(), by delaying the call to put_gfn() until after the ioreq is delivered. :) Otherwise we will need to keep the previous mem_handler, and use get_gfn_type() instead of get_gfn_query_unlocked() in hvmemul_do_io() - because, without a lock, another race condition can happen when 1> a p2m_ioreq_server gfn is changed to p2m_ram_rw; 2> we get this p2mt with value p2m_ram_rw from get_gfn_query_unlocked(); 3> the p2m type of this gfn is changed back to p2m_ioreq_server; 4> mem_handler is called instead of delivering the ioreq to the device model. However, the mem_handler may really be helpful for the read-modify-write case I mentioned in another mail, so the hypervisor would not need to forward the read ioreq to the device model. B.R. Yu
On 8/10/2016 6:43 PM, Yu Zhang wrote: > > > On 8/10/2016 6:33 PM, Jan Beulich wrote: >>>>> On 10.08.16 at 10:09, <yu.c.zhang@linux.intel.com> wrote: >>> On 8/8/2016 11:40 PM, Jan Beulich wrote: >>>>>>> On 12.07.16 at 11:02, <yu.c.zhang@linux.intel.com> wrote: >>>>> @@ -178,8 +179,34 @@ static int hvmemul_do_io( >>>>> break; >>>>> case X86EMUL_UNHANDLEABLE: >>>>> { >>>>> - struct hvm_ioreq_server *s = >>>>> - hvm_select_ioreq_server(curr->domain, &p); >>>>> + struct hvm_ioreq_server *s; >>>>> + >>>>> + if ( is_mmio ) >>>>> + { >>>>> + unsigned long gmfn = paddr_to_pfn(addr); >>>>> + p2m_type_t p2mt; >>>>> + >>>>> + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); >>>>> + >>>>> + if ( p2mt == p2m_ioreq_server ) >>>>> + { >>>>> + unsigned int flags; >>>>> + >>>>> + if ( dir != IOREQ_WRITE ) >>>>> + s = NULL; >>>>> + else >>>>> + { >>>>> + s = p2m_get_ioreq_server(currd, &flags); >>>>> + >>>>> + if ( !(flags & P2M_IOREQ_HANDLE_WRITE_ACCESS) ) >>>>> + s = NULL; >>>>> + } >>>>> + } >>>>> + else >>>>> + s = hvm_select_ioreq_server(currd, &p); >>>>> + } >>>>> + else >>>>> + s = hvm_select_ioreq_server(currd, &p); >>>> Wouldn't it both be more natural and make the logic even easier >>>> to follow if s got set to NULL up front, all the "else"-s dropped, >>>> and a simple >>>> >>>> if ( !s ) >>>> s = hvm_select_ioreq_server(currd, &p); >>>> >>>> be done in the end? >>>> >>> Sorry, Jan. I tried to simplify above code, but found the new code is >>> still not very >>> clean, because in some cases the s is supposed to return NULL instead >>> of to be >>> set from the hvm_select_ioreq_server(). >>> To keep the same logic, the simplified code looks like this: >>> >>> case X86EMUL_UNHANDLEABLE: >>> { >>> - struct hvm_ioreq_server *s = >>> - hvm_select_ioreq_server(curr->domain, &p); >>> + struct hvm_ioreq_server *s = NULL; >>> + p2m_type_t p2mt = p2m_invalid; >>> + >>> + if ( is_mmio && dir == IOREQ_WRITE ) >>> + { >>> + unsigned long gmfn = paddr_to_pfn(addr); >>> + >>> + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); >>> + >>> + if ( p2mt == p2m_ioreq_server ) >>> + { >>> + unsigned int flags; >>> + >>> + s = p2m_get_ioreq_server(currd, &flags); >>> + if ( !(flags & XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE) ) >>> + s = NULL; >>> + } >>> + } >>> + >>> + if ( !s && p2mt != p2m_ioreq_server ) >>> + s = hvm_select_ioreq_server(currd, &p); >>> >>> /* If there is no suitable backing DM, just ignore >>> accesses */ >>> if ( !s ) >>> >>> As you can see, definition of p2mt is moved outside the if ( is_mmio ) >>> judgement, >>> and is checked against p2m_ioreq_server before we search the ioreq >>> server's rangeset >>> in hvm_select_ioreq_server(). So I am not quite satisfied with this >>> simplification. >>> Any suggestions? >> I think it's better than the code was before, but an implicit part of >> my suggestion was that I'm not really convinced the >> " && p2mt != p2m_ioreq_server" part of your new conditional is >> really needed: Would it indeed be wrong to hand such a request >> to the "normal" ioreq server, instead of terminating it right away? >> (I guess that's a question to you as much as to Paul.) >> > > Thanks for your reply, Jan. > For " && p2mt != p2m_ioreq_server" condition, it is just to guarantee > that if a write > operation is trapped, and at the same period, device model changed the > status of > ioreq server, it should be discarded. Hi Paul & Jan, any comments? 
> A second thought is, I am now more worried about the " && dir == > IOREQ_WRITE" > condition, which we used previously to set s to NULL if it is not a > write operation. > However, if HVM uses a read-modify-write instruction to operate on a > write-protected > address, it will be treated as both read and write accesses in > ept_handle_violation(). In > such situation, we need to emulate the read access first(by just > returning the value being > fetched either in hypervisor or in device model), instead of > discarding the read access. > Any suggestions about this guest read-modify-write instruction situation? Is my depiction clear? :) Thanks Yu
>>> On 11.08.16 at 10:47, <yu.c.zhang@linux.intel.com> wrote: > On 8/10/2016 6:43 PM, Yu Zhang wrote: >> For " && p2mt != p2m_ioreq_server" condition, it is just to guarantee >> that if a write >> operation is trapped, and at the same period, device model changed the >> status of >> ioreq server, it should be discarded. > > Hi Paul & Jan, any comments? Didn't Paul's "should behave like p2m_ram_rw" reply clarify things sufficiently? >> A second thought is, I am now more worried about the " && dir == >> IOREQ_WRITE" >> condition, which we used previously to set s to NULL if it is not a >> write operation. >> However, if HVM uses a read-modify-write instruction to operate on a >> write-protected >> address, it will be treated as both read and write accesses in >> ept_handle_violation(). In >> such situation, we need to emulate the read access first(by just >> returning the value being >> fetched either in hypervisor or in device model), instead of >> discarding the read access. > > Any suggestions about this guest read-modify-write instruction situation? > Is my depiction clear? :) Well, from your earlier reply I concluded that you'd just go ahead and put this into patch form, which we'd then look at. Jan
On 8/11/2016 4:58 PM, Jan Beulich wrote: >>>> On 11.08.16 at 10:47, <yu.c.zhang@linux.intel.com> wrote: >> On 8/10/2016 6:43 PM, Yu Zhang wrote: >>> For " && p2mt != p2m_ioreq_server" condition, it is just to guarantee >>> that if a write >>> operation is trapped, and at the same period, device model changed the >>> status of >>> ioreq server, it should be discarded. >> Hi Paul & Jan, any comments? > Didn't Paul's "should behave like p2m_ram_rw" reply clarify things > sufficiently? Oh, I may have misunderstood. I thought he was talking about the p2m change race condition. :) So please allow me to give a summary of my next steps for this: 1> To prevent the p2m type change race condition, hvm_hap_nested_page_fault() needs to be changed so that p2m_unlock() is triggered only after the write operation is handled; 2> If a gfn with p2m_ioreq_server is trapped, but the current ioreq server has been unmapped, it will be treated as p2m_ram_rw; 3> If a gfn with p2m_ioreq_server is trapped, but the dir is IOREQ_READ, it will be treated as a read-modify-write case. >>> A second thought is, I am now more worried about the " && dir == >>> IOREQ_WRITE" >>> condition, which we used previously to set s to NULL if it is not a >>> write operation. >>> However, if HVM uses a read-modify-write instruction to operate on a >>> write-protected >>> address, it will be treated as both read and write accesses in >>> ept_handle_violation(). In >>> such situation, we need to emulate the read access first(by just >>> returning the value being >>> fetched either in hypervisor or in device model), instead of >>> discarding the read access. >> Any suggestions about this guest read-modify-write instruction situation? >> Is my depiction clear? :) > Well, from your earlier reply I concluded that you'd just go ahead > and put this into patch form, which we'd then look at. OK, thanks. I have given a rough summary in 3> above. I will have to take several days of annual leave from this weekend due to some family urgency, and will be back after Aug 23. I can hardly check the mailing list during this period, sorry for the inconvenience. :( Yu
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c index 855af4d..c235a40 100644 --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -100,6 +100,7 @@ static int hvmemul_do_io( uint8_t dir, bool_t df, bool_t data_is_addr, uintptr_t data) { struct vcpu *curr = current; + struct domain *currd = curr->domain; struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io; ioreq_t p = { .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO, @@ -141,7 +142,7 @@ static int hvmemul_do_io( (p.dir != dir) || (p.df != df) || (p.data_is_ptr != data_is_addr) ) - domain_crash(curr->domain); + domain_crash(currd); if ( data_is_addr ) return X86EMUL_UNHANDLEABLE; @@ -178,8 +179,34 @@ static int hvmemul_do_io( break; case X86EMUL_UNHANDLEABLE: { - struct hvm_ioreq_server *s = - hvm_select_ioreq_server(curr->domain, &p); + struct hvm_ioreq_server *s; + + if ( is_mmio ) + { + unsigned long gmfn = paddr_to_pfn(addr); + p2m_type_t p2mt; + + (void) get_gfn_query_unlocked(currd, gmfn, &p2mt); + + if ( p2mt == p2m_ioreq_server ) + { + unsigned int flags; + + if ( dir != IOREQ_WRITE ) + s = NULL; + else + { + s = p2m_get_ioreq_server(currd, &flags); + + if ( !(flags & P2M_IOREQ_HANDLE_WRITE_ACCESS) ) + s = NULL; + } + } + else + s = hvm_select_ioreq_server(currd, &p); + } + else + s = hvm_select_ioreq_server(currd, &p); /* If there is no suitable backing DM, just ignore accesses */ if ( !s ) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 4453ec0..4d98cc6 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -5410,9 +5410,14 @@ static int hvmop_get_mem_type( static bool_t hvm_allow_p2m_type_change(p2m_type_t old, p2m_type_t new) { + if ( new == p2m_ioreq_server ) + return old == p2m_ram_rw; + + if ( old == p2m_ioreq_server ) + return new == p2m_ram_rw; + if ( p2m_is_ram(old) || - (p2m_is_hole(old) && new == p2m_mmio_dm) || - (old == p2m_ioreq_server && new == p2m_ram_rw) ) + (p2m_is_hole(old) && new == p2m_mmio_dm) ) return 1; return 0; @@ -5447,6 +5452,21 @@ static int hvmop_set_mem_type( if ( !is_hvm_domain(d) ) goto out; + if ( a.hvmmem_type == HVMMEM_ioreq_server ) + { + unsigned int flags; + struct hvm_ioreq_server *s; + + /* HVMMEM_ioreq_server is only supported for HAP enabled hvm. */ + if ( !hap_enabled(d) ) + goto out; + + /* Do not change to HVMMEM_ioreq_server if no ioreq server mapped. */ + s = p2m_get_ioreq_server(d, &flags); + if ( s == NULL ) + goto out; + } + rc = xsm_hvm_control(XSM_DM_PRIV, d, HVMOP_set_mem_type); if ( rc ) goto out; @@ -5509,6 +5529,43 @@ static int hvmop_set_mem_type( return rc; } +static int hvmop_map_mem_type_to_ioreq_server( + XEN_GUEST_HANDLE_PARAM(xen_hvm_map_mem_type_to_ioreq_server_t) uop) +{ + xen_hvm_map_mem_type_to_ioreq_server_t op; + struct domain *d; + int rc; + + if ( copy_from_guest(&op, uop, 1) ) + return -EFAULT; + + rc = rcu_lock_remote_domain_by_id(op.domid, &d); + if ( rc != 0 ) + return rc; + + rc = -EINVAL; + if ( !is_hvm_domain(d) ) + goto out; + + if ( op.pad != 0 ) + goto out; + + /* Only support for HAP enabled hvm. 
*/ + if ( !hap_enabled(d) ) + goto out; + + rc = xsm_hvm_ioreq_server(XSM_DM_PRIV, d, + HVMOP_map_mem_type_to_ioreq_server); + if ( rc != 0 ) + goto out; + + rc = hvm_map_mem_type_to_ioreq_server(d, op.id, op.type, op.flags); + + out: + rcu_unlock_domain(d); + return rc; +} + long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) { unsigned long start_iter, mask; @@ -5548,6 +5605,11 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) guest_handle_cast(arg, xen_hvm_io_range_t)); break; + case HVMOP_map_mem_type_to_ioreq_server: + rc = hvmop_map_mem_type_to_ioreq_server( + guest_handle_cast(arg, xen_hvm_map_mem_type_to_ioreq_server_t)); + break; + case HVMOP_set_ioreq_server_state: rc = hvmop_set_ioreq_server_state( guest_handle_cast(arg, xen_hvm_set_ioreq_server_state_t)); diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c index 7148ac4..36a2298 100644 --- a/xen/arch/x86/hvm/ioreq.c +++ b/xen/arch/x86/hvm/ioreq.c @@ -753,6 +753,8 @@ int hvm_destroy_ioreq_server(struct domain *d, ioservid_t id) domain_pause(d); + p2m_destroy_ioreq_server(d, s); + hvm_ioreq_server_disable(s, 0); list_del(&s->list_entry); @@ -914,6 +916,45 @@ int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id, return rc; } +int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t id, + uint32_t type, uint32_t flags) +{ + struct hvm_ioreq_server *s; + int rc; + + /* For now, only HVMMEM_ioreq_server is supported. */ + if ( type != HVMMEM_ioreq_server ) + return -EINVAL; + + /* For now, only write emulation is supported. */ + if ( flags & ~(XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE) ) + return -EINVAL; + + spin_lock(&d->arch.hvm_domain.ioreq_server.lock); + + rc = -ENOENT; + list_for_each_entry ( s, + &d->arch.hvm_domain.ioreq_server.list, + list_entry ) + { + if ( s == d->arch.hvm_domain.default_ioreq_server ) + continue; + + if ( s->id == id ) + { + rc = p2m_set_ioreq_server(d, flags, s); + if ( rc == 0 ) + dprintk(XENLOG_DEBUG, "%u %s type HVMMEM_ioreq_server.\n", + s->id, (flags != 0) ? 
"mapped to" : "unmapped from"); + + break; + } + } + + spin_unlock(&d->arch.hvm_domain.ioreq_server.lock); + return rc; +} + int hvm_set_ioreq_server_state(struct domain *d, ioservid_t id, bool_t enabled) { diff --git a/xen/arch/x86/mm/hap/nested_hap.c b/xen/arch/x86/mm/hap/nested_hap.c index d41bb09..aa90a62 100644 --- a/xen/arch/x86/mm/hap/nested_hap.c +++ b/xen/arch/x86/mm/hap/nested_hap.c @@ -174,7 +174,7 @@ nestedhap_walk_L0_p2m(struct p2m_domain *p2m, paddr_t L1_gpa, paddr_t *L0_gpa, if ( *p2mt == p2m_mmio_direct ) goto direct_mmio_out; rc = NESTEDHVM_PAGEFAULT_MMIO; - if ( *p2mt == p2m_mmio_dm ) + if ( *p2mt == p2m_mmio_dm || *p2mt == p2m_ioreq_server ) goto out; rc = NESTEDHVM_PAGEFAULT_L0_ERROR; diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c index 7adc77d..5f06d40 100644 --- a/xen/arch/x86/mm/p2m-ept.c +++ b/xen/arch/x86/mm/p2m-ept.c @@ -132,6 +132,13 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry, entry->r = entry->w = entry->x = 1; entry->a = entry->d = !!cpu_has_vmx_ept_ad; break; + case p2m_ioreq_server: + entry->r = 1; + entry->w = !(p2m->ioreq.flags & P2M_IOREQ_HANDLE_WRITE_ACCESS); + entry->x = 0; + entry->a = !!cpu_has_vmx_ept_ad; + entry->d = entry->w && cpu_has_vmx_ept_ad; + break; case p2m_mmio_direct: entry->r = entry->x = 1; entry->w = !rangeset_contains_singleton(mmio_ro_ranges, @@ -171,7 +178,6 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry, entry->a = entry->d = !!cpu_has_vmx_ept_ad; break; case p2m_grant_map_ro: - case p2m_ioreq_server: entry->r = 1; entry->w = entry->x = 0; entry->a = !!cpu_has_vmx_ept_ad; diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c index 05aaf8f..6209e7b 100644 --- a/xen/arch/x86/mm/p2m-pt.c +++ b/xen/arch/x86/mm/p2m-pt.c @@ -72,7 +72,9 @@ static const unsigned long pgt[] = { PGT_l3_page_table }; -static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn, +static unsigned long p2m_type_to_flags(const struct p2m_domain *p2m, + p2m_type_t t, + mfn_t mfn, unsigned int level) { unsigned long flags; @@ -94,8 +96,13 @@ static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn, default: return flags | _PAGE_NX_BIT; case p2m_grant_map_ro: - case p2m_ioreq_server: return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT; + case p2m_ioreq_server: + flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT; + if ( p2m->ioreq.flags & P2M_IOREQ_HANDLE_WRITE_ACCESS ) + return flags & ~_PAGE_RW; + else + return flags; case p2m_ram_ro: case p2m_ram_logdirty: case p2m_ram_shared: @@ -442,7 +449,8 @@ static int do_recalc(struct p2m_domain *p2m, unsigned long gfn) p2m_type_t p2mt = p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask) ? p2m_ram_logdirty : p2m_ram_rw; unsigned long mfn = l1e_get_pfn(e); - unsigned long flags = p2m_type_to_flags(p2mt, _mfn(mfn), level); + unsigned long flags = p2m_type_to_flags(p2m, p2mt, + _mfn(mfn), level); if ( level ) { @@ -579,7 +587,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ? 
l3e_from_pfn(mfn_x(mfn), - p2m_type_to_flags(p2mt, mfn, 2) | _PAGE_PSE) + p2m_type_to_flags(p2m, p2mt, mfn, 2) | _PAGE_PSE) : l3e_empty(); entry_content.l1 = l3e_content.l3; @@ -615,7 +623,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ) entry_content = p2m_l1e_from_pfn(mfn_x(mfn), - p2m_type_to_flags(p2mt, mfn, 0)); + p2m_type_to_flags(p2m, p2mt, mfn, 0)); else entry_content = l1e_empty(); @@ -651,7 +659,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ) l2e_content = l2e_from_pfn(mfn_x(mfn), - p2m_type_to_flags(p2mt, mfn, 1) | + p2m_type_to_flags(p2m, p2mt, mfn, 1) | _PAGE_PSE); else l2e_content = l2e_empty(); diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index 16733a4..5567181 100644 --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -83,6 +83,8 @@ static int p2m_initialise(struct domain *d, struct p2m_domain *p2m) else p2m_pt_init(p2m); + spin_lock_init(&p2m->ioreq.lock); + return ret; } @@ -289,6 +291,78 @@ void p2m_memory_type_changed(struct domain *d) } } +int p2m_set_ioreq_server(struct domain *d, + unsigned int flags, + struct hvm_ioreq_server *s) +{ + struct p2m_domain *p2m = p2m_get_hostp2m(d); + int rc; + + /* + * Use lock to prevent concurrent setting requirements + * from multiple ioreq serers. + */ + spin_lock(&p2m->ioreq.lock); + + /* Unmap ioreq server from p2m type by passing flags with 0. */ + if ( flags == 0 ) + { + rc = -EINVAL; + if ( p2m->ioreq.server != s ) + goto out; + + p2m->ioreq.server = NULL; + p2m->ioreq.flags = 0; + } + else + { + rc = -EBUSY; + if ( p2m->ioreq.server != NULL ) + goto out; + + p2m->ioreq.server = s; + p2m->ioreq.flags = flags; + } + + rc = 0; + + out: + spin_unlock(&p2m->ioreq.lock); + + return rc; +} + +struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d, + unsigned int *flags) +{ + struct p2m_domain *p2m = p2m_get_hostp2m(d); + struct hvm_ioreq_server *s; + + spin_lock(&p2m->ioreq.lock); + + s = p2m->ioreq.server; + *flags = p2m->ioreq.flags; + + spin_unlock(&p2m->ioreq.lock); + return s; +} + +void p2m_destroy_ioreq_server(const struct domain *d, + const struct hvm_ioreq_server *s) +{ + struct p2m_domain *p2m = p2m_get_hostp2m(d); + + spin_lock(&p2m->ioreq.lock); + + if ( p2m->ioreq.server == s ) + { + p2m->ioreq.server = NULL; + p2m->ioreq.flags = 0; + } + + spin_unlock(&p2m->ioreq.lock); +} + void p2m_enable_hardware_log_dirty(struct domain *d) { struct p2m_domain *p2m = p2m_get_hostp2m(d); diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c index 8c4b20e..2f40816 100644 --- a/xen/arch/x86/mm/shadow/multi.c +++ b/xen/arch/x86/mm/shadow/multi.c @@ -3225,8 +3225,7 @@ static int sh_page_fault(struct vcpu *v, } /* Need to hand off device-model MMIO to the device model */ - if ( p2mt == p2m_mmio_dm - || (p2mt == p2m_ioreq_server && ft == ft_demand_write) ) + if ( p2mt == p2m_mmio_dm ) { gpa = guest_walk_to_gpa(&gw); goto mmio; diff --git a/xen/include/asm-x86/hvm/ioreq.h b/xen/include/asm-x86/hvm/ioreq.h index fbf2c74..b43667a 100644 --- a/xen/include/asm-x86/hvm/ioreq.h +++ b/xen/include/asm-x86/hvm/ioreq.h @@ -37,6 +37,8 @@ int hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id, int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id, uint32_t type, uint64_t start, uint64_t end); +int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t 
id, + uint32_t type, uint32_t flags); int hvm_set_ioreq_server_state(struct domain *d, ioservid_t id, bool_t enabled); diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h index 6785669..0950a91 100644 --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -89,7 +89,8 @@ typedef unsigned int p2m_query_t; | p2m_to_mask(p2m_ram_paging_out) \ | p2m_to_mask(p2m_ram_paged) \ | p2m_to_mask(p2m_ram_paging_in) \ - | p2m_to_mask(p2m_ram_shared)) + | p2m_to_mask(p2m_ram_shared) \ + | p2m_to_mask(p2m_ioreq_server)) /* Types that represent a physmap hole that is ok to replace with a shared * entry */ @@ -111,8 +112,7 @@ typedef unsigned int p2m_query_t; #define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty) \ | p2m_to_mask(p2m_ram_ro) \ | p2m_to_mask(p2m_grant_map_ro) \ - | p2m_to_mask(p2m_ram_shared) \ - | p2m_to_mask(p2m_ioreq_server)) + | p2m_to_mask(p2m_ram_shared)) /* Write-discard types, which should discard the write operations */ #define P2M_DISCARD_WRITE_TYPES (p2m_to_mask(p2m_ram_ro) \ @@ -336,6 +336,23 @@ struct p2m_domain { struct ept_data ept; /* NPT-equivalent structure could be added here. */ }; + + struct { + spinlock_t lock; + /* + * ioreq server who's responsible for the emulation of + * gfns with specific p2m type(for now, p2m_ioreq_server). + */ + struct hvm_ioreq_server *server; + /* + * flags specifies whether read, write or both operations + * are to be emulated by an ioreq server. + */ + unsigned int flags; + +#define P2M_IOREQ_HANDLE_WRITE_ACCESS XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE +#define P2M_IOREQ_HANDLE_READ_ACCESS XEN_HVMOP_IOREQ_MEM_ACCESS_READ + } ioreq; }; /* get host p2m table */ @@ -842,6 +859,12 @@ static inline unsigned int p2m_get_iommu_flags(p2m_type_t p2mt) return flags; } +int p2m_set_ioreq_server(struct domain *d, unsigned int flags, + struct hvm_ioreq_server *s); +struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d, + unsigned int *flags); +void p2m_destroy_ioreq_server(const struct domain *d, const struct hvm_ioreq_server *s); + #endif /* _XEN_P2M_H */ /* diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h index b3e45cf..d484c5f 100644 --- a/xen/include/public/hvm/hvm_op.h +++ b/xen/include/public/hvm/hvm_op.h @@ -89,7 +89,9 @@ typedef enum { HVMMEM_unused, /* Placeholder; setting memory to this type will fail for code after 4.7.0 */ #endif - HVMMEM_ioreq_server + HVMMEM_ioreq_server /* Memory type claimed by an ioreq server; type + changes to this value are only allowed after + an ioreq server has claimed its ownership. */ } hvmmem_type_t; /* Following tools-only interfaces may change in future. */ @@ -383,6 +385,33 @@ struct xen_hvm_set_ioreq_server_state { typedef struct xen_hvm_set_ioreq_server_state xen_hvm_set_ioreq_server_state_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_ioreq_server_state_t); +/* + * HVMOP_map_mem_type_to_ioreq_server : map or unmap the IOREQ Server <id> + * to specific memroy type <type> + * for specific accesses <flags> + * + * For now, flags only accept the value of HVMOP_IOREQ_MEM_ACCESS_WRITE, + * which means only write operations are to be forwarded to an ioreq server. + * Support for the emulation of read operations can be added when an ioreq + * server has such requirement in future. 
+ */ +#define HVMOP_map_mem_type_to_ioreq_server 26 +struct xen_hvm_map_mem_type_to_ioreq_server { + domid_t domid; /* IN - domain to be serviced */ + ioservid_t id; /* IN - ioreq server id */ + uint16_t type; /* IN - memory type */ + uint16_t pad; + uint32_t flags; /* IN - types of accesses to be forwarded to the + ioreq server. flags with 0 means to unmap the + ioreq server */ + +#define XEN_HVMOP_IOREQ_MEM_ACCESS_READ (1u << 0) +#define XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE (1u << 1) +}; +typedef struct xen_hvm_map_mem_type_to_ioreq_server + xen_hvm_map_mem_type_to_ioreq_server_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_map_mem_type_to_ioreq_server_t); + #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ #if defined(__i386__) || defined(__x86_64__)
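(To close the loop on the new interface, here is a hedged, out-of-series sketch of how a device model might invoke HVMOP_map_mem_type_to_ioreq_server from user space via raw libxencall; a real device model would more likely use whatever libxc wrapper the rest of the series adds, so everything below is illustrative only:)

    #define __XEN_TOOLS__ 1   /* the map/unmap structure and ioservid_t are tools-only */
    #include <stdint.h>
    #include <string.h>
    #include <xencall.h>
    #include <xen/xen.h>
    #include <xen/hvm/hvm_op.h>

    static int map_ioreq_mem_type(xencall_handle *xcall, domid_t domid,
                                  ioservid_t id, uint32_t flags)
    {
        xen_hvm_map_mem_type_to_ioreq_server_t *arg =
            xencall_alloc_buffer(xcall, sizeof(*arg));
        int rc;

        if ( !arg )
            return -1;

        memset(arg, 0, sizeof(*arg));
        arg->domid = domid;
        arg->id    = id;
        arg->type  = HVMMEM_ioreq_server;
        arg->flags = flags;   /* XEN_HVMOP_IOREQ_MEM_ACCESS_WRITE, or 0 to unmap */

        rc = xencall2(xcall, __HYPERVISOR_hvm_op,
                      HVMOP_map_mem_type_to_ioreq_server,
                      (uint64_t)(uintptr_t)arg);

        xencall_free_buffer(xcall, arg);
        return rc;
    }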