diff mbox

[v6,4/4] x86/ioreq server: Reset outstanding p2m_ioreq_server entries when an ioreq server unmaps.

Message ID 1472813240-11011-5-git-send-email-yu.c.zhang@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yu Zhang Sept. 2, 2016, 10:47 a.m. UTC
This patch resets p2m_ioreq_server entries back to p2m_ram_rw,
after an ioreq server has unmapped. The resync is done both
asynchronously with the current p2m_change_entry_type_global()
interface, and synchronously by iterating the p2m table. The
synchronous resetting is necessary because we need to guarantee
the p2m table is clean before another ioreq server is mapped.
And since the sweeping of p2m table could be time consuming,
it is done with hypercall continuation. Asynchronous approach
is also taken so that p2m_ioreq_server entries can also be reset
when the HVM is scheduled between hypercall continuations.

This patch also disallows live migration, when there's still any
outstanding p2m_ioreq_server entry left. The core reason is our
current implementation of p2m_change_entry_type_global() can not
tell the state of p2m_ioreq_server entries(can not decide if an
entry is to be emulated or to be resynced).

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
---
Cc: Paul Durrant <paul.durrant@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>

changes in v2: 
  - Move the calculation of ioreq server page entry_cout into 
    p2m_change_type_one() so that we do not need a seperate lock.
    Note: entry_count is also calculated in resolve_misconfig()/
    do_recalc(), fortunately callers of both routines have p2m
    lock protected already.
  - Simplify logic in hvmop_set_mem_type().
  - Introduce routine p2m_finish_type_change() to walk the p2m 
    table and do the p2m reset.
---
 xen/arch/x86/hvm/hvm.c    | 38 +++++++++++++++++++++++++++++++++---
 xen/arch/x86/mm/hap/hap.c |  9 +++++++++
 xen/arch/x86/mm/p2m-ept.c |  6 +++++-
 xen/arch/x86/mm/p2m-pt.c  | 10 ++++++++--
 xen/arch/x86/mm/p2m.c     | 49 +++++++++++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/p2m.h |  9 ++++++++-
 6 files changed, 114 insertions(+), 7 deletions(-)

Comments

Jan Beulich Sept. 5, 2016, 2:47 p.m. UTC | #1
>>> On 02.09.16 at 12:47, <yu.c.zhang@linux.intel.com> wrote:
> @@ -5551,7 +5553,35 @@ static int hvmop_map_mem_type_to_ioreq_server(
>      if ( rc != 0 )
>          goto out;
>  
> -    rc = hvm_map_mem_type_to_ioreq_server(d, op.id, op.type, op.flags);
> +    if ( gfn == 0 )
> +        rc = hvm_map_mem_type_to_ioreq_server(d, op.id, op.type, op.flags);
> +
> +    /*
> +     * Iterate p2m table when an ioreq server unmaps from p2m_ioreq_server,
> +     * and reset the remaining p2m_ioreq_server entries back to p2m_ram_rw.
> +     */
> +    if ( op.flags == 0 && rc == 0 )
> +    {
> +        struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +
> +        while ( read_atomic(&p2m->ioreq.entry_count) &&
> +                gfn <= p2m->max_mapped_pfn )
> +        {
> +            unsigned long gfn_end = gfn + HVMOP_op_mask;
> +
> +            p2m_finish_type_change(d, gfn, gfn_end,
> +                                   p2m_ioreq_server, p2m_ram_rw);
> +
> +            /* Check for continuation if it's not the last iteration. */
> +            if ( ++gfn_end <= p2m->max_mapped_pfn &&
> +                hypercall_preempt_check() )
> +            {
> +                rc = -ERESTART;
> +                *iter = gfn_end;
> +                goto out;
> +            }
> +        }

"gfn" doesn't get updated, so if no preemption happens here, the
same GFN range will get acted on a second time (and so on, until
eventually preemption becomes necessary).

Also when you don't really need to use "goto", please try to avoid
it - here you could easily use just "break" instead.

> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -545,6 +545,9 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
>                      e.ipat = ipat;
>                      if ( e.recalc && p2m_is_changeable(e.sa_p2mt) )
>                      {
> +                         if ( e.sa_p2mt == p2m_ioreq_server )
> +                             p2m->ioreq.entry_count--;

ISTR that George had asked you to put this accounting elsewhere.

And then I'd really like you to add assertions making sure this
count doesn't underflow.

> @@ -965,7 +968,8 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
>      if ( is_epte_valid(ept_entry) )
>      {
>          if ( (recalc || ept_entry->recalc) &&
> -             p2m_is_changeable(ept_entry->sa_p2mt) )
> +             p2m_is_changeable(ept_entry->sa_p2mt) &&
> +             (ept_entry->sa_p2mt != p2m_ioreq_server) )
>              *t = p2m_is_logdirty_range(p2m, gfn, gfn) ? p2m_ram_logdirty
>                                                        : p2m_ram_rw;
>          else

Considering this (and at least one more similar adjustment further
down), is it really appropriate to include p2m_ioreq_server in the
set of "changeable" types?

> +void p2m_finish_type_change(struct domain *d,
> +                           unsigned long start, unsigned long end,
> +                           p2m_type_t ot, p2m_type_t nt)
> +{
> +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +    p2m_type_t t;
> +    unsigned long gfn = start;
> +
> +    ASSERT(start <= end);
> +    ASSERT(ot != nt);
> +    ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));
> +
> +    p2m_lock(p2m);
> +
> +    end = (end > p2m->max_mapped_pfn) ? p2m->max_mapped_pfn : end;

min() or an if() without else.

> +    while ( gfn <= end )
> +    {
> +        get_gfn_query_unlocked(d, gfn, &t);
> +
> +        if ( t == ot )
> +            p2m_change_type_one(d, gfn, t, nt);
> +
> +        gfn++;
> +    }
> +
> +    p2m_unlock(p2m);
> +}

And then I wonder why p2m_change_type_range() can't be used
instead of adding this new function.

Jan
Yu Zhang Sept. 9, 2016, 7:24 a.m. UTC | #2
On 9/9/2016 1:26 PM, Yu Zhang wrote:
> >>> On 02.09.16 at 12:47, <yu.c.zhang@xxxxxxxxxxxxxxx> wrote:
> > @@ -5551,7 +5553,35 @@ static int hvmop_map_mem_type_to_ioreq_server(
> >      if ( rc != 0 )
> >          goto out;
> >
> > -    rc = hvm_map_mem_type_to_ioreq_server(d, op.id, op.type, 
> op.flags);
> > +    if ( gfn == 0 )
> > +        rc = hvm_map_mem_type_to_ioreq_server(d, op.id, op.type, 
> op.flags);
> > +
> > +    /*
> > +     * Iterate p2m table when an ioreq server unmaps from 
> p2m_ioreq_server,
> > +     * and reset the remaining p2m_ioreq_server entries back to 
> p2m_ram_rw.
> > +     */
> > +    if ( op.flags == 0 && rc == 0 )
> > +    {
> > +        struct p2m_domain *p2m = p2m_get_hostp2m(d);
> > +
> > +        while ( read_atomic(&p2m->ioreq.entry_count) &&
> > +                gfn <= p2m->max_mapped_pfn )
> > +        {
> > +            unsigned long gfn_end = gfn + HVMOP_op_mask;
> > +
> > +            p2m_finish_type_change(d, gfn, gfn_end,
> > +                                   p2m_ioreq_server, p2m_ram_rw);
> > +
> > +            /* Check for continuation if it's not the last 
> iteration. */
> > +            if ( ++gfn_end <= p2m->max_mapped_pfn &&
> > +                hypercall_preempt_check() )
> > +            {
> > +                rc = -ERESTART;
> > +                *iter = gfn_end;
> > +                goto out;
> > +            }
> > +        }
>
> "gfn" doesn't get updated, so if no preemption happens here, the
> same GFN range will get acted on a second time (and so on, until
> eventually preemption becomes necessary).

Oh, right. Thanks for pointing out. :)

>
> Also when you don't really need to use "goto", please try to avoid
> it - here you could easily use just "break" instead.

OK.

>
> > --- a/xen/arch/x86/mm/p2m-ept.c
> > +++ b/xen/arch/x86/mm/p2m-ept.c
> > @@ -545,6 +545,9 @@ static int resolve_misconfig(struct p2m_domain 
> *p2m,
> > unsigned long gfn)
> >                      e.ipat = ipat;
> >                      if ( e.recalc && p2m_is_changeable(e.sa_p2mt) )
> >                      {
> > +                         if ( e.sa_p2mt == p2m_ioreq_server )
> > +                             p2m->ioreq.entry_count--;
>
> ISTR that George had asked you to put this accounting elsewhere.
>

Yes. You have good memory. : )

George's suggestion is to put inside atomic_write_ept_entry(), which is 
a quite core routine,
and is only for ept. And my suggestion is to put inside the 
p2m_type_change_one() and routine
resolve_misconfig()/do_recalc() as well, so that we can support both 
Intel EPT and AMD NPT -
like the p2m_change_entry_type_global().

I had given a more detailed explanation in a reply on Aug 30 in the v5 
thread. :)

> And then I'd really like you to add assertions making sure this
> count doesn't underflow.

OK.

>
> > @@ -965,7 +968,8 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
> >      if ( is_epte_valid(ept_entry) )
> >      {
> >          if ( (recalc || ept_entry->recalc) &&
> > -             p2m_is_changeable(ept_entry->sa_p2mt) )
> > +             p2m_is_changeable(ept_entry->sa_p2mt) &&
> > +             (ept_entry->sa_p2mt != p2m_ioreq_server) )
> >              *t = p2m_is_logdirty_range(p2m, gfn, gfn) ? 
> p2m_ram_logdirty
> >                                                        : p2m_ram_rw;
> >          else
>
> Considering this (and at least one more similar adjustment further
> down), is it really appropriate to include p2m_ioreq_server in the
> set of "changeable" types?

Well, I agree p2m_ioreq_server do have different behaviors than the
p2m_log_dirty, but removing p2m_ioreq_server from changeable type
would need more specific code for the p2m_ioreq_server in routines like
resolve_misconfig(), do_recalc() and p2m_change_entry_type_global() etc.
I've tried this approach and abandoned later. :)

>
> > +void p2m_finish_type_change(struct domain *d,
> > +                           unsigned long start, unsigned long end,
> > +                           p2m_type_t ot, p2m_type_t nt)
> > +{
> > +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> > +    p2m_type_t t;
> > +    unsigned long gfn = start;
> > +
> > +    ASSERT(start <= end);
> > +    ASSERT(ot != nt);
> > +    ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));
> > +
> > +    p2m_lock(p2m);
> > +
> > +    end = (end > p2m->max_mapped_pfn) ? p2m->max_mapped_pfn : end;
>
> min() or an if() without else.

Got it. Thanks

>
> > +    while ( gfn <= end )
> > +    {
> > +        get_gfn_query_unlocked(d, gfn, &t);
> > +
> > +        if ( t == ot )
> > +            p2m_change_type_one(d, gfn, t, nt);
> > +
> > +        gfn++;
> > +    }
> > +
> > +    p2m_unlock(p2m);
> > +}
>
> And then I wonder why p2m_change_type_range() can't be used
> instead of adding this new function.
>

Well, like p2m_change_entry_type_global() , p2m_change_type_range() also
does the change asynchronously. And here this routine is introduced
to synchronously reset the p2m type.

> Jan

Thanks
Yu
Jan Beulich Sept. 9, 2016, 8:20 a.m. UTC | #3
>>> On 09.09.16 at 09:24, <yu.c.zhang@linux.intel.com> wrote:
> On 9/9/2016 1:26 PM, Yu Zhang wrote:
>> >>> On 02.09.16 at 12:47, <yu.c.zhang@xxxxxxxxxxxxxxx> wrote:
>> > @@ -965,7 +968,8 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
>> >      if ( is_epte_valid(ept_entry) )
>> >      {
>> >          if ( (recalc || ept_entry->recalc) &&
>> > -             p2m_is_changeable(ept_entry->sa_p2mt) )
>> > +             p2m_is_changeable(ept_entry->sa_p2mt) &&
>> > +             (ept_entry->sa_p2mt != p2m_ioreq_server) )
>> >              *t = p2m_is_logdirty_range(p2m, gfn, gfn) ? 
>> p2m_ram_logdirty
>> >                                                        : p2m_ram_rw;
>> >          else
>>
>> Considering this (and at least one more similar adjustment further
>> down), is it really appropriate to include p2m_ioreq_server in the
>> set of "changeable" types?
> 
> Well, I agree p2m_ioreq_server do have different behaviors than the
> p2m_log_dirty, but removing p2m_ioreq_server from changeable type
> would need more specific code for the p2m_ioreq_server in routines like
> resolve_misconfig(), do_recalc() and p2m_change_entry_type_global() etc.
> I've tried this approach and abandoned later. :)

I can see that the other option might require more adjustments, in
which case I guess this variant would be fine if you created another
helper (well named) inline function instead of open coding this in
several places. Of course such dissimilar handling in the various
places p2m_is_changeable() gets used right now will additionally
require good reasoning - after all that predicate exists to express
the similarities between different code paths.

>> > +    while ( gfn <= end )
>> > +    {
>> > +        get_gfn_query_unlocked(d, gfn, &t);
>> > +
>> > +        if ( t == ot )
>> > +            p2m_change_type_one(d, gfn, t, nt);
>> > +
>> > +        gfn++;
>> > +    }
>> > +
>> > +    p2m_unlock(p2m);
>> > +}
>>
>> And then I wonder why p2m_change_type_range() can't be used
>> instead of adding this new function.
>>
> 
> Well, like p2m_change_entry_type_global() , p2m_change_type_range() also
> does the change asynchronously. And here this routine is introduced
> to synchronously reset the p2m type.

Ah, of course. Silly me.

Jan
Yu Zhang Sept. 9, 2016, 9:24 a.m. UTC | #4
On 9/9/2016 4:20 PM, Jan Beulich wrote:
>>>> On 09.09.16 at 09:24, <yu.c.zhang@linux.intel.com> wrote:
>> On 9/9/2016 1:26 PM, Yu Zhang wrote:
>>>>>> On 02.09.16 at 12:47, <yu.c.zhang@xxxxxxxxxxxxxxx> wrote:
>>>> @@ -965,7 +968,8 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
>>>>       if ( is_epte_valid(ept_entry) )
>>>>       {
>>>>           if ( (recalc || ept_entry->recalc) &&
>>>> -             p2m_is_changeable(ept_entry->sa_p2mt) )
>>>> +             p2m_is_changeable(ept_entry->sa_p2mt) &&
>>>> +             (ept_entry->sa_p2mt != p2m_ioreq_server) )
>>>>               *t = p2m_is_logdirty_range(p2m, gfn, gfn) ?
>>> p2m_ram_logdirty
>>>>                                                         : p2m_ram_rw;
>>>>           else
>>> Considering this (and at least one more similar adjustment further
>>> down), is it really appropriate to include p2m_ioreq_server in the
>>> set of "changeable" types?
>> Well, I agree p2m_ioreq_server do have different behaviors than the
>> p2m_log_dirty, but removing p2m_ioreq_server from changeable type
>> would need more specific code for the p2m_ioreq_server in routines like
>> resolve_misconfig(), do_recalc() and p2m_change_entry_type_global() etc.
>> I've tried this approach and abandoned later. :)
> I can see that the other option might require more adjustments, in
> which case I guess this variant would be fine if you created another
> helper (well named) inline function instead of open coding this in
> several places. Of course such dissimilar handling in the various
> places p2m_is_changeable() gets used right now will additionally
> require good reasoning - after all that predicate exists to express
> the similarities between different code paths.

Thanks Jan.
Well, for the logic of p2m type recalculation, similarities between 
p2m_ioreq_server
and other changeable types exceeds their differences. As to the special 
cases, how
about we use a macro, i.e. p2m_is_ioreq?


>
> Jan
>
>

Yu
Jan Beulich Sept. 9, 2016, 9:44 a.m. UTC | #5
>>> On 09.09.16 at 11:24, <yu.c.zhang@linux.intel.com> wrote:

> 
> On 9/9/2016 4:20 PM, Jan Beulich wrote:
>>>>> On 09.09.16 at 09:24, <yu.c.zhang@linux.intel.com> wrote:
>>> On 9/9/2016 1:26 PM, Yu Zhang wrote:
>>>>>>> On 02.09.16 at 12:47, <yu.c.zhang@xxxxxxxxxxxxxxx> wrote:
>>>>> @@ -965,7 +968,8 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
>>>>>       if ( is_epte_valid(ept_entry) )
>>>>>       {
>>>>>           if ( (recalc || ept_entry->recalc) &&
>>>>> -             p2m_is_changeable(ept_entry->sa_p2mt) )
>>>>> +             p2m_is_changeable(ept_entry->sa_p2mt) &&
>>>>> +             (ept_entry->sa_p2mt != p2m_ioreq_server) )
>>>>>               *t = p2m_is_logdirty_range(p2m, gfn, gfn) ?
>>>> p2m_ram_logdirty
>>>>>                                                         : p2m_ram_rw;
>>>>>           else
>>>> Considering this (and at least one more similar adjustment further
>>>> down), is it really appropriate to include p2m_ioreq_server in the
>>>> set of "changeable" types?
>>> Well, I agree p2m_ioreq_server do have different behaviors than the
>>> p2m_log_dirty, but removing p2m_ioreq_server from changeable type
>>> would need more specific code for the p2m_ioreq_server in routines like
>>> resolve_misconfig(), do_recalc() and p2m_change_entry_type_global() etc.
>>> I've tried this approach and abandoned later. :)
>> I can see that the other option might require more adjustments, in
>> which case I guess this variant would be fine if you created another
>> helper (well named) inline function instead of open coding this in
>> several places. Of course such dissimilar handling in the various
>> places p2m_is_changeable() gets used right now will additionally
>> require good reasoning - after all that predicate exists to express
>> the similarities between different code paths.
> 
> Thanks Jan.
> Well, for the logic of p2m type recalculation, similarities between 
> p2m_ioreq_server
> and other changeable types exceeds their differences. As to the special 
> cases, how
> about we use a macro, i.e. p2m_is_ioreq?

That'd be better than the open coded check, but would still result
in (taking the above example)

             p2m_is_changeable(ept_entry->sa_p2mt) &&
             !p2m_is_ioreq(ept_entry->sa_p2mt) )

? What I'd prefer is a predicate that can be applied here on its own,
without involving && or ||.

Jan
Yu Zhang Sept. 9, 2016, 9:56 a.m. UTC | #6
On 9/9/2016 5:44 PM, Jan Beulich wrote:
>>>> On 09.09.16 at 11:24, <yu.c.zhang@linux.intel.com> wrote:
>> On 9/9/2016 4:20 PM, Jan Beulich wrote:
>>>>>> On 09.09.16 at 09:24, <yu.c.zhang@linux.intel.com> wrote:
>>>> On 9/9/2016 1:26 PM, Yu Zhang wrote:
>>>>>>>> On 02.09.16 at 12:47, <yu.c.zhang@xxxxxxxxxxxxxxx> wrote:
>>>>>> @@ -965,7 +968,8 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
>>>>>>        if ( is_epte_valid(ept_entry) )
>>>>>>        {
>>>>>>            if ( (recalc || ept_entry->recalc) &&
>>>>>> -             p2m_is_changeable(ept_entry->sa_p2mt) )
>>>>>> +             p2m_is_changeable(ept_entry->sa_p2mt) &&
>>>>>> +             (ept_entry->sa_p2mt != p2m_ioreq_server) )
>>>>>>                *t = p2m_is_logdirty_range(p2m, gfn, gfn) ?
>>>>> p2m_ram_logdirty
>>>>>>                                                          : p2m_ram_rw;
>>>>>>            else
>>>>> Considering this (and at least one more similar adjustment further
>>>>> down), is it really appropriate to include p2m_ioreq_server in the
>>>>> set of "changeable" types?
>>>> Well, I agree p2m_ioreq_server do have different behaviors than the
>>>> p2m_log_dirty, but removing p2m_ioreq_server from changeable type
>>>> would need more specific code for the p2m_ioreq_server in routines like
>>>> resolve_misconfig(), do_recalc() and p2m_change_entry_type_global() etc.
>>>> I've tried this approach and abandoned later. :)
>>> I can see that the other option might require more adjustments, in
>>> which case I guess this variant would be fine if you created another
>>> helper (well named) inline function instead of open coding this in
>>> several places. Of course such dissimilar handling in the various
>>> places p2m_is_changeable() gets used right now will additionally
>>> require good reasoning - after all that predicate exists to express
>>> the similarities between different code paths.
>> Thanks Jan.
>> Well, for the logic of p2m type recalculation, similarities between
>> p2m_ioreq_server
>> and other changeable types exceeds their differences. As to the special
>> cases, how
>> about we use a macro, i.e. p2m_is_ioreq?
> That'd be better than the open coded check, but would still result
> in (taking the above example)
>
>               p2m_is_changeable(ept_entry->sa_p2mt) &&
>               !p2m_is_ioreq(ept_entry->sa_p2mt) )
>
> ? What I'd prefer is a predicate that can be applied here on its own,
> without involving && or ||.
>

OK. I can think of 2 scenarios that p2m_ioreq_server needs special handling:

1> In ept_get_entry()/recal_type(), the p2m types are supposed to return 
as it is, instead of
changing to p2m_log_dirty. So we can use a macro or a inline function 
like p2m_check_changeable(),
which combines the

              p2m_is_changeable(ept_entry->sa_p2mt) &&
              !p2m_is_ioreq(ept_entry->sa_p2mt) )

together.

2> In resolve_misconfig()/do_recalc(), the entry_count gets decremented. We
do not need this new inline function, because they are in a separate 
if() statement.

Is this OK? :)

Thanks
Yu
Yu Zhang Sept. 9, 2016, 10:01 a.m. UTC | #7
On 9/9/2016 6:09 PM, Jan Beulich wrote:
>>>> On 09.09.16 at 11:56, <yu.c.zhang@linux.intel.com> wrote:
>> On 9/9/2016 5:44 PM, Jan Beulich wrote:
>>>>>> On 09.09.16 at 11:24, <yu.c.zhang@linux.intel.com> wrote:
>>>> On 9/9/2016 4:20 PM, Jan Beulich wrote:
>>>>>>>> On 09.09.16 at 09:24, <yu.c.zhang@linux.intel.com> wrote:
>>>>>> On 9/9/2016 1:26 PM, Yu Zhang wrote:
>>>>>>>>>> On 02.09.16 at 12:47, <yu.c.zhang@xxxxxxxxxxxxxxx> wrote:
>>>>>>>> @@ -965,7 +968,8 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
>>>>>>>>         if ( is_epte_valid(ept_entry) )
>>>>>>>>         {
>>>>>>>>             if ( (recalc || ept_entry->recalc) &&
>>>>>>>> -             p2m_is_changeable(ept_entry->sa_p2mt) )
>>>>>>>> +             p2m_is_changeable(ept_entry->sa_p2mt) &&
>>>>>>>> +             (ept_entry->sa_p2mt != p2m_ioreq_server) )
>>>>>>>>                 *t = p2m_is_logdirty_range(p2m, gfn, gfn) ?
>>>>>>> p2m_ram_logdirty
>>>>>>>>                                                           : p2m_ram_rw;
>>>>>>>>             else
>>>>>>> Considering this (and at least one more similar adjustment further
>>>>>>> down), is it really appropriate to include p2m_ioreq_server in the
>>>>>>> set of "changeable" types?
>>>>>> Well, I agree p2m_ioreq_server do have different behaviors than the
>>>>>> p2m_log_dirty, but removing p2m_ioreq_server from changeable type
>>>>>> would need more specific code for the p2m_ioreq_server in routines like
>>>>>> resolve_misconfig(), do_recalc() and p2m_change_entry_type_global() etc.
>>>>>> I've tried this approach and abandoned later. :)
>>>>> I can see that the other option might require more adjustments, in
>>>>> which case I guess this variant would be fine if you created another
>>>>> helper (well named) inline function instead of open coding this in
>>>>> several places. Of course such dissimilar handling in the various
>>>>> places p2m_is_changeable() gets used right now will additionally
>>>>> require good reasoning - after all that predicate exists to express
>>>>> the similarities between different code paths.
>>>> Thanks Jan.
>>>> Well, for the logic of p2m type recalculation, similarities between
>>>> p2m_ioreq_server
>>>> and other changeable types exceeds their differences. As to the special
>>>> cases, how
>>>> about we use a macro, i.e. p2m_is_ioreq?
>>> That'd be better than the open coded check, but would still result
>>> in (taking the above example)
>>>
>>>                p2m_is_changeable(ept_entry->sa_p2mt) &&
>>>                !p2m_is_ioreq(ept_entry->sa_p2mt) )
>>>
>>> ? What I'd prefer is a predicate that can be applied here on its own,
>>> without involving && or ||.
>>>
>> OK. I can think of 2 scenarios that p2m_ioreq_server needs special handling:
>>
>> 1> In ept_get_entry()/recal_type(), the p2m types are supposed to return
>> as it is, instead of
>> changing to p2m_log_dirty. So we can use a macro or a inline function
>> like p2m_check_changeable(),
>> which combines the
>>
>>                p2m_is_changeable(ept_entry->sa_p2mt) &&
>>                !p2m_is_ioreq(ept_entry->sa_p2mt) )
>>
>> together.
>>
>> 2> In resolve_misconfig()/do_recalc(), the entry_count gets decremented. We
>> do not need this new inline function, because they are in a separate
>> if() statement.
>>
>> Is this OK? :)
> Sounds reasonable. But please give George and others a chance to
> voice their opinions before you go that route.
>
>

Sure, thanks!

Yu
Jan Beulich Sept. 9, 2016, 10:09 a.m. UTC | #8
>>> On 09.09.16 at 11:56, <yu.c.zhang@linux.intel.com> wrote:

> 
> On 9/9/2016 5:44 PM, Jan Beulich wrote:
>>>>> On 09.09.16 at 11:24, <yu.c.zhang@linux.intel.com> wrote:
>>> On 9/9/2016 4:20 PM, Jan Beulich wrote:
>>>>>>> On 09.09.16 at 09:24, <yu.c.zhang@linux.intel.com> wrote:
>>>>> On 9/9/2016 1:26 PM, Yu Zhang wrote:
>>>>>>>>> On 02.09.16 at 12:47, <yu.c.zhang@xxxxxxxxxxxxxxx> wrote:
>>>>>>> @@ -965,7 +968,8 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
>>>>>>>        if ( is_epte_valid(ept_entry) )
>>>>>>>        {
>>>>>>>            if ( (recalc || ept_entry->recalc) &&
>>>>>>> -             p2m_is_changeable(ept_entry->sa_p2mt) )
>>>>>>> +             p2m_is_changeable(ept_entry->sa_p2mt) &&
>>>>>>> +             (ept_entry->sa_p2mt != p2m_ioreq_server) )
>>>>>>>                *t = p2m_is_logdirty_range(p2m, gfn, gfn) ?
>>>>>> p2m_ram_logdirty
>>>>>>>                                                          : p2m_ram_rw;
>>>>>>>            else
>>>>>> Considering this (and at least one more similar adjustment further
>>>>>> down), is it really appropriate to include p2m_ioreq_server in the
>>>>>> set of "changeable" types?
>>>>> Well, I agree p2m_ioreq_server do have different behaviors than the
>>>>> p2m_log_dirty, but removing p2m_ioreq_server from changeable type
>>>>> would need more specific code for the p2m_ioreq_server in routines like
>>>>> resolve_misconfig(), do_recalc() and p2m_change_entry_type_global() etc.
>>>>> I've tried this approach and abandoned later. :)
>>>> I can see that the other option might require more adjustments, in
>>>> which case I guess this variant would be fine if you created another
>>>> helper (well named) inline function instead of open coding this in
>>>> several places. Of course such dissimilar handling in the various
>>>> places p2m_is_changeable() gets used right now will additionally
>>>> require good reasoning - after all that predicate exists to express
>>>> the similarities between different code paths.
>>> Thanks Jan.
>>> Well, for the logic of p2m type recalculation, similarities between
>>> p2m_ioreq_server
>>> and other changeable types exceeds their differences. As to the special
>>> cases, how
>>> about we use a macro, i.e. p2m_is_ioreq?
>> That'd be better than the open coded check, but would still result
>> in (taking the above example)
>>
>>               p2m_is_changeable(ept_entry->sa_p2mt) &&
>>               !p2m_is_ioreq(ept_entry->sa_p2mt) )
>>
>> ? What I'd prefer is a predicate that can be applied here on its own,
>> without involving && or ||.
>>
> 
> OK. I can think of 2 scenarios that p2m_ioreq_server needs special handling:
> 
> 1> In ept_get_entry()/recal_type(), the p2m types are supposed to return 
> as it is, instead of
> changing to p2m_log_dirty. So we can use a macro or a inline function 
> like p2m_check_changeable(),
> which combines the
> 
>               p2m_is_changeable(ept_entry->sa_p2mt) &&
>               !p2m_is_ioreq(ept_entry->sa_p2mt) )
> 
> together.
> 
> 2> In resolve_misconfig()/do_recalc(), the entry_count gets decremented. We
> do not need this new inline function, because they are in a separate 
> if() statement.
> 
> Is this OK? :)

Sounds reasonable. But please give George and others a chance to
voice their opinions before you go that route.

Jan
Yu Zhang Sept. 20, 2016, 2:57 a.m. UTC | #9
On 9/9/2016 6:09 PM, Jan Beulich wrote:
>>>> On 09.09.16 at 11:56, <yu.c.zhang@linux.intel.com> wrote:
>> On 9/9/2016 5:44 PM, Jan Beulich wrote:
>>>>>> On 09.09.16 at 11:24, <yu.c.zhang@linux.intel.com> wrote:
>>>> On 9/9/2016 4:20 PM, Jan Beulich wrote:
>>>>>>>> On 09.09.16 at 09:24, <yu.c.zhang@linux.intel.com> wrote:
>>>>>> On 9/9/2016 1:26 PM, Yu Zhang wrote:
>>>>>>>>>> On 02.09.16 at 12:47, <yu.c.zhang@xxxxxxxxxxxxxxx> wrote:
>>>>>>>> @@ -965,7 +968,8 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
>>>>>>>>         if ( is_epte_valid(ept_entry) )
>>>>>>>>         {
>>>>>>>>             if ( (recalc || ept_entry->recalc) &&
>>>>>>>> -             p2m_is_changeable(ept_entry->sa_p2mt) )
>>>>>>>> +             p2m_is_changeable(ept_entry->sa_p2mt) &&
>>>>>>>> +             (ept_entry->sa_p2mt != p2m_ioreq_server) )
>>>>>>>>                 *t = p2m_is_logdirty_range(p2m, gfn, gfn) ?
>>>>>>> p2m_ram_logdirty
>>>>>>>>                                                           : p2m_ram_rw;
>>>>>>>>             else
>>>>>>> Considering this (and at least one more similar adjustment further
>>>>>>> down), is it really appropriate to include p2m_ioreq_server in the
>>>>>>> set of "changeable" types?
>>>>>> Well, I agree p2m_ioreq_server do have different behaviors than the
>>>>>> p2m_log_dirty, but removing p2m_ioreq_server from changeable type
>>>>>> would need more specific code for the p2m_ioreq_server in routines like
>>>>>> resolve_misconfig(), do_recalc() and p2m_change_entry_type_global() etc.
>>>>>> I've tried this approach and abandoned later. :)
>>>>> I can see that the other option might require more adjustments, in
>>>>> which case I guess this variant would be fine if you created another
>>>>> helper (well named) inline function instead of open coding this in
>>>>> several places. Of course such dissimilar handling in the various
>>>>> places p2m_is_changeable() gets used right now will additionally
>>>>> require good reasoning - after all that predicate exists to express
>>>>> the similarities between different code paths.
>>>> Thanks Jan.
>>>> Well, for the logic of p2m type recalculation, similarities between
>>>> p2m_ioreq_server
>>>> and other changeable types exceeds their differences. As to the special
>>>> cases, how
>>>> about we use a macro, i.e. p2m_is_ioreq?
>>> That'd be better than the open coded check, but would still result
>>> in (taking the above example)
>>>
>>>                p2m_is_changeable(ept_entry->sa_p2mt) &&
>>>                !p2m_is_ioreq(ept_entry->sa_p2mt) )
>>>
>>> ? What I'd prefer is a predicate that can be applied here on its own,
>>> without involving && or ||.
>>>
>> OK. I can think of 2 scenarios that p2m_ioreq_server needs special handling:
>>
>> 1> In ept_get_entry()/recal_type(), the p2m types are supposed to return
>> as it is, instead of
>> changing to p2m_log_dirty. So we can use a macro or a inline function
>> like p2m_check_changeable(),
>> which combines the
>>
>>                p2m_is_changeable(ept_entry->sa_p2mt) &&
>>                !p2m_is_ioreq(ept_entry->sa_p2mt) )
>>
>> together.
>>
>> 2> In resolve_misconfig()/do_recalc(), the entry_count gets decremented. We
>> do not need this new inline function, because they are in a separate
>> if() statement.
>>
>> Is this OK? :)
> Sounds reasonable. But please give George and others a chance to
> voice their opinions before you go that route.
>
> Jan

Hi George,

   Any comments on this series? :)

Yu
George Dunlap Sept. 22, 2016, 6:06 p.m. UTC | #10
On Tue, Sep 20, 2016 at 3:57 AM, Yu Zhang <yu.c.zhang@linux.intel.com> wrote:
>>>>> Well, for the logic of p2m type recalculation, similarities between
>>>>> p2m_ioreq_server
>>>>> and other changeable types exceeds their differences. As to the special
>>>>> cases, how
>>>>> about we use a macro, i.e. p2m_is_ioreq?
>>>>
>>>> That'd be better than the open coded check, but would still result
>>>> in (taking the above example)
>>>>
>>>>                p2m_is_changeable(ept_entry->sa_p2mt) &&
>>>>                !p2m_is_ioreq(ept_entry->sa_p2mt) )
>>>>
>>>> ? What I'd prefer is a predicate that can be applied here on its own,
>>>> without involving && or ||.
>>>>
>>> OK. I can think of 2 scenarios that p2m_ioreq_server needs special
>>> handling:
>>>
>>> 1> In ept_get_entry()/recal_type(), the p2m types are supposed to return
>>> as it is, instead of
>>> changing to p2m_log_dirty. So we can use a macro or a inline function
>>> like p2m_check_changeable(),
>>> which combines the
>>>
>>>                p2m_is_changeable(ept_entry->sa_p2mt) &&
>>>                !p2m_is_ioreq(ept_entry->sa_p2mt) )
>>>
>>> together.
>>>
>>> 2> In resolve_misconfig()/do_recalc(), the entry_count gets decremented.
>>> We
>>> do not need this new inline function, because they are in a separate
>>> if() statement.
>>>
>>> Is this OK? :)
>>
>> Sounds reasonable. But please give George and others a chance to
>> voice their opinions before you go that route.
>>
>> Jan
>
>
> Hi George,
>
>   Any comments on this series? :)

Well regarding the question you and Jan have been discussing, of what
to call / how to do the checks for changeable-but-not-ioreq, I don't
really care very much. :-)

I'm sorry it's taking so long to look at this series -- the code
you're trying to modify is already a bit of a tangled mess, and I
think this patch has a ways to go before it's ready.  I do think this
series is important, so I'll be coming back to it first thing Monday.

Regarding the pausing added in this patch -- you listed two reasons in
the first patch for the domain pausing.  The first was detecting
read-modify-write and acting appropriately; I think that can be done
with the patch that I sent you.

The second was the deadlock due to grabbing locks in a different
order.  I'm afraid having such a thing lying around, even if you've
avoided it for now by pausing the domain, is another major trap that
we should try to avoid laying for future developers if we can at all
help it.  The normal thing to do here is to simply have a locking
discipline -- in this case, I don't think it would be too difficult to
work out a locking order that would avoid the deadlock in a more
robust way than pausing and unpausing the domain.

With those two handled, we shouldn't need to pause the domain any more.

Thanks for your work on this -- I'll get back to patch 4/4 next week.

Peace,
 -George
Yu Zhang Sept. 23, 2016, 1:31 a.m. UTC | #11
On 9/23/2016 2:06 AM, George Dunlap wrote:
> On Tue, Sep 20, 2016 at 3:57 AM, Yu Zhang <yu.c.zhang@linux.intel.com> wrote:
>>>>>> Well, for the logic of p2m type recalculation, similarities between
>>>>>> p2m_ioreq_server
>>>>>> and other changeable types exceeds their differences. As to the special
>>>>>> cases, how
>>>>>> about we use a macro, i.e. p2m_is_ioreq?
>>>>> That'd be better than the open coded check, but would still result
>>>>> in (taking the above example)
>>>>>
>>>>>                 p2m_is_changeable(ept_entry->sa_p2mt) &&
>>>>>                 !p2m_is_ioreq(ept_entry->sa_p2mt) )
>>>>>
>>>>> ? What I'd prefer is a predicate that can be applied here on its own,
>>>>> without involving && or ||.
>>>>>
>>>> OK. I can think of 2 scenarios that p2m_ioreq_server needs special
>>>> handling:
>>>>
>>>> 1> In ept_get_entry()/recal_type(), the p2m types are supposed to return
>>>> as it is, instead of
>>>> changing to p2m_log_dirty. So we can use a macro or a inline function
>>>> like p2m_check_changeable(),
>>>> which combines the
>>>>
>>>>                 p2m_is_changeable(ept_entry->sa_p2mt) &&
>>>>                 !p2m_is_ioreq(ept_entry->sa_p2mt) )
>>>>
>>>> together.
>>>>
>>>> 2> In resolve_misconfig()/do_recalc(), the entry_count gets decremented.
>>>> We
>>>> do not need this new inline function, because they are in a separate
>>>> if() statement.
>>>>
>>>> Is this OK? :)
>>> Sounds reasonable. But please give George and others a chance to
>>> voice their opinions before you go that route.
>>>
>>> Jan
>>
>> Hi George,
>>
>>    Any comments on this series? :)
> Well regarding the question you and Jan have been discussing, of what
> to call / how to do the checks for changeable-but-not-ioreq, I don't
> really care very much. :-)
>
> I'm sorry it's taking so long to look at this series -- the code
> you're trying to modify is already a bit of a tangled mess, and I
> think this patch has a ways to go before it's ready.  I do think this
> series is important, so I'll be coming back to it first thing Monday.
>
> Regarding the pausing added in this patch -- you listed two reasons in
> the first patch for the domain pausing.  The first was detecting
> read-modify-write and acting appropriately; I think that can be done
> with the patch that I sent you.
>
> The second was the deadlock due to grabbing locks in a different
> order.  I'm afraid having such a thing lying around, even if you've
> avoided it for now by pausing the domain, is another major trap that
> we should try to avoid laying for future developers if we can at all
> help it.  The normal thing to do here is to simply have a locking
> discipline -- in this case, I don't think it would be too difficult to
> work out a locking order that would avoid the deadlock in a more
> robust way than pausing and unpausing the domain.
>
> With those two handled, we shouldn't need to pause the domain any more.

Thank you, George. Hope we can find a more elegant approach. :-)
B.R.
Yu

> Thanks for your work on this -- I'll get back to patch 4/4 next week.
>
> Peace,
>   -George
>
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 9b419d2..200c661 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -5522,11 +5522,13 @@  static int hvmop_set_mem_type(
 }
 
 static int hvmop_map_mem_type_to_ioreq_server(
-    XEN_GUEST_HANDLE_PARAM(xen_hvm_map_mem_type_to_ioreq_server_t) uop)
+    XEN_GUEST_HANDLE_PARAM(xen_hvm_map_mem_type_to_ioreq_server_t) uop,
+    unsigned long *iter)
 {
     xen_hvm_map_mem_type_to_ioreq_server_t op;
     struct domain *d;
     int rc;
+    unsigned long gfn = *iter;
 
     if ( copy_from_guest(&op, uop, 1) )
         return -EFAULT;
@@ -5551,7 +5553,35 @@  static int hvmop_map_mem_type_to_ioreq_server(
     if ( rc != 0 )
         goto out;
 
-    rc = hvm_map_mem_type_to_ioreq_server(d, op.id, op.type, op.flags);
+    if ( gfn == 0 )
+        rc = hvm_map_mem_type_to_ioreq_server(d, op.id, op.type, op.flags);
+
+    /*
+     * Iterate p2m table when an ioreq server unmaps from p2m_ioreq_server,
+     * and reset the remaining p2m_ioreq_server entries back to p2m_ram_rw.
+     */
+    if ( op.flags == 0 && rc == 0 )
+    {
+        struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+        while ( read_atomic(&p2m->ioreq.entry_count) &&
+                gfn <= p2m->max_mapped_pfn )
+        {
+            unsigned long gfn_end = gfn + HVMOP_op_mask;
+
+            p2m_finish_type_change(d, gfn, gfn_end,
+                                   p2m_ioreq_server, p2m_ram_rw);
+
+            /* Check for continuation if it's not the last iteration. */
+            if ( ++gfn_end <= p2m->max_mapped_pfn &&
+                hypercall_preempt_check() )
+            {
+                rc = -ERESTART;
+                *iter = gfn_end;
+                goto out;
+            }
+        }
+    }
 
  out:
     rcu_unlock_domain(d);
@@ -5570,6 +5600,7 @@  long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
         break;
     case HVMOP_modified_memory:
     case HVMOP_set_mem_type:
+    case HVMOP_map_mem_type_to_ioreq_server:
         mask = HVMOP_op_mask;
         break;
     }
@@ -5599,7 +5630,8 @@  long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
 
     case HVMOP_map_mem_type_to_ioreq_server:
         rc = hvmop_map_mem_type_to_ioreq_server(
-            guest_handle_cast(arg, xen_hvm_map_mem_type_to_ioreq_server_t));
+            guest_handle_cast(arg, xen_hvm_map_mem_type_to_ioreq_server_t),
+            &start_iter);
         break;
 
     case HVMOP_set_ioreq_server_state:
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 3218fa2..5df2d62 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -190,6 +190,15 @@  out:
  */
 static int hap_enable_log_dirty(struct domain *d, bool_t log_global)
 {
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+    /*
+    * Refuse to turn on global log-dirty mode if
+    * there's outstanding p2m_ioreq_server pages.
+    */
+    if ( log_global && read_atomic(&p2m->ioreq.entry_count) )
+        return -EBUSY;
+
     /* turn on PG_log_dirty bit in paging mode */
     paging_lock(d);
     d->arch.paging.mode |= PG_log_dirty;
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 700420a..6679ae7 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -545,6 +545,9 @@  static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                     e.ipat = ipat;
                     if ( e.recalc && p2m_is_changeable(e.sa_p2mt) )
                     {
+                         if ( e.sa_p2mt == p2m_ioreq_server )
+                             p2m->ioreq.entry_count--;
+
                          e.sa_p2mt = p2m_is_logdirty_range(p2m, gfn + i, gfn + i)
                                      ? p2m_ram_logdirty : p2m_ram_rw;
                          ept_p2m_type_to_flags(p2m, &e, e.sa_p2mt, e.access);
@@ -965,7 +968,8 @@  static mfn_t ept_get_entry(struct p2m_domain *p2m,
     if ( is_epte_valid(ept_entry) )
     {
         if ( (recalc || ept_entry->recalc) &&
-             p2m_is_changeable(ept_entry->sa_p2mt) )
+             p2m_is_changeable(ept_entry->sa_p2mt) &&
+             (ept_entry->sa_p2mt != p2m_ioreq_server) )
             *t = p2m_is_logdirty_range(p2m, gfn, gfn) ? p2m_ram_logdirty
                                                       : p2m_ram_rw;
         else
diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
index 46a56fa..7f31c0e 100644
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -439,11 +439,13 @@  static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
          needs_recalc(l1, *pent) )
     {
         l1_pgentry_t e = *pent;
+        p2m_type_t p2mt_old;
 
         if ( !valid_recalc(l1, e) )
             P2M_DEBUG("bogus recalc leaf at d%d:%lx:%u\n",
                       p2m->domain->domain_id, gfn, level);
-        if ( p2m_is_changeable(p2m_flags_to_type(l1e_get_flags(e))) )
+        p2mt_old = p2m_flags_to_type(l1e_get_flags(e));
+        if ( p2m_is_changeable(p2mt_old) )
         {
             unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);
             p2m_type_t p2mt = p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask)
@@ -463,6 +465,10 @@  static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
                      mfn &= ~(_PAGE_PSE_PAT >> PAGE_SHIFT);
                 flags |= _PAGE_PSE;
             }
+
+            if ( p2mt_old == p2m_ioreq_server )
+                p2m->ioreq.entry_count--;
+
             e = l1e_from_pfn(mfn, flags);
             p2m_add_iommu_flags(&e, level,
                                 (p2mt == p2m_ram_rw)
@@ -729,7 +735,7 @@  p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
 static inline p2m_type_t recalc_type(bool_t recalc, p2m_type_t t,
                                      struct p2m_domain *p2m, unsigned long gfn)
 {
-    if ( !recalc || !p2m_is_changeable(t) )
+    if ( !recalc || !p2m_is_changeable(t) || (t == p2m_ioreq_server) )
         return t;
     return p2m_is_logdirty_range(p2m, gfn, gfn) ? p2m_ram_logdirty
                                                 : p2m_ram_rw;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 6e4cb1f..6581a70 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -313,6 +313,9 @@  int p2m_set_ioreq_server(struct domain *d,
 
         p2m->ioreq.server = NULL;
         p2m->ioreq.flags = 0;
+
+        if ( read_atomic(&p2m->ioreq.entry_count) )
+            p2m_change_entry_type_global(d, p2m_ioreq_server, p2m_ram_rw);
     }
     else
     {
@@ -957,6 +960,23 @@  int p2m_change_type_one(struct domain *d, unsigned long gfn,
                          p2m->default_access)
          : -EBUSY;
 
+    if ( !rc )
+    {
+        switch ( nt )
+        {
+        case p2m_ram_rw:
+            if ( ot == p2m_ioreq_server )
+                p2m->ioreq.entry_count--;
+            break;
+        case p2m_ioreq_server:
+            if ( ot == p2m_ram_rw )
+                p2m->ioreq.entry_count++;
+            break;
+        default:
+            break;
+        }
+    }
+
     gfn_unlock(p2m, gfn, 0);
 
     return rc;
@@ -1021,6 +1041,35 @@  void p2m_change_type_range(struct domain *d,
     p2m_unlock(p2m);
 }
 
+/* Synchronously modify the p2m type of a range of gfns from ot to nt. */
+void p2m_finish_type_change(struct domain *d,
+                           unsigned long start, unsigned long end,
+                           p2m_type_t ot, p2m_type_t nt)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    p2m_type_t t;
+    unsigned long gfn = start;
+
+    ASSERT(start <= end);
+    ASSERT(ot != nt);
+    ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));
+
+    p2m_lock(p2m);
+
+    end = (end > p2m->max_mapped_pfn) ? p2m->max_mapped_pfn : end;
+    while ( gfn <= end )
+    {
+        get_gfn_query_unlocked(d, gfn, &t);
+
+        if ( t == ot )
+            p2m_change_type_one(d, gfn, t, nt);
+
+        gfn++;
+    }
+
+    p2m_unlock(p2m);
+}
+
 /*
  * Returns:
  *    0              for success
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 4924c4b..bf9adca 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -120,7 +120,8 @@  typedef unsigned int p2m_query_t;
 
 /* Types that can be subject to bulk transitions. */
 #define P2M_CHANGEABLE_TYPES (p2m_to_mask(p2m_ram_rw) \
-                              | p2m_to_mask(p2m_ram_logdirty) )
+                              | p2m_to_mask(p2m_ram_logdirty) \
+                              | p2m_to_mask(p2m_ioreq_server))
 
 #define P2M_POD_TYPES (p2m_to_mask(p2m_populate_on_demand))
 
@@ -349,6 +350,7 @@  struct p2m_domain {
          * are to be emulated by an ioreq server.
          */
         unsigned int flags;
+        unsigned int entry_count;
     } ioreq;
 };
 
@@ -604,6 +606,11 @@  void p2m_change_type_range(struct domain *d,
 int p2m_change_type_one(struct domain *d, unsigned long gfn,
                         p2m_type_t ot, p2m_type_t nt);
 
+/* Synchronously change types across a range of p2m entries (start ... end) */
+void p2m_finish_type_change(struct domain *d,
+                           unsigned long start, unsigned long end,
+                           p2m_type_t ot, p2m_type_t nt);
+
 /* Report a change affecting memory types. */
 void p2m_memory_type_changed(struct domain *d);