
[1/2] KVM: fix cache stale memslot info with correct mmio generation number

Message ID A07DA8BD-8756-427D-9925-B8696E47C73A@gmail.com (mailing list archive)
State New, archived

Commit Message

Xiao Guangrong Aug. 18, 2014, 7:56 p.m. UTC
On Aug 19, 2014, at 2:47 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:

> 
>> I think this patch is auditable: the page-fault path always runs while holding
>> the srcu lock, so a page fault cannot span synchronize_srcu_expedited.
>> Only these cases can happen:
>> 
>> 1)  page fault occurs before synchronize_srcu_expedited.
>>    In this case, the vcpu will generate an mmio-exit for the memslot being
>>    registered by the ioctl. That's ok since the ioctl has not finished.
>> 
>> 2) page fault occurs after synchronize_srcu_expedited and while the
>>   generation number is being increased.
>>   In this case, userspace may get a wrong mmio-exit (that happens if handling
>>   the page fault is slower than the ioctl); that's ok too since userspace needs
>>   to do the check anyway, as I said above.
>> 
>> 3) page fault occurs after the generation-number update.
>>   That's definitely correct. :)
>> 
>>> Another alternative could be to use the low bit to mark an in-progress
>>> change, and skip the caching if the low bit is set.  Similar to a
>>> seqcount (except if read_seqcount_retry fails, we just punt and not
>>> retry anything), you could use it even though the memory barriers
>>> provided by write_seqcount_begin/end are not too useful in this case.
>> 
>> I do not know how the bit works: a page fault could cache the memslot before
>> the bit is set and cache the generation number after the bit is set.
>> 
>> Maybe I missed your idea; could you please detail it?
> 
> Something like this:
> 
> -	update_memslots(slots, new, kvm->memslots->generation);
> +	/* ensure generation number is always increased. */
> +	slots->generation = old_memslots->generation + 1;
> +	update_memslots(slots, new);
> 	rcu_assign_pointer(kvm->memslots, slots);
> 	synchronize_srcu_expedited(&kvm->srcu);
> +	slots->generation++;
> 
> Then case 1 and 2 will just have a cache miss.

So, case 2 is what concerns you? :) I still think it is ok, but I do not have a
strong opinion on that. How about simplifying it like this:
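
In outline — a simplified sketch of the idea rather than the literal hunks (the
full diff is in the Patch section at the bottom of this page); the toy types and
the C11 atomics standing in for smp_wmb()/smp_rmb() are illustrative only:

/* Toy user-space model of the "updated flag" scheme; the real code lives in
 * install_new_memslots() and check_mmio_spte(). */
#include <stdatomic.h>
#include <stdbool.h>

struct toy_memslots {
	unsigned long generation;
	atomic_bool updated;		/* true while an update is in flight */
};

/* Writer side (the memslot ioctl): keep 'updated' set across the whole
 * window and bump the number only after the grace period. */
static void toy_install(struct toy_memslots *slots, unsigned long old_gen)
{
	atomic_store_explicit(&slots->updated, true, memory_order_relaxed);
	slots->generation = old_gen;
	/* ... publish slots and wait for readers (SRCU sync in the kernel) ... */
	slots->generation++;
	atomic_store_explicit(&slots->updated, false, memory_order_release);
}

/* Reader side (check_mmio_spte): refuse to trust any cached generation
 * while an update is in flight. */
static bool toy_check(struct toy_memslots *slots, unsigned long spte_gen)
{
	if (atomic_load_explicit(&slots->updated, memory_order_acquire))
		return false;
	return spte_gen == slots->generation;
}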



Comments

David Matlack Aug. 18, 2014, 9:15 p.m. UTC | #1
On Mon, Aug 18, 2014 at 12:56 PM, Xiao Guangrong
<xiaoguangrong.eric@gmail.com> wrote:
> @@ -287,9 +293,15 @@ static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
>
>  static bool check_mmio_spte(struct kvm *kvm, u64 spte)
>  {
> +       struct kvm_memslots *slots = kvm_memslots(kvm);
>         unsigned int kvm_gen, spte_gen;
>
> -       kvm_gen = kvm_current_mmio_generation(kvm);
> +       if (slots->updated)
> +               return false;
> +
> +       smp_rmb();
> +
> +       kvm_gen = __kvm_current_mmio_generation(slots);
>         spte_gen = get_mmio_spte_generation(spte);
>

What does this fix? Case 2 can still happen. (Case 2 is unavoidable unless we
block during memslot updates, which I don't think we should :).

>         trace_check_mmio_spte(spte, kvm_gen, spte_gen);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 4b6c01b..1d4e78f 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -96,7 +96,7 @@ static void hardware_disable_all(void);
>
>  static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
>  static void update_memslots(struct kvm_memslots *slots,
> -                           struct kvm_memory_slot *new, u64 last_generation);
> +                           struct kvm_memory_slot *new);
>
>  static void kvm_release_pfn_dirty(pfn_t pfn);
>  static void mark_page_dirty_in_slot(struct kvm *kvm,
> @@ -685,8 +685,7 @@ static void sort_memslots(struct kvm_memslots *slots)
>  }
>
>  static void update_memslots(struct kvm_memslots *slots,
> -                           struct kvm_memory_slot *new,
> -                           u64 last_generation)
> +                           struct kvm_memory_slot *new)
>  {
>         if (new) {
>                 int id = new->id;
> @@ -697,8 +696,6 @@ static void update_memslots(struct kvm_memslots *slots,
>                 if (new->npages != npages)
>                         sort_memslots(slots);
>         }
> -
> -       slots->generation = last_generation + 1;
>  }
>
>  static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
> @@ -720,10 +717,17 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
>  {
>         struct kvm_memslots *old_memslots = kvm->memslots;
>
> -       update_memslots(slots, new, kvm->memslots->generation);
> +       /* ensure generation number is always increased. */
> +       slots->updated = true;
> +       slots->generation = old_memslots->generation;
> +       update_memslots(slots, new);
>         rcu_assign_pointer(kvm->memslots, slots);
>         synchronize_srcu_expedited(&kvm->srcu);
>
> +       slots->generation++;
> +       smp_wmb();
> +       slots->updated = false;
> +
>         kvm_arch_memslots_updated(kvm);
>
>         return old_memslots;
>

This is effectively the same as the first approach.

I just realized how simple Paolo's idea is. I think it can be a one line
patch (without comments):

[...]
        update_memslots(slots, new, kvm->memslots->generation);
        rcu_assign_pointer(kvm->memslots, slots);
        synchronize_srcu_expedited(&kvm->srcu);
+       slots->generation++;

        kvm_arch_memslots_updated(kvm);
[...]
Paolo Bonzini Aug. 18, 2014, 9:24 p.m. UTC | #2
On 18/08/2014 23:15, David Matlack wrote:
> I just realized how simple Paolo's idea is. I think it can be a one line
> patch (without comments):
> 
> [...]
>         update_memslots(slots, new, kvm->memslots->generation);
>         rcu_assign_pointer(kvm->memslots, slots);
>         synchronize_srcu_expedited(&kvm->srcu);
> +       slots->generation++;
> 
>         kvm_arch_memslots_updated(kvm);
> [...]

Yeah, you're right.  I think at this point it makes sense to put all
generation handling in install_new_memslots, but with proper comments
the above can do as well.

Would you like to send it?  Patch 2 still applies on top.
David Matlack Aug. 18, 2014, 9:33 p.m. UTC | #3
On Mon, Aug 18, 2014 at 2:24 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 18/08/2014 23:15, David Matlack wrote:
>> I just realized how simple Paolo's idea is. I think it can be a one line
>> patch (without comments):
>>
>> [...]
>>         update_memslots(slots, new, kvm->memslots->generation);
>>         rcu_assign_pointer(kvm->memslots, slots);
>>         synchronize_srcu_expedited(&kvm->srcu);
>> +       slots->generation++;
>>
>>         kvm_arch_memslots_updated(kvm);
>> [...]
>
> Yeah, you're right.  I think at this point it makes sense to put all
> generation handling in install_new_memslots, but with proper comments
> the above can do as well.
>
> Would you like to send it?  Patch 2 still applies on top.

Sure, I like doing everything in install_new_memslots() as well so I'll fix
that.
Xiao Guangrong Aug. 19, 2014, 3:50 a.m. UTC | #4
On 08/19/2014 05:15 AM, David Matlack wrote:
> On Mon, Aug 18, 2014 at 12:56 PM, Xiao Guangrong
> <xiaoguangrong.eric@gmail.com> wrote:
>> @@ -287,9 +293,15 @@ static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
>>
>>  static bool check_mmio_spte(struct kvm *kvm, u64 spte)
>>  {
>> +       struct kvm_memslots *slots = kvm_memslots(kvm);
>>         unsigned int kvm_gen, spte_gen;
>>
>> -       kvm_gen = kvm_current_mmio_generation(kvm);
>> +       if (slots->updated)
>> +               return false;
>> +
>> +       smp_rmb();
>> +
>> +       kvm_gen = __kvm_current_mmio_generation(slots);
>>         spte_gen = get_mmio_spte_generation(spte);
>>
> 
> What does this fix? Case 2 can still happen. (Case 2 is unavoidable unless we
> block during memslot updates, which I don't think we should :).

This exactly fixes case 2: slots->updated just acts as the "low bit", but it
avoids generation-number wrap-around and tricky handling of the number.
Please see below for more details.

> 
>>         trace_check_mmio_spte(spte, kvm_gen, spte_gen);
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 4b6c01b..1d4e78f 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -96,7 +96,7 @@ static void hardware_disable_all(void);
>>
>>  static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
>>  static void update_memslots(struct kvm_memslots *slots,
>> -                           struct kvm_memory_slot *new, u64 last_generation);
>> +                           struct kvm_memory_slot *new);
>>
>>  static void kvm_release_pfn_dirty(pfn_t pfn);
>>  static void mark_page_dirty_in_slot(struct kvm *kvm,
>> @@ -685,8 +685,7 @@ static void sort_memslots(struct kvm_memslots *slots)
>>  }
>>
>>  static void update_memslots(struct kvm_memslots *slots,
>> -                           struct kvm_memory_slot *new,
>> -                           u64 last_generation)
>> +                           struct kvm_memory_slot *new)
>>  {
>>         if (new) {
>>                 int id = new->id;
>> @@ -697,8 +696,6 @@ static void update_memslots(struct kvm_memslots *slots,
>>                 if (new->npages != npages)
>>                         sort_memslots(slots);
>>         }
>> -
>> -       slots->generation = last_generation + 1;
>>  }
>>
>>  static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
>> @@ -720,10 +717,17 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
>>  {
>>         struct kvm_memslots *old_memslots = kvm->memslots;
>>
>> -       update_memslots(slots, new, kvm->memslots->generation);
>> +       /* ensure generation number is always increased. */
>> +       slots->updated = true;
>> +       slots->generation = old_memslots->generation;
>> +       update_memslots(slots, new);
>>         rcu_assign_pointer(kvm->memslots, slots);
>>         synchronize_srcu_expedited(&kvm->srcu);
>>
>> +       slots->generation++;
>> +       smp_wmb();
>> +       slots->updated = false;
>> +
>>         kvm_arch_memslots_updated(kvm);
>>
>>         return old_memslots;
>>
> 
> This is effectively the same as the first approach.
> 
> I just realized how simple Paolo's idea is. I think it can be a one line
> patch (without comments):
> 
> [...]
>         update_memslots(slots, new, kvm->memslots->generation);
>         rcu_assign_pointer(kvm->memslots, slots);
>         synchronize_srcu_expedited(&kvm->srcu);
> +       slots->generation++;
> 
>         kvm_arch_memslots_updated(kvm);
> [...]

Really? Unfortunately no. :)

See this scenario:

CPU 0                                  CPU 1
ioctl registering a new memslot which
contains GPA:
                           page-fault handler:
                             sees it's an mmio access on GPA;

 assign the new memslots with the generation number increased
                             cache the generation-number into the spte;
                             fix the access and come back to the guest;
SRCU-sync
                             page-fault again and check that the spte is a valid mmio-spte (*)
generation-number++;
return to userspace;
                             do mmio-emulation and inject an mmio-exit;

!!! userspace receives an unexpected mmio-exit; that is exactly the case 2 I
described in the last mail.


Note that in step (*), my approach detects the invalid generation number, which
invalidates the mmio spte properly.
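
As a concrete replay of that timeline — a toy, single-threaded model (not
kernel code) that just walks the interleaving above and compares the plain
generation check with the updated-flag check at step (*):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	unsigned long gen = 100;	/* arbitrary current mmio generation  */
	bool updated = false;		/* only exists in the flag scheme     */

	/* CPU 0: the ioctl publishes the new memslots (SRCU-sync not done). */
	updated = true;

	/* CPU 1: the first fault sees an mmio access and caches the
	 * generation it currently observes into the spte. */
	unsigned long spte_gen = gen;

	/* CPU 1: faults again at step (*), still inside the SRCU window.
	 * The spte cached the very generation this path still sees, so a
	 * plain compare cannot tell that the slots have changed. */
	bool plain_valid = (spte_gen == gen);		  /* 1: stale spte trusted */
	bool flag_valid  = !updated && (spte_gen == gen); /* 0: spte rejected      */

	printf("plain check trusts the stale spte: %d\n", plain_valid);
	printf("flag  check trusts the stale spte: %d\n", flag_valid);

	/* CPU 0: SRCU-sync completes; generation is bumped, flag cleared. */
	gen++;
	updated = false;
	return 0;
}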

David Matlack Aug. 19, 2014, 4:31 a.m. UTC | #5
On Mon, Aug 18, 2014 at 8:50 PM, Xiao Guangrong
<xiaoguangrong@linux.vnet.ibm.com> wrote:
> On 08/19/2014 05:15 AM, David Matlack wrote:
>> On Mon, Aug 18, 2014 at 12:56 PM, Xiao Guangrong
>> <xiaoguangrong.eric@gmail.com> wrote:
>>> @@ -287,9 +293,15 @@ static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
>>>
>>>  static bool check_mmio_spte(struct kvm *kvm, u64 spte)
>>>  {
>>> +       struct kvm_memslots *slots = kvm_memslots(kvm);
>>>         unsigned int kvm_gen, spte_gen;
>>>
>>> -       kvm_gen = kvm_current_mmio_generation(kvm);
>>> +       if (slots->updated)
>>> +               return false;
>>> +
>>> +       smp_rmb();
>>> +
>>> +       kvm_gen = __kvm_current_mmio_generation(slots);
>>>         spte_gen = get_mmio_spte_generation(spte);
>>>
>>
>> What does this fix? Case 2 can still happen. (Case 2 is unavoidable unless we
>> block during memslot updates, which I don't think we should :).
>
> This exactly fixes case 2, slots->updated just acts as the "low bit"
> but avoid generation number wrap-around and trick handling of the number.
> More details please see below.
>
>>
>>>         trace_check_mmio_spte(spte, kvm_gen, spte_gen);
>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>> index 4b6c01b..1d4e78f 100644
>>> --- a/virt/kvm/kvm_main.c
>>> +++ b/virt/kvm/kvm_main.c
>>> @@ -96,7 +96,7 @@ static void hardware_disable_all(void);
>>>
>>>  static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
>>>  static void update_memslots(struct kvm_memslots *slots,
>>> -                           struct kvm_memory_slot *new, u64 last_generation);
>>> +                           struct kvm_memory_slot *new);
>>>
>>>  static void kvm_release_pfn_dirty(pfn_t pfn);
>>>  static void mark_page_dirty_in_slot(struct kvm *kvm,
>>> @@ -685,8 +685,7 @@ static void sort_memslots(struct kvm_memslots *slots)
>>>  }
>>>
>>>  static void update_memslots(struct kvm_memslots *slots,
>>> -                           struct kvm_memory_slot *new,
>>> -                           u64 last_generation)
>>> +                           struct kvm_memory_slot *new)
>>>  {
>>>         if (new) {
>>>                 int id = new->id;
>>> @@ -697,8 +696,6 @@ static void update_memslots(struct kvm_memslots *slots,
>>>                 if (new->npages != npages)
>>>                         sort_memslots(slots);
>>>         }
>>> -
>>> -       slots->generation = last_generation + 1;
>>>  }
>>>
>>>  static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
>>> @@ -720,10 +717,17 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
>>>  {
>>>         struct kvm_memslots *old_memslots = kvm->memslots;
>>>
>>> -       update_memslots(slots, new, kvm->memslots->generation);
>>> +       /* ensure generation number is always increased. */
>>> +       slots->updated = true;
>>> +       slots->generation = old_memslots->generation;
>>> +       update_memslots(slots, new);
>>>         rcu_assign_pointer(kvm->memslots, slots);
>>>         synchronize_srcu_expedited(&kvm->srcu);
>>>
>>> +       slots->generation++;
>>> +       smp_wmb();
>>> +       slots->updated = false;
>>> +
>>>         kvm_arch_memslots_updated(kvm);
>>>
>>>         return old_memslots;
>>>
>>
>> This is effectively the same as the first approach.
>>
>> I just realized how simple Paolo's idea is. I think it can be a one line
>> patch (without comments):
>>
>> [...]
>>         update_memslots(slots, new, kvm->memslots->generation);
>>         rcu_assign_pointer(kvm->memslots, slots);
>>         synchronize_srcu_expedited(&kvm->srcu);
>> +       slots->generation++;
>>
>>         kvm_arch_memslots_updated(kvm);
>> [...]
>
> Really? Unfortunately no. :)
>
> See this scenario:
>
> CPU 0                                  CPU 1
> ioctl registering a new memslot which
> contains GPA:
>                            page-fault handler:
>                              see it'a mmio access on GPA;
>
>  assign the new memslots with generation number increased
>                              cache the generation-number into spte;
>                              fix the access and comeback to guest;
> SRCU-sync
>                              page-fault again and check the spte is a valid mmio-spte(*)
> generation-number++;
> return to userspace;
>                              do mmio-emulation and inject mmio-exit;
>
> !!! userspace receives a unexpected mmio-exit, that is case 2 i exactly
> said in the last mail.
>
>
> Note in the step *, my approach detects the invalid generation-number which
> will invalidate the mmio spte properly .

Sorry I didn't explain myself very well: Since we can get a single wrong
mmio exit no matter what, it has to be handled in userspace. So my point
was, it doesn't really help to fix that one very specific way that it can
happen, because it can just happen in other ways (e.g., a memslot update
occurring after is_noslot_pfn() and before the mmio exit).

But it looks like you basically said the same thing earlier, so I think
we're on the same page.

The single-line patch I suggested was only intended to fix the "forever
incorrectly exit mmio" problem.
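
For the userspace side, that check can be as simple as re-consulting the VMM's
own view of guest memory before emulating — a hedged sketch, not QEMU's actual
code; the region table and helpers below are made up for illustration:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical VMM-side record of the memslots it has registered. */
struct guest_region {
	uint64_t gpa;
	uint64_t size;
};

static struct guest_region regions[32];
static size_t nr_regions;

static bool gpa_is_ram(uint64_t gpa)
{
	for (size_t i = 0; i < nr_regions; i++)
		if (gpa >= regions[i].gpa &&
		    gpa < regions[i].gpa + regions[i].size)
			return true;
	return false;
}

/* Called on KVM_EXIT_MMIO.  If the GPA is now covered by a memslot that a
 * racing KVM_SET_USER_MEMORY_REGION just registered, the exit is stale:
 * skip device emulation and re-enter the guest so it retries the access. */
static bool handle_mmio_exit(uint64_t gpa)
{
	if (gpa_is_ram(gpa))
		return false;	/* stale exit, nothing to emulate */
	/* ... real MMIO device emulation would go here ... */
	return true;
}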
Xiao Guangrong Aug. 19, 2014, 4:41 a.m. UTC | #6
On 08/19/2014 12:31 PM, David Matlack wrote:
> On Mon, Aug 18, 2014 at 8:50 PM, Xiao Guangrong
> <xiaoguangrong@linux.vnet.ibm.com> wrote:
>> On 08/19/2014 05:15 AM, David Matlack wrote:
>>> On Mon, Aug 18, 2014 at 12:56 PM, Xiao Guangrong
>>> <xiaoguangrong.eric@gmail.com> wrote:
>>>> @@ -287,9 +293,15 @@ static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
>>>>
>>>>  static bool check_mmio_spte(struct kvm *kvm, u64 spte)
>>>>  {
>>>> +       struct kvm_memslots *slots = kvm_memslots(kvm);
>>>>         unsigned int kvm_gen, spte_gen;
>>>>
>>>> -       kvm_gen = kvm_current_mmio_generation(kvm);
>>>> +       if (slots->updated)
>>>> +               return false;
>>>> +
>>>> +       smp_rmb();
>>>> +
>>>> +       kvm_gen = __kvm_current_mmio_generation(slots);
>>>>         spte_gen = get_mmio_spte_generation(spte);
>>>>
>>>
>>> What does this fix? Case 2 can still happen. (Case 2 is unavoidable unless we
>>> block during memslot updates, which I don't think we should :).
>>
>> This exactly fixes case 2, slots->updated just acts as the "low bit"
>> but avoid generation number wrap-around and trick handling of the number.
>> More details please see below.
>>
>>>
>>>>         trace_check_mmio_spte(spte, kvm_gen, spte_gen);
>>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>>> index 4b6c01b..1d4e78f 100644
>>>> --- a/virt/kvm/kvm_main.c
>>>> +++ b/virt/kvm/kvm_main.c
>>>> @@ -96,7 +96,7 @@ static void hardware_disable_all(void);
>>>>
>>>>  static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
>>>>  static void update_memslots(struct kvm_memslots *slots,
>>>> -                           struct kvm_memory_slot *new, u64 last_generation);
>>>> +                           struct kvm_memory_slot *new);
>>>>
>>>>  static void kvm_release_pfn_dirty(pfn_t pfn);
>>>>  static void mark_page_dirty_in_slot(struct kvm *kvm,
>>>> @@ -685,8 +685,7 @@ static void sort_memslots(struct kvm_memslots *slots)
>>>>  }
>>>>
>>>>  static void update_memslots(struct kvm_memslots *slots,
>>>> -                           struct kvm_memory_slot *new,
>>>> -                           u64 last_generation)
>>>> +                           struct kvm_memory_slot *new)
>>>>  {
>>>>         if (new) {
>>>>                 int id = new->id;
>>>> @@ -697,8 +696,6 @@ static void update_memslots(struct kvm_memslots *slots,
>>>>                 if (new->npages != npages)
>>>>                         sort_memslots(slots);
>>>>         }
>>>> -
>>>> -       slots->generation = last_generation + 1;
>>>>  }
>>>>
>>>>  static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
>>>> @@ -720,10 +717,17 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
>>>>  {
>>>>         struct kvm_memslots *old_memslots = kvm->memslots;
>>>>
>>>> -       update_memslots(slots, new, kvm->memslots->generation);
>>>> +       /* ensure generation number is always increased. */
>>>> +       slots->updated = true;
>>>> +       slots->generation = old_memslots->generation;
>>>> +       update_memslots(slots, new);
>>>>         rcu_assign_pointer(kvm->memslots, slots);
>>>>         synchronize_srcu_expedited(&kvm->srcu);
>>>>
>>>> +       slots->generation++;
>>>> +       smp_wmb();
>>>> +       slots->updated = false;
>>>> +
>>>>         kvm_arch_memslots_updated(kvm);
>>>>
>>>>         return old_memslots;
>>>>
>>>
>>> This is effectively the same as the first approach.
>>>
>>> I just realized how simple Paolo's idea is. I think it can be a one line
>>> patch (without comments):
>>>
>>> [...]
>>>         update_memslots(slots, new, kvm->memslots->generation);
>>>         rcu_assign_pointer(kvm->memslots, slots);
>>>         synchronize_srcu_expedited(&kvm->srcu);
>>> +       slots->generation++;
>>>
>>>         kvm_arch_memslots_updated(kvm);
>>> [...]
>>
>> Really? Unfortunately no. :)
>>
>> See this scenario:
>>
>> CPU 0                                  CPU 1
>> ioctl registering a new memslot which
>> contains GPA:
>>                            page-fault handler:
>>                              see it'a mmio access on GPA;
>>
>>  assign the new memslots with generation number increased
>>                              cache the generation-number into spte;
>>                              fix the access and comeback to guest;
>> SRCU-sync
>>                              page-fault again and check the spte is a valid mmio-spte(*)
>> generation-number++;
>> return to userspace;
>>                              do mmio-emulation and inject mmio-exit;
>>
>> !!! userspace receives a unexpected mmio-exit, that is case 2 i exactly
>> said in the last mail.
>>
>>
>> Note in the step *, my approach detects the invalid generation-number which
>> will invalidate the mmio spte properly .
> 
> Sorry I didn't explain myself very well: Since we can get a single wrong
> mmio exit no matter what, it has to be handled in userspace. So my point
> was, it doesn't really help to fix that one very specific way that it can
> happen, because it can just happen in other ways. (E.g. update memslots
> occurs after is_noslot_pfn() and before mmio exit).
> 
> But it looks like you basically said the same thing earlier, so I think
> we're on the same page.
> 

Yes, that is what I was trying to explain in the previous mails. :(

> The single line patch I suggested was only intended to fix the "forever
> incorrectly exit mmio".

My patch also fixes this case and does not doubly increase the number. I think
it is the better one.

David Matlack Aug. 19, 2014, 5 a.m. UTC | #7
On Mon, Aug 18, 2014 at 9:41 PM, Xiao Guangrong
<xiaoguangrong@linux.vnet.ibm.com> wrote:
> On 08/19/2014 12:31 PM, David Matlack wrote:
>> But it looks like you basically said the same thing earlier, so I think
>> we're on the same page.
>>
>
> Yes, that is what i try to explain in previous mails. :(

I'm glad we understand each other now! Sorry again for my confusion.

>> The single line patch I suggested was only intended to fix the "forever
>> incorrectly exit mmio".
>
> My patch also fixes this case and that does not doubly increase the
> number. I think this is the better one.

I prefer doubly increasing the generation for this reason: the updated boolean
requires extra code on the "client-side" to check whether there's an update in
progress, and that makes it easy to get wrong. In fact, your patch forgot to
check the updated bit in mark_mmio_spte(). Doubly increasing the generation
requires no "client-side" code to work.
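
A sketch of why no reader-side change is needed with the double increment — the
same one-line change quoted above, annotated with illustrative generation
values (G is the generation before the ioctl):

	/* generation is already G+1 here: update_memslots() bumped it, so
	 * mmio sptes created against the old slots (generation <= G) miss. */
	update_memslots(slots, new, kvm->memslots->generation);
	rcu_assign_pointer(kvm->memslots, slots);
	/* faults in this window may still cache generation G+1 ...          */
	synchronize_srcu_expedited(&kvm->srcu);
	/* ... but this second bump (to G+2) makes those sptes stale as well,
	 * so they can cause at most one spurious exit, never a permanent
	 * one, and check_mmio_spte()/mark_mmio_spte() stay untouched.       */
	slots->generation++;

	kvm_arch_memslots_updated(kvm);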
Xiao Guangrong Aug. 19, 2014, 5:19 a.m. UTC | #8
On 08/19/2014 01:00 PM, David Matlack wrote:
> On Mon, Aug 18, 2014 at 9:41 PM, Xiao Guangrong
> <xiaoguangrong@linux.vnet.ibm.com> wrote:
>> On 08/19/2014 12:31 PM, David Matlack wrote:
>>> But it looks like you basically said the same thing earlier, so I think
>>> we're on the same page.
>>>
>>
>> Yes, that is what i try to explain in previous mails. :(
> 
> I'm glad we understand each other now! Sorry again for my confusion.

Yup, me too. :)

> 
>>> The single line patch I suggested was only intended to fix the "forever
>>> incorrectly exit mmio".
>>
>> My patch also fixes this case and that does not doubly increase the
>> number. I think this is the better one.
> 
> I prefer doubly increasing the generation for this reason: the updated boolean
> requires extra code on the "client-side" to check if there's an update in
> progress. And that makes it easy to get wrong. In fact, your patch
> forgot to check the updated bit in mark_mmio_spte(). Doubly increasing the
> generation requires no "client-side" code to work.

No, the updated patch is meant to fix case 2, the scenario I drew in the last
mail. By "my patch" I mean the original patch in this patchset, which just
increases the number after srcu-sync.

Then could you tell me what your approach can do that my original patch can not?

David Matlack Aug. 19, 2014, 5:40 a.m. UTC | #9
On Mon, Aug 18, 2014 at 10:19 PM, Xiao Guangrong
<xiaoguangrong@linux.vnet.ibm.com> wrote:
> On 08/19/2014 01:00 PM, David Matlack wrote:
>> On Mon, Aug 18, 2014 at 9:41 PM, Xiao Guangrong
>> <xiaoguangrong@linux.vnet.ibm.com> wrote:
>>> On 08/19/2014 12:31 PM, David Matlack wrote:
>>>> The single line patch I suggested was only intended to fix the "forever
>>>> incorrectly exit mmio".
>>>
>>> My patch also fixes this case and that does not doubly increase the
>>> number. I think this is the better one.
>>
>> I prefer doubly increasing the generation for this reason: the updated boolean
>> requires extra code on the "client-side" to check if there's an update in
>> progress. And that makes it easy to get wrong. In fact, your patch
>> forgot to check the updated bit in mark_mmio_spte(). Doubly increasing the
>> generation requires no "client-side" code to work.
>
> No, the updated patch is used to fix case 2 which i draw the scenario in
> the last mail. I mean the original patch in this patchset which just
> increase the number after srcu-sync.
>
> Then could you tell me that your approach can do but my original patch can not?

It avoids publishing new memslots with an old generation number attached to
them (even if it only lasts for a short period of time). Do you have a reason
why you don't want to doubly increase the generation?
Xiao Guangrong Aug. 19, 2014, 5:55 a.m. UTC | #10
On 08/19/2014 01:40 PM, David Matlack wrote:
> On Mon, Aug 18, 2014 at 10:19 PM, Xiao Guangrong
> <xiaoguangrong@linux.vnet.ibm.com> wrote:
>> On 08/19/2014 01:00 PM, David Matlack wrote:
>>> On Mon, Aug 18, 2014 at 9:41 PM, Xiao Guangrong
>>> <xiaoguangrong@linux.vnet.ibm.com> wrote:
>>>> On 08/19/2014 12:31 PM, David Matlack wrote:
>>>>> The single line patch I suggested was only intended to fix the "forever
>>>>> incorrectly exit mmio".
>>>>
>>>> My patch also fixes this case and that does not doubly increase the
>>>> number. I think this is the better one.
>>>
>>> I prefer doubly increasing the generation for this reason: the updated boolean
>>> requires extra code on the "client-side" to check if there's an update in
>>> progress. And that makes it easy to get wrong. In fact, your patch
>>> forgot to check the updated bit in mark_mmio_spte(). Doubly increasing the
>>> generation requires no "client-side" code to work.
>>
>> No, the updated patch is used to fix case 2 which i draw the scenario in
>> the last mail. I mean the original patch in this patchset which just
>> increase the number after srcu-sync.
>>
>> Then could you tell me that your approach can do but my original patch can not?
> 
> It avoids publishing new memslots with an old generation number attached to
> them (even if it only lasts for a short period of time). 

I cannot see the problem if that happens; could you please draw the scenario?

> Do you have a reason
> why you don't want to doubly increase the generation?

That makes the generation number wrap around more easily.
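
To put the wrap-around concern in numbers — a toy calculation only; the 19-bit
width below is an assumed example, the real width is whatever MMIO_GEN_MASK
leaves for the generation inside the spte:

#include <stdio.h>

int main(void)
{
	/* Assume the spte can hold an N-bit mmio generation (example only). */
	const unsigned int gen_bits = 19;
	const unsigned long distinct = 1UL << gen_bits;

	/* One increment per memslot update vs. two per update: doubling the
	 * increment halves the number of updates before the masked value
	 * repeats and stale mmio sptes have to be zapped. */
	printf("updates before wrap, +1 per update: %lu\n", distinct);
	printf("updates before wrap, +2 per update: %lu\n", distinct / 2);
	return 0;
}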


Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9314678..9fabf6a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -234,16 +234,22 @@  static unsigned int get_mmio_spte_generation(u64 spte)
 	return gen;
 }
 
-static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
+static unsigned int __kvm_current_mmio_generation(struct kvm_memslots *slots)
 {
+
 	/*
 	 * Init kvm generation close to MMIO_MAX_GEN to easily test the
 	 * code of handling generation number wrap-around.
 	 */
-	return (kvm_memslots(kvm)->generation +
+	return (slots->generation +
 		      MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
 }
 
+static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
+{
+	return __kvm_current_mmio_generation(kvm_memslots(kvm));
+}
+
 static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
 			   unsigned access)
 {
@@ -287,9 +293,15 @@  static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
 
 static bool check_mmio_spte(struct kvm *kvm, u64 spte)
 {
+	struct kvm_memslots *slots = kvm_memslots(kvm);
 	unsigned int kvm_gen, spte_gen;
 
-	kvm_gen = kvm_current_mmio_generation(kvm);
+	if (slots->updated)
+		return false;
+
+	smp_rmb();
+	
+	kvm_gen = __kvm_current_mmio_generation(slots);
 	spte_gen = get_mmio_spte_generation(spte);
 
 	trace_check_mmio_spte(spte, kvm_gen, spte_gen);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4b6c01b..1d4e78f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -96,7 +96,7 @@  static void hardware_disable_all(void);
 
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 static void update_memslots(struct kvm_memslots *slots,
-			    struct kvm_memory_slot *new, u64 last_generation);
+			    struct kvm_memory_slot *new);
 
 static void kvm_release_pfn_dirty(pfn_t pfn);
 static void mark_page_dirty_in_slot(struct kvm *kvm,
@@ -685,8 +685,7 @@  static void sort_memslots(struct kvm_memslots *slots)
 }
 
 static void update_memslots(struct kvm_memslots *slots,
-			    struct kvm_memory_slot *new,
-			    u64 last_generation)
+			    struct kvm_memory_slot *new)
 {
 	if (new) {
 		int id = new->id;
@@ -697,8 +696,6 @@  static void update_memslots(struct kvm_memslots *slots,
 		if (new->npages != npages)
 			sort_memslots(slots);
 	}
-
-	slots->generation = last_generation + 1;
 }
 
 static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
@@ -720,10 +717,17 @@  static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 {
 	struct kvm_memslots *old_memslots = kvm->memslots;
 
-	update_memslots(slots, new, kvm->memslots->generation);
+	/* ensure generation number is always increased. */
+	slots->updated = true;
+	slots->generation = old_memslots->generation;
+	update_memslots(slots, new);
 	rcu_assign_pointer(kvm->memslots, slots);
 	synchronize_srcu_expedited(&kvm->srcu);
 
+	slots->generation++;
+	smp_wmb();
+	slots->updated = false;
+
 	kvm_arch_memslots_updated(kvm);
 
 	return old_memslots;