diff mbox

KVM: Questions and comments on make_all_cpus_request

Message ID 1358297020-1548-1-git-send-email-cdall@cs.columbia.edu (mailing list archive)
State New, archived
Headers show

Commit Message

Christoffer Dall Jan. 16, 2013, 12:43 a.m. UTC
Hi KVM guys,

I've had a bit of challenges figuring out the exact functinality and
synchronization of vcpu->requests and friends.  In lack of a better
method, I wrote some comments as a patch.

I think this code really deserves some explaining, as it is really hard
to understand otherwise.  Unfortunately, I wasn't able to write down
concise and exact comments, but I hope someone else feels up to the
challenge.

Let me know if I just got this completely wrong and upside down.

Thanks,
Christoffer
---
 include/linux/kvm_host.h |    4 ++++
 virt/kvm/kvm_main.c      |   29 +++++++++++++++++++++++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)

Comments

Takuya Yoshikawa Jan. 16, 2013, 1:20 p.m. UTC | #1
On Tue, 15 Jan 2013 19:43:40 -0500
Christoffer Dall <cdall@cs.columbia.edu> wrote:

> @@ -201,9 +222,13 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
>  {
>  	long dirty_count = kvm->tlbs_dirty;
>  
> -	smp_mb();
> +	smp_mb(); /* TODO: Someone should explain this! */
>  	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
>  		++kvm->stat.remote_tlb_flush;
> +	/*
> +	 * TODO: Someone should explain this, why is it a cmpxchg, what
> +	 * happens if the dirty is different from dirty_count?
> +	 */
>  	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
>  }

If it's different from the old value, that means another
attempt which forces us to care about TLBs has been done?

So I think making tlbs_dirty 0 is safe only when it's not
changed during executing this function.

	Takuya
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 16, 2013, 6:19 p.m. UTC | #2
On Tue, Jan 15, 2013 at 07:43:40PM -0500, Christoffer Dall wrote:
> Hi KVM guys,
> 
> I've had a bit of challenges figuring out the exact functinality and
> synchronization of vcpu->requests and friends.  In lack of a better
> method, I wrote some comments as a patch.
> 
> I think this code really deserves some explaining, as it is really hard
> to understand otherwise.  Unfortunately, I wasn't able to write down
> concise and exact comments, but I hope someone else feels up to the
> challenge.
> 
> Let me know if I just got this completely wrong and upside down.
> 
> Thanks,
> Christoffer
> ---
>  include/linux/kvm_host.h |    4 ++++
>  virt/kvm/kvm_main.c      |   29 +++++++++++++++++++++++++++--
>  2 files changed, 31 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index cbe0d68..25deef8 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -252,6 +252,10 @@ struct kvm_vcpu {
>  	struct kvm_vcpu_arch arch;
>  };
>  
> +/*
> + * XXX: Could we explain what we're trying to achieve? Is this an
> + * optimization as to not send multiple IPIs?
> + */
>  static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
>  {
>  	return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index e45c20c..ccc292d 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -165,7 +165,18 @@ static void ack_flush(void *_completed)
>  {
>  }
>  
> -static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
> +/**
> + * make_all_cpus_request - place request on vcpus
> + * @kvm: KVM Struct
> + * @req: Request to make on the VCPU
> + *
> + * This function places a request on a VCPU and ensures that the VCPU request
> + * is handled before returning from the function, if the VCPU is in guest
> + * mode (or exiting, or reading shadow page tables?).
> + *
> + * Returns true if at least of the vcpus were sent an IPI and responded to it,
> + */
> +static bool make_all_vcpus_request(struct kvm *kvm, unsigned int req)
>  {
>  	int i, cpu, me;
>  	cpumask_var_t cpus;
> @@ -179,9 +190,19 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
>  		kvm_make_request(req, vcpu);
>  		cpu = vcpu->cpu;
>  
> +		/*
> +		 * Is the following really true? Can we have an example of the
> +		 * race that would otherwise happen? Doesn't the make_request
> +		 * pair against the IPI and interrupt disabling, not the mode?
> +		 */
>  		/* Set ->requests bit before we read ->mode */
>  		smp_mb();

cpu0						cpu1
					
make_all_reqs
					
mode = vcpu->mode = OUTSIDE_GUEST_MODE
						vcpu->mode = IN_GUEST_MODE
						if (vcpu->requests)
set_bit(KVM_REQ_y, vcpu)
						
no kick, its outside guest mode

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Jan. 16, 2013, 7:26 p.m. UTC | #3
On Tue, Jan 15, 2013 at 07:43:40PM -0500, Christoffer Dall wrote:
> Hi KVM guys,
> 
> I've had a bit of challenges figuring out the exact functinality and
> synchronization of vcpu->requests and friends.  In lack of a better
> method, I wrote some comments as a patch.
> 
> I think this code really deserves some explaining, as it is really hard
> to understand otherwise.  Unfortunately, I wasn't able to write down
> concise and exact comments, but I hope someone else feels up to the
> challenge.
> 
> Let me know if I just got this completely wrong and upside down.
> 
> Thanks,
> Christoffer
> ---
>  include/linux/kvm_host.h |    4 ++++
>  virt/kvm/kvm_main.c      |   29 +++++++++++++++++++++++++++--
>  2 files changed, 31 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index cbe0d68..25deef8 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -252,6 +252,10 @@ struct kvm_vcpu {
>  	struct kvm_vcpu_arch arch;
>  };
>  
> +/*
> + * XXX: Could we explain what we're trying to achieve? Is this an
> + * optimization as to not send multiple IPIs?
> + */
Yes and no. This is to make parallel make_all_cpus_request() work without
locking. make_all_cpus_request() returns only after all vcpus receive the
IPI. It should never return before that. Now suppose we have only to
states IN_GUEST/OUTSIDE_GUEST. CPU A and B call make_all_cpus_request()
in parallel while vcpu C is in a guest mode (vcpu->mode == IN_GUEST).

A make_all_cpus_request()  | B make_all_cpus_request()    | C vcpu
                           |                              | C->mode = IN_GUEST
if (C->mode == IN_GUEST)   |                              |
   C->mode = OUTSIDE_GUEST |                              |
                           | if (C->mode == OUTSIDE_GUEST)|
                           |    return;                   |
   smp_call_function()     |                              |
                           |                              | vmexit
   return                  |                              |

As you can see B's  make_all_cpus_request() returns before vcpu C vmexit.
EXITING_GUEST_MODE fixes that since B will call smp_call_function()
after A will set C->mode to EXITING_GUEST_MODE. The alternative is to
call smp_call_function() no matter what the mode is but this is a lot of
useless IPIs.

>  static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
>  {
>  	return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index e45c20c..ccc292d 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -165,7 +165,18 @@ static void ack_flush(void *_completed)
>  {
>  }
>  
> -static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
> +/**
> + * make_all_cpus_request - place request on vcpus
> + * @kvm: KVM Struct
> + * @req: Request to make on the VCPU
> + *
> + * This function places a request on a VCPU and ensures that the VCPU request
> + * is handled before returning from the function, if the VCPU is in guest
> + * mode (or exiting, or reading shadow page tables?).
> + *
> + * Returns true if at least of the vcpus were sent an IPI and responded to it,
> + */
> +static bool make_all_vcpus_request(struct kvm *kvm, unsigned int req)
>  {
>  	int i, cpu, me;
>  	cpumask_var_t cpus;
> @@ -179,9 +190,19 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
>  		kvm_make_request(req, vcpu);
>  		cpu = vcpu->cpu;
>  
> +		/*
> +		 * Is the following really true? Can we have an example of the
> +		 * race that would otherwise happen? Doesn't the make_request
> +		 * pair against the IPI and interrupt disabling, not the mode?
> +		 */
>  		/* Set ->requests bit before we read ->mode */
>  		smp_mb();

A make_all_cpus_request()          |     B vcpu
                                   |  B->mode = OUTSIDE_GUEST_MODE
if (B->mode == OUTSIDE_GUEST_MODE) |
   skip smp_call_function          |
                                   |  B->mode = IN_GUEST_MODE
                                   |  if (B->requests)
                                   |     skip guest entry
kvm_make_request(B)                |
                                   | vmenter

As you can see above if we change order vcpu can enter guest mode
without serving requests.

>  
> +		/*
> +		 * Set the bit on the CPU mask for all CPUs which are somehow
> +		 * running a guest (IN_GUEST_MODE, EXITING_GUEST_MODE, and
> +		 * READING_SHADOW_PAGE_TABLES).
> +		 */
>  		if (cpus != NULL && cpu != -1 && cpu != me &&
>  		      kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
>  			cpumask_set_cpu(cpu, cpus);
> @@ -201,9 +222,13 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
>  {
>  	long dirty_count = kvm->tlbs_dirty;
>  
> -	smp_mb();
> +	smp_mb(); /* TODO: Someone should explain this! */
>  	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
>  		++kvm->stat.remote_tlb_flush;
> +	/*
> +	 * TODO: Someone should explain this, why is it a cmpxchg, what
> +	 * happens if the dirty is different from dirty_count?
> +	 */
>  	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
>  }
>  
> -- 
> 1.7.9.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cbe0d68..25deef8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -252,6 +252,10 @@  struct kvm_vcpu {
 	struct kvm_vcpu_arch arch;
 };
 
+/*
+ * XXX: Could we explain what we're trying to achieve? Is this an
+ * optimization as to not send multiple IPIs?
+ */
 static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
 {
 	return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e45c20c..ccc292d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -165,7 +165,18 @@  static void ack_flush(void *_completed)
 {
 }
 
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+/**
+ * make_all_cpus_request - place request on vcpus
+ * @kvm: KVM Struct
+ * @req: Request to make on the VCPU
+ *
+ * This function places a request on a VCPU and ensures that the VCPU request
+ * is handled before returning from the function, if the VCPU is in guest
+ * mode (or exiting, or reading shadow page tables?).
+ *
+ * Returns true if at least of the vcpus were sent an IPI and responded to it,
+ */
+static bool make_all_vcpus_request(struct kvm *kvm, unsigned int req)
 {
 	int i, cpu, me;
 	cpumask_var_t cpus;
@@ -179,9 +190,19 @@  static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 		kvm_make_request(req, vcpu);
 		cpu = vcpu->cpu;
 
+		/*
+		 * Is the following really true? Can we have an example of the
+		 * race that would otherwise happen? Doesn't the make_request
+		 * pair against the IPI and interrupt disabling, not the mode?
+		 */
 		/* Set ->requests bit before we read ->mode */
 		smp_mb();
 
+		/*
+		 * Set the bit on the CPU mask for all CPUs which are somehow
+		 * running a guest (IN_GUEST_MODE, EXITING_GUEST_MODE, and
+		 * READING_SHADOW_PAGE_TABLES).
+		 */
 		if (cpus != NULL && cpu != -1 && cpu != me &&
 		      kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
 			cpumask_set_cpu(cpu, cpus);
@@ -201,9 +222,13 @@  void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
 	long dirty_count = kvm->tlbs_dirty;
 
-	smp_mb();
+	smp_mb(); /* TODO: Someone should explain this! */
 	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
+	/*
+	 * TODO: Someone should explain this, why is it a cmpxchg, what
+	 * happens if the dirty is different from dirty_count?
+	 */
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }