From patchwork Mon Nov 23 14:06:00 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Gleb Natapov X-Patchwork-Id: 62174 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id nANEBoTK018662 for ; Mon, 23 Nov 2009 14:11:50 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754027AbZKWOJa (ORCPT ); Mon, 23 Nov 2009 09:09:30 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1753604AbZKWOJ1 (ORCPT ); Mon, 23 Nov 2009 09:09:27 -0500 Received: from mx1.redhat.com ([209.132.183.28]:19080 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753567AbZKWOJZ (ORCPT ); Mon, 23 Nov 2009 09:09:25 -0500 Received: from int-mx01.intmail.prod.int.phx2.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id nANE9BGV010738 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK); Mon, 23 Nov 2009 09:09:11 -0500 Received: from dhcp-1-237.tlv.redhat.com (dhcp-1-237.tlv.redhat.com [10.35.1.237]) by int-mx01.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id nANE7sPf022360; Mon, 23 Nov 2009 09:09:11 -0500 Received: by dhcp-1-237.tlv.redhat.com (Postfix, from userid 13519) id 1BC4C18D470; Mon, 23 Nov 2009 16:06:08 +0200 (IST) From: Gleb Natapov To: kvm@vger.kernel.org Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, avi@redhat.com, mingo@elte.hu, a.p.zijlstra@chello.nl, tglx@linutronix.de, hpa@zytor.com, riel@redhat.com Subject: [PATCH v2 05/12] Handle asynchronous page fault in a PV guest. Date: Mon, 23 Nov 2009 16:06:00 +0200 Message-Id: <1258985167-29178-6-git-send-email-gleb@redhat.com> In-Reply-To: <1258985167-29178-1-git-send-email-gleb@redhat.com> References: <1258985167-29178-1-git-send-email-gleb@redhat.com> X-Scanned-By: MIMEDefang 2.67 on 10.5.11.11 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index d7d7079..79bb7f2 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -49,6 +49,9 @@ struct kvm_mmu_op_release_pt { __u64 pt_phys; }; +#define KVM_PV_REASON_PAGE_NOT_PRESENT 1 +#define KVM_PV_REASON_PAGE_READY 2 + struct kvm_vcpu_pv_apf_data { __u32 reason; __u32 enabled; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index fdd0b95..09444c9 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include #include @@ -54,6 +56,130 @@ static void kvm_io_delay(void) { } +#define KVM_TASK_SLEEP_HASHBITS 8 +#define KVM_TASK_SLEEP_HASHSIZE (1<list) { + struct kvm_task_sleep_node *n = + hlist_entry(p, typeof(*n), link); + if (n->token == token) + return n; + } + + return NULL; +} + +static void apf_task_wait(struct task_struct *tsk, u32 token) +{ + u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); + struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; + struct kvm_task_sleep_node n, *e; + DEFINE_WAIT(wait); + + spin_lock(&b->lock); + e = _find_apf_task(b, token); + if (e) { + /* dummy entry exist -> wake up was delivered ahead of PF */ + hlist_del(&e->link); + kfree(e); + spin_unlock(&b->lock); + return; + } + + n.token = token; + init_waitqueue_head(&n.wq); + hlist_add_head(&n.link, &b->list); + spin_unlock(&b->lock); + + for (;;) { + prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE); + if (hlist_unhashed(&n.link)) + break; + schedule(); + } + finish_wait(&n.wq, &wait); + + return; +} + +static void apf_task_wake(u32 token) +{ + u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); + struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; + struct kvm_task_sleep_node *n; + +again: + spin_lock(&b->lock); + n = _find_apf_task(b, token); + if (!n) { + /* + * async PF was not yet handled. + * Add dummy entry for the token. + */ + n = kmalloc(sizeof(*n), GFP_ATOMIC); + if (!n) { + /* + * Allocation failed! Busy wait while other vcpu + * handles async PF. + */ + spin_unlock(&b->lock); + cpu_relax(); + goto again; + } + n->token = token; + hlist_add_head(&n->link, &b->list); + } else { + hlist_del_init(&n->link); + if (waitqueue_active(&n->wq)) + wake_up(&n->wq); + } + spin_unlock(&b->lock); + return; +} + +int kvm_handle_pf(struct pt_regs *regs, unsigned long error_code) +{ + u32 reason, token; + + if (!per_cpu(apf_reason, smp_processor_id()).enabled) + return 0; + + reason = per_cpu(apf_reason, smp_processor_id()).reason; + per_cpu(apf_reason, smp_processor_id()).reason = 0; + + token = (u32)read_cr2(); + + switch (reason) { + default: + return 0; + case KVM_PV_REASON_PAGE_NOT_PRESENT: + /* page is swapped out by the host. */ + apf_task_wait(current, token); + break; + case KVM_PV_REASON_PAGE_READY: + apf_task_wake(token); + break; + } + + return 1; +} + static void kvm_mmu_op(void *buffer, unsigned len) { int r; @@ -207,6 +333,9 @@ static void __init paravirt_ops_setup(void) if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) + pv_cpu_ops.handle_pf = kvm_handle_pf; + if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) { pv_mmu_ops.set_pte = kvm_set_pte; pv_mmu_ops.set_pte_at = kvm_set_pte_at; @@ -270,11 +399,14 @@ static void __init kvm_smp_prepare_boot_cpu(void) void __init kvm_guest_init(void) { + int i; if (!kvm_para_available()) return; paravirt_ops_setup(); register_reboot_notifier(&kvm_pv_reboot_nb); + for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) + spin_lock_init(&async_pf_sleepers[i].lock); #ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; #else