diff mbox

[-V3,4/4] powerpc/kvm: Use 256K chunk to track both RMA and hash page table allocation.

Message ID 1372743918-12293-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Aneesh Kumar K.V July 2, 2013, 5:45 a.m. UTC
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Both RMA and hash page table requests will be multiples of 256K. We can use
a chunk size of 256K to track free/used 256K chunks in the bitmap. This
should help to reduce the bitmap size.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c |  3 +++
 arch/powerpc/kvm/book3s_hv_cma.c    | 35 ++++++++++++++++++++++++-----------
 arch/powerpc/kvm/book3s_hv_cma.h    |  5 +++++
 3 files changed, 32 insertions(+), 11 deletions(-)

Comments

Marko Weber | ZBF July 2, 2013, 6:29 a.m. UTC | #1
hello,

My virtual Windows machine freezes when I copy data to an SMB share.
I was able to reproduce it several times.
In the system log I find this:

Jul  2 08:22:00 databunka kernel: general protection fault: 0000 [#1] 
SMP
Jul  2 08:22:00 databunka kernel: Modules linked in: vhost_net macvtap 
macvlan ebtable_nat ebtables ipt_MASQUERADE iptable_nat nf_nat_ipv4 
nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack it87 
hwmon_vid ipt_REJECT xt_CHECKSUM iptable_mangle xt_tcpudp iptable_filter 
ip_tables x_tables bridge stp llc tun acpi_cpufreq mperf kvm_amd kvm 
ata_generic nvidia(PO) r8169 pata_acpi k10temp pata_atiixp mii i2c_piix4 
processor button
Jul  2 08:22:00 databunka kernel: CPU 2
Jul  2 08:22:00 databunka kernel: Pid: 3694, comm: vhost-3693 Tainted: P 
           O 3.9.8databunka_3.9.8 #1 Gigabyte Technology Co., Ltd. 
GA-MA785GT-UD3H/GA-MA785GT-UD3H
Jul  2 08:22:00 databunka kernel: RIP: 0010:[<ffffffff81090ba4>]  
[<ffffffff81090ba4>] put_page+0x9/0x2e
Jul  2 08:22:00 databunka kernel: RSP: 0018:ffff88037cc37bf8  EFLAGS: 
00010212
Jul  2 08:22:00 databunka kernel: RAX: ffff88038dca0cc0 RBX: 
0003f00fee030006 RCX: ffff8803f91ec01c
Jul  2 08:22:00 databunka kernel: RDX: 0000000000000140 RSI: 
0000000000000246 RDI: 0003f00fee030006
Jul  2 08:22:00 databunka kernel: RBP: ffff88037cc37c08 R08: 
ffff880389344518 R09: 0000000000001000
Jul  2 08:22:00 databunka kernel: R10: ffff88038dfe27f8 R11: 
0000008000000000 R12: 0000000000000012
Jul  2 08:22:00 databunka kernel: R13: 000000000000000c R14: 
ffff8803eb4edb80 R15: ffff880389340001
Jul  2 08:22:00 databunka kernel: FS:  00007f0edef09700(0000) 
GS:ffff8803ffd00000(0000) knlGS:0000000000000000
Jul  2 08:22:00 databunka kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 
000000008005003b
Jul  2 08:22:00 databunka kernel: CR2: 000007fefeeb7081 CR3: 
000000038dfe2000 CR4: 00000000000007a0
Jul  2 08:22:00 databunka kernel: DR0: 0000000000000000 DR1: 
0000000000000000 DR2: 0000000000000000
Jul  2 08:22:00 databunka kernel: DR3: 0000000000000000 DR6: 
00000000ffff0ff0 DR7: 0000000000000400
Jul  2 08:22:00 databunka kernel: Process vhost-3693 (pid: 3694, 
threadinfo ffff88037cc36000, task ffff88038a40a340)
Jul  2 08:22:00 databunka kernel: Stack:
Jul  2 08:22:00 databunka kernel: ffff88038dca0cc0 ffff8803eb4edb80 
ffff88037cc37c28 ffffffff8144377b
Jul  2 08:22:00 databunka kernel: ffff8803eb4edb80 000000000000efbe 
ffff88037cc37c48 ffffffff8144380b
Jul  2 08:22:00 databunka kernel: 0000000000000000 ffff8803eb4edb80 
ffff88037cc37c68 ffffffff814438a4
Jul  2 08:22:00 databunka kernel: Call Trace:
Jul  2 08:22:00 databunka kernel: [<ffffffff8144377b>] 
skb_release_data+0x80/0xfa
Jul  2 08:22:00 databunka kernel: [<ffffffff8144380b>] 
__kfree_skb+0x16/0x7d
Jul  2 08:22:00 databunka kernel: [<ffffffff814438a4>] 
kfree_skb+0x32/0x36
Jul  2 08:22:00 databunka kernel: [<ffffffffa000e83c>] 
tun_get_user+0x277/0x622 [tun]
Jul  2 08:22:00 databunka kernel: [<ffffffff8101f0dc>] ? 
default_spin_lock_flags+0x9/0xd
Jul  2 08:22:00 databunka kernel: [<ffffffffa000ec36>] 
tun_sendmsg+0x4f/0x70 [tun]
Jul  2 08:22:00 databunka kernel: [<ffffffff814cccc1>] ? 
_cond_resched+0x9/0x1d
Jul  2 08:22:00 databunka kernel: [<ffffffffa0989940>] 
handle_tx+0x3a4/0x4ae [vhost_net]
Jul  2 08:22:00 databunka kernel: [<ffffffffa0989a6c>] 
handle_tx_kick+0x10/0x12 [vhost_net]
Jul  2 08:22:00 databunka kernel: [<ffffffffa0987751>] 
vhost_worker+0xf6/0x15b [vhost_net]
Jul  2 08:22:00 databunka kernel: [<ffffffff814cd9a9>] ? 
_raw_spin_unlock_irqrestore+0x15/0x18
Jul  2 08:22:00 databunka kernel: [<ffffffffa098765b>] ? 
kref_sub.constprop.12+0x1d/0x1d [vhost_net]
Jul  2 08:22:00 databunka kernel: [<ffffffff81041605>] kthread+0x88/0x90
Jul  2 08:22:00 databunka kernel: [<ffffffff81040000>] ? 
parse_args+0x206/0x256
Jul  2 08:22:00 databunka kernel: [<ffffffff8104157d>] ? 
__kthread_parkme+0x60/0x60
Jul  2 08:22:00 databunka kernel: [<ffffffff814ce4fc>] 
ret_from_fork+0x7c/0xb0
Jul  2 08:22:00 databunka kernel: [<ffffffff8104157d>] ? 
__kthread_parkme+0x60/0x60
Jul  2 08:22:00 databunka kernel: Code: 83 ec 10 48 8d 55 fc c7 45 fc 00 
00 00 00 e8 07 ff ff ff 48 63 45 fc 65 48 01 04 25 d0 da 00 00 c9 c3 55 
48 89 e5 53 48 89 fb 50 <48> f7 03 00 c0 00 00 74 07 e8 46 fc ff ff eb 
11 e8 ca f7 ff ff
Jul  2 08:22:00 databunka kernel: RSP <ffff88037cc37bf8>
Jul  2 08:22:00 databunka kernel: ---[ end trace 38c4bb5d1100b013 ]---


I don't know if this is the right place to post this. Do I have to post it
to the libvirt channel?

thanks

,marko
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Mackerras July 3, 2013, 6:16 a.m. UTC | #2
On Tue, Jul 02, 2013 at 11:15:18AM +0530, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> 
> Both RMA and hash page table request will be a multiple of 256K. We can use
> a chunk size of 256K to track the free/used 256K chunk in the bitmap. This
> should help to reduce the bitmap size.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Acked-by: Paul Mackerras <paulus@samba.org>

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 354f4bb..7eb5dda 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,6 +37,8 @@ 
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 
+#include "book3s_hv_cma.h"
+
 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
 #define MAX_LPID_970	63
 
@@ -71,6 +73,7 @@  long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 
 	/* Next try to allocate from the preallocated pool */
 	if (!hpt) {
+		VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
 		page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
 		if (page) {
 			hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
index e04b269..d9d3d85 100644
--- a/arch/powerpc/kvm/book3s_hv_cma.c
+++ b/arch/powerpc/kvm/book3s_hv_cma.c
@@ -24,6 +24,8 @@ 
 #include <linux/sizes.h>
 #include <linux/slab.h>
 
+#include "book3s_hv_cma.h"
+
 struct kvm_cma {
 	unsigned long	base_pfn;
 	unsigned long	count;
@@ -96,6 +98,7 @@  struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
 	int ret;
 	struct page *page = NULL;
 	struct kvm_cma *cma = &kvm_cma_area;
+	unsigned long chunk_count, nr_chunk;
 	unsigned long mask, pfn, pageno, start = 0;
 
 
@@ -107,21 +110,27 @@  struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
 
 	if (!nr_pages)
 		return NULL;
-
+	/*
+	 * Align mask to chunk units: each bitmap bit tracks one chunk of pages.
+	 */
 	VM_BUG_ON(!is_power_of_2(align_pages));
-	mask = align_pages - 1;
+	mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
+	BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
+
+	chunk_count = cma->count >>  (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
 	mutex_lock(&kvm_cma_mutex);
 	for (;;) {
-		pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
-						    start, nr_pages, mask);
-		if (pageno >= cma->count)
+		pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
+						    start, nr_chunk, mask);
+		if (pageno >= chunk_count)
 			break;
 
-		pfn = cma->base_pfn + pageno;
+		pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
 		ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
 		if (ret == 0) {
-			bitmap_set(cma->bitmap, pageno, nr_pages);
+			bitmap_set(cma->bitmap, pageno, nr_chunk);
 			page = pfn_to_page(pfn);
 			memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
 			break;
@@ -150,9 +159,9 @@  struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
 bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
 {
 	unsigned long pfn;
+	unsigned long nr_chunk;
 	struct kvm_cma *cma = &kvm_cma_area;
 
-
 	if (!cma || !pages)
 		return false;
 
@@ -164,9 +173,12 @@  bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
 		return false;
 
 	VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
+	nr_chunk = nr_pages >>  (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
 	mutex_lock(&kvm_cma_mutex);
-	bitmap_clear(cma->bitmap, pfn - cma->base_pfn, nr_pages);
+	bitmap_clear(cma->bitmap,
+		     (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
+		     nr_chunk);
 	free_contig_range(pfn, nr_pages);
 	mutex_unlock(&kvm_cma_mutex);
 
@@ -204,13 +216,14 @@  static int __init kvm_cma_activate_area(unsigned long base_pfn,
 static int __init kvm_cma_init_reserved_areas(void)
 {
 	int bitmap_size, ret;
+	unsigned long chunk_count;
 	struct kvm_cma *cma = &kvm_cma_area;
 
 	pr_debug("%s()\n", __func__);
 	if (!cma->count)
 		return 0;
-
-	bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
+	chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
 	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
 	if (!cma->bitmap)
 		return -ENOMEM;
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
index 788bc3b..655144f 100644
--- a/arch/powerpc/kvm/book3s_hv_cma.h
+++ b/arch/powerpc/kvm/book3s_hv_cma.h
@@ -14,6 +14,11 @@ 
 
 #ifndef __POWERPC_KVM_CMA_ALLOC_H__
 #define __POWERPC_KVM_CMA_ALLOC_H__
+/*
+ * Both RMA and hash page table allocations will be multiples of 256K.
+ */
+#define KVM_CMA_CHUNK_ORDER	18
+
 extern struct page *kvm_alloc_cma(unsigned long nr_pages,
 				  unsigned long align_pages);
 extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);