From patchwork Wed May 27 16:22:04 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Avi Kivity X-Patchwork-Id: 26497 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id n4RGMEde017675 for ; Wed, 27 May 2009 16:22:15 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1757814AbZE0QWI (ORCPT ); Wed, 27 May 2009 12:22:08 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1757022AbZE0QWH (ORCPT ); Wed, 27 May 2009 12:22:07 -0400 Received: from mx2.redhat.com ([66.187.237.31]:49212 "EHLO mx2.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756316AbZE0QWF (ORCPT ); Wed, 27 May 2009 12:22:05 -0400 Received: from int-mx2.corp.redhat.com (int-mx2.corp.redhat.com [172.16.27.26]) by mx2.redhat.com (8.13.8/8.13.8) with ESMTP id n4RGM7M1008076; Wed, 27 May 2009 12:22:07 -0400 Received: from ns3.rdu.redhat.com (ns3.rdu.redhat.com [10.11.255.199]) by int-mx2.corp.redhat.com (8.13.1/8.13.1) with ESMTP id n4RGM6Th002053; Wed, 27 May 2009 12:22:06 -0400 Received: from cleopatra.tlv.redhat.com (cleopatra.tlv.redhat.com [10.35.255.11]) by ns3.rdu.redhat.com (8.13.8/8.13.8) with ESMTP id n4RGM4b0010749; Wed, 27 May 2009 12:22:05 -0400 Received: from balrog.qumranet.com (dhcp-1-197.tlv.redhat.com [10.35.1.197]) by cleopatra.tlv.redhat.com (Postfix) with ESMTP id 869D3250ABF; Wed, 27 May 2009 19:22:04 +0300 (IDT) Message-ID: <4A1D68AC.3050300@redhat.com> Date: Wed, 27 May 2009 19:22:04 +0300 From: Avi Kivity User-Agent: Thunderbird 2.0.0.21 (X11/20090320) MIME-Version: 1.0 To: nicolas prochazka CC: kvm@vger.kernel.org Subject: Re: Remove qemu_alloc_physram() References: <2803e73b0905270308l70cf562jaca72bf67d1aafff@mail.gmail.com> <4A1D3711.2070603@redhat.com> <2803e73b0905270630t5579f141lcb1013ae9aa146a9@mail.gmail.com> <2803e73b0905270640n56d9bfa3x15c0ecabd0b4676f@mail.gmail.com> <4A1D4739.4070208@redhat.com> In-Reply-To: <4A1D4739.4070208@redhat.com> X-Scanned-By: MIMEDefang 2.58 on 172.16.27.26 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org Avi Kivity wrote: > nicolas prochazka wrote: >> without -mem-prealloc >> HugePages_Total: 2560 >> HugePages_Free: 2296 >> HugePages_Rsvd: 0 >> >> so after minimum test, i can say that's your patch seems to be correct >> this problem. >> > > It isn't correct, it doesn't generate the right alignment. > Better patch attached. diff --git a/exec.c b/exec.c index e241f05..c024b8b 100644 --- a/exec.c +++ b/exec.c @@ -2484,6 +2484,113 @@ static ram_addr_t kqemu_ram_alloc(ram_addr_t size) } #endif +#ifdef __linux__ + +#include + +#define HUGETLBFS_MAGIC 0x958458f6 + +static long gethugepagesize(const char *path) +{ + struct statfs fs; + int ret; + + do { + ret = statfs(path, &fs); + } while (ret != 0 && errno == EINTR); + + if (ret != 0) { + perror("statfs"); + return 0; + } + + if (fs.f_type != HUGETLBFS_MAGIC) + fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path); + + return fs.f_bsize; +} + +static void *file_ram_alloc(ram_addr_t memory, const char *path) +{ + char *filename; + void *area; + int fd; +#ifdef MAP_POPULATE + int flags; +#endif + unsigned long hpagesize; + extern int mem_prealloc; + + if (!path) { + return NULL; + } + + hpagesize = gethugepagesize(path); + if (!hpagesize) { + return NULL; + } + + if (memory < hpagesize) { + return NULL; + } + + if (kvm_enabled() && !kvm_has_sync_mmu()) { + fprintf(stderr, "host lacks mmu notifiers, disabling --mem-path\n"); + return NULL; + } + + if (asprintf(&filename, "%s/kvm.XXXXXX", path) == -1) { + return NULL; + } + + fd = mkstemp(filename); + if (fd < 0) { + perror("mkstemp"); + free(filename); + return NULL; + } + unlink(filename); + free(filename); + + memory = (memory+hpagesize-1) & ~(hpagesize-1); + + /* + * ftruncate is not supported by hugetlbfs in older + * hosts, so don't bother checking for errors. + * If anything goes wrong with it under other filesystems, + * mmap will fail. + */ + ftruncate(fd, memory); + +#ifdef MAP_POPULATE + /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case + * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED + * to sidestep this quirk. + */ + flags = mem_prealloc ? MAP_POPULATE|MAP_SHARED : MAP_PRIVATE; + area = mmap(0, memory, PROT_READ|PROT_WRITE, flags, fd, 0); +#else + area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); +#endif + if (area == MAP_FAILED) { + perror("alloc_mem_area: can't mmap hugetlbfs pages"); + close(fd); + return (NULL); + } + return area; +} + +#else + +static void *file_ram_alloc(ram_addr_t memory, const char *path) +{ + return NULL; +} + +#endif + +extern const char *mem_path; + ram_addr_t qemu_ram_alloc(ram_addr_t size) { RAMBlock *new_block; @@ -2497,7 +2604,10 @@ ram_addr_t qemu_ram_alloc(ram_addr_t size) size = TARGET_PAGE_ALIGN(size); new_block = qemu_malloc(sizeof(*new_block)); - new_block->host = qemu_vmalloc(size); + new_block->host = file_ram_alloc(size, mem_path); + if (!new_block->host) { + new_block->host = qemu_vmalloc(size); + } new_block->offset = last_ram_offset; new_block->length = size; diff --git a/hw/pc.c b/hw/pc.c index 9e99b7c..74754a3 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -895,18 +895,11 @@ static void pc_init1(ram_addr_t ram_size, vmport_init(); /* allocate RAM */ - ram_addr = qemu_ram_alloc(0xa0000); + ram_addr = qemu_ram_alloc(below_4g_mem_size); cpu_register_physical_memory(0, 0xa0000, ram_addr); - - /* Allocate, even though we won't register, so we don't break the - * phys_ram_base + PA assumption. This range includes vga (0xa0000 - 0xc0000), - * and some bios areas, which will be registered later - */ - ram_addr = qemu_ram_alloc(0x100000 - 0xa0000); - ram_addr = qemu_ram_alloc(below_4g_mem_size - 0x100000); cpu_register_physical_memory(0x100000, below_4g_mem_size - 0x100000, - ram_addr); + ram_addr + 0x100000); /* above 4giga memory allocation */ if (above_4g_mem_size > 0) { @@ -914,14 +907,6 @@ static void pc_init1(ram_addr_t ram_size, hw_error("To much RAM for 32-bit physical address"); #else ram_addr = qemu_ram_alloc(above_4g_mem_size); - if (hpagesize) { - if (ram_addr & (hpagesize-1)) { - unsigned long aligned_addr; - aligned_addr = (ram_addr + hpagesize - 1) & ~(hpagesize-1); - qemu_ram_alloc(aligned_addr - ram_addr); - ram_addr = aligned_addr; - } - } cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size, ram_addr); diff --git a/vl.c b/vl.c index db8265b..8aea7d6 100644 --- a/vl.c +++ b/vl.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #if defined(__NetBSD__) @@ -268,7 +267,6 @@ const char *mem_path = NULL; #ifdef MAP_POPULATE int mem_prealloc = 1; /* force preallocation of physical target memory */ #endif -long hpagesize = 0; #ifdef TARGET_ARM int old_param = 0; #endif @@ -4856,90 +4854,6 @@ int qemu_uuid_parse(const char *str, uint8_t *uuid) #define MAX_NET_CLIENTS 32 -#ifdef USE_KVM - -#define HUGETLBFS_MAGIC 0x958458f6 - -static long gethugepagesize(const char *path) -{ - struct statfs fs; - int ret; - - do { - ret = statfs(path, &fs); - } while (ret != 0 && errno == EINTR); - - if (ret != 0) { - perror("statfs"); - return 0; - } - - if (fs.f_type != HUGETLBFS_MAGIC) - fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path); - - return fs.f_bsize; -} - -static void *alloc_mem_area(size_t memory, unsigned long *len, const char *path) -{ - char *filename; - void *area; - int fd; -#ifdef MAP_POPULATE - int flags; -#endif - - if (!kvm_has_sync_mmu()) { - fprintf(stderr, "host lacks mmu notifiers, disabling --mem-path\n"); - return NULL; - } - - if (asprintf(&filename, "%s/kvm.XXXXXX", path) == -1) - return NULL; - - hpagesize = gethugepagesize(path); - if (!hpagesize) - return NULL; - - fd = mkstemp(filename); - if (fd < 0) { - perror("mkstemp"); - free(filename); - return NULL; - } - unlink(filename); - free(filename); - - memory = (memory+hpagesize-1) & ~(hpagesize-1); - - /* - * ftruncate is not supported by hugetlbfs in older - * hosts, so don't bother checking for errors. - * If anything goes wrong with it under other filesystems, - * mmap will fail. - */ - ftruncate(fd, memory); - -#ifdef MAP_POPULATE - /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case - * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED - * to sidestep this quirk. - */ - flags = mem_prealloc ? MAP_POPULATE|MAP_SHARED : MAP_PRIVATE; - area = mmap(0, memory, PROT_READ|PROT_WRITE, flags, fd, 0); -#else - area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); -#endif - if (area == MAP_FAILED) { - perror("alloc_mem_area: can't mmap hugetlbfs pages"); - close(fd); - return (NULL); - } - *len = memory; - return area; -} -#endif - #ifndef _WIN32 static void termsig_handler(int signal)