Message ID: 1446184587-142784-9-git-send-email-guangrong.xiao@linux.intel.com (mailing list archive)
State: New, archived
On 30.10.2015 08:56, Xiao Guangrong wrote:
> Currently file_ram_alloc() is designed for hugetlbfs; however, the memory
> of an nvdimm can come from either a raw pmem device, e.g. /dev/pmem, or a
> file located on a DAX-enabled filesystem.
>
> So this patch lets it work on any kind of path.
>
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>  exec.c | 56 +++++++++++++++++---------------------------------------
>  1 file changed, 17 insertions(+), 39 deletions(-)
[...]
> -    block->mr->align = hpagesize;
>
> -    if (memory < hpagesize) {
> +    if (pagesize == getpagesize()) {
> +        fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
> +    }

It is strange to see this warning every time.

Shouldn't the differentiation be done explicitly on the command line? Maybe
a separate option mem-tlb, or a separate flag tlbfs=on, or, for the new
feature, a new option mem-file, or prefixes for paths (tlbfs://,
file://)... Or go the other way: not mix things, but split them.
On 10/30/2015 10:04 PM, Vladimir Sementsov-Ogievskiy wrote:
> On 30.10.2015 08:56, Xiao Guangrong wrote:
>> Currently file_ram_alloc() is designed for hugetlbfs; however, the memory
>> of an nvdimm can come from either a raw pmem device, e.g. /dev/pmem, or a
>> file located on a DAX-enabled filesystem.
>>
>> So this patch lets it work on any kind of path.
>>
>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
[...]
>> +    if (pagesize == getpagesize()) {
>> +        fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
>> +    }
>
> It is strange to see this warning every time.
>
> Shouldn't the differentiation be done explicitly on the command line? Maybe
> a separate option mem-tlb, or a separate flag tlbfs=on, or, for the new
> feature, a new option mem-file, or prefixes for paths (tlbfs://,
> file://)... Or go the other way: not mix things, but split them.

This is just a reminder to users. Currently QEMU does not stop the user
from backing guest memory with a regular file; in particular, hugetlbfs is
not the only way to get huge pages on a THP-enabled system.

We can implement your idea as an independent patchset in the future.
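(Background on the THP remark above: on a THP-enabled kernel, a process can
ask for huge pages behind ordinary anonymous memory with
madvise(MADV_HUGEPAGE), with no hugetlbfs mount involved. A minimal
standalone sketch, not QEMU code:)

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
        size_t size = 16 * 1024 * 1024;   /* 16 MiB, a multiple of 2 MiB */

        /* Plain anonymous mapping; no hugetlbfs path involved. */
        void *area = mmap(NULL, size, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (area == MAP_FAILED) {
            perror("mmap");
            return 1;
        }

        /* Ask the kernel to back this range with transparent huge pages. */
        if (madvise(area, size, MADV_HUGEPAGE) != 0) {
            perror("madvise");   /* advisory only: failure is non-fatal */
        }

        munmap(area, size);
        return 0;
    }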
On Sat, Oct 31, 2015 at 03:44:39PM +0800, Xiao Guangrong wrote:
> On 10/30/2015 10:04 PM, Vladimir Sementsov-Ogievskiy wrote:
> >On 30.10.2015 08:56, Xiao Guangrong wrote:
> >>Currently file_ram_alloc() is designed for hugetlbfs; however, the memory
> >>of an nvdimm can come from either a raw pmem device, e.g. /dev/pmem, or a
> >>file located on a DAX-enabled filesystem.
> >>
> >>So this patch lets it work on any kind of path.
> >>
> >>Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
[...]
> >>+    if (pagesize == getpagesize()) {
> >>+        fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
> >>+    }
> >
> >It is strange to see this warning every time.
> >
> >Shouldn't the differentiation be done explicitly on the command line? Maybe
> >a separate option mem-tlb, or a separate flag tlbfs=on, or, for the new
> >feature, a new option mem-file, or prefixes for paths (tlbfs://,
> >file://)... Or go the other way: not mix things, but split them.
>
> This is just a reminder to users. Currently QEMU does not stop the user
> from backing guest memory with a regular file; in particular, hugetlbfs is
> not the only way to get huge pages on a THP-enabled system.
>
> We can implement your idea as an independent patchset in the future.

Isn't the whole point of this patch to make the code not specific to
hugetlbfs anymore? It makes the warning obsolete.
On 10/31/2015 09:55 PM, Eduardo Habkost wrote:
> On Sat, Oct 31, 2015 at 03:44:39PM +0800, Xiao Guangrong wrote:
>> On 10/30/2015 10:04 PM, Vladimir Sementsov-Ogievskiy wrote:
>>> On 30.10.2015 08:56, Xiao Guangrong wrote:
[...]
>>>> +    if (pagesize == getpagesize()) {
>>>> +        fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
>>>> +    }
>>>
>>> It is strange to see this warning every time.
[...]
>> This is just a reminder to users. Currently QEMU does not stop the user
>> from backing guest memory with a regular file; in particular, hugetlbfs is
>> not the only way to get huge pages on a THP-enabled system.
>>
>> We can implement your idea as an independent patchset in the future.
>
> Isn't the whole point of this patch to make the code not specific to
> hugetlbfs anymore? It makes the warning obsolete.

Even before this patchset, this warning could be triggered; for example, we
can point @path at /tmp/.

I agree this warning could be improved, but let us do that independently.
On Fri, Oct 30, 2015 at 01:56:02PM +0800, Xiao Guangrong wrote:
> Currently file_ram_alloc() is designed for hugetlbfs; however, the memory
> of an nvdimm can come from either a raw pmem device, e.g. /dev/pmem, or a
> file located on a DAX-enabled filesystem.
>
> So this patch lets it work on any kind of path.
>
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>

So this allows regular memory to be specified directly.
This needs to be split out and merged separately
from the acpi/nvdimm bits.

Alternatively, if it's possible to use nvdimm with a DAX fs
(similar to hugetlbfs), leave these patches off for now.

[...]
> @@ -1247,14 +1225,14 @@ static void *file_ram_alloc(RAMBlock *block,
>      fd = mkstemp(filename);
>      if (fd < 0) {
>          error_setg_errno(errp, errno,
> -                         "unable to create backing store for hugepages");
> +                         "unable to create backing store for path %s", path);
>          g_free(filename);
>          goto error;
>      }
>      unlink(filename);
>      g_free(filename);

Looks like we are still calling mkstemp/unlink here.
How does this work?
On 11/09/2015 06:39 PM, Michael S. Tsirkin wrote:
> On Fri, Oct 30, 2015 at 01:56:02PM +0800, Xiao Guangrong wrote:
>> Currently file_ram_alloc() is designed for hugetlbfs; however, the memory
>> of an nvdimm can come from either a raw pmem device, e.g. /dev/pmem, or a
>> file located on a DAX-enabled filesystem.
>>
>> So this patch lets it work on any kind of path.
>>
>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
>
> So this allows regular memory to be specified directly.
> This needs to be split out and merged separately
> from the acpi/nvdimm bits.

Yup, that is in my plan.

> Alternatively, if it's possible to use nvdimm with a DAX fs
> (similar to hugetlbfs), leave these patches off for now.

DAX is a filesystem mount option; it is not easy to get this info from a
file.

[...]
>> @@ -1247,14 +1225,14 @@ static void *file_ram_alloc(RAMBlock *block,
>>      fd = mkstemp(filename);
>>      if (fd < 0) {
>>          error_setg_errno(errp, errno,
>> -                         "unable to create backing store for hugepages");
>> +                         "unable to create backing store for path %s", path);
>>          g_free(filename);
>>          goto error;
>>      }
>>      unlink(filename);
>>      g_free(filename);
>
> Looks like we are still calling mkstemp/unlink here.
> How does this work?

Hmm? We have got the fd, so the file can be safely unlinked: the kernel
does not actually delete the file, because the open fd still holds a
reference to it. And this patch just renames the variables; no logic
changed.

Will drop it from the series for now.
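(To make the mkstemp()/unlink() idiom above concrete, here is a minimal
standalone sketch, not QEMU code; the template path is made up. The name is
removed immediately, but the inode survives until the last reference, fd or
mapping, is gone:)

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/mman.h>

    int main(void)
    {
        char path[] = "/tmp/ram-backend-XXXXXX";  /* hypothetical template */
        size_t size = 4096;

        int fd = mkstemp(path);
        if (fd < 0) {
            perror("mkstemp");
            return 1;
        }

        /* Drop the name right away: the inode stays alive as long as the
         * fd (and later the mapping) holds a reference to it. */
        unlink(path);

        if (ftruncate(fd, size) != 0) {
            perror("ftruncate");
            return 1;
        }

        void *area = mmap(NULL, size, PROT_READ | PROT_WRITE,
                          MAP_SHARED, fd, 0);
        if (area == MAP_FAILED) {
            perror("mmap");
            return 1;
        }

        /* The backing store is fully usable after the unlink. */
        strcpy(area, "still alive");
        printf("%s\n", (char *)area);

        munmap(area, size);
        close(fd);   /* last reference dropped; only now is the file freed */
        return 0;
    }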
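(On the DAX remark above: DAX is a property of the mount rather than of the
file, so the best userspace can do is heuristics. A rough illustrative
sketch, not QEMU code: scan the mount table for the longest mount-point
prefix of the path and look for a "dax" option. This is exactly the kind of
fragile check the thread argues against relying on:)

    #include <mntent.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    static bool path_probably_dax(const char *path)
    {
        FILE *mtab = setmntent("/proc/self/mounts", "r");
        struct mntent *ent;
        size_t best_len = 0;
        bool dax = false;

        if (!mtab) {
            return false;
        }
        while ((ent = getmntent(mtab)) != NULL) {
            size_t len = strlen(ent->mnt_dir);
            /* Longest matching mount point wins, e.g. "/" vs "/mnt/pmem". */
            if (strncmp(path, ent->mnt_dir, len) == 0 && len >= best_len) {
                best_len = len;
                dax = hasmntopt(ent, "dax") != NULL;
            }
        }
        endmntent(mtab);
        return dax;
    }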
diff --git a/exec.c b/exec.c
index 8af2570..3ca7e50 100644
--- a/exec.c
+++ b/exec.c
@@ -1174,32 +1174,6 @@ void qemu_mutex_unlock_ramlist(void)
 }
 
 #ifdef __linux__
-
-#include <sys/vfs.h>
-
-#define HUGETLBFS_MAGIC 0x958458f6
-
-static long gethugepagesize(const char *path, Error **errp)
-{
-    struct statfs fs;
-    int ret;
-
-    do {
-        ret = statfs(path, &fs);
-    } while (ret != 0 && errno == EINTR);
-
-    if (ret != 0) {
-        error_setg_errno(errp, errno, "failed to get page size of file %s",
-                         path);
-        return 0;
-    }
-
-    if (fs.f_type != HUGETLBFS_MAGIC)
-        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
-
-    return fs.f_bsize;
-}
-
 static void *file_ram_alloc(RAMBlock *block,
                             ram_addr_t memory,
                             const char *path,
@@ -1210,20 +1184,24 @@ static void *file_ram_alloc(RAMBlock *block,
     char *c;
     void *area;
     int fd;
-    uint64_t hpagesize;
-    Error *local_err = NULL;
+    uint64_t pagesize;
 
-    hpagesize = gethugepagesize(path, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    pagesize = qemu_file_get_page_size(path);
+    if (!pagesize) {
+        error_setg(errp, "can't get page size for %s", path);
         goto error;
     }
-    block->mr->align = hpagesize;
 
-    if (memory < hpagesize) {
+    if (pagesize == getpagesize()) {
+        fprintf(stderr, "Memory is not allocated from HugeTlbfs.\n");
+    }
+
+    block->mr->align = pagesize;
+
+    if (memory < pagesize) {
         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
-                   "or larger than huge page size 0x%" PRIx64,
-                   memory, hpagesize);
+                   "or larger than page size 0x%" PRIx64,
+                   memory, pagesize);
         goto error;
     }
 
@@ -1247,14 +1225,14 @@ static void *file_ram_alloc(RAMBlock *block,
     fd = mkstemp(filename);
     if (fd < 0) {
         error_setg_errno(errp, errno,
-                         "unable to create backing store for hugepages");
+                         "unable to create backing store for path %s", path);
         g_free(filename);
         goto error;
     }
     unlink(filename);
     g_free(filename);
 
-    memory = ROUND_UP(memory, hpagesize);
+    memory = ROUND_UP(memory, pagesize);
 
     /*
      * ftruncate is not supported by hugetlbfs in older
@@ -1266,10 +1244,10 @@ static void *file_ram_alloc(RAMBlock *block,
         perror("ftruncate");
     }
 
-    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
+    area = qemu_ram_mmap(fd, memory, pagesize, block->flags & RAM_SHARED);
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
-                         "unable to map backing store for hugepages");
+                         "unable to map backing store for path %s", path);
         close(fd);
         goto error;
     }
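(For context, a rough sketch of what a qemu_file_get_page_size()-style
helper can look like, reusing the statfs() logic that the first hunk above
removes; the real QEMU helper may differ in detail:)

    #include <errno.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/vfs.h>

    #define HUGETLBFS_MAGIC 0x958458f6

    /* Return the page size backing @path: the filesystem block size for
     * hugetlbfs (which equals its huge page size), the normal system page
     * size for anything else, and 0 on error. */
    static uint64_t file_get_page_size(const char *path)
    {
        struct statfs fs;
        int ret;

        do {
            ret = statfs(path, &fs);
        } while (ret != 0 && errno == EINTR);

        if (ret != 0) {
            return 0;   /* caller turns this into an Error */
        }
        if (fs.f_type == HUGETLBFS_MAGIC) {
            return fs.f_bsize;
        }
        return getpagesize();
    }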
Currently file_ram_alloc() is designed for hugetlbfs; however, the memory
of an nvdimm can come from either a raw pmem device, e.g. /dev/pmem, or a
file located on a DAX-enabled filesystem.

So this patch lets it work on any kind of path.

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 exec.c | 56 +++++++++++++++++---------------------------------------
 1 file changed, 17 insertions(+), 39 deletions(-)
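(For orientation, the path handed to file_ram_alloc() normally comes from a
file-backed memory backend on the command line. The invocations below are
illustrative only; the nvdimm device is what this series as a whole
proposes, and the flag spellings follow the usual memory-backend-file
usage, not text from this thread:)

    # hugetlbfs mount (the original use case)
    qemu-system-x86_64 -m 4G \
        -object memory-backend-file,id=mem0,size=4G,mem-path=/dev/hugepages \
        -numa node,memdev=mem0

    # raw pmem device or DAX file (what this patch enables), attached to
    # the vNVDIMM device this series introduces
    qemu-system-x86_64 -machine pc,nvdimm -m 4G,slots=2,maxmem=8G \
        -object memory-backend-file,id=mem1,share=on,size=4G,mem-path=/dev/pmem0 \
        -device nvdimm,memdev=mem1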