diff mbox

[V2] kvm tools: Enable O_DIRECT support

Message ID 1342106323-27774-1-git-send-email-asias.hejun@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Asias He July 12, 2012, 3:18 p.m. UTC
With Direct I/O, file reads and writes go directly from the applications
to the storage device, bypassing the operating system read and write
caches. This is useful for applications that manage their own caches.

Open a disk image with O_DIRECT:
   $ lkvm run -d ~/img/test.img,direct

The original readonly flag is still supported.
Open a disk image with O_DIRECT and readonly:
   $ lkvm run -d ~/img/test.img,direct,ro

Signed-off-by: Asias He <asias.hejun@gmail.com>
---
 tools/kvm/builtin-run.c            |   23 +++++++++++++++--------
 tools/kvm/disk/blk.c               |    4 ++--
 tools/kvm/disk/core.c              |   19 ++++++++++++++-----
 tools/kvm/include/kvm/disk-image.h |    5 +++--
 4 files changed, 34 insertions(+), 17 deletions(-)

Comments

Pekka Enberg July 12, 2012, 4:05 p.m. UTC | #1
On Thu, Jul 12, 2012 at 6:18 PM, Asias He <asias.hejun@gmail.com> wrote:
> With Direct I/O, file reads and writes go directly from the applications
> to the storage device, bypassing the operating system read and write
> caches. This is useful for applications that manage their own caches.
>
> Open a disk image with O_DIRECT:
>    $ lkvm run -d ~/img/test.img,direct
>
> The original readonly flag is still supported.
> Open a disk image with O_DIRECT and readonly:
>    $ lkvm run -d ~/img/test.img,direct,ro
>
> Signed-off-by: Asias He <asias.hejun@gmail.com>

Well, what's the use case for this?

                        Pekka
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Asias He July 14, 2012, 2:03 a.m. UTC | #2
On Fri, Jul 13, 2012 at 12:05 AM, Pekka Enberg <penberg@kernel.org> wrote:
> On Thu, Jul 12, 2012 at 6:18 PM, Asias He <asias.hejun@gmail.com> wrote:
>> With Direct I/O, file reads and writes go directly from the applications
>> to the storage device, bypassing the operating system read and write
>> caches. This is useful for applications that manage their own caches.
>>
>> Open a disk image with O_DIRECT:
>>    $ lkvm run -d ~/img/test.img,direct
>>
>> The original readonly flag is still supported.
>> Open a disk image with O_DIRECT and readonly:
>>    $ lkvm run -d ~/img/test.img,direct,ro
>>
>> Signed-off-by: Asias He <asias.hejun@gmail.com>
>
> Well, what's the use case for this?

With O_DIRECT, the host page cache is bypassed. This saves copying
data between the page cache and user space, and thus reduces CPU
utilization and memory usage. This might be useful for users with
high-end storage.

It's also useful when we benchmark the performance of the storage
stack on the kvm tool side, because it excludes one factor we are not
interested in.
diff mbox

Patch

diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index a120fe2..8e1627e 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -127,9 +127,10 @@  void kvm_run_set_wrapper_sandbox(void)
 
 static int img_name_parser(const struct option *opt, const char *arg, int unset)
 {
-	char *sep;
-	struct stat st;
 	char path[PATH_MAX];
+	const char *cur;
+	struct stat st;
+	char *sep;
 
 	if (stat(arg, &st) == 0 &&
 	    S_ISDIR(st.st_mode)) {
@@ -169,12 +170,18 @@  static int img_name_parser(const struct option *opt, const char *arg, int unset)
 		die("Currently only 4 images are supported");
 
 	disk_image[image_count].filename = arg;
-	sep = strstr(arg, ",");
-	if (sep) {
-		if (strcmp(sep + 1, "ro") == 0)
-			disk_image[image_count].readonly = true;
-		*sep = 0;
-	}
+	cur = arg;
+	do {
+		sep = strstr(cur, ",");
+		if (sep) {
+			if (strncmp(sep + 1, "ro", 2) == 0)
+				disk_image[image_count].readonly = true;
+			else if (strncmp(sep + 1, "direct", 6) == 0)
+				disk_image[image_count].direct = true;
+			*sep = 0;
+			cur = sep + 1;
+		}
+	} while (sep);
 
 	image_count++;
 
diff --git a/tools/kvm/disk/blk.c b/tools/kvm/disk/blk.c
index cf853c1..37581d3 100644
--- a/tools/kvm/disk/blk.c
+++ b/tools/kvm/disk/blk.c
@@ -33,7 +33,7 @@  static bool is_mounted(struct stat *st)
 	return false;
 }
 
-struct disk_image *blkdev__probe(const char *filename, struct stat *st)
+struct disk_image *blkdev__probe(const char *filename, int flags, struct stat *st)
 {
 	struct disk_image *disk;
 	int fd, r;
@@ -52,7 +52,7 @@  struct disk_image *blkdev__probe(const char *filename, struct stat *st)
 	 * Be careful! We are opening host block device!
 	 * Open it readonly since we do not want to break user's data on disk.
 	 */
-	fd = open(filename, O_RDWR);
+	fd = open(filename, flags);
 	if (fd < 0)
 		return ERR_PTR(fd);
 
diff --git a/tools/kvm/disk/core.c b/tools/kvm/disk/core.c
index 5542d42..621c940 100644
--- a/tools/kvm/disk/core.c
+++ b/tools/kvm/disk/core.c
@@ -75,21 +75,28 @@  struct disk_image *disk_image__new(int fd, u64 size,
 	return disk;
 }
 
-struct disk_image *disk_image__open(const char *filename, bool readonly)
+struct disk_image *disk_image__open(const char *filename, bool readonly, bool direct)
 {
 	struct disk_image *disk;
 	struct stat st;
-	int fd;
+	int fd, flags;
+
+	if (readonly)
+		flags = O_RDONLY;
+	else
+		flags = O_RDWR;
+	if (direct)
+		flags |= O_DIRECT;
 
 	if (stat(filename, &st) < 0)
 		return ERR_PTR(-errno);
 
 	/* blk device ?*/
-	disk = blkdev__probe(filename, &st);
+	disk = blkdev__probe(filename, flags, &st);
 	if (!IS_ERR_OR_NULL(disk))
 		return disk;
 
-	fd = open(filename, readonly ? O_RDONLY : O_RDWR);
+	fd = open(filename, flags);
 	if (fd < 0)
 		return ERR_PTR(fd);
 
@@ -116,6 +123,7 @@  struct disk_image **disk_image__open_all(struct disk_image_params *params, int c
 	struct disk_image **disks;
 	const char *filename;
 	bool readonly;
+	bool direct;
 	void *err;
 	int i;
 
@@ -131,10 +139,11 @@  struct disk_image **disk_image__open_all(struct disk_image_params *params, int c
 	for (i = 0; i < count; i++) {
 		filename = params[i].filename;
 		readonly = params[i].readonly;
+		direct = params[i].direct;
 		if (!filename)
 			continue;
 
-		disks[i] = disk_image__open(filename, readonly);
+		disks[i] = disk_image__open(filename, readonly, direct);
 		if (IS_ERR_OR_NULL(disks[i])) {
 			pr_err("Loading disk image '%s' failed", filename);
 			err = disks[i];
diff --git a/tools/kvm/include/kvm/disk-image.h b/tools/kvm/include/kvm/disk-image.h
index 5d09875..7ae17f8 100644
--- a/tools/kvm/include/kvm/disk-image.h
+++ b/tools/kvm/include/kvm/disk-image.h
@@ -42,6 +42,7 @@  struct disk_image_operations {
 struct disk_image_params {
 	const char *filename;
 	bool readonly;
+	bool direct;
 };
 
 struct disk_image {
@@ -58,7 +59,7 @@  struct disk_image {
 #endif
 };
 
-struct disk_image *disk_image__open(const char *filename, bool readonly);
+struct disk_image *disk_image__open(const char *filename, bool readonly, bool direct);
 struct disk_image **disk_image__open_all(struct disk_image_params *params, int count);
 struct disk_image *disk_image__new(int fd, u64 size, struct disk_image_operations *ops, int mmap);
 int disk_image__close(struct disk_image *disk);
@@ -71,7 +72,7 @@  ssize_t disk_image__write(struct disk_image *disk, u64 sector, const struct iove
 ssize_t disk_image__get_serial(struct disk_image *disk, void *buffer, ssize_t *len);
 
 struct disk_image *raw_image__probe(int fd, struct stat *st, bool readonly);
-struct disk_image *blkdev__probe(const char *filename, struct stat *st);
+struct disk_image *blkdev__probe(const char *filename, int flags, struct stat *st);
 
 ssize_t raw_image__read(struct disk_image *disk, u64 sector,
 				const struct iovec *iov, int iovcount, void *param);