diff mbox

DAX: enable iostat for read/write

Message ID 1476465913-25305-1-git-send-email-toshi.kani@hpe.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kani, Toshi Oct. 14, 2016, 5:25 p.m. UTC
DAX IO path does not support iostat, but its metadata IO path does.
Therefore, iostat shows metadata IO statistics only, which has been
confusing to users.

Add iostat support to the DAX read/write path.

Note, iostat still does not support the DAX mmap path as it allows
user applications to access directly.

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/dax.c |   37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Dan Williams Oct. 14, 2016, 5:35 p.m. UTC | #1
On Fri, Oct 14, 2016 at 10:25 AM, Toshi Kani <toshi.kani@hpe.com> wrote:
> DAX IO path does not support iostat, but its metadata IO path does.
> Therefore, iostat shows metadata IO statistics only, which has been
> confusing to users.
>
> Add iostat support to the DAX read/write path.
>
> Note, iostat still does not support the DAX mmap path as it allows
> user applications to access directly.
>
> Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
> ---
>  fs/dax.c |   37 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 37 insertions(+)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index 014defd..3aaaac2 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -144,6 +144,34 @@ static sector_t to_sector(const struct buffer_head *bh,
>         return sector;
>  }
>
> +static void dax_iostat_start(struct gendisk *disk, struct iov_iter *iter,
> +                            unsigned long *start)
> +{
> +       int rw = iov_iter_rw(iter);
> +       int sec = iov_iter_count(iter) >> 9;

Should this be a minimum of one sector since we allow unaligned
transfers through this interface?

...or is iov_iter_count() somehow guaranteed to be sector aligned here?
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kani, Toshi Oct. 14, 2016, 6:47 p.m. UTC | #2
T24gRnJpLCAyMDE2LTEwLTE0IGF0IDEwOjM1IC0wNzAwLCBEYW4gV2lsbGlhbXMgd3JvdGU6DQo+
IE9uIEZyaSwgT2N0IDE0LCAyMDE2IGF0IDEwOjI1IEFNLCBUb3NoaSBLYW5pIDx0b3NoaS5rYW5p
QGhwZS5jb20+DQo+IHdyb3RlOg0KPiA+IA0KPiA+IERBWCBJTyBwYXRoIGRvZXMgbm90IHN1cHBv
cnQgaW9zdGF0LCBidXQgaXRzIG1ldGFkYXRhIElPIHBhdGggZG9lcy4NCj4gPiBUaGVyZWZvcmUs
IGlvc3RhdCBzaG93cyBtZXRhZGF0YSBJTyBzdGF0aXN0aWNzIG9ubHksIHdoaWNoIGhhcyBiZWVu
DQo+ID4gY29uZnVzaW5nIHRvIHVzZXJzLg0KPiA+IA0KPiA+IEFkZCBpb3N0YXQgc3VwcG9ydCB0
byB0aGUgREFYIHJlYWQvd3JpdGUgcGF0aC4NCj4gPiANCj4gPiBOb3RlLCBpb3N0YXQgc3RpbGwg
ZG9lcyBub3Qgc3VwcG9ydCB0aGUgREFYIG1tYXAgcGF0aCBhcyBpdCBhbGxvd3MNCj4gPiB1c2Vy
IGFwcGxpY2F0aW9ucyB0byBhY2Nlc3MgZGlyZWN0bHkuDQo+ID4gDQo+ID4gU2lnbmVkLW9mZi1i
eTogVG9zaGkgS2FuaSA8dG9zaGkua2FuaUBocGUuY29tPg0KPiA+IENjOiBBbmRyZXcgTW9ydG9u
IDxha3BtQGxpbnV4LWZvdW5kYXRpb24ub3JnPg0KPiA+IENjOiBBbGV4YW5kZXIgVmlybyA8dmly
b0B6ZW5pdi5saW51eC5vcmcudWs+DQo+ID4gQ2M6IERhbiBXaWxsaWFtcyA8ZGFuLmoud2lsbGlh
bXNAaW50ZWwuY29tPg0KPiA+IENjOiBSb3NzIFp3aXNsZXIgPHJvc3Muendpc2xlckBsaW51eC5p
bnRlbC5jb20+DQo+ID4gLS0tDQo+ID4gwqBmcy9kYXguYyB8wqDCoMKgMzcgKysrKysrKysrKysr
KysrKysrKysrKysrKysrKysrKysrKysrKw0KPiA+IMKgMSBmaWxlIGNoYW5nZWQsIDM3IGluc2Vy
dGlvbnMoKykNCj4gPiANCj4gPiBkaWZmIC0tZ2l0IGEvZnMvZGF4LmMgYi9mcy9kYXguYw0KPiA+
IGluZGV4IDAxNGRlZmQuLjNhYWFhYzIgMTAwNjQ0DQo+ID4gLS0tIGEvZnMvZGF4LmMNCj4gPiAr
KysgYi9mcy9kYXguYw0KPiA+IEBAIC0xNDQsNiArMTQ0LDM0IEBAIHN0YXRpYyBzZWN0b3JfdCB0
b19zZWN0b3IoY29uc3Qgc3RydWN0DQo+ID4gYnVmZmVyX2hlYWQgKmJoLA0KPiA+IMKgwqDCoMKg
wqDCoMKgwqByZXR1cm4gc2VjdG9yOw0KPiA+IMKgfQ0KPiA+IA0KPiA+ICtzdGF0aWMgdm9pZCBk
YXhfaW9zdGF0X3N0YXJ0KHN0cnVjdCBnZW5kaXNrICpkaXNrLCBzdHJ1Y3QgaW92X2l0ZXINCj4g
PiAqaXRlciwNCj4gPiArwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKg
wqDCoMKgwqDCoMKgwqB1bnNpZ25lZCBsb25nICpzdGFydCkNCj4gPiArew0KPiA+ICvCoMKgwqDC
oMKgwqDCoGludCBydyA9IGlvdl9pdGVyX3J3KGl0ZXIpOw0KPiA+ICvCoMKgwqDCoMKgwqDCoGlu
dCBzZWMgPSBpb3ZfaXRlcl9jb3VudChpdGVyKSA+PiA5Ow0KPiANCj4gU2hvdWxkIHRoaXMgYmUg
YSBtaW5pbXVtIG9mIG9uZSBzZWN0b3Igc2luY2Ugd2UgYWxsb3cgdW5hbGlnbmVkDQo+IHRyYW5z
ZmVycyB0aHJvdWdoIHRoaXMgaW50ZXJmYWNlPw0KPiANCj4gLi4ub3IgaXMgaW92X2l0ZXJfY291
bnQoKSBzb21laG93IGd1YXJhbnRlZWQgdG8gYmUgc2VjdG9yIGFsaWduZWQNCj4gaGVyZT8NCg0K
WW91IGFyZSByaWdodC4gSSB3aWxsIHVwZGF0ZSB0byBzZXQgYSBtaW5pbXVtIG9mIG9uZSBzZWN0
b3IgaW4gdjIuwqANCg0KVGhhbmtzIQ0KLVRvc2hp
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner Oct. 15, 2016, 7:54 a.m. UTC | #3
On Fri, Oct 14, 2016 at 11:25:13AM -0600, Toshi Kani wrote:
> DAX IO path does not support iostat, but its metadata IO path does.
> Therefore, iostat shows metadata IO statistics only, which has been
> confusing to users.
> 
> Add iostat support to the DAX read/write path.
> 
> Note, iostat still does not support the DAX mmap path as it allows
> user applications to access directly.
> 
> Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
> ---
>  fs/dax.c |   37 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 37 insertions(+)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 014defd..3aaaac2 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -144,6 +144,34 @@ static sector_t to_sector(const struct buffer_head *bh,
>  	return sector;
>  }
>  
> +static void dax_iostat_start(struct gendisk *disk, struct iov_iter *iter,
> +			     unsigned long *start)
> +{
> +	int rw = iov_iter_rw(iter);
> +	int sec = iov_iter_count(iter) >> 9;
> +	int cpu = part_stat_lock();
> +
> +	*start = jiffies;
> +	part_round_stats(cpu, &disk->part0);
> +	part_stat_inc(cpu, &disk->part0, ios[rw]);
> +	part_stat_add(cpu, &disk->part0, sectors[rw], sec);
> +	part_inc_in_flight(&disk->part0, rw);
> +	part_stat_unlock();
> +}

Why reimplement generic_start_io_acct() and generic_end_io_acct()?

-Dave.
Kani, Toshi Oct. 17, 2016, 5:40 p.m. UTC | #4
T24gU2F0LCAyMDE2LTEwLTE1IGF0IDE4OjU0ICsxMTAwLCBEYXZlIENoaW5uZXIgd3JvdGU6DQo+
IE9uIEZyaSwgT2N0IDE0LCAyMDE2IGF0IDExOjI1OjEzQU0gLTA2MDAsIFRvc2hpIEthbmkgd3Jv
dGU6DQrCoDoNCj4gPiArc3RhdGljIHZvaWQgZGF4X2lvc3RhdF9zdGFydChzdHJ1Y3QgZ2VuZGlz
ayAqZGlzaywgc3RydWN0IGlvdl9pdGVyDQo+ID4gKml0ZXIsDQo+ID4gKwkJCcKgwqDCoMKgwqB1
bnNpZ25lZCBsb25nICpzdGFydCkNCj4gPiArew0KPiA+ICsJaW50IHJ3ID0gaW92X2l0ZXJfcnco
aXRlcik7DQo+ID4gKwlpbnQgc2VjID0gaW92X2l0ZXJfY291bnQoaXRlcikgPj4gOTsNCj4gPiAr
CWludCBjcHUgPSBwYXJ0X3N0YXRfbG9jaygpOw0KPiA+ICsNCj4gPiArCSpzdGFydCA9IGppZmZp
ZXM7DQo+ID4gKwlwYXJ0X3JvdW5kX3N0YXRzKGNwdSwgJmRpc2stPnBhcnQwKTsNCj4gPiArCXBh
cnRfc3RhdF9pbmMoY3B1LCAmZGlzay0+cGFydDAsIGlvc1tyd10pOw0KPiA+ICsJcGFydF9zdGF0
X2FkZChjcHUsICZkaXNrLT5wYXJ0MCwgc2VjdG9yc1tyd10sIHNlYyk7DQo+ID4gKwlwYXJ0X2lu
Y19pbl9mbGlnaHQoJmRpc2stPnBhcnQwLCBydyk7DQo+ID4gKwlwYXJ0X3N0YXRfdW5sb2NrKCk7
DQo+ID4gK30NCj4gDQo+IFdoeSByZWltcGxlbWVudCBnZW5lcmljX3N0YXJ0X2lvX2FjY3QoKSBh
bmQgZ2VuZXJpY19lbmRfaW9fYWNjdCgpPw0KDQpJdCB3YXMgbW9kZWxlZCBhZnRlciBfX25kX2lv
c3RhdF9zdGFydCgpIC8gbmRfaW9zdGFydF9lbmQoKS4gwqBJIGFncmVlDQp0aGF0IHdlIGNhbiB1
c2XCoGdlbmVyaWNfc3RhcnRfaW9fYWNjdCgpIGFuZCBnZW5lcmljX2VuZF9pb19hY2N0KCkgaGVy
ZS4NCg0KU2hvdWxkIHdlIGFsc28gY2hhbmdlIHRoZSBuZCBpbnRlcmZhY2UgdG8gdXNlIHRoZSBn
ZW5lcmljIHZlcnNpb24gYXMNCndlbGw/DQoNClRoYW5rcywNCi1Ub3NoaQ0KDQpwcy4NClNvcnJ5
IEkgcmVhbGl6ZWQgdGhpcyBjb21tZW50IGFmdGVyIHNlbmRpbmcgb3V0IHYyLi4u
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dan Williams Oct. 17, 2016, 6:55 p.m. UTC | #5
On Mon, Oct 17, 2016 at 10:40 AM, Kani, Toshimitsu <toshi.kani@hpe.com> wrote:
> On Sat, 2016-10-15 at 18:54 +1100, Dave Chinner wrote:
>> On Fri, Oct 14, 2016 at 11:25:13AM -0600, Toshi Kani wrote:
>  :
>> > +static void dax_iostat_start(struct gendisk *disk, struct iov_iter
>> > *iter,
>> > +     unsigned long *start)
>> > +{
>> > +int rw = iov_iter_rw(iter);
>> > +int sec = iov_iter_count(iter) >> 9;
>> > +int cpu = part_stat_lock();
>> > +
>> > +*start = jiffies;
>> > +part_round_stats(cpu, &disk->part0);
>> > +part_stat_inc(cpu, &disk->part0, ios[rw]);
>> > +part_stat_add(cpu, &disk->part0, sectors[rw], sec);
>> > +part_inc_in_flight(&disk->part0, rw);
>> > +part_stat_unlock();
>> > +}
>>
>> Why reimplement generic_start_io_acct() and generic_end_io_acct()?
>
> It was modeled after __nd_iostat_start() / nd_iostart_end().  I agree
> that we can use generic_start_io_acct() and generic_end_io_acct() here.
>
> Should we also change the nd interface to use the generic version as
> well?

Yes, sounds good to me.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kani, Toshi Oct. 17, 2016, 7:12 p.m. UTC | #6
On Mon, 2016-10-17 at 11:55 -0700, Dan Williams wrote:
> On Mon, Oct 17, 2016 at 10:40 AM, Kani, Toshimitsu <toshi.kani@hpe.co

> m> wrote:

> > 

> > On Sat, 2016-10-15 at 18:54 +1100, Dave Chinner wrote:

> > > 

> > > On Fri, Oct 14, 2016 at 11:25:13AM -0600, Toshi Kani wrote:

> >  :

> > > 

> > > > 

> > > > +static void dax_iostat_start(struct gendisk *disk, struct

> > > > iov_iter

> > > > *iter,

> > > > +     unsigned long *start)

> > > > +{

> > > > +int rw = iov_iter_rw(iter);

> > > > +int sec = iov_iter_count(iter) >> 9;

> > > > +int cpu = part_stat_lock();

> > > > +

> > > > +*start = jiffies;

> > > > +part_round_stats(cpu, &disk->part0);

> > > > +part_stat_inc(cpu, &disk->part0, ios[rw]);

> > > > +part_stat_add(cpu, &disk->part0, sectors[rw], sec);

> > > > +part_inc_in_flight(&disk->part0, rw);

> > > > +part_stat_unlock();

> > > > +}

> > > 

> > > Why reimplement generic_start_io_acct() and

> > > generic_end_io_acct()?

> > 

> > It was modeled after __nd_iostat_start() / nd_iostart_end().  I

> > agree that we can use generic_start_io_acct() and

> > generic_end_io_acct() here.

> > 

> > Should we also change the nd interface to use the generic version

> > as well?

> 

> Yes, sounds good to me.


Will do. Thanks!
-Toshi
diff mbox

Patch

diff --git a/fs/dax.c b/fs/dax.c
index 014defd..3aaaac2 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -144,6 +144,34 @@  static sector_t to_sector(const struct buffer_head *bh,
 	return sector;
 }
 
+static void dax_iostat_start(struct gendisk *disk, struct iov_iter *iter,
+			     unsigned long *start)
+{
+	int rw = iov_iter_rw(iter);
+	int sec = iov_iter_count(iter) >> 9;
+	int cpu = part_stat_lock();
+
+	*start = jiffies;
+	part_round_stats(cpu, &disk->part0);
+	part_stat_inc(cpu, &disk->part0, ios[rw]);
+	part_stat_add(cpu, &disk->part0, sectors[rw], sec);
+	part_inc_in_flight(&disk->part0, rw);
+	part_stat_unlock();
+}
+
+static void dax_iostat_end(struct gendisk *disk, struct iov_iter *iter,
+			   unsigned long start)
+{
+	unsigned long duration = jiffies - start;
+	int rw = iov_iter_rw(iter);
+	int cpu = part_stat_lock();
+
+	part_stat_add(cpu, &disk->part0, ticks[rw], duration);
+	part_round_stats(cpu, &disk->part0);
+	part_dec_in_flight(&disk->part0, rw);
+	part_stat_unlock();
+}
+
 static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 		      loff_t start, loff_t end, get_block_t get_block,
 		      struct buffer_head *bh)
@@ -265,9 +293,12 @@  ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
 	ssize_t retval = -EINVAL;
 	loff_t pos = iocb->ki_pos;
 	loff_t end = pos + iov_iter_count(iter);
+	struct gendisk *disk;
+	unsigned long start = 0;
 
 	memset(&bh, 0, sizeof(bh));
 	bh.b_bdev = inode->i_sb->s_bdev;
+	disk = bh.b_bdev->bd_disk;
 
 	if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
 		inode_lock(inode);
@@ -276,8 +307,14 @@  ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
 	if (!(flags & DIO_SKIP_DIO_COUNT))
 		inode_dio_begin(inode);
 
+	if (blk_queue_io_stat(disk->queue))
+		dax_iostat_start(disk, iter, &start);
+
 	retval = dax_io(inode, iter, pos, end, get_block, &bh);
 
+	if (start)
+		dax_iostat_end(disk, iter, start);
+
 	if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
 		inode_unlock(inode);