Message ID | 1497498312-17704-5-git-send-email-axboe@kernel.dk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Jun 14, 2017 at 09:45:05PM -0600, Jens Axboe wrote: > Add four flags for the pwritev2(2) system call, allowing an application > to give the kernel a hint about what on-media life times can be > expected from a given write. > > The intent is for these values to be relative to each other, no > absolute meaning should be attached to these flag names. > > Define IOCB flags to carry this over this information from the pwritev2 > RWF_WRITE_LIFE_* flags. > > Reviewed-by: Andreas Dilger <adilger@dilger.ca> > Signed-off-by: Jens Axboe <axboe@kernel.dk> > --- > fs/read_write.c | 9 ++++++++- > include/linux/fs.h | 12 ++++++++++++ > include/uapi/linux/fs.h | 10 ++++++++++ > 3 files changed, 30 insertions(+), 1 deletion(-) > > diff --git a/fs/read_write.c b/fs/read_write.c > index 47c1d4484df9..9cb2314efca3 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -678,7 +678,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, > struct kiocb kiocb; > ssize_t ret; > > - if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC)) > + if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_WRITE_LIFE_MASK)) > return -EOPNOTSUPP; > > init_sync_kiocb(&kiocb, filp); > @@ -688,6 +688,13 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, > kiocb.ki_flags |= IOCB_DSYNC; > if (flags & RWF_SYNC) > kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC); > + if (flags & RWF_WRITE_LIFE_MASK) { > + struct inode *inode = file_inode(filp); > + > + inode->i_write_hint = (flags & RWF_WRITE_LIFE_MASK) >> > + RWF_WRITE_LIFE_SHIFT; Hmm, so once set, hints stick around until someone else sets a different one. I suppose it's unlikely that you'd have two programs writing to the same inode with different write hints, right? Just wondering if anyone will be surprised that they thought they were writing to an _EXTREME hint file but someone else switched it to _SHORT on them. Also, how does userspace query the write hint value once set? > + kiocb.ki_flags |= inode->i_write_hint << IOCB_WRITE_LIFE_SHIFT; > + } > kiocb.ki_pos = *ppos; > > if (type == READ) > diff --git a/include/linux/fs.h b/include/linux/fs.h > index f4f9df8ed059..63798b67fcfe 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -269,6 +269,12 @@ struct writeback_control; > #define IOCB_SYNC (1 << 5) > #define IOCB_WRITE (1 << 6) > > +/* > + * Steal 4-bits for stream information, this allows 16 valid streams > + */ > +#define IOCB_WRITE_LIFE_SHIFT 7 > +#define IOCB_WRITE_LIFE_MASK (BIT(7) | BIT(8) | BIT(9) | BIT(10)) > + > struct kiocb { > struct file *ki_filp; > loff_t ki_pos; > @@ -292,6 +298,12 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) > }; > } > > +static inline int iocb_write_hint(const struct kiocb *iocb) > +{ > + return (iocb->ki_flags & IOCB_WRITE_LIFE_MASK) >> > + IOCB_WRITE_LIFE_SHIFT; > +} > + > /* > * "descriptor" for what we're up to with a read. > * This allows us to use the same read code yet > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h > index 24e61a54feaa..58b7ee06b380 100644 > --- a/include/uapi/linux/fs.h > +++ b/include/uapi/linux/fs.h > @@ -361,4 +361,14 @@ struct fscrypt_key { > #define RWF_DSYNC 0x00000002 /* per-IO O_DSYNC */ > #define RWF_SYNC 0x00000004 /* per-IO O_SYNC */ > > +/* > + * Data life time write flags, steal 4 bits for that > + */ > +#define RWF_WRITE_LIFE_SHIFT 4 > +#define RWF_WRITE_LIFE_MASK 0x000000f0 /* 4 bits of stream ID */ > +#define RWF_WRITE_LIFE_SHORT (1 << RWF_WRITE_LIFE_SHIFT) > +#define RWF_WRITE_LIFE_MEDIUM (2 << RWF_WRITE_LIFE_SHIFT) > +#define RWF_WRITE_LIFE_LONG (3 << RWF_WRITE_LIFE_SHIFT) > +#define RWF_WRITE_LIFE_EXTREME (4 << RWF_WRITE_LIFE_SHIFT) Should O_TMPFILE files ought to be created with i_write_hint = RWF_WRITE_LIFE_SHORT by default? --D > + > #endif /* _UAPI_LINUX_FS_H */ > -- > 2.7.4 >
On 06/14/2017 10:15 PM, Darrick J. Wong wrote: >> diff --git a/fs/read_write.c b/fs/read_write.c >> index 47c1d4484df9..9cb2314efca3 100644 >> --- a/fs/read_write.c >> +++ b/fs/read_write.c >> @@ -678,7 +678,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, >> struct kiocb kiocb; >> ssize_t ret; >> >> - if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC)) >> + if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_WRITE_LIFE_MASK)) >> return -EOPNOTSUPP; >> >> init_sync_kiocb(&kiocb, filp); >> @@ -688,6 +688,13 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, >> kiocb.ki_flags |= IOCB_DSYNC; >> if (flags & RWF_SYNC) >> kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC); >> + if (flags & RWF_WRITE_LIFE_MASK) { >> + struct inode *inode = file_inode(filp); >> + >> + inode->i_write_hint = (flags & RWF_WRITE_LIFE_MASK) >> >> + RWF_WRITE_LIFE_SHIFT; > > Hmm, so once set, hints stick around until someone else sets a different > one. I suppose it's unlikely that you'd have two programs writing to > the same inode with different write hints, right? You'd hope so... There's really no good way to support that with buffered writes. For the NVMe use case, you'd be no worse off than you were without hints, however. But I do think one change should be made above - we only reset the hint if someone passes a new hint in. But we probably also want to do so for the case where no hint is passed in, but one is currently set. > Also, how does userspace query the write hint value once set? It doesn't. Ideally this hint would be "for this write only", but that's not really possible with deferred write back. >> +/* >> + * Data life time write flags, steal 4 bits for that >> + */ >> +#define RWF_WRITE_LIFE_SHIFT 4 >> +#define RWF_WRITE_LIFE_MASK 0x000000f0 /* 4 bits of stream ID */ >> +#define RWF_WRITE_LIFE_SHORT (1 << RWF_WRITE_LIFE_SHIFT) >> +#define RWF_WRITE_LIFE_MEDIUM (2 << RWF_WRITE_LIFE_SHIFT) >> +#define RWF_WRITE_LIFE_LONG (3 << RWF_WRITE_LIFE_SHIFT) >> +#define RWF_WRITE_LIFE_EXTREME (4 << RWF_WRITE_LIFE_SHIFT) > > Should O_TMPFILE files ought to be created with i_write_hint = > RWF_WRITE_LIFE_SHORT by default? The answer here is "it depends". If we're already using hints on that device, then yes, O_TMPFILE is a clear candidate for RWF_WRITE_LIFE_SHORT. However, if we are not, then we should not set it as it may have implications on how the device manages writes. For that case we'll potentially only be driving a single stream, short writes, and that may not be enough to saturate device bandwidth. I would rather leave that for future experimentation. There are similar things we can do with short lived writes, like apply them to the log writes in the file system. But all of that should be built on top of what we end up agreeing on, not included from the get-go. I'd rather get the basic usage and model going first before we further complicate matters.
I think Darrick has a very valid concern here - using RWF_* flags to affect inode or fd-wide state is extremely counter productive. Combined with the fact that the streams need a special setup in NVMe I'm tempted to say that the interface really should be fadvise or similar, which would keep the setup out of the I/O path and make clear it's a sticky interface. For direct I/O RWF_* would make some sense, but we'd still have to deal with the setup issue.
On Wed, Jun 14, 2017 at 09:15:03PM -0700, Darrick J. Wong wrote: > > + */ > > +#define RWF_WRITE_LIFE_SHIFT 4 > > +#define RWF_WRITE_LIFE_MASK 0x000000f0 /* 4 bits of stream ID */ > > +#define RWF_WRITE_LIFE_SHORT (1 << RWF_WRITE_LIFE_SHIFT) > > +#define RWF_WRITE_LIFE_MEDIUM (2 << RWF_WRITE_LIFE_SHIFT) > > +#define RWF_WRITE_LIFE_LONG (3 << RWF_WRITE_LIFE_SHIFT) > > +#define RWF_WRITE_LIFE_EXTREME (4 << RWF_WRITE_LIFE_SHIFT) > > Should O_TMPFILE files ought to be created with i_write_hint = > RWF_WRITE_LIFE_SHORT by default? Depends... One of the uses for that is to have them linked into permanent place once completely written...
On 06/15/2017 02:19 AM, Christoph Hellwig wrote: > I think Darrick has a very valid concern here - using RWF_* flags > to affect inode or fd-wide state is extremely counter productive. > > Combined with the fact that the streams need a special setup in NVMe > I'm tempted to say that the interface really should be fadvise or > similar, which would keep the setup out of the I/O path and make clear > it's a sticky interface. For direct I/O RWF_* would make some sense, > but we'd still have to deal with the setup issue. OK, which is exactly how I had implemented the interface 2 years ago. I can resurrect that part and dump the RWF_* flags. I agree the RWF_* flags are confusing for buffered IO, since they are persistent. For O_DIRECT, they make more sense. So the question is if we want to retain the RWF_WRITE_LIFE_* hints at all, or simply go back to the fadvise with something ala: POSIX_FADV_WRITE_HINT_SET Set write life time hint POSIX_FADV_WRITE_HINT_GET Get write life time hint I'd still very much like to stick to the same four hints, and not require any special setup. I think the lazy setup for nvme is fine. Some devices can do it at probe time, others will want to do it lazily to not waste resources. Do we really want to have the HINT_SET bubble down to the device and allocate streams? I want to keep this simple to use. The RWF_WRITE_LIFE_* are easy to use and adopt. But I do agree that being able to query the setting is useful, and then we may as well introduce a get/set fadvise interface for doing so. Do people like the S_WRITE_LIFE_* part, or should we keep the i_write_hint and just shrink it to 8 bytes?
On 06/15/2017 08:21 AM, Jens Axboe wrote: > On 06/15/2017 02:19 AM, Christoph Hellwig wrote: >> I think Darrick has a very valid concern here - using RWF_* flags >> to affect inode or fd-wide state is extremely counter productive. >> >> Combined with the fact that the streams need a special setup in NVMe >> I'm tempted to say that the interface really should be fadvise or >> similar, which would keep the setup out of the I/O path and make clear >> it's a sticky interface. For direct I/O RWF_* would make some sense, >> but we'd still have to deal with the setup issue. > > OK, which is exactly how I had implemented the interface 2 years ago. > I can resurrect that part and dump the RWF_* flags. I agree the RWF_* > flags are confusing for buffered IO, since they are persistent. For > O_DIRECT, they make more sense. So the question is if we want to > retain the RWF_WRITE_LIFE_* hints at all, or simply go back to the > fadvise with something ala: > > POSIX_FADV_WRITE_HINT_SET Set write life time hint > POSIX_FADV_WRITE_HINT_GET Get write life time hint And then I remembered why fadvise _sucks_. It returns the error value directly. So 0 is success > 0 is some error. That does not work well for adding a set/get interface. Additionally, with fadvise, we have to overload either 'offset' or 'length' for the write hint for the set operation. Not super pretty. Any objections to making the auxiliary interface fcntl(2) based? That would be a cleaner fit, imho.
On Thu, Jun 15, 2017 at 09:23:02AM -0600, Jens Axboe wrote: > And then I remembered why fadvise _sucks_. It returns the error value > directly. So 0 is success > 0 is some error. That does not work well > for adding a set/get interface. Additionally, with fadvise, we have > to overload either 'offset' or 'length' for the write hint for the > set operation. Not super pretty. > > Any objections to making the auxiliary interface fcntl(2) based? > That would be a cleaner fit, imho. fcntl sounds fine to me.
On Thu, Jun 15, 2017 at 08:21:18AM -0600, Jens Axboe wrote: > I'd still very much like to stick to the same four hints, and not Agreed. In fact I'd go a little further: we should have a u16 hints; that goes all the way down from fcntl to the driver, right now we'll allocate the first 3 bits for the write lifetime hints (2.5, so we have one value spare, as they don't need to flags but can be enum values), leaving more space for other kinds of hints. > require any special setup. I think the lazy setup for nvme is fine. Some > devices can do it at probe time, others will want to do it lazily to not > waste resources. Do we really want to have the HINT_SET bubble down to > the device and allocate streams? If I look at the mess for allocating the streams I think we need it to bubble down. That way the device can allocate the stream at time of the fcntl and we can keep the low level driver very simple.
On 06/16/2017 01:33 AM, Christoph Hellwig wrote: > On Thu, Jun 15, 2017 at 08:21:18AM -0600, Jens Axboe wrote: >> I'd still very much like to stick to the same four hints, and not > > Agreed. In fact I'd go a little further: we should have a > > u16 hints; > > that goes all the way down from fcntl to the driver, right now > we'll allocate the first 3 bits for the write lifetime hints (2.5, > so we have one value spare, as they don't need to flags but can be > enum values), leaving more space for other kinds of hints. Did you see v5? It adds enum write_hint and passes it all the way down, until we transform them into rq/bio flags. >> require any special setup. I think the lazy setup for nvme is fine. Some >> devices can do it at probe time, others will want to do it lazily to not >> waste resources. Do we really want to have the HINT_SET bubble down to >> the device and allocate streams? > > If I look at the mess for allocating the streams I think we need it > to bubble down. That way the device can allocate the stream at time > of the fcntl and we can keep the low level driver very simple. Mess? The NVMe code seems pretty straight forward to me. Is it purely the lazy alloc part you're referring to? I'm fine with bubbling down the hint and setting everything up inline from the fcntl() call, but that means we need to make things like btrfs and md/dm aware of it and resolve all mappings. That sort-of sucks, especially since they don't otherwise need to know about it. If another driver needs similar setup like NVMe, I'd much rather just fix it up there. From the API point of view, there would be no changes.
On 06/16/2017 01:30 AM, Christoph Hellwig wrote: > On Thu, Jun 15, 2017 at 09:23:02AM -0600, Jens Axboe wrote: >> And then I remembered why fadvise _sucks_. It returns the error value >> directly. So 0 is success > 0 is some error. That does not work well >> for adding a set/get interface. Additionally, with fadvise, we have >> to overload either 'offset' or 'length' for the write hint for the >> set operation. Not super pretty. >> >> Any objections to making the auxiliary interface fcntl(2) based? >> That would be a cleaner fit, imho. > > fcntl sounds fine to me. v5 implemented that, much better fit than fadvise.
On 06/16/2017 08:35 AM, Jens Axboe wrote: >> If I look at the mess for allocating the streams I think we need it >> to bubble down. That way the device can allocate the stream at time >> of the fcntl and we can keep the low level driver very simple. > > Mess? The NVMe code seems pretty straight forward to me. Is it purely > the lazy alloc part you're referring to? I'm fine with bubbling down the > hint and setting everything up inline from the fcntl() call, but that > means we need to make things like btrfs and md/dm aware of it and > resolve all mappings. That sort-of sucks, especially since they don't > otherwise need to know about it. This is what the old code looked like, back when it was implemented as managed streams, for btrfs: http://git.kernel.dk/cgit/linux-block/commit/?id=93b4d9370250ea9273e1e71775df85964ff52922 We don't need the close part anymore, but the device iteration for constituent devices would be the same. On top of that, we'd have to touch all of drivers/md/ as well, or it won't work for anyone that's using raid or dm. Why not just let it resolve lazily? That way we don't have to touch drivers that need not know about this, since the information is already being passed all the way down without having to modify drivers. If you object to the nvme driver open coding this, I can put that in block instead and catch it earlier. Then nvme would not have to worry about it. But honestly, I'd much rather make it generic once we have another user that needs this, than do it up front.
On Fri, Jun 16, 2017 at 08:35:07AM -0600, Jens Axboe wrote: > > Agreed. In fact I'd go a little further: we should have a > > > > u16 hints; > > > > that goes all the way down from fcntl to the driver, right now > > we'll allocate the first 3 bits for the write lifetime hints (2.5, > > so we have one value spare, as they don't need to flags but can be > > enum values), leaving more space for other kinds of hints. > > Did you see v5? It adds enum write_hint and passes it all the way down, > until we transform them into rq/bio flags. Yes. But with all the way down I mean all the way down to the driver :) > Mess? The NVMe code seems pretty straight forward to me. Is it purely > the lazy alloc part you're referring to? Yes. > I'm fine with bubbling down the > hint and setting everything up inline from the fcntl() call, but that > means we need to make things like btrfs and md/dm aware of it and > resolve all mappings. That sort-of sucks, especially since they don't > otherwise need to know about it. True, that sucks. But taking the hint and doing things behind the scenes sounds nasty. > If another driver needs similar setup like NVMe, I'd much rather just > fix it up there. From the API point of view, there would be no changes. Ok..
On 06/16/2017 09:52 AM, Christoph Hellwig wrote: > On Fri, Jun 16, 2017 at 08:35:07AM -0600, Jens Axboe wrote: >>> Agreed. In fact I'd go a little further: we should have a >>> >>> u16 hints; >>> >>> that goes all the way down from fcntl to the driver, right now >>> we'll allocate the first 3 bits for the write lifetime hints (2.5, >>> so we have one value spare, as they don't need to flags but can be >>> enum values), leaving more space for other kinds of hints. >> >> Did you see v5? It adds enum write_hint and passes it all the way down, >> until we transform them into rq/bio flags. > > Yes. But with all the way down I mean all the way down to the driver :) Only missing part is the request flags. And why make that any different than the flags we already have now? It'd be trivial to pack the value into the request flags as well, but I'm struggling to see the point of that, honestly. Please expand on why you think changing the request flags to also carry that value would be useful, as opposed to just mapping it when we setup the request. If you have a valid concern I don't mind making the change, but I just don't see one right now. >> Mess? The NVMe code seems pretty straight forward to me. Is it purely >> the lazy alloc part you're referring to? > > Yes. > >> I'm fine with bubbling down the >> hint and setting everything up inline from the fcntl() call, but that >> means we need to make things like btrfs and md/dm aware of it and >> resolve all mappings. That sort-of sucks, especially since they don't >> otherwise need to know about it. > > True, that sucks. But taking the hint and doing things behind the > scenes sounds nasty. How so? It's pretty straight forward. The only downside I see is that we'll have a delay between seeing the first valid write hint and to the time when nvme has it enabled. But that's so short, and in the grander scheme of things, doesn't matter one bit. The alternative is a bunch of infrastructure to do this inline. I think that choice is pretty clear here. >> If another driver needs similar setup like NVMe, I'd much rather just >> fix it up there. From the API point of view, there would be no changes. > > Ok.. >
On Fri, Jun 16, 2017 at 09:59:38AM -0600, Jens Axboe wrote: > >> Did you see v5? It adds enum write_hint and passes it all the way down, > >> until we transform them into rq/bio flags. > > > > Yes. But with all the way down I mean all the way down to the driver :) > > Only missing part is the request flags. And why make that any different > than the flags we already have now? It'd be trivial to pack the value > into the request flags as well, but I'm struggling to see the point of > that, honestly. > > Please expand on why you think changing the request flags to also > carry that value would be useful, as opposed to just mapping it when > we setup the request. If you have a valid concern I don't mind making > the change, but I just don't see one right now. Mostly so that we can pass the value down in one form through the whole stack. > How so? It's pretty straight forward. The only downside I see is that > we'll have a delay between seeing the first valid write hint and to > the time when nvme has it enabled. But that's so short, and in the > grander scheme of things, doesn't matter one bit. I'll take your word for that. To be honest I hope future standards won't make the mistake to come up with something as ugly as streams and just take something like our hints on a per-I/O basis..
On 06/16/2017 12:02 PM, Christoph Hellwig wrote: > On Fri, Jun 16, 2017 at 09:59:38AM -0600, Jens Axboe wrote: >>>> Did you see v5? It adds enum write_hint and passes it all the way down, >>>> until we transform them into rq/bio flags. >>> >>> Yes. But with all the way down I mean all the way down to the driver :) >> >> Only missing part is the request flags. And why make that any different >> than the flags we already have now? It'd be trivial to pack the value >> into the request flags as well, but I'm struggling to see the point of >> that, honestly. >> >> Please expand on why you think changing the request flags to also >> carry that value would be useful, as opposed to just mapping it when >> we setup the request. If you have a valid concern I don't mind making >> the change, but I just don't see one right now. > > Mostly so that we can pass the value down in one form through the whole > stack. OK good, so we have that now. I guess the one change we could further make is have the write hints be a subset of the possible hints. I already made changes to reflect that in the latest posting, where we have a rw_hint enum. I haven't made any division of it into read vs write hints, but that could always be done and it won't impact the API since we can just keep the write hints in the lower 8-16 bits, or whatever we choose. >> How so? It's pretty straight forward. The only downside I see is that >> we'll have a delay between seeing the first valid write hint and to >> the time when nvme has it enabled. But that's so short, and in the >> grander scheme of things, doesn't matter one bit. > > I'll take your word for that. To be honest I hope future standards > won't make the mistake to come up with something as ugly as streams > and just take something like our hints on a per-I/O basis.. Well, at least streams is better than how it started out...
diff --git a/fs/read_write.c b/fs/read_write.c index 47c1d4484df9..9cb2314efca3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -678,7 +678,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, struct kiocb kiocb; ssize_t ret; - if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC)) + if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_WRITE_LIFE_MASK)) return -EOPNOTSUPP; init_sync_kiocb(&kiocb, filp); @@ -688,6 +688,13 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, kiocb.ki_flags |= IOCB_DSYNC; if (flags & RWF_SYNC) kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC); + if (flags & RWF_WRITE_LIFE_MASK) { + struct inode *inode = file_inode(filp); + + inode->i_write_hint = (flags & RWF_WRITE_LIFE_MASK) >> + RWF_WRITE_LIFE_SHIFT; + kiocb.ki_flags |= inode->i_write_hint << IOCB_WRITE_LIFE_SHIFT; + } kiocb.ki_pos = *ppos; if (type == READ) diff --git a/include/linux/fs.h b/include/linux/fs.h index f4f9df8ed059..63798b67fcfe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -269,6 +269,12 @@ struct writeback_control; #define IOCB_SYNC (1 << 5) #define IOCB_WRITE (1 << 6) +/* + * Steal 4-bits for stream information, this allows 16 valid streams + */ +#define IOCB_WRITE_LIFE_SHIFT 7 +#define IOCB_WRITE_LIFE_MASK (BIT(7) | BIT(8) | BIT(9) | BIT(10)) + struct kiocb { struct file *ki_filp; loff_t ki_pos; @@ -292,6 +298,12 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) }; } +static inline int iocb_write_hint(const struct kiocb *iocb) +{ + return (iocb->ki_flags & IOCB_WRITE_LIFE_MASK) >> + IOCB_WRITE_LIFE_SHIFT; +} + /* * "descriptor" for what we're up to with a read. * This allows us to use the same read code yet diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 24e61a54feaa..58b7ee06b380 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -361,4 +361,14 @@ struct fscrypt_key { #define RWF_DSYNC 0x00000002 /* per-IO O_DSYNC */ #define RWF_SYNC 0x00000004 /* per-IO O_SYNC */ +/* + * Data life time write flags, steal 4 bits for that + */ +#define RWF_WRITE_LIFE_SHIFT 4 +#define RWF_WRITE_LIFE_MASK 0x000000f0 /* 4 bits of stream ID */ +#define RWF_WRITE_LIFE_SHORT (1 << RWF_WRITE_LIFE_SHIFT) +#define RWF_WRITE_LIFE_MEDIUM (2 << RWF_WRITE_LIFE_SHIFT) +#define RWF_WRITE_LIFE_LONG (3 << RWF_WRITE_LIFE_SHIFT) +#define RWF_WRITE_LIFE_EXTREME (4 << RWF_WRITE_LIFE_SHIFT) + #endif /* _UAPI_LINUX_FS_H */