@@ -243,6 +243,66 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
}
#endif
+/*
+ * Get or set a write life time hint, on either the inode or the open file.
+ *
+ * @file: open file the fcntl() was issued against
+ * @cmd:  F_{GET,SET}_RW_HINT (inode) or F_{GET,SET}_FILE_RW_HINT (file)
+ * @arg:  user space pointer to a 64-bit hint value
+ *
+ * Returns 0 on success, -EFAULT if the user pointer cannot be accessed,
+ * or -EINVAL for an unknown command or an invalid hint value.
+ */
+static long fcntl_rw_hint(struct file *file, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct inode *inode = file_inode(file);
+	u64 __user *argp = (u64 __user *)arg;
+	bool on_file = false;
+	enum rw_hint hint;
+	u64 val;
+
+	switch (cmd) {
+	case F_GET_FILE_RW_HINT:
+		on_file = true;
+		/* fallthrough */
+	case F_GET_RW_HINT:
+		/*
+		 * If we ask for the file descriptor hint and it isn't set,
+		 * return the underlying inode write hint. This is what
+		 * writeback does as well.
+		 */
+		hint = RWF_WRITE_LIFE_NOT_SET;
+		if (on_file)
+			hint = file->f_write_hint;
+
+		if (!on_file || hint == RWF_WRITE_LIFE_NOT_SET)
+			hint = mask_to_write_hint(inode->i_flags,
+						  S_WRITE_LIFE_SHIFT);
+		if (put_user(hint, argp))
+			return -EFAULT;
+		return 0;
+	case F_SET_FILE_RW_HINT:
+		on_file = true;
+		/* fallthrough */
+	case F_SET_RW_HINT:
+		if (get_user(val, argp))
+			return -EFAULT;
+
+		/*
+		 * Validate the full 64-bit value before narrowing it to
+		 * enum rw_hint, so that garbage in the upper bits is
+		 * rejected instead of being silently truncated away.
+		 */
+		switch (val) {
+		case RWF_WRITE_LIFE_NOT_SET:
+		case RWH_WRITE_LIFE_NONE:
+		case RWH_WRITE_LIFE_SHORT:
+		case RWH_WRITE_LIFE_MEDIUM:
+		case RWH_WRITE_LIFE_LONG:
+		case RWH_WRITE_LIFE_EXTREME:
+			break;
+		default:
+			return -EINVAL;
+		}
+		hint = (enum rw_hint)val;
+
+		if (on_file) {
+			/* The per-file hint is updated under ->f_lock. */
+			spin_lock(&file->f_lock);
+			file->f_write_hint = hint;
+			spin_unlock(&file->f_lock);
+		} else {
+			inode_set_write_hint(inode, hint);
+		}
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp)
{
@@ -337,6 +397,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_GET_SEALS:
err = shmem_fcntl(filp, cmd, arg);
break;
+ case F_GET_RW_HINT:
+ case F_SET_RW_HINT:
+ case F_GET_FILE_RW_HINT:
+ case F_SET_FILE_RW_HINT:
+ err = fcntl_rw_hint(filp, cmd, arg);
+ break;
default:
break;
}
@@ -2120,3 +2120,14 @@ struct timespec current_time(struct inode *inode)
return timespec_trunc(now, inode->i_sb->s_time_gran);
}
EXPORT_SYMBOL(current_time);
+
+/*
+ * Store @hint in the write life time bits (S_WRITE_LIFE_MASK) of
+ * @inode->i_flags.
+ */
+void inode_set_write_hint(struct inode *inode, enum rw_hint hint)
+{
+	unsigned int flags = write_hint_to_mask(hint, S_WRITE_LIFE_SHIFT);
+
+	/*
+	 * Skip the lock when the hint is already in place. Compare the
+	 * unshifted hint values: @flags is already shifted into position
+	 * and would only ever match a decoded hint when both are zero.
+	 */
+	if (hint != mask_to_write_hint(inode->i_flags, S_WRITE_LIFE_SHIFT)) {
+		inode_lock(inode);
+		inode_set_flags(inode, flags, S_WRITE_LIFE_MASK);
+		inode_unlock(inode);
+	}
+}
@@ -759,6 +759,7 @@ static int do_dentry_open(struct file *f,
likely(f->f_op->write || f->f_op->write_iter))
f->f_mode |= FMODE_CAN_WRITE;
+ f->f_write_hint = WRITE_LIFE_NOT_SET;
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
@@ -274,6 +274,13 @@ struct writeback_control;
#define IOCB_WRITE (1 << 6)
#define IOCB_NOWAIT (1 << 7)
+/*
+ * Steal 3 bits for write hint information; this allows 8 valid hints.
+ */
+#define IOCB_WRITE_LIFE_SHIFT 8
+#define IOCB_WRITE_LIFE_MASK (7 << IOCB_WRITE_LIFE_SHIFT)
+
+
struct kiocb {
struct file *ki_filp;
loff_t ki_pos;
@@ -297,6 +304,12 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
};
}
+/*
+ * Return the write life time hint stored in the IOCB_WRITE_LIFE_MASK
+ * bits of @iocb->ki_flags, shifted back down to a plain hint value.
+ */
+static inline int iocb_write_hint(const struct kiocb *iocb)
+{
+	const int life_bits = iocb->ki_flags & IOCB_WRITE_LIFE_MASK;
+
+	return life_bits >> IOCB_WRITE_LIFE_SHIFT;
+}
+
/*
* "descriptor" for what we're up to with a read.
* This allows us to use the same read code yet
@@ -828,6 +841,20 @@ struct file_ra_state {
loff_t prev_pos; /* Cache last read() position */
};
+#include <linux/fcntl.h>
+
+/*
+ * In-kernel write life time hints. Each value mirrors the user visible
+ * write life time constant of the same name from <linux/fcntl.h>.
+ */
+enum rw_hint {
+	WRITE_LIFE_NOT_SET	= RWF_WRITE_LIFE_NOT_SET,
+	WRITE_LIFE_NONE		= RWH_WRITE_LIFE_NONE,
+	WRITE_LIFE_SHORT	= RWH_WRITE_LIFE_SHORT,
+	WRITE_LIFE_MEDIUM	= RWH_WRITE_LIFE_MEDIUM,
+	WRITE_LIFE_LONG		= RWH_WRITE_LIFE_LONG,
+	WRITE_LIFE_EXTREME	= RWH_WRITE_LIFE_EXTREME,
+};
+
/*
* Check if @index falls in the readahead windows.
*/
@@ -851,6 +878,7 @@ struct file {
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
+ enum rw_hint f_write_hint;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
@@ -1026,8 +1054,6 @@ struct file_lock_context {
#define OFFT_OFFSET_MAX INT_LIMIT(off_t)
#endif
-#include <linux/fcntl.h>
-
extern void send_sigio(struct fown_struct *fown, int fd, int band);
/*
@@ -1833,6 +1859,14 @@ struct super_operations {
#endif
/*
+ * Expected life time hint of a write for this inode. This uses the
+ * WRITE_LIFE_* encoding; we only need to define the shift. It takes 3
+ * bits (14-16), so the next free S_* value is 131072, bit 17.
+ */
+#define S_WRITE_LIFE_SHIFT 14 /* 16384, next bit */
+#define S_WRITE_LIFE_MASK (7 << S_WRITE_LIFE_SHIFT)
+
+/*
* Note that nosuid etc flags are inode-specific: setting some file-system
* flags just means all the inodes inherit those flags by default. It might be
* possible to override it selectively if you really wanted to with some
@@ -1878,6 +1912,39 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode)
return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
}
+/*
+ * Shift @hint left by @shift so it can be OR'ed into a flags word.
+ */
+static inline unsigned int write_hint_to_mask(enum rw_hint hint,
+					      unsigned int shift)
+{
+	const unsigned int mask = hint << shift;
+
+	return mask;
+}
+
+/*
+ * Inverse of write_hint_to_mask(): shift @mask right by @shift and keep
+ * only the low 3 bits, which hold the hint.
+ */
+static inline enum rw_hint mask_to_write_hint(unsigned int mask,
+					      unsigned int shift)
+{
+	const unsigned int hint_bits = 0x7;
+	const unsigned int shifted = mask >> shift;
+
+	return shifted & hint_bits;
+}
+
+/*
+ * Return the write life time hint stored in @inode->i_flags.
+ *
+ * A NULL @inode, or an inode whose hint is WRITE_LIFE_NOT_SET, yields
+ * WRITE_LIFE_NONE, so callers never see WRITE_LIFE_NOT_SET.
+ */
+static inline enum rw_hint inode_write_hint(struct inode *inode)
+{
+	enum rw_hint hint;
+
+	if (!inode)
+		return WRITE_LIFE_NONE;
+
+	hint = mask_to_write_hint(inode->i_flags, S_WRITE_LIFE_SHIFT);
+	if (hint != WRITE_LIFE_NOT_SET)
+		return hint;
+
+	return WRITE_LIFE_NONE;
+}
+
+/*
+ * Return the effective write life time hint for @file: the per-file
+ * hint when one has been set, otherwise the hint of the backing inode
+ * as reported by inode_write_hint().
+ */
+static inline enum rw_hint file_write_hint(struct file *file)
+{
+	if (file->f_write_hint == WRITE_LIFE_NOT_SET)
+		return inode_write_hint(file_inode(file));
+
+	return file->f_write_hint;
+}
+
/*
* Inode state bits. Protected by inode->i_lock
*
@@ -2764,6 +2831,7 @@ extern struct inode *new_inode(struct super_block *sb);
extern void free_inode_nonrcu(struct inode *inode);
extern int should_remove_suid(struct dentry *);
extern int file_remove_privs(struct file *);
+extern void inode_set_write_hint(struct inode *inode, enum rw_hint hint);
extern void __insert_inode_hash(struct inode *, unsigned long hashval);
static inline void insert_inode_hash(struct inode *inode)
@@ -3060,6 +3128,8 @@ static inline int iocb_flags(struct file *file)
res |= IOCB_DSYNC;
if (file->f_flags & __O_SYNC)
res |= IOCB_SYNC;
+
+ res |= write_hint_to_mask(file->f_write_hint, IOCB_WRITE_LIFE_SHIFT);
return res;
}
@@ -43,6 +43,27 @@
/* (1U << 31) is reserved for signed error codes */
/*
+ * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
+ * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
+ * the specific file. If no per-file hint has been set,
+ * F_GET_FILE_RW_HINT reports the inode's hint instead.
+ */
+#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11)
+#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
+#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13)
+#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
+
+/*
+ * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
+ * used to clear any hints previously set.
+ */
+#define RWH_WRITE_LIFE_NOT_SET	0
+#define RWH_WRITE_LIFE_NONE	1
+#define RWH_WRITE_LIFE_SHORT	2
+#define RWH_WRITE_LIFE_MEDIUM	3
+#define RWH_WRITE_LIFE_LONG	4
+#define RWH_WRITE_LIFE_EXTREME	5
+
+/*
+ * RWF_WRITE_LIFE_NOT_SET does not follow the RWH_ prefix used by the
+ * other hint values. Keep it as an alias so that code already using
+ * this spelling continues to build.
+ */
+#define RWF_WRITE_LIFE_NOT_SET	RWH_WRITE_LIFE_NOT_SET
+
+/*
* Types of directory notifications that may be requested.
*/
#define DN_ACCESS 0x00000001 /* File accessed */