@@ -43,6 +43,7 @@
#include <linux/sched.h>
#include <linux/mount.h>
#include <linux/falloc.h>
+#include <linux/ktime.h>
#include <uapi/linux/lustre/lustre_fiemap.h>
#include <uapi/linux/lustre/lustre_ioctl.h>
@@ -414,6 +415,8 @@ int ll_file_release(struct inode *inode, struct file *file)
lli->lli_async_rc = 0;
}
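+ /* remember when this file was last closed so the next open can
+ * measure the close-to-reopen interval in ll_track_file_opens()
+ */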
+ lli->lli_close_fd_time = ktime_get();
+
rc = ll_md_close(inode, file);
if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
@@ -745,6 +748,29 @@ static int ll_local_open(struct file *file, struct lookup_intent *it,
return 0;
}
+void ll_track_file_opens(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+
+ /* do not skew results with bogus delays from inodes that were never closed (first open) */
+ if (ktime_to_ns(lli->lli_close_fd_time))
+ ll_stats_ops_tally(sbi, LPROC_LL_INODE_OPCLTM,
+ ktime_us_delta(ktime_get(), lli->lli_close_fd_time));
+
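+ /* reset the open counter if the last close was more than
+ * ll_oc_max_ms ago; otherwise keep accumulating opens
+ */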
+ if (ktime_after(ktime_get(),
+ ktime_add_ms(lli->lli_close_fd_time,
+ sbi->ll_oc_max_ms))) {
+ lli->lli_open_fd_count = 1;
+ lli->lli_close_fd_time = ns_to_ktime(0);
+ } else {
+ lli->lli_open_fd_count++;
+ }
+
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_OCOUNT,
+ lli->lli_open_fd_count);
+}
+
/* Open a file, and (for the very first open) create objects on the OSTs at
* this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
* creation or open until ll_lov_setstripe() ioctl is called.
@@ -791,6 +817,7 @@ int ll_file_open(struct inode *inode, struct file *file)
if (S_ISDIR(inode->i_mode))
ll_authorize_statahead(inode, fd);
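+ /* count this open and the close-to-open delay for the open lock heuristic */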
+ ll_track_file_opens(inode);
if (is_root_inode(inode)) {
file->private_data = fd;
return 0;
@@ -868,6 +895,7 @@ int ll_file_open(struct inode *inode, struct file *file)
LASSERT(*och_usecount == 0);
if (!it->it_disposition) {
struct dentry *dentry = file_dentry(file);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ll_dentry_data *ldd;
/* We cannot just request lock handle now, new ELC code
@@ -884,20 +912,42 @@ int ll_file_open(struct inode *inode, struct file *file)
* handle to be returned from LOOKUP|OPEN request,
* for example if the target entry was a symlink.
*
- * Only fetch MDS_OPEN_LOCK if this is in NFS path,
- * marked by a bit set in ll_iget_for_nfs. Clear the
- * bit so that it's not confusing later callers.
+ * In the NFS path we know there is pathological open/close
+ * behavior, so we always enable open lock caching when
+ * coming from there. That path is detected via a flag set
+ * in ll_iget_for_nfs.
*
- * NB; when ldd is NULL, it must have come via normal
- * lookup path only, since ll_iget_for_nfs always calls
- * ll_d_init().
+ * Once the number of opens of this inode reaches the
+ * configured threshold, we always ask for an open lock on
+ * it, to handle badly behaved userspace that opens and
+ * closes the same files in a tight loop for no good reason.
*/
ldd = ll_d2d(dentry);
- if (ldd && ldd->lld_nfs_dentry) {
+ if (filename_is_volatile(dentry->d_name.name,
+ dentry->d_name.len,
+ NULL)) {
+ /* Nothing to do here; the empty branch just
+ * makes the logic easier to read.
+ * We never want an open lock for volatile
+ * files under any circumstances.
+ */
+ } else if (ldd && ldd->lld_nfs_dentry) {
+ /* NFS path. This also catches files opened
+ * by file handle.
+ */
+ it->it_flags |= MDS_OPEN_LOCK;
+ /* clear the flag for future lookups */
ldd->lld_nfs_dentry = 0;
- if (!filename_is_volatile(dentry->d_name.name,
- dentry->d_name.len,
- NULL))
+ } else if (sbi->ll_oc_thrsh_count > 0) {
+ /* Take MDS_OPEN_LOCK with many opens */
+ if (lli->lli_open_fd_count >=
+ sbi->ll_oc_thrsh_count)
+ it->it_flags |= MDS_OPEN_LOCK;
+
+ /* Or if this open comes right after we just closed the file */
+ else if (ktime_before(ktime_get(),
+ ktime_add_ms(lli->lli_close_fd_time,
+ sbi->ll_oc_thrsh_ms)))
it->it_flags |= MDS_OPEN_LOCK;
}
@@ -137,9 +137,15 @@ struct ll_inode_info {
struct obd_client_handle *lli_mds_read_och;
struct obd_client_handle *lli_mds_write_och;
struct obd_client_handle *lli_mds_exec_och;
- u64 lli_open_fd_read_count;
- u64 lli_open_fd_write_count;
- u64 lli_open_fd_exec_count;
+ u64 lli_open_fd_read_count;
+ u64 lli_open_fd_write_count;
+ u64 lli_open_fd_exec_count;
+
+ /* Number of times this inode was opened */
+ u64 lli_open_fd_count;
+ /* When last close was performed on this inode */
+ ktime_t lli_close_fd_time;
+
/* Protects access to och pointers and their usage counters */
struct mutex lli_och_mutex;
@@ -765,6 +771,19 @@ struct ll_sb_info {
unsigned int ll_heat_decay_weight;
unsigned int ll_heat_period_second;
+ /* Opens of the same inode before we start requesting open lock */
+ u32 ll_oc_thrsh_count;
+
+ /* Time in ms since the last close of an inode within which the next
+ * open is considered back-to-back and triggers an open lock request
+ */
+ u32 ll_oc_thrsh_ms;
+
+ /* Time in ms after the last close of a file beyond which prior
+ * opens are no longer counted
+ */
+ u32 ll_oc_max_ms;
+
/* filesystem fsname */
char ll_fsname[LUSTRE_MAXFSNAME + 1];
@@ -788,6 +807,10 @@ struct ll_sb_info {
#define SBI_DEFAULT_HEAT_DECAY_WEIGHT ((80 * 256 + 50) / 100)
#define SBI_DEFAULT_HEAT_PERIOD_SECOND (60)
+#define SBI_DEFAULT_OPENCACHE_THRESHOLD_COUNT (5)
+#define SBI_DEFAULT_OPENCACHE_THRESHOLD_MS (100) /* 0.1 second */
+#define SBI_DEFAULT_OPENCACHE_THRESHOLD_MAX_MS (60000) /* 1 minute */
+
/*
* per file-descriptor read-ahead data.
*/
@@ -1029,6 +1052,8 @@ enum {
LPROC_LL_REMOVEXATTR,
LPROC_LL_INODE_PERM,
LPROC_LL_FALLOCATE,
+ LPROC_LL_INODE_OCOUNT,
+ LPROC_LL_INODE_OPCLTM,
LPROC_LL_FILE_OPCODES
};
@@ -1088,6 +1113,7 @@ enum ldlm_mode ll_take_md_lock(struct inode *inode, u64 bits,
int ll_file_release(struct inode *inode, struct file *file);
int ll_release_openhandle(struct inode *inode, struct lookup_intent *it);
int ll_md_real_close(struct inode *inode, fmode_t fmode);
+void ll_track_file_opens(struct inode *inode);
int ll_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask,
@@ -190,6 +190,11 @@ static struct ll_sb_info *ll_init_sbi(void)
/* Per-filesystem file heat */
sbi->ll_heat_decay_weight = SBI_DEFAULT_HEAT_DECAY_WEIGHT;
sbi->ll_heat_period_second = SBI_DEFAULT_HEAT_PERIOD_SECOND;
+
+ /* Per-fs open heat level before requesting open lock */
+ sbi->ll_oc_thrsh_count = SBI_DEFAULT_OPENCACHE_THRESHOLD_COUNT;
+ sbi->ll_oc_max_ms = SBI_DEFAULT_OPENCACHE_THRESHOLD_MAX_MS;
+ sbi->ll_oc_thrsh_ms = SBI_DEFAULT_OPENCACHE_THRESHOLD_MS;
return sbi;
out_destroy_ra:
kfree(sbi->ll_foreign_symlink_upcall);
@@ -1369,6 +1369,105 @@ static ssize_t heat_period_second_store(struct kobject *kobj,
}
LUSTRE_RW_ATTR(heat_period_second);
+static ssize_t opencache_threshold_count_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+
+ if (sbi->ll_oc_thrsh_count)
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ sbi->ll_oc_thrsh_count);
+ else
+ return snprintf(buf, PAGE_SIZE, "off\n");
+}
+
+static ssize_t opencache_threshold_count_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer,
+ size_t count)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buffer, 10, &val);
+ if (rc) {
+ bool enable;
+ /* also accept "off" to disable and "on" to always cache */
+ rc = kstrtobool(buffer, &enable);
+ if (rc)
+ return rc;
+ val = enable;
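+ /* "on" maps to 1: request the open lock on every open;
+ * "off" maps to 0: disable the heuristic entirely
+ */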
+ }
+ sbi->ll_oc_thrsh_count = val;
+
+ return count;
+}
+LUSTRE_RW_ATTR(opencache_threshold_count);
+
+static ssize_t opencache_threshold_ms_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbi->ll_oc_thrsh_ms);
+}
+
+static ssize_t opencache_threshold_ms_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer,
+ size_t count)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buffer, 10, &val);
+ if (rc)
+ return rc;
+
+ sbi->ll_oc_thrsh_ms = val;
+
+ return count;
+}
+LUSTRE_RW_ATTR(opencache_threshold_ms);
+
+static ssize_t opencache_max_ms_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbi->ll_oc_max_ms);
+}
+
+static ssize_t opencache_max_ms_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer,
+ size_t count)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buffer, 10, &val);
+ if (rc)
+ return rc;
+
+ sbi->ll_oc_max_ms = val;
+
+ return count;
+}
+LUSTRE_RW_ATTR(opencache_max_ms);
+
static int ll_unstable_stats_seq_show(struct seq_file *m, void *v)
{
struct super_block *sb = m->private;
@@ -1568,6 +1667,8 @@ struct ldebugfs_vars lprocfs_llite_obd_vars[] = {
&lustre_attr_max_read_ahead_mb.attr,
&lustre_attr_max_read_ahead_per_file_mb.attr,
&lustre_attr_max_read_ahead_whole_mb.attr,
+ &lustre_attr_max_read_ahead_async_active.attr,
+ &lustre_attr_read_ahead_async_file_threshold_mb.attr,
&lustre_attr_read_ahead_range_kb.attr,
&lustre_attr_checksums.attr,
&lustre_attr_checksum_pages.attr,
@@ -1587,8 +1688,9 @@ struct ldebugfs_vars lprocfs_llite_obd_vars[] = {
&lustre_attr_file_heat.attr,
&lustre_attr_heat_decay_percentage.attr,
&lustre_attr_heat_period_second.attr,
- &lustre_attr_max_read_ahead_async_active.attr,
- &lustre_attr_read_ahead_async_file_threshold_mb.attr,
+ &lustre_attr_opencache_threshold_count.attr,
+ &lustre_attr_opencache_threshold_ms.attr,
+ &lustre_attr_opencache_max_ms.attr,
NULL,
};
@@ -1624,12 +1726,16 @@ static void sbi_kobj_release(struct kobject *kobj)
{ LPROC_LL_LLSEEK, LPROCFS_TYPE_LATENCY, "seek" },
{ LPROC_LL_FSYNC, LPROCFS_TYPE_LATENCY, "fsync" },
{ LPROC_LL_READDIR, LPROCFS_TYPE_LATENCY, "readdir" },
+ { LPROC_LL_INODE_OCOUNT, LPROCFS_TYPE_REQS |
+ LPROCFS_CNTR_AVGMINMAX |
+ LPROCFS_CNTR_STDDEV, "opencount" },
+ { LPROC_LL_INODE_OPCLTM, LPROCFS_TYPE_LATENCY, "openclosetime" },
/* inode operation */
{ LPROC_LL_SETATTR, LPROCFS_TYPE_LATENCY, "setattr" },
{ LPROC_LL_TRUNC, LPROCFS_TYPE_LATENCY, "truncate" },
{ LPROC_LL_FLOCK, LPROCFS_TYPE_LATENCY, "flock" },
{ LPROC_LL_GETATTR, LPROCFS_TYPE_LATENCY, "getattr" },
- { LPROC_LL_FALLOCATE, LPROCFS_TYPE_LATENCY, "fallocate" },
+ { LPROC_LL_FALLOCATE, LPROCFS_TYPE_LATENCY, "fallocate" },
/* dir inode operation */
{ LPROC_LL_CREATE, LPROCFS_TYPE_LATENCY, "create" },
{ LPROC_LL_LINK, LPROCFS_TYPE_LATENCY, "link" },
@@ -1148,6 +1148,13 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
OBD_FAIL_TIMEOUT(OBD_FAIL_LLITE_CREATE_FILE_PAUSE2, cfs_fail_val);
+ /* We can only arrive here when there is no inode yet, so we
+ * only need to request the open lock if it would be requested
+ * on every open (threshold count of 1)
+ */
+ if (ll_i2sbi(dir)->ll_oc_thrsh_count == 1)
+ it->it_flags |= MDS_OPEN_LOCK;
+
/* Dentry added to dcache tree in ll_lookup_it */
de = ll_lookup_it(dir, dentry, it, &secctx, &secctxlen, &pca, encrypt,
&encctx, &encctxlen);