@@ -47,6 +47,7 @@
#include <linux/compat.h>
#include <linux/aio.h>
#include <linux/parser.h>
+#include <linux/seqlock.h>
#include <lustre_crypto.h>
#include <range_lock.h>
#include <linux/namei.h>
@@ -287,6 +288,7 @@ struct ll_inode_info {
struct mutex lli_xattrs_enq_lock;
struct list_head lli_xattrs; /* ll_xattr_entry->xe_list */
struct list_head lli_lccs; /* list of ll_cl_context */
+ seqlock_t lli_page_inv_lock;
};
static inline void ll_trunc_sem_init(struct ll_trunc_sem *sem)
@@ -1834,4 +1836,6 @@ int ll_file_open_encrypt(struct inode *inode, struct file *filp)
bool ll_foreign_is_openable(struct dentry *dentry, unsigned int flags);
bool ll_foreign_is_removable(struct dentry *dentry, bool unset);
+int ll_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+
#endif /* LLITE_INTERNAL_H */
@@ -1213,6 +1213,7 @@ void ll_lli_init(struct ll_inode_info *lli)
memset(lli->lli_jobid, 0, sizeof(lli->lli_jobid));
/* ll_cl_context initialize */
INIT_LIST_HEAD(&lli->lli_lccs);
+ seqlock_init(&lli->lli_page_inv_lock);
}
int ll_fill_super(struct super_block *sb)
@@ -257,6 +257,25 @@ static inline vm_fault_t to_fault_error(int result)
return result;
}
+int ll_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct ll_inode_info *lli = ll_i2info(file_inode(vma->vm_file));
+	unsigned int start;
+	int rc;
+
+	/* LU-16160: a page deleted from this inode while the fault is running
+	 * can yield a spurious SIGBUS; lli_page_inv_lock reveals such a
+	 * deletion, in which case the fault is simply repeated.
+	 */
+	for (;;) {
+		start = read_seqbegin(&lli->lli_page_inv_lock);
+		rc = filemap_fault(vmf);
+		if (!read_seqretry(&lli->lli_page_inv_lock, start) ||
+		    !(rc & VM_FAULT_SIGBUS))
+			return rc;
+	}
+}
+
/**
* Lustre implementation of a vm_operations_struct::fault() method, called by
* VM to server page fault (both in kernel and user space).
@@ -63,6 +63,42 @@ static void vvp_page_discard(const struct lu_env *env,
ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED);
}
+/* Detach a cacheable cl_page from its VM page when the page is deleted. */
+static void vvp_page_delete(const struct lu_env *env,
+			    const struct cl_page_slice *slice)
+{
+	struct cl_page *clp = slice->cpl_page;
+	struct ll_inode_info *lli;
+	struct page *pg;
+
+	/* Nothing to do for non-cacheable pages. */
+	if (clp->cp_type != CPT_CACHEABLE)
+		return;
+
+	pg = clp->cp_vmpage;
+	lli = ll_i2info(pg->mapping->host);
+
+	LASSERT(PageLocked(pg));
+	LASSERT((struct cl_page *)pg->private == clp);
+
+	/* Release the reference held in vvp_page_init, then sever the
+	 * vmpage -> cl_page link. The cl_page -> vmpage link stays until
+	 * it is removed later in cl_page_free().
+	 */
+	refcount_dec(&clp->cp_ref);
+	ClearPagePrivate(pg);
+	pg->private = 0;
+
+	/* Clearing uptodate keeps the kernel from reading the page once it
+	 * has been deleted from Lustre, avoiding stale data reads. It is
+	 * done under the seqlock so that ll_filemap_fault() can notice the
+	 * possible deletion and catch the resulting SIGBUS (LU-16160).
+	 */
+	write_seqlock(&lli->lli_page_inv_lock);
+	ClearPageUptodate(pg);
+	write_sequnlock(&lli->lli_page_inv_lock);
+}
+
/**
* Handles page transfer errors at VM level.
*
@@ -146,6 +182,7 @@ static void vvp_page_completion_write(const struct lu_env *env,
}
static const struct cl_page_operations vvp_page_ops = {
+ .cpo_delete = vvp_page_delete,
.cpo_discard = vvp_page_discard,
.io = {
[CRT_READ] = {
@@ -704,7 +704,6 @@ void cl_page_discard(const struct lu_env *env,
static void __cl_page_delete(const struct lu_env *env, struct cl_page *cp)
{
const struct cl_page_slice *slice;
- struct page *vmpage;
int i;
PASSERT(env, cp, cp->cp_state != CPS_FREEING);
@@ -719,23 +718,6 @@ static void __cl_page_delete(const struct lu_env *env, struct cl_page *cp)
if (slice->cpl_ops->cpo_delete)
(*slice->cpl_ops->cpo_delete)(env, slice);
}
-
- if (cp->cp_type == CPT_CACHEABLE) {
- vmpage = cp->cp_vmpage;
- LASSERT(PageLocked(vmpage));
- LASSERT((struct cl_page *)vmpage->private == cp);
-
- /* Drop the reference count held in vvp_page_init */
- refcount_dec(&cp->cp_ref);
- ClearPagePrivate(vmpage);
- vmpage->private = 0;
-
- /*
- * The reference from vmpage to cl_page is removed,
- * but the reference back is still here. It is removed
- * later in cl_page_free().
- */
- }
}
/**