@@ -1371,6 +1371,8 @@ struct ll_cl_context {
struct cl_io *lcc_io;
struct cl_page *lcc_page;
enum lcc_type lcc_type;
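+ /*
+ * iocb and iov_iter of the ongoing buffered read, stashed by
+ * vvp_io_read_start() so that ll_readpage() can detect a read
+ * of a page beyond the requested range (see the kernel 5.12
+ * bug handling in ll_readpage()).
+ */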
+ struct kiocb *lcc_iocb;
+ struct iov_iter *lcc_iter;
};
struct ll_thread_info {
@@ -1858,11 +1858,14 @@ int ll_readpage(struct file *file, struct page *vmpage)
{
struct inode *inode = file_inode(file);
struct cl_object *clob = ll_i2info(inode)->lli_clob;
- struct ll_cl_context *lcc;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
const struct lu_env *env = NULL;
+ struct cl_read_ahead ra = { 0 };
+ struct ll_cl_context *lcc;
struct cl_io *io = NULL;
+ struct iov_iter *iter;
struct cl_page *page;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct kiocb *iocb;
int result;
if (OBD_FAIL_PRECHECK(OBD_FAIL_LLITE_READPAGE_PAUSE)) {
@@ -1911,6 +1914,8 @@ int ll_readpage(struct file *file, struct page *vmpage)
struct ll_readahead_state *ras = &fd->fd_ras;
struct lu_env *local_env = NULL;
+ CDEBUG(D_VFSTRACE, "fast read pgno: %lu\n", vmpage->index);
+
result = -ENODATA;
/*
@@ -1968,6 +1973,47 @@ int ll_readpage(struct file *file, struct page *vmpage)
return result;
}
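+ /*
+ * lcc_iocb/lcc_iter are only attached by vvp_io_read_start()
+ * for buffered reads; mmap faults use LCC_MMAP and carry no
+ * iocb, so they are skipped here.
+ */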
+ if (lcc && lcc->lcc_type != LCC_MMAP) {
+ iocb = lcc->lcc_iocb;
+ iter = lcc->lcc_iter;
+
+ CDEBUG(D_VFSTRACE, "pgno:%lu, cnt:%zu, pos:%lld\n",
+ vmpage->index, iter->count, iocb->ki_pos);
+
+ /*
+ * This handles a kernel bug introduced in kernel 5.12:
+ * commit: cbd59c48ae2bcadc4a7599c29cf32fd3f9b78251
+ * ("mm/filemap: use head pages in generic_file_buffered_read")
+ *
+ * See above in this function for a full description of the
+ * bug. Briefly, the kernel will try to read 1 more page than
+ * was actually requested *if that page is already in cache*.
+ *
+ * Because this page is beyond the boundary of the requested
+ * read, Lustre does not lock it as part of the read. This
+ * means we must check if there is a valid dlmlock on this
+ * page and reference it before we attempt to read in the
+ * page. If there is not a valid dlmlock, then we are racing
+ * with dlmlock cancellation and the page is being removed
+ * from the cache.
+ *
+ * In that case we return AOP_TRUNCATED_PAGE, which causes the
+ * kernel to retry the read; the retry gives the page time to
+ * be removed from the cache as the lock is cancelled.
+ *
+ * This should never occur except in kernels with the bug
+ * mentioned above.
+ */
+ if (cl_offset(clob, vmpage->index) >= iter->count + iocb->ki_pos) {
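+ /*
+ * cl_io_read_ahead() succeeds only while a dlmlock still
+ * covers this index; failure, or a window ending before
+ * this page, means the lock is being cancelled.
+ */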
+ result = cl_io_read_ahead(env, io, vmpage->index, &ra);
+ if (result < 0 || vmpage->index > ra.cra_end_idx) {
+ cl_read_ahead_release(env, &ra);
+ unlock_page(vmpage);
+ return AOP_TRUNCATED_PAGE;
+ }
+ }
+ }
+
/**
* Direct read can fall back to buffered read, but DIO is done
* with lockless i/o, and buffered requires LDLM locking, so in
@@ -1979,7 +2025,8 @@ int ll_readpage(struct file *file, struct page *vmpage)
unlock_page(vmpage);
io->ci_dio_lock = 1;
io->ci_need_restart = 1;
- return -ENOLCK;
+ result = -ENOLCK;
+ goto out;
}
page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
@@ -1999,5 +2046,10 @@ int ll_readpage(struct file *file, struct page *vmpage)
unlock_page(vmpage);
result = PTR_ERR(page);
}
+
+out:
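+ /* drop the reference taken on the read ahead window, if any */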
+ if (ra.cra_release)
+ cl_read_ahead_release(env, &ra);
+
return result;
}
@@ -806,6 +806,7 @@ static int vvp_io_read_start(const struct lu_env *env,
loff_t pos = io->u.ci_rd.rd.crw_pos;
size_t cnt = io->u.ci_rd.rd.crw_count;
size_t tot = vio->vui_tot_count;
+ struct ll_cl_context *lcc;
int exceed = 0;
int result;
struct iov_iter iter;
@@ -868,9 +869,14 @@ static int vvp_io_read_start(const struct lu_env *env,
file_accessed(file);
LASSERT(vio->vui_iocb->ki_pos == pos);
iter = *vio->vui_iter;
- result = generic_file_read_iter(vio->vui_iocb, &iter);
- goto out;
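+
+ /*
+ * Stash the iocb and iov_iter so ll_readpage() can detect a
+ * read of a page beyond the requested range (see the kernel
+ * 5.12 bug handling there).
+ */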
+ lcc = ll_cl_find(inode);
+ lcc->lcc_iter = &iter;
+ lcc->lcc_iocb = vio->vui_iocb;
+ CDEBUG(D_VFSTRACE, "cnt:%zu, iocb pos:%lld\n", lcc->lcc_iter->count,
+ lcc->lcc_iocb->ki_pos);
+
+ result = generic_file_read_iter(vio->vui_iocb, &iter);
out:
if (result >= 0) {
if (result < cnt)