@@ -492,6 +492,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_LLITE_PAGE_INVALIDATE_PAUSE 0x1421
#define OBD_FAIL_LLITE_READPAGE_PAUSE 0x1422
#define OBD_FAIL_LLITE_PANIC_ON_ESTALE 0x1423
+#define OBD_FAIL_LLITE_READPAGE_PAUSE2 0x1424
#define OBD_FAIL_FID_INDIR 0x1501
#define OBD_FAIL_FID_INLMA 0x1502
@@ -1990,12 +1990,15 @@ ll_do_fast_read(struct kiocb *iocb, struct iov_iter *iter)
result = generic_file_read_iter(iocb, iter);
- /*
- * If the first page is not in cache, generic_file_aio_read() will be
- * returned with -ENODATA.
+ /* If the first page is not in cache, generic_file_read_iter() will
+ * return -ENODATA. Fall back to the full read path.
* See corresponding code in ll_readpage().
+ *
+ * If we raced with page deletion, we might get -EIO. Rather than add
+ * locking to the fast path for this rare case, fall back to the full
+ * read path. (See vvp_io_read_start() for the rest of the handling.)
*/
- if (result == -ENODATA)
+ if (result == -ENODATA || result == -EIO)
result = 0;
if (result > 0) {
@@ -2046,5 +2046,13 @@ int ll_readpage(struct file *file, struct page *vmpage)
if (ra.cra_release)
cl_read_ahead_release(env, &ra);
+ /* this delay gives time for the actual read of the page to finish and
+ * unlock the page in vvp_page_completion_read before we return to our
+ * caller and the caller tries to use the page, allowing us to test
+ * races with the page being unlocked after readpage() but before it's
+ * used by the caller
+ */
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LLITE_READPAGE_PAUSE2, cfs_fail_val);
+
return result;
}
@@ -811,8 +811,10 @@ static int vvp_io_read_start(const struct lu_env *env,
size_t cnt = io->u.ci_rd.rd.crw_count;
size_t tot = vio->vui_tot_count;
struct ll_cl_context *lcc;
+ unsigned int seq;
int exceed = 0;
int result;
+ int total_bytes_read = 0;
struct iov_iter iter;
pgoff_t page_offset;
@@ -878,12 +880,29 @@ static int vvp_io_read_start(const struct lu_env *env,
lcc->lcc_end_index = DIV_ROUND_UP(pos + iter.count, PAGE_SIZE);
CDEBUG(D_VFSTRACE, "count:%ld iocb pos:%lld\n", iter.count, pos);
- result = generic_file_read_iter(vio->vui_iocb, &iter);
+ /* This seqlock lets us notice if a page has been deleted on this inode
+ * during the fault process, allowing us to catch an erroneous short
+ * read or -EIO.
+ * See LU-16160.
+ */
+ do {
+ seq = read_seqbegin(&ll_i2info(inode)->lli_page_inv_lock);
+ result = generic_file_read_iter(vio->vui_iocb, &iter);
+ if (result >= 0) {
+ io->ci_nob += result;
+ total_bytes_read += result;
+ }
+ /* if we got a short read or -EIO and we raced with page invalidation,
+ * retry
+ */
+ } while (read_seqretry(&ll_i2info(inode)->lli_page_inv_lock, seq) &&
+ ((result >= 0 && iov_iter_count(&iter) > 0) ||
+ result == -EIO));
+
out:
if (result >= 0) {
- if (result < cnt)
+ if (total_bytes_read < cnt)
io->ci_continue = 0;
- io->ci_nob += result;
result = 0;
} else if (result == -EIOCBQUEUED) {
io->ci_nob += vio->u.readwrite.vui_read;