@@ -458,8 +458,8 @@
/* was OBD_FAIL_LLOG_CATINFO_NET 0x1309 until 2.3 */
#define OBD_FAIL_MDS_SYNC_CAPA_SL 0x1310
#define OBD_FAIL_SEQ_ALLOC 0x1311
-#define OBD_FAIL_PLAIN_RECORDS 0x1319
-#define OBD_FAIL_CATALOG_FULL_CHECK 0x131a
+#define OBD_FAIL_PLAIN_RECORDS 0x1319
+#define OBD_FAIL_CATALOG_FULL_CHECK 0x131a
#define OBD_FAIL_LLITE 0x1400
#define OBD_FAIL_LLITE_FAULT_TRUNC_RACE 0x1401
@@ -488,6 +488,8 @@
#define OBD_FAIL_LLITE_PAGE_ALLOC 0x1418
#define OBD_FAIL_LLITE_OPEN_DELAY 0x1419
#define OBD_FAIL_LLITE_XATTR_PAUSE 0x1420
+#define OBD_FAIL_LLITE_PAGE_INVALIDATE_PAUSE 0x1421
+#define OBD_FAIL_LLITE_READPAGE_PAUSE 0x1422
#define OBD_FAIL_FID_INDIR 0x1501
#define OBD_FAIL_FID_INLMA 0x1502
@@ -1865,6 +1865,41 @@ int ll_readpage(struct file *file, struct page *vmpage)
struct ll_sb_info *sbi = ll_i2sbi(inode);
int result;
+ if (OBD_FAIL_PRECHECK(OBD_FAIL_LLITE_READPAGE_PAUSE)) {
+ unlock_page(vmpage);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LLITE_READPAGE_PAUSE, cfs_fail_val);
+ lock_page(vmpage);
+ }
+
+ /*
+ * The @vmpage got truncated.
+ * This is a kernel bug present since kernel 5.12, introduced by
+ * commit cbd59c48ae2bcadc4a7599c29cf32fd3f9b78251
+ * ("mm/filemap: use head pages in generic_file_buffered_read")
+ *
+ * The page end offset calculation in filemap_get_read_batch() was off
+ * by one. When a read is submitted with end offset 1048575, it computes
+ * an end page index of 256 where it should be 255. As a result,
+ * readpage() is called for the page with index 256, which is beyond the
+ * stripe boundary and may not be covered by a DLM extent lock.
+ *
+ * This happens in a corner race case: filemap_get_read_batch() adds
+ * the page with index 256 for read which is not in the current read
+ * I/O context, and this page is being invalidated and will be removed
+ * from the page cache because the lock protecting it is being revoked.
+ * So this page in the read path is not covered by any DLM lock.
+ *
+ * The solution is simple. Check whether the page was truncated in
+ * ->readpage(). If so, just return AOP_TRUNCATED_PAGE to the upper
+ * caller. Then the kernel will retry to batch pages, and it will not
+ * add the truncated page into batches as it was removed from page
+ * cache of the file.
+ */
+ if (vmpage->mapping != inode->i_mapping) {
+ unlock_page(vmpage);
+ return AOP_TRUNCATED_PAGE;
+ }
+
lcc = ll_cl_find(inode);
if (lcc) {
env = lcc->lcc_env;
@@ -96,6 +96,13 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
}
cl_env_percpu_put(env);
}
+
+ if (OBD_FAIL_PRECHECK(OBD_FAIL_LLITE_PAGE_INVALIDATE_PAUSE)) {
+ unlock_page(vmpage);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LLITE_PAGE_INVALIDATE_PAUSE,
+ cfs_fail_val);
+ lock_page(vmpage);
+ }
}
static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)