@@ -42,6 +42,7 @@
#include <linux/mount.h>
#include <linux/fdtable.h>
#include <linux/fs_struct.h>
+#include <../mm/internal.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
@@ -238,6 +239,8 @@ long aio_do_openat(int fd, const char *filename, int flags, int mode);
long aio_do_unlinkat(int fd, const char *filename, int flags, int mode);
long aio_foo_at(struct aio_kiocb *req, do_foo_at_t do_foo_at);
+/*
+ * IOCB_CMD_READAHEAD entry point: returns len directly when every page in
+ * the requested range is already present and uptodate, otherwise hands the
+ * request to aio_thread_queue_iocb().
+ */
+long aio_readahead(struct aio_kiocb *iocb, unsigned long len);
+
static __always_inline bool aio_may_use_threads(void)
{
#if IS_ENABLED(CONFIG_AIO_THREAD)
@@ -1812,6 +1815,137 @@ long aio_foo_at(struct aio_kiocb *req, do_foo_at_t do_foo_at)
AIO_THREAD_NEED_FILES |
AIO_THREAD_NEED_CRED);
}
+
+/*
+ * Filler callback handed to read_cache_page().
+ *
+ * @data: the struct file the page belongs to (passed through as void *).
+ * @page: the page to be read.
+ *
+ * Returns whatever the mapping's ->readpage() method returns.
+ */
+static int aio_ra_filler(void *data, struct page *page)
+{
+	struct file *filp = data;
+	struct address_space *mapping = filp->f_mapping;
+
+	return mapping->a_ops->readpage(filp, page);
+}
+
+/*
+ * Make sure the @nr pages of @file's mapping starting at page index @start
+ * are uptodate.
+ *
+ * Pages are visited from the last index down to the first, hopefully
+ * keeping the number of wakeups low.
+ *
+ * Returns 0 when every page was handled, or the negative error encoded in
+ * the ERR_PTR returned by read_cache_page() for the first page that failed.
+ */
+static long aio_ra_wait_on_pages(struct file *file, pgoff_t start,
+				 unsigned long nr)
+{
+	struct address_space *mapping = file->f_mapping;
+	unsigned long remaining = nr;
+
+	while (remaining) {
+		pgoff_t index = start + --remaining;
+		struct page *page;
+
+		/*
+		 * Cheap lockless peek first: no reference is taken on the
+		 * page, we only want to skip pages that are already present
+		 * and uptodate.
+		 */
+		rcu_read_lock();
+		page = radix_tree_lookup(&mapping->page_tree, index);
+		rcu_read_unlock();
+
+		if (page && !radix_tree_exceptional_entry(page) &&
+		    PageUptodate(page))
+			continue;
+
+		/*
+		 * Slow path: go through read_cache_page(), then drop the
+		 * reference it returned since only completion matters here.
+		 */
+		page = read_cache_page(mapping, index, aio_ra_filler, file);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+		page_cache_release(page);
+	}
+
+	return 0;
+}
+
+/*
+ * Worker passed to aio_thread_queue_iocb() by aio_readahead().
+ *
+ * Reads ahead the byte range [ki_pos, ki_pos + ki_data) of the iocb's file:
+ * readahead is issued in chunks of at most 2MB worth of pages, then the
+ * pages that were covered are waited on via aio_ra_wait_on_pages().
+ *
+ * Returns:
+ *   iocb->ki_data  if the whole range was covered,
+ *   a byte count   (end of the last covered page minus ki_pos) if only a
+ *                  leading part of the range was covered,
+ *   0              if no progress was made and no error was reported,
+ *   negative errno if nothing could be issued or waiting on pages failed.
+ */
+static long aio_thread_op_readahead(struct aio_kiocb *iocb)
+{
+	/* Upper bound on the number of pages requested per readahead call. */
+	const unsigned long max_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;
+	struct file *file = iocb->common.ki_filp;
+	pgoff_t start, end, nr, offset;
+	long ret = 0;
+
+	start = iocb->common.ki_pos >> PAGE_CACHE_SHIFT;
+	end = (iocb->common.ki_pos + iocb->ki_data - 1) >> PAGE_CACHE_SHIFT;
+	nr = end - start + 1;
+
+	for (offset = 0; offset < nr; ) {
+		pgoff_t chunk = nr - offset;
+
+		if (chunk > max_chunk)
+			chunk = max_chunk;
+
+		ret = __do_page_cache_readahead(file->f_mapping, file,
+						start + offset, chunk, 0, 1);
+		if (ret <= 0)
+			break;
+		offset += ret;
+	}
+
+	/* Nothing was issued at all and an error was reported: pass it on. */
+	if (!offset && ret < 0)
+		return ret;
+
+	if (offset > 0) {
+		ret = aio_ra_wait_on_pages(file, start, offset);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (offset == nr)
+		return iocb->ki_data;
+
+	/*
+	 * Partial completion.  Widen the page index to loff_t before
+	 * shifting: pgoff_t is unsigned long, so on 32-bit the shift would
+	 * otherwise wrap for file offsets at or beyond 4GB.
+	 */
+	if (offset > 0)
+		return ((loff_t)(start + offset) << PAGE_CACHE_SHIFT) -
+			iocb->common.ki_pos;
+	return 0;
+}
+
+/*
+ * Handle an IOCB_CMD_READAHEAD request for @len bytes starting at
+ * iocb->common.ki_pos.
+ *
+ * The request is clamped to the current file size.  If every page in the
+ * resulting range is already present and uptodate the request completes
+ * immediately; otherwise it is queued with aio_thread_op_readahead() as the
+ * worker.  The (possibly clamped) length is stored in iocb->ki_data for the
+ * worker.
+ *
+ * Returns:
+ *   -EBADF   if the file's mapping cannot read pages,
+ *   -EINVAL  if the position is negative, or the length cannot be reported
+ *            through the long return value, or position + length overflows,
+ *   0        if @len is zero or the range starts at or beyond end of file,
+ *   the number of bytes covered if no I/O was needed,
+ *   otherwise the return value of aio_thread_queue_iocb().
+ */
+long aio_readahead(struct aio_kiocb *iocb, unsigned long len)
+{
+	struct address_space *mapping = iocb->common.ki_filp->f_mapping;
+	loff_t pos = iocb->common.ki_pos;
+	pgoff_t index, end;
+	loff_t epos, isize;
+	int do_io = 0;
+
+	if (!mapping || !mapping->a_ops)
+		return -EBADF;
+	if (!mapping->a_ops->readpage && !mapping->a_ops->readpages)
+		return -EBADF;
+	if (!len)
+		return 0;
+
+	/*
+	 * Validate before doing any arithmetic: pos + len must not wrap
+	 * (a wrapped sum can look like a small, valid end position), and
+	 * len must be representable in the long we return on success.
+	 */
+	if (pos < 0 || len > LONG_MAX || (loff_t)len > LLONG_MAX - pos)
+		return -EINVAL;
+
+	epos = pos + len;
+	isize = i_size_read(mapping->host);
+	if (isize < epos) {
+		/* Clamp the request to what lies before end of file. */
+		epos = isize - pos;
+		if (epos <= 0)
+			return 0;
+		if ((unsigned long)epos != epos)
+			return -EINVAL;
+		len = epos;
+	}
+
+	index = pos >> PAGE_CACHE_SHIFT;
+	end = (pos + len - 1) >> PAGE_CACHE_SHIFT;
+	iocb->ki_data = len;
+	if (end < index)
+		return -EINVAL;
+
+	/*
+	 * Lockless scan of the range: stop at the first page that is missing,
+	 * an exceptional entry, or not uptodate.  No page reference is taken;
+	 * the result only decides whether the request has to be queued.
+	 */
+	do {
+		struct page *page;
+
+		rcu_read_lock();
+		page = radix_tree_lookup(&mapping->page_tree, index);
+		rcu_read_unlock();
+
+		if (!page || radix_tree_exceptional_entry(page) ||
+		    !PageUptodate(page))
+			do_io = 1;
+	} while (!do_io && (index++ < end));
+
+	if (do_io)
+		return aio_thread_queue_iocb(iocb, aio_thread_op_readahead, 0);
+	return len;
+}
#endif /* IS_ENABLED(CONFIG_AIO_THREAD) */
/*
@@ -1922,6 +2056,13 @@ rw_common:
ret = aio_foo_at(req, aio_do_unlinkat);
break;
+ case IOCB_CMD_READAHEAD:
+ if (user_iocb->aio_buf)
+ return -EINVAL;
+ if (aio_may_use_threads())
+ ret = aio_readahead(req, user_iocb->aio_nbytes);
+ break;
+
default:
pr_debug("EINVAL: no operation provided\n");
return -EINVAL;
@@ -47,6 +47,7 @@ enum {
IOCB_CMD_OPENAT = 9,
IOCB_CMD_UNLINKAT = 10,
+ IOCB_CMD_READAHEAD = 12,
};
/*