new file mode 100644
--- /dev/null
+++ b/Documentation/device-mapper/loop.txt
@@ -0,0 +1,26 @@
+dm-loop
+=======
+
+Device-Mapper's "loop" target provides a mapping to a
+backing file. It is similar to a loop device created by
+losetup, but with less overhead, hence higher IOPS and bandwidth.
+
+
+Parameters: <path_name>
+
+<path_name> path to an existing file to map block IO to
+
+
+Example:
+
+dmsetup create loop --table "0 $TWO_GiB loop /tmp/loopfile"
+
+This creates a 2GiB loop device /dev/mapper/loop mapped to
+the existing file /tmp/loopfile, which has to be at least
+2GiB in size for the creation to succeed.
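+
+The table length is specified in 512-byte sectors, so the 2GiB
+in this example corresponds to TWO_GiB=4194304. A sufficiently
+large sparse backing file can be created beforehand, for example with:
+
+fallocate -l 2G /tmp/loopfile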
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -390,6 +390,13 @@ config DM_ZERO
A target that discards writes, and returns all zeroes for
reads. Useful in some recovery situations.

+config DM_LOOP
+ tristate "Loop target (EXPERIMENTAL)"
+ depends on BLK_DEV_DM
+ ---help---
+ A target that redirects IO to a backing file.
+ Useful e.g. for testing.
+
config DM_MULTIPATH
tristate "Multipath target"
depends on BLK_DEV_DM
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -57,6 +57,7 @@ obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/
obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o
obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
obj-$(CONFIG_DM_ZERO) += dm-zero.o
+obj-$(CONFIG_DM_LOOP) += dm-loop.o
obj-$(CONFIG_DM_RAID) += dm-raid.o
obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
obj-$(CONFIG_DM_VERITY) += dm-verity.o
new file mode 100644
--- /dev/null
+++ b/drivers/md/dm-loop.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright (C) 2018 Red Hat GmbH
+ *
+ * Simple loop target which redirects bios to a backing
+ * file, processing them in parallel via a workqueue.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/device-mapper.h>
+#include <linux/falloc.h>
+#include <linux/uio.h>
+#include <linux/module.h>
+
+#define DM_MSG_PREFIX "loop"
+#define WORKQUEUE_NAME "dm-kloopd"
+
+/* Global workqueue shared by all loop mappings */
+static struct workqueue_struct *kloopd_wq;
+static atomic_t kloopd_wq_users = ATOMIC_INIT(0);
+
+/* Registry of all loop devices to prevent using the same backing file multiple times */
+static LIST_HEAD(loop_devs);
+
+/* loop context */
+struct loop_c {
+ struct file *file; /* Backing file */
+
+ /* Workqueue */
+ spinlock_t lock;
+ struct bio_list bios;
+ struct work_struct bios_ws;
+
+ struct dm_target *ti;
+ char *path; /* Status table output */
+ struct list_head list;
+};
+
+/* Per-bio context for the workqueue */
+struct bio_c {
+ struct work_struct bio_ws;
+ struct bio *bio;
+ struct loop_c *lc;
+};
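+
+/*
+ * IO is queued in two stages: loop_map() adds incoming bios to the
+ * per-device bio list and schedules bios_ws; loop_process_bios() then
+ * requeues each bio as its own work item (bio_ws in the per-bio data),
+ * so that the file IO for individual bios proceeds in parallel on the
+ * unbound workqueue.
+ */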
+
+/* Is the backing file of @lc already in use by another loop mapping? */
+static int __file_in_use(struct loop_c *lc)
+{
+ struct loop_c *cur;
+
+ list_for_each_entry(cur, &loop_devs, list)
+ if (cur != lc && cur->file->f_inode == lc->file->f_inode)
+ return -EPERM;
+ return 0;
+}
+
+/* Discard bio_sectors(@bio) of the backing file by punching a hole at the bio's offset */
+static void loop_discard(struct loop_c *lc, struct bio *bio)
+{
+ if (lc->file->f_op->fallocate) {
+ int r = lc->file->f_op->fallocate(lc->file,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ to_bytes(bio->bi_iter.bi_sector),
+ to_bytes(bio_sectors(bio)));
+ if (unlikely(r && r != -EOPNOTSUPP && r != -EINVAL))
+ bio->bi_status = errno_to_blk_status(r);
+ }
+}
+
+/* Sync the backing file range @pos..@end (for REQ_FUA/REQ_PREFLUSH and flush bios) */
+static void loop_fsync_range(struct loop_c *lc, loff_t pos, loff_t end)
+{
+ int r = vfs_fsync_range(lc->file, pos, end, 0);
+
+ if (unlikely(r && r != -EINVAL))
+ DMERR("Error fsync range");
+ else
+ cond_resched();
+}
+
+/* Check for any IO error after reading or writing a bio_vec */
+static int loop_check_io_error(ssize_t bytes, loff_t pos,
+ struct bio_vec *bvec, const char *what)
+{
+ if (likely(bytes == bvec->bv_len))
+ return 0;
+
+ DMERR_LIMIT("%s error[%lld] at byte offset %llu, length %u",
+ what, (long long) bytes, (unsigned long long) pos, bvec->bv_len);
+ return (bytes < 0) ? (int) bytes : -EIO;
+}
+
+/*
+ * Read/write @bio payload from/to the backing file at the bio's offset.
+ *
+ * On IO error, bio->bi_status is set accordingly.
+ */
+static void loop_rw_bio(struct loop_c *lc, struct bio *bio)
+{
+ int r = 0;
+ bool write = op_is_write(bio_op(bio));
+ ssize_t bytes;
+ loff_t bio_begin = 0, bio_end = 0, pos = to_bytes(bio->bi_iter.bi_sector);
+ struct bio_vec bvec;
+ struct iov_iter io_iter;
+
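+ /* For FUA/PREFLUSH writes, remember the byte range to fsync after writing it out */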
+ if (unlikely(write && (bio->bi_opf & (REQ_FUA | REQ_PREFLUSH)))) {
+ bio_begin = pos;
+ bio_end = pos + bio_cur_bytes(bio);
+ }
+
+ bio_for_each_segment(bvec, bio, bio->bi_iter) {
+ iov_iter_bvec(&io_iter, ITER_BVEC, &bvec, 1, bvec.bv_len);
+
+ if (write) {
+ file_start_write(lc->file);
+ bytes = vfs_iter_write(lc->file, &io_iter, &pos, 0);
+ file_end_write(lc->file);
+ r = loop_check_io_error(bytes, pos, &bvec, "write");
+ if (r)
+ break;
+ } else {
+ bytes = vfs_iter_read(lc->file, &io_iter, &pos, 0);
+ r = loop_check_io_error(bytes, pos, &bvec, "read");
+ if (r) {
+ zero_fill_bio(bio);
+ break;
+ }
+
+ flush_dcache_page(bvec.bv_page);
+ }
+
+ cond_resched();
+ }
+
+ if (unlikely(r < 0))
+ bio->bi_status = errno_to_blk_status(r);
+
+ if (unlikely(bio_end))
+ /* FUA, ... requested -> flush the bio defined range */
+ loop_fsync_range(lc, bio_begin, bio_end);
+}
+
+/* Worker thread function to process file IO for a single bio */
+static void loop_process_bio(struct work_struct *work)
+{
+ struct bio_c *bio_c = container_of(work, struct bio_c, bio_ws);
+ struct bio *bio = bio_c->bio;
+
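+ /* Flag the worker as a flusher-like task so dirty-page writeback throttles it less */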
+ current->flags |= PF_LESS_THROTTLE;
+
+ switch (bio_op(bio)) {
+ case REQ_OP_READ:
+ case REQ_OP_WRITE:
+ loop_rw_bio(bio_c->lc, bio);
+ break;
+ case REQ_OP_FLUSH:
+ loop_fsync_range(bio_c->lc, 0, LLONG_MAX);
+ break;
+ case REQ_OP_DISCARD:
+ loop_discard(bio_c->lc, bio);
+ break;
+ default:
+ bio->bi_status = errno_to_blk_status(-EIO);
+ }
+
+ bio_endio(bio);
+}
+
+/* Worker thread function to dispatch all queued bios to per-bio work items */
+static void loop_process_bios(struct work_struct *work)
+{
+ struct loop_c *lc = container_of(work, struct loop_c, bios_ws);
+ struct bio_list bl;
+ struct bio *bio;
+ struct bio_c *bio_c;
+
+ current->flags |= PF_LESS_THROTTLE;
+
+ /* Take out input bios to process... */
+ bio_list_init(&bl);
+ spin_lock_irq(&lc->lock);
+ bio_list_merge(&bl, &lc->bios);
+ bio_list_init(&lc->bios);
+ spin_unlock_irq(&lc->lock);
+
+ while ((bio = bio_list_pop(&bl))) {
+ bio_c = dm_per_bio_data(bio, lc->ti->per_io_data_size);
+ INIT_WORK(&bio_c->bio_ws, loop_process_bio);
+ bio_c->bio = bio;
+ bio_c->lc = lc;
+ queue_work(kloopd_wq, &bio_c->bio_ws);
+ }
+}
+
+/* Release loop context resources of @lc */
+static void destroy_loop(struct loop_c *lc)
+{
+ if (lc) {
+ list_del(&lc->list);
+ /* Drop this mapping's reference on the shared workqueue */
+ if (atomic_dec_and_test(&kloopd_wq_users)) {
+ destroy_workqueue(kloopd_wq);
+ kloopd_wq = NULL;
+ }
+ if (lc->file)
+ filp_close(lc->file, NULL);
+ kfree(lc->path); /* kfree() ignores NULL */
+ kfree(lc);
+ }
+}
+
+/*
+ * Construct a loop mapping on a (sparse) file.
+ *
+ * Argument:
+ * <file_path>: path to backing file
+ */
+static int loop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+ int r = -ENOMEM;
+ struct loop_c *lc;
+
+ if (argc != 1) {
+ ti->error = "Invalid argument count";
+ return -EINVAL;
+ }
+
+ lc = ti->private = kzalloc(sizeof(*lc), GFP_KERNEL);
+ if (!lc) {
+ ti->error = "Cannot allocate context";
+ return -ENOMEM;
+ }
+
+ /*
+ * Take a reference on the shared workqueue (allocating it for the
+ * first mapping) before any error path can call destroy_loop(),
+ * which drops one reference unconditionally.
+ */
+ if (atomic_inc_return(&kloopd_wq_users) == 1) {
+ kloopd_wq = alloc_workqueue(WORKQUEUE_NAME, WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+ if (!kloopd_wq) {
+ DMERR("Cannot start workqueue %s", WORKQUEUE_NAME);
+ atomic_dec(&kloopd_wq_users);
+ kfree(lc);
+ ti->error = "Cannot allocate workqueue";
+ return -ENOMEM;
+ }
+ }
+
+ spin_lock_init(&lc->lock);
+ bio_list_init(&lc->bios);
+ INIT_WORK(&lc->bios_ws, loop_process_bios);
+ list_add(&lc->list, &loop_devs);
+
+ ti->num_discard_bios = 1;
+ ti->discards_supported = true;
+ ti->flush_supported = true;
+ ti->per_io_data_size = sizeof(struct bio_c);
+ lc->ti = ti;
+
+ lc->path = kstrdup(argv[0], GFP_KERNEL);
+ if (!lc->path) {
+ ti->error = "Cannot allocate path";
+ goto err;
+ }
+
+ /* Open existing backing file */
+ lc->file = filp_open(lc->path, O_EXCL | O_LARGEFILE | O_RDWR, 0);
+ if (IS_ERR(lc->file)) {
+ ti->error = "Cannot open backing file";
+ r = PTR_ERR(lc->file);
+ lc->file = NULL;
+ goto err;
+ }
+
+ r = __file_in_use(lc);
+ if (r) {
+ ti->error = "Cannot use same file multiple times";
+ goto err;
+ }
+
+ if (ti->len > to_sector(i_size_read(lc->file->f_mapping->host))) {
+ ti->error = "Backing file too small";
+ r = -ENOSPC;
+ goto err;
+ }
+
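+ /* Cap the per-bio IO size; DM core splits larger bios accordingly */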
+ r = dm_set_target_max_io_len(ti, min(ti->len, (sector_t) UINT_MAX));
+ if (r)
+ goto err;
+
+ return 0;
+err:
+ destroy_loop(lc);
+ return r;
+}
+
+static void loop_dtr(struct dm_target *ti)
+{
+ destroy_loop(ti->private);
+}
+
+static int loop_map(struct dm_target *ti, struct bio *bio)
+{
+ struct loop_c *lc = ti->private;
+
+ /* Remap to the target-relative sector; the target may map at an offset in the table */
+ bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
+
+ spin_lock_irq(&lc->lock);
+ bio_list_add(&lc->bios, bio);
+ spin_unlock_irq(&lc->lock);
+
+ queue_work(kloopd_wq, &lc->bios_ws);
+
+ return DM_MAPIO_SUBMITTED;
+}
+
+static void loop_status(struct dm_target *ti, status_type_t type,
+ unsigned status_flags, char *result, unsigned maxlen)
+{
+ struct loop_c *lc = ti->private;
+ int sz = 0;
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ /* No INFO data; emit an empty result */
+ result[0] = '\0';
+ break;
+ case STATUSTYPE_TABLE:
+ DMEMIT("%s", lc->path);
+ break;
+ }
+}
+
+static struct target_type loop_target = {
+ .name = "loop",
+ .version = {1, 0, 0},
+ .module = THIS_MODULE,
+ .ctr = loop_ctr,
+ .dtr = loop_dtr,
+ .map = loop_map,
+ .status = loop_status,
+};
+
+static int __init dm_loop_init(void)
+{
+ return dm_register_target(&loop_target);
+}
+
+static void __exit dm_loop_exit(void)
+{
+ dm_unregister_target(&loop_target);
+}
+
+/* Module hooks */
+module_init(dm_loop_init);
+module_exit(dm_loop_exit);
+
+MODULE_DESCRIPTION(DM_NAME " loop target");
+MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
---
 Documentation/device-mapper/loop.txt |  26 ++
 drivers/md/Kconfig                   |   7 +
 drivers/md/Makefile                  |   1 +
 drivers/md/dm-loop.c                 | 374 ++++++++++++++++++++++++++++++++++
 4 files changed, 408 insertions(+)
 create mode 100644 Documentation/device-mapper/loop.txt
 create mode 100644 drivers/md/dm-loop.c