diff mbox series

[v8,3/4] Adds blk_interposer to md.

Message ID 1617968884-15149-4-git-send-email-sergei.shtepa@veeam.com (mailing list archive)
State New, archived
Headers show
Series block device interposer | expand

Commit Message

Sergei Shtepa April 9, 2021, 11:48 a.m. UTC
* The new flag DM_INTERPOSE_FLAG allows to specify that the dm target
will be attached using blk_interposer.
* The [interpose] option allows to specify which device will be
attached via the interposer.
* The connection and disconnection of the interrupter is performed in
the functions __dm_suspend() and __dm_resume(). The flag
DM_SUSPEND_DETACH_IP_FLAG was added for this purpose.
* dm_submit_bio() sets BIO_INTERPOSED for each bio from the interposer.

Signed-off-by: Sergei Shtepa <sergei.shtepa@veeam.com>
---
 drivers/md/dm-core.h          |   1 +
 drivers/md/dm-ioctl.c         |  95 ++++++++++---
 drivers/md/dm-table.c         |  68 ++++++++-
 drivers/md/dm.c               | 254 ++++++++++++++++++++++++++++++----
 drivers/md/dm.h               |   8 +-
 include/linux/device-mapper.h |   1 +
 include/uapi/linux/dm-ioctl.h |   6 +
 7 files changed, 375 insertions(+), 58 deletions(-)

Comments

kernel test robot April 9, 2021, 2:12 p.m. UTC | #1
Hi Sergei,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on block/for-next]
[also build test WARNING on hch-configfs/for-next v5.12-rc6]
[cannot apply to dm/for-next next-20210409]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Sergei-Shtepa/block-device-interposer/20210409-194943
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
config: arm-randconfig-r025-20210409 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project dd453a1389b6a7e6d9214b449d3c54981b1a89b6)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm cross compiling tool for clang build
        # apt-get install binutils-arm-linux-gnueabi
        # https://github.com/0day-ci/linux/commit/df79fb333cb0a1263a1f03f54de425507e3c2238
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Sergei-Shtepa/block-device-interposer/20210409-194943
        git checkout df79fb333cb0a1263a1f03f54de425507e3c2238
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/md/dm.c:2682:5: warning: no previous prototype for function '__dm_attach_interposer' [-Wmissing-prototypes]
   int __dm_attach_interposer(struct mapped_device *md)
       ^
   drivers/md/dm.c:2682:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   int __dm_attach_interposer(struct mapped_device *md)
   ^
   static 
>> drivers/md/dm.c:2724:5: warning: no previous prototype for function '__dm_detach_interposer' [-Wmissing-prototypes]
   int __dm_detach_interposer(struct mapped_device *md)
       ^
   drivers/md/dm.c:2724:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   int __dm_detach_interposer(struct mapped_device *md)
   ^
   static 
   2 warnings generated.


vim +/__dm_attach_interposer +2682 drivers/md/dm.c

  2681	
> 2682	int __dm_attach_interposer(struct mapped_device *md)
  2683	{
  2684		int r;
  2685		struct dm_table *map;
  2686		struct block_device *original_bdev = NULL;
  2687	
  2688		if (dm_interposer_attached(md))
  2689			return 0;
  2690	
  2691		map = rcu_dereference_protected(md->map,
  2692						lockdep_is_held(&md->suspend_lock));
  2693		if (!map) {
  2694			DMERR("%s: interposers table is not initialized",
  2695				dm_device_name(md));
  2696			return -EINVAL;
  2697		}
  2698	
  2699		original_bdev = get_interposed_bdev(map);
  2700		if (!original_bdev) {
  2701			DMERR("%s: interposer cannot get interposed device from table",
  2702				dm_device_name(md));
  2703			return -EINVAL;
  2704		}
  2705	
  2706		bdev_interposer_lock(original_bdev);
  2707	
  2708		r = bdev_interposer_attach(original_bdev, dm_disk(md)->part0);
  2709		if (r)
  2710			DMERR("%s: failed to attach interposer",
  2711				dm_device_name(md));
  2712		else
  2713			set_bit(DMF_INTERPOSER_ATTACHED, &md->flags);
  2714	
  2715		bdev_interposer_unlock(original_bdev);
  2716	
  2717		unlock_bdev_fs(md, original_bdev);
  2718	
  2719		bdput(original_bdev);
  2720	
  2721		return r;
  2722	}
  2723	
> 2724	int __dm_detach_interposer(struct mapped_device *md)
  2725	{
  2726		struct dm_table *map = NULL;
  2727		struct block_device *original_bdev;
  2728	
  2729		if (!dm_interposer_attached(md))
  2730			return 0;
  2731		/*
  2732		 * If mapped device is suspended, but should be detached
  2733		 * we just detach without freeze fs on interposed device.
  2734		 */
  2735		map = rcu_dereference_protected(md->map,
  2736				lockdep_is_held(&md->suspend_lock));
  2737		if (!map) {
  2738			/*
  2739			 * If table is not initialized then interposed device
  2740			 * cannot be attached
  2741			 */
  2742			DMERR("%s: table is not initialized for device",
  2743				dm_device_name(md));
  2744			return -EINVAL;
  2745		}
  2746	
  2747		original_bdev = get_interposed_bdev(map);
  2748		if (!original_bdev) {
  2749			DMERR("%s: interposer cannot get interposed device from table",
  2750				dm_device_name(md));
  2751			return -EINVAL;
  2752		}
  2753	
  2754		bdev_interposer_lock(original_bdev);
  2755	
  2756		bdev_interposer_detach(original_bdev);
  2757		clear_bit(DMF_INTERPOSER_ATTACHED, &md->flags);
  2758	
  2759		bdev_interposer_unlock(original_bdev);
  2760	
  2761		bdput(original_bdev);
  2762		return 0;
  2763	}
  2764	/*
  2765	 * We need to be able to change a mapping table under a mounted
  2766	 * filesystem.  For example we might want to move some data in
  2767	 * the background.  Before the table can be swapped with
  2768	 * dm_bind_table, dm_suspend must be called to flush any in
  2769	 * flight bios and ensure that any further io gets deferred.
  2770	 */
  2771	/*
  2772	 * Suspend mechanism in request-based dm.
  2773	 *
  2774	 * 1. Flush all I/Os by lock_fs() if needed.
  2775	 * 2. Stop dispatching any I/O by stopping the request_queue.
  2776	 * 3. Wait for all in-flight I/Os to be completed or requeued.
  2777	 *
  2778	 * To abort suspend, start the request_queue.
  2779	 */
  2780	int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
  2781	{
  2782		struct dm_table *map = NULL;
  2783		int r = 0;
  2784	
  2785	retry:
  2786		mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
  2787	
  2788		if (dm_suspended_md(md)) {
  2789			if (suspend_flags & DM_SUSPEND_DETACH_IP_FLAG)
  2790				r = __dm_detach_interposer(md);
  2791			else
  2792				r = -EINVAL;
  2793	
  2794			goto out_unlock;
  2795		}
  2796	
  2797		if (dm_suspended_internally_md(md)) {
  2798			/* already internally suspended, wait for internal resume */
  2799			mutex_unlock(&md->suspend_lock);
  2800			r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
  2801			if (r)
  2802				return r;
  2803			goto retry;
  2804		}
  2805	
  2806		map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
  2807	
  2808		r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED);
  2809		if (r)
  2810			goto out_unlock;
  2811	
  2812		set_bit(DMF_POST_SUSPENDING, &md->flags);
  2813		dm_table_postsuspend_targets(map);
  2814		clear_bit(DMF_POST_SUSPENDING, &md->flags);
  2815	
  2816	out_unlock:
  2817		mutex_unlock(&md->suspend_lock);
  2818		return r;
  2819	}
  2820	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot April 9, 2021, 2:39 p.m. UTC | #2
Hi Sergei,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on block/for-next]
[also build test WARNING on hch-configfs/for-next v5.12-rc6]
[cannot apply to dm/for-next next-20210409]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Sergei-Shtepa/block-device-interposer/20210409-194943
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
config: i386-randconfig-m021-20210409 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/df79fb333cb0a1263a1f03f54de425507e3c2238
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Sergei-Shtepa/block-device-interposer/20210409-194943
        git checkout df79fb333cb0a1263a1f03f54de425507e3c2238
        # save the attached .config to linux build tree
        make W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/md/dm.c:2682:5: warning: no previous prototype for '__dm_attach_interposer' [-Wmissing-prototypes]
    2682 | int __dm_attach_interposer(struct mapped_device *md)
         |     ^~~~~~~~~~~~~~~~~~~~~~
>> drivers/md/dm.c:2724:5: warning: no previous prototype for '__dm_detach_interposer' [-Wmissing-prototypes]
    2724 | int __dm_detach_interposer(struct mapped_device *md)
         |     ^~~~~~~~~~~~~~~~~~~~~~


vim +/__dm_attach_interposer +2682 drivers/md/dm.c

  2681	
> 2682	int __dm_attach_interposer(struct mapped_device *md)
  2683	{
  2684		int r;
  2685		struct dm_table *map;
  2686		struct block_device *original_bdev = NULL;
  2687	
  2688		if (dm_interposer_attached(md))
  2689			return 0;
  2690	
  2691		map = rcu_dereference_protected(md->map,
  2692						lockdep_is_held(&md->suspend_lock));
  2693		if (!map) {
  2694			DMERR("%s: interposers table is not initialized",
  2695				dm_device_name(md));
  2696			return -EINVAL;
  2697		}
  2698	
  2699		original_bdev = get_interposed_bdev(map);
  2700		if (!original_bdev) {
  2701			DMERR("%s: interposer cannot get interposed device from table",
  2702				dm_device_name(md));
  2703			return -EINVAL;
  2704		}
  2705	
  2706		bdev_interposer_lock(original_bdev);
  2707	
  2708		r = bdev_interposer_attach(original_bdev, dm_disk(md)->part0);
  2709		if (r)
  2710			DMERR("%s: failed to attach interposer",
  2711				dm_device_name(md));
  2712		else
  2713			set_bit(DMF_INTERPOSER_ATTACHED, &md->flags);
  2714	
  2715		bdev_interposer_unlock(original_bdev);
  2716	
  2717		unlock_bdev_fs(md, original_bdev);
  2718	
  2719		bdput(original_bdev);
  2720	
  2721		return r;
  2722	}
  2723	
> 2724	int __dm_detach_interposer(struct mapped_device *md)
  2725	{
  2726		struct dm_table *map = NULL;
  2727		struct block_device *original_bdev;
  2728	
  2729		if (!dm_interposer_attached(md))
  2730			return 0;
  2731		/*
  2732		 * If mapped device is suspended, but should be detached
  2733		 * we just detach without freeze fs on interposed device.
  2734		 */
  2735		map = rcu_dereference_protected(md->map,
  2736				lockdep_is_held(&md->suspend_lock));
  2737		if (!map) {
  2738			/*
  2739			 * If table is not initialized then interposed device
  2740			 * cannot be attached
  2741			 */
  2742			DMERR("%s: table is not initialized for device",
  2743				dm_device_name(md));
  2744			return -EINVAL;
  2745		}
  2746	
  2747		original_bdev = get_interposed_bdev(map);
  2748		if (!original_bdev) {
  2749			DMERR("%s: interposer cannot get interposed device from table",
  2750				dm_device_name(md));
  2751			return -EINVAL;
  2752		}
  2753	
  2754		bdev_interposer_lock(original_bdev);
  2755	
  2756		bdev_interposer_detach(original_bdev);
  2757		clear_bit(DMF_INTERPOSER_ATTACHED, &md->flags);
  2758	
  2759		bdev_interposer_unlock(original_bdev);
  2760	
  2761		bdput(original_bdev);
  2762		return 0;
  2763	}
  2764	/*
  2765	 * We need to be able to change a mapping table under a mounted
  2766	 * filesystem.  For example we might want to move some data in
  2767	 * the background.  Before the table can be swapped with
  2768	 * dm_bind_table, dm_suspend must be called to flush any in
  2769	 * flight bios and ensure that any further io gets deferred.
  2770	 */
  2771	/*
  2772	 * Suspend mechanism in request-based dm.
  2773	 *
  2774	 * 1. Flush all I/Os by lock_fs() if needed.
  2775	 * 2. Stop dispatching any I/O by stopping the request_queue.
  2776	 * 3. Wait for all in-flight I/Os to be completed or requeued.
  2777	 *
  2778	 * To abort suspend, start the request_queue.
  2779	 */
  2780	int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
  2781	{
  2782		struct dm_table *map = NULL;
  2783		int r = 0;
  2784	
  2785	retry:
  2786		mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
  2787	
  2788		if (dm_suspended_md(md)) {
  2789			if (suspend_flags & DM_SUSPEND_DETACH_IP_FLAG)
  2790				r = __dm_detach_interposer(md);
  2791			else
  2792				r = -EINVAL;
  2793	
  2794			goto out_unlock;
  2795		}
  2796	
  2797		if (dm_suspended_internally_md(md)) {
  2798			/* already internally suspended, wait for internal resume */
  2799			mutex_unlock(&md->suspend_lock);
  2800			r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
  2801			if (r)
  2802				return r;
  2803			goto retry;
  2804		}
  2805	
  2806		map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
  2807	
  2808		r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED);
  2809		if (r)
  2810			goto out_unlock;
  2811	
  2812		set_bit(DMF_POST_SUSPENDING, &md->flags);
  2813		dm_table_postsuspend_targets(map);
  2814		clear_bit(DMF_POST_SUSPENDING, &md->flags);
  2815	
  2816	out_unlock:
  2817		mutex_unlock(&md->suspend_lock);
  2818		return r;
  2819	}
  2820	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot April 9, 2021, 5:03 p.m. UTC | #3
Hi Sergei,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on block/for-next]
[also build test WARNING on hch-configfs/for-next v5.12-rc6]
[cannot apply to dm/for-next next-20210409]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Sergei-Shtepa/block-device-interposer/20210409-194943
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
config: x86_64-randconfig-s032-20210409 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-279-g6d5d9b42-dirty
        # https://github.com/0day-ci/linux/commit/df79fb333cb0a1263a1f03f54de425507e3c2238
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Sergei-Shtepa/block-device-interposer/20210409-194943
        git checkout df79fb333cb0a1263a1f03f54de425507e3c2238
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
>> drivers/md/dm-table.c:337:63: sparse: sparse: incorrect type in argument 3 (different base types) @@     expected restricted fmode_t [usertype] mode @@     got bool [usertype] interpose @@
   drivers/md/dm-table.c:337:63: sparse:     expected restricted fmode_t [usertype] mode
   drivers/md/dm-table.c:337:63: sparse:     got bool [usertype] interpose
--
>> drivers/md/dm.c:2682:5: sparse: sparse: symbol '__dm_attach_interposer' was not declared. Should it be static?
>> drivers/md/dm.c:2724:5: sparse: sparse: symbol '__dm_detach_interposer' was not declared. Should it be static?

Please review and possibly fold the followup patch.

vim +337 drivers/md/dm-table.c

   322	
   323	/*
   324	 * This upgrades the mode on an already open dm_dev, being
   325	 * careful to leave things as they were if we fail to reopen the
   326	 * device and not to touch the existing bdev field in case
   327	 * it is accessed concurrently.
   328	 */
   329	static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
   330				bool interpose, struct mapped_device *md)
   331	{
   332		int r;
   333		struct dm_dev *old_dev, *new_dev;
   334	
   335		old_dev = dd->dm_dev;
   336	
 > 337		r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev, interpose,
   338					dd->dm_dev->mode | new_mode, &new_dev);
   339		if (r)
   340			return r;
   341	
   342		dd->dm_dev = new_dev;
   343		dm_put_table_device(md, old_dev);
   344	
   345		return 0;
   346	}
   347	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 5953ff2bd260..431b82461eae 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -112,6 +112,7 @@  struct mapped_device {
 	/* for blk-mq request-based DM support */
 	struct blk_mq_tag_set *tag_set;
 	bool init_tio_pdu:1;
+	bool interpose:1;
 
 	struct srcu_struct io_barrier;
 };
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 1ca65b434f1f..7ec37526920b 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -294,11 +294,29 @@  static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool
 			md = hc->md;
 			dm_get(md);
 
-			if (keep_open_devices &&
-			    dm_lock_for_deletion(md, mark_deferred, only_deferred)) {
-				dm_put(md);
-				dev_skipped++;
-				continue;
+			if (md->interpose) {
+				int r;
+
+				/*
+				 * Interposer should be suspended and detached
+				 * from the interposed block device.
+				 */
+				r = dm_suspend(md, DM_SUSPEND_DETACH_IP_FLAG |
+						   DM_SUSPEND_LOCKFS_FLAG);
+				if (r) {
+					DMERR("%s: unable to suspend and detach interposer",
+						dm_device_name(md));
+					dm_put(md);
+					dev_skipped++;
+					continue;
+				}
+			} else {
+				if (keep_open_devices &&
+				    dm_lock_for_deletion(md, mark_deferred, only_deferred)) {
+					dm_put(md);
+					dev_skipped++;
+					continue;
+				}
 			}
 
 			t = __hash_remove(hc);
@@ -732,6 +750,9 @@  static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
 	if (dm_test_deferred_remove_flag(md))
 		param->flags |= DM_DEFERRED_REMOVE;
 
+	if (dm_interposer_attached(md))
+		param->flags |= DM_INTERPOSE_FLAG;
+
 	param->dev = huge_encode_dev(disk_devt(disk));
 
 	/*
@@ -878,20 +899,37 @@  static int dev_remove(struct file *filp, struct dm_ioctl *param, size_t param_si
 
 	md = hc->md;
 
-	/*
-	 * Ensure the device is not open and nothing further can open it.
-	 */
-	r = dm_lock_for_deletion(md, !!(param->flags & DM_DEFERRED_REMOVE), false);
-	if (r) {
-		if (r == -EBUSY && param->flags & DM_DEFERRED_REMOVE) {
+	if (!md->interpose) {
+		/*
+		 * Ensure the device is not open and nothing further can open it.
+		 */
+		r = dm_lock_for_deletion(md, !!(param->flags & DM_DEFERRED_REMOVE), false);
+		if (r) {
+			if (r == -EBUSY && param->flags & DM_DEFERRED_REMOVE) {
+				up_write(&_hash_lock);
+				dm_put(md);
+				return 0;
+			}
+			DMDEBUG_LIMIT("unable to remove open device %s",
+					hc->name);
 			up_write(&_hash_lock);
 			dm_put(md);
-			return 0;
+			return r;
+		}
+	} else {
+		/*
+		 * Interposer should be suspended and detached from
+		 * the interposed block device.
+		 */
+		r = dm_suspend(md, DM_SUSPEND_DETACH_IP_FLAG |
+				   DM_SUSPEND_LOCKFS_FLAG);
+		if (r) {
+			DMERR("%s: unable to suspend and detach interposer",
+				dm_device_name(md));
+			up_write(&_hash_lock);
+			dm_put(md);
+			return r;
 		}
-		DMDEBUG_LIMIT("unable to remove open device %s", hc->name);
-		up_write(&_hash_lock);
-		dm_put(md);
-		return r;
 	}
 
 	t = __hash_remove(hc);
@@ -1050,6 +1088,7 @@  static int do_resume(struct dm_ioctl *param)
 
 	md = hc->md;
 
+
 	new_map = hc->new_map;
 	hc->new_map = NULL;
 	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
@@ -1063,8 +1102,14 @@  static int do_resume(struct dm_ioctl *param)
 			suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
 		if (param->flags & DM_NOFLUSH_FLAG)
 			suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG;
-		if (!dm_suspended_md(md))
-			dm_suspend(md, suspend_flags);
+
+		if (md->interpose) {
+			if (!dm_suspended_md(md) || dm_interposer_attached(md))
+				dm_suspend(md, suspend_flags | DM_SUSPEND_DETACH_IP_FLAG);
+		} else {
+			if (!dm_suspended_md(md))
+				dm_suspend(md, suspend_flags);
+		}
 
 		old_map = dm_swap_table(md, new_map);
 		if (IS_ERR(old_map)) {
@@ -1267,6 +1312,11 @@  static inline fmode_t get_mode(struct dm_ioctl *param)
 	return mode;
 }
 
+static inline bool get_interpose_flag(struct dm_ioctl *param)
+{
+	return (param->flags & DM_INTERPOSE_FLAG);
+}
+
 static int next_target(struct dm_target_spec *last, uint32_t next, void *end,
 		       struct dm_target_spec **spec, char **target_params)
 {
@@ -1289,11 +1339,6 @@  static int populate_table(struct dm_table *table,
 	void *end = (void *) param + param_size;
 	char *target_params;
 
-	if (!param->target_count) {
-		DMWARN("populate_table: no targets specified");
-		return -EINVAL;
-	}
-
 	for (i = 0; i < param->target_count; i++) {
 
 		r = next_target(spec, next, end, &spec, &target_params);
@@ -1338,6 +1383,8 @@  static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si
 	if (!md)
 		return -ENXIO;
 
+	md->interpose = get_interpose_flag(param);
+
 	r = dm_table_create(&t, get_mode(param), param->target_count, md);
 	if (r)
 		goto err;
@@ -2098,6 +2145,8 @@  int __init dm_early_create(struct dm_ioctl *dmi,
 	if (r)
 		goto err_hash_remove;
 
+	md->interpose = get_interpose_flag(dmi);
+
 	/* add targets */
 	for (i = 0; i < dmi->target_count; i++) {
 		r = dm_table_add_target(t, spec_array[i]->target_type,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index e5f0f1703c5d..23574c727f2b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -327,14 +327,14 @@  static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
  * it is accessed concurrently.
  */
 static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
-			struct mapped_device *md)
+			bool interpose, struct mapped_device *md)
 {
 	int r;
 	struct dm_dev *old_dev, *new_dev;
 
 	old_dev = dd->dm_dev;
 
-	r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev,
+	r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev, interpose,
 				dd->dm_dev->mode | new_mode, &new_dev);
 	if (r)
 		return r;
@@ -367,6 +367,8 @@  int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
 {
 	int r;
 	dev_t dev;
+	size_t ofs = 0;
+	bool interpose = false;
 	unsigned int major, minor;
 	char dummy;
 	struct dm_dev_internal *dd;
@@ -374,13 +376,40 @@  int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
 
 	BUG_ON(!t);
 
-	if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) {
+	/*
+	 * Extract extended options for device
+	 */
+	if (path[0] == '[') {
+		const char *interpose_opt = "interpose";
+		size_t opt_pos = 1;
+		size_t opt_len;
+
+		/*
+		 * Because only one option is supported yet, the parser
+		 * can be simplest.
+		 */
+		opt_len = strlen(interpose_opt);
+		if ((opt_pos + opt_len) < strlen(path) &&
+		    memcmp(&path[opt_pos], interpose_opt, opt_len) == 0) {
+			interpose = true;
+
+			if (!t->md->interpose)
+				t->md->interpose = true;
+		} else {
+			DMERR("Invalid devices extended options %s", path);
+			return -EINVAL;
+		}
+
+		ofs = opt_pos + opt_len + 1;
+	}
+
+	if (sscanf(&path[ofs], "%u:%u%c", &major, &minor, &dummy) == 2) {
 		/* Extract the major/minor numbers */
 		dev = MKDEV(major, minor);
 		if (MAJOR(dev) != major || MINOR(dev) != minor)
 			return -EOVERFLOW;
 	} else {
-		dev = dm_get_dev_t(path);
+		dev = dm_get_dev_t(&path[ofs]);
 		if (!dev)
 			return -ENODEV;
 	}
@@ -391,7 +420,8 @@  int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
 		if (!dd)
 			return -ENOMEM;
 
-		if ((r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev))) {
+		r = dm_get_table_device(t->md, dev, mode, interpose, &dd->dm_dev);
+		if (r) {
 			kfree(dd);
 			return r;
 		}
@@ -401,14 +431,40 @@  int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
 		goto out;
 
 	} else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
-		r = upgrade_mode(dd, mode, t->md);
+		r = upgrade_mode(dd, mode, interpose, t->md);
 		if (r)
 			return r;
 	}
 	refcount_inc(&dd->count);
 out:
+	if (interpose) {
+		struct block_device *original = dd->dm_dev->bdev;
+		/*
+		 * Interposer target should cover all underlying device
+		 */
+		if (ti->begin != 0) {
+			DMERR("%s: target offset should be zero for dm interposer",
+			      dm_device_name(t->md));
+			r = -EINVAL;
+			goto fail;
+		}
+		if (bdev_nr_sectors(original) != ti->len) {
+			DMERR("%s: interposer and interposed block device size should be equal",
+			      dm_device_name(t->md));
+			r = -EINVAL;
+			goto fail;
+		}
+	}
+
 	*result = dd->dm_dev;
 	return 0;
+fail:
+	if (refcount_dec_and_test(&dd->count)) {
+		dm_put_table_device(t->md, dd->dm_dev);
+		list_del(&dd->list);
+		kfree(dd);
+	}
+	return r;
 }
 EXPORT_SYMBOL(dm_get_device);
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3f3be9408afa..04142454c4ee 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -149,6 +149,7 @@  EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr);
 #define DMF_DEFERRED_REMOVE 6
 #define DMF_SUSPENDED_INTERNALLY 7
 #define DMF_POST_SUSPENDING 8
+#define DMF_INTERPOSER_ATTACHED 9
 
 #define DM_NUMA_NODE NUMA_NO_NODE
 static int dm_numa_node = DM_NUMA_NODE;
@@ -757,18 +758,24 @@  static int open_table_device(struct table_device *td, dev_t dev,
 			     struct mapped_device *md)
 {
 	struct block_device *bdev;
-
+	fmode_t mode = td->dm_dev.mode;
+	void *holder = NULL;
 	int r;
 
 	BUG_ON(td->dm_dev.bdev);
 
-	bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr);
+	if (!td->dm_dev.interpose) {
+		mode |= FMODE_EXCL;
+		holder = _dm_claim_ptr;
+	}
+
+	bdev = blkdev_get_by_dev(dev, mode, holder);
 	if (IS_ERR(bdev))
 		return PTR_ERR(bdev);
 
 	r = bd_link_disk_holder(bdev, dm_disk(md));
 	if (r) {
-		blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
+		blkdev_put(bdev, mode);
 		return r;
 	}
 
@@ -782,11 +789,16 @@  static int open_table_device(struct table_device *td, dev_t dev,
  */
 static void close_table_device(struct table_device *td, struct mapped_device *md)
 {
+	fmode_t mode = td->dm_dev.mode;
+
 	if (!td->dm_dev.bdev)
 		return;
 
 	bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
-	blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
+	if (!td->dm_dev.interpose)
+		mode |= FMODE_EXCL;
+	blkdev_put(td->dm_dev.bdev, mode);
+
 	put_dax(td->dm_dev.dax_dev);
 	td->dm_dev.bdev = NULL;
 	td->dm_dev.dax_dev = NULL;
@@ -805,7 +817,7 @@  static struct table_device *find_table_device(struct list_head *l, dev_t dev,
 }
 
 int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
-			struct dm_dev **result)
+			bool interpose, struct dm_dev **result)
 {
 	int r;
 	struct table_device *td;
@@ -821,6 +833,7 @@  int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
 
 		td->dm_dev.mode = mode;
 		td->dm_dev.bdev = NULL;
+		td->dm_dev.interpose = interpose;
 
 		if ((r = open_table_device(td, dev, md))) {
 			mutex_unlock(&md->table_devices_lock);
@@ -1496,13 +1509,12 @@  static int __send_empty_flush(struct clone_info *ci)
 static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
 				    sector_t sector, unsigned *len)
 {
-	struct bio *bio = ci->bio;
 	struct dm_target_io *tio;
 	int r;
 
 	tio = alloc_tio(ci, ti, 0, GFP_NOIO);
 	tio->len_ptr = len;
-	r = clone_bio(tio, bio, sector, *len);
+	r = clone_bio(tio, ci->bio, sector, *len);
 	if (r < 0) {
 		free_tio(tio);
 		return r;
@@ -1696,6 +1708,13 @@  static blk_qc_t dm_submit_bio(struct bio *bio)
 		goto out;
 	}
 
+	/*
+	 * If md is an interposer, then we must set the BIO_INTERPOSE flag
+	 * so that the request is not re-interposed.
+	 */
+	if (md->interpose)
+		bio_set_flag(bio, BIO_INTERPOSED);
+
 	/* If suspended, queue this IO for later */
 	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
 		if (bio->bi_opf & REQ_NOWAIT)
@@ -2410,7 +2429,8 @@  static void dm_queue_flush(struct mapped_device *md)
  */
 struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
-	struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
+	struct dm_table *live_map = NULL;
+	struct dm_table *map = ERR_PTR(-EINVAL);
 	struct queue_limits limits;
 	int r;
 
@@ -2453,26 +2473,50 @@  struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
  * Functions to lock and unlock any filesystem running on the
  * device.
  */
-static int lock_fs(struct mapped_device *md)
+static int lock_bdev_fs(struct mapped_device *md, struct block_device *bdev)
 {
 	int r;
 
 	WARN_ON(test_bit(DMF_FROZEN, &md->flags));
 
-	r = freeze_bdev(md->disk->part0);
+	r = freeze_bdev(bdev);
 	if (!r)
 		set_bit(DMF_FROZEN, &md->flags);
 	return r;
 }
 
-static void unlock_fs(struct mapped_device *md)
+static void unlock_bdev_fs(struct mapped_device *md, struct block_device *bdev)
 {
 	if (!test_bit(DMF_FROZEN, &md->flags))
 		return;
-	thaw_bdev(md->disk->part0);
+	thaw_bdev(bdev);
 	clear_bit(DMF_FROZEN, &md->flags);
 }
 
+static inline int lock_fs(struct mapped_device *md)
+{
+	return lock_bdev_fs(md, md->disk->part0);
+}
+
+static inline void unlock_fs(struct mapped_device *md)
+{
+	unlock_bdev_fs(md, md->disk->part0);
+}
+
+static inline struct block_device *get_interposed_bdev(struct dm_table *t)
+{
+	struct dm_dev_internal *dd;
+
+	/*
+	 * For interposer should be only one device in dm table
+	 */
+	list_for_each_entry(dd, dm_table_get_devices(t), list)
+		if (dd->dm_dev->interpose)
+			return bdgrab(dd->dm_dev->bdev);
+
+	return NULL;
+}
+
 /*
  * @suspend_flags: DM_SUSPEND_LOCKFS_FLAG and/or DM_SUSPEND_NOFLUSH_FLAG
  * @task_state: e.g. TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE
@@ -2488,7 +2532,10 @@  static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 {
 	bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
 	bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
-	int r;
+	bool detach_ip = suspend_flags & DM_SUSPEND_DETACH_IP_FLAG
+			 && md->interpose;
+	struct block_device *original_bdev = NULL;
+	int r = 0;
 
 	lockdep_assert_held(&md->suspend_lock);
 
@@ -2507,18 +2554,50 @@  static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 	 */
 	dm_table_presuspend_targets(map);
 
+	if (!md->interpose) {
+		/*
+		 * Flush I/O to the device.
+		 * Any I/O submitted after lock_fs() may not be flushed.
+		 * noflush takes precedence over do_lockfs.
+		 * (lock_fs() flushes I/Os and waits for them to complete.)
+		 */
+		if (!noflush && do_lockfs)
+			r = lock_fs(md);
+	} else if (map) {
+		/*
+		 * Interposer should not lock mapped device, but
+		 * should freeze interposed device and lock it.
+		 */
+		original_bdev = get_interposed_bdev(map);
+		if (!original_bdev) {
+			r = -EINVAL;
+			DMERR("%s: interposer cannot get interposed device from table",
+				dm_device_name(md));
+			goto presuspend_undo;
+		}
+
+		if (!noflush && do_lockfs) {
+			r = lock_bdev_fs(md, original_bdev);
+			if (r) {
+				DMERR("%s: interposer cannot freeze interposed device",
+					dm_device_name(md));
+				goto presuspend_undo;
+			}
+		}
+
+		bdev_interposer_lock(original_bdev);
+	}
 	/*
-	 * Flush I/O to the device.
-	 * Any I/O submitted after lock_fs() may not be flushed.
-	 * noflush takes precedence over do_lockfs.
-	 * (lock_fs() flushes I/Os and waits for them to complete.)
+	 * If map is not initialized, then we cannot suspend
+	 * interposed device
 	 */
-	if (!noflush && do_lockfs) {
-		r = lock_fs(md);
-		if (r) {
-			dm_table_presuspend_undo_targets(map);
-			return r;
-		}
+
+presuspend_undo:
+	if (r) {
+		if (original_bdev)
+			bdput(original_bdev);
+		dm_table_presuspend_undo_targets(map);
+		return r;
 	}
 
 	/*
@@ -2559,14 +2638,40 @@  static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 	if (map)
 		synchronize_srcu(&md->io_barrier);
 
-	/* were we interrupted ? */
-	if (r < 0) {
+	if (r == 0) { /* the wait ended successfully */
+		if (md->interpose && original_bdev) {
+			if (detach_ip) {
+				bdev_interposer_detach(original_bdev);
+				clear_bit(DMF_INTERPOSER_ATTACHED, &md->flags);
+			}
+
+			bdev_interposer_unlock(original_bdev);
+
+			if (detach_ip) {
+				/*
+				 * If th interposer is detached, then there is
+				 * no reason in keeping the queue of the
+				 * interposed device stopped.
+				 */
+				unlock_bdev_fs(md, original_bdev);
+			}
+
+			bdput(original_bdev);
+		}
+	} else { /* were we interrupted ? */
 		dm_queue_flush(md);
 
 		if (dm_request_based(md))
 			dm_start_queue(md->queue);
 
-		unlock_fs(md);
+		if (md->interpose && original_bdev) {
+			bdev_interposer_unlock(original_bdev);
+			unlock_bdev_fs(md, original_bdev);
+
+			bdput(original_bdev);
+		} else
+			unlock_fs(md);
+
 		dm_table_presuspend_undo_targets(map);
 		/* pushback list is already flushed, so skip flush */
 	}
@@ -2574,6 +2679,88 @@  static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 	return r;
 }
 
+int __dm_attach_interposer(struct mapped_device *md)
+{
+	int r;
+	struct dm_table *map;
+	struct block_device *original_bdev = NULL;
+
+	if (dm_interposer_attached(md))
+		return 0;
+
+	map = rcu_dereference_protected(md->map,
+					lockdep_is_held(&md->suspend_lock));
+	if (!map) {
+		DMERR("%s: interposers table is not initialized",
+			dm_device_name(md));
+		return -EINVAL;
+	}
+
+	original_bdev = get_interposed_bdev(map);
+	if (!original_bdev) {
+		DMERR("%s: interposer cannot get interposed device from table",
+			dm_device_name(md));
+		return -EINVAL;
+	}
+
+	bdev_interposer_lock(original_bdev);
+
+	r = bdev_interposer_attach(original_bdev, dm_disk(md)->part0);
+	if (r)
+		DMERR("%s: failed to attach interposer",
+			dm_device_name(md));
+	else
+		set_bit(DMF_INTERPOSER_ATTACHED, &md->flags);
+
+	bdev_interposer_unlock(original_bdev);
+
+	unlock_bdev_fs(md, original_bdev);
+
+	bdput(original_bdev);
+
+	return r;
+}
+
+int __dm_detach_interposer(struct mapped_device *md)
+{
+	struct dm_table *map = NULL;
+	struct block_device *original_bdev;
+
+	if (!dm_interposer_attached(md))
+		return 0;
+	/*
+	 * If mapped device is suspended, but should be detached
+	 * we just detach without freeze fs on interposed device.
+	 */
+	map = rcu_dereference_protected(md->map,
+			lockdep_is_held(&md->suspend_lock));
+	if (!map) {
+		/*
+		 * If table is not initialized then interposed device
+		 * cannot be attached
+		 */
+		DMERR("%s: table is not initialized for device",
+			dm_device_name(md));
+		return -EINVAL;
+	}
+
+	original_bdev = get_interposed_bdev(map);
+	if (!original_bdev) {
+		DMERR("%s: interposer cannot get interposed device from table",
+			dm_device_name(md));
+		return -EINVAL;
+	}
+
+	bdev_interposer_lock(original_bdev);
+
+	bdev_interposer_detach(original_bdev);
+	clear_bit(DMF_INTERPOSER_ATTACHED, &md->flags);
+
+	bdev_interposer_unlock(original_bdev);
+
+	bdput(original_bdev);
+	return 0;
+}
 /*
  * We need to be able to change a mapping table under a mounted
  * filesystem.  For example we might want to move some data in
@@ -2599,7 +2786,11 @@  int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
 
 	if (dm_suspended_md(md)) {
-		r = -EINVAL;
+		if (suspend_flags & DM_SUSPEND_DETACH_IP_FLAG)
+			r = __dm_detach_interposer(md);
+		else
+			r = -EINVAL;
+
 		goto out_unlock;
 	}
 
@@ -2645,8 +2836,10 @@  static int __dm_resume(struct mapped_device *md, struct dm_table *map)
 	if (dm_request_based(md))
 		dm_start_queue(md->queue);
 
-	unlock_fs(md);
+	if (md->interpose)
+		return __dm_attach_interposer(md);
 
+	unlock_fs(md);
 	return 0;
 }
 
@@ -2880,6 +3073,11 @@  int dm_suspended_md(struct mapped_device *md)
 	return test_bit(DMF_SUSPENDED, &md->flags);
 }
 
+int dm_interposer_attached(struct mapped_device *md)
+{
+	return test_bit(DMF_INTERPOSER_ATTACHED, &md->flags);
+}
+
 static int dm_post_suspending_md(struct mapped_device *md)
 {
 	return test_bit(DMF_POST_SUSPENDING, &md->flags);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index b441ad772c18..35f71e48abd1 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -28,6 +28,7 @@ 
  */
 #define DM_SUSPEND_LOCKFS_FLAG		(1 << 0)
 #define DM_SUSPEND_NOFLUSH_FLAG		(1 << 1)
+#define DM_SUSPEND_DETACH_IP_FLAG	(1 << 2)
 
 /*
  * Status feature flags
@@ -122,6 +123,11 @@  int dm_deleting_md(struct mapped_device *md);
  */
 int dm_suspended_md(struct mapped_device *md);
 
+/*
+ * Is the interposer of this mapped_device is attached?
+ */
+int dm_interposer_attached(struct mapped_device *md);
+
 /*
  * Internal suspend and resume methods.
  */
@@ -180,7 +186,7 @@  int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only
 int dm_cancel_deferred_remove(struct mapped_device *md);
 int dm_request_based(struct mapped_device *md);
 int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
-			struct dm_dev **result);
+			bool interpose, struct dm_dev **result);
 void dm_put_table_device(struct mapped_device *md, struct dm_dev *d);
 
 int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 5c641f930caf..3a7abb347702 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -159,6 +159,7 @@  struct dm_dev {
 	struct block_device *bdev;
 	struct dax_device *dax_dev;
 	fmode_t mode;
+	bool interpose;
 	char name[16];
 };
 
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index fcff6669137b..73a5b712cd0d 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -362,4 +362,10 @@  enum {
  */
 #define DM_INTERNAL_SUSPEND_FLAG	(1 << 18) /* Out */
 
+/*
+ * If set, the underlying device should open without FMODE_EXCL
+ * and attach mapped device via bdev_interposer.
+ */
+#define DM_INTERPOSE_FLAG		(1 << 19) /* In/Out */
+
 #endif				/* _LINUX_DM_IOCTL_H */