diff mbox series

hibernate: unlock swap bdev for writing when uswsusp is active

Message ID 20200229170825.GX8045@magnolia (mailing list archive)
State New, archived
Headers show
Series hibernate: unlock swap bdev for writing when uswsusp is active | expand

Commit Message

Darrick J. Wong Feb. 29, 2020, 5:08 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

It turns out that there /is/ one use case for programs being able to
write to swap devices, and that is the userspace hibernation code.  The
uswsusp ioctls allow userspace to lease parts of swap devices, so turn
S_SWAPFILE off when invoking suspend.

Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices")
Reported-by: Domenico Andreoli <domenico.andreoli@linux.com>
Reported-by: Marian Klein <mkleinsoft@gmail.com>
Tested-by: Marian Klein <mkleinsoft@gmail.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 include/linux/swap.h |    1 +
 kernel/power/user.c  |   11 ++++++++++-
 mm/swapfile.c        |   26 ++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 1 deletion(-)

Comments

Domenico Andreoli Feb. 29, 2020, 6:07 p.m. UTC | #1
On Sat, Feb 29, 2020 at 09:08:25AM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> It turns out that there /is/ one use case for programs being able to
> write to swap devices, and that is the userspace hibernation code.  The
> uswsusp ioctls allow userspace to lease parts of swap devices, so turn
> S_SWAPFILE off when invoking suspend.
> 
> Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices")
> Reported-by: Domenico Andreoli <domenico.andreoli@linux.com>
> Reported-by: Marian Klein <mkleinsoft@gmail.com>

I also tested it yesterday but was not satisfied; unfortunately, I did
not send my comment in time.

Yes, I confirm that uswsusp works again, but I also verified that
swap_relockall() is never triggered. Therefore, after the first
hibernation cycle the S_SWAPFILE bit remains cleared and
swap_relockall() as a whole is useless.

I'm not sure this patch should be merged in the current form.

Regards,
Domenico

> Tested-by: Marian Klein <mkleinsoft@gmail.com>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  include/linux/swap.h |    1 +
>  kernel/power/user.c  |   11 ++++++++++-
>  mm/swapfile.c        |   26 ++++++++++++++++++++++++++
>  3 files changed, 37 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 1e99f7ac1d7e..add93e205850 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -458,6 +458,7 @@ extern void swap_free(swp_entry_t);
>  extern void swapcache_free_entries(swp_entry_t *entries, int n);
>  extern int free_swap_and_cache(swp_entry_t);
>  extern int swap_type_of(dev_t, sector_t, struct block_device **);
> +extern void swap_relockall(void);
>  extern unsigned int count_swap_pages(int, int);
>  extern sector_t map_swap_page(struct page *, struct block_device **);
>  extern sector_t swapdev_block(int, pgoff_t);
> diff --git a/kernel/power/user.c b/kernel/power/user.c
> index 77438954cc2b..b11f7037ce5e 100644
> --- a/kernel/power/user.c
> +++ b/kernel/power/user.c
> @@ -271,6 +271,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
>  			break;
>  		}
>  		error = hibernation_restore(data->platform_support);
> +		if (!error)
> +			swap_relockall();
>  		break;
>  
>  	case SNAPSHOT_FREE:
> @@ -372,10 +374,17 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
>  			 */
>  			swdev = new_decode_dev(swap_area.dev);
>  			if (swdev) {
> +				struct block_device *bd;
> +
>  				offset = swap_area.offset;
> -				data->swap = swap_type_of(swdev, offset, NULL);
> +				data->swap = swap_type_of(swdev, offset, &bd);
>  				if (data->swap < 0)
>  					error = -ENODEV;
> +
> +				inode_lock(bd->bd_inode);
> +				bd->bd_inode->i_flags &= ~S_SWAPFILE;
> +				inode_unlock(bd->bd_inode);
> +				bdput(bd);
>  			} else {
>  				data->swap = -1;
>  				error = -EINVAL;
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index b2a2e45c9a36..439bfb7263d3 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -1799,6 +1799,32 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
>  	return -ENODEV;
>  }
>  
> +/* Re-lock swap devices after resuming from userspace suspend. */
> +void swap_relockall(void)
> +{
> +	int type;
> +
> +	spin_lock(&swap_lock);
> +	for (type = 0; type < nr_swapfiles; type++) {
> +		struct swap_info_struct *sis = swap_info[type];
> +		struct block_device *bdev = bdgrab(sis->bdev);
> +
> +		/*
> +		 * uswsusp only knows how to suspend to block devices, so we
> +		 * can skip swap files.
> +		 */
> +		if (!(sis->flags & SWP_WRITEOK) ||
> +		    !(sis->flags & SWP_BLKDEV))
> +			continue;
> +
> +		inode_lock(bdev->bd_inode);
> +		bdev->bd_inode->i_flags |= S_SWAPFILE;
> +		inode_unlock(bdev->bd_inode);
> +		bdput(bdev);
> +	}
> +	spin_unlock(&swap_lock);
> +}
> +
>  /*
>   * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
>   * corresponding to given index in swap_info (swap type).
Darrick J. Wong Feb. 29, 2020, 6:38 p.m. UTC | #2
On Sat, Feb 29, 2020 at 07:07:16PM +0100, Domenico Andreoli wrote:
> On Sat, Feb 29, 2020 at 09:08:25AM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > It turns out that there /is/ one use case for programs being able to
> > write to swap devices, and that is the userspace hibernation code.  The
> > uswsusp ioctls allow userspace to lease parts of swap devices, so turn
> > S_SWAPFILE off when invoking suspend.
> > 
> > Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices")
> > Reported-by: Domenico Andreoli <domenico.andreoli@linux.com>
> > Reported-by: Marian Klein <mkleinsoft@gmail.com>
> 
> I also tested it yesterday but was not satisfied, unfortunately I did
> not come with my comment in time.
> 
> Yes, I confirm that the uswsusp works again but also checked that
> swap_relockall() is not triggered at all and therefore after the first
> hibernation cycle the S_SWAPFILE bit remains cleared and the whole
> swap_relockall() is useless.
> 
> I'm not sure this patch should be merged in the current form.

NNGGHHGGHGH /me is rapidly losing his sanity and will soon just revert
the whole security feature because I'm getting fed up with people
yelling at me *while I'm on vacation* trying to *restore* my sanity.  I
really don't want to be QAing userspace-directed hibernation right now.

...right, the patch is broken because we have to relock the swapfiles in
whatever code executes after we jump back to the restored kernel, not in
the one that's doing the restoring.  Does this help?

OTOH, maybe we should just leave the swapfiles unlocked after resume.
Userspace has clearly demonstrated the one usecase for writing to the
swapfile, which means anyone could have jumped in while uswsusp was
running and written whatever crap they wanted to the parts of the swap
file that weren't leased for the hibernate image.

--D

From: Darrick J. Wong <darrick.wong@oracle.com>
Subject: [PATCH] hibernate: unlock swap bdev for writing when uswsusp is active

It turns out that there /is/ one use case for programs being able to
write to swap devices, and that is the userspace hibernation code.  The
uswsusp ioctls allow userspace to lease parts of swap devices, so turn
S_SWAPFILE off when invoking suspend.

Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices")
Reported-by: Domenico Andreoli <domenico.andreoli@linux.com>
Reported-by: Marian Klein <mkleinsoft@gmail.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 include/linux/swap.h     |    1 +
 kernel/power/hibernate.c |    4 ++++
 kernel/power/user.c      |    9 ++++++++-
 mm/swapfile.c            |   26 ++++++++++++++++++++++++++
 4 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1e99f7ac1d7e..add93e205850 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -458,6 +458,7 @@ extern void swap_free(swp_entry_t);
 extern void swapcache_free_entries(swp_entry_t *entries, int n);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
+extern void swap_relockall(void);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct page *, struct block_device **);
 extern sector_t swapdev_block(int, pgoff_t);
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 6dbeedb7354c..aa5a6701614d 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -31,6 +31,7 @@
 #include <linux/genhd.h>
 #include <linux/ktime.h>
 #include <linux/security.h>
+#include <linux/swap.h>
 #include <trace/events/power.h>
 
 #include "power.h"
@@ -399,6 +400,9 @@ int hibernation_snapshot(int platform_mode)
 	 * image creation has failed and (2) after a successful restore.
 	 */
 
+	/* Lock the swap files, just in case uswsusp was active. */
+	swap_relockall();
+
 	/* We may need to release the preallocated image pages here. */
 	if (error || !in_suspend)
 		swsusp_free();
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 77438954cc2b..a3ae9cbbfcf0 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -372,10 +372,17 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 			 */
 			swdev = new_decode_dev(swap_area.dev);
 			if (swdev) {
+				struct block_device *bd;
+
 				offset = swap_area.offset;
-				data->swap = swap_type_of(swdev, offset, NULL);
+				data->swap = swap_type_of(swdev, offset, &bd);
 				if (data->swap < 0)
 					error = -ENODEV;
+
+				inode_lock(bd->bd_inode);
+				bd->bd_inode->i_flags &= ~S_SWAPFILE;
+				inode_unlock(bd->bd_inode);
+				bdput(bd);
 			} else {
 				data->swap = -1;
 				error = -EINVAL;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b2a2e45c9a36..439bfb7263d3 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1799,6 +1799,32 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
 	return -ENODEV;
 }
 
+/* Re-lock swap devices after resuming from userspace suspend. */
+void swap_relockall(void)
+{
+	int type;
+
+	spin_lock(&swap_lock);
+	for (type = 0; type < nr_swapfiles; type++) {
+		struct swap_info_struct *sis = swap_info[type];
+		struct block_device *bdev = bdgrab(sis->bdev);
+
+		/*
+		 * uswsusp only knows how to suspend to block devices, so we
+		 * can skip swap files.
+		 */
+		if (!(sis->flags & SWP_WRITEOK) ||
+		    !(sis->flags & SWP_BLKDEV))
+			continue;
+
+		inode_lock(bdev->bd_inode);
+		bdev->bd_inode->i_flags |= S_SWAPFILE;
+		inode_unlock(bdev->bd_inode);
+		bdput(bdev);
+	}
+	spin_unlock(&swap_lock);
+}
+
 /*
  * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
  * corresponding to given index in swap_info (swap type).
Domenico Andreoli Feb. 29, 2020, 8:02 p.m. UTC | #3
On Sat, Feb 29, 2020 at 10:38:20AM -0800, Darrick J. Wong wrote:
> On Sat, Feb 29, 2020 at 07:07:16PM +0100, Domenico Andreoli wrote:
> > On Sat, Feb 29, 2020 at 09:08:25AM -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > It turns out that there /is/ one use case for programs being able to
> > > write to swap devices, and that is the userspace hibernation code.  The
> > > uswsusp ioctls allow userspace to lease parts of swap devices, so turn
> > > S_SWAPFILE off when invoking suspend.
> > > 
> > > Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices")
> > > Reported-by: Domenico Andreoli <domenico.andreoli@linux.com>
> > > Reported-by: Marian Klein <mkleinsoft@gmail.com>
> > 
> > I also tested it yesterday but was not satisfied, unfortunately I did
> > not come with my comment in time.
> > 
> > Yes, I confirm that the uswsusp works again but also checked that
> > swap_relockall() is not triggered at all and therefore after the first
> > hibernation cycle the S_SWAPFILE bit remains cleared and the whole
> > swap_relockall() is useless.
> > 
> > I'm not sure this patch should be merged in the current form.
> 
> NNGGHHGGHGH /me is rapidly losing his sanity and will soon just revert
> the whole security feature because I'm getting fed up with people
> yelling at me *while I'm on vacation* trying to *restore* my sanity.  I
> really don't want to be QAing userspace-directed hibernation right now.

Maybe we could proceed with the first patch to fix the regression and
postpone the improved fix to a later patch? Don't lose your sanity over
this.

> ...right, the patch is broken because we have to relock the swapfiles in
> whatever code executes after we jump back to the restored kernel, not in
> the one that's doing the restoring.  Does this help?

I made a few unsuccessful attempts in kernel/power/hibernate.c and
eventually I'm switching to qemu to speed up the test cycle.

> OTOH, maybe we should just leave the swapfiles unlocked after resume.
> Userspace has clearly demonstrated the one usecase for writing to the
> swapfile, which means anyone could have jumped in while uswsusp was
> running and written whatever crap they wanted to the parts of the swap
> file that weren't leased for the hibernate image.

Essentially, if hibernation is supported, the swapfile is not totally
safe. Maybe user-space hibernation should be a separate option.

> 
> --D
Rafael J. Wysocki March 1, 2020, 9:35 p.m. UTC | #4
On Sat, Feb 29, 2020 at 9:02 PM Domenico Andreoli
<domenico.andreoli@linux.com> wrote:
>
> On Sat, Feb 29, 2020 at 10:38:20AM -0800, Darrick J. Wong wrote:
> > On Sat, Feb 29, 2020 at 07:07:16PM +0100, Domenico Andreoli wrote:
> > > On Sat, Feb 29, 2020 at 09:08:25AM -0800, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > >
> > > > It turns out that there /is/ one use case for programs being able to
> > > > write to swap devices, and that is the userspace hibernation code.  The
> > > > uswsusp ioctls allow userspace to lease parts of swap devices, so turn
> > > > S_SWAPFILE off when invoking suspend.
> > > >
> > > > Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices")
> > > > Reported-by: Domenico Andreoli <domenico.andreoli@linux.com>
> > > > Reported-by: Marian Klein <mkleinsoft@gmail.com>
> > >
> > > I also tested it yesterday but was not satisfied, unfortunately I did
> > > not come with my comment in time.
> > >
> > > Yes, I confirm that the uswsusp works again but also checked that
> > > swap_relockall() is not triggered at all and therefore after the first
> > > hibernation cycle the S_SWAPFILE bit remains cleared and the whole
> > > swap_relockall() is useless.
> > >
> > > I'm not sure this patch should be merged in the current form.
> >
> > NNGGHHGGHGH /me is rapidly losing his sanity and will soon just revert
> > the whole security feature because I'm getting fed up with people
> > yelling at me *while I'm on vacation* trying to *restore* my sanity.  I
> > really don't want to be QAing userspace-directed hibernation right now.
>
> Maybe we could proceed with the first patch to amend the regression and
> postpone the improved fix to a later patch? Don't loose sanity for this.

I would concur here.

> > ...right, the patch is broken because we have to relock the swapfiles in
> > whatever code executes after we jump back to the restored kernel, not in
> > the one that's doing the restoring.  Does this help?
>
> I made a few unsuccessful attempts in kernel/power/hibernate.c and
> eventually I'm switching to qemu to speed up the test cycle.
>
> > OTOH, maybe we should just leave the swapfiles unlocked after resume.
> > Userspace has clearly demonstrated the one usecase for writing to the
> > swapfile, which means anyone could have jumped in while uswsusp was
> > running and written whatever crap they wanted to the parts of the swap
> > file that weren't leased for the hibernate image.
>
> Essentially, if the hibernation is supported the swapfile is not totally
> safe.

But that's only the case with the userspace variant, isn't it?

> Maybe user-space hibernation should be a separate option.

That actually is not a bad idea at all in my view.

Thanks!
Marian Klein March 2, 2020, 4:51 a.m. UTC | #5
Hi Darrick

If security is a concern, maybe it should be a kernel config option
( CONFIG_ENABLE_HIBERNATION  =  Y/N ).
For security-hardened server systems with no need for hibernation, you
simply set it to N.

Also, the concern that another process can hijack s2disk is unlikely to
materialize. You have to realize that all processors except for one
are down (see dmesg below) by the time the snapshot image is being
written. So you can disable scheduler time sharing on this one remaining
processor when you get the snapshot request, and no other process can
jump in.

I think you can allow (unlock) writing ONLY to the device specified in
the kernel parameter
resume=/dev/<swap_device>
and only when CONFIG_ENABLE_HIBERNATION  =  Y, there is only one CPU
active (CPU0), the time-sharing scheduler is down, and the snapshot was
invoked by the user group given in another kernel parameter,
snapshot_gid.
For me that is secure enough. If any rogue program pretended to be a
legitimate user space hibernation program, it would have to go through
an actual hibernation cycle (powering off the computer), and that would
be obvious to the user if it was not triggered by him or configured by
him to trigger automatically.
There is no way a program could secretly write to the resume/swap device.

For normal users, security is often less of a concern, as they know
who works with their laptop, etc.


[ 1243.100159] Disabling non-boot CPUs ...
[ 1243.101448] smpboot: CPU 1 is now offline
[ 1243.103291] smpboot: CPU 2 is now offline
[ 1243.104851] smpboot: CPU 3 is now offline
[ 1243.106522] smpboot: CPU 4 is now offline
[ 1243.108200] smpboot: CPU 5 is now offline
[ 1243.109928] smpboot: CPU 6 is now offline
[ 1243.111501] smpboot: CPU 7 is now offline
[ 1243.113364] PM: Creating hibernation image:
[ 1243.597752] PM: Need to copy 161991 pages
[ 1243.597756] PM: Normal pages needed: 161991 + 1024, available pages: 3967507
[ 1244.202907] PM: Hibernation image created (161991 pages copied)

On Sun, 1 Mar 2020 at 21:35, Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Sat, Feb 29, 2020 at 9:02 PM Domenico Andreoli
> <domenico.andreoli@linux.com> wrote:
> >
> > On Sat, Feb 29, 2020 at 10:38:20AM -0800, Darrick J. Wong wrote:
> > > On Sat, Feb 29, 2020 at 07:07:16PM +0100, Domenico Andreoli wrote:
> > > > On Sat, Feb 29, 2020 at 09:08:25AM -0800, Darrick J. Wong wrote:
> > > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > >
> > > > > It turns out that there /is/ one use case for programs being able to
> > > > > write to swap devices, and that is the userspace hibernation code.  The
> > > > > uswsusp ioctls allow userspace to lease parts of swap devices, so turn
> > > > > S_SWAPFILE off when invoking suspend.
> > > > >
> > > > > Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices")
> > > > > Reported-by: Domenico Andreoli <domenico.andreoli@linux.com>
> > > > > Reported-by: Marian Klein <mkleinsoft@gmail.com>
> > > >
> > > > I also tested it yesterday but was not satisfied, unfortunately I did
> > > > not come with my comment in time.
> > > >
> > > > Yes, I confirm that the uswsusp works again but also checked that
> > > > swap_relockall() is not triggered at all and therefore after the first
> > > > hibernation cycle the S_SWAPFILE bit remains cleared and the whole
> > > > swap_relockall() is useless.
> > > >
> > > > I'm not sure this patch should be merged in the current form.
> > >
> > > NNGGHHGGHGH /me is rapidly losing his sanity and will soon just revert
> > > the whole security feature because I'm getting fed up with people
> > > yelling at me *while I'm on vacation* trying to *restore* my sanity.  I
> > > really don't want to be QAing userspace-directed hibernation right now.
> >
> > Maybe we could proceed with the first patch to amend the regression and
> > postpone the improved fix to a later patch? Don't loose sanity for this.
>
> I would concur here.
>
> > > ...right, the patch is broken because we have to relock the swapfiles in
> > > whatever code executes after we jump back to the restored kernel, not in
> > > the one that's doing the restoring.  Does this help?
> >
> > I made a few unsuccessful attempts in kernel/power/hibernate.c and
> > eventually I'm switching to qemu to speed up the test cycle.
> >
> > > OTOH, maybe we should just leave the swapfiles unlocked after resume.
> > > Userspace has clearly demonstrated the one usecase for writing to the
> > > swapfile, which means anyone could have jumped in while uswsusp was
> > > running and written whatever crap they wanted to the parts of the swap
> > > file that weren't leased for the hibernate image.
> >
> > Essentially, if the hibernation is supported the swapfile is not totally
> > safe.
>
> But that's only the case with the userspace variant, isn't it?
>
> > Maybe user-space hibernation should be a separate option.
>
> That actually is not a bad idea at all in my view.
>
> Thanks!
Darrick J. Wong March 3, 2020, 7:02 p.m. UTC | #6
On Sun, Mar 01, 2020 at 10:35:36PM +0100, Rafael J. Wysocki wrote:
> On Sat, Feb 29, 2020 at 9:02 PM Domenico Andreoli
> <domenico.andreoli@linux.com> wrote:
> >
> > On Sat, Feb 29, 2020 at 10:38:20AM -0800, Darrick J. Wong wrote:
> > > On Sat, Feb 29, 2020 at 07:07:16PM +0100, Domenico Andreoli wrote:
> > > > On Sat, Feb 29, 2020 at 09:08:25AM -0800, Darrick J. Wong wrote:
> > > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > >
> > > > > It turns out that there /is/ one use case for programs being able to
> > > > > write to swap devices, and that is the userspace hibernation code.  The
> > > > > uswsusp ioctls allow userspace to lease parts of swap devices, so turn
> > > > > S_SWAPFILE off when invoking suspend.
> > > > >
> > > > > Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices")
> > > > > Reported-by: Domenico Andreoli <domenico.andreoli@linux.com>
> > > > > Reported-by: Marian Klein <mkleinsoft@gmail.com>
> > > >
> > > > I also tested it yesterday but was not satisfied, unfortunately I did
> > > > not come with my comment in time.
> > > >
> > > > Yes, I confirm that the uswsusp works again but also checked that
> > > > swap_relockall() is not triggered at all and therefore after the first
> > > > hibernation cycle the S_SWAPFILE bit remains cleared and the whole
> > > > swap_relockall() is useless.
> > > >
> > > > I'm not sure this patch should be merged in the current form.
> > >
> > > NNGGHHGGHGH /me is rapidly losing his sanity and will soon just revert
> > > the whole security feature because I'm getting fed up with people
> > > yelling at me *while I'm on vacation* trying to *restore* my sanity.  I
> > > really don't want to be QAing userspace-directed hibernation right now.
> >
> > Maybe we could proceed with the first patch to amend the regression and
> > postpone the improved fix to a later patch? Don't loose sanity for this.
> 
> I would concur here.
> 
> > > ...right, the patch is broken because we have to relock the swapfiles in
> > > whatever code executes after we jump back to the restored kernel, not in
> > > the one that's doing the restoring.  Does this help?
> >
> > I made a few unsuccessful attempts in kernel/power/hibernate.c and
> > eventually I'm switching to qemu to speed up the test cycle.
> >
> > > OTOH, maybe we should just leave the swapfiles unlocked after resume.
> > > Userspace has clearly demonstrated the one usecase for writing to the
> > > swapfile, which means anyone could have jumped in while uswsusp was
> > > running and written whatever crap they wanted to the parts of the swap
> > > file that weren't leased for the hibernate image.
> >
> > Essentially, if the hibernation is supported the swapfile is not totally
> > safe.
> 
> But that's only the case with the userspace variant, isn't it?

Yes.

> > Maybe user-space hibernation should be a separate option.
> 
> That actually is not a bad idea at all in my view.

The trouble with kconfig options is that the distros will be pressured
into setting CONFIG_HIBERNATE_USERSPACE=y to avoid regressing their
uswsusp users, which makes the added security code pointless.  As this
has clearly sucked me into a conflict that I don't have the resources to
pursue, I'm going to revert the write patch checks and move on with
life.

--D

> Thanks!
Domenico Andreoli March 3, 2020, 10:51 p.m. UTC | #7
On March 3, 2020 7:02:12 PM UTC, "Darrick J. Wong" <darrick.wong@oracle.com> wrote:
>On Sun, Mar 01, 2020 at 10:35:36PM +0100, Rafael J. Wysocki wrote:
>> On Sat, Feb 29, 2020 at 9:02 PM Domenico Andreoli
>> <domenico.andreoli@linux.com> wrote:
>> >
>> > On Sat, Feb 29, 2020 at 10:38:20AM -0800, Darrick J. Wong wrote:
>> > > On Sat, Feb 29, 2020 at 07:07:16PM +0100, Domenico Andreoli
>wrote:
>> > > > On Sat, Feb 29, 2020 at 09:08:25AM -0800, Darrick J. Wong
>wrote:
>> > > > > From: Darrick J. Wong <darrick.wong@oracle.com>
>> > > > >
>> > > > > It turns out that there /is/ one use case for programs being
>able to
>> > > > > write to swap devices, and that is the userspace hibernation
>code.  The
>> > > > > uswsusp ioctls allow userspace to lease parts of swap
>devices, so turn
>> > > > > S_SWAPFILE off when invoking suspend.
>> > > > >
>> > > > > Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap
>devices")
>> > > > > Reported-by: Domenico Andreoli <domenico.andreoli@linux.com>
>> > > > > Reported-by: Marian Klein <mkleinsoft@gmail.com>
>> > > >
>> > > > I also tested it yesterday but was not satisfied, unfortunately
>I did
>> > > > not come with my comment in time.
>> > > >
>> > > > Yes, I confirm that the uswsusp works again but also checked
>that
>> > > > swap_relockall() is not triggered at all and therefore after
>the first
>> > > > hibernation cycle the S_SWAPFILE bit remains cleared and the
>whole
>> > > > swap_relockall() is useless.
>> > > >
>> > > > I'm not sure this patch should be merged in the current form.
>> > >
>> > > NNGGHHGGHGH /me is rapidly losing his sanity and will soon just
>revert
>> > > the whole security feature because I'm getting fed up with people
>> > > yelling at me *while I'm on vacation* trying to *restore* my
>sanity.  I
>> > > really don't want to be QAing userspace-directed hibernation
>right now.
>> >
>> > Maybe we could proceed with the first patch to amend the regression
>and
>> > postpone the improved fix to a later patch? Don't loose sanity for
>this.
>> 
>> I would concur here.
>> 
>> > > ...right, the patch is broken because we have to relock the
>swapfiles in
>> > > whatever code executes after we jump back to the restored kernel,
>not in
>> > > the one that's doing the restoring.  Does this help?
>> >
>> > I made a few unsuccessful attempts in kernel/power/hibernate.c and
>> > eventually I'm switching to qemu to speed up the test cycle.
>> >
>> > > OTOH, maybe we should just leave the swapfiles unlocked after
>resume.
>> > > Userspace has clearly demonstrated the one usecase for writing to
>the
>> > > swapfile, which means anyone could have jumped in while uswsusp
>was
>> > > running and written whatever crap they wanted to the parts of the
>swap
>> > > file that weren't leased for the hibernate image.
>> >
>> > Essentially, if the hibernation is supported the swapfile is not
>totally
>> > safe.
>> 
>> But that's only the case with the userspace variant, isn't it?
>
>Yes.
>
>> > Maybe user-space hibernation should be a separate option.
>> 
>> That actually is not a bad idea at all in my view.
>
>The trouble with kconfig options is that the distros will be pressued
>into setting CONFIG_HIBERNATE_USERSPACE=y to avoid regressing their
>uswsusp users, which makes the added security code pointless.  As this

True, but there are not only distros; otherwise the kernel would not have any options at all.

It's actually very nice that if hibernation is disabled no userspace is ever allowed to write to the swap.

>has clearly sucked me into a conflict that I don't have the resources
>to
>pursue, I'm going to revert the write patch checks and move on with
>life.

I don't see the need to revert anything; I can deal with these issues if you are busy with something else.

>
>--D
>
>> Thanks!
Darrick J. Wong March 4, 2020, 1:18 a.m. UTC | #8
On Tue, Mar 03, 2020 at 10:51:22PM +0000, Domenico Andreoli wrote:
> 
> 
> On March 3, 2020 7:02:12 PM UTC, "Darrick J. Wong" <darrick.wong@oracle.com> wrote:
> >On Sun, Mar 01, 2020 at 10:35:36PM +0100, Rafael J. Wysocki wrote:
> >> On Sat, Feb 29, 2020 at 9:02 PM Domenico Andreoli
> >> <domenico.andreoli@linux.com> wrote:
> >> >
> >> > On Sat, Feb 29, 2020 at 10:38:20AM -0800, Darrick J. Wong wrote:
> >> > > On Sat, Feb 29, 2020 at 07:07:16PM +0100, Domenico Andreoli
> >wrote:
> >> > > > On Sat, Feb 29, 2020 at 09:08:25AM -0800, Darrick J. Wong
> >wrote:
> >> > > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> >> > > > >
> >> > > > > It turns out that there /is/ one use case for programs being
> >able to
> >> > > > > write to swap devices, and that is the userspace hibernation
> >code.  The
> >> > > > > uswsusp ioctls allow userspace to lease parts of swap
> >devices, so turn
> >> > > > > S_SWAPFILE off when invoking suspend.
> >> > > > >
> >> > > > > Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap
> >devices")
> >> > > > > Reported-by: Domenico Andreoli <domenico.andreoli@linux.com>
> >> > > > > Reported-by: Marian Klein <mkleinsoft@gmail.com>
> >> > > >
> >> > > > I also tested it yesterday but was not satisfied, unfortunately
> >I did
> >> > > > not come with my comment in time.
> >> > > >
> >> > > > Yes, I confirm that the uswsusp works again but also checked
> >that
> >> > > > swap_relockall() is not triggered at all and therefore after
> >the first
> >> > > > hibernation cycle the S_SWAPFILE bit remains cleared and the
> >whole
> >> > > > swap_relockall() is useless.
> >> > > >
> >> > > > I'm not sure this patch should be merged in the current form.
> >> > >
> >> > > NNGGHHGGHGH /me is rapidly losing his sanity and will soon just
> >revert
> >> > > the whole security feature because I'm getting fed up with people
> >> > > yelling at me *while I'm on vacation* trying to *restore* my
> >sanity.  I
> >> > > really don't want to be QAing userspace-directed hibernation
> >right now.
> >> >
> >> > Maybe we could proceed with the first patch to amend the regression
> >and
> >> > postpone the improved fix to a later patch? Don't loose sanity for
> >this.
> >> 
> >> I would concur here.
> >> 
> >> > > ...right, the patch is broken because we have to relock the
> >swapfiles in
> >> > > whatever code executes after we jump back to the restored kernel,
> >not in
> >> > > the one that's doing the restoring.  Does this help?
> >> >
> >> > I made a few unsuccessful attempts in kernel/power/hibernate.c and
> >> > eventually I'm switching to qemu to speed up the test cycle.
> >> >
> >> > > OTOH, maybe we should just leave the swapfiles unlocked after
> >resume.
> >> > > Userspace has clearly demonstrated the one usecase for writing to
> >the
> >> > > swapfile, which means anyone could have jumped in while uswsusp
> >was
> >> > > running and written whatever crap they wanted to the parts of the
> >swap
> >> > > file that weren't leased for the hibernate image.
> >> >
> >> > Essentially, if the hibernation is supported the swapfile is not
> >totally
> >> > safe.
> >> 
> >> But that's only the case with the userspace variant, isn't it?
> >
> >Yes.
> >
> >> > Maybe user-space hibernation should be a separate option.
> >> 
> >> That actually is not a bad idea at all in my view.
> >
> >The trouble with kconfig options is that the distros will be pressued
> >into setting CONFIG_HIBERNATE_USERSPACE=y to avoid regressing their
> >uswsusp users, which makes the added security code pointless.  As this
> 
> True but there are not only distros otherwise the kernel would not
> have any option at all.
> 
> It's actually very nice that if hibernation is disabled no userspace
> is ever allowed to write to the swap.
> 
> >has clearly sucked me into a conflict that I don't have the resources
> >to
> >pursue, I'm going to revert the write patch checks and move on with
> >life.
> 
> I don't see the need of reverting anything, I can deal with these
> issues if you are busy on something else.

If you want to work on the patch, please do!  Starting from the revert
patch I sent earlier, I /think/ only the first chunk (the one that
touches blkdev_write_iter) of that patch actually has to be applied to
re-enable uswsusp.  That could probably be turned into:

	if (IS_SWAPFILE(...) && !IS_ENABLED(HIBERNATION))
		return -ETXTBSY;

Though perhaps a better thing to check here rather than the Kconfig
option is whether or not the system is locked out against hibernation?
e.g.,

	if (IS_SWAPFILE(...) && !hibernation_available())
		return -ETXTBSY;

--D

> >
> >--D
> >
> >> Thanks!
Domenico Andreoli March 4, 2020, 8:34 a.m. UTC | #9
On Tue, Mar 03, 2020 at 05:18:40PM -0800, Darrick J. Wong wrote:
> On Tue, Mar 03, 2020 at 10:51:22PM +0000, Domenico Andreoli wrote:
> > 
> > I don't see the need of reverting anything, I can deal with these
> > issues if you are busy on something else.
> 
> If you want to work on the patch, please do!  Starting from the revert
> patch I sent earlier, I /think/ only the first chunk (the one that
> touches blkdev_write_iter) of that patch actually has to be applied to
> re-enable uswsusp.  That could probably be turned into:
> 
> 	if (IS_SWAPFILE(...) && !IS_ENABLED(HIBERNATION))
> 		return -ETXTBSY;

I've just sent such a patch; I don't know how it will interact with
yesterday's full revert, which akpm has already taken into his tree.

Ideally this should go in 5.6-rc and also in stable kernels > 5.2.

> 
> Though perhaps a better thing to check here rather than the Kconfig
> option is whether or not the system is locked out against hibernation?
> e.g.,
> 
> 	if (IS_SWAPFILE(...) && !hibernation_available())
> 		return -ETXTBSY;

This is the kind of improved fix I'm going to prepare for a coming
merge window.

Regards,
Domenico
Domenico Andreoli April 20, 2020, 6:52 p.m. UTC | #10
On Sun, Mar 01, 2020 at 10:35:36PM +0100, Rafael J. Wysocki wrote:
> On Sat, Feb 29, 2020 at 9:02 PM Domenico Andreoli <domenico.andreoli@linux.com> wrote:
> >
> > Maybe user-space hibernation should be a separate option.
> 
> That actually is not a bad idea at all in my view.

I prepared a patch for this:
https://lore.kernel.org/linux-pm/20200413190843.044112674@gmail.com/

Regards,
Domenico
Darrick J. Wong April 21, 2020, 3:43 p.m. UTC | #11
On Mon, Apr 20, 2020 at 08:52:55PM +0200, Domenico Andreoli wrote:
> On Sun, Mar 01, 2020 at 10:35:36PM +0100, Rafael J. Wysocki wrote:
> > On Sat, Feb 29, 2020 at 9:02 PM Domenico Andreoli <domenico.andreoli@linux.com> wrote:
> > >
> > > Maybe user-space hibernation should be a separate option.
> > 
> > That actually is not a bad idea at all in my view.
> 
> I prepared a patch for this:
> https://lore.kernel.org/linux-pm/20200413190843.044112674@gmail.com/

If you succeed in making uswsusp a kconfig option, can you amend the
"!hibernation_available()" test in blkdev_write_iter so that users of
in-kernel hibernate are protected against userspace swap device
scribbles, please?

--D

> Regards,
> Domenico
> 
> -- 
> rsa4096: 3B10 0CA1 8674 ACBA B4FE  FCD2 CE5B CF17 9960 DE13
> ed25519: FFB4 0CC3 7F2E 091D F7DA  356E CC79 2832 ED38 CB05
Domenico Andreoli April 21, 2020, 6:39 p.m. UTC | #12
On Tue, Apr 21, 2020 at 08:43:33AM -0700, Darrick J. Wong wrote:
> On Mon, Apr 20, 2020 at 08:52:55PM +0200, Domenico Andreoli wrote:
> > On Sun, Mar 01, 2020 at 10:35:36PM +0100, Rafael J. Wysocki wrote:
> > > On Sat, Feb 29, 2020 at 9:02 PM Domenico Andreoli <domenico.andreoli@linux.com> wrote:
> > > >
> > > > Maybe user-space hibernation should be a separate option.
> > > 
> > > That actually is not a bad idea at all in my view.
> > 
> > I prepared a patch for this:
> > https://lore.kernel.org/linux-pm/20200413190843.044112674@gmail.com/
> 
> If you succeed in making uswsusp a kconfig option, can you amend the
> "!hibernation_available()" test in blkdev_write_iter so that users of
> in-kernel hibernate are protected against userspace swap device
> scribbles, please?

Yes, that's the plan.

> 
> --D

Domenico
diff mbox series

Patch

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1e99f7ac1d7e..add93e205850 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -458,6 +458,7 @@  extern void swap_free(swp_entry_t);
 extern void swapcache_free_entries(swp_entry_t *entries, int n);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
+extern void swap_relockall(void);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct page *, struct block_device **);
 extern sector_t swapdev_block(int, pgoff_t);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 77438954cc2b..b11f7037ce5e 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -271,6 +271,8 @@  static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 			break;
 		}
 		error = hibernation_restore(data->platform_support);
+		if (!error)
+			swap_relockall();
 		break;
 
 	case SNAPSHOT_FREE:
@@ -372,10 +374,17 @@  static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 			 */
 			swdev = new_decode_dev(swap_area.dev);
 			if (swdev) {
+				struct block_device *bd;
+
 				offset = swap_area.offset;
-				data->swap = swap_type_of(swdev, offset, NULL);
+				data->swap = swap_type_of(swdev, offset, &bd);
 				if (data->swap < 0)
 					error = -ENODEV;
+
+				inode_lock(bd->bd_inode);
+				bd->bd_inode->i_flags &= ~S_SWAPFILE;
+				inode_unlock(bd->bd_inode);
+				bdput(bd);
 			} else {
 				data->swap = -1;
 				error = -EINVAL;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b2a2e45c9a36..439bfb7263d3 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1799,6 +1799,32 @@  int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
 	return -ENODEV;
 }
 
+/* Re-lock (set S_SWAPFILE on) block swap devices after a uswsusp resume. */
+void swap_relockall(void)
+{
+	int type;
+
+	spin_lock(&swap_lock);
+	for (type = 0; type < nr_swapfiles; type++) {
+		struct swap_info_struct *sis = swap_info[type];
+		struct block_device *bdev = bdgrab(sis->bdev);
+
+		/*
+		 * Only SWP_WRITEOK block devices are relocked; uswsusp cannot
+		 * use swap files. NOTE(review): continue skips bdput(bdev).
+		 */
+		if (!(sis->flags & SWP_WRITEOK) ||
+		    !(sis->flags & SWP_BLKDEV))
+			continue;
+
+		inode_lock(bdev->bd_inode);
+		bdev->bd_inode->i_flags |= S_SWAPFILE;
+		inode_unlock(bdev->bd_inode);
+		bdput(bdev);
+	}
+	spin_unlock(&swap_lock);
+}
+
 /*
  * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
  * corresponding to given index in swap_info (swap type).