diff mbox series

[1/2] xfs: fix realtime file data space leak

Message ID fe86a7464d77f770736404a9fbfcdfbb04b59826.1574799066.git.osandov@fb.com (mailing list archive)
State Accepted, archived
Headers show
Series xfs: fixes for realtime file truncation | expand

Commit Message

Omar Sandoval Nov. 26, 2019, 8:13 p.m. UTC
From: Omar Sandoval <osandov@fb.com>

Realtime files in XFS allocate extents in rextsize units. However, the
written/unwritten state of those extents is still tracked in blocksize
units. Therefore, a realtime file can be split up into written and
unwritten extents that are not necessarily aligned to the realtime
extent size. __xfs_bunmapi() has some logic to handle these various
corner cases. Consider how it handles the following case:

1. The last extent is unwritten.
2. The last extent is smaller than the realtime extent size.
3. startblock of the last extent is not aligned to the realtime extent
   size, but startblock + blockcount is.

In this case, __xfs_bunmapi() calls xfs_bmap_add_extent_unwritten_real()
to set the second-to-last extent to unwritten. This should merge the
last and second-to-last extents, so __xfs_bunmapi() moves on to the
second-to-last extent.

However, if the size of the last and second-to-last extents combined is
greater than MAXEXTLEN, xfs_bmap_add_extent_unwritten_real() does not
merge the two extents. When that happens, __xfs_bunmapi() skips past the
last extent without unmapping it, thus leaking the space.

Fix it by only unwriting the minimum amount needed to align the last
extent to the realtime extent size, which is guaranteed to merge with
the last extent.

Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

Comments

Darrick J. Wong Dec. 3, 2019, 1:56 a.m. UTC | #1
On Tue, Nov 26, 2019 at 12:13:28PM -0800, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> Realtime files in XFS allocate extents in rextsize units. However, the
> written/unwritten state of those extents is still tracked in blocksize
> units. Therefore, a realtime file can be split up into written and
> unwritten extents that are not necessarily aligned to the realtime
> extent size. __xfs_bunmapi() has some logic to handle these various
> corner cases. Consider how it handles the following case:
> 
> 1. The last extent is unwritten.
> 2. The last extent is smaller than the realtime extent size.
> 3. startblock of the last extent is not aligned to the realtime extent
>    size, but startblock + blockcount is.
> 
> In this case, __xfs_bunmapi() calls xfs_bmap_add_extent_unwritten_real()
> to set the second-to-last extent to unwritten. This should merge the
> last and second-to-last extents, so __xfs_bunmapi() moves on to the
> second-to-last extent.
> 
> However, if the size of the last and second-to-last extents combined is
> greater than MAXEXTLEN, xfs_bmap_add_extent_unwritten_real() does not
> merge the two extents. When that happens, __xfs_bunmapi() skips past the
> last extent without unmapping it, thus leaking the space.
> 
> Fix it by only unwriting the minimum amount needed to align the last
> extent to the realtime extent size, which is guaranteed to merge with
> the last extent.
> 
> Signed-off-by: Omar Sandoval <osandov@fb.com>
> ---
>  fs/xfs/libxfs/xfs_bmap.c | 25 ++++++++++++++-----------
>  1 file changed, 14 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 02469d59c787..6f8791a1e460 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -5376,16 +5376,17 @@ __xfs_bunmapi(
>  		}
>  		div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
>  		if (mod) {
> +			xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
> +
>  			/*
>  			 * Realtime extent is lined up at the end but not
>  			 * at the front.  We'll get rid of full extents if
>  			 * we can.
>  			 */
> -			mod = mp->m_sb.sb_rextsize - mod;
> -			if (del.br_blockcount > mod) {
> -				del.br_blockcount -= mod;
> -				del.br_startoff += mod;
> -				del.br_startblock += mod;
> +			if (del.br_blockcount > off) {
> +				del.br_blockcount -= off;
> +				del.br_startoff += off;
> +				del.br_startblock += off;

Ok, so we make this change so that we no longer change @mod once it's
set by the div64 operation...

>  			} else if (del.br_startoff == start &&
>  				   (del.br_state == XFS_EXT_UNWRITTEN ||
>  				    tp->t_blk_res == 0)) {
> @@ -5403,6 +5404,7 @@ __xfs_bunmapi(
>  				continue;
>  			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
>  				struct xfs_bmbt_irec	prev;
> +				xfs_fileoff_t		unwrite_start;
>  
>  				/*
>  				 * This one is already unwritten.
> @@ -5416,12 +5418,13 @@ __xfs_bunmapi(
>  				ASSERT(!isnullstartblock(prev.br_startblock));
>  				ASSERT(del.br_startblock ==
>  				       prev.br_startblock + prev.br_blockcount);
> -				if (prev.br_startoff < start) {
> -					mod = start - prev.br_startoff;
> -					prev.br_blockcount -= mod;
> -					prev.br_startblock += mod;
> -					prev.br_startoff = start;
> -				}

...and here, we have a @del extent that is unwritten and a @prev extent
that is written.  We aim to trick xfs_bmap_add_extent_unwritten_real
into extending @del towards startoff==0 and returning with @icur
pointing at @del (not @prev) so that the next time we go around the loop
we see an rtextsize-aligned @del and simply unmap it...

> +				unwrite_start = max3(start,
> +						     del.br_startoff - mod,
> +						     prev.br_startoff);

...however, if @prev is too long to convert+combine with @del, the
conversion routine converts @prev to unwritten and returns with @icur
pointing to @prev, not @del.  That's how we leak @del.

This patch fixes that by capping the conversion to the start of the
rtext alignment, which means that we can always merge with @del and
always return with @icur pointing at @del.  Ok, that's exactly what the
commit message says.

It was /really/ helpful to be able to use the test case to walk through
exactly what this patch is trying to fix.

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> +				mod = unwrite_start - prev.br_startoff;
> +				prev.br_startoff = unwrite_start;
> +				prev.br_startblock += mod;
> +				prev.br_blockcount -= mod;
>  				prev.br_state = XFS_EXT_UNWRITTEN;
>  				error = xfs_bmap_add_extent_unwritten_real(tp,
>  						ip, whichfork, &icur, &cur,
> -- 
> 2.24.0
>
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 02469d59c787..6f8791a1e460 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5376,16 +5376,17 @@  __xfs_bunmapi(
 		}
 		div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
 		if (mod) {
+			xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
+
 			/*
 			 * Realtime extent is lined up at the end but not
 			 * at the front.  We'll get rid of full extents if
 			 * we can.
 			 */
-			mod = mp->m_sb.sb_rextsize - mod;
-			if (del.br_blockcount > mod) {
-				del.br_blockcount -= mod;
-				del.br_startoff += mod;
-				del.br_startblock += mod;
+			if (del.br_blockcount > off) {
+				del.br_blockcount -= off;
+				del.br_startoff += off;
+				del.br_startblock += off;
 			} else if (del.br_startoff == start &&
 				   (del.br_state == XFS_EXT_UNWRITTEN ||
 				    tp->t_blk_res == 0)) {
@@ -5403,6 +5404,7 @@  __xfs_bunmapi(
 				continue;
 			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
 				struct xfs_bmbt_irec	prev;
+				xfs_fileoff_t		unwrite_start;
 
 				/*
 				 * This one is already unwritten.
@@ -5416,12 +5418,13 @@  __xfs_bunmapi(
 				ASSERT(!isnullstartblock(prev.br_startblock));
 				ASSERT(del.br_startblock ==
 				       prev.br_startblock + prev.br_blockcount);
-				if (prev.br_startoff < start) {
-					mod = start - prev.br_startoff;
-					prev.br_blockcount -= mod;
-					prev.br_startblock += mod;
-					prev.br_startoff = start;
-				}
+				unwrite_start = max3(start,
+						     del.br_startoff - mod,
+						     prev.br_startoff);
+				mod = unwrite_start - prev.br_startoff;
+				prev.br_startoff = unwrite_start;
+				prev.br_startblock += mod;
+				prev.br_blockcount -= mod;
 				prev.br_state = XFS_EXT_UNWRITTEN;
 				error = xfs_bmap_add_extent_unwritten_real(tp,
 						ip, whichfork, &icur, &cur,