diff mbox series

[2/6] xfs: invert the realtime summary cache

Message ID e3ae5bfc7cd4b640e83a25f001169d4ae50d797a.1687296675.git.osandov@osandov.com (mailing list archive)
State Superseded, archived
Headers show
Series xfs: CPU usage optimizations for realtime allocator | expand

Commit Message

Omar Sandoval June 20, 2023, 9:32 p.m. UTC
From: Omar Sandoval <osandov@fb.com>

In commit 355e3532132b ("xfs: cache minimum realtime summary level"), I
added a cache of the minimum level of the realtime summary that has any
free extents. However, it turns out that the _maximum_ level is more
useful for upcoming optimizations, and basically equivalent for the
existing usage. So, let's change the meaning of the cache to be the
maximum level + 1, or 0 if there are no free extents.

Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 fs/xfs/libxfs/xfs_rtbitmap.c |  6 +++---
 fs/xfs/xfs_mount.h           |  6 +++---
 fs/xfs/xfs_rtalloc.c         | 31 +++++++++++++++++++------------
 3 files changed, 25 insertions(+), 18 deletions(-)

Comments

Darrick J. Wong July 12, 2023, 10:40 p.m. UTC | #1
On Tue, Jun 20, 2023 at 02:32:12PM -0700, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> In commit 355e3532132b ("xfs: cache minimum realtime summary level"), I
> added a cache of the minimum level of the realtime summary that has any
> free extents. However, it turns out that the _maximum_ level is more
> useful for upcoming optimizations, and basically equivalent for the
> existing usage. So, let's change the meaning of the cache to be the
> maximum level + 1, or 0 if there are no free extents.

Hmm.  If I'm reading xfs_rtmodify_summary_int right, m_rsum_cache[b] now
tells us the maximum log2(length) of the free extents starting in
rtbitmap block b?

IOWs, let's say the cache contents are:

{2, 3, 2, 15, 8}

Someone asks for a 400rtx (realtime extent) allocation, so we want to
find a free space of at least magnitude floor(log2(400)) == 8.

The cache tells us that there aren't any free extents longer than 2^1
blocks in rtbitmap blocks 0 and 2; longer than 2^2 blocks in rtbmp block
1; longer than 2^7 blocks in rtbmp block 4; nor longer than 2^14 blocks
in rtbmp block 3?

From the cache contents, we should therefore examine rtbitmap block 3.

If the cache contents were instead:

{2, 3, 2, 8, 8}

Then we instead might scan rtbitmap blocks 3 and 4 for the longest
allocation that we can get?  Looking back at the original commit, that
seems to make more sense to me...

> Signed-off-by: Omar Sandoval <osandov@fb.com>
> ---
>  fs/xfs/libxfs/xfs_rtbitmap.c |  6 +++---
>  fs/xfs/xfs_mount.h           |  6 +++---
>  fs/xfs/xfs_rtalloc.c         | 31 +++++++++++++++++++------------
>  3 files changed, 25 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
> index 1a832c9a412f..d9493f64adfc 100644
> --- a/fs/xfs/libxfs/xfs_rtbitmap.c
> +++ b/fs/xfs/libxfs/xfs_rtbitmap.c
> @@ -503,10 +503,10 @@ xfs_rtmodify_summary_int(
>  
>  		*sp += delta;
>  		if (mp->m_rsum_cache) {
> -			if (*sp == 0 && log == mp->m_rsum_cache[bbno])
> -				mp->m_rsum_cache[bbno]++;
> -			if (*sp != 0 && log < mp->m_rsum_cache[bbno])
> +			if (*sp == 0 && log + 1 == mp->m_rsum_cache[bbno])
>  				mp->m_rsum_cache[bbno] = log;
> +			if (*sp != 0 && log >= mp->m_rsum_cache[bbno])
> +				mp->m_rsum_cache[bbno] = log + 1;
>  		}
>  		xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1);
>  	}
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 6c09f89534d3..964541c36730 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -103,9 +103,9 @@ typedef struct xfs_mount {
>  
>  	/*
>  	 * Optional cache of rt summary level per bitmap block with the
> -	 * invariant that m_rsum_cache[bbno] <= the minimum i for which
> -	 * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip
> -	 * inode lock.
> +	 * invariant that m_rsum_cache[bbno] > the maximum i for which
> +	 * rsum[i][bbno] != 0, or 0 if rsum[i][bbno] == 0 for all i.
> +	 * Reads and writes are serialized by the rsumip inode lock.
>  	 */
>  	uint8_t			*m_rsum_cache;
>  	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
> diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
> index 61ef13286654..d3c76532d20e 100644
> --- a/fs/xfs/xfs_rtalloc.c
> +++ b/fs/xfs/xfs_rtalloc.c
> @@ -56,14 +56,19 @@ xfs_rtany_summary(
>  	int		log;		/* loop counter, log2 of ext. size */
>  	xfs_suminfo_t	sum;		/* summary data */
>  
> -	/* There are no extents at levels < m_rsum_cache[bbno]. */
> -	if (mp->m_rsum_cache && low < mp->m_rsum_cache[bbno])
> -		low = mp->m_rsum_cache[bbno];
> +	/* There are no extents at levels >= m_rsum_cache[bbno]. */
> +	if (mp->m_rsum_cache) {
> +		high = min(high, mp->m_rsum_cache[bbno] - 1);
> +		if (low > high) {
> +			*stat = 0;
> +			return 0;
> +		}
> +	}
>  
>  	/*
>  	 * Loop over logs of extent sizes.
>  	 */
> -	for (log = low; log <= high; log++) {
> +	for (log = high; log >= low; log--) {
>  		/*
>  		 * Get one summary datum.
>  		 */
> @@ -84,9 +89,9 @@ xfs_rtany_summary(
>  	 */
>  	*stat = 0;
>  out:
> -	/* There were no extents at levels < log. */
> -	if (mp->m_rsum_cache && log > mp->m_rsum_cache[bbno])
> -		mp->m_rsum_cache[bbno] = log;
> +	/* There were no extents at levels > log. */
> +	if (mp->m_rsum_cache && log + 1 < mp->m_rsum_cache[bbno])
> +		mp->m_rsum_cache[bbno] = log + 1;
>  	return 0;
>  }
>  
> @@ -878,12 +883,14 @@ xfs_alloc_rsum_cache(
>  	xfs_extlen_t	rbmblocks)	/* number of rt bitmap blocks */
>  {
>  	/*
> -	 * The rsum cache is initialized to all zeroes, which is trivially a
> -	 * lower bound on the minimum level with any free extents. We can
> -	 * continue without the cache if it couldn't be allocated.
> +	 * The rsum cache is initialized to the maximum value, which is
> +	 * trivially an upper bound on the maximum level with any free extents.
> +	 * We can continue without the cache if it couldn't be allocated.
>  	 */
> -	mp->m_rsum_cache = kvzalloc(rbmblocks, GFP_KERNEL);
> -	if (!mp->m_rsum_cache)
> +	mp->m_rsum_cache = kvmalloc(rbmblocks, GFP_KERNEL);
> +	if (mp->m_rsum_cache)
> +		memset(mp->m_rsum_cache, -1, rbmblocks);
> +	else
>  		xfs_warn(mp, "could not allocate realtime summary cache");
>  }
>  
> -- 
> 2.41.0
>
Omar Sandoval July 17, 2023, 7:54 p.m. UTC | #2
On Wed, Jul 12, 2023 at 03:40:01PM -0700, Darrick J. Wong wrote:
> On Tue, Jun 20, 2023 at 02:32:12PM -0700, Omar Sandoval wrote:
> > From: Omar Sandoval <osandov@fb.com>
> > 
> > In commit 355e3532132b ("xfs: cache minimum realtime summary level"), I
> > added a cache of the minimum level of the realtime summary that has any
> > free extents. However, it turns out that the _maximum_ level is more
> > useful for upcoming optimizations, and basically equivalent for the
> > existing usage. So, let's change the meaning of the cache to be the
> > maximum level + 1, or 0 if there are no free extents.
> 
> Hmm.  If I'm reading xfs_rtmodify_summary_int right, m_rsum_cache[b] now
> tells us the maximum log2(length) of the free extents starting in
> rtbitmap block b?
> 
> IOWs, let's say the cache contents are:
> 
> {2, 3, 2, 15, 8}
> 
> Someone asks for a 400rtx (realtime extent) allocation, so we want to
> find a free space of at least magnitude floor(log2(400)) == 8.
> 
> The cache tells us that there aren't any free extents longer than 2^1
> blocks in rtbitmap blocks 0 and 2; longer than 2^2 blocks in rtbmp block
> 1; longer than 2^7 blocks in rtbmp block 4; nor longer than 2^14 blocks
> in rtbmp block 3?

There's a potential for an off-by-one bug here, so just to make sure
we're saying the same thing: the realtime summary for level n contains
the number of free extents starting in a bitmap block such that
floor(log2(size_in_realtime_extents)) == n. The maximum size of a free
extent in level n is therefore 2^(n + 1) - 1 realtime extents.

So in your example, the cache is telling us that realtime bitmap blocks
0 and 2 don't have anything free in levels 2 or above, and therefore
don't have any free extents longer than _or equal to_ 2^2.

I'll try to reword the commit message and comments to make this
unambiguous.
Darrick J. Wong Aug. 1, 2023, 11:17 p.m. UTC | #3
On Mon, Jul 17, 2023 at 12:54:24PM -0700, Omar Sandoval wrote:
> On Wed, Jul 12, 2023 at 03:40:01PM -0700, Darrick J. Wong wrote:
> > On Tue, Jun 20, 2023 at 02:32:12PM -0700, Omar Sandoval wrote:
> > > From: Omar Sandoval <osandov@fb.com>
> > > 
> > > In commit 355e3532132b ("xfs: cache minimum realtime summary level"), I
> > > added a cache of the minimum level of the realtime summary that has any
> > > free extents. However, it turns out that the _maximum_ level is more
> > > useful for upcoming optimizations, and basically equivalent for the
> > > existing usage. So, let's change the meaning of the cache to be the
> > > maximum level + 1, or 0 if there are no free extents.
> > 
> > Hmm.  If I'm reading xfs_rtmodify_summary_int right, m_rsum_cache[b] now
> > tells us the maximum log2(length) of the free extents starting in
> > rtbitmap block b?
> > 
> > IOWs, let's say the cache contents are:
> > 
> > {2, 3, 2, 15, 8}
> > 
> > Someone asks for a 400rtx (realtime extent) allocation, so we want to
> > find a free space of at least magnitude floor(log2(400)) == 8.
> > 
> > The cache tells us that there aren't any free extents longer than 2^1
> > blocks in rtbitmap blocks 0 and 2; longer than 2^2 blocks in rtbmp block
> > 1; longer than 2^7 blocks in rtbmp block 4; nor longer than 2^14 blocks
> > in rtbmp block 3?
> 
> There's a potential for an off-by-one bug here, so just to make sure
> we're saying the same thing: the realtime summary for level n contains
> the number of free extents starting in a bitmap block such that
> floor(log2(size_in_realtime_extents)) == n. The maximum size of a free
> extent in level n is therefore 2^(n + 1) - 1 realtime extents.
> 
> So in your example, the cache is telling us that realtime bitmap blocks
> 0 and 2 don't have anything free in levels 2 or above, and therefore
> don't have any free extents longer than _or equal to_ 2^2.

D'oh.  I forgot the subtlety that the maximum size of a free extent in
level n is 2^(n + 1) - 1 realtime extents.

> I'll try to reword the commit message and comments to make this
> unambiguous.

Ok, thanks.  A couple of quick examples (feel free to use mine) would be
helpful for descrambling my brain. :)

--D
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 1a832c9a412f..d9493f64adfc 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -503,10 +503,10 @@  xfs_rtmodify_summary_int(
 
 		*sp += delta;
 		if (mp->m_rsum_cache) {
-			if (*sp == 0 && log == mp->m_rsum_cache[bbno])
-				mp->m_rsum_cache[bbno]++;
-			if (*sp != 0 && log < mp->m_rsum_cache[bbno])
+			if (*sp == 0 && log + 1 == mp->m_rsum_cache[bbno])
 				mp->m_rsum_cache[bbno] = log;
+			if (*sp != 0 && log >= mp->m_rsum_cache[bbno])
+				mp->m_rsum_cache[bbno] = log + 1;
 		}
 		xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1);
 	}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 6c09f89534d3..964541c36730 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -103,9 +103,9 @@  typedef struct xfs_mount {
 
 	/*
 	 * Optional cache of rt summary level per bitmap block with the
-	 * invariant that m_rsum_cache[bbno] <= the minimum i for which
-	 * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip
-	 * inode lock.
+	 * invariant that m_rsum_cache[bbno] > the maximum i for which
+	 * rsum[i][bbno] != 0, or 0 if rsum[i][bbno] == 0 for all i.
+	 * Reads and writes are serialized by the rsumip inode lock.
 	 */
 	uint8_t			*m_rsum_cache;
 	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 61ef13286654..d3c76532d20e 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -56,14 +56,19 @@  xfs_rtany_summary(
 	int		log;		/* loop counter, log2 of ext. size */
 	xfs_suminfo_t	sum;		/* summary data */
 
-	/* There are no extents at levels < m_rsum_cache[bbno]. */
-	if (mp->m_rsum_cache && low < mp->m_rsum_cache[bbno])
-		low = mp->m_rsum_cache[bbno];
+	/* There are no extents at levels >= m_rsum_cache[bbno]. */
+	if (mp->m_rsum_cache) {
+		high = min(high, mp->m_rsum_cache[bbno] - 1);
+		if (low > high) {
+			*stat = 0;
+			return 0;
+		}
+	}
 
 	/*
 	 * Loop over logs of extent sizes.
 	 */
-	for (log = low; log <= high; log++) {
+	for (log = high; log >= low; log--) {
 		/*
 		 * Get one summary datum.
 		 */
@@ -84,9 +89,9 @@  xfs_rtany_summary(
 	 */
 	*stat = 0;
 out:
-	/* There were no extents at levels < log. */
-	if (mp->m_rsum_cache && log > mp->m_rsum_cache[bbno])
-		mp->m_rsum_cache[bbno] = log;
+	/* There were no extents at levels > log. */
+	if (mp->m_rsum_cache && log + 1 < mp->m_rsum_cache[bbno])
+		mp->m_rsum_cache[bbno] = log + 1;
 	return 0;
 }
 
@@ -878,12 +883,14 @@  xfs_alloc_rsum_cache(
 	xfs_extlen_t	rbmblocks)	/* number of rt bitmap blocks */
 {
 	/*
-	 * The rsum cache is initialized to all zeroes, which is trivially a
-	 * lower bound on the minimum level with any free extents. We can
-	 * continue without the cache if it couldn't be allocated.
+	 * The rsum cache is initialized to the maximum value, which is
+	 * trivially an upper bound on the maximum level with any free extents.
+	 * We can continue without the cache if it couldn't be allocated.
 	 */
-	mp->m_rsum_cache = kvzalloc(rbmblocks, GFP_KERNEL);
-	if (!mp->m_rsum_cache)
+	mp->m_rsum_cache = kvmalloc(rbmblocks, GFP_KERNEL);
+	if (mp->m_rsum_cache)
+		memset(mp->m_rsum_cache, -1, rbmblocks);
+	else
 		xfs_warn(mp, "could not allocate realtime summary cache");
 }