diff mbox series

[v3,03/14] xfs: simplify extent allocation alignment

Message ID 20240801163057.3981192-4-john.g.garry@oracle.com (mailing list archive)
State Superseded, archived
Headers show
Series forcealign for xfs | expand

Commit Message

John Garry Aug. 1, 2024, 4:30 p.m. UTC
From: Dave Chinner <dchinner@redhat.com>

We currently align extent allocation to stripe unit or stripe width.
That is specified by an external parameter to the allocation code,
which then manipulates the xfs_alloc_args alignment configuration in
interesting ways.

The args->alignment field specifies extent start alignment, but
because we may be attempting non-aligned allocation first there are
also slop variables that allow for those allocation attempts to
account for aligned allocation if they fail.

This gets much more complex as we introduce forced allocation
alignment, where extent size hints are used to generate the extent
start alignment. extent size hints currently only affect extent
lengths (via args->prod and args->mod) and so with this change we
will have two different start alignment conditions.

Avoid this complexity by always using args->alignment to indicate
extent start alignment, and always using args->prod/mod to indicate
extent length adjustment.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
[jpg: fixup alignslop references in xfs_trace.h and xfs_ialloc.c]
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c  |  4 +-
 fs/xfs/libxfs/xfs_alloc.h  |  2 +-
 fs/xfs/libxfs/xfs_bmap.c   | 95 ++++++++++++++++----------------------
 fs/xfs/libxfs/xfs_ialloc.c | 10 ++--
 fs/xfs/xfs_trace.h         |  8 ++--
 5 files changed, 53 insertions(+), 66 deletions(-)

Comments

Darrick J. Wong Aug. 6, 2024, 6:56 p.m. UTC | #1
On Thu, Aug 01, 2024 at 04:30:46PM +0000, John Garry wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> We currently align extent allocation to stripe unit or stripe width.
> That is specified by an external parameter to the allocation code,
> which then manipulates the xfs_alloc_args alignment configuration in
> interesting ways.
> 
> The args->alignment field specifies extent start alignment, but
> because we may be attempting non-aligned allocation first there are
> also slop variables that allow for those allocation attempts to
> account for aligned allocation if they fail.
> 
> This gets much more complex as we introduce forced allocation
> alignment, where extent size hints are used to generate the extent
> start alignment. extent size hints currently only affect extent
> lengths (via args->prod and args->mod) and so with this change we
> will have two different start alignment conditions.
> 
> Avoid this complexity by always using args->alignment to indicate
> extent start alignment, and always using args->prod/mod to indicate
> extent length adjustment.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> [jpg: fixup alignslop references in xfs_trace.h and xfs_ialloc.c]
> Signed-off-by: John Garry <john.g.garry@oracle.com>

Going back to the 6/21 posting[1], what were the answers to the
questions I posted?  Did I correctly figure out what alignslop refers
to?  AFAICT this is the same patch though at least it has the nits
fixed.

[1] https://lore.kernel.org/linux-xfs/20240621202914.GT3058325@frogsfrogsfrogs/

--D

> ---
>  fs/xfs/libxfs/xfs_alloc.c  |  4 +-
>  fs/xfs/libxfs/xfs_alloc.h  |  2 +-
>  fs/xfs/libxfs/xfs_bmap.c   | 95 ++++++++++++++++----------------------
>  fs/xfs/libxfs/xfs_ialloc.c | 10 ++--
>  fs/xfs/xfs_trace.h         |  8 ++--
>  5 files changed, 53 insertions(+), 66 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
> index bf08b9e9d9ac..a9ab7d71c558 100644
> --- a/fs/xfs/libxfs/xfs_alloc.c
> +++ b/fs/xfs/libxfs/xfs_alloc.c
> @@ -2506,7 +2506,7 @@ xfs_alloc_space_available(
>  	reservation = xfs_ag_resv_needed(pag, args->resv);
>  
>  	/* do we have enough contiguous free space for the allocation? */
> -	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
> +	alloc_len = args->minlen + (args->alignment - 1) + args->alignslop;
>  	longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
>  	if (longest < alloc_len)
>  		return false;
> @@ -2535,7 +2535,7 @@ xfs_alloc_space_available(
>  	 * allocation as we know that will definitely succeed and match the
>  	 * callers alignment constraints.
>  	 */
> -	alloc_len = args->maxlen + (args->alignment - 1) + args->minalignslop;
> +	alloc_len = args->maxlen + (args->alignment - 1) + args->alignslop;
>  	if (longest < alloc_len) {
>  		args->maxlen = args->minlen;
>  		ASSERT(args->maxlen > 0);
> diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
> index fae170825be0..473822a5d4e9 100644
> --- a/fs/xfs/libxfs/xfs_alloc.h
> +++ b/fs/xfs/libxfs/xfs_alloc.h
> @@ -46,7 +46,7 @@ typedef struct xfs_alloc_arg {
>  	xfs_extlen_t	minleft;	/* min blocks must be left after us */
>  	xfs_extlen_t	total;		/* total blocks needed in xaction */
>  	xfs_extlen_t	alignment;	/* align answer to multiple of this */
> -	xfs_extlen_t	minalignslop;	/* slop for minlen+alignment calcs */
> +	xfs_extlen_t	alignslop;	/* slop for alignment calcs */
>  	xfs_agblock_t	min_agbno;	/* set an agbno range for NEAR allocs */
>  	xfs_agblock_t	max_agbno;	/* ... */
>  	xfs_extlen_t	len;		/* output: actual size of extent */
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 7df74c35d9f9..25a87e1154bb 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -3286,6 +3286,10 @@ xfs_bmap_select_minlen(
>  	xfs_extlen_t		blen)
>  {
>  
> +	/* Adjust best length for extent start alignment. */
> +	if (blen > args->alignment)
> +		blen -= args->alignment;
> +
>  	/*
>  	 * Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), it is
>  	 * possible that there is enough contiguous free space for this request.
> @@ -3394,35 +3398,43 @@ xfs_bmap_alloc_account(
>  	xfs_trans_mod_dquot_byino(ap->tp, ap->ip, fld, ap->length);
>  }
>  
> -static int
> +/*
> + * Calculate the extent start alignment and the extent length adjustments that
> + * constrain this allocation.
> + *
> + * Extent start alignment is currently determined by stripe configuration and is
> + * carried in args->alignment, whilst extent length adjustment is determined by
> + * extent size hints and is carried by args->prod and args->mod.
> + *
> + * Low level allocation code is free to either ignore or override these values
> + * as required.
> + */
> +static void
>  xfs_bmap_compute_alignments(
>  	struct xfs_bmalloca	*ap,
>  	struct xfs_alloc_arg	*args)
>  {
>  	struct xfs_mount	*mp = args->mp;
>  	xfs_extlen_t		align = 0; /* minimum allocation alignment */
> -	int			stripe_align = 0;
>  
>  	/* stripe alignment for allocation is determined by mount parameters */
>  	if (mp->m_swidth && xfs_has_swalloc(mp))
> -		stripe_align = mp->m_swidth;
> +		args->alignment = mp->m_swidth;
>  	else if (mp->m_dalign)
> -		stripe_align = mp->m_dalign;
> +		args->alignment = mp->m_dalign;
>  
>  	if (ap->flags & XFS_BMAPI_COWFORK)
>  		align = xfs_get_cowextsz_hint(ap->ip);
>  	else if (ap->datatype & XFS_ALLOC_USERDATA)
>  		align = xfs_get_extsz_hint(ap->ip);
> +
>  	if (align) {
>  		if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
>  					ap->eof, 0, ap->conv, &ap->offset,
>  					&ap->length))
>  			ASSERT(0);
>  		ASSERT(ap->length);
> -	}
>  
> -	/* apply extent size hints if obtained earlier */
> -	if (align) {
>  		args->prod = align;
>  		div_u64_rem(ap->offset, args->prod, &args->mod);
>  		if (args->mod)
> @@ -3437,7 +3449,6 @@ xfs_bmap_compute_alignments(
>  			args->mod = args->prod - args->mod;
>  	}
>  
> -	return stripe_align;
>  }
>  
>  static void
> @@ -3509,7 +3520,7 @@ xfs_bmap_exact_minlen_extent_alloc(
>  	args.total = ap->total;
>  
>  	args.alignment = 1;
> -	args.minalignslop = 0;
> +	args.alignslop = 0;
>  
>  	args.minleft = ap->minleft;
>  	args.wasdel = ap->wasdel;
> @@ -3549,7 +3560,6 @@ xfs_bmap_btalloc_at_eof(
>  	struct xfs_bmalloca	*ap,
>  	struct xfs_alloc_arg	*args,
>  	xfs_extlen_t		blen,
> -	int			stripe_align,
>  	bool			ag_only)
>  {
>  	struct xfs_mount	*mp = args->mp;
> @@ -3563,23 +3573,15 @@ xfs_bmap_btalloc_at_eof(
>  	 * allocation.
>  	 */
>  	if (ap->offset) {
> -		xfs_extlen_t	nextminlen = 0;
> +		xfs_extlen_t	alignment = args->alignment;
>  
>  		/*
> -		 * Compute the minlen+alignment for the next case.  Set slop so
> -		 * that the value of minlen+alignment+slop doesn't go up between
> -		 * the calls.
> +		 * Compute the alignment slop for the fallback path so we ensure
> +		 * we account for the potential alignment space required by the
> +		 * fallback paths before we modify the AGF and AGFL here.
>  		 */
>  		args->alignment = 1;
> -		if (blen > stripe_align && blen <= args->maxlen)
> -			nextminlen = blen - stripe_align;
> -		else
> -			nextminlen = args->minlen;
> -		if (nextminlen + stripe_align > args->minlen + 1)
> -			args->minalignslop = nextminlen + stripe_align -
> -					args->minlen - 1;
> -		else
> -			args->minalignslop = 0;
> +		args->alignslop = alignment - args->alignment;
>  
>  		if (!caller_pag)
>  			args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
> @@ -3597,19 +3599,8 @@ xfs_bmap_btalloc_at_eof(
>  		 * Exact allocation failed. Reset to try an aligned allocation
>  		 * according to the original allocation specification.
>  		 */
> -		args->alignment = stripe_align;
> -		args->minlen = nextminlen;
> -		args->minalignslop = 0;
> -	} else {
> -		/*
> -		 * Adjust minlen to try and preserve alignment if we
> -		 * can't guarantee an aligned maxlen extent.
> -		 */
> -		args->alignment = stripe_align;
> -		if (blen > args->alignment &&
> -		    blen <= args->maxlen + args->alignment)
> -			args->minlen = blen - args->alignment;
> -		args->minalignslop = 0;
> +		args->alignment = alignment;
> +		args->alignslop = 0;
>  	}
>  
>  	if (ag_only) {
> @@ -3627,9 +3618,8 @@ xfs_bmap_btalloc_at_eof(
>  		return 0;
>  
>  	/*
> -	 * Allocation failed, so turn return the allocation args to their
> -	 * original non-aligned state so the caller can proceed on allocation
> -	 * failure as if this function was never called.
> +	 * Aligned allocation failed, so all fallback paths from here drop the
> +	 * start alignment requirement as we know it will not succeed.
>  	 */
>  	args->alignment = 1;
>  	return 0;
> @@ -3637,7 +3627,9 @@ xfs_bmap_btalloc_at_eof(
>  
>  /*
>   * We have failed multiple allocation attempts so now are in a low space
> - * allocation situation. Try a locality first full filesystem minimum length
> + * allocation situation. We give up on any attempt at aligned allocation here.
> + *
> + * Try a locality first full filesystem minimum length
>   * allocation whilst still maintaining necessary total block reservation
>   * requirements.
>   *
> @@ -3654,6 +3646,7 @@ xfs_bmap_btalloc_low_space(
>  {
>  	int			error;
>  
> +	args->alignment = 1;
>  	if (args->minlen > ap->minlen) {
>  		args->minlen = ap->minlen;
>  		error = xfs_alloc_vextent_start_ag(args, ap->blkno);
> @@ -3673,13 +3666,11 @@ xfs_bmap_btalloc_low_space(
>  static int
>  xfs_bmap_btalloc_filestreams(
>  	struct xfs_bmalloca	*ap,
> -	struct xfs_alloc_arg	*args,
> -	int			stripe_align)
> +	struct xfs_alloc_arg	*args)
>  {
>  	xfs_extlen_t		blen = 0;
>  	int			error = 0;
>  
> -
>  	error = xfs_filestream_select_ag(ap, args, &blen);
>  	if (error)
>  		return error;
> @@ -3698,8 +3689,7 @@ xfs_bmap_btalloc_filestreams(
>  
>  	args->minlen = xfs_bmap_select_minlen(ap, args, blen);
>  	if (ap->aeof)
> -		error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
> -				true);
> +		error = xfs_bmap_btalloc_at_eof(ap, args, blen, true);
>  
>  	if (!error && args->fsbno == NULLFSBLOCK)
>  		error = xfs_alloc_vextent_near_bno(args, ap->blkno);
> @@ -3723,8 +3713,7 @@ xfs_bmap_btalloc_filestreams(
>  static int
>  xfs_bmap_btalloc_best_length(
>  	struct xfs_bmalloca	*ap,
> -	struct xfs_alloc_arg	*args,
> -	int			stripe_align)
> +	struct xfs_alloc_arg	*args)
>  {
>  	xfs_extlen_t		blen = 0;
>  	int			error;
> @@ -3748,8 +3737,7 @@ xfs_bmap_btalloc_best_length(
>  	 * trying.
>  	 */
>  	if (ap->aeof && !(ap->tp->t_flags & XFS_TRANS_LOWMODE)) {
> -		error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
> -				false);
> +		error = xfs_bmap_btalloc_at_eof(ap, args, blen, false);
>  		if (error || args->fsbno != NULLFSBLOCK)
>  			return error;
>  	}
> @@ -3776,27 +3764,26 @@ xfs_bmap_btalloc(
>  		.resv		= XFS_AG_RESV_NONE,
>  		.datatype	= ap->datatype,
>  		.alignment	= 1,
> -		.minalignslop	= 0,
> +		.alignslop	= 0,
>  	};
>  	xfs_fileoff_t		orig_offset;
>  	xfs_extlen_t		orig_length;
>  	int			error;
> -	int			stripe_align;
>  
>  	ASSERT(ap->length);
>  	orig_offset = ap->offset;
>  	orig_length = ap->length;
>  
> -	stripe_align = xfs_bmap_compute_alignments(ap, &args);
> +	xfs_bmap_compute_alignments(ap, &args);
>  
>  	/* Trim the allocation back to the maximum an AG can fit. */
>  	args.maxlen = min(ap->length, mp->m_ag_max_usable);
>  
>  	if ((ap->datatype & XFS_ALLOC_USERDATA) &&
>  	    xfs_inode_is_filestream(ap->ip))
> -		error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
> +		error = xfs_bmap_btalloc_filestreams(ap, &args);
>  	else
> -		error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
> +		error = xfs_bmap_btalloc_best_length(ap, &args);
>  	if (error)
>  		return error;
>  
> diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
> index 0af5b7a33d05..2fa29d2f004e 100644
> --- a/fs/xfs/libxfs/xfs_ialloc.c
> +++ b/fs/xfs/libxfs/xfs_ialloc.c
> @@ -758,12 +758,12 @@ xfs_ialloc_ag_alloc(
>  		 *
>  		 * For an exact allocation, alignment must be 1,
>  		 * however we need to take cluster alignment into account when
> -		 * fixing up the freelist. Use the minalignslop field to
> -		 * indicate that extra blocks might be required for alignment,
> -		 * but not to use them in the actual exact allocation.
> +		 * fixing up the freelist. Use the alignslop field to indicate
> +		 * that extra blocks might be required for alignment, but not
> +		 * to use them in the actual exact allocation.
>  		 */
>  		args.alignment = 1;
> -		args.minalignslop = igeo->cluster_align - 1;
> +		args.alignslop = igeo->cluster_align - 1;
>  
>  		/* Allow space for the inode btree to split. */
>  		args.minleft = igeo->inobt_maxlevels;
> @@ -783,7 +783,7 @@ xfs_ialloc_ag_alloc(
>  		 * on, so reset minalignslop to ensure it is not included in
>  		 * subsequent requests.
>  		 */
> -		args.minalignslop = 0;
> +		args.alignslop = 0;
>  	}
>  
>  	if (unlikely(args.fsbno == NULLFSBLOCK)) {
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 5646d300b286..fb0c46d9a6d9 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -1811,7 +1811,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
>  		__field(xfs_extlen_t, minleft)
>  		__field(xfs_extlen_t, total)
>  		__field(xfs_extlen_t, alignment)
> -		__field(xfs_extlen_t, minalignslop)
> +		__field(xfs_extlen_t, alignslop)
>  		__field(xfs_extlen_t, len)
>  		__field(char, wasdel)
>  		__field(char, wasfromfl)
> @@ -1830,7 +1830,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
>  		__entry->minleft = args->minleft;
>  		__entry->total = args->total;
>  		__entry->alignment = args->alignment;
> -		__entry->minalignslop = args->minalignslop;
> +		__entry->alignslop = args->alignslop;
>  		__entry->len = args->len;
>  		__entry->wasdel = args->wasdel;
>  		__entry->wasfromfl = args->wasfromfl;
> @@ -1839,7 +1839,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
>  		__entry->highest_agno = args->tp->t_highest_agno;
>  	),
>  	TP_printk("dev %d:%d agno 0x%x agbno 0x%x minlen %u maxlen %u mod %u "
> -		  "prod %u minleft %u total %u alignment %u minalignslop %u "
> +		  "prod %u minleft %u total %u alignment %u alignslop %u "
>  		  "len %u wasdel %d wasfromfl %d resv %d "
>  		  "datatype 0x%x highest_agno 0x%x",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> @@ -1852,7 +1852,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
>  		  __entry->minleft,
>  		  __entry->total,
>  		  __entry->alignment,
> -		  __entry->minalignslop,
> +		  __entry->alignslop,
>  		  __entry->len,
>  		  __entry->wasdel,
>  		  __entry->wasfromfl,
> -- 
> 2.31.1
> 
>
Dave Chinner Aug. 6, 2024, 11:52 p.m. UTC | #2
On Tue, Aug 06, 2024 at 11:56:51AM -0700, Darrick J. Wong wrote:
> On Thu, Aug 01, 2024 at 04:30:46PM +0000, John Garry wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > We currently align extent allocation to stripe unit or stripe width.
> > That is specified by an external parameter to the allocation code,
> > which then manipulates the xfs_alloc_args alignment configuration in
> > interesting ways.
> > 
> > The args->alignment field specifies extent start alignment, but
> > because we may be attempting non-aligned allocation first there are
> > also slop variables that allow for those allocation attempts to
> > account for aligned allocation if they fail.
> > 
> > This gets much more complex as we introduce forced allocation
> > alignment, where extent size hints are used to generate the extent
> > start alignment. extent size hints currently only affect extent
> > lengths (via args->prod and args->mod) and so with this change we
> > will have two different start alignment conditions.
> > 
> > Avoid this complexity by always using args->alignment to indicate
> > extent start alignment, and always using args->prod/mod to indicate
> > extent length adjustment.
> > 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > [jpg: fixup alignslop references in xfs_trace.h and xfs_ialloc.c]
> > Signed-off-by: John Garry <john.g.garry@oracle.com>
> 
> Going back to the 6/21 posting[1], what were the answers to the
> questions I posted?  Did I correctly figure out what alignslop refers
> to?

Hard to say.

alignslop is basically an temporary accounting mechanism used to
prevent filesystem shutdowns when the AG is near ENOSPC and exact
BNO allocation is attempted and fails because there isn't an exact
free space available. This exact bno allocation attempt can dirty
the AGFL, and before we dirty the transaction *we must guarantee the
allocation will succeed*. If the allocation fails after we've
started modifying metadata (for whatever reason) we will cancel a
dirty transaction and shut down the filesystem.

Hence the first allocation done from the xfs_bmap_btalloc() context
needs to account for every block the specific allocation and all the
failure fallback attempts *may require* before it starts modifying
metadata. The contiguous exact bno allocation case isn't an aligned
allocation, but it will be followed by an aligned allocation attempt
if it fails and so it must take into account the space requirements
of aligned allocation even though it is not an aligned allocation
itself.

args->alignslop allows xfs_alloc_space_available() to take this
space requirement into account for any allocation that has lesser
alignment requirements than any subsequent allocation attempt that
may follow if this specific allocation attempt fails.

IOWs, args->alignslop is similar to args->minleft and args->total in
purpose, but it only affects the accounting for this specific
allocation attempt rather than defining the amount of space
that needs to remain available at the successful completion of this
allocation for future allocations within this transaction context.

-Dave.
Darrick J. Wong Aug. 7, 2024, 12:23 a.m. UTC | #3
On Wed, Aug 07, 2024 at 09:52:06AM +1000, Dave Chinner wrote:
> On Tue, Aug 06, 2024 at 11:56:51AM -0700, Darrick J. Wong wrote:
> > On Thu, Aug 01, 2024 at 04:30:46PM +0000, John Garry wrote:
> > > From: Dave Chinner <dchinner@redhat.com>
> > > 
> > > We currently align extent allocation to stripe unit or stripe width.
> > > That is specified by an external parameter to the allocation code,
> > > which then manipulates the xfs_alloc_args alignment configuration in
> > > interesting ways.
> > > 
> > > The args->alignment field specifies extent start alignment, but
> > > because we may be attempting non-aligned allocation first there are
> > > also slop variables that allow for those allocation attempts to
> > > account for aligned allocation if they fail.
> > > 
> > > This gets much more complex as we introduce forced allocation
> > > alignment, where extent size hints are used to generate the extent
> > > start alignment. extent size hints currently only affect extent
> > > lengths (via args->prod and args->mod) and so with this change we
> > > will have two different start alignment conditions.
> > > 
> > > Avoid this complexity by always using args->alignment to indicate
> > > extent start alignment, and always using args->prod/mod to indicate
> > > extent length adjustment.
> > > 
> > > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > > [jpg: fixup alignslop references in xfs_trace.h and xfs_ialloc.c]
> > > Signed-off-by: John Garry <john.g.garry@oracle.com>
> > 
> > Going back to the 6/21 posting[1], what were the answers to the
> > questions I posted?  Did I correctly figure out what alignslop refers
> > to?
> 
> Hard to say.
> 
> alignslop is basically an temporary accounting mechanism used to
> prevent filesystem shutdowns when the AG is near ENOSPC and exact
> BNO allocation is attempted and fails because there isn't an exact
> free space available. This exact bno allocation attempt can dirty
> the AGFL, and before we dirty the transaction *we must guarantee the
> allocation will succeed*. If the allocation fails after we've
> started modifying metadata (for whatever reason) we will cancel a
> dirty transaction and shut down the filesystem.
> 
> Hence the first allocation done from the xfs_bmap_btalloc() context
> needs to account for every block the specific allocation and all the
> failure fallback attempts *may require* before it starts modifying
> metadata. The contiguous exact bno allocation case isn't an aligned
> allocation, but it will be followed by an aligned allocation attempt
> if it fails and so it must take into account the space requirements
> of aligned allocation even though it is not an aligned allocation
> itself.
> 
> args->alignslop allows xfs_alloc_space_available() to take this
> space requirement into account for any allocation that has lesser
> alignment requirements than any subsequent allocation attempt that
> may follow if this specific allocation attempt fails.
> 
> IOWs, args->alignslop is similar to args->minleft and args->total in
> purpose, but it only affects the accounting for this specific
> allocation attempt rather than defining the amount of space
> that needs to remain available at the successful completion of this
> allocation for future allocations within this transaction context.

Oh, okay.  IOWs, "slop" is a means for alloc callers to communicate that
they need to perform an aligned allocation, but that right now they want
to try an exact allocation (with looser alignment) so that they might
get better locality.  However, they don't want the exact allocation scan
to commit to a certain AG unless the aligned allocation will be able to
find an aligned space *somewhere* in that AG if the exact scan fails.
For any other kind of allocation situation, slop should be zero.

If the above statement is correct, could we paste that into the
definition of struct xfs_alloc_arg so that I don't have to recollect all
this the next time I see something involving alignslop?  After which
I'm ok saying:

Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> 
> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
>
Dave Chinner Aug. 7, 2024, 12:34 a.m. UTC | #4
On Tue, Aug 06, 2024 at 05:23:58PM -0700, Darrick J. Wong wrote:
> On Wed, Aug 07, 2024 at 09:52:06AM +1000, Dave Chinner wrote:
> > On Tue, Aug 06, 2024 at 11:56:51AM -0700, Darrick J. Wong wrote:
> > > On Thu, Aug 01, 2024 at 04:30:46PM +0000, John Garry wrote:
> > > > From: Dave Chinner <dchinner@redhat.com>
> > > > 
> > > > We currently align extent allocation to stripe unit or stripe width.
> > > > That is specified by an external parameter to the allocation code,
> > > > which then manipulates the xfs_alloc_args alignment configuration in
> > > > interesting ways.
> > > > 
> > > > The args->alignment field specifies extent start alignment, but
> > > > because we may be attempting non-aligned allocation first there are
> > > > also slop variables that allow for those allocation attempts to
> > > > account for aligned allocation if they fail.
> > > > 
> > > > This gets much more complex as we introduce forced allocation
> > > > alignment, where extent size hints are used to generate the extent
> > > > start alignment. extent size hints currently only affect extent
> > > > lengths (via args->prod and args->mod) and so with this change we
> > > > will have two different start alignment conditions.
> > > > 
> > > > Avoid this complexity by always using args->alignment to indicate
> > > > extent start alignment, and always using args->prod/mod to indicate
> > > > extent length adjustment.
> > > > 
> > > > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > > > [jpg: fixup alignslop references in xfs_trace.h and xfs_ialloc.c]
> > > > Signed-off-by: John Garry <john.g.garry@oracle.com>
> > > 
> > > Going back to the 6/21 posting[1], what were the answers to the
> > > questions I posted?  Did I correctly figure out what alignslop refers
> > > to?
> > 
> > Hard to say.
> > 
> > alignslop is basically an temporary accounting mechanism used to
> > prevent filesystem shutdowns when the AG is near ENOSPC and exact
> > BNO allocation is attempted and fails because there isn't an exact
> > free space available. This exact bno allocation attempt can dirty
> > the AGFL, and before we dirty the transaction *we must guarantee the
> > allocation will succeed*. If the allocation fails after we've
> > started modifying metadata (for whatever reason) we will cancel a
> > dirty transaction and shut down the filesystem.
> > 
> > Hence the first allocation done from the xfs_bmap_btalloc() context
> > needs to account for every block the specific allocation and all the
> > failure fallback attempts *may require* before it starts modifying
> > metadata. The contiguous exact bno allocation case isn't an aligned
> > allocation, but it will be followed by an aligned allocation attempt
> > if it fails and so it must take into account the space requirements
> > of aligned allocation even though it is not an aligned allocation
> > itself.
> > 
> > args->alignslop allows xfs_alloc_space_available() to take this
> > space requirement into account for any allocation that has lesser
> > alignment requirements than any subsequent allocation attempt that
> > may follow if this specific allocation attempt fails.
> > 
> > IOWs, args->alignslop is similar to args->minleft and args->total in
> > purpose, but it only affects the accounting for this specific
> > allocation attempt rather than defining the amount of space
> > that needs to remain available at the successful completion of this
> > allocation for future allocations within this transaction context.
> 
> Oh, okay.  IOWs, "slop" is a means for alloc callers to communicate that
> they need to perform an aligned allocation, but that right now they want
> to try an exact allocation (with looser alignment) so that they might
> get better locality.  However, they don't want the exact allocation scan
> to commit to a certain AG unless the aligned allocation will be able to
> find an aligned space *somewhere* in that AG if the exact scan fails.
> For any other kind of allocation situation, slop should be zero.
> 
> If the above statement is correct,

It is, except for a small nit: alignslop isn't exact bno allocation
specific, it allows any sort of unaligned -> fail -> aligned
allocation fallback pattern to select an AG where the fallback
aligned allocation will have space to succeed.

> could we paste that into the
> definition of struct xfs_alloc_arg so that I don't have to recollect all
> this the next time I see something involving alignslop?

Sure.

> After which
> I'm ok saying:
> 
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>

Thanks!

-Dave.
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index bf08b9e9d9ac..a9ab7d71c558 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2506,7 +2506,7 @@  xfs_alloc_space_available(
 	reservation = xfs_ag_resv_needed(pag, args->resv);
 
 	/* do we have enough contiguous free space for the allocation? */
-	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
+	alloc_len = args->minlen + (args->alignment - 1) + args->alignslop;
 	longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
 	if (longest < alloc_len)
 		return false;
@@ -2535,7 +2535,7 @@  xfs_alloc_space_available(
 	 * allocation as we know that will definitely succeed and match the
 	 * callers alignment constraints.
 	 */
-	alloc_len = args->maxlen + (args->alignment - 1) + args->minalignslop;
+	alloc_len = args->maxlen + (args->alignment - 1) + args->alignslop;
 	if (longest < alloc_len) {
 		args->maxlen = args->minlen;
 		ASSERT(args->maxlen > 0);
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index fae170825be0..473822a5d4e9 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -46,7 +46,7 @@  typedef struct xfs_alloc_arg {
 	xfs_extlen_t	minleft;	/* min blocks must be left after us */
 	xfs_extlen_t	total;		/* total blocks needed in xaction */
 	xfs_extlen_t	alignment;	/* align answer to multiple of this */
-	xfs_extlen_t	minalignslop;	/* slop for minlen+alignment calcs */
+	xfs_extlen_t	alignslop;	/* slop for alignment calcs */
 	xfs_agblock_t	min_agbno;	/* set an agbno range for NEAR allocs */
 	xfs_agblock_t	max_agbno;	/* ... */
 	xfs_extlen_t	len;		/* output: actual size of extent */
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7df74c35d9f9..25a87e1154bb 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3286,6 +3286,10 @@  xfs_bmap_select_minlen(
 	xfs_extlen_t		blen)
 {
 
+	/* Adjust best length for extent start alignment. */
+	if (blen > args->alignment)
+		blen -= args->alignment;
+
 	/*
 	 * Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), it is
 	 * possible that there is enough contiguous free space for this request.
@@ -3394,35 +3398,43 @@  xfs_bmap_alloc_account(
 	xfs_trans_mod_dquot_byino(ap->tp, ap->ip, fld, ap->length);
 }
 
-static int
+/*
+ * Calculate the extent start alignment and the extent length adjustments that
+ * constrain this allocation.
+ *
+ * Extent start alignment is currently determined by stripe configuration and is
+ * carried in args->alignment, whilst extent length adjustment is determined by
+ * extent size hints and is carried by args->prod and args->mod.
+ *
+ * Low level allocation code is free to either ignore or override these values
+ * as required.
+ */
+static void
 xfs_bmap_compute_alignments(
 	struct xfs_bmalloca	*ap,
 	struct xfs_alloc_arg	*args)
 {
 	struct xfs_mount	*mp = args->mp;
 	xfs_extlen_t		align = 0; /* minimum allocation alignment */
-	int			stripe_align = 0;
 
 	/* stripe alignment for allocation is determined by mount parameters */
 	if (mp->m_swidth && xfs_has_swalloc(mp))
-		stripe_align = mp->m_swidth;
+		args->alignment = mp->m_swidth;
 	else if (mp->m_dalign)
-		stripe_align = mp->m_dalign;
+		args->alignment = mp->m_dalign;
 
 	if (ap->flags & XFS_BMAPI_COWFORK)
 		align = xfs_get_cowextsz_hint(ap->ip);
 	else if (ap->datatype & XFS_ALLOC_USERDATA)
 		align = xfs_get_extsz_hint(ap->ip);
+
 	if (align) {
 		if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
 					ap->eof, 0, ap->conv, &ap->offset,
 					&ap->length))
 			ASSERT(0);
 		ASSERT(ap->length);
-	}
 
-	/* apply extent size hints if obtained earlier */
-	if (align) {
 		args->prod = align;
 		div_u64_rem(ap->offset, args->prod, &args->mod);
 		if (args->mod)
@@ -3437,7 +3449,6 @@  xfs_bmap_compute_alignments(
 			args->mod = args->prod - args->mod;
 	}
 
-	return stripe_align;
 }
 
 static void
@@ -3509,7 +3520,7 @@  xfs_bmap_exact_minlen_extent_alloc(
 	args.total = ap->total;
 
 	args.alignment = 1;
-	args.minalignslop = 0;
+	args.alignslop = 0;
 
 	args.minleft = ap->minleft;
 	args.wasdel = ap->wasdel;
@@ -3549,7 +3560,6 @@  xfs_bmap_btalloc_at_eof(
 	struct xfs_bmalloca	*ap,
 	struct xfs_alloc_arg	*args,
 	xfs_extlen_t		blen,
-	int			stripe_align,
 	bool			ag_only)
 {
 	struct xfs_mount	*mp = args->mp;
@@ -3563,23 +3573,15 @@  xfs_bmap_btalloc_at_eof(
 	 * allocation.
 	 */
 	if (ap->offset) {
-		xfs_extlen_t	nextminlen = 0;
+		xfs_extlen_t	alignment = args->alignment;
 
 		/*
-		 * Compute the minlen+alignment for the next case.  Set slop so
-		 * that the value of minlen+alignment+slop doesn't go up between
-		 * the calls.
+		 * Compute the alignment slop for the fallback path so we ensure
+		 * we account for the potential alignment space required by the
+		 * fallback paths before we modify the AGF and AGFL here.
 		 */
 		args->alignment = 1;
-		if (blen > stripe_align && blen <= args->maxlen)
-			nextminlen = blen - stripe_align;
-		else
-			nextminlen = args->minlen;
-		if (nextminlen + stripe_align > args->minlen + 1)
-			args->minalignslop = nextminlen + stripe_align -
-					args->minlen - 1;
-		else
-			args->minalignslop = 0;
+		args->alignslop = alignment - args->alignment;
 
 		if (!caller_pag)
 			args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
@@ -3597,19 +3599,8 @@  xfs_bmap_btalloc_at_eof(
 		 * Exact allocation failed. Reset to try an aligned allocation
 		 * according to the original allocation specification.
 		 */
-		args->alignment = stripe_align;
-		args->minlen = nextminlen;
-		args->minalignslop = 0;
-	} else {
-		/*
-		 * Adjust minlen to try and preserve alignment if we
-		 * can't guarantee an aligned maxlen extent.
-		 */
-		args->alignment = stripe_align;
-		if (blen > args->alignment &&
-		    blen <= args->maxlen + args->alignment)
-			args->minlen = blen - args->alignment;
-		args->minalignslop = 0;
+		args->alignment = alignment;
+		args->alignslop = 0;
 	}
 
 	if (ag_only) {
@@ -3627,9 +3618,8 @@  xfs_bmap_btalloc_at_eof(
 		return 0;
 
 	/*
-	 * Allocation failed, so turn return the allocation args to their
-	 * original non-aligned state so the caller can proceed on allocation
-	 * failure as if this function was never called.
+	 * Aligned allocation failed, so all fallback paths from here drop the
+	 * start alignment requirement as we know it will not succeed.
 	 */
 	args->alignment = 1;
 	return 0;
@@ -3637,7 +3627,9 @@  xfs_bmap_btalloc_at_eof(
 
 /*
  * We have failed multiple allocation attempts so now are in a low space
- * allocation situation. Try a locality first full filesystem minimum length
+ * allocation situation. We give up on any attempt at aligned allocation here.
+ *
+ * Try a locality first full filesystem minimum length
  * allocation whilst still maintaining necessary total block reservation
  * requirements.
  *
@@ -3654,6 +3646,7 @@  xfs_bmap_btalloc_low_space(
 {
 	int			error;
 
+	args->alignment = 1;
 	if (args->minlen > ap->minlen) {
 		args->minlen = ap->minlen;
 		error = xfs_alloc_vextent_start_ag(args, ap->blkno);
@@ -3673,13 +3666,11 @@  xfs_bmap_btalloc_low_space(
 static int
 xfs_bmap_btalloc_filestreams(
 	struct xfs_bmalloca	*ap,
-	struct xfs_alloc_arg	*args,
-	int			stripe_align)
+	struct xfs_alloc_arg	*args)
 {
 	xfs_extlen_t		blen = 0;
 	int			error = 0;
 
-
 	error = xfs_filestream_select_ag(ap, args, &blen);
 	if (error)
 		return error;
@@ -3698,8 +3689,7 @@  xfs_bmap_btalloc_filestreams(
 
 	args->minlen = xfs_bmap_select_minlen(ap, args, blen);
 	if (ap->aeof)
-		error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
-				true);
+		error = xfs_bmap_btalloc_at_eof(ap, args, blen, true);
 
 	if (!error && args->fsbno == NULLFSBLOCK)
 		error = xfs_alloc_vextent_near_bno(args, ap->blkno);
@@ -3723,8 +3713,7 @@  xfs_bmap_btalloc_filestreams(
 static int
 xfs_bmap_btalloc_best_length(
 	struct xfs_bmalloca	*ap,
-	struct xfs_alloc_arg	*args,
-	int			stripe_align)
+	struct xfs_alloc_arg	*args)
 {
 	xfs_extlen_t		blen = 0;
 	int			error;
@@ -3748,8 +3737,7 @@  xfs_bmap_btalloc_best_length(
 	 * trying.
 	 */
 	if (ap->aeof && !(ap->tp->t_flags & XFS_TRANS_LOWMODE)) {
-		error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
-				false);
+		error = xfs_bmap_btalloc_at_eof(ap, args, blen, false);
 		if (error || args->fsbno != NULLFSBLOCK)
 			return error;
 	}
@@ -3776,27 +3764,26 @@  xfs_bmap_btalloc(
 		.resv		= XFS_AG_RESV_NONE,
 		.datatype	= ap->datatype,
 		.alignment	= 1,
-		.minalignslop	= 0,
+		.alignslop	= 0,
 	};
 	xfs_fileoff_t		orig_offset;
 	xfs_extlen_t		orig_length;
 	int			error;
-	int			stripe_align;
 
 	ASSERT(ap->length);
 	orig_offset = ap->offset;
 	orig_length = ap->length;
 
-	stripe_align = xfs_bmap_compute_alignments(ap, &args);
+	xfs_bmap_compute_alignments(ap, &args);
 
 	/* Trim the allocation back to the maximum an AG can fit. */
 	args.maxlen = min(ap->length, mp->m_ag_max_usable);
 
 	if ((ap->datatype & XFS_ALLOC_USERDATA) &&
 	    xfs_inode_is_filestream(ap->ip))
-		error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
+		error = xfs_bmap_btalloc_filestreams(ap, &args);
 	else
-		error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
+		error = xfs_bmap_btalloc_best_length(ap, &args);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 0af5b7a33d05..2fa29d2f004e 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -758,12 +758,12 @@  xfs_ialloc_ag_alloc(
 		 *
 		 * For an exact allocation, alignment must be 1,
 		 * however we need to take cluster alignment into account when
-		 * fixing up the freelist. Use the minalignslop field to
-		 * indicate that extra blocks might be required for alignment,
-		 * but not to use them in the actual exact allocation.
+		 * fixing up the freelist. Use the alignslop field to indicate
+		 * that extra blocks might be required for alignment, but not
+		 * to use them in the actual exact allocation.
 		 */
 		args.alignment = 1;
-		args.minalignslop = igeo->cluster_align - 1;
+		args.alignslop = igeo->cluster_align - 1;
 
 		/* Allow space for the inode btree to split. */
 		args.minleft = igeo->inobt_maxlevels;
@@ -783,7 +783,7 @@  xfs_ialloc_ag_alloc(
 		 * on, so reset minalignslop to ensure it is not included in
 		 * subsequent requests.
 		 */
-		args.minalignslop = 0;
+		args.alignslop = 0;
 	}
 
 	if (unlikely(args.fsbno == NULLFSBLOCK)) {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 5646d300b286..fb0c46d9a6d9 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1811,7 +1811,7 @@  DECLARE_EVENT_CLASS(xfs_alloc_class,
 		__field(xfs_extlen_t, minleft)
 		__field(xfs_extlen_t, total)
 		__field(xfs_extlen_t, alignment)
-		__field(xfs_extlen_t, minalignslop)
+		__field(xfs_extlen_t, alignslop)
 		__field(xfs_extlen_t, len)
 		__field(char, wasdel)
 		__field(char, wasfromfl)
@@ -1830,7 +1830,7 @@  DECLARE_EVENT_CLASS(xfs_alloc_class,
 		__entry->minleft = args->minleft;
 		__entry->total = args->total;
 		__entry->alignment = args->alignment;
-		__entry->minalignslop = args->minalignslop;
+		__entry->alignslop = args->alignslop;
 		__entry->len = args->len;
 		__entry->wasdel = args->wasdel;
 		__entry->wasfromfl = args->wasfromfl;
@@ -1839,7 +1839,7 @@  DECLARE_EVENT_CLASS(xfs_alloc_class,
 		__entry->highest_agno = args->tp->t_highest_agno;
 	),
 	TP_printk("dev %d:%d agno 0x%x agbno 0x%x minlen %u maxlen %u mod %u "
-		  "prod %u minleft %u total %u alignment %u minalignslop %u "
+		  "prod %u minleft %u total %u alignment %u alignslop %u "
 		  "len %u wasdel %d wasfromfl %d resv %d "
 		  "datatype 0x%x highest_agno 0x%x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -1852,7 +1852,7 @@  DECLARE_EVENT_CLASS(xfs_alloc_class,
 		  __entry->minleft,
 		  __entry->total,
 		  __entry->alignment,
-		  __entry->minalignslop,
+		  __entry->alignslop,
 		  __entry->len,
 		  __entry->wasdel,
 		  __entry->wasfromfl,