diff mbox

Some questions about the freelist allocator in XFS

Message ID CAGeO4WPh8JcLeHEhkikbJH+kSuiYFoc_yv+97Cxtuo44qp0YGw@mail.gmail.com (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Kaho Ng July 7, 2016, 2:35 a.m. UTC
I am trying to investigate how freelist allocator in xfs interacts
with freespace B+Tree allocator.
First I prepared a patch against
linux-source/fs/xfs/libxfs/xfs_alloc.c to print debugging messages
(The kernel version used is linux-3.10.0-327.22.2.el7).
Then, I wrote a simple utility to make TONS of
holes in a filesystem by calling fallocate() to punch holes in a file
that is almost as large as the volume size.

I created an XFS filesystem image by the following steps:
1. fallocate -l 80G /mnt/disk2/xfs
2. mkfs.xfs -f -d agcount=1 /mnt/disk2/xfs

Then I created a large file by fallocate:
fallocate -l 85823746048 /mnt/test/abc

which left only 4 blocks available in the volume finally:
/dev/loop0      20961280 20961276         4 100% /mnt/test

The result of xfs_bmap against /mnt/test/abc:
/mnt/test/abc:
 EXT: FILE-OFFSET      BLOCK-RANGE      AG AG-OFFSET              TOTAL FLAGS
   0: [0..167624503]:  83000..167707503  0 (83000..167707503) 167624504 10000

After that, I used the hole-punching utility above to create holes on
the files, and captured the output of kmsg.

When reading the log output, I realised that there is no B+Tree split
triggered by xfs_alloc_fix_freelist() when calling xfs_free_extent().
Isn't B+Tree split possible in by-size B+Tree even when truncating a
longer freespace record to shorter one? But what I found in the log is
only a few tree shrinks... And when reading the source code of
freespace allocator I found that a B+Tree growth in this case is
impossible at least...
diff mbox

Patch

--- linux-3.10.0-327.22.2.el7.a/fs/xfs/libxfs/xfs_alloc.c	2016-07-07 03:57:19.366512544 +0800
+++ linux-3.10.0-327.22.2.el7.b/fs/xfs/libxfs/xfs_alloc.c	2016-07-07 09:30:32.178766480 +0800
@@ -309,7 +309,8 @@  xfs_alloc_fixup_trees(
 	xfs_extlen_t	flen,		/* length of free extent */
 	xfs_agblock_t	rbno,		/* starting block of returned extent */
 	xfs_extlen_t	rlen,		/* length of returned extent */
-	int		flags)		/* flags, XFSA_FIXUP_... */
+	int		flags,		/* flags, XFSA_FIXUP_... */
+	int		isfl)
 {
 	int		error;		/* error code */
 	int		i;		/* operation results */
@@ -376,15 +377,27 @@  xfs_alloc_fixup_trees(
 		nfbno1 = rbno + rlen;
 		nflen1 = flen - rlen;
 		nfbno2 = NULLAGBLOCK;
+		if (isfl)
+			xfs_warn(mp,
+				"Case 1 Inserting: nfbno1: %d, nflen1: %d, nfbno2: %d, nflen2: %d, fbno: %d, flen: %d, rbno: %d, rlen: %d",
+				nfbno1, nflen1, nfbno2, nflen2, fbno, flen, rbno, rlen);
 	} else if (rbno + rlen == fbno + flen) {
 		nfbno1 = fbno;
 		nflen1 = flen - rlen;
 		nfbno2 = NULLAGBLOCK;
+		if (isfl)
+			xfs_warn(mp,
+				"Case 2 Inserting: nfbno1: %d, nflen1: %d, nfbno2: %d, nflen2: %d, fbno: %d, flen: %d, rbno: %d, rlen: %d",
+				nfbno1, nflen1, nfbno2, nflen2, fbno, flen, rbno, rlen);
 	} else {
 		nfbno1 = fbno;
 		nflen1 = rbno - fbno;
 		nfbno2 = rbno + rlen;
 		nflen2 = (fbno + flen) - nfbno2;
+		if (isfl)
+			xfs_warn(mp,
+				"Case 3 Inserting: nfbno1: %d, nflen1: %d, nfbno2: %d, nflen2: %d, fbno: %d, flen: %d, rbno: %d, rlen: %d",
+				nfbno1, nflen1, nfbno2, nflen2, fbno, flen, rbno, rlen);
 	}
 	/*
 	 * Delete the entry from the by-size btree.
@@ -396,19 +409,31 @@  xfs_alloc_fixup_trees(
 	 * Add new by-size btree entry(s).
 	 */
 	if (nfbno1 != NULLAGBLOCK) {
+		struct xfs_btree_block	*cntblock;
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
 			return error;
+		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
+		xfs_warn(mp,
+	"B+Tree before insert: isfl: %d, bb_numrec: %d, addr: %llu", isfl, xfs_btree_get_numrecs(cntblock), XFS_BUF_ADDR(cnt_cur->bc_bufs[0]));
 		if ((error = xfs_btree_insert(cnt_cur, &i)))
 			return error;
+		xfs_warn(mp,
+	"B+Tree after insert: isfl: %d, bb_numrec: %d, addr: %llu", isfl, xfs_btree_get_numrecs(cntblock), XFS_BUF_ADDR(cnt_cur->bc_bufs[0]));
 		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 	}
 	if (nfbno2 != NULLAGBLOCK) {
+		struct xfs_btree_block	*cntblock;
+		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
 			return error;
 		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
+		xfs_warn(mp,
+	"B+Tree before insert: isfl: %d, bb_numrec: %d, addr: %llu", isfl, xfs_btree_get_numrecs(cntblock), XFS_BUF_ADDR(cnt_cur->bc_bufs[0]));
 		if ((error = xfs_btree_insert(cnt_cur, &i)))
 			return error;
+		xfs_warn(mp,
+	"B+Tree after insert: isfl: %d, bb_numrec: %d, addr: %llu", isfl, xfs_btree_get_numrecs(cntblock), XFS_BUF_ADDR(cnt_cur->bc_bufs[0]));
 		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 	}
 	/*
@@ -730,7 +755,7 @@  xfs_alloc_ag_vextent_exact(
 	ASSERT(args->agbno + args->len <=
 		be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
-				      args->len, XFSA_FIXUP_BNO_OK);
+				      args->len, XFSA_FIXUP_BNO_OK, args->isfl);
 	if (error) {
 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 		goto error0;
@@ -1028,7 +1053,7 @@  restart:
 		 * Fix up the btree entries.
 		 */
 		if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
-				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
+				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK, args->isfl)))
 			goto error0;
 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
@@ -1219,7 +1244,7 @@  restart:
 	args->agbno = ltnew;
 
 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
-			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
+			ltnew, rlen, XFSA_FIXUP_BNO_OK, args->isfl)))
 		goto error0;
 
 	if (j)
@@ -1420,7 +1445,7 @@  restart:
 	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 		args->agno, XFS_BTNUM_BNO);
 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
-			rbno, rlen, XFSA_FIXUP_CNT_OK)))
+			rbno, rlen, XFSA_FIXUP_CNT_OK, args->isfl)))
 		goto error0;
 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);