diff mbox

[1/2] btrfs: fix compress=zlib when stream end crosses page boundary

Message ID alpine.LNX.2.00.1501032236270.30995@cheri.shyou.org (mailing list archive)
State New, archived
Headers show

Commit Message

Danielle Church Jan. 4, 2015, 3:48 a.m. UTC
In zlib_compress_pages(), if the last looped call to zlib_deflate ends 
between 1 and 5 bytes short of a page boundary, the final call with 
Z_FINISH is unable to write the final 6 bytes of the zlib stream and bails 
out.  This causes compress_file_range() in inode.c to distrust the 
compressor and flag ths inode nocompress, with the end result that the 
rest of a potentially highly-compressible file is stored in expanded form.

You can demonstrate this with the following script, setting MOUNTPOINT to 
an otherwise-unused btrfs mount with compress=zlib:

MOUNTPOINT=/mnt
sync; df $MOUNTPOINT
{ head -c 6630 /usr/src/linux/Documentation/BUG-HUNTING;
   cat /dev/zero; } | head -c 1310720 > $MOUNTPOINT/test
sync; df $MOUNTPOINT
rm -f $MOUNTPOINT/test

The selection of 6630 bytes from BUG-HUNTING plus nulls provides for a 
first 128KiB chunk that compresses to just over 4 KiB, and the other nine 
chunks compress easily within a single page each, so this should compress 
to 11 pages or 44 KiB; instead it uses significantly more, depending on 
how many threads were able to compress chunks before nocompress was set.

The following patch fixes this by moving the Z_FINISH calls to inside the 
page read/write loop, allowing it to call as many times as needed.

Signed-off-by: Danielle Church <dchurch@cheri.shyou.org>
---
  fs/btrfs/zlib.c | 26 +++++++++++++++++---------
  1 file changed, 17 insertions(+), 9 deletions(-)
diff mbox

Patch

diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index fb22fd8..1dc0455 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -89,6 +89,7 @@  static int zlib_compress_pages(struct list_head *ws,
  	struct page *in_page = NULL;
  	struct page *out_page = NULL;
  	unsigned long bytes_left;
+	int deflate_flush = Z_SYNC_FLUSH;

  	*out_pages = 0;
  	*total_out = 0;
@@ -120,8 +121,12 @@  static int zlib_compress_pages(struct list_head *ws,
  	workspace->strm.avail_out = PAGE_CACHE_SIZE;
  	workspace->strm.avail_in = min(len, PAGE_CACHE_SIZE);

-	while (workspace->strm.total_in < len) {
-		ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
+	while (deflate_flush != Z_FINISH || ret != Z_STREAM_END) {
+		ret = zlib_deflate(&workspace->strm, deflate_flush);
+		/* we're all done, including the stream end */
+		if (ret == Z_STREAM_END && deflate_flush == Z_FINISH)
+			break;
+
  		if (ret != Z_OK) {
  			printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n",
  			       ret);
@@ -159,9 +164,12 @@  static int zlib_compress_pages(struct list_head *ws,
  			workspace->strm.avail_out = PAGE_CACHE_SIZE;
  			workspace->strm.next_out = cpage_out;
  		}
-		/* we're all done */
-		if (workspace->strm.total_in >= len)
-			break;
+		/* we're all done with input data; keep looping until stream end is written */
+		if (workspace->strm.total_in >= len) {
+			deflate_flush = Z_FINISH;
+			workspace->strm.avail_in = 0;
+			continue;
+		}

  		/* we've read in a full page, get a new one */
  		if (workspace->strm.avail_in == 0) {
@@ -181,11 +189,11 @@  static int zlib_compress_pages(struct list_head *ws,
  			workspace->strm.next_in = data_in;
  		}
  	}
-	workspace->strm.avail_in = 0;
-	ret = zlib_deflate(&workspace->strm, Z_FINISH);
-	zlib_deflateEnd(&workspace->strm);
+	ret = zlib_deflateEnd(&workspace->strm);

-	if (ret != Z_STREAM_END) {
+	if (ret != Z_OK) {
+		printk(KERN_DEBUG "BTRFS: deflateEnd returned %d\n",
+		       ret);
  		ret = -EIO;
  		goto out;
  	}