diff mbox

[1/3] Btrfs: heuristic add simple sampling logic

Message ID 20170724113708.18088-2-nefelim4ag@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Timofey Titovets July 24, 2017, 11:37 a.m. UTC
Take a small sample of the input data
and count how often each byte value occurs in that sample

Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
---
 fs/btrfs/compression.c | 24 ++++++++++++++++++++++--
 fs/btrfs/compression.h | 11 +++++++++++
 2 files changed, 33 insertions(+), 2 deletions(-)

Comments

Josef Bacik July 24, 2017, 2:55 p.m. UTC | #1
On Mon, Jul 24, 2017 at 02:37:06PM +0300, Timofey Titovets wrote:
> Take a small sample of the input data
> and count how often each byte value occurs in that sample
> 
> Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
> ---
>  fs/btrfs/compression.c | 24 ++++++++++++++++++++++--
>  fs/btrfs/compression.h | 11 +++++++++++
>  2 files changed, 33 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
> index 63f54bd2d5bb..1501d4fe90cc 100644
> --- a/fs/btrfs/compression.c
> +++ b/fs/btrfs/compression.c
> @@ -1068,15 +1068,35 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
>  	u64 index = start >> PAGE_SHIFT;
>  	u64 end_index = end >> PAGE_SHIFT;
>  	struct page *page;
> -	int ret = 1;
> +	struct heuristic_bucket_item *bucket;
> +	int a, b, ret;
> +	u8 symbol, *input_data;
> +
> +	ret = 1;
> +
> +	bucket = kcalloc(BTRFS_HEURISTIC_BUCKET_SIZE,
> +		sizeof(struct heuristic_bucket_item), GFP_NOFS);
> +
> +	if (!bucket)
> +		goto out;
>  
>  	while (index <= end_index) {
>  		page = find_get_page(inode->i_mapping, index);
> -		kmap(page);
> +		input_data = kmap(page);
> +		a = 0;
> +		while (a < PAGE_SIZE) {
> +			for (b = 0; b < BTRFS_HEURISTIC_READ_SIZE; b++) {
> +				symbol = input_data[a+b];
> +				bucket[symbol].count++;
> +			}
> +			a += BTRFS_HEURISTIC_ITERATOR_OFFSET;
> +		}
>  		kunmap(page);
>  		put_page(page);
>  		index++;
>  	}
>  
> +out:
> +	kfree(bucket);
>  	return ret;
>  }
> diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
> index d1f4eee2d0af..984943e5e1ae 100644
> --- a/fs/btrfs/compression.h
> +++ b/fs/btrfs/compression.h
> @@ -129,6 +129,17 @@ struct btrfs_compress_op {
>  extern const struct btrfs_compress_op btrfs_zlib_compress;
>  extern const struct btrfs_compress_op btrfs_lzo_compress;
>  
> +struct heuristic_bucket_item {
> +       u8  padding;
> +       u8  symbol;
> +       u16 count;
> +};
> +
> +#define BTRFS_HEURISTIC_READ_SIZE 16
> +#define BTRFS_HEURISTIC_READS_PER_PAGE 8*PAGE_SIZE/4096

I hate magic numbers; why is this 8*PAGE_SIZE/4096?  If you want to sample every
512 bytes, why not just set BTRFS_HEURISTIC_ITERATOR_OFFSET to 512?  That would
make it easier to understand what you are trying to accomplish.  Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 63f54bd2d5bb..1501d4fe90cc 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1068,15 +1068,35 @@  int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
 	u64 index = start >> PAGE_SHIFT;
 	u64 end_index = end >> PAGE_SHIFT;
 	struct page *page;
-	int ret = 1;
+	struct heuristic_bucket_item *bucket;
+	int a, b, ret;
+	u8 symbol, *input_data;
+
+	ret = 1;
+
+	bucket = kcalloc(BTRFS_HEURISTIC_BUCKET_SIZE,
+		sizeof(struct heuristic_bucket_item), GFP_NOFS);
+
+	if (!bucket)
+		goto out;
 
 	while (index <= end_index) {
 		page = find_get_page(inode->i_mapping, index);
-		kmap(page);
+		input_data = kmap(page);
+		a = 0;
+		while (a < PAGE_SIZE) {
+			for (b = 0; b < BTRFS_HEURISTIC_READ_SIZE; b++) {
+				symbol = input_data[a+b];
+				bucket[symbol].count++;
+			}
+			a += BTRFS_HEURISTIC_ITERATOR_OFFSET;
+		}
 		kunmap(page);
 		put_page(page);
 		index++;
 	}
 
+out:
+	kfree(bucket);
 	return ret;
 }
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index d1f4eee2d0af..984943e5e1ae 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -129,6 +129,17 @@  struct btrfs_compress_op {
 extern const struct btrfs_compress_op btrfs_zlib_compress;
 extern const struct btrfs_compress_op btrfs_lzo_compress;
 
+struct heuristic_bucket_item {	/* one counter slot; the array is indexed by sampled byte value */
+       u8  padding;	/* not referenced by the sampling loop in this patch */
+       u8  symbol;	/* not referenced by the sampling loop in this patch */
+       u16 count;	/* incremented once per sampled byte that has this slot's value */
+};
+
+#define BTRFS_HEURISTIC_READ_SIZE 16	/* consecutive bytes read per sample */
+#define BTRFS_HEURISTIC_READS_PER_PAGE (8 * PAGE_SIZE / 4096)	/* 8 samples per 4 KiB of page */
+#define BTRFS_HEURISTIC_ITERATOR_OFFSET (PAGE_SIZE / BTRFS_HEURISTIC_READS_PER_PAGE)	/* stride between samples: 512 bytes; parenthesized so the division is not re-associated on expansion */
+#define BTRFS_HEURISTIC_BUCKET_SIZE 256	/* one bucket per possible u8 value */
+
 int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
 
 #endif