@@ -18,6 +18,7 @@
#include <linux/pagemap.h>
#include <linux/string.h>
#include <linux/bio.h>
+#include <linux/sort.h>
#include "compression.h"
#define READ_SIZE 16
@@ -32,6 +33,8 @@
#define MAX_INPUT_PAGES ((BTRFS_MAX_UNCOMPRESSED >> PAGE_SHIFT)+1)
#define MAX_SAMPLE_SIZE (MAX_INPUT_PAGES*PAGE_SIZE*READ_SIZE/ITER_SHIFT)
#define BYTE_SET_THRESHOLD 64
+#define BYTE_CORE_SET_LOW BYTE_SET_THRESHOLD
+#define BYTE_CORE_SET_HIGH 200 // ~80%
struct bucket_item {
u32 count;
@@ -74,6 +77,45 @@ static struct list_head *heuristic_alloc_workspace(void)
return ERR_PTR(-ENOMEM);
}
+/* For bucket sorting */
+static inline int bucket_compare(const void *lv, const void *rv)
+{
+ struct bucket_item *l = (struct bucket_item *)(lv);
+ struct bucket_item *r = (struct bucket_item *)(rv);
+
+ return r->count - l->count;
+}
+
+/*
+ * Byte Core set size
+ * How many bytes use 90% of sample
+ */
+static int byte_core_set_size(struct workspace *workspace)
+{
+ int a = 0;
+ u32 coreset_sum = 0;
+ struct bucket_item *bucket = workspace->bucket;
+ u32 core_set_threshold = workspace->sample_size*90/100;
+
+ /* Sort in reverse order */
+ sort(bucket, BUCKET_SIZE, sizeof(*bucket),
+ &bucket_compare, NULL);
+
+ for (; a < BYTE_CORE_SET_LOW; a++)
+ coreset_sum += bucket[a].count;
+
+ if (coreset_sum > core_set_threshold)
+ return a;
+
+ for (; a < BYTE_CORE_SET_HIGH && bucket[a].count > 0; a++) {
+ coreset_sum += bucket[a].count;
+ if (coreset_sum > core_set_threshold)
+ break;
+ }
+
+ return a;
+}
+
static int byte_set_size(const struct workspace *workspace)
{
int a = 0;
@@ -161,7 +203,14 @@ static int heuristic(struct list_head *ws, struct inode *inode,
if (a > BYTE_SET_THRESHOLD)
return 2;
- return 1;
+ a = byte_core_set_size(workspace);
+ if (a <= BYTE_CORE_SET_LOW)
+ return 3;
+
+ if (a >= BYTE_CORE_SET_HIGH)
+ return 0;
+
+ return 4;
}
const struct btrfs_compress_op btrfs_heuristic = {
Calculate byte core set for data sample: Sort bucket's numbers in decreasing order Count how many numbers use 90% of sample If core set are low (<=25%), data are easily compressible If core set high (>=80%), data are not compressible Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com> --- fs/btrfs/heuristic.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) -- 2.14.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html