Message ID | 20170825091845.4120-7-nefelim4ag@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, Aug 25, 2017 at 12:18:45PM +0300, Timofey Titovets wrote: > Calculate byte core set for data sample: > Sort bucket's numbers in decreasing order > Count how many numbers use 90% of sample > If core set are low (<=25%), data are easily compressible > If core set high (>=80%), data are not compressible > > Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com> > --- > fs/btrfs/heuristic.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 50 insertions(+), 1 deletion(-) > > diff --git a/fs/btrfs/heuristic.c b/fs/btrfs/heuristic.c > index ef723e991576..df0cefa42857 100644 > --- a/fs/btrfs/heuristic.c > +++ b/fs/btrfs/heuristic.c > @@ -18,6 +18,7 @@ > #include <linux/pagemap.h> > #include <linux/string.h> > #include <linux/bio.h> > +#include <linux/sort.h> > #include "compression.h" > > #define READ_SIZE 16 > @@ -25,6 +26,8 @@ > #define BUCKET_SIZE 256 > #define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED*READ_SIZE/ITER_SHIFT) > #define BYTE_SET_THRESHOLD 64 > +#define BYTE_CORE_SET_LOW BYTE_SET_THRESHOLD > +#define BYTE_CORE_SET_HIGH 200 // ~80% Please don't use the // c++ style of comments. > > struct bucket_item { > u32 count; > @@ -67,6 +70,45 @@ static struct list_head *heuristic_alloc_workspace(void) > return ERR_PTR(-ENOMEM); > } > > +/* For bucket sorting */ "Compare buckets by size, ascending */ > +static inline int bucket_compare(const void *lv, const void *rv) > +{ > + struct bucket_item *l = (struct bucket_item *)(lv); > + struct bucket_item *r = (struct bucket_item *)(rv); The const should be used also for the specific types. > + > + return r->count - l->count; > +} > + > +/* > + * Byte Core set size > + * How many bytes use 90% of sample > + */ > +static int byte_core_set_size(struct workspace *ws) > +{ > + u32 a = 0; > + u32 coreset_sum = 0; > + u32 core_set_threshold = ws->sample_size*90/100; u32 core_set_threshold = ws->sample_size * 90 / 100; > + struct bucket_item *bucket = ws->bucket; > + > + /* Sort in reverse order */ So if it's reverse, please add _rev to the end of the comparator function. The common naming is "comp_WHAT" so I suggest to use comp_bucket_rev. > + sort(bucket, BUCKET_SIZE, sizeof(*bucket), > + &bucket_compare, NULL); > + > + for (; a < BYTE_CORE_SET_LOW; a++) > + coreset_sum += bucket[a].count; > + > + if (coreset_sum > core_set_threshold) > + return a; > + > + for (; a < BYTE_CORE_SET_HIGH && bucket[a].count > 0; a++) { > + coreset_sum += bucket[a].count; > + if (coreset_sum > core_set_threshold) > + break; > + } > + > + return a; > +} > + > static u32 byte_set_size(const struct workspace *ws) > { > u32 a = 0; > @@ -164,7 +206,14 @@ static int heuristic(struct list_head *ws, struct inode *inode, > if (a > BYTE_SET_THRESHOLD) > return 2; > > - return 1; > + a = byte_core_set_size(workspace); > + if (a <= BYTE_CORE_SET_LOW) > + return 3; > + > + if (a >= BYTE_CORE_SET_HIGH) > + return 0; > + > + return 4; > } > > const struct btrfs_compress_op btrfs_heuristic = { > -- > 2.14.1 > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 25, 2017 at 12:18:45PM +0300, Timofey Titovets wrote: > Calculate byte core set for data sample: > Sort bucket's numbers in decreasing order > Count how many numbers use 90% of sample > If core set are low (<=25%), data are easily compressible > If core set high (>=80%), data are not compressible > > Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com> > --- > fs/btrfs/heuristic.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 50 insertions(+), 1 deletion(-) > > diff --git a/fs/btrfs/heuristic.c b/fs/btrfs/heuristic.c > index ef723e991576..df0cefa42857 100644 > --- a/fs/btrfs/heuristic.c > +++ b/fs/btrfs/heuristic.c > @@ -18,6 +18,7 @@ > #include <linux/pagemap.h> > #include <linux/string.h> > #include <linux/bio.h> > +#include <linux/sort.h> > #include "compression.h" > > #define READ_SIZE 16 > @@ -25,6 +26,8 @@ > #define BUCKET_SIZE 256 > #define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED*READ_SIZE/ITER_SHIFT) > #define BYTE_SET_THRESHOLD 64 > +#define BYTE_CORE_SET_LOW BYTE_SET_THRESHOLD > +#define BYTE_CORE_SET_HIGH 200 // ~80% > > struct bucket_item { > u32 count; > @@ -67,6 +70,45 @@ static struct list_head *heuristic_alloc_workspace(void) > return ERR_PTR(-ENOMEM); > } > > +/* For bucket sorting */ > +static inline int bucket_compare(const void *lv, const void *rv) > +{ > + struct bucket_item *l = (struct bucket_item *)(lv); > + struct bucket_item *r = (struct bucket_item *)(rv); > + > + return r->count - l->count; > +} > + > +/* > + * Byte Core set size > + * How many bytes use 90% of sample > + */ > +static int byte_core_set_size(struct workspace *ws) > +{ > + u32 a = 0; > + u32 coreset_sum = 0; > + u32 core_set_threshold = ws->sample_size*90/100; > + struct bucket_item *bucket = ws->bucket; > + > + /* Sort in reverse order */ > + sort(bucket, BUCKET_SIZE, sizeof(*bucket), > + &bucket_compare, NULL); > + > + for (; a < BYTE_CORE_SET_LOW; a++) > + coreset_sum += bucket[a].count; > + > + if (coreset_sum > core_set_threshold) > + return a; > + > + for (; a < BYTE_CORE_SET_HIGH && bucket[a].count > 0; a++) { > + coreset_sum += bucket[a].count; > + if (coreset_sum > core_set_threshold) > + break; > + } > + > + return a; > +} > + > static u32 byte_set_size(const struct workspace *ws) > { > u32 a = 0; > @@ -164,7 +206,14 @@ static int heuristic(struct list_head *ws, struct inode *inode, > if (a > BYTE_SET_THRESHOLD) > return 2; > > - return 1; > + a = byte_core_set_size(workspace); > + if (a <= BYTE_CORE_SET_LOW) > + return 3; > + > + if (a >= BYTE_CORE_SET_HIGH) > + return 0; > + > + return 4; So the return value determines guessed compressibility, this could use some human readable enums or defines. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/btrfs/heuristic.c b/fs/btrfs/heuristic.c index ef723e991576..df0cefa42857 100644 --- a/fs/btrfs/heuristic.c +++ b/fs/btrfs/heuristic.c @@ -18,6 +18,7 @@ #include <linux/pagemap.h> #include <linux/string.h> #include <linux/bio.h> +#include <linux/sort.h> #include "compression.h" #define READ_SIZE 16 @@ -25,6 +26,8 @@ #define BUCKET_SIZE 256 #define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED*READ_SIZE/ITER_SHIFT) #define BYTE_SET_THRESHOLD 64 +#define BYTE_CORE_SET_LOW BYTE_SET_THRESHOLD +#define BYTE_CORE_SET_HIGH 200 // ~80% struct bucket_item { u32 count; @@ -67,6 +70,45 @@ static struct list_head *heuristic_alloc_workspace(void) return ERR_PTR(-ENOMEM); } +/* For bucket sorting */ +static inline int bucket_compare(const void *lv, const void *rv) +{ + struct bucket_item *l = (struct bucket_item *)(lv); + struct bucket_item *r = (struct bucket_item *)(rv); + + return r->count - l->count; +} + +/* + * Byte Core set size + * How many bytes use 90% of sample + */ +static int byte_core_set_size(struct workspace *ws) +{ + u32 a = 0; + u32 coreset_sum = 0; + u32 core_set_threshold = ws->sample_size*90/100; + struct bucket_item *bucket = ws->bucket; + + /* Sort in reverse order */ + sort(bucket, BUCKET_SIZE, sizeof(*bucket), + &bucket_compare, NULL); + + for (; a < BYTE_CORE_SET_LOW; a++) + coreset_sum += bucket[a].count; + + if (coreset_sum > core_set_threshold) + return a; + + for (; a < BYTE_CORE_SET_HIGH && bucket[a].count > 0; a++) { + coreset_sum += bucket[a].count; + if (coreset_sum > core_set_threshold) + break; + } + + return a; +} + static u32 byte_set_size(const struct workspace *ws) { u32 a = 0; @@ -164,7 +206,14 @@ static int heuristic(struct list_head *ws, struct inode *inode, if (a > BYTE_SET_THRESHOLD) return 2; - return 1; + a = byte_core_set_size(workspace); + if (a <= BYTE_CORE_SET_LOW) + return 3; + + if (a >= BYTE_CORE_SET_HIGH) + return 0; + + return 4; } const struct btrfs_compress_op btrfs_heuristic = {
Calculate byte core set for data sample: Sort bucket's numbers in decreasing order Count how many numbers use 90% of sample If core set are low (<=25%), data are easily compressible If core set high (>=80%), data are not compressible Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com> --- fs/btrfs/heuristic.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) -- 2.14.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html