From patchwork Thu Jan 6 18:25:00 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Josef Bacik X-Patchwork-Id: 460181 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id p06ItJhL015000 for ; Thu, 6 Jan 2011 18:55:20 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752864Ab1AFSfR (ORCPT ); Thu, 6 Jan 2011 13:35:17 -0500 Received: from mx1.redhat.com ([209.132.183.28]:23094 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753218Ab1AFSfP (ORCPT ); Thu, 6 Jan 2011 13:35:15 -0500 Received: from int-mx02.intmail.prod.int.phx2.redhat.com (int-mx02.intmail.prod.int.phx2.redhat.com [10.5.11.12]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id p06IZEg6026923 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Thu, 6 Jan 2011 13:35:15 -0500 Received: from localhost.localdomain (test1244.test.redhat.com [10.10.10.244]) by int-mx02.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id p06IZD9c008343 for ; Thu, 6 Jan 2011 13:35:14 -0500 From: Josef Bacik To: linux-btrfs@vger.kernel.org Subject: [PATCH] Btrfs-progs: add dedup functionality Date: Thu, 6 Jan 2011 13:25:00 -0500 Message-Id: <1294338300-24259-3-git-send-email-josef@redhat.com> In-Reply-To: <1294338300-24259-1-git-send-email-josef@redhat.com> References: <1294338300-24259-1-git-send-email-josef@redhat.com> X-Scanned-By: MIMEDefang 2.67 on 10.5.11.12 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); Thu, 06 Jan 2011 18:55:20 +0000 (UTC) diff --git a/Makefile b/Makefile index 525676e..b9a51c4 100644 --- a/Makefile +++ b/Makefile @@ -15,10 +15,11 @@ INSTALL= install prefix ?= /usr/local bindir = $(prefix)/bin LIBS=-luuid +GCRYPT_LIBS=`libgcrypt-config --libs` +GCRYPT_CFLAGS=`libgcrypt-config --cflags` progs = btrfsctl mkfs.btrfs btrfs-debug-tree btrfs-show btrfs-vol btrfsck \ - btrfs \ - btrfs-map-logical + btrfs btrfs-map-logical btrfs-dedup # make C=1 to enable sparse ifdef C @@ -50,6 +51,9 @@ btrfs-vol: $(objects) btrfs-vol.o btrfs-show: $(objects) btrfs-show.o gcc $(CFLAGS) -o btrfs-show btrfs-show.o $(objects) $(LDFLAGS) $(LIBS) +btrfs-dedup: $(objects) dedup.o + gcc $(CFLAGS) -o btrfs-dedup dedup.c $(objects) $(LDFLAGS) $(GCRYPT_LIBS) $(GCRYPT_CFLAGS) $(LIBS) + btrfsck: $(objects) btrfsck.o gcc $(CFLAGS) -o btrfsck btrfsck.o $(objects) $(LDFLAGS) $(LIBS) diff --git a/dedup.c b/dedup.c new file mode 100644 index 0000000..b752a6f --- /dev/null +++ b/dedup.c @@ -0,0 +1,654 @@ +/* + * Copyright (C) 2011 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#define _GNU_SOURCE 1 +#define X_OPEN_SOURCE 500 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "rbtree.h" +#include "ioctl.h" + +struct file_entry { + ino_t ino; + unsigned long pos; + unsigned long len; + unsigned long physical; + char *hash; + int entries; + int fd; + struct file_entry *next; + struct rb_node n; +}; + +struct file { + char *name; + ino_t ino; + struct rb_node n; +}; + +static unsigned int digest_len; +static char *digest; +static struct rb_root hash_tree; +static struct rb_root file_tree; +unsigned int buffer_len = 65536; + +static void print_hash(uint64_t *hash) +{ + int i; + + for (i = 0; i < 8; i++) { + printf("%llx", (unsigned long long)hash[i]); + } +} + +static char *hash_buffer(void *buffer, int len) +{ + gcry_md_hash_buffer(GCRY_MD_SHA256, digest, buffer, len); + + return strndup(digest, digest_len); +} + +static struct rb_node *hash_tree_insert(struct file_entry *fe) +{ + struct rb_node **p = &hash_tree.rb_node; + struct rb_node *parent = NULL; + struct file_entry *entry; + + while (*p) { + int cmp; + + parent = *p; + entry = rb_entry(parent, struct file_entry, n); + + cmp = memcmp(fe->hash, entry->hash, digest_len); + + if (cmp < 0) + p = &(*p)->rb_left; + else if (cmp > 0) + p = &(*p)->rb_right; + else + return parent; + } + + rb_link_node(&fe->n, parent, p); + rb_insert_color(&fe->n, &hash_tree); + + return NULL; +} + +#if 0 +static unsigned long logical_to_physical(int fd, unsigned long logical, + unsigned long len) +{ + struct fiemap *map; + struct fiemap_extent *extent; + unsigned long physical = 0; + int size = sizeof(struct fiemap) + sizeof(struct fiemap_extent); + int ret; + + map = malloc(sizeof(struct fiemap) + sizeof(struct fiemap_extent)); + if (!map) + return 0; + + memset(map, 0, size); + map->fm_start = logical; + map->fm_length = len; + map->fm_flags = FIEMAP_FLAG_SYNC; + map->fm_extent_count = 1; + ret = ioctl(fd, FS_IOC_FIEMAP, map); + if (ret) { + fprintf(stderr, "failed to do fiemap %d\n", errno); + return 0; + } + extent = &map->fm_extents[0]; + + physical = extent->fe_physical; + if (extent->fe_logical < logical) + physical += (logical - extent->fe_logical); + free(map); + + return physical; +} +#endif + +static struct rb_node *file_tree_insert(struct file *file) +{ + struct rb_node **p = &file_tree.rb_node; + struct rb_node *parent = NULL; + struct file *entry; + + while (*p) { + parent = *p; + entry = rb_entry(parent, struct file, n); + + if (file->ino < entry->ino) + p = &(*p)->rb_left; + else if (file->ino > entry->ino) + p = &(*p)->rb_right; + else + return parent; + } + + rb_link_node(&file->n, parent, p); + rb_insert_color(&file->n, &file_tree); + + return NULL; +} + +static struct file *file_tree_search(ino_t ino) +{ + struct rb_node *node = file_tree.rb_node; + struct file *entry; + + while (node) { + entry = rb_entry(node, struct file, n); + if (ino < entry->ino) + node = node->rb_left; + else if (ino > entry->ino) + node = node->rb_right; + else + return entry; + } + + return NULL; +} + +static int hash_file(char *filename) +{ + char *buffer; + struct rb_node *node; + struct file *file; + int fd; + ssize_t bytes; + size_t pos = 0; + struct stat st; + ino_t ino; + + buffer = malloc(buffer_len); + if (!buffer) { + fprintf(stderr, "failed to alloc buffer\n"); + return 1; + } + + file = malloc(sizeof(*file)); + if (!file) { + free(buffer); + fprintf(stderr, "failed to alloc file thing\n"); + return 1; + } + + fd = open(filename, O_RDONLY); + if (fd < 0) { + free(buffer); + return 1; + } + + if (fstat(fd, &st) < 0) { + fprintf(stderr, "failed to stat\n"); + free(buffer); + return 1; + } + + ino = st.st_ino; + file->ino = ino; + + node = file_tree_insert(file); + if (node) { + printf("wtf?\n"); + free(file); + free(buffer); + close(fd); + return 0; + } + + file->name = strdup(filename); + + while ((bytes = read(fd, buffer, buffer_len)) > 0) { + struct file_entry *entry = malloc(sizeof(struct file_entry)); + + if (!entry) { + fprintf(stderr, "failed to allocate entry\n"); + bytes = -1; + break; + } + + entry->ino = ino; + entry->pos = pos; + entry->len = bytes; + entry->hash = hash_buffer(buffer, bytes); + if (!entry->hash) { + fprintf(stderr, "failed to allocate hash\n"); + bytes = -1; + break; + } + entry->next = NULL; + entry->entries = 0; + node = hash_tree_insert(entry); + if (node) { + struct file_entry *existing; + + existing = rb_entry(node, struct file_entry, n); + free(entry->hash); + entry->hash = NULL; + entry->next = existing->next; + existing->next = entry; + existing->entries++; + } else { +// entry->physical = logical_to_physical(fd, pos, bytes); + } + pos += bytes; + } + + close(fd); + free(buffer); + + if (bytes < 0) + printf("Bytes negative, error %d\n", errno); + return (bytes < 0); +} + +static void print_stats() +{ + struct rb_node *node; + int total_extents = 0; + int total_duplicates = 0; + + for (node = rb_first(&hash_tree); node; node = rb_next(node)) { + struct file_entry *entry; + entry = rb_entry(node, struct file_entry, n); + total_extents++; + if (entry->entries) + total_duplicates += entry->entries; + } + + printf("Total extents hashed:\t%d\n", total_extents); + printf("Total duplicates:\t%d\n", total_duplicates); + printf("Total space saved:\t%d\n", total_duplicates * buffer_len); + +} + +static void free_hash_tree() +{ + struct rb_node *node; + + while ((node = rb_first(&hash_tree))) { + struct file_entry *entry; + entry = rb_entry(node, struct file_entry, n); + rb_erase(node, &hash_tree); + do { + struct file_entry *temp; + + if (entry->next == NULL) + break; + temp = entry->next; + free(entry->hash); + free(entry); + entry = temp; + } while (1); + + free(entry->hash); + free(entry); + } +} + +static void free_file_tree() +{ + struct rb_node *node; + + while ((node = rb_first(&file_tree))) { + struct file *entry; + entry = rb_entry(node, struct file, n); + rb_erase(node, &file_tree); + free(entry->name); + free(entry); + } +} + +static void verify_match(struct file_entry *fe) +{ + struct file_entry *orig = fe; + struct file *file; + char *buffer; + char *temp; + ssize_t bytes; + int fd; + + buffer = malloc(buffer_len); + if (!buffer) + return; + + temp = malloc(buffer_len); + if (!temp) { + free(buffer); + return; + } + + file = file_tree_search(fe->ino); + if (!file) { + fprintf(stderr, "couldn't find file, weird %lu\n", fe->ino); + goto out; + } + + fd = open(file->name, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "failed to open%s\n", file->name); + goto out; + } + + bytes = pread(fd, buffer, fe->len, fe->pos); + close(fd); + if (bytes < fe->len) { + fprintf(stderr, "short read\n"); + goto out; + } + + while (fe->next != NULL) { + struct file_entry *prev = fe; + + fe = fe->next; + file = file_tree_search(fe->ino); + if (!file) { + fprintf(stderr, "couldn't find file, weird\n"); + prev->next = fe->next; + free(fe->hash); + free(fe); + orig->entries--; + fe = prev; + continue; + } + + fd = open(file->name, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "couldn't open %s\n", file->name); + prev->next = fe->next; + free(fe->hash); + free(fe); + orig->entries--; + fe = prev; + continue; + } + + bytes = pread(fd, temp, fe->len, fe->pos); + close(fd); + if (bytes < fe->len) { + fprintf(stderr, "short read\n"); + prev->next = fe->next; + free(fe->hash); + free(fe); + orig->entries--; + fe = prev; + continue; + } + + if (memcmp(buffer, temp, fe->len)) { + fprintf(stderr, "manual compare doesn't match: %s\n", + file->name); + prev->next = fe->next; + free(fe->hash); + free(fe); + orig->entries--; + fe = prev; + continue; + } + } + + if (!orig->entries) { + rb_erase(&orig->n, &hash_tree); + free(orig->hash); + free(orig); + } +out: + free(buffer); + free(temp); +} + +static void prune_entries() +{ + struct rb_node *node; + + node = rb_first(&hash_tree); + while (node) { + struct file_entry *entry; + entry = rb_entry(node, struct file_entry, n); + if (entry->entries) { + node = rb_next(node); + verify_match(entry); + continue; + } + + node = rb_next(node); + rb_erase(&entry->n, &hash_tree); + free(entry->hash); + free(entry); + } +} + +static void print_hashes() +{ + struct rb_node *hash_node; + + for (hash_node = rb_first(&hash_tree); hash_node; + hash_node = rb_next(hash_node)) { + struct file_entry *fe; + struct file *file; + + fe = rb_entry(hash_node, struct file_entry, n); + + file = file_tree_search(fe->ino); + if (!file) { + fprintf(stderr, "couldnt find file, weird %lu\n", fe->ino); + break; + } + + print_hash((uint64_t *)fe->hash); + printf("\n"); + printf("\t%s: pos=%lu, phys=%lu, len=%lu\n", file->name, + fe->pos, fe->physical, fe->len); + + while (fe->next != NULL) { + fe = fe->next; + file = file_tree_search(fe->ino); + if (!file) { + fprintf(stderr, "couldnt find file, weird\n"); + continue; + } + + printf("\t%s: pos=%lu, len=%lu\n", file->name, + fe->pos, fe->len); + } + } +} + + +static void do_dedup() +{ + struct rb_node *node; + struct btrfs_ioctl_same_args *args; + struct btrfs_ioctl_file_extent_info *info; + size_t size; + int allocated_entries = 1; + + size = sizeof(*args) + sizeof(*info); + args = malloc(size); + if (!args) { + fprintf(stderr, "error allocating ioctl arguments\n"); + return; + } + + memset(args, 0, size); + + for (node = rb_first(&hash_tree); node; node = rb_next(node)) { + struct file_entry *fe, *orig; + struct file *file; + int fd; + int ret; + + orig = fe = rb_entry(node, struct file_entry, n); + + file = file_tree_search(fe->ino); + if (!file) { + fprintf(stderr, "couldnt find file, weird %lu\n", fe->ino); + break; + } + + fd = open(file->name, O_WRONLY); + if (fd < 0) { + fprintf(stderr, "error opening file (%s) for dedup: %s\n", + file->name, strerror(errno)); + continue; + } + + if (fe->entries > allocated_entries) { + allocated_entries = fe->entries; + size = sizeof(*args) + + (sizeof(*info) * allocated_entries); + args = realloc(args, size); + if (!args) { + fprintf(stderr, "error allocating ioctl " + "arguments\n"); + return; + } + memset(args, 0, size); + } + + args->total_files = 0; + args->logical_offset = fe->pos; + args->length = fe->len; + + info = &args->info[0]; + while (fe->next != NULL) { + fe = fe->next; + file = file_tree_search(fe->ino); + if (!file) { + fprintf(stderr, "couldnt find file, weird\n"); + continue; + } + + fe->fd = info->fd = open(file->name, O_WRONLY); + if (info->fd < 0) { + fprintf(stderr, "error opening file (%s) for " + "dedup: %s\n", file->name, + strerror(errno)); + continue; + } + info->logical_offset = fe->pos; + info++; + args->total_files++; + } + + if (args->total_files == 0) { + close(fd); + continue; + } + + ret = ioctl(fd, BTRFS_IOC_FILE_EXTENT_SAME, args); + if (ret) + fprintf(stderr, "failed to do extent same %d\n", + errno); + + fe = orig; + while (fe->next != NULL) { + fe = fe->next; + if (fe->fd >= 0) + close(fe->fd); + } + close(fd); + } +} + +static void scan_dir(char *dirname) +{ + DIR *dir; + struct dirent *dirent; + + dir = opendir(dirname); + if (!dir) { + fprintf(stderr, "failed to open dir %s: %d\n", dirname, errno); + return; + } + + while (((dirent = readdir(dir)) != NULL)) { + char name[PATH_MAX]; + if (dirent->d_type == DT_DIR) { + if (dirent->d_name[0] == '.') + continue; + snprintf(name, PATH_MAX, "%s/%s", dirname, + dirent->d_name); + scan_dir(name); + } else if (dirent->d_type == DT_REG) { + snprintf(name, PATH_MAX, "%s/%s", dirname, + dirent->d_name); + hash_file(name); + } + } + + closedir(dir); +} + +int main(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "dedup \n"); + exit(1); + } + + if (!gcry_check_version(GCRYPT_VERSION)) { + fprintf(stderr, "libgcrypt version mismatch\n"); + exit(1); + } + + gcry_control(GCRYCTL_DISABLE_SECMEM, 0); + gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0); + + digest_len = gcry_md_get_algo_dlen(GCRY_MD_SHA256); + digest = malloc(digest_len); + if (!digest) { + fprintf(stderr, "failed to alloc digest\n"); + exit(1); + } + + hash_tree = RB_ROOT; + file_tree = RB_ROOT; + + scan_dir(argv[1]); + + print_stats(); + + prune_entries(); + + print_hashes(); + + do_dedup(); + + free_hash_tree(); + free_file_tree(); + free(digest); + + return 0; +} diff --git a/ioctl.h b/ioctl.h index 4ace64f..c4c4858 100644 --- a/ioctl.h +++ b/ioctl.h @@ -138,6 +138,20 @@ struct btrfs_ioctl_disk_info_args { __u64 devices[0]; }; +#define BTRFS_SAME_EXTENT_HASH_SHA256 1 + +struct btrfs_ioctl_file_extent_info { + __s64 fd; + __u64 logical_offset; +}; + +struct btrfs_ioctl_same_args { + __u64 logical_offset; + __u64 length; + __u64 total_files; + struct btrfs_ioctl_file_extent_info info[0]; +}; + #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -177,4 +191,6 @@ struct btrfs_ioctl_disk_info_args { struct btrfs_ioctl_space_args) #define BTRFS_IOC_DISK_INFO _IOWR(BTRFS_IOCTL_MAGIC, 21, \ struct btrfs_ioctl_disk_info_args) +#define BTRFS_IOC_FILE_EXTENT_SAME _IOW(BTRFS_IOCTL_MAGIC, 24, \ + struct btrfs_ioctl_same_args) #endif