@@ -7,4 +7,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o acl.o free-space-cache.o zlib.o \
- compression.o delayed-ref.o relocation.o
+ compression.o delayed-ref.o relocation.o hotdata_map.o \
+ hotdata_hash.o
+
+btrfs-$(CONFIG_DEBUG_FS) += debugfs.o
@@ -31,6 +31,8 @@
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
+#include "hotdata_map.h"
+#include "hotdata_hash.h"
struct btrfs_trans_handle;
struct btrfs_transaction;
@@ -877,6 +879,7 @@ struct btrfs_fs_info {
struct mutex cleaner_mutex;
struct mutex chunk_mutex;
struct mutex volume_mutex;
+
/*
* this protects the ordered operations list only while we are
* processing all of the entries on it. This way we make
@@ -950,6 +953,7 @@ struct btrfs_fs_info {
struct btrfs_workers endio_meta_write_workers;
struct btrfs_workers endio_write_workers;
struct btrfs_workers submit_workers;
+
/*
* fixup workers take dirty pages that didn't properly go through
* the cow mechanism and make them safe to write. It happens
@@ -958,6 +962,7 @@ struct btrfs_fs_info {
struct btrfs_workers fixup_workers;
struct task_struct *transaction_kthread;
struct task_struct *cleaner_kthread;
+
int thread_pool_size;
struct kobject super_kobj;
@@ -1092,6 +1097,15 @@ struct btrfs_root {
/* red-black tree that keeps track of in-memory inodes */
struct rb_root inode_tree;
+ /* red-black tree that keeps track of fs-wide hot data */
+ struct hot_inode_tree hot_inode_tree;
+
+ /* hash map of inode temperature */
+ struct heat_hashlist_entry heat_inode_hl[HEAT_HASH_SIZE];
+
+ /* hash map of range temperature */
+ struct heat_hashlist_entry heat_range_hl[HEAT_HASH_SIZE];
+
/*
* right now this just gets used so that a root has its own devid
* for stat. It may be used for more later
@@ -1192,6 +1206,8 @@ struct btrfs_root {
#define BTRFS_MOUNT_NOSSD (1 << 9)
#define BTRFS_MOUNT_DISCARD (1 << 10)
#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
+#define BTRFS_MOUNT_HOTDATA_TRACK (1 << 12)
+#define BTRFS_MOUNT_HOTDATA_MOVE (1 << 13)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1211,6 +1227,24 @@ struct btrfs_root {
#define BTRFS_INODE_NODUMP (1 << 8)
#define BTRFS_INODE_NOATIME (1 << 9)
#define BTRFS_INODE_DIRSYNC (1 << 10)
+#define BTRFS_INODE_NO_HOTDATA_TRACK (1 << 11)
+#define BTRFS_INODE_NO_HOTDATA_MOVE (1 << 12)
+
+/*
+ * Hot data guard macros.
+ *
+ * The mount-wide checks take a struct btrfs_root pointer.  The per-inode
+ * checks take a struct btrfs_inode pointer (as returned by BTRFS_I()) and
+ * additionally honour that inode's BTRFS_INODE_NO_HOTDATA_* opt-out flag.
+ * Every macro argument is parenthesized so that arbitrary pointer
+ * expressions can be passed safely.
+ */
+
+/* true when the HOTDATA_TRACK mount option is set */
+#define BTRFS_TRACKING_HOT_DATA(btrfs_root) \
+	(btrfs_test_opt((btrfs_root), HOTDATA_TRACK))
+
+/*
+ * true when the HOTDATA_MOVE mount option is set and the super block is
+ * not mounted read-only; tracking alone must not enable data movement
+ */
+#define BTRFS_MOVING_HOT_DATA(btrfs_root) \
+	((btrfs_test_opt((btrfs_root), HOTDATA_MOVE)) && \
+	 !((btrfs_root)->fs_info->sb->s_flags & MS_RDONLY))
+
+/* true when tracking is on and the inode has not opted out of tracking */
+#define BTRFS_TRACK_THIS_INODE(btrfs_inode) \
+	((BTRFS_TRACKING_HOT_DATA((btrfs_inode)->root)) && \
+	 !((btrfs_inode)->flags & BTRFS_INODE_NO_HOTDATA_TRACK))
+
+/* true when moving is on and the inode has not opted out of moving */
+#define BTRFS_MOVE_THIS_INODE(btrfs_inode) \
+	((BTRFS_MOVING_HOT_DATA((btrfs_inode)->root)) && \
+	 !((btrfs_inode)->flags & BTRFS_INODE_NO_HOTDATA_MOVE))
/* some macros to generate set/get funcs for the struct fields. This
* assumes there is a lefoo_to_cpu for every type, so lets make a simple
@@ -2457,6 +2491,14 @@ int btrfs_sysfs_add_root(struct btrfs_root *root);
void btrfs_sysfs_del_root(struct btrfs_root *root);
void btrfs_sysfs_del_super(struct btrfs_fs_info *root);
+/* debugfs.c */
+#ifdef CONFIG_DEBUG_FS
+int btrfs_init_debugfs(void);
+void btrfs_exit_debugfs(void);
+int btrfs_init_debugfs_volume(const char *dev_name, struct super_block *sb);
+void btrfs_exit_debugfs_volume(struct super_block *sb);
+#else
+/*
+ * debugfs.o is only built when CONFIG_DEBUG_FS is set, but the mount,
+ * unmount and module init/exit paths call these hooks unconditionally.
+ * Provide no-op versions so the filesystem still builds and loads without
+ * debugfs; the int-returning stubs report success.
+ */
+static inline int btrfs_init_debugfs(void)
+{
+	return 0;
+}
+
+static inline void btrfs_exit_debugfs(void)
+{
+}
+
+static inline int btrfs_init_debugfs_volume(const char *dev_name,
+					    struct super_block *sb)
+{
+	return 0;
+}
+
+static inline void btrfs_exit_debugfs_volume(struct super_block *sb)
+{
+}
+#endif
+
/* xattr.c */
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
@@ -39,6 +39,7 @@
#include "locking.h"
#include "tree-log.h"
#include "free-space-cache.h"
+#include "hotdata_hash.h"
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
@@ -893,11 +894,32 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
return 0;
}
+/*
+ * Prepare a root for hot data tracking: initialise its hot inode tree and
+ * zero both heat hash tables, then give every bucket an empty hlist head,
+ * an initialised rwlock and a temperature equal to the bucket's index.
+ */
+static inline void __setup_hotdata(struct btrfs_root *root)
+{
+	struct heat_hashlist_entry *inode_hl;
+	struct heat_hashlist_entry *range_hl;
+	int temp;
+
+	hot_inode_tree_init(&root->hot_inode_tree);
+
+	memset(&root->heat_inode_hl, 0, sizeof(root->heat_inode_hl));
+	memset(&root->heat_range_hl, 0, sizeof(root->heat_range_hl));
+
+	for (temp = 0; temp < HEAT_HASH_SIZE; temp++) {
+		/* inode table bucket */
+		inode_hl = &root->heat_inode_hl[temp];
+		INIT_HLIST_HEAD(&inode_hl->hashhead);
+		rwlock_init(&inode_hl->rwlock);
+		inode_hl->temperature = temp;
+
+		/* range table bucket; kept as a separate rwlock_init() site */
+		range_hl = &root->heat_range_hl[temp];
+		INIT_HLIST_HEAD(&range_hl->hashhead);
+		rwlock_init(&range_hl->rwlock);
+		range_hl->temperature = temp;
+	}
+}
+
static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
u32 stripesize, struct btrfs_root *root,
struct btrfs_fs_info *fs_info,
u64 objectid)
{
+
root->node = NULL;
root->commit_root = NULL;
root->sectorsize = sectorsize;
@@ -945,6 +967,10 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
memset(&root->root_kobj, 0, sizeof(root->root_kobj));
+
+ if (BTRFS_TRACKING_HOT_DATA(root))
+ __setup_hotdata(root);
+
root->defrag_trans_start = fs_info->generation;
init_completion(&root->kobj_unregister);
root->defrag_running = 0;
@@ -2324,6 +2350,9 @@ static void free_fs_root(struct btrfs_root *root)
down_write(&root->anon_super.s_umount);
kill_anon_super(&root->anon_super);
}
+
+ free_heat_hashlists(root);
+ free_hot_inode_tree(root);
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
kfree(root->name);
@@ -2468,8 +2468,10 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
int ret = 0;
int done = 0;
int nr_to_write_done = 0;
+ int nr_written = 0;
struct pagevec pvec;
int nr_pages;
+ pgoff_t start;
pgoff_t index;
pgoff_t end; /* Inclusive */
int scanned = 0;
@@ -2486,6 +2488,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
range_whole = 1;
scanned = 1;
}
+ start = index << PAGE_CACHE_SHIFT;
retry:
while (!done && !nr_to_write_done && (index <= end) &&
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
@@ -2547,6 +2550,7 @@ retry:
* at any time
*/
nr_to_write_done = wbc->nr_to_write <= 0;
+ nr_written += 1;
}
pagevec_release(&pvec);
cond_resched();
@@ -2560,6 +2564,20 @@ retry:
index = 0;
goto retry;
}
+
+	/*
+	 * Feed this writeback pass into the hot data statistics.  i_ino = 1
+	 * appears to come from metadata operations, ignore those writes.
+	 */
+	if (BTRFS_TRACK_THIS_INODE(BTRFS_I(mapping->host)) &&
+	    mapping->host->i_ino > 1) {
+		pr_debug("btrfs recorded a write %lu, %lu, %lu\n",
+			 mapping->host->i_ino, start, nr_written *
+			 PAGE_CACHE_SIZE);
+		btrfs_update_freqs(mapping->host, start,
+				   nr_written * PAGE_CACHE_SIZE, 1);
+	}
+
return ret;
}
@@ -37,6 +37,7 @@
#include <linux/posix_acl.h>
#include <linux/falloc.h>
#include <linux/slab.h>
+#include <linux/pagevec.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
@@ -50,6 +51,7 @@
#include "tree-log.h"
#include "compression.h"
#include "locking.h"
+#include "hotdata_map.h"
struct btrfs_iget_args {
u64 ino;
@@ -4515,6 +4517,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
if (btrfs_test_opt(root, NODATACOW))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
+ if (!btrfs_test_opt(root, HOTDATA_TRACK))
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NO_HOTDATA_TRACK;
+ if (!btrfs_test_opt(root, HOTDATA_MOVE))
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NO_HOTDATA_MOVE;
}
insert_inode_hash(inode);
@@ -5781,6 +5787,10 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
lockstart = offset;
lockend = offset + count - 1;
+ if (BTRFS_TRACK_THIS_INODE(BTRFS_I(inode)) && count > 0)
+ btrfs_update_freqs(inode, lockstart, (u64) count,
+ writing);
+
if (writing) {
ret = btrfs_delalloc_reserve_space(inode, count);
if (ret)
@@ -5860,7 +5870,15 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
int btrfs_readpage(struct file *file, struct page *page)
{
struct extent_io_tree *tree;
+	struct inode *inode = page->mapping->host;
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+
tree = &BTRFS_I(page->mapping->host)->io_tree;
+
+	/* account one page at this byte offset; final 0 = not a write */
+	if (BTRFS_TRACK_THIS_INODE(BTRFS_I(inode)))
+		btrfs_update_freqs(inode, start, PAGE_CACHE_SIZE, 0);
+
return extent_read_full_page(tree, page, btrfs_get_extent);
}
@@ -5892,7 +5910,16 @@ btrfs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
struct extent_io_tree *tree;
+ u64 start, len;
+
tree = &BTRFS_I(mapping->host)->io_tree;
+ start = (u64) (list_entry(pages->prev, struct page, lru)->index)
+ << PAGE_CACHE_SHIFT;
+ len = nr_pages * PAGE_CACHE_SIZE;
+
+ if (len > 0 && BTRFS_TRACK_THIS_INODE(BTRFS_I(mapping->host)))
+ btrfs_update_freqs(mapping->host, start, len, 0);
+
return extent_readpages(tree, mapping, pages, nr_pages,
btrfs_get_extent);
}
@@ -51,6 +51,8 @@
#include "version.h"
#include "export.h"
#include "compression.h"
+#include "hotdata_map.h"
+#include "hotdata_hash.h"
static const struct super_operations btrfs_super_ops;
@@ -59,6 +61,9 @@ static void btrfs_put_super(struct super_block *sb)
struct btrfs_root *root = btrfs_sb(sb);
int ret;
+ if (BTRFS_TRACKING_HOT_DATA(root))
+ btrfs_exit_debugfs_volume(sb);
+
ret = close_ctree(root);
sb->s_fs_info = NULL;
}
@@ -68,7 +73,7 @@ enum {
Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
- Opt_discard, Opt_err,
+ Opt_discard, Opt_hotdatatrack, Opt_hotdatamove, Opt_err,
};
static match_table_t tokens = {
@@ -92,6 +97,8 @@ static match_table_t tokens = {
{Opt_flushoncommit, "flushoncommit"},
{Opt_ratio, "metadata_ratio=%d"},
{Opt_discard, "discard"},
+ {Opt_hotdatatrack, "hotdatatrack"},
+ {Opt_hotdatamove, "hotdatamove"},
{Opt_err, NULL},
};
@@ -235,6 +242,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_discard:
btrfs_set_opt(info->mount_opt, DISCARD);
break;
+ case Opt_hotdatamove:
+ printk(KERN_INFO "btrfs: turning on hot data "
+ "migration\n");
+ printk(KERN_INFO " (implies hotdatatrack, "
+ "no ssd_spread)\n");
+ btrfs_set_opt(info->mount_opt, HOTDATA_MOVE);
+ btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
+ case Opt_hotdatatrack:
+ printk(KERN_INFO "btrfs: turning on hot data"
+ " tracking\n");
+ btrfs_set_opt(info->mount_opt, HOTDATA_TRACK);
+ break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
@@ -457,6 +476,7 @@ static int btrfs_fill_super(struct super_block *sb,
printk("btrfs: open_ctree failed\n");
return PTR_ERR(tree_root);
}
+
sb->s_fs_info = tree_root;
disk_super = &tree_root->fs_info->super_copy;
@@ -659,6 +679,9 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
mnt->mnt_sb = s;
mnt->mnt_root = root;
+ if (btrfs_test_opt(btrfs_sb(s), HOTDATA_TRACK))
+ btrfs_init_debugfs_volume(dev_name, s);
+
kfree(subvol_name);
return 0;
@@ -846,18 +869,30 @@ static int __init init_btrfs_fs(void)
if (err)
goto free_sysfs;
- err = extent_io_init();
+ err = btrfs_init_debugfs();
if (err)
goto free_cachep;
+ err = extent_io_init();
+ if (err)
+ goto free_debugfs;
+
err = extent_map_init();
if (err)
goto free_extent_io;
- err = btrfs_interface_init();
+ err = hot_inode_item_init();
if (err)
goto free_extent_map;
+ err = hot_range_item_init();
+ if (err)
+ goto free_hot_inode_item;
+
+ err = btrfs_interface_init();
+ if (err)
+ goto free_hot_range_item;
+
err = register_filesystem(&btrfs_fs_type);
if (err)
goto unregister_ioctl;
@@ -867,10 +902,16 @@ static int __init init_btrfs_fs(void)
unregister_ioctl:
btrfs_interface_exit();
+free_hot_range_item:
+ hot_range_item_exit();
+free_hot_inode_item:
+ hot_inode_item_exit();
free_extent_map:
extent_map_exit();
free_extent_io:
extent_io_exit();
+free_debugfs:
+ btrfs_exit_debugfs();
free_cachep:
btrfs_destroy_cachep();
free_sysfs:
@@ -886,6 +927,10 @@ static void __exit exit_btrfs_fs(void)
btrfs_interface_exit();
unregister_filesystem(&btrfs_fs_type);
+	/* undo hot_inode_item_init()/hot_range_item_init() from module init */
+	hot_range_item_exit();
+	hot_inode_item_exit();
btrfs_exit_sysfs();
+ btrfs_exit_debugfs();
btrfs_cleanup_fs_uuids();
btrfs_zlib_exit();
}