From patchwork Tue Jul 27 22:00:22 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: bchociej@gmail.com X-Patchwork-Id: 114639 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.4/8.14.3) with ESMTP id o6RM2Z1A003709 for ; Tue, 27 Jul 2010 22:02:36 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752098Ab0G0WBd (ORCPT ); Tue, 27 Jul 2010 18:01:33 -0400 Received: from mail-qy0-f181.google.com ([209.85.216.181]:40722 "EHLO mail-qy0-f181.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752040Ab0G0WB3 (ORCPT ); Tue, 27 Jul 2010 18:01:29 -0400 Received: by qyk8 with SMTP id 8so3003574qyk.19 for ; Tue, 27 Jul 2010 15:01:28 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:received:received:from:to:cc:subject:date :message-id:x-mailer:in-reply-to:references; bh=bVibsc5aTTV6/3HYxLI104kjwSSNhKd/qt09ZpxV0us=; b=EDcQFVWAcuNDBDzgZmQQZmuJiCBEiD0hLieoFmGNAFJsYk9vCWnTjkg8t2BMWpiJji QV2J1TtvIgg1NsoYDdYaRHhYT3o5kqg4yUWC0Besd7bvz/vUpKG5aDLqnfIIhLhg2Ami 0bLNKi9m75EHI5e42yhhfbukVwyIwJqHv57+A= DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=from:to:cc:subject:date:message-id:x-mailer:in-reply-to:references; b=seT2O6j5tvypTW9MsdVhG1ZUM6htmKOF2uXc/PIzRhOrUhz5WOdR2FhrQCWYrxskR8 AvzkKflEVwvXR9AJoUK+6d4vK+VtUbisH2jErjWEC63Jgi/kXo7ezbtg37CGb4Yfq07r eFO9QDl4icA5z8InleQpY3phZ3blaWiJhmPuo= Received: by 10.224.43.163 with SMTP id w35mr7760380qae.92.1280268088214; Tue, 27 Jul 2010 15:01:28 -0700 (PDT) Received: from localhost.localdomain ([32.97.110.65]) by mx.google.com with ESMTPS id b8sm1643830vci.21.2010.07.27.15.01.24 (version=SSLv3 cipher=RC4-MD5); Tue, 27 Jul 2010 15:01:26 -0700 (PDT) From: bchociej@gmail.com To: chris.mason@oracle.com, linux-btrfs@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org, cmm@us.ibm.com, bcchocie@us.ibm.com, 
mrlupfer@us.ibm.com, crscott@us.ibm.com, linux-kernel@vger.kernel.org
Subject: [RFC PATCH 4/5] Btrfs: Add debugfs interface for hot data stats
Date: Tue, 27 Jul 2010 17:00:22 -0500
Message-Id: <1280268023-18408-5-git-send-email-bchociej@gmail.com>
X-Mailer: git-send-email 1.7.0.4
In-Reply-To: <1280268023-18408-1-git-send-email-bchociej@gmail.com>
References: <1280268023-18408-1-git-send-email-bchociej@gmail.com>
Sender: linux-btrfs-owner@vger.kernel.org
Precedence: bulk
List-ID:
X-Mailing-List: linux-btrfs@vger.kernel.org
X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by
 milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]);
 Tue, 27 Jul 2010 22:02:36 +0000 (UTC)

diff --git a/fs/btrfs/debugfs.c b/fs/btrfs/debugfs.c
new file mode 100644
index 0000000..a0e7bb7
--- /dev/null
+++ b/fs/btrfs/debugfs.c
@@ -0,0 +1,565 @@
+/*
+ * NOTE(review): the <linux/...> header names were stripped from the archived
+ * copy of this patch; the list below is reconstructed from what this file
+ * uses and should be checked against the original posting.
+ */
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/limits.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include "ctree.h"
+#include "hotdata_map.h"
+#include "hotdata_hash.h"
+#include "debugfs.h"
+
+/*
+ * debugfs.c contains the code to interface with the btrfs debugfs.
+ * The debugfs outputs range- and file-level access frequency
+ * statistics for each mounted volume.
+ */
+
+/* Appended to the log when the buffer cannot be grown. */
+#define LOG_ERR_MSG "No more memory!\n"
+/* Appended to the log ahead of a message that overflowed log_work_buff. */
+#define LOG_TRUNCATE_MSG "The next message has been truncated.\n"
+
+/* list to keep track of each mounted volume's debugfs_vol_data */
+static LIST_HEAD(debugfs_vol_data_list);
+/* lock for debugfs_vol_data_list */
+static DEFINE_SPINLOCK(data_list_lock);
+/* pointer to top level debugfs dentry; NULL while debugfs is unavailable */
+static struct dentry *debugfs_root_dentry;
+
+/* Emits the statistics of one hot inode into a volume's log. */
+typedef void (*hot_inode_emit_fn)(struct hot_inode_item *hot_inode,
+				  struct debugfs_vol_data *data);
+
+/*
+ * Append len bytes of msg to the volume's log, growing the log buffer by at
+ * least LOG_PAGE_SIZE when the message would not leave room for a
+ * terminating NUL.  Called with data->log_lock held.
+ *
+ * Returns the number of bytes of msg appended, or 0 if the buffer could not
+ * be grown (in which case LOG_ERR_MSG is appended as far as it fits).
+ *
+ * NOTE(review): vmalloc() may sleep, yet this runs under data->log_lock and,
+ * via the __print_*_freq_data() callers, under further spinlocks.  Fixing
+ * that needs a different buffering scheme and is not attempted here.
+ */
+static int copy_msg_to_log(struct debugfs_vol_data *data, const char *msg,
+			   int len)
+{
+	struct lstring *log = data->debugfs_log;
+	unsigned long avail = data->log_alloc_size - log->len;
+	uint new_log_alloc_size;
+	char *new_log;
+
+	if (len <= 0)
+		return 0;
+
+	if ((unsigned long) len >= avail) {
+		/* Not enough room in the log buffer for the new message. */
+		/* Allocate a bigger buffer. */
+		new_log_alloc_size = data->log_alloc_size +
+			max_t(unsigned long, LOG_PAGE_SIZE, len + 1);
+		new_log = vmalloc(new_log_alloc_size);
+
+		if (!new_log) {
+			unsigned long n;
+
+			WARN_ON(1);
+			/* keep one byte spare so the log stays terminated */
+			if (avail > 1) {
+				n = min_t(unsigned long,
+					  sizeof(LOG_ERR_MSG) - 1, avail - 1);
+				memcpy(log->str + log->len, LOG_ERR_MSG, n);
+				log->len += n;
+			}
+			return 0;
+		}
+
+		memcpy(new_log, log->str, log->len);
+		memset(new_log + log->len, 0,
+		       new_log_alloc_size - log->len);
+		vfree(log->str);
+		log->str = new_log;
+		data->log_alloc_size = new_log_alloc_size;
+	}
+
+	/* copy the caller's message rather than always log_work_buff */
+	memcpy(log->str + log->len, msg, len);
+	log->len += (unsigned long) len;
+
+	return len;
+}
+
+/*
+ * Format a message and append it to the volume's log.
+ *
+ * Returns the number of bytes written to the log, 0 if the log could not be
+ * grown, or -1 if the volume currently has no log buffer.
+ */
+static int debugfs_log(struct debugfs_vol_data *data, const char *fmt, ...)
+{
+	struct lstring *log = data->debugfs_log;
+	va_list args;
+	int len;
+
+	if (log == NULL)
+		return -1;
+
+	spin_lock(&data->log_lock);
+
+	/* tested under the lock so that debugfs_log_exit() is observed */
+	if (log->str == NULL) {
+		spin_unlock(&data->log_lock);
+		return -1;
+	}
+
+	va_start(args, fmt);
+	len = vsnprintf(data->log_work_buff, sizeof(data->log_work_buff), fmt,
+			args);
+	va_end(args);
+
+	if (len >= (int) sizeof(data->log_work_buff)) {
+		/*
+		 * vsnprintf() returns the untruncated length; only
+		 * sizeof(log_work_buff) - 1 characters were stored.
+		 */
+		copy_msg_to_log(data, LOG_TRUNCATE_MSG,
+				sizeof(LOG_TRUNCATE_MSG) - 1);
+		len = sizeof(data->log_work_buff) - 1;
+	}
+
+	len = copy_msg_to_log(data, data->log_work_buff, len);
+	spin_unlock(&data->log_lock);
+
+	return len;
+}
+
+/*
+ * Initialize the log corresponding to a btrfs volume.  data->debugfs_log
+ * must already point at an lstring.  Returns 0, or -ENOMEM.
+ */
+static int debugfs_log_init(struct debugfs_vol_data *data)
+{
+	struct lstring *log = data->debugfs_log;
+	char *buf;
+
+	/* allocate before taking the spinlock: vmalloc() may sleep */
+	buf = vmalloc(INIT_LOG_ALLOC_SIZE);
+	if (!buf)
+		return -ENOMEM;
+	memset(buf, 0, INIT_LOG_ALLOC_SIZE);
+
+	spin_lock(&data->log_lock);
+	log->str = buf;
+	log->len = 0;
+	data->log_alloc_size = INIT_LOG_ALLOC_SIZE;
+	spin_unlock(&data->log_lock);
+
+	return 0;
+}
+
+/* free the log buffer corresponding to a btrfs volume */
+static void debugfs_log_exit(struct debugfs_vol_data *data)
+{
+	struct lstring *log = data->debugfs_log;
+	char *buf;
+
+	spin_lock(&data->log_lock);
+	buf = log->str;
+	log->str = NULL;
+	log->len = 0;
+	data->log_alloc_size = 0;
+	spin_unlock(&data->log_lock);
+
+	/* release outside the lock; vfree(NULL) is a no-op */
+	vfree(buf);
+}
+
+/* Allocate the lstring and log buffer for a volume.  Returns 0 or -ENOMEM. */
+static int btrfs_debugfs_alloc_log(struct debugfs_vol_data *data)
+{
+	struct lstring *log;
+	int err;
+
+	/* GFP_KERNEL | GFP_NOFS would simply be GFP_KERNEL */
+	log = kzalloc(sizeof(*log), GFP_NOFS);
+	if (!log)
+		return -ENOMEM;
+
+	data->debugfs_log = log;
+	err = debugfs_log_init(data);
+	if (err) {
+		data->debugfs_log = NULL;
+		kfree(log);
+	}
+
+	return err;
+}
+
+/* Release a volume's log, if it has one. */
+static void btrfs_debugfs_free_log(struct debugfs_vol_data *data)
+{
+	if (!data->debugfs_log)
+		return;
+
+	debugfs_log_exit(data);
+	kfree(data->debugfs_log);
+	data->debugfs_log = NULL;
+}
+
+/* Allocate a zeroed debugfs_vol_data bound to sb and its debugfs directory. */
+static struct debugfs_vol_data *
+btrfs_debugfs_alloc_vol_data(struct super_block *sb, struct dentry *de)
+{
+	struct debugfs_vol_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_NOFS);
+	if (!data)
+		return NULL;
+
+	data->sb = sb;
+	data->de = de;
+	spin_lock_init(&data->log_lock);
+	INIT_LIST_HEAD(&data->node);
+
+	return data;
+}
+
+/* Free a debugfs_vol_data that is no longer on debugfs_vol_data_list. */
+static void btrfs_debugfs_free_vol_data(struct debugfs_vol_data *data)
+{
+	btrfs_debugfs_free_log(data);
+	kfree(data);
+}
+
+/* fops to override for printing range data */
+static const struct file_operations btrfs_debugfs_range_fops = {
+	.read = __btrfs_debugfs_range_read,
+	.open = __btrfs_debugfs_open,
+};
+
+/* fops to override for printing inode data */
+static const struct file_operations btrfs_debugfs_inode_fops = {
+	.read = __btrfs_debugfs_inode_read,
+	.open = __btrfs_debugfs_open,
+};
+
+/*
+ * Initialize debugfs for btrfs at module init.  Returns 0, or -EIO if the
+ * top level directory could not be created.
+ */
+int btrfs_init_debugfs(void)
+{
+	struct dentry *root = debugfs_create_dir(DEBUGFS_ROOT_NAME, NULL);
+
+	/* NULL on failure, ERR_PTR() when debugfs is not built in */
+	if (IS_ERR_OR_NULL(root))
+		return -EIO;
+
+	debugfs_root_dentry = root;
+	return 0;
+}
+
+/*
+ * On each volume mount, initialize the debugfs dentries and associated
+ * structures (debugfs_vol_data; the debugfs_log is created on first read).
+ *
+ * uuid is the mounted device name; its first DEV_NAME_CHOP characters are
+ * dropped to form the directory name.  Returns 0, -ENOMEM, or -EIO.
+ */
+int btrfs_init_debugfs_volume(const char *uuid, struct super_block *sb)
+{
+	struct debugfs_vol_data *range_data = NULL;
+	struct debugfs_vol_data *inode_data = NULL;
+	struct dentry *volume_dir;
+	size_t name_len;
+	char dev[NAME_MAX];
+	int err = -EIO;
+
+	if (!debugfs_root_dentry || !uuid)
+		return -EIO;
+
+	/* create debugfs folder for this volume by mounted dev name */
+	name_len = strlen(uuid);
+	if (name_len <= DEV_NAME_CHOP ||
+	    name_len - DEV_NAME_CHOP >= sizeof(dev))
+		return -EIO;
+	memcpy(dev, uuid + DEV_NAME_CHOP, name_len - DEV_NAME_CHOP + 1);
+
+	volume_dir = debugfs_create_dir(dev, debugfs_root_dentry);
+	if (IS_ERR_OR_NULL(volume_dir))
+		return -EIO;
+
+	/* one debugfs_vol_data per file, both sharing the volume directory */
+	range_data = btrfs_debugfs_alloc_vol_data(sb, volume_dir);
+	inode_data = btrfs_debugfs_alloc_vol_data(sb, volume_dir);
+	if (!range_data || !inode_data) {
+		err = -ENOMEM;
+		goto debugfs_error;
+	}
+
+	/* create debugfs range_data file */
+	if (!debugfs_create_file("range_data",
+				 S_IFREG | S_IRUSR | S_IWUSR | S_IRUGO,
+				 volume_dir, range_data,
+				 &btrfs_debugfs_range_fops))
+		goto debugfs_error;
+
+	/* create debugfs inode_data file */
+	if (!debugfs_create_file("inode_data",
+				 S_IFREG | S_IRUSR | S_IWUSR | S_IRUGO,
+				 volume_dir, inode_data,
+				 &btrfs_debugfs_inode_fops))
+		goto debugfs_error;
+
+	/*
+	 * Publish on the list only once nothing can fail any more, so the
+	 * error path never frees an entry that is still linked.
+	 */
+	spin_lock(&data_list_lock);
+	list_add(&range_data->node, &debugfs_vol_data_list);
+	list_add(&inode_data->node, &debugfs_vol_data_list);
+	spin_unlock(&data_list_lock);
+
+	return 0;
+
+debugfs_error:
+	/* takes any file created above down with the directory */
+	debugfs_remove_recursive(volume_dir);
+	kfree(inode_data);
+	kfree(range_data);
+
+	return err;
+}
+
+/*
+ * Find the entries of the volume being unmounted (match by superblock),
+ * remove its debugfs directory and free the associated memory.
+ */
+void btrfs_exit_debugfs_volume(struct super_block *sb)
+{
+	struct debugfs_vol_data *data;
+	struct debugfs_vol_data *next;
+	struct dentry *removed = NULL;
+	LIST_HEAD(doomed);
+
+	/* a volume owns two entries (range and inode), so collect them all */
+	spin_lock(&data_list_lock);
+	list_for_each_entry_safe(data, next, &debugfs_vol_data_list, node) {
+		if (data->sb == sb)
+			list_move(&data->node, &doomed);
+	}
+	spin_unlock(&data_list_lock);
+
+	/* tear down outside the spinlock: removing dentries may sleep */
+	list_for_each_entry_safe(data, next, &doomed, node) {
+		list_del(&data->node);
+		/* both entries share one directory dentry; remove it once */
+		if (data->de != removed) {
+			debugfs_remove_recursive(data->de);
+			removed = data->de;
+		}
+		btrfs_debugfs_free_vol_data(data);
+	}
+}
+
+/* clean up memory and remove dentries for debugfs */
+void btrfs_exit_debugfs(void)
+{
+	struct debugfs_vol_data *data;
+	struct debugfs_vol_data *next;
+	LIST_HEAD(doomed);
+
+	/* remove all debugfs entries recursively from the root */
+	debugfs_remove_recursive(debugfs_root_dentry);
+	debugfs_root_dentry = NULL;
+
+	/* then free whatever is still on debugfs_vol_data_list */
+	spin_lock(&data_list_lock);
+	list_splice_init(&debugfs_vol_data_list, &doomed);
+	spin_unlock(&data_list_lock);
+
+	list_for_each_entry_safe(data, next, &doomed, node) {
+		list_del(&data->node);
+		btrfs_debugfs_free_vol_data(data);
+	}
+}
+
+/* debugfs open file override from fops table */
+int __btrfs_debugfs_open(struct inode *inode, struct file *file)
+{
+	/* hand the per-file debugfs_vol_data to the read handlers */
+	if (inode->i_private)
+		file->private_data = inode->i_private;
+
+	return 0;
+}
+
+/*
+ * Shared implementation of the range_data and inode_data read handlers.
+ *
+ * A read at offset 0 rebuilds the volume's log by calling emit for every hot
+ * inode; reads at a later offset are served from that log, which is freed
+ * once the offset reaches its end.
+ *
+ * NOTE(review): the log is per file, not per open, so concurrent readers of
+ * the same file still interfere with each other.
+ */
+static ssize_t btrfs_debugfs_read_common(struct file *file, char __user *user,
+					 size_t count, loff_t *ppos,
+					 hot_inode_emit_fn emit)
+{
+	struct debugfs_vol_data *data = file->private_data;
+	struct hot_inode_item *hot_inode;
+	struct btrfs_root *root;
+	struct btrfs_root *fs_root;
+	struct lstring *log;
+	u64 next_ino;
+	int err;
+
+	if (!data)
+		return -EINVAL;
+
+	if (*ppos > 0) {
+		/* caller is continuing a previous read, don't walk tree */
+		log = data->debugfs_log;
+		if (log && *ppos < (loff_t) log->len)
+			goto print_to_user;
+
+		/* reader has finished the file, clean up */
+		btrfs_debugfs_free_log(data);
+		return 0;
+	}
+
+	/* fresh read: drop any stale log so entries are not duplicated */
+	btrfs_debugfs_free_log(data);
+	err = btrfs_debugfs_alloc_log(data);
+	if (err)
+		return err;
+
+	root = btrfs_sb(data->sb);
+	fs_root = root->fs_info->fs_root;
+
+	/* walk the inode tree */
+	hot_inode = find_next_hot_inode(fs_root, 0);
+	while (hot_inode) {
+		emit(hot_inode, data);
+
+		/* fetch i_ino before free_hot_inode_item() can release it */
+		next_ino = (u64) hot_inode->i_ino + 1;
+		free_hot_inode_item(hot_inode);
+		hot_inode = find_next_hot_inode(fs_root, next_ino);
+	}
+
+	log = data->debugfs_log;
+	if (!log->len) {
+		/* nothing to report */
+		btrfs_debugfs_free_log(data);
+		return 0;
+	}
+
+print_to_user:
+	return simple_read_from_buffer(user, count, ppos, log->str, log->len);
+}
+
+/* debugfs read file override from fops table: per-range data */
+ssize_t __btrfs_debugfs_range_read(struct file *file, char __user *user,
+				   size_t count, loff_t *ppos)
+{
+	return btrfs_debugfs_read_common(file, user, count, ppos,
+					 __walk_range_tree);
+}
+
+/* debugfs read file override from fops table: per-inode data */
+ssize_t __btrfs_debugfs_inode_read(struct file *file, char __user *user,
+				   size_t count, loff_t *ppos)
+{
+	return btrfs_debugfs_read_common(file, user, count, ppos,
+					 __print_inode_freq_data);
+}
+
+/*
+ * Take the inode, find ranges associated with inode
+ * and print each range data struct
+ */
+void __walk_range_tree(struct hot_inode_item *hot_inode,
+		       struct debugfs_vol_data *data)
+{
+	struct hot_range_tree *inode_range_tree;
+	struct rb_node *node;
+	struct hot_range_item *current_range;
+
+	inode_range_tree = &hot_inode->hot_range_tree;
+	read_lock(&inode_range_tree->lock);
+
+	/* Walk the hot_range_tree for inode */
+	for (node = rb_first(&inode_range_tree->map); node;
+	     node = rb_next(node)) {
+		current_range = rb_entry(node, struct hot_range_item, rb_node);
+		__print_range_freq_data(hot_inode, current_range, data);
+	}
+
+	read_unlock(&inode_range_tree->lock);
+}
+
+/* Print frequency data for each range to log */
+void __print_range_freq_data(struct hot_inode_item *hot_inode,
+			     struct hot_range_item *hot_range,
+			     struct debugfs_vol_data *data)
+{
+	struct btrfs_freq_data *freq_data;
+	int temp;
+
+	freq_data = &hot_range->freq_data;
+
+	/* sample the temperature under the heat list's own lock */
+	read_lock(&hot_range->heat_node->hlist->rwlock);
+	temp = hot_range->heat_node->hlist->temperature;
+	read_unlock(&hot_range->heat_node->hlist->rwlock);
+
+	/* Always lock hot_inode_item first */
+	spin_lock(&hot_inode->lock);
+	spin_lock(&hot_range->lock);
+	debugfs_log(data, "inode #%lu, range start "
+		    "%llu (range len %llu) reads %u, writes %u, temp %u\n",
+		    hot_inode->i_ino,
+		    hot_range->start,
+		    hot_range->len,
+		    freq_data->nr_reads,
+		    freq_data->nr_writes,
+		    temp);
+	spin_unlock(&hot_range->lock);
+	spin_unlock(&hot_inode->lock);
+}
+
+/* Print frequency data for each freq data to log */
+void __print_inode_freq_data(struct hot_inode_item *hot_inode,
+			     struct debugfs_vol_data *data)
+{
+	struct btrfs_freq_data *freq_data;
+	int temp;
+
+	freq_data = &hot_inode->freq_data;
+
+	read_lock(&hot_inode->heat_node->hlist->rwlock);
+	temp = hot_inode->heat_node->hlist->temperature;
+	read_unlock(&hot_inode->heat_node->hlist->rwlock);
+
+	spin_lock(&hot_inode->lock);
+	debugfs_log(data, "inode #%lu, reads %u, writes %u, temp %u\n",
+		    hot_inode->i_ino,
+		    freq_data->nr_reads,
+		    freq_data->nr_writes,
+		    temp);
+	spin_unlock(&hot_inode->lock);
+}
+
diff --git a/fs/btrfs/debugfs.h b/fs/btrfs/debugfs.h
new file mode 100644
index 0000000..bdd4938
--- /dev/null
+++ b/fs/btrfs/debugfs.h
@@ -0,0 +1,59 @@
+#ifndef __BTRFS_DEBUGFS__
+#define __BTRFS_DEBUGFS__
+
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+struct hot_inode_item;
+struct hot_range_item;
+
+/* size of log to vmalloc */
+#define INIT_LOG_ALLOC_SIZE (PAGE_SIZE * 10)
+/* minimum number of bytes the log grows by when it fills up */
+#define LOG_PAGE_SIZE (PAGE_SIZE * 10)
+
+/* number of chars of device name to chop off for making debugfs folder
+ * e.g. /dev/sda -> sda */
+#define DEV_NAME_CHOP 5
+
+/*
+ * Name for BTRFS data in debugfs directory
+ * e.g. /sys/kernel/debug/btrfs_data
+ */
+#define DEBUGFS_ROOT_NAME "btrfs_data"
+
+/* log to output to userspace in debugfs files */
+struct lstring {
+	char *str;
+	unsigned long len;
+};
+
+/*
+ * debugfs_vol_data is a struct of items that is passed to the debugfs
+ */
+struct debugfs_vol_data {
+	struct list_head node; /* protected by data_list_lock in debugfs.c */
+	struct lstring *debugfs_log;
+	struct super_block *sb;
+	struct dentry *de;
+	spinlock_t log_lock; /* protects debugfs_log */
+	char log_work_buff[1024];
+	uint log_alloc_size;
+};
+
+ssize_t __btrfs_debugfs_range_read(struct file *file, char __user *user,
+				   size_t size, loff_t *len);
+ssize_t __btrfs_debugfs_inode_read(struct file *file, char __user *user,
+				   size_t size, loff_t *len);
+int __btrfs_debugfs_open(struct inode *inode, struct file *file);
+void __walk_range_tree(struct hot_inode_item *hot_inode,
+		       struct debugfs_vol_data *data);
+void __print_range_freq_data(struct hot_inode_item *hot_inode,
+			     struct hot_range_item *hot_range,
+			     struct debugfs_vol_data *data);
+void __print_inode_freq_data(struct hot_inode_item *hot_inode,
+			     struct debugfs_vol_data *data);
+
+#endif