diff mbox

[2/2,RESEND] btrfs: introduce shrinker for rb_tree that keeps valid btrfs_devices

Message ID 1421311988-11976-2-git-send-email-guihc.fnst@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gui Hecheng Jan. 15, 2015, 8:53 a.m. UTC
The following patch:
	btrfs: remove empty fs_devices to prevent memory runout

introduces @valid_dev_root aiming at recording @btrfs_device objects that
have corresponding block devices with btrfs.
But if a block device is broken or unplugged, no one tells the
@valid_dev_root to cleanup the "dead" objects.

To recycle the memory occuppied by those "dead"s, we could rely on
the shrinker. The shrinker's scan function will traverse the
@valid_dev_root and trys to open the devices one by one, if it fails
or encounters a non-btrfs it will remove the "dead" @btrfs_device.

A special case to deal with is that a block device is unplugged and
replugged, then it appears with a new @bdev->bd_dev as devnum.
In this case, we should remove the older since we should have a new
one for that block device already.

Signed-off-by: Gui Hecheng <guihc.fnst@cn.fujitsu.com>
---
 fs/btrfs/super.c   | 10 ++++++++
 fs/btrfs/volumes.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/btrfs/volumes.h |  4 +++
 3 files changed, 87 insertions(+), 1 deletion(-)

Comments

David Sterba Jan. 23, 2015, 6:10 p.m. UTC | #1
On Thu, Jan 15, 2015 at 04:53:08PM +0800, Gui Hecheng wrote:
> The following patch:
> 	btrfs: remove empty fs_devices to prevent memory runout
> 
> introduces @valid_dev_root aiming at recording @btrfs_device objects that
> have corresponding block devices with btrfs.
> But if a block device is broken or unplugged, no one tells the
> @valid_dev_root to cleanup the "dead" objects.
> 
> To recycle the memory occuppied by those "dead"s, we could rely on
> the shrinker. The shrinker's scan function will traverse the
> @valid_dev_root and trys to open the devices one by one, if it fails
> or encounters a non-btrfs it will remove the "dead" @btrfs_device.

I don't see why shrinker is used here.

linux.git/linux/shrinker.h:

"A callback you can register to apply pressure to ageable caches."

How is guaranteed that it will take action at the right time?
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Anand Jain Jan. 26, 2015, 9:44 a.m. UTC | #2
I think we won't need this patch. the coming sysfs changes will
have entry point to handle missing devices/FSID. (inspired by md).
That will be much cleaner to trigger the clean up based on the
device FS changes.
The proposed fix in this Patch, can it handle things like when
customer decides to overwrite btrfs device with ext FS. ? would
that still leave some stale fs_devices. ?

Thanks, Anand.


On 01/24/2015 02:10 AM, David Sterba wrote:
> On Thu, Jan 15, 2015 at 04:53:08PM +0800, Gui Hecheng wrote:
>> The following patch:
>> 	btrfs: remove empty fs_devices to prevent memory runout
>>
>> introduces @valid_dev_root aiming at recording @btrfs_device objects that
>> have corresponding block devices with btrfs.
>> But if a block device is broken or unplugged, no one tells the
>> @valid_dev_root to cleanup the "dead" objects.
>>
>> To recycle the memory occuppied by those "dead"s, we could rely on
>> the shrinker. The shrinker's scan function will traverse the
>> @valid_dev_root and trys to open the devices one by one, if it fails
>> or encounters a non-btrfs it will remove the "dead" @btrfs_device.
>
> I don't see why shrinker is used here.
>
> linux.git/linux/shrinker.h:
>
> "A callback you can register to apply pressure to ageable caches."
>
> How is guaranteed that it will take action at the right time?
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 001cba5..022381e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2017,6 +2017,12 @@  static struct miscdevice btrfs_misc = {
 	.fops		= &btrfs_ctl_fops
 };
 
+static struct shrinker btrfs_valid_dev_shrinker = {
+	.scan_objects = btrfs_valid_dev_scan,
+	.count_objects = btrfs_valid_dev_count,
+	.seeks = DEFAULT_SEEKS,
+};
+
 MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
 MODULE_ALIAS("devname:btrfs-control");
 
@@ -2130,6 +2136,8 @@  static int __init init_btrfs_fs(void)
 
 	btrfs_init_lockdep();
 
+	register_shrinker(&btrfs_valid_dev_shrinker);
+
 	btrfs_print_info();
 
 	err = btrfs_run_sanity_tests();
@@ -2143,6 +2151,7 @@  static int __init init_btrfs_fs(void)
 	return 0;
 
 unregister_ioctl:
+	unregister_shrinker(&btrfs_valid_dev_shrinker);
 	btrfs_interface_exit();
 free_end_io_wq:
 	btrfs_end_io_wq_exit();
@@ -2183,6 +2192,7 @@  static void __exit exit_btrfs_fs(void)
 	btrfs_interface_exit();
 	btrfs_end_io_wq_exit();
 	unregister_filesystem(&btrfs_fs_type);
+	unregister_shrinker(&btrfs_valid_dev_shrinker);
 	btrfs_exit_sysfs();
 	btrfs_cleanup_valid_dev_root();
 	btrfs_cleanup_fs_uuids();
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 228a7e0..5462557 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -54,6 +54,7 @@  static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
 DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
 static struct rb_root valid_dev_root = RB_ROOT;
+static atomic_long_t unopened_dev_count = ATOMIC_LONG_INIT(0);
 
 static struct btrfs_device *insert_valid_device(struct btrfs_device *new_dev)
 {
@@ -130,6 +131,8 @@  static void free_invalid_device(struct btrfs_device *invalid_dev)
 {
 	struct btrfs_fs_devices *old_fs;
 
+	atomic_long_dec(&unopened_dev_count);
+
 	old_fs = invalid_dev->fs_devices;
 	mutex_lock(&old_fs->device_list_mutex);
 	list_del(&invalid_dev->dev_list);
@@ -605,6 +608,7 @@  static noinline int device_list_add(const char *path,
 		list_add_rcu(&device->dev_list, &fs_devices->devices);
 		fs_devices->num_devices++;
 		mutex_unlock(&fs_devices->device_list_mutex);
+		atomic_long_inc(&unopened_dev_count);
 
 		ret = 1;
 		device->fs_devices = fs_devices;
@@ -778,6 +782,7 @@  again:
 			blkdev_put(device->bdev, device->mode);
 			device->bdev = NULL;
 			fs_devices->open_devices--;
+			atomic_long_inc(&unopened_dev_count);
 		}
 		if (device->writeable) {
 			list_del_init(&device->dev_alloc_list);
@@ -840,8 +845,10 @@  static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 		struct btrfs_device *new_device;
 		struct rcu_string *name;
 
-		if (device->bdev)
+		if (device->bdev) {
 			fs_devices->open_devices--;
+			atomic_long_inc(&unopened_dev_count);
+		}
 
 		if (device->writeable &&
 		    device->devid != BTRFS_DEV_REPLACE_DEVID) {
@@ -971,6 +978,7 @@  static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 			fs_devices->rotating = 1;
 
 		fs_devices->open_devices++;
+		atomic_long_dec(&unopened_dev_count);
 		if (device->writeable &&
 		    device->devid != BTRFS_DEV_REPLACE_DEVID) {
 			fs_devices->rw_devices++;
@@ -6848,3 +6856,67 @@  void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
 	}
 	unlock_chunks(root);
 }
+
+static unsigned long shrink_valid_dev_root(void)
+{
+	struct rb_node *n;
+	struct btrfs_device *device;
+	struct block_device *bdev;
+	struct buffer_head *bh;
+	unsigned long freed = 0;
+	unsigned long possible_deads;
+	int ret = 0;
+	dev_t cur_devnum;
+
+	mutex_lock(&uuid_mutex);
+
+	possible_deads = atomic_long_read(&unopened_dev_count);
+	if (!possible_deads)
+		goto out;
+
+	for (n = rb_first(&valid_dev_root); n ; n = rb_next(n)) {
+		device = rb_entry(n, struct btrfs_device, rb_node);
+
+		if (device->bdev)
+			continue;
+		if (!device->name)
+			continue;
+
+		ret = btrfs_get_bdev_and_sb(device->name->str, FMODE_READ,
+						NULL, 0, &bdev, &bh);
+		/* can't open as btrfs, not valid, drop it */
+		if (ret)
+			goto shrink;
+
+		cur_devnum = bdev->bd_dev;
+
+		brelse(bh);
+		blkdev_put(bdev, FMODE_READ);
+
+		if (device->devnum == cur_devnum)
+			continue;
+		/* bdev->bd_dev changed, not valid, drop it */
+
+shrink:
+		rb_erase(n, &valid_dev_root);
+		free_invalid_device(device);
+
+		freed++;
+	}
+
+out:
+	mutex_unlock(&uuid_mutex);
+	return freed;
+}
+
+unsigned long btrfs_valid_dev_scan(struct shrinker *shrink,
+				   struct shrink_control *sc)
+{
+	return shrink_valid_dev_root();
+}
+
+unsigned long btrfs_valid_dev_count(struct shrinker *shrink,
+				    struct shrink_control *sc)
+{
+	return atomic_long_read(&unopened_dev_count);
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 7f5c7ea..49f4fff 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -432,6 +432,10 @@  struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
 					const u8 *uuid);
 int btrfs_rm_device(struct btrfs_root *root, char *device_path);
 void btrfs_cleanup_valid_dev_root(void);
+unsigned long btrfs_valid_dev_scan(struct shrinker *shrink,
+				   struct shrink_control *sc);
+unsigned long btrfs_valid_dev_count(struct shrinker *shrink,
+				    struct shrink_control *sc);
 void btrfs_cleanup_fs_uuids(void);
 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
 int btrfs_grow_device(struct btrfs_trans_handle *trans,