diff mbox

[RFC,06/10] fs: Rework i_count

Message ID 20170224162044.267733351@infradead.org (mailing list archive)
State New, archived
Headers show

Commit Message

Peter Zijlstra Feb. 24, 2017, 3:43 p.m. UTC
The inode count: i_count, does not conform to normal reference
counting semantics, its a usage count. That is, objects can and do
live without any references (i_count == 0).

This is because the reference from the inode hash table is not
accounted. This makes that find_inode_fast() can result in 0->1
transitions and similarly, iput() can do 1->0->1 transitions.

This patch changes things to include the inode hash table's reference
and reworks iput() to avoid spurious drops to 0.

It does mean that we now call super_operations::drop_inode() with
non-zero i_count, but I could not find an implementation where this
mattered. Only once we really decide to fully drop the inode and
remove it from the hash will we do the final dec_and_test.

This basically boils down to pushing part of iput() into
atomic_dec_and_lock().

Also, this would allow an RCU based find_inode*().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 fs/btrfs/inode.c   |    2 +-
 fs/inode.c         |   24 +++++++++++++++++++-----
 include/linux/fs.h |   11 ++++++++++-
 3 files changed, 30 insertions(+), 7 deletions(-)

Comments

Al Viro Feb. 24, 2017, 8:49 p.m. UTC | #1
On Fri, Feb 24, 2017 at 04:43:35PM +0100, Peter Zijlstra wrote:

>  {
> -	return atomic_read(&inode->i_count);
> +	int i_count = atomic_read(&inode->i_count);
> +
> +	/*
> +	 * In order to preserve the 'old' usage-count semantics, remove the
> +	 * reference that the hash-table has.

What does it have to do with hashtable, when you are bumping it for _all_
inodes, hashed or not hashed?
diff mbox

Patch

--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3172,7 +3172,7 @@  void btrfs_add_delayed_iput(struct inode
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_inode *binode = BTRFS_I(inode);
 
-	if (atomic_add_unless(&inode->i_count, -1, 1))
+	if (atomic_add_unless(&inode->i_count, -1, 2))
 		return;
 
 	spin_lock(&fs_info->delayed_iput_lock);
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -135,7 +135,7 @@  int inode_init_always(struct super_block
 	inode->i_sb = sb;
 	inode->i_blkbits = sb->s_blocksize_bits;
 	inode->i_flags = 0;
-	atomic_set(&inode->i_count, 1);
+	atomic_set(&inode->i_count, 2); /* hashed and ref */
 	inode->i_op = &empty_iops;
 	inode->i_fop = &no_open_fops;
 	inode->__i_nlink = 1;
@@ -387,7 +387,7 @@  static void init_once(void *foo)
 void __iget(struct inode *inode)
 {
 	lockdep_assert_held(&inode->i_lock);
-	atomic_inc(&inode->i_count);
+	WARN_ON(!atomic_inc_not_zero(&inode->i_count));
 }
 
 /*
@@ -395,7 +395,7 @@  void __iget(struct inode *inode)
  */
 void ihold(struct inode *inode)
 {
-	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
+	WARN_ON(atomic_inc_return(&inode->i_count) < 3);
 }
 EXPORT_SYMBOL(ihold);
 
@@ -814,6 +814,8 @@  static struct inode *find_inode_fast(str
 {
 	struct inode *inode = NULL;
 
+	lockdep_assert_held(&inode_hash_lock);
+
 repeat:
 	hlist_for_each_entry(inode, head, i_hash) {
 		if (inode->i_ino != ino)
@@ -1488,11 +1490,12 @@  void iput(struct inode *inode)
 
 	BUG_ON(inode->i_state & I_CLEAR);
 retry:
-	if (!atomic_dec_and_lock(&inode->i_count, &inode->i_lock))
+	if (atomic_add_unless(&inode->i_count, -1, 2))
 		return;
 
+	spin_lock(&inode->i_lock);
+
 	if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
-		atomic_inc(&inode->i_count);
 		inode->i_state &= ~I_DIRTY_TIME;
 		spin_unlock(&inode->i_lock);
 		trace_writeback_lazytime_iput(inode);
@@ -1500,6 +1503,7 @@  void iput(struct inode *inode)
 		goto retry;
 	}
 
+	atomic_dec(&inode->i_count); /* 2 -> 1 */
 	WARN_ON(inode->i_state & I_NEW);
 
 	/*
@@ -1520,6 +1524,16 @@  void iput(struct inode *inode)
 		spin_unlock(&inode->i_lock);
 		return;
 	}
+
+	/*
+	 * If, at this point, only the hashtable has a reference left
+	 * continue to take the inode out, otherwise someone got a ref
+	 * while we weren't looking.
+	 */
+	if (atomic_cmpxchg(&inode->i_count, 1, 0) != 1) {
+		spin_unlock(&inode->i_lock);
+		return;
+	}
 
 	if (!drop) {
 		inode->i_state |= I_WILL_FREE;
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2713,7 +2713,16 @@  extern unsigned int get_next_ino(void);
 
 static inline int i_count(struct inode *inode)
 {
-	return atomic_read(&inode->i_count);
+	int i_count = atomic_read(&inode->i_count);
+
+	/*
+	 * In order to preserve the 'old' usage-count semantics, remove the
+	 * reference that the hash-table has.
+	 */
+	if (i_count)
+		i_count--;
+
+	return i_count;
 }
 
 extern void __iget(struct inode * inode);