@@ -1193,7 +1193,7 @@ xfs_reclaim_inode(
*
* Return the number of inodes freed.
*/
-STATIC int
+int
xfs_reclaim_inodes_ag(
struct xfs_mount *mp,
int flags,
@@ -1297,40 +1297,196 @@ xfs_reclaim_inodes_ag(
return freed;
}
-void
-xfs_reclaim_inodes(
- struct xfs_mount *mp)
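+/*
+ * LRU isolation callback for inode reclaim. We are called under the LRU
+ * list lock and only ever use trylocks here, so we never block. Clean
+ * inodes are marked XFS_IRECLAIM and moved to the caller's dispose list;
+ * dirty or pinned inodes are rotated to the tail of the LRU; inodes we
+ * cannot lock are skipped. The lowest dirty LSN seen is recorded so the
+ * caller knows how far to push the AIL to make progress.
+ */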
+enum lru_status
+xfs_inode_reclaim_isolate(
+ struct list_head *item,
+ struct list_lru_one *lru,
+ spinlock_t *lru_lock,
+ void *arg)
{
- xfs_reclaim_inodes_ag(mp, SYNC_WAIT, INT_MAX);
+ struct xfs_ireclaim_args *ra = arg;
+ struct inode *inode = container_of(item, struct inode, i_lru);
+ struct xfs_inode *ip = XFS_I(inode);
+ enum lru_status ret;
+ xfs_lsn_t lsn = 0;
+
+ /*
+ * Careful: the lock order here is the inverse of everywhere else. We
+ * already hold the LRU lock and take i_flags_lock before the ILOCK
+ * and the flush lock, so everything below must be a trylock.
+ */
+ if (!spin_trylock(&ip->i_flags_lock))
+ return LRU_SKIP;
+
+ /* if we are in shutdown, we'll reclaim it even if dirty */
+ ret = LRU_ROTATE;
+ if (!xfs_inode_clean(ip) && !__xfs_iflags_test(ip, XFS_ISTALE) &&
+ !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ lsn = ip->i_itemp->ili_item.li_lsn;
+ ra->dirty_skipped++;
+ goto out_unlock_flags;
+ }
+
+ ret = LRU_SKIP;
+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+ goto out_unlock_flags;
+
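+ /*
+ * If the flush lock cannot be taken without blocking, the inode is
+ * currently being flushed. Skip it, but record its LSN so the caller
+ * knows how far to push the AIL before it becomes reclaimable.
+ */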
+ if (!__xfs_iflock_nowait(ip)) {
+ lsn = ip->i_itemp->ili_item.li_lsn;
+ ra->dirty_skipped++;
+ goto out_unlock_inode;
+ }
+
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ goto reclaim;
+
+ /*
+ * Now that the inode is locked, we can determine whether it is dirty
+ * without racing against anything else.
+ */
+ ret = LRU_ROTATE;
+ if (xfs_ipincount(ip)) {
+ ra->dirty_skipped++;
+ goto out_ifunlock;
+ }
+ if (!xfs_inode_clean(ip) && !__xfs_iflags_test(ip, XFS_ISTALE)) {
+ lsn = ip->i_itemp->ili_item.li_lsn;
+ ra->dirty_skipped++;
+ goto out_ifunlock;
+ }
+
+reclaim:
+ /*
+ * Once we mark the inode with XFS_IRECLAIM, no-one will grab it again.
+ * RCU lookups will still find the inode, but they will back off when they
+ * see the XFS_IRECLAIM flag. Hence we can leave the inode locked as we move it
+ * to the dispose list so we can deal with shutdown cleanup there
+ * outside the LRU lock context.
+ */
+ __xfs_iflags_set(ip, XFS_IRECLAIM);
+ list_lru_isolate_move(lru, &inode->i_lru, &ra->freeable);
+ spin_unlock(&ip->i_flags_lock);
+ return LRU_REMOVED;
+
+out_ifunlock:
+ xfs_ifunlock(ip);
+out_unlock_inode:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_unlock_flags:
+ spin_unlock(&ip->i_flags_lock);
+
+ if (lsn && XFS_LSN_CMP(lsn, ra->lowest_lsn) < 0)
+ ra->lowest_lsn = lsn;
+ return ret;
}
-/*
- * Scan a certain number of inodes for reclaim.
- *
- * When called we make sure that there is a background (fast) inode reclaim in
- * progress, while we will throttle the speed of reclaim via doing synchronous
- * reclaim of inodes. That means if we come across dirty inodes, we wait for
- * them to be cleaned, which we hope will not be very long due to the
- * background walker having already kicked the IO off on those dirty inodes.
- */
-long
-xfs_reclaim_inodes_nr(
- struct xfs_mount *mp,
- int nr_to_scan)
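+/*
+ * Free an inode that has been isolated from the LRU. It arrives here
+ * ILOCKed, flush locked and marked XFS_IRECLAIM. Any shutdown cleanup
+ * deferred by the isolation callback is done first, then the inode is
+ * removed from the per-AG radix tree and handed to RCU for freeing.
+ */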
+static void
+xfs_dispose_inode(
+ struct xfs_inode *ip)
{
- int sync_mode = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_perag *pag;
+ xfs_ino_t ino;
+
+ ASSERT(xfs_isiflocked(ip));
+ ASSERT(xfs_inode_clean(ip) || xfs_iflags_test(ip, XFS_ISTALE) ||
+ XFS_FORCED_SHUTDOWN(mp));
+ ASSERT(ip->i_ino != 0);
/*
- * For kswapd, we kick background inode writeback. For direct
- * reclaim, we issue and wait on inode writeback to throttle
- * reclaim rates and avoid shouty OOM-death.
+ * Process the shutdown reclaim work we deferred from the LRU isolation
+ * callback before we go any further.
*/
- if (current_is_kswapd())
- xfs_ail_push_all(mp->m_ail);
- else
- sync_mode |= SYNC_WAIT;
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ xfs_iunpin_wait(ip);
+ xfs_iflush_abort(ip, false);
+ } else {
+ xfs_ifunlock(ip);
+ }
- return xfs_reclaim_inodes_ag(mp, sync_mode, nr_to_scan);
+ /*
+ * Because we use RCU freeing we need to ensure the inode always appears
+ * to be reclaimed with an invalid inode number when in the free state.
+ * We do this as early as possible under the ILOCK so that
+ * xfs_iflush_cluster() and xfs_ifree_cluster() can be guaranteed to
+ * detect races with us here. By doing this, we guarantee that once
+ * xfs_iflush_cluster() or xfs_ifree_cluster() has locked XFS_ILOCK that
+ * it will see either a valid inode that will serialise correctly, or it
+ * will see an invalid inode that it can skip.
+ */
+ spin_lock(&ip->i_flags_lock);
+ ino = ip->i_ino; /* for radix_tree_delete */
+ ip->i_flags = XFS_IRECLAIM;
+ ip->i_ino = 0;
+ spin_unlock(&ip->i_flags_lock);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ XFS_STATS_INC(mp, xs_ig_reclaims);
+ /*
+ * Remove the inode from the per-AG radix tree.
+ *
+ * Because radix_tree_delete won't complain even if the item was never
+ * added to the tree, assert that it was there beforehand to catch
+ * problems with the inode lifetime early on.
+ */
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
+ spin_lock(&pag->pag_ici_lock);
+ if (!radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino)))
+ ASSERT(0);
+ spin_unlock(&pag->pag_ici_lock);
+ xfs_perag_put(pag);
+
+ /*
+ * Here we do an (almost) spurious inode lock in order to coordinate
+ * with inode cache radix tree lookups. This is because the lookup
+ * can reference the inodes in the cache without taking references.
+ *
+ * We make that OK here by ensuring that we wait until the inode is
+ * unlocked after the lookup before we go ahead and free it.
+ *
+ * XXX: need to check this is still true. Not sure it is.
+ */
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_qm_dqdetach(ip);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ __xfs_inode_free(ip);
+}
+
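+/*
+ * Free all the inodes on the dispose list built up by the LRU walk,
+ * rescheduling between inodes so we don't hog the CPU on long lists.
+ */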
+void
+xfs_dispose_inodes(
+ struct list_head *freeable)
+{
+ while (!list_empty(freeable)) {
+ struct inode *inode;
+
+ inode = list_first_entry(freeable, struct inode, i_lru);
+ list_del_init(&inode->i_lru);
+
+ xfs_dispose_inode(XFS_I(inode));
+ cond_resched();
+ }
+}
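+
+/*
+ * Reclaim all the inodes on this mount (e.g. at unmount), walking the
+ * LRU repeatedly until it is empty.
+ */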
+void
+xfs_reclaim_inodes(
+ struct xfs_mount *mp)
+{
+ while (list_lru_count(&mp->m_inode_lru)) {
+ struct xfs_ireclaim_args ra;
+ long freed, to_free;
+
+ INIT_LIST_HEAD(&ra.freeable);
+ ra.lowest_lsn = NULLCOMMITLSN;
+ ra.dirty_skipped = 0;
+ to_free = list_lru_count(&mp->m_inode_lru);
+
+ freed = list_lru_walk(&mp->m_inode_lru, xfs_inode_reclaim_isolate,
+ &ra, to_free);
+ xfs_dispose_inodes(&ra.freeable);
+
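+ /*
+ * If we made no progress then everything left on the LRU is dirty
+ * or pinned, so force the log and push the whole AIL to get inode
+ * writeback moving. Otherwise do a synchronous push to the lowest
+ * LSN we skipped so those inodes are clean on the next pass.
+ */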
+ if (freed == 0) {
+ xfs_log_force(mp, XFS_LOG_SYNC);
+ xfs_ail_push_all(mp->m_ail);
+ } else if (ra.lowest_lsn != NULLCOMMITLSN) {
+ xfs_ail_push_sync(mp->m_ail, ra.lowest_lsn);
+ }
+ cond_resched();
+ }
}
STATIC int
@@ -49,8 +49,16 @@ int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
struct xfs_inode * xfs_inode_alloc(struct xfs_mount *mp, xfs_ino_t ino);
void xfs_inode_free(struct xfs_inode *ip);
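+/*
+ * Arguments and results for an inode reclaim LRU walk: the dispose list
+ * of isolated inodes, the lowest LSN of any dirty inode we skipped, and
+ * a count of the dirty/pinned inodes that were skipped.
+ */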
+struct xfs_ireclaim_args {
+ struct list_head freeable;
+ xfs_lsn_t lowest_lsn;
+ unsigned long dirty_skipped;
+};
+
+enum lru_status xfs_inode_reclaim_isolate(struct list_head *item,
+ struct list_lru_one *lru, spinlock_t *lru_lock, void *arg);
+void xfs_dispose_inodes(struct list_head *freeable);
void xfs_reclaim_inodes(struct xfs_mount *mp);
-long xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
@@ -263,6 +263,14 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
extern void __xfs_iflock(struct xfs_inode *ip);
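+/*
+ * Non-blocking flush lock. The caller must hold ip->i_flags_lock as the
+ * flag is checked and set here non-atomically.
+ */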
+static inline int __xfs_iflock_nowait(struct xfs_inode *ip)
+{
+ if (ip->i_flags & XFS_IFLOCK)
+ return false;
+ ip->i_flags |= XFS_IFLOCK;
+ return true;
+}
+
static inline int xfs_iflock_nowait(struct xfs_inode *ip)
{
return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
@@ -17,6 +17,7 @@
#include "xfs_alloc.h"
#include "xfs_fsops.h"
#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
@@ -1811,23 +1812,56 @@ xfs_fs_mount(
}
static long
-xfs_fs_nr_cached_objects(
+xfs_fs_free_cached_objects(
struct super_block *sb,
struct shrink_control *sc)
{
- /* Paranoia: catch incorrect calls during mount setup or teardown */
- if (WARN_ON_ONCE(!sb->s_fs_info))
- return 0;
+ struct xfs_mount *mp = XFS_M(sb);
+ struct xfs_ireclaim_args ra;
+ long freed;
- return list_lru_shrink_count(&XFS_M(sb)->m_inode_lru, sc);
+ INIT_LIST_HEAD(&ra.freeable);
+ ra.lowest_lsn = NULLCOMMITLSN;
+ ra.dirty_skipped = 0;
+
+ freed = list_lru_shrink_walk(&mp->m_inode_lru, sc,
+ xfs_inode_reclaim_isolate, &ra);
+ xfs_dispose_inodes(&ra.freeable);
+
+ /*
+ * Deal with dirty inodes. We will have the LSN of
+ * the oldest dirty inode in our reclaim args if we skipped any.
+ *
+ * For kswapd, if we skipped too many dirty inodes (i.e. more dirty than
+ * we freed) then we need kswapd to back off once its scan has been
+ * completed. That way there will be some clean inodes when it comes back
+ * and it can make progress, but we still make sure that inode cleaning
+ * is in progress.
+ *
+ * Direct reclaim will be throttled by the caller as it winds the
+ * priority up. All we need to do is keep pushing on dirty inodes
+ * in the background so when we come back progress will be made.
+ */
+ if (current_is_kswapd() && ra.dirty_skipped >= freed) {
+ if (current->reclaim_state)
+ current->reclaim_state->need_backoff = true;
+ }
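+ /*
+ * Kick the AIL along to at least the lowest LSN we skipped so the
+ * dirty inodes get written back and become reclaimable by the time
+ * we scan again.
+ */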
+ if (ra.lowest_lsn != NULLCOMMITLSN)
+ xfs_ail_push(mp->m_ail, ra.lowest_lsn);
+
+ return freed;
}
static long
-xfs_fs_free_cached_objects(
+xfs_fs_nr_cached_objects(
struct super_block *sb,
struct shrink_control *sc)
{
- return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
+ /* Paranoia: catch incorrect calls during mount setup or teardown */
+ if (WARN_ON_ONCE(!sb->s_fs_info))
+ return 0;
+
+ return list_lru_shrink_count(&XFS_M(sb)->m_inode_lru, sc);
}
static const struct super_operations xfs_super_operations = {