diff mbox

[3/3] ceph: rework trim caps code

Message ID 1375683030-28305-10-git-send-email-zheng.z.yan@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yan, Zheng Aug. 5, 2013, 6:10 a.m. UTC
From: "Yan, Zheng" <zheng.z.yan@intel.com>

The trim caps code that handles the SESSION_RECALL_STATE message has
two issues. First, it uses d_prune_aliases() to prune dentries.
This confuses our 'dir complete' check, because d_prune_aliases()
unhashes dentries before calling our d_prune() callback. Second,
it only prunes dentries; inodes with zero references remain in
the icache unless there is memory pressure.

The fix is to add d_delete() and drop_inode() VFS callbacks. The VFS
calls our d_delete()/drop_inode() callback when a dentry/inode's
last reference is dropped. If true is returned, the VFS drops the
dentry/inode from the cache. When handling a SESSION_RECALL_STATE
message, we mark the inode with the CEPH_I_TRIMCAPS flag. Our callbacks
check the flag and session->s_trim_caps to decide whether the VFS should
drop a dentry/inode. To trim an inode, we just need to take a reference
on the dentries associated with the inode, then release them.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/caps.c       | 30 ++++++++++++++++++++++++++++++
 fs/ceph/dir.c        | 12 ++++++++++++
 fs/ceph/inode.c      | 11 +++++++++++
 fs/ceph/mds_client.c | 37 ++++++++++++++++++++++++++++++-------
 fs/ceph/mds_client.h |  4 ++--
 fs/ceph/super.c      |  1 +
 fs/ceph/super.h      |  3 +++
 7 files changed, 89 insertions(+), 9 deletions(-)

Comments

Gregory Farnum Aug. 23, 2013, 8:27 p.m. UTC | #1
Did this patch get dropped on purpose? I also don't see it in our
testing branch.
-Greg
Software Engineer #42 @ http://inktank.com | http://ceph.com


On Sun, Aug 4, 2013 at 11:10 PM, Yan, Zheng <zheng.z.yan@intel.com> wrote:
> From: "Yan, Zheng" <zheng.z.yan@intel.com>
>
> The trim caps code that handles SESSION_RECALL_STATE message has
> two issues. First, it uses d_prune_aliases() to prune dentries.
> This confuses our 'dir complete' check, because d_prune_aliases()
> unhashes dentries before calling our d_prune() callback. Second,
> it only prune dentries, inodes with zero reference are still in
> the icache unless there is memory pressure.
>
> The fix is adding d_delete() and drop_inode() VFS callback. VFS
> calls our d_delete()/drop_inode() callback when a dentry/inode's
> last reference is dropped. If true is returned, VFS drop the
> dentry/inode from the cache. When handling SESSION_RECALL_STATE
> message, we mark inode with CEPH_I_TRIMCAPS flag. Our callbacks
> check the flag and session->s_trim_caps to decide if VFS should
> drop a dentry/inode. To trim an inode, we just need to reference
> dentries associated with the inode, then release them.
>
> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
>  fs/ceph/caps.c       | 30 ++++++++++++++++++++++++++++++
>  fs/ceph/dir.c        | 12 ++++++++++++
>  fs/ceph/inode.c      | 11 +++++++++++
>  fs/ceph/mds_client.c | 37 ++++++++++++++++++++++++++++++-------
>  fs/ceph/mds_client.h |  4 ++--
>  fs/ceph/super.c      |  1 +
>  fs/ceph/super.h      |  3 +++
>  7 files changed, 89 insertions(+), 9 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 49590a0..332dc8d 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -882,6 +882,36 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
>  }
>
>  /*
> + * Used by ceph_d_delete()/ceph_drop_inode(). check if we should drop
> + * the inode.
> + */
> +int ceph_trim_caps(struct ceph_inode_info *ci, int drop_inode)
> +{
> +       struct ceph_cap *cap;
> +       struct rb_node *p;
> +       int ret = 0;
> +
> +       if (!spin_trylock(&ci->i_ceph_lock))
> +               return 0;
> +
> +       if (ci->i_ceph_flags & CEPH_I_TRIMCAPS) {
> +               for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
> +                       cap = rb_entry(p, struct ceph_cap, ci_node);
> +                       if (atomic_read(&cap->session->s_trim_caps) <= 0)
> +                               continue;
> +                       if (drop_inode)
> +                               atomic_dec(&cap->session->s_trim_caps);
> +                       ret = 1;
> +               }
> +               if (!ret)
> +                       ci->i_ceph_flags &= ~CEPH_I_TRIMCAPS;
> +       }
> +
> +       spin_unlock(&ci->i_ceph_lock);
> +       return ret;
> +}
> +
> +/*
>   * called under i_ceph_lock
>   */
>  static int __ceph_is_any_caps(struct ceph_inode_info *ci)
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index 0e4da4a..9741f34 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -1043,6 +1043,17 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
>         return valid;
>  }
>
> +static int ceph_d_delete(const struct dentry *dentry)
> +{
> +       if (dentry->d_inode) {
> +               struct ceph_inode_info *ci = ceph_inode(dentry->d_inode);
> +               /* need trim caps? */
> +               if (ci->i_ceph_flags & CEPH_I_TRIMCAPS)
> +                       return ceph_trim_caps(ci, false);
> +       }
> +       return 0;
> +}
> +
>  /*
>   * Release our ceph_dentry_info.
>   */
> @@ -1300,6 +1311,7 @@ const struct inode_operations ceph_dir_iops = {
>
>  const struct dentry_operations ceph_dentry_ops = {
>         .d_revalidate = ceph_d_revalidate,
> +       .d_delete = ceph_d_delete,
>         .d_release = ceph_d_release,
>         .d_prune = ceph_d_prune,
>  };
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 01e4db1..371590a 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -431,6 +431,17 @@ void ceph_destroy_inode(struct inode *inode)
>         call_rcu(&inode->i_rcu, ceph_i_callback);
>  }
>
> +int ceph_drop_inode(struct inode *inode)
> +{
> +       struct ceph_inode_info *ci = ceph_inode(inode);
> +       int ret = generic_drop_inode(inode);
> +
> +       /* need trim caps? */
> +       if (!ret && (ci->i_ceph_flags & CEPH_I_TRIMCAPS))
> +               ret = ceph_trim_caps(ci, true);
> +
> +       return ret ;
> +}
>
>  /*
>   * Helpers to fill in size, ctime, mtime, and atime.  We have to be
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 2a42b5f..7c2b8d6 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -435,7 +435,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>         s->s_renew_seq = 0;
>         INIT_LIST_HEAD(&s->s_caps);
>         s->s_nr_caps = 0;
> -       s->s_trim_caps = 0;
> +       atomic_set(&s->s_trim_caps, 0);
>         atomic_set(&s->s_ref, 1);
>         INIT_LIST_HEAD(&s->s_waiting);
>         INIT_LIST_HEAD(&s->s_unsafe);
> @@ -1207,9 +1207,10 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
>  {
>         struct ceph_mds_session *session = arg;
>         struct ceph_inode_info *ci = ceph_inode(inode);
> +       struct dentry *dentry;
>         int used, oissued, mine;
>
> -       if (session->s_trim_caps <= 0)
> +       if (atomic_read(&session->s_trim_caps) <= 0)
>                 return -1;
>
>         spin_lock(&ci->i_ceph_lock);
> @@ -1225,16 +1226,39 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
>         if ((used & ~oissued) & mine)
>                 goto out;   /* we need these caps */
>
> -       session->s_trim_caps--;
>         if (oissued) {
>                 /* we aren't the only cap.. just remove us */
>                 __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
>                                     cap->mseq, cap->issue_seq);
>                 __ceph_remove_cap(cap);
> +               atomic_dec(&session->s_trim_caps);
>         } else {
>                 /* try to drop referring dentries */
> +               ci->i_ceph_flags |= CEPH_I_TRIMCAPS;
>                 spin_unlock(&ci->i_ceph_lock);
> -               d_prune_aliases(inode);
> +               /*
> +                * can't use d_prune_aliases(), because it unhashes dentries
> +                * before calling ceph_d_prune().
> +                */
> +restart:
> +               spin_lock(&inode->i_lock);
> +               hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
> +                       spin_lock(&dentry->d_lock);
> +                       if (!dentry->d_count) {
> +                               dget_dlock(dentry);
> +                               spin_unlock(&dentry->d_lock);
> +                               spin_unlock(&inode->i_lock);
> +                               /*
> +                                * our d_delete callback returns true when
> +                                * CEPH_I_TRIMCAPS is set. This makes VFS
> +                                * drop the dentry.
> +                                */
> +                               dput(dentry);
> +                               goto restart;
> +                       }
> +                       spin_unlock(&dentry->d_lock);
> +               }
> +               spin_unlock(&inode->i_lock);
>                 dout("trim_caps_cb %p cap %p  pruned, count now %d\n",
>                      inode, cap, atomic_read(&inode->i_count));
>                 return 0;
> @@ -1257,12 +1281,11 @@ static int trim_caps(struct ceph_mds_client *mdsc,
>         dout("trim_caps mds%d start: %d / %d, trim %d\n",
>              session->s_mds, session->s_nr_caps, max_caps, trim_caps);
>         if (trim_caps > 0) {
> -               session->s_trim_caps = trim_caps;
> +               atomic_set(&session->s_trim_caps, trim_caps);
>                 iterate_session_caps(session, trim_caps_cb, session);
>                 dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
>                      session->s_mds, session->s_nr_caps, max_caps,
> -                       trim_caps - session->s_trim_caps);
> -               session->s_trim_caps = 0;
> +                    trim_caps - atomic_read(&session->s_trim_caps));
>         }
>         return 0;
>  }
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index c2a19fb..d0e6566 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -130,8 +130,8 @@ struct ceph_mds_session {
>         /* protected by s_cap_lock */
>         spinlock_t        s_cap_lock;
>         struct list_head  s_caps;     /* all caps issued by this session */
> -       int               s_nr_caps, s_trim_caps;
> -       int               s_num_cap_releases;
> +       int               s_nr_caps, s_num_cap_releases;
> +       atomic_t          s_trim_caps;
>         struct list_head  s_cap_releases; /* waiting cap_release messages */
>         struct list_head  s_cap_releases_done; /* ready to send */
>         struct ceph_cap  *s_cap_iterator;
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 6627b26..bcf26bf 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -655,6 +655,7 @@ static const struct super_operations ceph_super_ops = {
>         .alloc_inode    = ceph_alloc_inode,
>         .destroy_inode  = ceph_destroy_inode,
>         .write_inode    = ceph_write_inode,
> +       .drop_inode     = ceph_drop_inode,
>         .sync_fs        = ceph_sync_fs,
>         .put_super      = ceph_put_super,
>         .show_options   = ceph_show_options,
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index e81c0b6..b87691a 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -421,6 +421,7 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
>  /*
>   * Ceph inode.
>   */
> +#define CEPH_I_TRIMCAPS  1  /* trim caps when possible */
>  #define CEPH_I_NODELAY   4  /* do not delay cap release */
>  #define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
>  #define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
> @@ -487,6 +488,7 @@ extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
>  extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
>  extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
>                                     struct ceph_cap *cap);
> +extern int ceph_trim_caps(struct ceph_inode_info *ci, int drop_inode);
>
>  static inline int ceph_caps_issued(struct ceph_inode_info *ci)
>  {
> @@ -675,6 +677,7 @@ extern const struct inode_operations ceph_file_iops;
>
>  extern struct inode *ceph_alloc_inode(struct super_block *sb);
>  extern void ceph_destroy_inode(struct inode *inode);
> +extern int ceph_drop_inode(struct inode *inode);
>
>  extern struct inode *ceph_get_inode(struct super_block *sb,
>                                     struct ceph_vino vino);
> --
> 1.8.1.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil Aug. 23, 2013, 8:37 p.m. UTC | #2
On Fri, 23 Aug 2013, Gregory Farnum wrote:
> Did this patch get dropped on purpose? I also don't see it in our
> testing branch.

We fixed d_prune to be less lame instead.  That patch is waiting for Al's 
attention on -fsdevel.

sage


> -Greg
> Software Engineer #42 @ http://inktank.com | http://ceph.com
> 
> 
> On Sun, Aug 4, 2013 at 11:10 PM, Yan, Zheng <zheng.z.yan@intel.com> wrote:
> > From: "Yan, Zheng" <zheng.z.yan@intel.com>
> >
> > The trim caps code that handles SESSION_RECALL_STATE message has
> > two issues. First, it uses d_prune_aliases() to prune dentries.
> > This confuses our 'dir complete' check, because d_prune_aliases()
> > unhashes dentries before calling our d_prune() callback. Second,
> > it only prune dentries, inodes with zero reference are still in
> > the icache unless there is memory pressure.
> >
> > The fix is adding d_delete() and drop_inode() VFS callback. VFS
> > calls our d_delete()/drop_inode() callback when a dentry/inode's
> > last reference is dropped. If true is returned, VFS drop the
> > dentry/inode from the cache. When handling SESSION_RECALL_STATE
> > message, we mark inode with CEPH_I_TRIMCAPS flag. Our callbacks
> > check the flag and session->s_trim_caps to decide if VFS should
> > drop a dentry/inode. To trim an inode, we just need to reference
> > dentries associated with the inode, then release them.
> >
> > Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> > ---
> >  fs/ceph/caps.c       | 30 ++++++++++++++++++++++++++++++
> >  fs/ceph/dir.c        | 12 ++++++++++++
> >  fs/ceph/inode.c      | 11 +++++++++++
> >  fs/ceph/mds_client.c | 37 ++++++++++++++++++++++++++++++-------
> >  fs/ceph/mds_client.h |  4 ++--
> >  fs/ceph/super.c      |  1 +
> >  fs/ceph/super.h      |  3 +++
> >  7 files changed, 89 insertions(+), 9 deletions(-)
> >
> > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> > index 49590a0..332dc8d 100644
> > --- a/fs/ceph/caps.c
> > +++ b/fs/ceph/caps.c
> > @@ -882,6 +882,36 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
> >  }
> >
> >  /*
> > + * Used by ceph_d_delete()/ceph_drop_inode(). check if we should drop
> > + * the inode.
> > + */
> > +int ceph_trim_caps(struct ceph_inode_info *ci, int drop_inode)
> > +{
> > +       struct ceph_cap *cap;
> > +       struct rb_node *p;
> > +       int ret = 0;
> > +
> > +       if (!spin_trylock(&ci->i_ceph_lock))
> > +               return 0;
> > +
> > +       if (ci->i_ceph_flags & CEPH_I_TRIMCAPS) {
> > +               for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
> > +                       cap = rb_entry(p, struct ceph_cap, ci_node);
> > +                       if (atomic_read(&cap->session->s_trim_caps) <= 0)
> > +                               continue;
> > +                       if (drop_inode)
> > +                               atomic_dec(&cap->session->s_trim_caps);
> > +                       ret = 1;
> > +               }
> > +               if (!ret)
> > +                       ci->i_ceph_flags &= ~CEPH_I_TRIMCAPS;
> > +       }
> > +
> > +       spin_unlock(&ci->i_ceph_lock);
> > +       return ret;
> > +}
> > +
> > +/*
> >   * called under i_ceph_lock
> >   */
> >  static int __ceph_is_any_caps(struct ceph_inode_info *ci)
> > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > index 0e4da4a..9741f34 100644
> > --- a/fs/ceph/dir.c
> > +++ b/fs/ceph/dir.c
> > @@ -1043,6 +1043,17 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
> >         return valid;
> >  }
> >
> > +static int ceph_d_delete(const struct dentry *dentry)
> > +{
> > +       if (dentry->d_inode) {
> > +               struct ceph_inode_info *ci = ceph_inode(dentry->d_inode);
> > +               /* need trim caps? */
> > +               if (ci->i_ceph_flags & CEPH_I_TRIMCAPS)
> > +                       return ceph_trim_caps(ci, false);
> > +       }
> > +       return 0;
> > +}
> > +
> >  /*
> >   * Release our ceph_dentry_info.
> >   */
> > @@ -1300,6 +1311,7 @@ const struct inode_operations ceph_dir_iops = {
> >
> >  const struct dentry_operations ceph_dentry_ops = {
> >         .d_revalidate = ceph_d_revalidate,
> > +       .d_delete = ceph_d_delete,
> >         .d_release = ceph_d_release,
> >         .d_prune = ceph_d_prune,
> >  };
> > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > index 01e4db1..371590a 100644
> > --- a/fs/ceph/inode.c
> > +++ b/fs/ceph/inode.c
> > @@ -431,6 +431,17 @@ void ceph_destroy_inode(struct inode *inode)
> >         call_rcu(&inode->i_rcu, ceph_i_callback);
> >  }
> >
> > +int ceph_drop_inode(struct inode *inode)
> > +{
> > +       struct ceph_inode_info *ci = ceph_inode(inode);
> > +       int ret = generic_drop_inode(inode);
> > +
> > +       /* need trim caps? */
> > +       if (!ret && (ci->i_ceph_flags & CEPH_I_TRIMCAPS))
> > +               ret = ceph_trim_caps(ci, true);
> > +
> > +       return ret ;
> > +}
> >
> >  /*
> >   * Helpers to fill in size, ctime, mtime, and atime.  We have to be
> > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > index 2a42b5f..7c2b8d6 100644
> > --- a/fs/ceph/mds_client.c
> > +++ b/fs/ceph/mds_client.c
> > @@ -435,7 +435,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
> >         s->s_renew_seq = 0;
> >         INIT_LIST_HEAD(&s->s_caps);
> >         s->s_nr_caps = 0;
> > -       s->s_trim_caps = 0;
> > +       atomic_set(&s->s_trim_caps, 0);
> >         atomic_set(&s->s_ref, 1);
> >         INIT_LIST_HEAD(&s->s_waiting);
> >         INIT_LIST_HEAD(&s->s_unsafe);
> > @@ -1207,9 +1207,10 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
> >  {
> >         struct ceph_mds_session *session = arg;
> >         struct ceph_inode_info *ci = ceph_inode(inode);
> > +       struct dentry *dentry;
> >         int used, oissued, mine;
> >
> > -       if (session->s_trim_caps <= 0)
> > +       if (atomic_read(&session->s_trim_caps) <= 0)
> >                 return -1;
> >
> >         spin_lock(&ci->i_ceph_lock);
> > @@ -1225,16 +1226,39 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
> >         if ((used & ~oissued) & mine)
> >                 goto out;   /* we need these caps */
> >
> > -       session->s_trim_caps--;
> >         if (oissued) {
> >                 /* we aren't the only cap.. just remove us */
> >                 __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
> >                                     cap->mseq, cap->issue_seq);
> >                 __ceph_remove_cap(cap);
> > +               atomic_dec(&session->s_trim_caps);
> >         } else {
> >                 /* try to drop referring dentries */
> > +               ci->i_ceph_flags |= CEPH_I_TRIMCAPS;
> >                 spin_unlock(&ci->i_ceph_lock);
> > -               d_prune_aliases(inode);
> > +               /*
> > +                * can't use d_prune_aliases(), because it unhashes dentries
> > +                * before calling ceph_d_prune().
> > +                */
> > +restart:
> > +               spin_lock(&inode->i_lock);
> > +               hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
> > +                       spin_lock(&dentry->d_lock);
> > +                       if (!dentry->d_count) {
> > +                               dget_dlock(dentry);
> > +                               spin_unlock(&dentry->d_lock);
> > +                               spin_unlock(&inode->i_lock);
> > +                               /*
> > +                                * our d_delete callback returns true when
> > +                                * CEPH_I_TRIMCAPS is set. This makes VFS
> > +                                * drop the dentry.
> > +                                */
> > +                               dput(dentry);
> > +                               goto restart;
> > +                       }
> > +                       spin_unlock(&dentry->d_lock);
> > +               }
> > +               spin_unlock(&inode->i_lock);
> >                 dout("trim_caps_cb %p cap %p  pruned, count now %d\n",
> >                      inode, cap, atomic_read(&inode->i_count));
> >                 return 0;
> > @@ -1257,12 +1281,11 @@ static int trim_caps(struct ceph_mds_client *mdsc,
> >         dout("trim_caps mds%d start: %d / %d, trim %d\n",
> >              session->s_mds, session->s_nr_caps, max_caps, trim_caps);
> >         if (trim_caps > 0) {
> > -               session->s_trim_caps = trim_caps;
> > +               atomic_set(&session->s_trim_caps, trim_caps);
> >                 iterate_session_caps(session, trim_caps_cb, session);
> >                 dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
> >                      session->s_mds, session->s_nr_caps, max_caps,
> > -                       trim_caps - session->s_trim_caps);
> > -               session->s_trim_caps = 0;
> > +                    trim_caps - atomic_read(&session->s_trim_caps));
> >         }
> >         return 0;
> >  }
> > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> > index c2a19fb..d0e6566 100644
> > --- a/fs/ceph/mds_client.h
> > +++ b/fs/ceph/mds_client.h
> > @@ -130,8 +130,8 @@ struct ceph_mds_session {
> >         /* protected by s_cap_lock */
> >         spinlock_t        s_cap_lock;
> >         struct list_head  s_caps;     /* all caps issued by this session */
> > -       int               s_nr_caps, s_trim_caps;
> > -       int               s_num_cap_releases;
> > +       int               s_nr_caps, s_num_cap_releases;
> > +       atomic_t          s_trim_caps;
> >         struct list_head  s_cap_releases; /* waiting cap_release messages */
> >         struct list_head  s_cap_releases_done; /* ready to send */
> >         struct ceph_cap  *s_cap_iterator;
> > diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> > index 6627b26..bcf26bf 100644
> > --- a/fs/ceph/super.c
> > +++ b/fs/ceph/super.c
> > @@ -655,6 +655,7 @@ static const struct super_operations ceph_super_ops = {
> >         .alloc_inode    = ceph_alloc_inode,
> >         .destroy_inode  = ceph_destroy_inode,
> >         .write_inode    = ceph_write_inode,
> > +       .drop_inode     = ceph_drop_inode,
> >         .sync_fs        = ceph_sync_fs,
> >         .put_super      = ceph_put_super,
> >         .show_options   = ceph_show_options,
> > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> > index e81c0b6..b87691a 100644
> > --- a/fs/ceph/super.h
> > +++ b/fs/ceph/super.h
> > @@ -421,6 +421,7 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
> >  /*
> >   * Ceph inode.
> >   */
> > +#define CEPH_I_TRIMCAPS  1  /* trim caps when possible */
> >  #define CEPH_I_NODELAY   4  /* do not delay cap release */
> >  #define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
> >  #define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
> > @@ -487,6 +488,7 @@ extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
> >  extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
> >  extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
> >                                     struct ceph_cap *cap);
> > +extern int ceph_trim_caps(struct ceph_inode_info *ci, int drop_inode);
> >
> >  static inline int ceph_caps_issued(struct ceph_inode_info *ci)
> >  {
> > @@ -675,6 +677,7 @@ extern const struct inode_operations ceph_file_iops;
> >
> >  extern struct inode *ceph_alloc_inode(struct super_block *sb);
> >  extern void ceph_destroy_inode(struct inode *inode);
> > +extern int ceph_drop_inode(struct inode *inode);
> >
> >  extern struct inode *ceph_get_inode(struct super_block *sb,
> >                                     struct ceph_vino vino);
> > --
> > 1.8.1.4
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 49590a0..332dc8d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -882,6 +882,36 @@  int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
 }
 
 /*
+ * Used by ceph_d_delete()/ceph_drop_inode(). check if we should drop
+ * the inode.
+ */
+int ceph_trim_caps(struct ceph_inode_info *ci, int drop_inode)
+{
+	struct ceph_cap *cap;
+	struct rb_node *p;
+	int ret = 0;
+
+	if (!spin_trylock(&ci->i_ceph_lock))
+		return 0;
+
+	if (ci->i_ceph_flags & CEPH_I_TRIMCAPS) {
+		for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+			cap = rb_entry(p, struct ceph_cap, ci_node);
+			if (atomic_read(&cap->session->s_trim_caps) <= 0)
+				continue;
+			if (drop_inode)
+				atomic_dec(&cap->session->s_trim_caps);
+			ret = 1;
+		}
+		if (!ret)
+			ci->i_ceph_flags &= ~CEPH_I_TRIMCAPS;
+	}
+
+	spin_unlock(&ci->i_ceph_lock);
+	return ret;
+}
+
+/*
  * called under i_ceph_lock
  */
 static int __ceph_is_any_caps(struct ceph_inode_info *ci)
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0e4da4a..9741f34 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1043,6 +1043,17 @@  static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
 	return valid;
 }
 
+static int ceph_d_delete(const struct dentry *dentry)
+{
+	if (dentry->d_inode) {
+		struct ceph_inode_info *ci = ceph_inode(dentry->d_inode);
+		/* need trim caps? */
+		if (ci->i_ceph_flags & CEPH_I_TRIMCAPS)
+			return ceph_trim_caps(ci, false);
+	}
+	return 0;
+}
+
 /*
  * Release our ceph_dentry_info.
  */
@@ -1300,6 +1311,7 @@  const struct inode_operations ceph_dir_iops = {
 
 const struct dentry_operations ceph_dentry_ops = {
 	.d_revalidate = ceph_d_revalidate,
+	.d_delete = ceph_d_delete,
 	.d_release = ceph_d_release,
 	.d_prune = ceph_d_prune,
 };
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 01e4db1..371590a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -431,6 +431,17 @@  void ceph_destroy_inode(struct inode *inode)
 	call_rcu(&inode->i_rcu, ceph_i_callback);
 }
 
+int ceph_drop_inode(struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int ret = generic_drop_inode(inode);
+
+	/* need trim caps? */
+	if (!ret && (ci->i_ceph_flags & CEPH_I_TRIMCAPS))
+		ret = ceph_trim_caps(ci, true);
+
+	return ret ;
+}
 
 /*
  * Helpers to fill in size, ctime, mtime, and atime.  We have to be
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 2a42b5f..7c2b8d6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -435,7 +435,7 @@  static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	s->s_renew_seq = 0;
 	INIT_LIST_HEAD(&s->s_caps);
 	s->s_nr_caps = 0;
-	s->s_trim_caps = 0;
+	atomic_set(&s->s_trim_caps, 0);
 	atomic_set(&s->s_ref, 1);
 	INIT_LIST_HEAD(&s->s_waiting);
 	INIT_LIST_HEAD(&s->s_unsafe);
@@ -1207,9 +1207,10 @@  static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 {
 	struct ceph_mds_session *session = arg;
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct dentry *dentry;
 	int used, oissued, mine;
 
-	if (session->s_trim_caps <= 0)
+	if (atomic_read(&session->s_trim_caps) <= 0)
 		return -1;
 
 	spin_lock(&ci->i_ceph_lock);
@@ -1225,16 +1226,39 @@  static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 	if ((used & ~oissued) & mine)
 		goto out;   /* we need these caps */
 
-	session->s_trim_caps--;
 	if (oissued) {
 		/* we aren't the only cap.. just remove us */
 		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
 				    cap->mseq, cap->issue_seq);
 		__ceph_remove_cap(cap);
+		atomic_dec(&session->s_trim_caps);
 	} else {
 		/* try to drop referring dentries */
+		ci->i_ceph_flags |= CEPH_I_TRIMCAPS;
 		spin_unlock(&ci->i_ceph_lock);
-		d_prune_aliases(inode);
+		/*
+		 * can't use d_prune_aliases(), because it unhashes dentries
+		 * before calling ceph_d_prune().
+		 */
+restart:
+		spin_lock(&inode->i_lock);
+		hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+			spin_lock(&dentry->d_lock);
+			if (!dentry->d_count) {
+				dget_dlock(dentry);
+				spin_unlock(&dentry->d_lock);
+				spin_unlock(&inode->i_lock);
+				/*
+				 * our d_delete callback returns true when
+				 * CEPH_I_TRIMCAPS is set. This makes VFS
+				 * drop the dentry.
+				 */
+				dput(dentry);
+				goto restart;
+			}
+			spin_unlock(&dentry->d_lock);
+		}
+		spin_unlock(&inode->i_lock);
 		dout("trim_caps_cb %p cap %p  pruned, count now %d\n",
 		     inode, cap, atomic_read(&inode->i_count));
 		return 0;
@@ -1257,12 +1281,11 @@  static int trim_caps(struct ceph_mds_client *mdsc,
 	dout("trim_caps mds%d start: %d / %d, trim %d\n",
 	     session->s_mds, session->s_nr_caps, max_caps, trim_caps);
 	if (trim_caps > 0) {
-		session->s_trim_caps = trim_caps;
+		atomic_set(&session->s_trim_caps, trim_caps);
 		iterate_session_caps(session, trim_caps_cb, session);
 		dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
 		     session->s_mds, session->s_nr_caps, max_caps,
-			trim_caps - session->s_trim_caps);
-		session->s_trim_caps = 0;
+		     trim_caps - atomic_read(&session->s_trim_caps));
 	}
 	return 0;
 }
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c2a19fb..d0e6566 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -130,8 +130,8 @@  struct ceph_mds_session {
 	/* protected by s_cap_lock */
 	spinlock_t        s_cap_lock;
 	struct list_head  s_caps;     /* all caps issued by this session */
-	int               s_nr_caps, s_trim_caps;
-	int               s_num_cap_releases;
+	int               s_nr_caps, s_num_cap_releases;
+	atomic_t	  s_trim_caps;
 	struct list_head  s_cap_releases; /* waiting cap_release messages */
 	struct list_head  s_cap_releases_done; /* ready to send */
 	struct ceph_cap  *s_cap_iterator;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 6627b26..bcf26bf 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -655,6 +655,7 @@  static const struct super_operations ceph_super_ops = {
 	.alloc_inode	= ceph_alloc_inode,
 	.destroy_inode	= ceph_destroy_inode,
 	.write_inode    = ceph_write_inode,
+	.drop_inode     = ceph_drop_inode,
 	.sync_fs        = ceph_sync_fs,
 	.put_super	= ceph_put_super,
 	.show_options   = ceph_show_options,
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index e81c0b6..b87691a 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -421,6 +421,7 @@  static inline struct inode *ceph_find_inode(struct super_block *sb,
 /*
  * Ceph inode.
  */
+#define CEPH_I_TRIMCAPS  1  /* trim caps when possible */
 #define CEPH_I_NODELAY   4  /* do not delay cap release */
 #define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
 #define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
@@ -487,6 +488,7 @@  extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
 extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
 extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
 				    struct ceph_cap *cap);
+extern int ceph_trim_caps(struct ceph_inode_info *ci, int drop_inode);
 
 static inline int ceph_caps_issued(struct ceph_inode_info *ci)
 {
@@ -675,6 +677,7 @@  extern const struct inode_operations ceph_file_iops;
 
 extern struct inode *ceph_alloc_inode(struct super_block *sb);
 extern void ceph_destroy_inode(struct inode *inode);
+extern int ceph_drop_inode(struct inode *inode);
 
 extern struct inode *ceph_get_inode(struct super_block *sb,
 				    struct ceph_vino vino);