@@ -333,6 +333,7 @@ struct dlm_rsb {
struct list_head res_recover_list; /* used for recovery */
struct list_head res_toss_q_list;
int res_recover_locks_count;
+ struct rcu_head rcu;
char *res_lvbptr;
char res_name[DLM_RESNAME_MAXLEN+1];
@@ -366,6 +367,8 @@ enum rsb_flags {
RSB_RECOVER_GRANT,
RSB_RECOVER_LVB_INVAL,
RSB_TOSS,
+ /* whether the rsb is part of ls_rsbtbl or not */
+ RSB_HASHED,
};
static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag)
@@ -582,6 +582,7 @@ void dlm_rsb_toss_timer(struct timer_list *timer)
list_del(&r->res_rsbs_list);
rhashtable_remove_fast(&ls->ls_rsbtbl, &r->res_node,
dlm_rhash_rsb_params);
+ rsb_clear_flag(r, RSB_HASHED);
/* not necessary to held the ls_rsbtbl_lock when
* calling send_remove()
@@ -658,8 +659,14 @@ int dlm_search_rsb_tree(struct rhashtable *rhash, const void *name, int len,
static int rsb_insert(struct dlm_rsb *rsb, struct rhashtable *rhash)
{
- return rhashtable_insert_fast(rhash, &rsb->res_node,
- dlm_rhash_rsb_params);
+ int rv;
+
+ rv = rhashtable_insert_fast(rhash, &rsb->res_node,
+ dlm_rhash_rsb_params);
+ if (!rv)
+ rsb_set_flag(rsb, RSB_HASHED);
+
+ return rv;
}
/*
@@ -773,12 +780,24 @@ static int find_rsb_dir(struct dlm_ls *ls, const void *name, int len,
do_toss:
write_lock_bh(&ls->ls_rsbtbl_lock);
-
- /* retry lookup under write lock to see if its still in toss state
- * if not it's in keep state and we relookup - unlikely path.
+ /* moving an rsb out of toss handling requires ls_rsbtbl_lock to be
+ * held in write lock state. However, during the transition from the
+ * previous read lock state to the write lock state the rsb can be
+ * removed from the ls_rsbtbl hash or change its state back to keep
+ * state. If RSB_HASHED is not set anymore, somebody else removed the
+ * entry from the ls_rsbtbl hash. The lookup is protected by the rcu
+ * read lock and runs inside an rcu read critical section; a possible
+ * free of the rsb (which usually occurs when an rsb is removed from
+ * the hash) cannot happen between the read to write lock transition
+ * because kfree_rcu() of the rsb will not complete during this rcu
+ * read critical section. For this reason checking RSB_HASHED is
+ * enough to detect this situation. The recheck of RSB_TOSS is
+ * required to see if the rsb is still in toss state, because somebody
+ * else could have taken it out of RSB_TOSS state. In that very
+ * unlikely case we do a relookup under read lock. The case that
+ * RSB_HASHED got cleared can happen and we need to be prepared for it.
*/
- error = dlm_search_rsb_tree(&ls->ls_rsbtbl, name, len, &r);
- if (!error) {
+ if (rsb_flag(r, RSB_HASHED)) {
if (!rsb_flag(r, RSB_TOSS)) {
write_unlock_bh(&ls->ls_rsbtbl_lock);
goto retry;
@@ -950,11 +969,24 @@ static int find_rsb_nodir(struct dlm_ls *ls, const void *name, int len,
do_toss:
write_lock_bh(&ls->ls_rsbtbl_lock);
- /* retry lookup under write lock to see if its still in toss state
- * if not it's in keep state and we relookup - unlikely path.
+ /* moving an rsb out of toss handling requires ls_rsbtbl_lock to be
+ * held in write lock state. However, during the transition from the
+ * previous read lock state to the write lock state the rsb can be
+ * removed from the ls_rsbtbl hash or change its state back to keep
+ * state. If RSB_HASHED is not set anymore, somebody else removed the
+ * entry from the ls_rsbtbl hash. The lookup is protected by the rcu
+ * read lock and runs inside an rcu read critical section; a possible
+ * free of the rsb (which usually occurs when an rsb is removed from
+ * the hash) cannot happen between the read to write lock transition
+ * because kfree_rcu() of the rsb will not complete during this rcu
+ * read critical section. For this reason checking RSB_HASHED is
+ * enough to detect this situation. The recheck of RSB_TOSS is
+ * required to see if the rsb is still in toss state, because somebody
+ * else could have taken it out of RSB_TOSS state. In that very
+ * unlikely case we do a relookup under read lock. The case that
+ * RSB_HASHED got cleared can happen and we need to be prepared for it.
*/
- error = dlm_search_rsb_tree(&ls->ls_rsbtbl, name, len, &r);
- if (!error) {
+ if (rsb_flag(r, RSB_HASHED)) {
if (!rsb_flag(r, RSB_TOSS)) {
write_unlock_bh(&ls->ls_rsbtbl_lock);
goto retry;
@@ -1046,6 +1078,7 @@ static int find_rsb(struct dlm_ls *ls, const void *name, int len,
{
int dir_nodeid;
uint32_t hash;
+ int rv;
if (len > DLM_RESNAME_MAXLEN)
return -EINVAL;
@@ -1053,12 +1086,20 @@ static int find_rsb(struct dlm_ls *ls, const void *name, int len,
hash = jhash(name, len, 0);
dir_nodeid = dlm_hash2nodeid(ls, hash);
+ /* hold the rcu read lock here to prevent freeing of the rsb
+ * while looking it up; there is currently an optimization
+ * to avoid looking up the rsb twice by instead checking if
+ * it is still part of the ls_rsbtbl hash.
+ */
+ rcu_read_lock();
if (dlm_no_directory(ls))
- return find_rsb_nodir(ls, name, len, hash, dir_nodeid,
- from_nodeid, flags, r_ret);
- else
- return find_rsb_dir(ls, name, len, hash, dir_nodeid,
+ rv = find_rsb_nodir(ls, name, len, hash, dir_nodeid,
from_nodeid, flags, r_ret);
+ else
+ rv = find_rsb_dir(ls, name, len, hash, dir_nodeid,
+ from_nodeid, flags, r_ret);
+ rcu_read_unlock();
+ return rv;
}
/* we have received a request and found that res_master_nodeid != our_nodeid,
@@ -1215,8 +1256,9 @@ static void __dlm_master_lookup(struct dlm_ls *ls, struct dlm_rsb *r, int our_no
* . dlm_master_lookup RECOVER_MASTER (fix_master 1, from_master 0)
*/
-int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name,
- int len, unsigned int flags, int *r_nodeid, int *result)
+static int _dlm_master_lookup(struct dlm_ls *ls, int from_nodeid,
+ const char *name, int len, unsigned int flags,
+ int *r_nodeid, int *result)
{
struct dlm_rsb *r = NULL;
uint32_t hash;
@@ -1243,7 +1285,6 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name,
}
retry:
-
/* check if the rsb is in keep state under read lock - likely path */
read_lock_bh(&ls->ls_rsbtbl_lock);
error = dlm_search_rsb_tree(&ls->ls_rsbtbl, name, len, &r);
@@ -1278,11 +1319,24 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name,
/* unlikely path - relookup under write */
write_lock_bh(&ls->ls_rsbtbl_lock);
- /* rsb_mod_timer() requires to held ls_rsbtbl_lock in write lock
- * check if the rsb is still in toss state, if not relookup
+ /* moving an rsb out of toss handling requires ls_rsbtbl_lock to be
+ * held in write lock state. However, during the transition from the
+ * previous read lock state to the write lock state the rsb can be
+ * removed from the ls_rsbtbl hash or change its state back to keep
+ * state. If RSB_HASHED is not set anymore, somebody else removed the
+ * entry from the ls_rsbtbl hash. The lookup is protected by the rcu
+ * read lock and runs inside an rcu read critical section; a possible
+ * free of the rsb (which usually occurs when an rsb is removed from
+ * the hash) cannot happen between the read to write lock transition
+ * because kfree_rcu() of the rsb will not complete during this rcu
+ * read critical section. For this reason checking RSB_HASHED is
+ * enough to detect this situation. The recheck of RSB_TOSS is
+ * required to see if the rsb is still in toss state, because somebody
+ * else could have taken it out of RSB_TOSS state. In that very
+ * unlikely case we do a relookup under read lock. The case that
+ * RSB_HASHED got cleared can happen and we need to be prepared for it.
*/
- error = dlm_search_rsb_tree(&ls->ls_rsbtbl, name, len, &r);
- if (!error) {
+ if (rsb_flag(r, RSB_HASHED)) {
if (!rsb_flag(r, RSB_TOSS)) {
write_unlock_bh(&ls->ls_rsbtbl_lock);
/* something as changed, very unlikely but
@@ -1290,6 +1344,7 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name,
*/
goto retry;
}
+
} else {
write_unlock_bh(&ls->ls_rsbtbl_lock);
goto not_found;
@@ -1346,6 +1401,23 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name,
return error;
}
+int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *name,
+ int len, unsigned int flags, int *r_nodeid, int *result)
+{
+ int rv;
+
+ /* hold the rcu read lock here to prevent freeing of the rsb
+ * while looking it up; there is currently an optimization
+ * to avoid looking up the rsb twice by instead checking if
+ * it is still part of the ls_rsbtbl hash.
+ */
+ rcu_read_lock();
+ rv = _dlm_master_lookup(ls, from_nodeid, name, len, flags, r_nodeid,
+ result);
+ rcu_read_unlock();
+ return rv;
+}
+
static void dlm_dump_rsb_hash(struct dlm_ls *ls, uint32_t hash)
{
struct dlm_rsb *r;
@@ -4308,6 +4380,7 @@ static void receive_remove(struct dlm_ls *ls, const struct dlm_message *ms)
list_del(&r->res_rsbs_list);
rhashtable_remove_fast(&ls->ls_rsbtbl, &r->res_node,
dlm_rhash_rsb_params);
+ rsb_clear_flag(r, RSB_HASHED);
write_unlock_bh(&ls->ls_rsbtbl_lock);
free_toss_rsb(r);
@@ -101,13 +101,26 @@ struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls)
return r;
}
-void dlm_free_rsb(struct dlm_rsb *r)
+static void __dlm_free_rsb(struct rcu_head *rcu)
{
+ struct dlm_rsb *r = container_of(rcu, struct dlm_rsb, rcu);
+
if (r->res_lvbptr)
dlm_free_lvb(r->res_lvbptr);
kmem_cache_free(rsb_cache, r);
}
+void dlm_free_rsb(struct dlm_rsb *r)
+{
+ /* wait until all current rcu read critical sections
+ * are done before calling __dlm_free_rsb. Currently
+ * this is used to avoid a use after free when checking
+ * whether the rsb is still part of ls_rsbtbl by testing
+ * the RSB_HASHED flag.
+ */
+ call_rcu(&r->rcu, __dlm_free_rsb);
+}
+
struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
{
struct dlm_lkb *lkb;
This patch converts the rsb to be protected under rcu. While the rcu
read lock is held the rsb cannot be freed. In combination with the
newly introduced flag RSB_HASHED we can check whether the rsb is still
part of the ls_rsbtbl hashtable (it cannot be part of any other table).
If it is still part of ls_rsbtbl we can avoid a second
dlm_search_rsb_tree() call.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/dlm_internal.h |   3 ++
 fs/dlm/lock.c         | 117 ++++++++++++++++++++++++++++++++++--------
 fs/dlm/memory.c       |  15 +++++-
 3 files changed, 112 insertions(+), 23 deletions(-)
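
The core idea is that a lookup done under rcu_read_lock() can later
recheck a "still hashed" flag instead of repeating the hash lookup
after upgrading the table lock from read to write, because the free of
the object is deferred with call_rcu(). Below is a minimal sketch of
that pattern, assuming a kernel build context; all identifiers in it
(struct obj, obj_tbl, obj_lock, OBJ_HASHED, obj_update, ...) are
hypothetical and not taken from this patch or from DLM.

/* Minimal sketch of the RCU + "hashed" flag pattern; illustration only. */
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/rcupdate.h>
#include <linux/rhashtable.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stddef.h>

struct obj {
	struct rhash_head node;
	unsigned long flags;			/* bit OBJ_HASHED */
	struct rcu_head rcu;
	char key[16];
};

#define OBJ_HASHED	0			/* object is hashed into obj_tbl */

static struct rhashtable obj_tbl;		/* assumed initialized elsewhere */
static DEFINE_RWLOCK(obj_lock);
static const struct rhashtable_params obj_params = {
	.head_offset = offsetof(struct obj, node),
	.key_offset  = offsetof(struct obj, key),
	.key_len     = sizeof_field(struct obj, key),
};

static void obj_free_rcu(struct rcu_head *rcu)
{
	/* runs only after all pre-existing rcu read sections finished */
	kfree(container_of(rcu, struct obj, rcu));
}

static void obj_remove(struct obj *o)
{
	write_lock_bh(&obj_lock);
	rhashtable_remove_fast(&obj_tbl, &o->node, obj_params);
	clear_bit(OBJ_HASHED, &o->flags);	/* mirrors clearing RSB_HASHED */
	write_unlock_bh(&obj_lock);
	call_rcu(&o->rcu, obj_free_rcu);	/* deferred free, like dlm_free_rsb() */
}

static int obj_update(const char *key)	/* key points to a 16 byte buffer */
{
	struct obj *o;

	rcu_read_lock();			/* keeps 'o' alive across the lock upgrade */
	read_lock_bh(&obj_lock);
	o = rhashtable_lookup_fast(&obj_tbl, key, obj_params);
	read_unlock_bh(&obj_lock);
	if (!o) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* no table lock is held here: the object may be removed
	 * concurrently, but it cannot be freed because we are still
	 * inside an rcu read critical section
	 */
	write_lock_bh(&obj_lock);
	if (!test_bit(OBJ_HASHED, &o->flags)) {
		/* somebody removed it; no second hash lookup needed */
		write_unlock_bh(&obj_lock);
		rcu_read_unlock();
		return -ENOENT;
	}
	/* ... modify the object under the write lock ... */
	write_unlock_bh(&obj_lock);
	rcu_read_unlock();
	return 0;
}

The patch follows the same shape: rsb_insert() sets RSB_HASHED, the
toss timer and receive_remove() clear it under ls_rsbtbl_lock, and
dlm_free_rsb() defers the actual free via call_rcu(). That is what
lets find_rsb_dir(), find_rsb_nodir() and _dlm_master_lookup() replace
their second dlm_search_rsb_tree() call with a check of RSB_HASHED
while inside the rcu read critical section.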