@@ -92,8 +92,9 @@ static void o2quo_make_decision(struct work_struct *work)
int quorum;
int lowest_hb, lowest_reachable = 0, fence = 0;
struct o2quo_state *qs = &o2quo_state;
+ unsigned long flags;
- spin_lock(&qs->qs_lock);
+ spin_lock_irqsave(&qs->qs_lock, flags);
lowest_hb = find_first_bit(qs->qs_hb_bm, O2NM_MAX_NODES);
if (lowest_hb != O2NM_MAX_NODES)
@@ -146,14 +147,14 @@ static void o2quo_make_decision(struct work_struct *work)
out:
if (fence) {
- spin_unlock(&qs->qs_lock);
+ spin_unlock_irqrestore(&qs->qs_lock, flags);
o2quo_fence_self();
} else {
mlog(ML_NOTICE, "not fencing this node, heartbeating: %d, "
"connected: %d, lowest: %d (%sreachable)\n",
qs->qs_heartbeating, qs->qs_connected, lowest_hb,
lowest_reachable ? "" : "un");
- spin_unlock(&qs->qs_lock);
+ spin_unlock_irqrestore(&qs->qs_lock, flags);
}
As &qs->qs_lock is also acquired by the timer o2net_idle_timer() which executes under softirq context, code executing under process context should disable irq before acquiring the lock, otherwise deadlock could happen if the process context hold the lock then preempt by the timer. Possible deadlock scenario: o2quo_make_decision (workqueue) -> spin_lock(&qs->qs_lock); <timer interrupt> -> o2net_idle_timer -> o2quo_conn_err -> spin_lock(&qs->qs_lock); (deadlock here) This flaw was found using an experimental static analysis tool we are developing for irq-related deadlock. The tentative patch fix the potential deadlock by spin_lock_irqsave(). Signed-off-by: Chengfeng Ye <dg573847474@gmail.com> --- fs/ocfs2/cluster/quorum.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)