From: David Teigland Date: Tue, 31 Oct 2006 17:55:56 +0000 (-0600) Subject: [DLM] fix requestqueue race X-Git-Tag: v2.6.20-rc1~145^2^2~5^2~32 X-Git-Url: http://pilppa.com/gitweb/?a=commitdiff_plain;h=d4400156d415540086c34a06e5d233122d6bf56a;p=linux-2.6-omap-h63xx.git [DLM] fix requestqueue race Red Hat BZ 211914 There's a race between dlm_recoverd (1) enabling locking and (2) clearing out the requestqueue, and dlm_recvd (1) checking if locking is enabled and (2) adding a message to the requestqueue. An order of recoverd(1), recvd(1), recvd(2), recoverd(2) will result in a message being left on the requestqueue. The fix is to have dlm_recvd check if dlm_recoverd has enabled locking after taking the mutex for the requestqueue and if it has processing the message instead of queueing it. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 3f2befa4797..6088a16926b 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -3028,10 +3028,17 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) while (1) { if (dlm_locking_stopped(ls)) { - if (!recovery) - dlm_add_requestqueue(ls, nodeid, hd); - error = -EINTR; - goto out; + if (recovery) { + error = -EINTR; + goto out; + } + error = dlm_add_requestqueue(ls, nodeid, hd); + if (error == -EAGAIN) + continue; + else { + error = -EINTR; + goto out; + } } if (lock_recovery_try(ls)) diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index 7b2b089634a..0226d2a0a0f 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -30,26 +30,39 @@ struct rq_entry { * lockspace is enabled on some while still suspended on others. */ -void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) +int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) { struct rq_entry *e; int length = hd->h_length; + int rv = 0; if (dlm_is_removed(ls, nodeid)) - return; + return 0; e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); if (!e) { log_print("dlm_add_requestqueue: out of memory\n"); - return; + return 0; } e->nodeid = nodeid; memcpy(e->request, hd, length); + /* We need to check dlm_locking_stopped() after taking the mutex to + avoid a race where dlm_recoverd enables locking and runs + process_requestqueue between our earlier dlm_locking_stopped check + and this addition to the requestqueue. */ + mutex_lock(&ls->ls_requestqueue_mutex); - list_add_tail(&e->list, &ls->ls_requestqueue); + if (dlm_locking_stopped(ls)) + list_add_tail(&e->list, &ls->ls_requestqueue); + else { + log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid); + kfree(e); + rv = -EAGAIN; + } mutex_unlock(&ls->ls_requestqueue_mutex); + return rv; } int dlm_process_requestqueue(struct dlm_ls *ls) diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h index 349f0d292d9..6a53ea03335 100644 --- a/fs/dlm/requestqueue.h +++ b/fs/dlm/requestqueue.h @@ -13,7 +13,7 @@ #ifndef __REQUESTQUEUE_DOT_H__ #define __REQUESTQUEUE_DOT_H__ -void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); +int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); int dlm_process_requestqueue(struct dlm_ls *ls); void dlm_wait_requestqueue(struct dlm_ls *ls); void dlm_purge_requestqueue(struct dlm_ls *ls);