@@ -141,6 +141,7 @@ struct ceph_connection {
struct ceph_messenger *msgr;
struct socket *sock;
unsigned long state; /* connection state (see flags above) */
+ unsigned long last_rcv;
const char *error_msg; /* error message, if any */
struct ceph_entity_addr peer_addr; /* peer address */
@@ -416,6 +416,7 @@ void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
memset(con, 0, sizeof(*con));
atomic_set(&con->nref, 1);
con->msgr = msgr;
+ con->last_rcv = jiffies;
mutex_init(&con->mutex);
INIT_LIST_HEAD(&con->out_queue);
INIT_LIST_HEAD(&con->out_sent);
@@ -1855,6 +1856,7 @@ more:
ret = process_connect(con);
if (ret < 0)
goto out;
+ con->last_rcv = jiffies;
goto more;
}
@@ -1870,6 +1872,7 @@ more:
ret = ceph_tcp_recvmsg(con->sock, buf, skip);
if (ret <= 0)
goto out;
+ con->last_rcv = jiffies;
con->in_base_pos += ret;
if (con->in_base_pos)
goto more;
@@ -1881,6 +1884,7 @@ more:
ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1);
if (ret <= 0)
goto out;
+ con->last_rcv = jiffies;
dout("try_read got tag %d\n", (int)con->in_tag);
switch (con->in_tag) {
case CEPH_MSGR_TAG_MSG:
@@ -1889,6 +1893,9 @@ more:
case CEPH_MSGR_TAG_ACK:
prepare_read_ack(con);
break;
+ case CEPH_MSGR_TAG_KEEPALIVE:
+ prepare_read_tag(con);
+ goto out;
case CEPH_MSGR_TAG_CLOSE:
set_bit(CLOSED, &con->state); /* fixme */
goto out;
@@ -1910,6 +1917,7 @@ more:
}
goto out;
}
+ con->last_rcv = jiffies;
if (con->in_tag == CEPH_MSGR_TAG_READY)
goto more;
process_message(con);
@@ -1919,6 +1927,7 @@ more:
ret = read_partial_ack(con);
if (ret <= 0)
goto out;
+ con->last_rcv = jiffies;
process_ack(con);
goto more;
}
@@ -1094,6 +1094,15 @@ static void handle_timeout(struct work_struct *work)
osd = req->r_osd;
BUG_ON(!osd);
+
+ /*
+ * Only reset osd if we haven't recently received something
+ * from it - if we have, it's just busy, and hasn't gotten
+ * to this request yet.
+ */
+ if (time_before(jiffies, osd->o_con.last_rcv + timeout))
+ break;
+
pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
req->r_tid, osd->o_osd);
__kick_osd_requests(osdc, osd);
Previously, when clients' sustained offered write load exceeded the sustained throughput of the OSDs, client messages would routinely time out while waiting to be processed by the OSDs. The client responded by resetting the connection to the OSD handling the timed-out message.

Ceph OSDs can now send keepalives while waiting for sufficient buffer space to receive a message from a client. This patch makes clients notice those keepalives, and not reset a connection serving a timed-out message if anything, particularly a keepalive, has been received on that connection recently.

Signed-off-by: Jim Schutt <jaschut@sandia.gov>
---
 include/linux/ceph/messenger.h | 1 +
 net/ceph/messenger.c | 9 +++++++++
 net/ceph/osd_client.c | 9 +++++++++
 3 files changed, 19 insertions(+), 0 deletions(-)