diff mbox series

[v2] multipathd: use timestamps to tell when the directio checker timed out

Message ID 20241105025739.450449-1-bmarzins@redhat.com (mailing list archive)
State Not Applicable, archived
Delegated to: Benjamin Marzinski
Headers show
Series [v2] multipathd: use timestamps to tell when the directio checker timed out | expand

Commit Message

Benjamin Marzinski Nov. 5, 2024, 2:57 a.m. UTC
Instead of counting the number of times the path checker has been
called and treating that as the number of seconds that have passed,
calculate the actual timestamp when the checker will time out, and
check that instead.

Suggested-by: Martin Wilck <mwilck@suse.com>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---

Changes in V2 (as suggested by Martin Wilck):
Switch from tracking the timeout with time_t to struct timespec, to
to keep timeouts from being late by up to a second.

 libmultipath/checkers/directio.c | 42 ++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 13 deletions(-)

Comments

Martin Wilck Nov. 6, 2024, 3:40 p.m. UTC | #1
On Mon, 2024-11-04 at 21:57 -0500, Benjamin Marzinski wrote:
> Instead of counting the number of times the path checker has been
> called and treating that as the number of seconds that have passed,
> calculate the actual timestamp when the checker will time out, and
> check that instead.
> 
> Suggested-by: Martin Wilck <mwilck@suse.com>
> Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>

Reviewed-by: Martin Wilck <mwilck@suse.com>
Martin Wilck Nov. 6, 2024, 10:40 p.m. UTC | #2
On Wed, 2024-11-06 at 16:40 +0100, Martin Wilck wrote:
> On Mon, 2024-11-04 at 21:57 -0500, Benjamin Marzinski wrote:
> > Instead of counting the number of times the path checker has been
> > called and treating that as the number of seconds that have passed,
> > calculate the actual timestamp when the checker will time out, and
> > check that instead.
> > 
> > Suggested-by: Martin Wilck <mwilck@suse.com>
> > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> 
> Reviewed-by: Martin Wilck <mwilck@suse.com>
> 

This breaks the directio test. I can fix it.

Martin
diff mbox series

Patch

diff --git a/libmultipath/checkers/directio.c b/libmultipath/checkers/directio.c
index 27227894..a7422a87 100644
--- a/libmultipath/checkers/directio.c
+++ b/libmultipath/checkers/directio.c
@@ -60,13 +60,26 @@  const char *libcheck_msgtable[] = {
 #define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt, ##args)
 
 struct directio_context {
-	unsigned int	running;
-	int		reset_flags;
+	struct timespec timeout;
+	int reset_flags;
 	struct aio_group *aio_grp;
 	struct async_req *req;
 	bool checked_state;
 };
 
+static bool is_running(struct directio_context *ct) {
+	return (ct->timeout.tv_sec != 0 || ct->timeout.tv_nsec != 0);
+}
+
+static void start_running(struct directio_context *ct, int timeout_secs) {
+	get_monotonic_time(&ct->timeout);
+	ct->timeout.tv_sec += timeout_secs;
+}
+
+static void stop_running(struct directio_context *ct) {
+	ct->timeout.tv_sec = ct->timeout.tv_nsec = 0;
+}
+
 static struct aio_group *
 add_aio_group(void)
 {
@@ -234,9 +247,9 @@  void libcheck_free (struct checker * c)
 		}
 	}
 
-	if (ct->running && ct->req->state != PATH_PENDING)
-		ct->running = 0;
-	if (!ct->running) {
+	if (is_running(ct) && ct->req->state != PATH_PENDING)
+		stop_running(ct);
+	if (!is_running(ct)) {
 		free(ct->req->buf);
 		free(ct->req);
 		ct->aio_grp->holders--;
@@ -304,7 +317,7 @@  check_pending(struct directio_context *ct, struct timespec timeout)
 		r = get_events(ct->aio_grp, &timeout);
 
 		if (ct->req->state != PATH_PENDING) {
-			ct->running = 0;
+			stop_running(ct);
 			return;
 		} else if (r == 0 ||
 			   (timeout.tv_sec == 0 && timeout.tv_nsec == 0))
@@ -330,10 +343,10 @@  check_state(int fd, struct directio_context *ct, int sync, int timeout_secs)
 	if (sync > 0)
 		LOG(4, "called in synchronous mode");
 
-	if (ct->running) {
+	if (is_running(ct)) {
 		ct->checked_state = true;
 		if (ct->req->state != PATH_PENDING) {
-			ct->running = 0;
+			stop_running(ct);
 			return ct->req->state;
 		}
 	} else {
@@ -348,9 +361,9 @@  check_state(int fd, struct directio_context *ct, int sync, int timeout_secs)
 			LOG(3, "io_submit error %i", -rc);
 			return PATH_UNCHECKED;
 		}
+		start_running(ct, timeout_secs);
 		ct->checked_state = false;
 	}
-	ct->running++;
 	if (!sync)
 		return PATH_PENDING;
 
@@ -388,7 +401,7 @@  static void set_msgid(struct checker *c, int state)
 bool libcheck_need_wait(struct checker *c)
 {
 	struct directio_context *ct = (struct directio_context *)c->context;
-	return (ct && ct->running && ct->req->state == PATH_PENDING &&
+	return (ct && is_running(ct) && ct->req->state == PATH_PENDING &&
 		!ct->checked_state);
 }
 
@@ -400,7 +413,7 @@  int libcheck_pending(struct checker *c)
 	struct timespec no_wait = { .tv_sec = 0 };
 
 	/* The if path checker isn't running, just return the exiting value. */
-	if (!ct || !ct->running) {
+	if (!ct || !is_running(ct)) {
 		rc = c->path_state;
 		goto out;
 	}
@@ -408,10 +421,13 @@  int libcheck_pending(struct checker *c)
 	if (ct->req->state == PATH_PENDING)
 		check_pending(ct, no_wait);
 	else
-		ct->running = 0;
+		stop_running(ct);
 	rc = ct->req->state;
 	if (rc == PATH_PENDING) {
-		if (ct->running > c->timeout) {
+		struct timespec now;
+
+		get_monotonic_time(&now);
+		if (timespeccmp(&now, &ct->timeout) > 0) {
 			LOG(3, "abort check on timeout");
 			io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
 			rc = PATH_DOWN;