diff mbox series

[v5,23/23] multipathd: use timestamps to tell when the directio checker timed out

Message ID 20241015032835.2693247-24-bmarzins@redhat.com (mailing list archive)
State Not Applicable, archived
Delegated to: Benjamin Marzinski
Headers show
Series path checker refactor and misc fixes | expand

Commit Message

Benjamin Marzinski Oct. 15, 2024, 3:28 a.m. UTC
Instead of counting the number of times the path checker has been
called and treating that as the number of seconds that have passed,
calculate the actual timestamp when the checker will time out, and
check that instead.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
 libmultipath/checkers/directio.c | 44 ++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 13 deletions(-)

Comments

Martin Wilck Nov. 3, 2024, 8:56 p.m. UTC | #1
On Mon, 2024-10-14 at 23:28 -0400, Benjamin Marzinski wrote:
> Instead of counting the number of times the path checker has been
> called and treating that as the number of seconds that have passed,
> calculate the actual timestamp when the checker will time out, and
> check that instead.
> 
> Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> ---
>  libmultipath/checkers/directio.c | 44 ++++++++++++++++++++++--------
> --
>  1 file changed, 31 insertions(+), 13 deletions(-)
> 
> diff --git a/libmultipath/checkers/directio.c
> b/libmultipath/checkers/directio.c
> index 27227894..d35a6bac 100644
> --- a/libmultipath/checkers/directio.c
> +++ b/libmultipath/checkers/directio.c
> @@ -60,13 +60,28 @@ const char *libcheck_msgtable[] = {
>  #define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt,
> ##args)
>  
>  struct directio_context {
> -	unsigned int	running;
> -	int		reset_flags;
> +	time_t timeout;


I'd have used a struct timespec here rather than time_t.

> +	int reset_flags;
>  	struct aio_group *aio_grp;
>  	struct async_req *req;
>  	bool checked_state;
>  };
>  
> +bool is_running(struct directio_context *ct) {
> +	return (ct->timeout != 0);
> +}
> +
> +void start_running(struct directio_context *ct, int timeout_secs) {
> +	struct timespec now;
> +
> +	get_monotonic_time(&now);
> +	ct->timeout = now.tv_sec + timeout_secs;
> +}
> +
> +void stop_running(struct directio_context *ct) {
> +	ct->timeout = 0;
> +}
> +

I think the 3 functions above should be static.

>  static struct aio_group *
>  add_aio_group(void)
>  {
> @@ -234,9 +249,9 @@ void libcheck_free (struct checker * c)
>  		}
>  	}
>  
> -	if (ct->running && ct->req->state != PATH_PENDING)
> -		ct->running = 0;
> -	if (!ct->running) {
> +	if (is_running(ct) && ct->req->state != PATH_PENDING)
> +		stop_running(ct);
> +	if (!is_running(ct)) {
>  		free(ct->req->buf);
>  		free(ct->req);
>  		ct->aio_grp->holders--;
> @@ -304,7 +319,7 @@ check_pending(struct directio_context *ct, struct
> timespec timeout)
>  		r = get_events(ct->aio_grp, &timeout);
>  
>  		if (ct->req->state != PATH_PENDING) {
> -			ct->running = 0;
> +			stop_running(ct);
>  			return;
>  		} else if (r == 0 ||
>  			   (timeout.tv_sec == 0 && timeout.tv_nsec
> == 0))
> @@ -330,10 +345,10 @@ check_state(int fd, struct directio_context
> *ct, int sync, int timeout_secs)
>  	if (sync > 0)
>  		LOG(4, "called in synchronous mode");
>  
> -	if (ct->running) {
> +	if (is_running(ct)) {
>  		ct->checked_state = true;
>  		if (ct->req->state != PATH_PENDING) {
> -			ct->running = 0;
> +			stop_running(ct);
>  			return ct->req->state;
>  		}
>  	} else {
> @@ -348,9 +363,9 @@ check_state(int fd, struct directio_context *ct,
> int sync, int timeout_secs)
>  			LOG(3, "io_submit error %i", -rc);
>  			return PATH_UNCHECKED;
>  		}
> +		start_running(ct, timeout_secs);
>  		ct->checked_state = false;
>  	}
> -	ct->running++;
>  	if (!sync)
>  		return PATH_PENDING;
>  
> @@ -388,7 +403,7 @@ static void set_msgid(struct checker *c, int
> state)
>  bool libcheck_need_wait(struct checker *c)
>  {
>  	struct directio_context *ct = (struct directio_context *)c-
> >context;
> -	return (ct && ct->running && ct->req->state == PATH_PENDING
> &&
> +	return (ct && is_running(ct) && ct->req->state ==
> PATH_PENDING &&
>  		!ct->checked_state);
>  }
>  
> @@ -400,7 +415,7 @@ int libcheck_pending(struct checker *c)
>  	struct timespec no_wait = { .tv_sec = 0 };
>  
>  	/* The if path checker isn't running, just return the
> exiting value. */
> -	if (!ct || !ct->running) {
> +	if (!ct || !is_running(ct)) {
>  		rc = c->path_state;
>  		goto out;
>  	}
> @@ -408,10 +423,13 @@ int libcheck_pending(struct checker *c)
>  	if (ct->req->state == PATH_PENDING)
>  		check_pending(ct, no_wait);
>  	else
> -		ct->running = 0;
> +		stop_running(ct);
>  	rc = ct->req->state;
>  	if (rc == PATH_PENDING) {
> -		if (ct->running > c->timeout) {
> +		struct timespec now;
> +
> +		get_monotonic_time(&now);
> +		if (now.tv_sec > ct->timeout) {

We could timeout a second too late here, that's why I'd suggest using
timespec.

>  			LOG(3, "abort check on timeout");
>  			io_cancel(ct->aio_grp->ioctx, &ct->req->io,
> &event);
>  			rc = PATH_DOWN;

Regards
Martin
Benjamin Marzinski Nov. 5, 2024, 3 a.m. UTC | #2
On Sun, Nov 03, 2024 at 09:56:40PM +0100, Martin Wilck wrote:
> On Mon, 2024-10-14 at 23:28 -0400, Benjamin Marzinski wrote:
> > Instead of counting the number of times the path checker has been
> > called and treating that as the number of seconds that have passed,
> > calculate the actual timestamp when the checker will time out, and
> > check that instead.
> > 
> > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> > ---
> >  libmultipath/checkers/directio.c | 44 ++++++++++++++++++++++--------
> > --
> >  1 file changed, 31 insertions(+), 13 deletions(-)
> > 
> > diff --git a/libmultipath/checkers/directio.c
> > b/libmultipath/checkers/directio.c
> > index 27227894..d35a6bac 100644
> > --- a/libmultipath/checkers/directio.c
> > +++ b/libmultipath/checkers/directio.c
> > @@ -60,13 +60,28 @@ const char *libcheck_msgtable[] = {
> >  #define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt,
> > ##args)
> >  
> >  struct directio_context {
> > -	unsigned int	running;
> > -	int		reset_flags;
> > +	time_t timeout;
> 
> 
> I'd have used a struct timespec here rather than time_t.

Sure.

> 
> > +	int reset_flags;
> >  	struct aio_group *aio_grp;
> >  	struct async_req *req;
> >  	bool checked_state;
> >  };
> >  
> > +bool is_running(struct directio_context *ct) {
> > +	return (ct->timeout != 0);
> > +}
> > +
> > +void start_running(struct directio_context *ct, int timeout_secs) {
> > +	struct timespec now;
> > +
> > +	get_monotonic_time(&now);
> > +	ct->timeout = now.tv_sec + timeout_secs;
> > +}
> > +
> > +void stop_running(struct directio_context *ct) {
> > +	ct->timeout = 0;
> > +}
> > +
> 
> I think the 3 functions above should be static.

Oops. Yeah. Sending a new version.

-Ben

> 
> >  static struct aio_group *
> >  add_aio_group(void)
> >  {
> > @@ -234,9 +249,9 @@ void libcheck_free (struct checker * c)
> >  		}
> >  	}
> >  
> > -	if (ct->running && ct->req->state != PATH_PENDING)
> > -		ct->running = 0;
> > -	if (!ct->running) {
> > +	if (is_running(ct) && ct->req->state != PATH_PENDING)
> > +		stop_running(ct);
> > +	if (!is_running(ct)) {
> >  		free(ct->req->buf);
> >  		free(ct->req);
> >  		ct->aio_grp->holders--;
> > @@ -304,7 +319,7 @@ check_pending(struct directio_context *ct, struct
> > timespec timeout)
> >  		r = get_events(ct->aio_grp, &timeout);
> >  
> >  		if (ct->req->state != PATH_PENDING) {
> > -			ct->running = 0;
> > +			stop_running(ct);
> >  			return;
> >  		} else if (r == 0 ||
> >  			   (timeout.tv_sec == 0 && timeout.tv_nsec
> > == 0))
> > @@ -330,10 +345,10 @@ check_state(int fd, struct directio_context
> > *ct, int sync, int timeout_secs)
> >  	if (sync > 0)
> >  		LOG(4, "called in synchronous mode");
> >  
> > -	if (ct->running) {
> > +	if (is_running(ct)) {
> >  		ct->checked_state = true;
> >  		if (ct->req->state != PATH_PENDING) {
> > -			ct->running = 0;
> > +			stop_running(ct);
> >  			return ct->req->state;
> >  		}
> >  	} else {
> > @@ -348,9 +363,9 @@ check_state(int fd, struct directio_context *ct,
> > int sync, int timeout_secs)
> >  			LOG(3, "io_submit error %i", -rc);
> >  			return PATH_UNCHECKED;
> >  		}
> > +		start_running(ct, timeout_secs);
> >  		ct->checked_state = false;
> >  	}
> > -	ct->running++;
> >  	if (!sync)
> >  		return PATH_PENDING;
> >  
> > @@ -388,7 +403,7 @@ static void set_msgid(struct checker *c, int
> > state)
> >  bool libcheck_need_wait(struct checker *c)
> >  {
> >  	struct directio_context *ct = (struct directio_context *)c-
> > >context;
> > -	return (ct && ct->running && ct->req->state == PATH_PENDING
> > &&
> > +	return (ct && is_running(ct) && ct->req->state ==
> > PATH_PENDING &&
> >  		!ct->checked_state);
> >  }
> >  
> > @@ -400,7 +415,7 @@ int libcheck_pending(struct checker *c)
> >  	struct timespec no_wait = { .tv_sec = 0 };
> >  
> >  	/* The if path checker isn't running, just return the
> > exiting value. */
> > -	if (!ct || !ct->running) {
> > +	if (!ct || !is_running(ct)) {
> >  		rc = c->path_state;
> >  		goto out;
> >  	}
> > @@ -408,10 +423,13 @@ int libcheck_pending(struct checker *c)
> >  	if (ct->req->state == PATH_PENDING)
> >  		check_pending(ct, no_wait);
> >  	else
> > -		ct->running = 0;
> > +		stop_running(ct);
> >  	rc = ct->req->state;
> >  	if (rc == PATH_PENDING) {
> > -		if (ct->running > c->timeout) {
> > +		struct timespec now;
> > +
> > +		get_monotonic_time(&now);
> > +		if (now.tv_sec > ct->timeout) {
> 
> We could timeout a second too late here, that's why I'd suggest using
> timespec.
> 
> >  			LOG(3, "abort check on timeout");
> >  			io_cancel(ct->aio_grp->ioctx, &ct->req->io,
> > &event);
> >  			rc = PATH_DOWN;
> 
> Regards
> Martin
diff mbox series

Patch

diff --git a/libmultipath/checkers/directio.c b/libmultipath/checkers/directio.c
index 27227894..d35a6bac 100644
--- a/libmultipath/checkers/directio.c
+++ b/libmultipath/checkers/directio.c
@@ -60,13 +60,28 @@  const char *libcheck_msgtable[] = {
 #define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt, ##args)
 
 struct directio_context {
-	unsigned int	running;
-	int		reset_flags;
+	time_t timeout;
+	int reset_flags;
 	struct aio_group *aio_grp;
 	struct async_req *req;
 	bool checked_state;
 };
 
+bool is_running(struct directio_context *ct) {
+	return (ct->timeout != 0);
+}
+
+void start_running(struct directio_context *ct, int timeout_secs) {
+	struct timespec now;
+
+	get_monotonic_time(&now);
+	ct->timeout = now.tv_sec + timeout_secs;
+}
+
+void stop_running(struct directio_context *ct) {
+	ct->timeout = 0;
+}
+
 static struct aio_group *
 add_aio_group(void)
 {
@@ -234,9 +249,9 @@  void libcheck_free (struct checker * c)
 		}
 	}
 
-	if (ct->running && ct->req->state != PATH_PENDING)
-		ct->running = 0;
-	if (!ct->running) {
+	if (is_running(ct) && ct->req->state != PATH_PENDING)
+		stop_running(ct);
+	if (!is_running(ct)) {
 		free(ct->req->buf);
 		free(ct->req);
 		ct->aio_grp->holders--;
@@ -304,7 +319,7 @@  check_pending(struct directio_context *ct, struct timespec timeout)
 		r = get_events(ct->aio_grp, &timeout);
 
 		if (ct->req->state != PATH_PENDING) {
-			ct->running = 0;
+			stop_running(ct);
 			return;
 		} else if (r == 0 ||
 			   (timeout.tv_sec == 0 && timeout.tv_nsec == 0))
@@ -330,10 +345,10 @@  check_state(int fd, struct directio_context *ct, int sync, int timeout_secs)
 	if (sync > 0)
 		LOG(4, "called in synchronous mode");
 
-	if (ct->running) {
+	if (is_running(ct)) {
 		ct->checked_state = true;
 		if (ct->req->state != PATH_PENDING) {
-			ct->running = 0;
+			stop_running(ct);
 			return ct->req->state;
 		}
 	} else {
@@ -348,9 +363,9 @@  check_state(int fd, struct directio_context *ct, int sync, int timeout_secs)
 			LOG(3, "io_submit error %i", -rc);
 			return PATH_UNCHECKED;
 		}
+		start_running(ct, timeout_secs);
 		ct->checked_state = false;
 	}
-	ct->running++;
 	if (!sync)
 		return PATH_PENDING;
 
@@ -388,7 +403,7 @@  static void set_msgid(struct checker *c, int state)
 bool libcheck_need_wait(struct checker *c)
 {
 	struct directio_context *ct = (struct directio_context *)c->context;
-	return (ct && ct->running && ct->req->state == PATH_PENDING &&
+	return (ct && is_running(ct) && ct->req->state == PATH_PENDING &&
 		!ct->checked_state);
 }
 
@@ -400,7 +415,7 @@  int libcheck_pending(struct checker *c)
 	struct timespec no_wait = { .tv_sec = 0 };
 
 	/* The if path checker isn't running, just return the exiting value. */
-	if (!ct || !ct->running) {
+	if (!ct || !is_running(ct)) {
 		rc = c->path_state;
 		goto out;
 	}
@@ -408,10 +423,13 @@  int libcheck_pending(struct checker *c)
 	if (ct->req->state == PATH_PENDING)
 		check_pending(ct, no_wait);
 	else
-		ct->running = 0;
+		stop_running(ct);
 	rc = ct->req->state;
 	if (rc == PATH_PENDING) {
-		if (ct->running > c->timeout) {
+		struct timespec now;
+
+		get_monotonic_time(&now);
+		if (now.tv_sec > ct->timeout) {
 			LOG(3, "abort check on timeout");
 			io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
 			rc = PATH_DOWN;