diff mbox series

[04/15] libmultipath: remove pending wait code from libcheck_check calls

Message ID 20240828221757.4060548-5-bmarzins@redhat.com (mailing list archive)
State Not Applicable, archived
Delegated to: Benjamin Marzinski
Headers show
Series Yet Another path checker refactor | expand

Commit Message

Benjamin Marzinski Aug. 28, 2024, 10:17 p.m. UTC
When the tur and directio checkers start an asynchronous checker, they
now immediately return with the path in PATH_PENDING, instead of
waiting in checker_check(). Instead the wait now happens in
checker_get_state().

Additionally, the directio checker now waits for 1 ms, like the tur
checker does. Also like the tur checker it now only waits once. If it
is still pending after the first call to checker_get_state(). It will
not wait at all on future calls, and will just process the already
completed IOs.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
 libmultipath/checkers.c          |  7 +++-
 libmultipath/checkers/directio.c | 57 +++++++++++++++++---------------
 libmultipath/checkers/tur.c      | 13 +++-----
 3 files changed, 41 insertions(+), 36 deletions(-)

Comments

Martin Wilck Sept. 4, 2024, 8:05 p.m. UTC | #1
On Wed, 2024-08-28 at 18:17 -0400, Benjamin Marzinski wrote:
> When the tur and directio checkers start an asynchronous checker,
> they
> now immediately return with the path in PATH_PENDING, instead of
> waiting in checker_check(). Instead the wait now happens in
> checker_get_state().
> 
> Additionally, the directio checker now waits for 1 ms, like the tur
> checker does. Also like the tur checker it now only waits once. If it
> is still pending after the first call to checker_get_state(). It will
> not wait at all on future calls, and will just process the already
> completed IOs.
> 
> Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>

Now I understand what you're doing, but I find it somewhat confusing. I
believe this can be simplified after the split between starting the
checkers and looking at their result.

Why not just fire off the checkers, wait 1ms for _all_ the just started
checkers in checkerloop() [*], and then do the pending test like it is
now (but just in polling mode, with no c->timeout)?

Regards
Martin

[*] or maybe even a little more, 5ms, say: we'd still be waiting far
less time than we used to, but greatly increase the odds that most
checkers actually complete.


> ---
>  libmultipath/checkers.c          |  7 +++-
>  libmultipath/checkers/directio.c | 57 +++++++++++++++++-------------
> --
>  libmultipath/checkers/tur.c      | 13 +++-----
>  3 files changed, 41 insertions(+), 36 deletions(-)
> 
> diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
> index 298aec78..b44b856a 100644
> --- a/libmultipath/checkers.c
> +++ b/libmultipath/checkers.c
> @@ -303,7 +303,12 @@ void checker_put (struct checker * dst)
>  
>  int checker_get_state(struct checker *c)
>  {
> -	return c ? c->path_state : PATH_UNCHECKED;
> +	if (!c)
> +		return PATH_UNCHECKED;
> +	if (c->path_state != PATH_PENDING || !c->cls->pending)
> +		return c->path_state;
> +	c->path_state = c->cls->pending(c);
> +	return c->path_state;
>  }
>  
>  void checker_check (struct checker * c, int path_state)
> diff --git a/libmultipath/checkers/directio.c
> b/libmultipath/checkers/directio.c
> index 3f88b40d..904e3071 100644
> --- a/libmultipath/checkers/directio.c
> +++ b/libmultipath/checkers/directio.c
> @@ -60,10 +60,11 @@ const char *libcheck_msgtable[] = {
>  #define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt,
> ##args)
>  
>  struct directio_context {
> -	int		running;
> +	unsigned int	running;
>  	int		reset_flags;
>  	struct aio_group *aio_grp;
>  	struct async_req *req;
> +	struct timespec endtime;
>  };
>  
>  static struct aio_group *
> @@ -314,19 +315,16 @@ check_pending(struct directio_context *ct,
> struct timespec endtime)
>  static int
>  check_state(int fd, struct directio_context *ct, int sync, int
> timeout_secs)
>  {
> -	struct timespec	timeout = { .tv_nsec = 1000 };
>  	struct stat	sb;
>  	int		rc;
> +	struct io_event event;
>  	struct timespec endtime;
>  
>  	if (fstat(fd, &sb) == 0) {
>  		LOG(4, "called for %x", (unsigned) sb.st_rdev);
>  	}
> -	if (sync > 0) {
> +	if (sync > 0)
>  		LOG(4, "called in synchronous mode");
> -		timeout.tv_sec  = timeout_secs;
> -		timeout.tv_nsec = 0;
> -	}
>  
>  	if (ct->running) {
>  		if (ct->req->state != PATH_PENDING) {
> @@ -345,31 +343,26 @@ check_state(int fd, struct directio_context
> *ct, int sync, int timeout_secs)
>  			LOG(3, "io_submit error %i", -rc);
>  			return PATH_UNCHECKED;
>  		}
> +		get_monotonic_time(&ct->endtime);
> +		ct->endtime.tv_nsec += 1000 * 1000;
> +		normalize_timespec(&ct->endtime);
>  	}
>  	ct->running++;
> +	if (!sync)
> +		return PATH_PENDING;
>  
>  	get_monotonic_time(&endtime);
> -	endtime.tv_sec += timeout.tv_sec;
> -	endtime.tv_nsec += timeout.tv_nsec;
> +	endtime.tv_sec += timeout_secs;
>  	normalize_timespec(&endtime);
>  
>  	check_pending(ct, endtime);
>  	if (ct->req->state != PATH_PENDING)
>  		return ct->req->state;
>  
> -	if (ct->running > timeout_secs || sync) {
> -		struct io_event event;
> -
> -		LOG(3, "abort check on timeout");
> -
> -		io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
> -		rc = PATH_DOWN;
> -	} else {
> -		LOG(4, "async io pending");
> -		rc = PATH_PENDING;
> -	}
> +	LOG(3, "abort check on timeout");
>  
> -	return rc;
> +	io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
> +	return PATH_DOWN;
>  }
>  
>  static void set_msgid(struct checker *c, int state)
> @@ -395,21 +388,31 @@ static void set_msgid(struct checker *c, int
> state)
>  
>  int libcheck_pending(struct checker *c)
>  {
> -	struct timespec endtime;
> +	int rc;
> +	struct io_event event;
>  	struct directio_context *ct = (struct directio_context *)c-
> >context;
>  
>  	/* The if path checker isn't running, just return the
> exiting value. */
>  	if (!ct || !ct->running)
>  		return c->path_state;
>  
> -	if (ct->req->state == PATH_PENDING) {
> -		get_monotonic_time(&endtime);
> -		check_pending(ct, endtime);
> -	} else
> +	if (ct->req->state == PATH_PENDING)
> +		check_pending(ct, ct->endtime);
> +	else
>  		ct->running = 0;
> -	set_msgid(c, ct->req->state);
> +	rc = ct->req->state;
> +	if (rc == PATH_PENDING) {
> +		if (ct->running > c->timeout) {
> +			LOG(3, "abort check on timeout");
> +			io_cancel(ct->aio_grp->ioctx, &ct->req->io,
> &event);
> +			rc = PATH_DOWN;
> +		}
> +		else
> +			LOG(4, "async io pending");
> +	}
> +	set_msgid(c, rc);
>  
> -	return ct->req->state;
> +	return rc;
>  }
>  
>  int libcheck_check (struct checker * c)
> diff --git a/libmultipath/checkers/tur.c
> b/libmultipath/checkers/tur.c
> index 95af5214..81db565b 100644
> --- a/libmultipath/checkers/tur.c
> +++ b/libmultipath/checkers/tur.c
> @@ -58,6 +58,7 @@ struct tur_checker_context {
>  	int msgid;
>  	struct checker_context ctx;
>  	unsigned int nr_timeouts;
> +	struct timespec endtime;
>  };
>  
>  int libcheck_init (struct checker * c)
> @@ -323,7 +324,7 @@ static int tur_check_async_timeout(struct checker
> *c)
>  	return (now.tv_sec > ct->time);
>  }
>  
> -int check_pending(struct checker *c, struct timespec endtime)
> +int check_pending(struct checker *c)
>  {
>  	struct tur_checker_context *ct = c->context;
>  	int r, tur_status = PATH_PENDING;
> @@ -333,7 +334,7 @@ int check_pending(struct checker *c, struct
> timespec endtime)
>  	for (r = 0;
>  	     r == 0 && ct->state == PATH_PENDING &&
>  	     ct->msgid == MSG_TUR_RUNNING;
> -	     r = pthread_cond_timedwait(&ct->active, &ct->lock,
> &endtime));
> +	     r = pthread_cond_timedwait(&ct->active, &ct->lock, &ct-
> >endtime));
>  
>  	if (!r) {
>  		tur_status = ct->state;
> @@ -355,21 +356,18 @@ int check_pending(struct checker *c, struct
> timespec endtime)
>  
>  int libcheck_pending(struct checker *c)
>  {
> -	struct timespec endtime;
>  	struct tur_checker_context *ct = c->context;
>  
>  	/* The if path checker isn't running, just return the
> exiting value. */
>  	if (!ct || !ct->thread)
>  		return c->path_state;
>  
> -	get_monotonic_time(&endtime);
> -	return check_pending(c, endtime);
> +	return check_pending(c);
>  }
>  
>  int libcheck_check(struct checker * c)
>  {
>  	struct tur_checker_context *ct = c->context;
> -	struct timespec tsp;
>  	pthread_attr_t attr;
>  	int tur_status, r;
>  
> @@ -479,8 +477,7 @@ int libcheck_check(struct checker * c)
>  				" sync mode", major(ct->devt),
> minor(ct->devt));
>  			return tur_check(c->fd, c->timeout, &c-
> >msgid);
>  		}
> -		tur_timeout(&tsp);
> -		tur_status = check_pending(c, tsp);
> +		tur_timeout(&ct->endtime);
>  	}
>  
>  	return tur_status;
Benjamin Marzinski Sept. 4, 2024, 9:17 p.m. UTC | #2
On Wed, Sep 04, 2024 at 10:05:30PM +0200, Martin Wilck wrote:
> On Wed, 2024-08-28 at 18:17 -0400, Benjamin Marzinski wrote:
> > When the tur and directio checkers start an asynchronous checker,
> > they
> > now immediately return with the path in PATH_PENDING, instead of
> > waiting in checker_check(). Instead the wait now happens in
> > checker_get_state().
> > 
> > Additionally, the directio checker now waits for 1 ms, like the tur
> > checker does. Also like the tur checker it now only waits once. If it
> > is still pending after the first call to checker_get_state(). It will
> > not wait at all on future calls, and will just process the already
> > completed IOs.
> > 
> > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> 
> Now I understand what you're doing, but I find it somewhat confusing. I
> believe this can be simplified after the split between starting the
> checkers and looking at their result.
> 
> Why not just fire off the checkers, wait 1ms for _all_ the just started
> checkers in checkerloop() [*], and then do the pending test like it is
> now (but just in polling mode, with no c->timeout)?

That's probably possible. You would also need to put that wait in
pathinfo().

The big reason I did it this way was so nothing outside of the checker
itself needed to care about things like if the checker was set to async
and whether or not the checker was actually async capable and if this
was a newly started async check or if the checker had already been
pending for a cycle, where waiting another 1ms was pointless.

This way, you just call checker_get_state() on all the paths were you
ran the path checker, and if at least one of them needs to wait, then
the first one that does will wait, and the rest won't. If none of them
need to wait, then none of them will.

-Ben

> 
> Regards
> Martin
> 
> [*] or maybe even a little more, 5ms, say: we'd still be waiting far
> less time than we used to, but greatly increase the odds that most
> checkers actually complete.
> 
> 
> > ---
> >  libmultipath/checkers.c          |  7 +++-
> >  libmultipath/checkers/directio.c | 57 +++++++++++++++++-------------
> > --
> >  libmultipath/checkers/tur.c      | 13 +++-----
> >  3 files changed, 41 insertions(+), 36 deletions(-)
> > 
> > diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
> > index 298aec78..b44b856a 100644
> > --- a/libmultipath/checkers.c
> > +++ b/libmultipath/checkers.c
> > @@ -303,7 +303,12 @@ void checker_put (struct checker * dst)
> >  
> >  int checker_get_state(struct checker *c)
> >  {
> > -	return c ? c->path_state : PATH_UNCHECKED;
> > +	if (!c)
> > +		return PATH_UNCHECKED;
> > +	if (c->path_state != PATH_PENDING || !c->cls->pending)
> > +		return c->path_state;
> > +	c->path_state = c->cls->pending(c);
> > +	return c->path_state;
> >  }
> >  
> >  void checker_check (struct checker * c, int path_state)
> > diff --git a/libmultipath/checkers/directio.c
> > b/libmultipath/checkers/directio.c
> > index 3f88b40d..904e3071 100644
> > --- a/libmultipath/checkers/directio.c
> > +++ b/libmultipath/checkers/directio.c
> > @@ -60,10 +60,11 @@ const char *libcheck_msgtable[] = {
> >  #define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt,
> > ##args)
> >  
> >  struct directio_context {
> > -	int		running;
> > +	unsigned int	running;
> >  	int		reset_flags;
> >  	struct aio_group *aio_grp;
> >  	struct async_req *req;
> > +	struct timespec endtime;
> >  };
> >  
> >  static struct aio_group *
> > @@ -314,19 +315,16 @@ check_pending(struct directio_context *ct,
> > struct timespec endtime)
> >  static int
> >  check_state(int fd, struct directio_context *ct, int sync, int
> > timeout_secs)
> >  {
> > -	struct timespec	timeout = { .tv_nsec = 1000 };
> >  	struct stat	sb;
> >  	int		rc;
> > +	struct io_event event;
> >  	struct timespec endtime;
> >  
> >  	if (fstat(fd, &sb) == 0) {
> >  		LOG(4, "called for %x", (unsigned) sb.st_rdev);
> >  	}
> > -	if (sync > 0) {
> > +	if (sync > 0)
> >  		LOG(4, "called in synchronous mode");
> > -		timeout.tv_sec  = timeout_secs;
> > -		timeout.tv_nsec = 0;
> > -	}
> >  
> >  	if (ct->running) {
> >  		if (ct->req->state != PATH_PENDING) {
> > @@ -345,31 +343,26 @@ check_state(int fd, struct directio_context
> > *ct, int sync, int timeout_secs)
> >  			LOG(3, "io_submit error %i", -rc);
> >  			return PATH_UNCHECKED;
> >  		}
> > +		get_monotonic_time(&ct->endtime);
> > +		ct->endtime.tv_nsec += 1000 * 1000;
> > +		normalize_timespec(&ct->endtime);
> >  	}
> >  	ct->running++;
> > +	if (!sync)
> > +		return PATH_PENDING;
> >  
> >  	get_monotonic_time(&endtime);
> > -	endtime.tv_sec += timeout.tv_sec;
> > -	endtime.tv_nsec += timeout.tv_nsec;
> > +	endtime.tv_sec += timeout_secs;
> >  	normalize_timespec(&endtime);
> >  
> >  	check_pending(ct, endtime);
> >  	if (ct->req->state != PATH_PENDING)
> >  		return ct->req->state;
> >  
> > -	if (ct->running > timeout_secs || sync) {
> > -		struct io_event event;
> > -
> > -		LOG(3, "abort check on timeout");
> > -
> > -		io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
> > -		rc = PATH_DOWN;
> > -	} else {
> > -		LOG(4, "async io pending");
> > -		rc = PATH_PENDING;
> > -	}
> > +	LOG(3, "abort check on timeout");
> >  
> > -	return rc;
> > +	io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
> > +	return PATH_DOWN;
> >  }
> >  
> >  static void set_msgid(struct checker *c, int state)
> > @@ -395,21 +388,31 @@ static void set_msgid(struct checker *c, int
> > state)
> >  
> >  int libcheck_pending(struct checker *c)
> >  {
> > -	struct timespec endtime;
> > +	int rc;
> > +	struct io_event event;
> >  	struct directio_context *ct = (struct directio_context *)c-
> > >context;
> >  
> >  	/* The if path checker isn't running, just return the
> > exiting value. */
> >  	if (!ct || !ct->running)
> >  		return c->path_state;
> >  
> > -	if (ct->req->state == PATH_PENDING) {
> > -		get_monotonic_time(&endtime);
> > -		check_pending(ct, endtime);
> > -	} else
> > +	if (ct->req->state == PATH_PENDING)
> > +		check_pending(ct, ct->endtime);
> > +	else
> >  		ct->running = 0;
> > -	set_msgid(c, ct->req->state);
> > +	rc = ct->req->state;
> > +	if (rc == PATH_PENDING) {
> > +		if (ct->running > c->timeout) {
> > +			LOG(3, "abort check on timeout");
> > +			io_cancel(ct->aio_grp->ioctx, &ct->req->io,
> > &event);
> > +			rc = PATH_DOWN;
> > +		}
> > +		else
> > +			LOG(4, "async io pending");
> > +	}
> > +	set_msgid(c, rc);
> >  
> > -	return ct->req->state;
> > +	return rc;
> >  }
> >  
> >  int libcheck_check (struct checker * c)
> > diff --git a/libmultipath/checkers/tur.c
> > b/libmultipath/checkers/tur.c
> > index 95af5214..81db565b 100644
> > --- a/libmultipath/checkers/tur.c
> > +++ b/libmultipath/checkers/tur.c
> > @@ -58,6 +58,7 @@ struct tur_checker_context {
> >  	int msgid;
> >  	struct checker_context ctx;
> >  	unsigned int nr_timeouts;
> > +	struct timespec endtime;
> >  };
> >  
> >  int libcheck_init (struct checker * c)
> > @@ -323,7 +324,7 @@ static int tur_check_async_timeout(struct checker
> > *c)
> >  	return (now.tv_sec > ct->time);
> >  }
> >  
> > -int check_pending(struct checker *c, struct timespec endtime)
> > +int check_pending(struct checker *c)
> >  {
> >  	struct tur_checker_context *ct = c->context;
> >  	int r, tur_status = PATH_PENDING;
> > @@ -333,7 +334,7 @@ int check_pending(struct checker *c, struct
> > timespec endtime)
> >  	for (r = 0;
> >  	     r == 0 && ct->state == PATH_PENDING &&
> >  	     ct->msgid == MSG_TUR_RUNNING;
> > -	     r = pthread_cond_timedwait(&ct->active, &ct->lock,
> > &endtime));
> > +	     r = pthread_cond_timedwait(&ct->active, &ct->lock, &ct-
> > >endtime));
> >  
> >  	if (!r) {
> >  		tur_status = ct->state;
> > @@ -355,21 +356,18 @@ int check_pending(struct checker *c, struct
> > timespec endtime)
> >  
> >  int libcheck_pending(struct checker *c)
> >  {
> > -	struct timespec endtime;
> >  	struct tur_checker_context *ct = c->context;
> >  
> >  	/* The if path checker isn't running, just return the
> > exiting value. */
> >  	if (!ct || !ct->thread)
> >  		return c->path_state;
> >  
> > -	get_monotonic_time(&endtime);
> > -	return check_pending(c, endtime);
> > +	return check_pending(c);
> >  }
> >  
> >  int libcheck_check(struct checker * c)
> >  {
> >  	struct tur_checker_context *ct = c->context;
> > -	struct timespec tsp;
> >  	pthread_attr_t attr;
> >  	int tur_status, r;
> >  
> > @@ -479,8 +477,7 @@ int libcheck_check(struct checker * c)
> >  				" sync mode", major(ct->devt),
> > minor(ct->devt));
> >  			return tur_check(c->fd, c->timeout, &c-
> > >msgid);
> >  		}
> > -		tur_timeout(&tsp);
> > -		tur_status = check_pending(c, tsp);
> > +		tur_timeout(&ct->endtime);
> >  	}
> >  
> >  	return tur_status;
Martin Wilck Sept. 5, 2024, 7:53 a.m. UTC | #3
On Wed, 2024-09-04 at 17:17 -0400, Benjamin Marzinski wrote:
> On Wed, Sep 04, 2024 at 10:05:30PM +0200, Martin Wilck wrote:
> > On Wed, 2024-08-28 at 18:17 -0400, Benjamin Marzinski wrote:
> > > When the tur and directio checkers start an asynchronous checker,
> > > they
> > > now immediately return with the path in PATH_PENDING, instead of
> > > waiting in checker_check(). Instead the wait now happens in
> > > checker_get_state().
> > > 
> > > Additionally, the directio checker now waits for 1 ms, like the
> > > tur
> > > checker does. Also like the tur checker it now only waits once.
> > > If it
> > > is still pending after the first call to checker_get_state(). It
> > > will
> > > not wait at all on future calls, and will just process the
> > > already
> > > completed IOs.
> > > 
> > > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> > 
> > Now I understand what you're doing, but I find it somewhat
> > confusing. I
> > believe this can be simplified after the split between starting the
> > checkers and looking at their result.
> > 
> > Why not just fire off the checkers, wait 1ms for _all_ the just
> > started
> > checkers in checkerloop() [*], and then do the pending test like it
> > is
> > now (but just in polling mode, with no c->timeout)?
> 
> That's probably possible. You would also need to put that wait in
> pathinfo().

pathinfo() is tricky because it's written with the assumption that the
result is obtained synchronously. Maybe in pathinfo we should just fire
up the checker and set the status to PENDING. But we might as well not
run the checker at all, just set the checker interval to minimum, and
let the checkers do their work. (I like that idea, actually). 

Another idea would be to define an exception for pathinfo(), and
actually wait for completion there, not until c->timeout expires, but
somewhat longer than we currently do.

> The big reason I did it this way was so nothing outside of the
> checker
> itself needed to care about things like if the checker was set to
> async
> and whether or not the checker was actually async capable and if this
> was a newly started async check or if the checker had already been
> pending for a cycle, where waiting another 1ms was pointless.

Right, the sync property should be hidden. If sync checkers are in use,
none of the recent changes will have much of a positive effect. I'd
like to convert all checkers to async mode. Just need some time to do
it :-/ AFAICS, the only sync checker that still matters is rdac.

Below is some brainstorming from my side. 
Ignore it if it makes no sense.

We currently use the path state, in particular PATH_PENDING, to infer
the state of the checker (thread, aio request), but that doesn't give
us the full picture.

We should be able to distinguish the path state and the state of the
checker itself, which can be IDLE, RUNNING, or DONE. The checker
provides an API checker_state() (I can't think of a better name) to
retrieve its state. The path state can (only) be retrieved in DONE
checker state. When the path state is retrieved, the checker state
switches to IDLE. checker_start() switches from IDLE to RUNNING.
checker_wait(timeout) waits until the RUNNING checker reaches DONE, or
the passed timeout is reached. A timed-out checker
(wrt c->timeout) has state DONE with path state PATH_TIMEOUT.

Sync checkers would have a degenerated API where checker_start() and
checker_state() would be noops.

> This way, you just call checker_get_state() on all the paths were you
> ran the path checker, and if at least one of them needs to wait, then
> the first one that does will wait, and the rest won't. If none of
> them
> need to wait, then none of them will.

This makes sense. But the "wait" you're talking about is the 1ms delay,
after which the paths may well still be in PATH_PENDING. It's actually
not much different from not waiting at all, we've just decreased the
likelihood of PATH_PENDIN. 1ms is just an estimate, and, in my
experience, too short in many environments.

The real benefit of your patch set is that we now start all (async)
checkers in parallel. Which means that we can wait longer, increasing
the likelihood that the checkers have actually finished, while spending
less total time waiting. "Waiting longer" is a bit hand-waving; IMO it
should be handled further up in the stack rather than down in the
checker code.

We could achieve the behavior you describe in checkerloop, like this:

 - record start_timestamp
 - fire off all handlers
 - calculate end_time as start_timestamp + some_delay 
 - use that end_time for waiting for results of all fired-off checkers

Like in your example, we'd really wait for no more than 1 path.
We could do this for the entire set of paths, or for each multipath map
separately.

Regards
Martin
Benjamin Marzinski Sept. 6, 2024, 5:26 p.m. UTC | #4
On Thu, Sep 05, 2024 at 09:53:52AM +0200, Martin Wilck wrote:
> On Wed, 2024-09-04 at 17:17 -0400, Benjamin Marzinski wrote:
> > On Wed, Sep 04, 2024 at 10:05:30PM +0200, Martin Wilck wrote:
> > > On Wed, 2024-08-28 at 18:17 -0400, Benjamin Marzinski wrote:

<snip>

> > This way, you just call checker_get_state() on all the paths were you
> > ran the path checker, and if at least one of them needs to wait, then
> > the first one that does will wait, and the rest won't. If none of
> > them
> > need to wait, then none of them will.
> 
> This makes sense. But the "wait" you're talking about is the 1ms delay,
> after which the paths may well still be in PATH_PENDING. It's actually
> not much different from not waiting at all, we've just decreased the
> likelihood of PATH_PENDIN. 1ms is just an estimate, and, in my
> experience, too short in many environments.
> 
> The real benefit of your patch set is that we now start all (async)
> checkers in parallel. Which means that we can wait longer, increasing
> the likelihood that the checkers have actually finished, while spending
> less total time waiting. "Waiting longer" is a bit hand-waving; IMO it
> should be handled further up in the stack rather than down in the
> checker code.
> 
> We could achieve the behavior you describe in checkerloop, like this:
> 
>  - record start_timestamp
>  - fire off all handlers
>  - calculate end_time as start_timestamp + some_delay 
>  - use that end_time for waiting for results of all fired-off checkers
> 
> Like in your example, we'd really wait for no more than 1 path.
> We could do this for the entire set of paths, or for each multipath map
> separately.

Actually, now that I think about it, the biggest benefit of doing the
wait in checkerloop itself is that the code already handles the case
where we drop the vecs lock while there are paths that have started the
checker, but not yet updated the device based on the results. Given
that, we can just do the wait for pending paths without holding the vecs
lock. It's sort of embarassing that I did all this work to cut down on
the time we're waiting but didn't think about the fact that with just a
bit more work, we could avoid sleeping while holding the lock
altogether.
 
I already did some testing before to make sure that things like
reconfigures, adding/removing paths and manually failing/restoring paths
didn't mess things up if they happened in that window where we drop the
lock in the middle of checking the paths, but I'll probably need to do
some more tests and recheck that code if it's going to be non-rare
occurance for other threads to run in the middle of checking paths. But
this seems like too big of a benefit to avoid doing.

-Ben

> Regards
> Martin
diff mbox series

Patch

diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
index 298aec78..b44b856a 100644
--- a/libmultipath/checkers.c
+++ b/libmultipath/checkers.c
@@ -303,7 +303,12 @@  void checker_put (struct checker * dst)
 
 int checker_get_state(struct checker *c)
 {
-	return c ? c->path_state : PATH_UNCHECKED;
+	if (!c)
+		return PATH_UNCHECKED;
+	if (c->path_state != PATH_PENDING || !c->cls->pending)
+		return c->path_state;
+	c->path_state = c->cls->pending(c);
+	return c->path_state;
 }
 
 void checker_check (struct checker * c, int path_state)
diff --git a/libmultipath/checkers/directio.c b/libmultipath/checkers/directio.c
index 3f88b40d..904e3071 100644
--- a/libmultipath/checkers/directio.c
+++ b/libmultipath/checkers/directio.c
@@ -60,10 +60,11 @@  const char *libcheck_msgtable[] = {
 #define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt, ##args)
 
 struct directio_context {
-	int		running;
+	unsigned int	running;
 	int		reset_flags;
 	struct aio_group *aio_grp;
 	struct async_req *req;
+	struct timespec endtime;
 };
 
 static struct aio_group *
@@ -314,19 +315,16 @@  check_pending(struct directio_context *ct, struct timespec endtime)
 static int
 check_state(int fd, struct directio_context *ct, int sync, int timeout_secs)
 {
-	struct timespec	timeout = { .tv_nsec = 1000 };
 	struct stat	sb;
 	int		rc;
+	struct io_event event;
 	struct timespec endtime;
 
 	if (fstat(fd, &sb) == 0) {
 		LOG(4, "called for %x", (unsigned) sb.st_rdev);
 	}
-	if (sync > 0) {
+	if (sync > 0)
 		LOG(4, "called in synchronous mode");
-		timeout.tv_sec  = timeout_secs;
-		timeout.tv_nsec = 0;
-	}
 
 	if (ct->running) {
 		if (ct->req->state != PATH_PENDING) {
@@ -345,31 +343,26 @@  check_state(int fd, struct directio_context *ct, int sync, int timeout_secs)
 			LOG(3, "io_submit error %i", -rc);
 			return PATH_UNCHECKED;
 		}
+		get_monotonic_time(&ct->endtime);
+		ct->endtime.tv_nsec += 1000 * 1000;
+		normalize_timespec(&ct->endtime);
 	}
 	ct->running++;
+	if (!sync)
+		return PATH_PENDING;
 
 	get_monotonic_time(&endtime);
-	endtime.tv_sec += timeout.tv_sec;
-	endtime.tv_nsec += timeout.tv_nsec;
+	endtime.tv_sec += timeout_secs;
 	normalize_timespec(&endtime);
 
 	check_pending(ct, endtime);
 	if (ct->req->state != PATH_PENDING)
 		return ct->req->state;
 
-	if (ct->running > timeout_secs || sync) {
-		struct io_event event;
-
-		LOG(3, "abort check on timeout");
-
-		io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
-		rc = PATH_DOWN;
-	} else {
-		LOG(4, "async io pending");
-		rc = PATH_PENDING;
-	}
+	LOG(3, "abort check on timeout");
 
-	return rc;
+	io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
+	return PATH_DOWN;
 }
 
 static void set_msgid(struct checker *c, int state)
@@ -395,21 +388,31 @@  static void set_msgid(struct checker *c, int state)
 
 int libcheck_pending(struct checker *c)
 {
-	struct timespec endtime;
+	int rc;
+	struct io_event event;
 	struct directio_context *ct = (struct directio_context *)c->context;
 
 	/* The if path checker isn't running, just return the exiting value. */
 	if (!ct || !ct->running)
 		return c->path_state;
 
-	if (ct->req->state == PATH_PENDING) {
-		get_monotonic_time(&endtime);
-		check_pending(ct, endtime);
-	} else
+	if (ct->req->state == PATH_PENDING)
+		check_pending(ct, ct->endtime);
+	else
 		ct->running = 0;
-	set_msgid(c, ct->req->state);
+	rc = ct->req->state;
+	if (rc == PATH_PENDING) {
+		if (ct->running > c->timeout) {
+			LOG(3, "abort check on timeout");
+			io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
+			rc = PATH_DOWN;
+		}
+		else
+			LOG(4, "async io pending");
+	}
+	set_msgid(c, rc);
 
-	return ct->req->state;
+	return rc;
 }
 
 int libcheck_check (struct checker * c)
diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
index 95af5214..81db565b 100644
--- a/libmultipath/checkers/tur.c
+++ b/libmultipath/checkers/tur.c
@@ -58,6 +58,7 @@  struct tur_checker_context {
 	int msgid;
 	struct checker_context ctx;
 	unsigned int nr_timeouts;
+	struct timespec endtime;
 };
 
 int libcheck_init (struct checker * c)
@@ -323,7 +324,7 @@  static int tur_check_async_timeout(struct checker *c)
 	return (now.tv_sec > ct->time);
 }
 
-int check_pending(struct checker *c, struct timespec endtime)
+int check_pending(struct checker *c)
 {
 	struct tur_checker_context *ct = c->context;
 	int r, tur_status = PATH_PENDING;
@@ -333,7 +334,7 @@  int check_pending(struct checker *c, struct timespec endtime)
 	for (r = 0;
 	     r == 0 && ct->state == PATH_PENDING &&
 	     ct->msgid == MSG_TUR_RUNNING;
-	     r = pthread_cond_timedwait(&ct->active, &ct->lock, &endtime));
+	     r = pthread_cond_timedwait(&ct->active, &ct->lock, &ct->endtime));
 
 	if (!r) {
 		tur_status = ct->state;
@@ -355,21 +356,18 @@  int check_pending(struct checker *c, struct timespec endtime)
 
 int libcheck_pending(struct checker *c)
 {
-	struct timespec endtime;
 	struct tur_checker_context *ct = c->context;
 
 	/* The if path checker isn't running, just return the exiting value. */
 	if (!ct || !ct->thread)
 		return c->path_state;
 
-	get_monotonic_time(&endtime);
-	return check_pending(c, endtime);
+	return check_pending(c);
 }
 
 int libcheck_check(struct checker * c)
 {
 	struct tur_checker_context *ct = c->context;
-	struct timespec tsp;
 	pthread_attr_t attr;
 	int tur_status, r;
 
@@ -479,8 +477,7 @@  int libcheck_check(struct checker * c)
 				" sync mode", major(ct->devt), minor(ct->devt));
 			return tur_check(c->fd, c->timeout, &c->msgid);
 		}
-		tur_timeout(&tsp);
-		tur_status = check_pending(c, tsp);
+		tur_timeout(&ct->endtime);
 	}
 
 	return tur_status;