@@ -139,6 +139,7 @@ enum sg_shr_var {
#define SG_FRQ_FOR_MMAP 7 /* request needs PAGE_SIZE elements */
#define SG_FRQ_COUNT_ACTIVE 8 /* sfp->submitted + waiting active */
#define SG_FRQ_ISSUED 9 /* blk_execute_rq_nowait() finished */
+#define SG_FRQ_POLL_SLEPT 10 /* stop re-entry of hybrid_sleep() */
/* Bit positions (flags) for sg_fd::ffd_bm bitmask follow */
#define SG_FFD_FORCE_PACKID 0 /* receive only given pack_id/tag */
@@ -153,6 +154,7 @@ enum sg_shr_var {
#define SG_FFD_NO_DURATION 9 /* don't do command duration calc */
#define SG_FFD_MORE_ASYNC 10 /* yield EBUSY more often */
#define SG_FFD_MRQ_ABORT 11 /* SG_IOABORT + FLAG_MULTIPLE_REQS */
+#define SG_FFD_EXCL_WAITQ 12 /* append _exclusive to wait_event */
/* Bit positions (flags) for sg_device::fdev_bm bitmask follow */
#define SG_FDEV_EXCLUDE 0 /* have fd open with O_EXCL */
@@ -962,6 +964,17 @@ sg_mrq_1complet(struct sg_io_v4 *cop, struct sg_io_v4 *a_hds,
return 0;
}
+/*
+ * Interruptible wait on sfp->cmpl_wait that ends once
+ * sg_mrq_get_ready_srp(sfp, srpp) evaluates true; callers then inspect
+ * *srpp. When SG_FFD_EXCL_WAITQ is set on this file descriptor the
+ * _exclusive form of the wait is used. Callers treat a non-zero return
+ * as a signal having interrupted the wait (-ERESTARTSYS).
+ * NOTE(review): the call sites used wait_event_interruptible() before this
+ * helper was introduced; confirm the double-underscore forms are intended.
+ */
+static int
+sg_wait_mrq_event(struct sg_fd *sfp, struct sg_request **srpp)
+{
+	/* common case first: ordinary (non-exclusive) waiter */
+	if (!test_bit(SG_FFD_EXCL_WAITQ, sfp->ffd_bm))
+		return __wait_event_interruptible(sfp->cmpl_wait,
+					sg_mrq_get_ready_srp(sfp, srpp));
+	return __wait_event_interruptible_exclusive(sfp->cmpl_wait,
+					sg_mrq_get_ready_srp(sfp, srpp));
+}
+
/*
* This is a fair-ish algorithm for an interruptible wait on two file
* descriptors. It favours the main fd over the secondary fd (sec_sfp).
@@ -1002,9 +1015,7 @@ sg_mrq_complets(struct sg_io_v4 *cop, struct sg_io_v4 *a_hds,
return res;
}
} else if (mreqs > 0) {
- res = wait_event_interruptible
- (sfp->cmpl_wait,
- sg_mrq_get_ready_srp(sfp, &srp));
+ res = sg_wait_mrq_event(sfp, &srp);
if (unlikely(res))
return res; /* signal --> -ERESTARTSYS */
if (IS_ERR(srp)) {
@@ -1017,9 +1028,7 @@ sg_mrq_complets(struct sg_io_v4 *cop, struct sg_io_v4 *a_hds,
return res;
}
} else if (sec_reqs > 0) {
- res = wait_event_interruptible
- (sec_sfp->cmpl_wait,
- sg_mrq_get_ready_srp(sec_sfp, &srp));
+ res = sg_wait_mrq_event(sec_sfp, &srp);
if (unlikely(res))
return res; /* signal --> -ERESTARTSYS */
if (IS_ERR(srp)) {
@@ -1082,6 +1091,7 @@ sg_mrq_sanity(struct sg_device *sdp, struct sg_io_v4 *cop,
rip, k, "no IMMED with COMPLETE_B4");
return -ERANGE;
}
+ /* N.B. SGV4_FLAG_SIG_ON_OTHER is allowed */
}
if (!sg_fd_is_shared(sfp)) {
if (unlikely(flags & SGV4_FLAG_SHARE)) {
@@ -1113,8 +1123,9 @@ sg_mrq_sanity(struct sg_device *sdp, struct sg_io_v4 *cop,
/*
* Implements the multiple request functionality. When 'blocking' is true
* invocation was via ioctl(SG_IO), otherwise it was via ioctl(SG_IOSUBMIT).
- * Only fully non-blocking if IMMED flag given or when ioctl(SG_IOSUBMIT)
- * is used with O_NONBLOCK set on its file descriptor.
+ * Submission is non-blocking if the IMMED flag is given or when
+ * ioctl(SG_IOSUBMIT) is used with O_NONBLOCK set on its file descriptor.
+ * Hipri non-blocking applies when the HIPRI flag is given.
*/
static int
sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking)
@@ -1174,8 +1185,7 @@ sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking)
immed = true;
SG_LOG(3, fp, "%s: %s, tot_reqs=%u, id_of_mrq=%d\n", __func__,
(immed ? "IMMED" : (blocking ? "ordered blocking" :
- "variable blocking")),
- tot_reqs, id_of_mrq);
+ "variable blocking")), tot_reqs, id_of_mrq);
sg_sgv4_out_zero(cop);
if (unlikely(tot_reqs > U16_MAX)) {
@@ -2018,9 +2028,7 @@ sg_mrq_iorec_complets(struct sg_fd *sfp, bool non_block, int max_mrqs,
return k;
for ( ; k < max_mrqs; ++k) {
- res = wait_event_interruptible
- (sfp->cmpl_wait,
- sg_mrq_get_ready_srp(sfp, &srp));
+ res = sg_wait_mrq_event(sfp, &srp);
if (unlikely(res))
return res; /* signal --> -ERESTARTSYS */
if (IS_ERR(srp))
@@ -2083,6 +2091,19 @@ sg_mrq_ioreceive(struct sg_fd *sfp, struct sg_io_v4 *cop, void __user *p,
return res;
}
+/*
+ * Interruptible wait on sfp->cmpl_wait that ends once
+ * sg_get_ready_srp(sfp, srpp, id, is_tag) evaluates true; callers then
+ * inspect *srpp. 'id' and 'is_tag' are handed to sg_get_ready_srp()
+ * unchanged. When SG_FFD_EXCL_WAITQ is set on this file descriptor the
+ * _exclusive form of the wait is used. Callers treat a non-zero return
+ * as a signal having interrupted the wait (-ERESTARTSYS).
+ * NOTE(review): the call sites used wait_event_interruptible() before this
+ * helper was introduced; confirm the double-underscore forms are intended.
+ */
+static int
+sg_wait_id_event(struct sg_fd *sfp, struct sg_request **srpp, int id,
+		 bool is_tag)
+{
+	/* common case first: ordinary (non-exclusive) waiter */
+	if (!test_bit(SG_FFD_EXCL_WAITQ, sfp->ffd_bm))
+		return __wait_event_interruptible(sfp->cmpl_wait,
+				sg_get_ready_srp(sfp, srpp, id, is_tag));
+	return __wait_event_interruptible_exclusive(sfp->cmpl_wait,
+				sg_get_ready_srp(sfp, srpp, id, is_tag));
+}
+
/*
* Called when ioctl(SG_IORECEIVE) received. Expects a v4 interface object.
* Checks if O_NONBLOCK file flag given, if not checks given 'flags' field
@@ -2134,9 +2155,7 @@ sg_ctl_ioreceive(struct sg_fd *sfp, void __user *p)
return -ENODEV;
if (non_block)
return -EAGAIN;
- res = wait_event_interruptible
- (sfp->cmpl_wait,
- sg_get_ready_srp(sfp, &srp, id, use_tag));
+ res = sg_wait_id_event(sfp, &srp, id, use_tag);
if (unlikely(res))
return res; /* signal --> -ERESTARTSYS */
if (IS_ERR(srp))
@@ -2191,9 +2210,7 @@ sg_ctl_ioreceive_v3(struct sg_fd *sfp, void __user *p)
return -ENODEV;
if (non_block)
return -EAGAIN;
- res = wait_event_interruptible
- (sfp->cmpl_wait,
- sg_get_ready_srp(sfp, &srp, pack_id, false));
+ res = sg_wait_id_event(sfp, &srp, pack_id, false);
if (unlikely(res))
return res; /* signal --> -ERESTARTSYS */
if (IS_ERR(srp))
@@ -2351,7 +2368,7 @@ sg_read(struct file *filp, char __user *p, size_t count, loff_t *ppos)
int flgs;
ret = get_user(flgs, &h3_up->flags);
- if (ret)
+ if (unlikely(ret))
return ret;
if (flgs & SGV4_FLAG_IMMED)
non_block = true;
@@ -2374,9 +2391,7 @@ sg_read(struct file *filp, char __user *p, size_t count, loff_t *ppos)
return -ENODEV;
if (non_block) /* O_NONBLOCK or v3::flags & SGV4_FLAG_IMMED */
return -EAGAIN;
- ret = wait_event_interruptible
- (sfp->cmpl_wait,
- sg_get_ready_srp(sfp, &srp, want_id, false));
+ ret = sg_wait_id_event(sfp, &srp, want_id, false);
if (unlikely(ret)) /* -ERESTARTSYS as signal hit process */
return ret;
if (IS_ERR(srp))
@@ -2846,9 +2861,9 @@ sg_wait_event_srp(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p,
goto skip_wait;
}
SG_LOG(3, sfp, "%s: about to wait_event...()\n", __func__);
- /* usually will be woken up by sg_rq_end_io() callback */
- res = wait_event_interruptible(sfp->cmpl_wait,
- sg_rq_landed(sdp, srp));
+ /* N.B. The SG_FFD_EXCL_WAITQ flag is ignored here. */
+ res = __wait_event_interruptible(sfp->cmpl_wait,
+ sg_rq_landed(sdp, srp));
if (unlikely(res)) { /* -ERESTARTSYS because signal hit thread */
set_bit(SG_FRQ_IS_ORPHAN, srp->frq_bm);
/* orphans harvested when sfp->keep_orphan is false */
@@ -3316,7 +3331,7 @@ sg_find_sfp_by_fd(const struct file *search_for, int search_fd,
++num_d;
for (k = 0; k < num_d; ++k) {
sdp = idr_find(&sg_index_idr, k);
- if (unlikely(!sdp || SG_IS_DETACHING(sdp)))
+ if (unlikely(!sdp) || SG_IS_DETACHING(sdp))
continue;
xa_for_each_marked(&sdp->sfp_arr, idx, sfp,
SG_XA_FD_UNSHARED) {
@@ -3354,7 +3369,7 @@ sg_find_sfp_by_fd(const struct file *search_for, int search_fd,
++num_d;
for (k = 0; k < num_d; ++k) {
sdp = idr_find(&sg_index_idr, k);
- if (unlikely(!sdp || SG_IS_DETACHING(sdp)))
+ if (unlikely(!sdp) || SG_IS_DETACHING(sdp))
continue;
xa_for_each(&sdp->sfp_arr, idx, sfp) {
if (!sg_fd_is_shared(sfp))
@@ -3781,6 +3796,18 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
else
c_flgs_val_out &= ~SG_CTL_FLAGM_MORE_ASYNC;
}
+ /* EXCL_WAITQ boolean, [rbw] */
+ if (c_flgs_rm & SG_CTL_FLAGM_EXCL_WAITQ)
+ flg = test_bit(SG_FFD_EXCL_WAITQ, sfp->ffd_bm);
+ if (c_flgs_wm & SG_CTL_FLAGM_EXCL_WAITQ)
+ assign_bit(SG_FFD_EXCL_WAITQ, sfp->ffd_bm,
+ !!(c_flgs_val_in & SG_CTL_FLAGM_EXCL_WAITQ));
+ if (c_flgs_rm & SG_CTL_FLAGM_EXCL_WAITQ) {
+ if (flg)
+ c_flgs_val_out |= SG_CTL_FLAGM_EXCL_WAITQ;
+ else
+ c_flgs_val_out &= ~SG_CTL_FLAGM_EXCL_WAITQ;
+ }
if (c_flgs_val_in != c_flgs_val_out)
seip->ctl_flags = c_flgs_val_out;
@@ -114,16 +114,16 @@ typedef struct sg_io_hdr {
#define SGV4_FLAG_YIELD_TAG 0x8 /* sg_io_v4::generated_tag set after SG_IOS */
#define SGV4_FLAG_Q_AT_TAIL SG_FLAG_Q_AT_TAIL
#define SGV4_FLAG_Q_AT_HEAD SG_FLAG_Q_AT_HEAD
-#define SGV4_FLAG_COMPLETE_B4 0x100
-#define SGV4_FLAG_SIGNAL 0x200 /* v3: ignored; v4 signal on completion */
-#define SGV4_FLAG_IMMED 0x400 /* for polling with SG_IOR, ignored in SG_IOS */
+#define SGV4_FLAG_COMPLETE_B4 0x100 /* mrq: complete this rq before next */
+#define SGV4_FLAG_SIGNAL 0x200 /* v3: ignored; v4 signal on completion */
+#define SGV4_FLAG_IMMED 0x400 /* issue request and return immediately ... */
#define SGV4_FLAG_HIPRI 0x800 /* request will use blk_poll to complete */
#define SGV4_FLAG_STOP_IF 0x1000 /* Stops sync mrq if error or warning */
#define SGV4_FLAG_DEV_SCOPE 0x2000 /* permit SG_IOABORT to have wider scope */
#define SGV4_FLAG_SHARE 0x4000 /* share IO buffer; needs SG_SEIM_SHARE_FD */
#define SGV4_FLAG_DO_ON_OTHER 0x8000 /* available on either of shared pair */
#define SGV4_FLAG_NO_DXFER SG_FLAG_NO_DXFER /* but keep dev<-->kernel xfr */
-#define SGV4_FLAG_MULTIPLE_REQS 0x20000 /* n sg_io_v4s in data-in */
+#define SGV4_FLAG_MULTIPLE_REQS 0x20000 /* 1 or more sg_io_v4-s in data-in */
/* Output (potentially OR-ed together) in v3::info or v4::info field */
#define SG_INFO_OK_MASK 0x1
@@ -151,7 +151,7 @@ typedef struct sg_scsi_id {
short h_cmd_per_lun;/* host (adapter) maximum commands per lun */
short d_queue_depth;/* device (or adapter) maximum queue length */
union {
- int unused[2]; /* as per version 3 driver */
+ int unused[2]; /* as per version 3 driver */
__u8 scsi_lun[8]; /* full 8 byte SCSI LUN [in v4 driver] */
};
} sg_scsi_id_t;
@@ -163,8 +163,14 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
/* sg_io_owned set imples synchronous, clear implies asynchronous */
char sg_io_owned;/* 0 -> complete with read(), 1 -> owned by SG_IO */
char problem; /* 0 -> no problem detected, 1 -> error to report */
+ /* If SG_CTL_FLAGM_TAG_FOR_PACK_ID set on fd then next field is tag */
int pack_id; /* pack_id, in v4 driver may be tag instead */
void __user *usr_ptr; /* user provided pointer in v3+v4 interface */
+ /*
+ * millisecs elapsed since the command started (req_state==1) or
+ * command duration (req_state==2). Will be in nanoseconds after
+ * the SG_SET_GET_EXTENDED{TIME_IN_NS} ioctl.
+ */
unsigned int duration;
int unused;
} sg_req_info_t;
@@ -199,12 +205,13 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
#define SG_CTL_FLAGM_IS_SHARE 0x20 /* rd: fd is read-side or write-side share */
#define SG_CTL_FLAGM_IS_READ_SIDE 0x40 /* rd: this fd is read-side share */
#define SG_CTL_FLAGM_UNSHARE 0x80 /* undo share after inflight cmd */
-/* rd> 1: read-side finished 0: not; wr> 1: finish share post read-side */
+/* rd> 1: read-side finished, 0: not; wr> 1: finish share post read-side */
#define SG_CTL_FLAGM_READ_SIDE_FINI 0x100 /* wr> 0: setup for repeat write-side req */
#define SG_CTL_FLAGM_READ_SIDE_ERR 0x200 /* rd: sharing, read-side got error */
#define SG_CTL_FLAGM_NO_DURATION 0x400 /* don't calc command duration */
#define SG_CTL_FLAGM_MORE_ASYNC 0x800 /* yield EAGAIN in more cases */
-#define SG_CTL_FLAGM_ALL_BITS 0xfff /* should be OR of previous items */
+#define SG_CTL_FLAGM_EXCL_WAITQ 0x1000 /* only 1 wake up per response */
+#define SG_CTL_FLAGM_ALL_BITS 0x1fff /* should be OR of previous items */
/* Write one of the following values to sg_extended_info::read_value, get... */
#define SG_SEIRV_INT_MASK 0x0 /* get SG_SEIM_ALL_BITS */
@@ -437,9 +444,11 @@ struct sg_header {
/*
* New ioctls to replace async (non-blocking) write()/read() interface.
* Present in version 4 and later of the sg driver [>20190427]. The
- * SG_IOSUBMIT and SG_IORECEIVE ioctls accept the sg_v4 interface based on
- * struct sg_io_v4 found in <include/uapi/linux/bsg.h>. These objects are
- * passed by a pointer in the third argument of the ioctl.
+ * SG_IOSUBMIT_V3 and SG_IORECEIVE_V3 ioctls accept the sg_v3 interface
+ * based on struct sg_io_hdr shown above. The SG_IOSUBMIT and SG_IORECEIVE
+ * ioctls accept the sg_v4 interface based on struct sg_io_v4 found in
+ * <include/uapi/linux/bsg.h>. These objects are passed by a pointer in
+ * the third argument of the ioctl.
*
* Data may be transferred both from the user space to the driver by these
* ioctls. Hence the _IOWR macro is used here to generate the ioctl number
The new SG_CTL_FLAGM_EXCL_WAITQ boolean flag can be set on an sg file descriptor so that subsequent wait_event_interruptible() calls on that file descriptor are changed to their "_exclusive()" variants. This is to address the potential "thundering herd" problem with the wait_queue. The default (flag clear) behaviour is unchanged. Signed-off-by: Douglas Gilbert <dgilbert@interlog.com> --- drivers/scsi/sg.c | 83 ++++++++++++++++++++++++++++-------------- include/uapi/scsi/sg.h | 29 ++++++++++----- 2 files changed, 74 insertions(+), 38 deletions(-)