@@ -46,6 +46,7 @@ static char *sg_version_date = "20210421";
#include <linux/timekeeping.h>
#include <linux/proc_fs.h> /* used if CONFIG_SCSI_PROC_FS */
#include <linux/xarray.h>
+#include <linux/eventfd.h>
#include <linux/debugfs.h>
#include <scsi/scsi.h>
@@ -293,6 +294,7 @@ struct sg_fd { /* holds the state of a file descriptor */
struct file *filp; /* my identity when sharing */
struct sg_fd __rcu *share_sfp;/* fd share cross-references, else NULL */
struct fasync_struct *async_qp; /* used by asynchronous notification */
+ struct eventfd_ctx *efd_ctxp; /* eventfd context or NULL */
struct xarray srp_arr; /* xarray of sg_request object pointers */
struct sg_request *rsv_arr[SG_MAX_RSV_REQS];
struct kref f_ref;
@@ -412,6 +414,7 @@ static void sg_take_snap(struct sg_fd *sfp, bool clear_first);
#define SG_HAVE_EXCLUDE(sdp) test_bit(SG_FDEV_EXCLUDE, (sdp)->fdev_bm)
#define SG_IS_O_NONBLOCK(sfp) (!!((sfp)->filp->f_flags & O_NONBLOCK))
#define SG_RQ_ACTIVE(srp) (atomic_read(&(srp)->rq_st) != SG_RQ_INACTIVE)
+#define SG_IS_V4I(srp) test_bit(SG_FRQ_IS_V4I, (srp)->frq_bm)
/*
* Kernel needs to be built with CONFIG_SCSI_LOGGING to see log messages.
@@ -1098,7 +1101,7 @@ sg_mrq_arr_flush(struct sg_mrq_hold *mhp)
}
static int
-sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *do_on_sfp,
+sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *sfp,
struct sg_request *srp)
{
int s_res, indx;
@@ -1109,30 +1112,37 @@ sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *do_on_sfp,
if (unlikely(!srp))
return -EPROTO;
indx = srp->s_hdr4.mrq_ind;
- if (unlikely(srp->parentfp != do_on_sfp)) {
- SG_LOG(1, do_on_sfp, "%s: mrq_ind=%d, sfp out-of-sync\n",
+ if (unlikely(srp->parentfp != sfp)) {
+ SG_LOG(1, sfp, "%s: mrq_ind=%d, sfp out-of-sync\n",
__func__, indx);
return -EPROTO;
}
- SG_LOG(3, do_on_sfp, "%s: mrq_ind=%d, pack_id=%d\n", __func__, indx,
+ SG_LOG(3, sfp, "%s: mrq_ind=%d, pack_id=%d\n", __func__, indx,
srp->pack_id);
if (unlikely(indx < 0 || indx >= tot_reqs))
return -EPROTO;
hp = a_hds + indx;
- s_res = sg_receive_v4(do_on_sfp, srp, NULL, hp);
+ s_res = sg_receive_v4(sfp, srp, NULL, hp);
if (unlikely(s_res == -EFAULT))
return s_res;
hp->info |= SG_INFO_MRQ_FINI;
if (mhp->co_mmap) {
sg_sgat_cp_into(mhp->co_mmap_sgatp, indx * SZ_SG_IO_V4,
(const u8 *)hp, SZ_SG_IO_V4);
- if (do_on_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL))
- kill_fasync(&do_on_sfp->async_qp, SIGPOLL, POLL_IN);
- } else if (do_on_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) {
+ if (sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL))
+ kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
+ if (sfp->efd_ctxp && (srp->rq_flags & SGV4_FLAG_EVENTFD)) {
+ u64 n = eventfd_signal(sfp->efd_ctxp, 1);
+
+ if (n != 1)
+ pr_info("%s: srp=%pK eventfd_signal problem\n",
+ __func__, srp);
+ }
+ } else if (sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) {
s_res = sg_mrq_arr_flush(mhp);
if (unlikely(s_res)) /* can only be -EFAULT */
return s_res;
- kill_fasync(&do_on_sfp->async_qp, SIGPOLL, POLL_IN);
+ kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
}
return 0;
}
@@ -1474,6 +1484,14 @@ sg_process_most_mrq(struct sg_fd *fp, struct sg_fd *o_sfp,
if (rq_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL))
kill_fasync(&rq_sfp->async_qp, SIGPOLL,
POLL_IN);
+ if (rq_sfp->efd_ctxp &&
+ (srp->rq_flags & SGV4_FLAG_EVENTFD)) {
+ u64 n = eventfd_signal(rq_sfp->efd_ctxp, 1);
+
+ if (n != 1)
+ pr_info("%s: eventfd_signal prob\n",
+ __func__);
+ }
} else if (rq_sfp->async_qp &&
(hp->flags & SGV4_FLAG_SIGNAL)) {
res = sg_mrq_arr_flush(mhp);
@@ -2677,6 +2695,34 @@ sg_rec_state_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool v4_active)
return err;
}
+/*
+ * Completion handling for a request whose sh_var is SG_SHR_RS_RQ (called
+ * from sg_complete_v3v4()). Two steps:
+ *   1) settle the request state: a bad rq_result, or other_err being true,
+ *      sets SG_FFD_READ_SIDE_ERR on sfp and forces the request to
+ *      SG_RQ_BUSY; otherwise the request is forced to SG_RQ_SHR_SWAP unless
+ *      sr_st says it is already there.
+ *   2) if sfp has a share partner (presumably the write-side fd, going by
+ *      the 'ws' naming) and srp->sh_srp is not set, notify that partner via
+ *      its fasync queue and/or its eventfd.
+ */
+static void
+sg_complete_shr_rs(struct sg_fd *sfp, struct sg_request *srp, bool other_err,
+		   enum sg_rq_state sr_st)
+{
+	struct sg_fd *ws_sfp = sg_fd_share_ptr(sfp);
+	int poll_type;
+	u64 added;
+
+	if (unlikely(!sg_result_is_good(srp->rq_result) || other_err)) {
+		set_bit(SG_FFD_READ_SIDE_ERR, sfp->ffd_bm);
+		sg_rq_chg_state_force(srp, SG_RQ_BUSY);
+		poll_type = POLL_HUP;	/* the "Hang-UP" flag */
+	} else {
+		if (sr_st != SG_RQ_SHR_SWAP)
+			sg_rq_chg_state_force(srp, SG_RQ_SHR_SWAP);
+		poll_type = POLL_OUT;
+	}
+
+	/* nobody to notify, or request already paired via sh_srp */
+	if (!ws_sfp || srp->sh_srp)
+		return;
+
+	/* non-v4 requests always signal; v4 requests only when asked to */
+	if (ws_sfp->async_qp &&
+	    (!SG_IS_V4I(srp) || (srp->rq_flags & SGV4_FLAG_SIGNAL)))
+		kill_fasync(&ws_sfp->async_qp, SIGPOLL, poll_type);
+
+	if (!ws_sfp->efd_ctxp || !(srp->rq_flags & SGV4_FLAG_EVENTFD))
+		return;
+
+	/* the code expects eventfd_signal() to hand back the 1 it was given */
+	added = eventfd_signal(ws_sfp->efd_ctxp, 1);
+	if (added != 1)
+		pr_info("%s: srp=%pK eventfd prob\n",
+			__func__, srp);
+}
+
static void
sg_complete_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool other_err)
{
@@ -2687,25 +2733,7 @@ sg_complete_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool other_err)
sg_shr_str(srp->sh_var, true));
switch (srp->sh_var) {
case SG_SHR_RS_RQ:
- {
- int poll_type = POLL_OUT;
- struct sg_fd *ws_sfp = sg_fd_share_ptr(sfp);
-
- if (unlikely(!sg_result_is_good(srp->rq_result) ||
- other_err)) {
- set_bit(SG_FFD_READ_SIDE_ERR, sfp->ffd_bm);
- if (sr_st != SG_RQ_BUSY)
- sg_rq_chg_state_force(srp, SG_RQ_BUSY);
- poll_type = POLL_HUP; /* "Hang-UP flag */
- } else if (sr_st != SG_RQ_SHR_SWAP) {
- sg_rq_chg_state_force(srp, SG_RQ_SHR_SWAP);
- }
- if (ws_sfp && ws_sfp->async_qp && !srp->sh_srp &&
- (!test_bit(SG_FRQ_IS_V4I, srp->frq_bm) ||
- (srp->rq_flags & SGV4_FLAG_SIGNAL)))
- kill_fasync(&ws_sfp->async_qp, SIGPOLL,
- poll_type);
- }
+ sg_complete_shr_rs(sfp, srp, other_err, sr_st);
break;
case SG_SHR_WS_RQ: /* cleanup both on write-side completion */
if (likely(sg_fd_is_shared(sfp))) {
@@ -3655,8 +3683,8 @@ sg_fill_request_element(struct sg_fd *sfp, struct sg_request *srp,
rip->problem = !sg_result_is_good(srp->rq_result);
rip->pack_id = test_bit(SG_FFD_PREFER_TAG, sfp->ffd_bm) ?
srp->tag : srp->pack_id;
- rip->usr_ptr = test_bit(SG_FRQ_IS_V4I, srp->frq_bm) ?
- uptr64(srp->s_hdr4.usr_ptr) : srp->s_hdr3.usr_ptr;
+ rip->usr_ptr = SG_IS_V4I(srp) ? uptr64(srp->s_hdr4.usr_ptr)
+ : srp->s_hdr3.usr_ptr;
xa_unlock_irqrestore(&sfp->srp_arr, iflags);
}
@@ -3713,7 +3741,7 @@ sg_wait_event_srp(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p,
#endif
return res;
}
- if (test_bit(SG_FRQ_IS_V4I, srp->frq_bm))
+ if (SG_IS_V4I(srp))
res = sg_receive_v4(sfp, srp, p, h4p);
else
res = sg_receive_v3(sfp, srp, p);
@@ -4237,6 +4265,23 @@ sg_fd_reshare(struct sg_fd *rs_sfp, int new_ws_fd)
return found ? 0 : -ENOTSOCK; /* ENOTSOCK for fd exists but not sg */
}
+/*
+ * Associate the eventfd identified by the user space file descriptor
+ * 'eventfd' with rs_sfp. At most one eventfd context may be attached to a
+ * sg file descriptor at a time.
+ *
+ * Returns 0 on success, -EBUSY if rs_sfp already has an eventfd context, or
+ * the negated errno reported by eventfd_ctx_fdget(). The caller must hold
+ * rs_sfp->f_mutex. The reference taken here is released with
+ * eventfd_ctx_put() elsewhere in this driver (the RM_EVENTFD control flag
+ * and sg_uc_remove_sfp()).
+ */
+static int
+sg_eventfd_new(struct sg_fd *rs_sfp, int eventfd)
+		__must_hold(&rs_sfp->f_mutex)
+{
+	struct eventfd_ctx *ctxp;
+
+	if (rs_sfp->efd_ctxp)
+		return -EBUSY;
+	/*
+	 * Validate in a local before publishing: request completion paths
+	 * (e.g. sg_rq_end_io()) only test rs_sfp->efd_ctxp for non-NULL
+	 * before calling eventfd_signal() on it, so an ERR_PTR() value must
+	 * never be visible in that field, not even briefly.
+	 */
+	ctxp = eventfd_ctx_fdget(eventfd);
+	if (IS_ERR(ctxp))
+		return PTR_ERR(ctxp);
+	rs_sfp->efd_ctxp = ctxp;
+	return 0;
+}
+}
+
/*
* First normalize want_rsv_sz to be >= sfp->sgat_elem_sz and
* <= max_segment_size. Exit if that is the same as old size; otherwise
@@ -4465,7 +4510,6 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
const u32 c_flgs_rm = seip->ctl_flags_rd_mask;
const u32 c_flgs_val_in = seip->ctl_flags;
u32 c_flgs_val_out = c_flgs_val_in;
- struct sg_fd *rs_sfp;
struct sg_device *sdp = sfp->parentdp;
/* TIME_IN_NS boolean, [raw] time in nanoseconds (def: millisecs) */
@@ -4545,7 +4589,8 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
* when written: 1 --> write-side doesn't want to continue
*/
if ((c_flgs_rm & SG_CTL_FLAGM_READ_SIDE_FINI) && sg_fd_is_shared(sfp)) {
- rs_sfp = sg_fd_share_ptr(sfp);
+ struct sg_fd *rs_sfp = sg_fd_share_ptr(sfp);
+
if (rs_sfp && !IS_ERR_OR_NULL(rs_sfp->rsv_arr[0])) {
struct sg_request *res_srp = rs_sfp->rsv_arr[0];
@@ -4562,7 +4607,8 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
res = sg_finish_rs_rq(sfp);
/* READ_SIDE_ERR boolean, [ro] share: read-side finished with error */
if (c_flgs_rm & SG_CTL_FLAGM_READ_SIDE_ERR) {
- rs_sfp = sg_fd_share_ptr(sfp);
+ struct sg_fd *rs_sfp = sg_fd_share_ptr(sfp);
+
if (rs_sfp && test_bit(SG_FFD_READ_SIDE_ERR, rs_sfp->ffd_bm))
c_flgs_val_out |= SG_CTL_FLAGM_READ_SIDE_ERR;
else
@@ -4618,6 +4664,21 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
else
c_flgs_val_out &= ~SG_CTL_FLAGM_SNAP_DEV;
}
+ /* RM_EVENTFD boolean, [rbw] */
+ if (c_flgs_rm & SG_CTL_FLAGM_RM_EVENTFD)
+ flg = !!sfp->efd_ctxp;
+ if ((c_flgs_wm & SG_CTL_FLAGM_RM_EVENTFD) && (c_flgs_val_in & SG_CTL_FLAGM_RM_EVENTFD)) {
+ if (sfp->efd_ctxp && atomic_read(&sfp->submitted) < 1) {
+ eventfd_ctx_put(sfp->efd_ctxp);
+ sfp->efd_ctxp = NULL;
+ }
+ }
+ if (c_flgs_rm & SG_CTL_FLAGM_RM_EVENTFD) {
+ if (flg)
+ c_flgs_val_out |= SG_CTL_FLAGM_RM_EVENTFD;
+ else
+ c_flgs_val_out &= ~SG_CTL_FLAGM_RM_EVENTFD;
+ }
if (c_flgs_val_in != c_flgs_val_out)
seip->ctl_flags = c_flgs_val_out;
@@ -4773,6 +4834,15 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p)
}
mutex_unlock(&sfp->f_mutex);
}
+ if (or_masks & SG_SEIM_EVENTFD) {
+ mutex_lock(&sfp->f_mutex);
+ if (s_wr_mask & SG_SEIM_EVENTFD) {
+ result = sg_eventfd_new(sfp, (int)seip->share_fd);
+ if (ret == 0 && unlikely(result))
+ ret = result;
+ }
+ mutex_unlock(&sfp->f_mutex);
+ }
/* call blk_poll() on this fd's HIPRI requests [raw] */
if (or_masks & SG_SEIM_BLK_POLL) {
n = 0;
@@ -5514,7 +5584,7 @@ sg_rq_end_io(struct request *rqq, blk_status_t status)
a_resid = scsi_rp->resid_len;
if (unlikely(a_resid)) {
- if (test_bit(SG_FRQ_IS_V4I, srp->frq_bm)) {
+ if (SG_IS_V4I(srp)) {
if (rq_data_dir(rqq) == READ)
srp->in_resid = a_resid;
else
@@ -5603,9 +5673,16 @@ sg_rq_end_io(struct request *rqq, blk_status_t status)
}
if (!(srp->rq_flags & SGV4_FLAG_HIPRI))
wake_up_interruptible(&sfp->cmpl_wait);
- if (sfp->async_qp && (!test_bit(SG_FRQ_IS_V4I, srp->frq_bm) ||
+ if (sfp->async_qp && (!SG_IS_V4I(srp) ||
(srp->rq_flags & SGV4_FLAG_SIGNAL)))
kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
+ if (sfp->efd_ctxp && (srp->rq_flags & SGV4_FLAG_EVENTFD)) {
+ u64 n = eventfd_signal(sfp->efd_ctxp, 1);
+
+ if (n != 1)
+ pr_info("%s: srp=%pK eventfd_signal problem\n",
+ __func__, srp);
+ }
kref_put(&sfp->f_ref, sg_remove_sfp); /* get in: sg_execute_cmd() */
}
@@ -5943,7 +6020,7 @@ sg_rq_map_kern(struct sg_request *srp, struct request_queue *q, struct request *
if (rw_ind == WRITE)
op_flags = REQ_SYNC | REQ_IDLE;
k = 0; /* N.B. following condition may increase k */
- if (test_bit(SG_FRQ_IS_V4I, srp->frq_bm)) {
+ if (SG_IS_V4I(srp)) {
struct sg_slice_hdr4 *slh4p = &srp->s_hdr4;
if (slh4p->dir == SG_DXFER_TO_DEV) {
@@ -6028,7 +6105,7 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
}
SG_LOG(5, sfp, "%s: long_cmdp=0x%pK ++\n", __func__, long_cmdp);
}
- if (likely(test_bit(SG_FRQ_IS_V4I, srp->frq_bm))) {
+ if (SG_IS_V4I(srp)) {
struct sg_io_v4 *h4p = cwrp->h4p;
if (dxfer_dir == SG_DXFER_TO_DEV) {
@@ -7225,6 +7302,8 @@ sg_uc_remove_sfp(struct work_struct *work)
if (subm != 0)
SG_LOG(1, sfp, "%s: expected submitted=0 got %d\n",
__func__, subm);
+ if (sfp->efd_ctxp)
+ eventfd_ctx_put(sfp->efd_ctxp);
xa_destroy(xafp);
xadp = &sdp->sfp_arr;
xa_lock_irqsave(xadp, iflags);
@@ -7553,7 +7632,7 @@ sg_proc_debug_sreq(struct sg_request *srp, int to, bool t_in_ns, char *obp,
if (unlikely(len < 1))
return 0;
- v4 = test_bit(SG_FRQ_IS_V4I, srp->frq_bm);
+ v4 = SG_IS_V4I(srp);
is_v3v4 = v4 ? true : (srp->s_hdr3.interface_id != '\0');
sg_get_rsv_str(srp, " ", "", sizeof(b), b);
if (strlen(b) > 5)
@@ -115,6 +115,7 @@ typedef struct sg_io_hdr {
#define SGV4_FLAG_Q_AT_TAIL SG_FLAG_Q_AT_TAIL
#define SGV4_FLAG_Q_AT_HEAD SG_FLAG_Q_AT_HEAD
#define SGV4_FLAG_DOUT_OFFSET 0x40 /* dout byte offset in v4::spare_in */
+#define SGV4_FLAG_EVENTFD 0x80 /* signal completion on ... */
#define SGV4_FLAG_COMPLETE_B4 0x100 /* mrq: complete this rq before next */
#define SGV4_FLAG_SIGNAL 0x200 /* v3: ignored; v4 signal on completion */
#define SGV4_FLAG_IMMED 0x400 /* issue request and return immediately ... */
@@ -196,7 +197,8 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
#define SG_SEIM_CHG_SHARE_FD 0x40 /* read-side given new write-side fd */
#define SG_SEIM_SGAT_ELEM_SZ 0x80 /* sgat element size (>= PAGE_SIZE) */
#define SG_SEIM_BLK_POLL 0x100 /* call blk_poll, uses 'num' field */
-#define SG_SEIM_ALL_BITS 0x1ff /* should be OR of previous items */
+#define SG_SEIM_EVENTFD 0x200 /* pass eventfd to driver */
+#define SG_SEIM_ALL_BITS 0x3ff /* should be OR of previous items */
/* flag and mask values for boolean fields follow */
#define SG_CTL_FLAGM_TIME_IN_NS 0x1 /* time: nanosecs (def: millisecs) */
@@ -214,7 +216,8 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
#define SG_CTL_FLAGM_MORE_ASYNC 0x800 /* yield EAGAIN in more cases */
#define SG_CTL_FLAGM_EXCL_WAITQ 0x1000 /* only 1 wake up per response */
#define SG_CTL_FLAGM_SNAP_DEV 0x2000 /* output to debugfs::snapped */
-#define SG_CTL_FLAGM_ALL_BITS 0x3fff /* should be OR of previous items */
+#define SG_CTL_FLAGM_RM_EVENTFD 0x4000 /* only if new eventfd wanted */
+#define SG_CTL_FLAGM_ALL_BITS 0x7fff /* should be OR of previous items */
/* Write one of the following values to sg_extended_info::read_value, get... */
#define SG_SEIRV_INT_MASK 0x0 /* get SG_SEIM_ALL_BITS */
@@ -253,7 +256,7 @@ struct sg_extended_info {
__u32 reserved_sz; /* data/sgl size of pre-allocated request */
__u32 tot_fd_thresh; /* total data/sgat for this fd, 0: no limit */
__u32 minor_index; /* rd: kernel's sg device minor number */
- __u32 share_fd; /* SHARE_FD and CHG_SHARE_FD use this */
+ __u32 share_fd; /* for SHARE_FD, CHG_SHARE_FD or EVENTFD */
__u32 sgat_elem_sz; /* sgat element size (must be power of 2) */
__s32 num; /* blk_poll: loop_count (-1 -> spin)) */
__u8 pad_to_96[48]; /* pad so struct is 96 bytes long */
Experimental version. Add support for user space to pass a file descriptor generated by the eventfd(2) system call to this driver via ioctl(2), thereby associating the eventfd with an sg file descriptor. Add support to remove the eventfd relationship so another can be added to the same sg file descriptor. If an eventfd is active on an sg fd and a request has the SGV4_FLAG_EVENTFD flag set, then on completion of that request the driver "signals" that eventfd by adding 1 to its internal count. Signed-off-by: Douglas Gilbert <dgilbert@interlog.com> --- drivers/scsi/sg.c | 157 +++++++++++++++++++++++++++++++---------- include/uapi/scsi/sg.h | 9 ++- 2 files changed, 124 insertions(+), 42 deletions(-)