Message ID | 20190620150337.7847-16-jinpuwang@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | InfiniBand Transport (IBTRS) and Network Block Device (IBNBD) | expand |
On 6/20/19 8:03 AM, Jack Wang wrote: > +#define ibnbd_log(fn, dev, fmt, ...) ({ \ > + __builtin_choose_expr( \ > + __builtin_types_compatible_p( \ > + typeof(dev), struct ibnbd_clt_dev *), \ > + fn("<%s@%s> " fmt, (dev)->pathname, \ > + (dev)->sess->sessname, \ > + ##__VA_ARGS__), \ > + __builtin_choose_expr( \ > + __builtin_types_compatible_p(typeof(dev), \ > + struct ibnbd_srv_sess_dev *), \ > + fn("<%s@%s>: " fmt, (dev)->pathname, \ > + (dev)->sess->sessname, ##__VA_ARGS__), \ > + unknown_type())); \ > +}) Please remove the __builtin_choose_expr() / __builtin_types_compatible_p() construct and split this macro into two macros or inline functions: one for struct ibnbd_clt_dev and another one for struct ibnbd_srv_sess_dev. > +#define IBNBD_PROTO_VER_MAJOR 2 > +#define IBNBD_PROTO_VER_MINOR 0 > + > +#define IBNBD_PROTO_VER_STRING __stringify(IBNBD_PROTO_VER_MAJOR) "." \ > + __stringify(IBNBD_PROTO_VER_MINOR) > + > +#ifndef IBNBD_VER_STRING > +#define IBNBD_VER_STRING __stringify(IBNBD_PROTO_VER_MAJOR) "." \ > + __stringify(IBNBD_PROTO_VER_MINOR) Upstream code should not have a version number. > +/* TODO: should be configurable */ > +#define IBTRS_PORT 1234 How about converting this macro into a kernel module parameter? > +enum ibnbd_access_mode { > + IBNBD_ACCESS_RO, > + IBNBD_ACCESS_RW, > + IBNBD_ACCESS_MIGRATION, > +}; Some more information about what IBNBD_ACCESS_MIGRATION represents would be welcome. > +#define _IBNBD_FILEIO 0 > +#define _IBNBD_BLOCKIO 1 > +#define _IBNBD_AUTOIO 2 > > +enum ibnbd_io_mode { > + IBNBD_FILEIO = _IBNBD_FILEIO, > + IBNBD_BLOCKIO = _IBNBD_BLOCKIO, > + IBNBD_AUTOIO = _IBNBD_AUTOIO, > +}; Since the IBNBD_* and _IBNBD_* constants have the same numerical value, are the former constants really necessary? > +/** > + * struct ibnbd_msg_sess_info - initial session info from client to server > + * @hdr: message header > + * @ver: IBNBD protocol version > + */ > +struct ibnbd_msg_sess_info { > + struct ibnbd_msg_hdr hdr; > + u8 ver; > + u8 reserved[31]; > +}; Since the wire protocol is versioned, is it really necessary to add 31 reserved bytes? > +struct ibnbd_msg_sess_info_rsp { > + struct ibnbd_msg_hdr hdr; > + u8 ver; > + u8 reserved[31]; > +}; Same comment here. > +/** > + * struct ibnbd_msg_open_rsp - response message to IBNBD_MSG_OPEN > + * @hdr: message header > + * @nsectors: number of sectors What is the size of a single sector? > + * @device_id: device_id on server side to identify the device Please use the same order for the members in the kernel-doc header as in the structure. > + * @queue_flags: queue_flags of the device on server side Where is the queue_flags member? > + * @discard_granularity: size of the internal discard allocation unit > + * @discard_alignment: offset from internal allocation assignment > + * @physical_block_size: physical block size device supports > + * @logical_block_size: logical block size device supports What is the unit for these four members? > + * @max_segments: max segments hardware support in one transfer Does 'hardware' refer to the RDMA adapter that transfers the IBNBD message or to the storage device? In the latter case, I assume that transfer refers to a DMA transaction? > + * @io_mode: io_mode device is opened. Should a reference to enum ibnbd_io_mode be added? > + u8 __padding[10]; Why ten padding bytes? Does alignment really matter for a data structure like this one? > +/** > + * struct ibnbd_msg_io_old - message for I/O read/write for > + * ver < IBNBD_PROTO_VER_MAJOR > + * This structure is there only to know the size of the "old" message format > + * @hdr: message header > + * @device_id: device_id on server side to find the right device > + * @sector: bi_sector attribute from struct bio > + * @rw: bitmask, valid values are defined in enum ibnbd_io_flags > + * @bi_size: number of bytes for I/O read/write > + * @prio: priority > + */ > +struct ibnbd_msg_io_old { > + struct ibnbd_msg_hdr hdr; > + __le32 device_id; > + __le64 sector; > + __le32 rw; > + __le32 bi_size; > +}; Since this is the first version of IBNBD that is being sent upstream, I think that ibnbd_msg_io_old should be left out. > + > +/** > + * struct ibnbd_msg_io - message for I/O read/write > + * @hdr: message header > + * @device_id: device_id on server side to find the right device > + * @sector: bi_sector attribute from struct bio > + * @rw: bitmask, valid values are defined in enum ibnbd_io_flags enum ibnbd_io_flags doesn't look like a bitmask but rather like a bit field (https://en.wikipedia.org/wiki/Bit_field)? > +static inline u32 ibnbd_to_bio_flags(u32 ibnbd_flags) > +{ > + u32 bio_flags; The names ibnbd_flags and bio_flags are confusing since these two variables not only contain flags but also an operation. How about changing 'flags' into 'opf' or 'op_flags'? > +static inline const char *ibnbd_io_mode_str(enum ibnbd_io_mode mode) > +{ > + switch (mode) { > + case IBNBD_FILEIO: > + return "fileio"; > + case IBNBD_BLOCKIO: > + return "blockio"; > + case IBNBD_AUTOIO: > + return "autoio"; > + default: > + return "unknown"; > + } > +} > + > +static inline const char *ibnbd_access_mode_str(enum ibnbd_access_mode mode) > +{ > + switch (mode) { > + case IBNBD_ACCESS_RO: > + return "ro"; > + case IBNBD_ACCESS_RW: > + return "rw"; > + case IBNBD_ACCESS_MIGRATION: > + return "migration"; > + default: > + return "unknown"; > + } > +} These two functions are not in the hot path and hence should not be inline functions. Note: I plan to review the entire patch series but it may take some time before I have finished reviewing the entire patch series. Bart.
Thanks Bart for detailed review, reply inline. On Sat, Sep 14, 2019 at 12:10 AM Bart Van Assche <bvanassche@acm.org> wrote: > > On 6/20/19 8:03 AM, Jack Wang wrote: > > +#define ibnbd_log(fn, dev, fmt, ...) ({ \ > > + __builtin_choose_expr( \ > > + __builtin_types_compatible_p( \ > > + typeof(dev), struct ibnbd_clt_dev *), \ > > + fn("<%s@%s> " fmt, (dev)->pathname, \ > > + (dev)->sess->sessname, \ > > + ##__VA_ARGS__), \ > > + __builtin_choose_expr( \ > > + __builtin_types_compatible_p(typeof(dev), \ > > + struct ibnbd_srv_sess_dev *), \ > > + fn("<%s@%s>: " fmt, (dev)->pathname, \ > > + (dev)->sess->sessname, ##__VA_ARGS__), \ > > + unknown_type())); \ > > +}) > > Please remove the __builtin_choose_expr() / > __builtin_types_compatible_p() construct and split this macro into two > macros or inline functions: one for struct ibnbd_clt_dev and another one > for struct ibnbd_srv_sess_dev. Ok, will split to two macros. > > > +#define IBNBD_PROTO_VER_MAJOR 2 > > +#define IBNBD_PROTO_VER_MINOR 0 > > + > > +#define IBNBD_PROTO_VER_STRING __stringify(IBNBD_PROTO_VER_MAJOR) "." \ > > + __stringify(IBNBD_PROTO_VER_MINOR) > > + > > +#ifndef IBNBD_VER_STRING > > +#define IBNBD_VER_STRING __stringify(IBNBD_PROTO_VER_MAJOR) "." \ > > + __stringify(IBNBD_PROTO_VER_MINOR) > > Upstream code should not have a version number. IBNBD_VER_STRING can be removed together with MODULE_VERSION. > > > +/* TODO: should be configurable */ > > +#define IBTRS_PORT 1234 > > How about converting this macro into a kernel module parameter? Sounds good, will do. > > > +enum ibnbd_access_mode { > > + IBNBD_ACCESS_RO, > > + IBNBD_ACCESS_RW, > > + IBNBD_ACCESS_MIGRATION, > > +}; > > Some more information about what IBNBD_ACCESS_MIGRATION represents would > be welcome. This is a special mode to allow temporarily RW access mode during VM migration, will add comments next round. > > > +#define _IBNBD_FILEIO 0 > > +#define _IBNBD_BLOCKIO 1 > > +#define _IBNBD_AUTOIO 2 > > > > +enum ibnbd_io_mode { > > + IBNBD_FILEIO = _IBNBD_FILEIO, > > + IBNBD_BLOCKIO = _IBNBD_BLOCKIO, > > + IBNBD_AUTOIO = _IBNBD_AUTOIO, > > +}; > > Since the IBNBD_* and _IBNBD_* constants have the same numerical value, > are the former constants really necessary? Seems we can remove _IBNBD_*. > > > +/** > > + * struct ibnbd_msg_sess_info - initial session info from client to server > > + * @hdr: message header > > + * @ver: IBNBD protocol version > > + */ > > +struct ibnbd_msg_sess_info { > > + struct ibnbd_msg_hdr hdr; > > + u8 ver; > > + u8 reserved[31]; > > +}; > > Since the wire protocol is versioned, is it really necessary to add 31 > reserved bytes? You will never know, we prefer to keep the reserved bytes for future extension, 31 bytes is not much, isn't it? > > > +struct ibnbd_msg_sess_info_rsp { > > + struct ibnbd_msg_hdr hdr; > > + u8 ver; > > + u8 reserved[31]; > > +}; > > Same comment here. Dito. > > > +/** > > + * struct ibnbd_msg_open_rsp - response message to IBNBD_MSG_OPEN > > + * @hdr: message header > > + * @nsectors: number of sectors > > What is the size of a single sector? 512b, will mention explicitly in the next round. > > > + * @device_id: device_id on server side to identify the device > > Please use the same order for the members in the kernel-doc header as in > the structure. Ok, will fix > > > + * @queue_flags: queue_flags of the device on server side > > Where is the queue_flags member? Oh, will remove it, left over. > > > + * @discard_granularity: size of the internal discard allocation unit > > + * @discard_alignment: offset from internal allocation assignment > > + * @physical_block_size: physical block size device supports > > + * @logical_block_size: logical block size device supports > > What is the unit for these four members? will update to be more clear. > > > + * @max_segments: max segments hardware support in one transfer > > Does 'hardware' refer to the RDMA adapter that transfers the IBNBD > message or to the storage device? In the latter case, I assume that > transfer refers to a DMA transaction? "hardware" refers to the storage device on the server-side. > > > + * @io_mode: io_mode device is opened. > > Should a reference to enum ibnbd_io_mode be added? sounds good. > > > + u8 __padding[10]; > > Why ten padding bytes? Does alignment really matter for a data structure > like this one? It's more a reserved space for future usage, will rename padding to reserved. > > > +/** > > + * struct ibnbd_msg_io_old - message for I/O read/write for > > + * ver < IBNBD_PROTO_VER_MAJOR > > + * This structure is there only to know the size of the "old" message format > > + * @hdr: message header > > + * @device_id: device_id on server side to find the right device > > + * @sector: bi_sector attribute from struct bio > > + * @rw: bitmask, valid values are defined in enum ibnbd_io_flags > > + * @bi_size: number of bytes for I/O read/write > > + * @prio: priority > > + */ > > +struct ibnbd_msg_io_old { > > + struct ibnbd_msg_hdr hdr; > > + __le32 device_id; > > + __le64 sector; > > + __le32 rw; > > + __le32 bi_size; > > +}; > > Since this is the first version of IBNBD that is being sent upstream, I > think that ibnbd_msg_io_old should be left out. > > > + > > +/** > > + * struct ibnbd_msg_io - message for I/O read/write > > + * @hdr: message header > > + * @device_id: device_id on server side to find the right device > > + * @sector: bi_sector attribute from struct bio > > + * @rw: bitmask, valid values are defined in enum ibnbd_io_flags > > enum ibnbd_io_flags doesn't look like a bitmask but rather like a bit > field (https://en.wikipedia.org/wiki/Bit_field)? I will remove the "bitmask", I probably will also rename "rw "to "opf". > > > +static inline u32 ibnbd_to_bio_flags(u32 ibnbd_flags) > > +{ > > + u32 bio_flags; > > The names ibnbd_flags and bio_flags are confusing since these two > variables not only contain flags but also an operation. How about > changing 'flags' into 'opf' or 'op_flags'? Sounds good, will change to ibnbd_opf and bio_opf. > > > +static inline const char *ibnbd_io_mode_str(enum ibnbd_io_mode mode) > > +{ > > + switch (mode) { > > + case IBNBD_FILEIO: > > + return "fileio"; > > + case IBNBD_BLOCKIO: > > + return "blockio"; > > + case IBNBD_AUTOIO: > > + return "autoio"; > > + default: > > + return "unknown"; > > + } > > +} > > + > > +static inline const char *ibnbd_access_mode_str(enum ibnbd_access_mode mode) > > +{ > > + switch (mode) { > > + case IBNBD_ACCESS_RO: > > + return "ro"; > > + case IBNBD_ACCESS_RW: > > + return "rw"; > > + case IBNBD_ACCESS_MIGRATION: > > + return "migration"; > > + default: > > + return "unknown"; > > + } > > +} > > These two functions are not in the hot path and hence should not be > inline functions. Sounds reasonable, will remove the inline. > > Note: I plan to review the entire patch series but it may take some time > before I have finished reviewing the entire patch series. > That will be great, thanks a lot, Bart > Bart. Regards,
On Sun, Sep 15, 2019 at 04:30:04PM +0200, Jinpu Wang wrote: > Thanks Bart for detailed review, reply inline. > > On Sat, Sep 14, 2019 at 12:10 AM Bart Van Assche <bvanassche@acm.org> wrote: > > > > On 6/20/19 8:03 AM, Jack Wang wrote: > > > +#define ibnbd_log(fn, dev, fmt, ...) ({ \ > > > + __builtin_choose_expr( \ > > > + __builtin_types_compatible_p( \ > > > + typeof(dev), struct ibnbd_clt_dev *), \ > > > + fn("<%s@%s> " fmt, (dev)->pathname, \ > > > + (dev)->sess->sessname, \ > > > + ##__VA_ARGS__), \ > > > + __builtin_choose_expr( \ > > > + __builtin_types_compatible_p(typeof(dev), \ > > > + struct ibnbd_srv_sess_dev *), \ > > > + fn("<%s@%s>: " fmt, (dev)->pathname, \ > > > + (dev)->sess->sessname, ##__VA_ARGS__), \ > > > + unknown_type())); \ > > > +}) > > > > Please remove the __builtin_choose_expr() / > > __builtin_types_compatible_p() construct and split this macro into two > > macros or inline functions: one for struct ibnbd_clt_dev and another one > > for struct ibnbd_srv_sess_dev. > Ok, will split to two macros. > > > > > > +#define IBNBD_PROTO_VER_MAJOR 2 > > > +#define IBNBD_PROTO_VER_MINOR 0 > > > + > > > +#define IBNBD_PROTO_VER_STRING __stringify(IBNBD_PROTO_VER_MAJOR) "." \ > > > + __stringify(IBNBD_PROTO_VER_MINOR) > > > + > > > +#ifndef IBNBD_VER_STRING > > > +#define IBNBD_VER_STRING __stringify(IBNBD_PROTO_VER_MAJOR) "." \ > > > + __stringify(IBNBD_PROTO_VER_MINOR) > > > > Upstream code should not have a version number. > IBNBD_VER_STRING can be removed together with MODULE_VERSION. > > > > > +/* TODO: should be configurable */ > > > +#define IBTRS_PORT 1234 > > > > How about converting this macro into a kernel module parameter? > Sounds good, will do. Don't rush to do it and defer it to be the last change before merging, this is controversial request which not everyone will like here. Thanks
> > > +/** > > > + * struct ibnbd_msg_open_rsp - response message to IBNBD_MSG_OPEN > > > + * @hdr: message header > > > + * @nsectors: number of sectors > > > > What is the size of a single sector? > 512b, will mention explicitly in the next round. We only have KERNEL_SECTOR_SIZE=512, defined in ibnbd-clt.c. Looks we only depend on this exact number to set the capacity of the block device on client side. I'm not sure whether it is worth extending the protocol to send the number from the server instead. > > > + * @max_segments: max segments hardware support in one transfer > > > > Does 'hardware' refer to the RDMA adapter that transfers the IBNBD > > message or to the storage device? In the latter case, I assume that > > transfer refers to a DMA transaction? > "hardware" refers to the storage device on the server-side. The field contains queue_max_segments() of the target block device. And is used to call blk_queue_max_segments() on the corresponding device on the client side. We do also have BMAX_SEGMENTS define in ibnbd-clt.h which sets an upper limit to max_segments and does refer to the capabilities of the RDMA adapter. This information should only be known to the transport module and ideally would be returned to IBNBD during the registration in IBTRS. > > Note: I plan to review the entire patch series but it may take some time > > before I have finished reviewing the entire patch series. > > > That will be great, thanks a lot, Bart Thank you Bart!
On 9/15/19 10:27 PM, Leon Romanovsky wrote: > On Sun, Sep 15, 2019 at 04:30:04PM +0200, Jinpu Wang wrote: >> On Sat, Sep 14, 2019 at 12:10 AM Bart Van Assche <bvanassche@acm.org> wrote: >>>> +/* TODO: should be configurable */ >>>> +#define IBTRS_PORT 1234 >>> >>> How about converting this macro into a kernel module parameter? >> Sounds good, will do. > > Don't rush to do it and defer it to be the last change before merging, > this is controversial request which not everyone will like here. Hi Leon, If you do not agree with changing this macro into a kernel module parameter please suggest an alternative. Thanks, Bart.
> > > +#define _IBNBD_FILEIO 0 > > > +#define _IBNBD_BLOCKIO 1 > > > +#define _IBNBD_AUTOIO 2 > > > > > > +enum ibnbd_io_mode { > > > + IBNBD_FILEIO = _IBNBD_FILEIO, > > > + IBNBD_BLOCKIO = _IBNBD_BLOCKIO, > > > + IBNBD_AUTOIO = _IBNBD_AUTOIO, > > > +}; > > > > Since the IBNBD_* and _IBNBD_* constants have the same numerical value, > > are the former constants really necessary? > Seems we can remove _IBNBD_*. Sorry, checked again, we defined _IBNBD_* constants to show the right value for def_io_mode description. If we remove the _IBNBD_*, then the modinfo shows: def_io_mode:By default, export devices in blockio(IBNBD_BLOCKIO) or fileio(IBNBD_FILEIO) mode. (default: IBNBD_BLOCKIO (blockio)) instead of: parm: def_io_mode:By default, export devices in blockio(1) or fileio(0) mode. (default: 1 (blockio)) > > > +/** > > > + * struct ibnbd_msg_io_old - message for I/O read/write for > > > + * ver < IBNBD_PROTO_VER_MAJOR > > > + * This structure is there only to know the size of the "old" message format > > > + * @hdr: message header > > > + * @device_id: device_id on server side to find the right device > > > + * @sector: bi_sector attribute from struct bio > > > + * @rw: bitmask, valid values are defined in enum ibnbd_io_flags > > > + * @bi_size: number of bytes for I/O read/write > > > + * @prio: priority > > > + */ > > > +struct ibnbd_msg_io_old { > > > + struct ibnbd_msg_hdr hdr; > > > + __le32 device_id; > > > + __le64 sector; > > > + __le32 rw; > > > + __le32 bi_size; > > > +}; > > > > Since this is the first version of IBNBD that is being sent upstream, I > > think that ibnbd_msg_io_old should be left out. After discuss with Danil, we will remove the ibnbd_msg_io_old next round. Regards, -- Jack Wang Linux Kernel Developer Platform Engineering Compute (IONOS Cloud)
- Roman's pb emal address, it's no longer valid, will fix next round. > > > > > +static inline const char *ibnbd_io_mode_str(enum ibnbd_io_mode mode) > > > +{ > > > + switch (mode) { > > > + case IBNBD_FILEIO: > > > + return "fileio"; > > > + case IBNBD_BLOCKIO: > > > + return "blockio"; > > > + case IBNBD_AUTOIO: > > > + return "autoio"; > > > + default: > > > + return "unknown"; > > > + } > > > +} > > > + > > > +static inline const char *ibnbd_access_mode_str(enum ibnbd_access_mode mode) > > > +{ > > > + switch (mode) { > > > + case IBNBD_ACCESS_RO: > > > + return "ro"; > > > + case IBNBD_ACCESS_RW: > > > + return "rw"; > > > + case IBNBD_ACCESS_MIGRATION: > > > + return "migration"; > > > + default: > > > + return "unknown"; > > > + } > > > +} > > > > These two functions are not in the hot path and hence should not be > > inline functions. > Sounds reasonable, will remove the inline. inline was added to fix the -Wunused-function warning eg: CC [M] /<<PKGBUILDDIR>>/ibnbd/ibnbd-clt.o In file included from /<<PKGBUILDDIR>>/ibnbd/ibnbd-clt.h:34, from /<<PKGBUILDDIR>>/ibnbd/ibnbd-clt.c:33: /<<PKGBUILDDIR>>/ibnbd/ibnbd-proto.h:362:20: warning: 'ibnbd_access_mode_str' defined but not used [-Wunused-function] static const char *ibnbd_access_mode_str(enum ibnbd_access_mode mode) ^~~~~~~~~~~~~~~~~~~~~ /<<PKGBUILDDIR>>/ibnbd/ibnbd-proto.h:348:20: warning: 'ibnbd_io_mode_str' defined but not used [-Wunused-function] static const char *ibnbd_io_mode_str(enum ibnbd_io_mode mode) We have to move both functions to a separate header file if we really want to do it. The function is simple and small, if you insist, I will do it. Thanks, Jinpu
On 9/16/19 7:57 AM, Jinpu Wang wrote: >>>> +#define _IBNBD_FILEIO 0 >>>> +#define _IBNBD_BLOCKIO 1 >>>> +#define _IBNBD_AUTOIO 2 >>>> >>>> +enum ibnbd_io_mode { >>>> + IBNBD_FILEIO = _IBNBD_FILEIO, >>>> + IBNBD_BLOCKIO = _IBNBD_BLOCKIO, >>>> + IBNBD_AUTOIO = _IBNBD_AUTOIO, >>>> +}; >>> >>> Since the IBNBD_* and _IBNBD_* constants have the same numerical value, >>> are the former constants really necessary? >> >> Seems we can remove _IBNBD_*. > > Sorry, checked again, we defined _IBNBD_* constants to show the right > value for def_io_mode description. > If we remove the _IBNBD_*, then the modinfo shows: > def_io_mode:By default, export devices in blockio(IBNBD_BLOCKIO) or > fileio(IBNBD_FILEIO) mode. (default: IBNBD_BLOCKIO (blockio)) > instead of: > parm: def_io_mode:By default, export devices in blockio(1) > or fileio(0) mode. (default: 1 (blockio)) So the user is required to enter def_io_mode as a number? Wouldn't it be more friendly towards users to change that parameter from a number into a string? Thanks, Bart.
On Mon, Sep 16, 2019 at 7:25 PM Bart Van Assche <bvanassche@acm.org> wrote: > > On 9/16/19 7:57 AM, Jinpu Wang wrote: > >>>> +#define _IBNBD_FILEIO 0 > >>>> +#define _IBNBD_BLOCKIO 1 > >>>> +#define _IBNBD_AUTOIO 2 > >>>> > >>>> +enum ibnbd_io_mode { > >>>> + IBNBD_FILEIO = _IBNBD_FILEIO, > >>>> + IBNBD_BLOCKIO = _IBNBD_BLOCKIO, > >>>> + IBNBD_AUTOIO = _IBNBD_AUTOIO, > >>>> +}; > >>> > >>> Since the IBNBD_* and _IBNBD_* constants have the same numerical value, > >>> are the former constants really necessary? > >> > >> Seems we can remove _IBNBD_*. > > > > Sorry, checked again, we defined _IBNBD_* constants to show the right > > value for def_io_mode description. > > If we remove the _IBNBD_*, then the modinfo shows: > > def_io_mode:By default, export devices in blockio(IBNBD_BLOCKIO) or > > fileio(IBNBD_FILEIO) mode. (default: IBNBD_BLOCKIO (blockio)) > > instead of: > > parm: def_io_mode:By default, export devices in blockio(1) > > or fileio(0) mode. (default: 1 (blockio)) > > So the user is required to enter def_io_mode as a number? Wouldn't it be > more friendly towards users to change that parameter from a number into > a string? > Ok, it's a bit more code, will change to allow user to set "blockio" or "fileio" as string. Thanks, Jinpu
On Mon, Sep 16, 2019 at 06:45:17AM -0700, Bart Van Assche wrote: > On 9/15/19 10:27 PM, Leon Romanovsky wrote: > > On Sun, Sep 15, 2019 at 04:30:04PM +0200, Jinpu Wang wrote: > > > On Sat, Sep 14, 2019 at 12:10 AM Bart Van Assche <bvanassche@acm.org> wrote: > > > > > +/* TODO: should be configurable */ > > > > > +#define IBTRS_PORT 1234 > > > > > > > > How about converting this macro into a kernel module parameter? > > > Sounds good, will do. > > > > Don't rush to do it and defer it to be the last change before merging, > > this is controversial request which not everyone will like here. > > Hi Leon, > > If you do not agree with changing this macro into a kernel module parameter > please suggest an alternative. I didn't review code so my answer can be not fully accurate, but opening some port to use this IB* seems strange from my non-sysadmin POV. What about using RDMA-CM, like NVMe? Thanks > > Thanks, > > Bart.
On Tue, Sep 17, 2019 at 5:42 PM Leon Romanovsky <leon@kernel.org> wrote: > > On Mon, Sep 16, 2019 at 06:45:17AM -0700, Bart Van Assche wrote: > > On 9/15/19 10:27 PM, Leon Romanovsky wrote: > > > On Sun, Sep 15, 2019 at 04:30:04PM +0200, Jinpu Wang wrote: > > > > On Sat, Sep 14, 2019 at 12:10 AM Bart Van Assche <bvanassche@acm.org> wrote: > > > > > > +/* TODO: should be configurable */ > > > > > > +#define IBTRS_PORT 1234 > > > > > > > > > > How about converting this macro into a kernel module parameter? > > > > Sounds good, will do. > > > > > > Don't rush to do it and defer it to be the last change before merging, > > > this is controversial request which not everyone will like here. > > > > Hi Leon, > > > > If you do not agree with changing this macro into a kernel module parameter > > please suggest an alternative. > > I didn't review code so my answer can be not fully accurate, but opening > some port to use this IB* seems strange from my non-sysadmin POV. > What about using RDMA-CM, like NVMe? Hi Leon, We are using rdma-cm, the port number here is same like addr_trsvcid in NVMeoF, it controls which port rdma_listen is listening on. Currently, it's hardcoded, I've adapted the code to have a kernel module parameter port_nr in ibnbd_server, so it's possible to change it if the sysadmin wants. Thanks,
On 9/16/19 8:39 AM, Jinpu Wang wrote: > - Roman's pb emal address, it's no longer valid, will fix next round. > > >>> >>>> +static inline const char *ibnbd_io_mode_str(enum ibnbd_io_mode mode) >>>> +{ >>>> + switch (mode) { >>>> + case IBNBD_FILEIO: >>>> + return "fileio"; >>>> + case IBNBD_BLOCKIO: >>>> + return "blockio"; >>>> + case IBNBD_AUTOIO: >>>> + return "autoio"; >>>> + default: >>>> + return "unknown"; >>>> + } >>>> +} >>>> + >>>> +static inline const char *ibnbd_access_mode_str(enum ibnbd_access_mode mode) >>>> +{ >>>> + switch (mode) { >>>> + case IBNBD_ACCESS_RO: >>>> + return "ro"; >>>> + case IBNBD_ACCESS_RW: >>>> + return "rw"; >>>> + case IBNBD_ACCESS_MIGRATION: >>>> + return "migration"; >>>> + default: >>>> + return "unknown"; >>>> + } >>>> +} >>> >>> These two functions are not in the hot path and hence should not be >>> inline functions. >> Sounds reasonable, will remove the inline. > inline was added to fix the -Wunused-function warning eg: > > CC [M] /<<PKGBUILDDIR>>/ibnbd/ibnbd-clt.o > In file included from /<<PKGBUILDDIR>>/ibnbd/ibnbd-clt.h:34, > from /<<PKGBUILDDIR>>/ibnbd/ibnbd-clt.c:33: > /<<PKGBUILDDIR>>/ibnbd/ibnbd-proto.h:362:20: warning: > 'ibnbd_access_mode_str' defined but not used [-Wunused-function] > static const char *ibnbd_access_mode_str(enum ibnbd_access_mode mode) > ^~~~~~~~~~~~~~~~~~~~~ > /<<PKGBUILDDIR>>/ibnbd/ibnbd-proto.h:348:20: warning: > 'ibnbd_io_mode_str' defined but not used [-Wunused-function] > static const char *ibnbd_io_mode_str(enum ibnbd_io_mode mode) > > We have to move both functions to a separate header file if we really > want to do it. > The function is simple and small, if you insist, I will do it. Please move these functions into a .c file. That will reduce the size of the kernel modules and will also reduce the size of the header file. Thanks, Bart.
On Wed, Sep 18, 2019 at 5:26 PM Bart Van Assche <bvanassche@acm.org> wrote: > > On 9/16/19 8:39 AM, Jinpu Wang wrote: > > - Roman's pb emal address, it's no longer valid, will fix next round. > > > > > >>> > >>>> +static inline const char *ibnbd_io_mode_str(enum ibnbd_io_mode mode) > >>>> +{ > >>>> + switch (mode) { > >>>> + case IBNBD_FILEIO: > >>>> + return "fileio"; > >>>> + case IBNBD_BLOCKIO: > >>>> + return "blockio"; > >>>> + case IBNBD_AUTOIO: > >>>> + return "autoio"; > >>>> + default: > >>>> + return "unknown"; > >>>> + } > >>>> +} > >>>> + > >>>> +static inline const char *ibnbd_access_mode_str(enum ibnbd_access_mode mode) > >>>> +{ > >>>> + switch (mode) { > >>>> + case IBNBD_ACCESS_RO: > >>>> + return "ro"; > >>>> + case IBNBD_ACCESS_RW: > >>>> + return "rw"; > >>>> + case IBNBD_ACCESS_MIGRATION: > >>>> + return "migration"; > >>>> + default: > >>>> + return "unknown"; > >>>> + } > >>>> +} > >>> > >>> These two functions are not in the hot path and hence should not be > >>> inline functions. > >> Sounds reasonable, will remove the inline. > > inline was added to fix the -Wunused-function warning eg: > > > > CC [M] /<<PKGBUILDDIR>>/ibnbd/ibnbd-clt.o > > In file included from /<<PKGBUILDDIR>>/ibnbd/ibnbd-clt.h:34, > > from /<<PKGBUILDDIR>>/ibnbd/ibnbd-clt.c:33: > > /<<PKGBUILDDIR>>/ibnbd/ibnbd-proto.h:362:20: warning: > > 'ibnbd_access_mode_str' defined but not used [-Wunused-function] > > static const char *ibnbd_access_mode_str(enum ibnbd_access_mode mode) > > ^~~~~~~~~~~~~~~~~~~~~ > > /<<PKGBUILDDIR>>/ibnbd/ibnbd-proto.h:348:20: warning: > > 'ibnbd_io_mode_str' defined but not used [-Wunused-function] > > static const char *ibnbd_io_mode_str(enum ibnbd_io_mode mode) > > > > We have to move both functions to a separate header file if we really > > want to do it. > > The function is simple and small, if you insist, I will do it. > > Please move these functions into a .c file. That will reduce the size of > the kernel modules and will also reduce the size of the header file. > > Thanks, > > Bart. > Ok, will do. Thanks, Jinpu
diff --git a/drivers/block/ibnbd/ibnbd-log.h b/drivers/block/ibnbd/ibnbd-log.h new file mode 100644 index 000000000000..7a7ac3908564 --- /dev/null +++ b/drivers/block/ibnbd/ibnbd-log.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * InfiniBand Network Block Driver + * + * Copyright (c) 2014 - 2017 ProfitBricks GmbH. All rights reserved. + * Authors: Fabian Holler <mail@fholler.de> + * Jack Wang <jinpu.wang@profitbricks.com> + * Kleber Souza <kleber.souza@profitbricks.com> + * Danil Kipnis <danil.kipnis@profitbricks.com> + * Roman Penyaev <roman.penyaev@profitbricks.com> + * Milind Dumbare <Milind.dumbare@gmail.com> + * + * Copyright (c) 2017 - 2018 ProfitBricks GmbH. All rights reserved. + * Authors: Danil Kipnis <danil.kipnis@profitbricks.com> + * Roman Penyaev <roman.penyaev@profitbricks.com> + * + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Authors: Roman Penyaev <roman.penyaev@profitbricks.com> + * Jack Wang <jinpu.wang@cloud.ionos.com> + * Danil Kipnis <danil.kipnis@cloud.ionos.com> + */ + +#ifndef IBNBD_LOG_H +#define IBNBD_LOG_H + +#include "ibnbd-clt.h" +#include "ibnbd-srv.h" + +void unknown_type(void); + +#define ibnbd_log(fn, dev, fmt, ...) ({ \ + __builtin_choose_expr( \ + __builtin_types_compatible_p( \ + typeof(dev), struct ibnbd_clt_dev *), \ + fn("<%s@%s> " fmt, (dev)->pathname, \ + (dev)->sess->sessname, \ + ##__VA_ARGS__), \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(dev), \ + struct ibnbd_srv_sess_dev *), \ + fn("<%s@%s>: " fmt, (dev)->pathname, \ + (dev)->sess->sessname, ##__VA_ARGS__), \ + unknown_type())); \ +}) + +#define ibnbd_err(dev, fmt, ...) \ + ibnbd_log(pr_err, dev, fmt, ##__VA_ARGS__) +#define ibnbd_err_rl(dev, fmt, ...) \ + ibnbd_log(pr_err_ratelimited, dev, fmt, ##__VA_ARGS__) +#define ibnbd_wrn(dev, fmt, ...) \ + ibnbd_log(pr_warn, dev, fmt, ##__VA_ARGS__) +#define ibnbd_wrn_rl(dev, fmt, ...) \ + ibnbd_log(pr_warn_ratelimited, dev, fmt, ##__VA_ARGS__) +#define ibnbd_info(dev, fmt, ...) \ + ibnbd_log(pr_info, dev, fmt, ##__VA_ARGS__) +#define ibnbd_info_rl(dev, fmt, ...) \ + ibnbd_log(pr_info_ratelimited, dev, fmt, ##__VA_ARGS__) + +#endif /* IBNBD_LOG_H */ diff --git a/drivers/block/ibnbd/ibnbd-proto.h b/drivers/block/ibnbd/ibnbd-proto.h new file mode 100644 index 000000000000..e5a0a539447b --- /dev/null +++ b/drivers/block/ibnbd/ibnbd-proto.h @@ -0,0 +1,378 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * InfiniBand Network Block Driver + * + * Copyright (c) 2014 - 2017 ProfitBricks GmbH. All rights reserved. + * Authors: Fabian Holler <mail@fholler.de> + * Jack Wang <jinpu.wang@profitbricks.com> + * Kleber Souza <kleber.souza@profitbricks.com> + * Danil Kipnis <danil.kipnis@profitbricks.com> + * Roman Penyaev <roman.penyaev@profitbricks.com> + * Milind Dumbare <Milind.dumbare@gmail.com> + * + * Copyright (c) 2017 - 2018 ProfitBricks GmbH. All rights reserved. + * Authors: Danil Kipnis <danil.kipnis@profitbricks.com> + * Roman Penyaev <roman.penyaev@profitbricks.com> + * + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Authors: Roman Penyaev <roman.penyaev@profitbricks.com> + * Jack Wang <jinpu.wang@cloud.ionos.com> + * Danil Kipnis <danil.kipnis@cloud.ionos.com> + */ + +#ifndef IBNBD_PROTO_H +#define IBNBD_PROTO_H + +#include <linux/types.h> +#include <linux/blkdev.h> +#include <linux/limits.h> +#include <linux/inet.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <rdma/ib.h> + +#define IBNBD_PROTO_VER_MAJOR 2 +#define IBNBD_PROTO_VER_MINOR 0 + +#define IBNBD_PROTO_VER_STRING __stringify(IBNBD_PROTO_VER_MAJOR) "." \ + __stringify(IBNBD_PROTO_VER_MINOR) + +#ifndef IBNBD_VER_STRING +#define IBNBD_VER_STRING __stringify(IBNBD_PROTO_VER_MAJOR) "." \ + __stringify(IBNBD_PROTO_VER_MINOR) +#endif + +/* TODO: should be configurable */ +#define IBTRS_PORT 1234 + +/** + * enum ibnbd_msg_types - IBNBD message types + * @IBNBD_MSG_SESS_INFO: initial session info from client to server + * @IBNBD_MSG_SESS_INFO_RSP: initial session info from server to client + * @IBNBD_MSG_OPEN: open (map) device request + * @IBNBD_MSG_OPEN_RSP: response to an @IBNBD_MSG_OPEN + * @IBNBD_MSG_IO: block IO request operation + * @IBNBD_MSG_CLOSE: close (unmap) device request + */ +enum ibnbd_msg_type { + IBNBD_MSG_SESS_INFO, + IBNBD_MSG_SESS_INFO_RSP, + IBNBD_MSG_OPEN, + IBNBD_MSG_OPEN_RSP, + IBNBD_MSG_IO, + IBNBD_MSG_CLOSE, +}; + +/** + * struct ibnbd_msg_hdr - header of IBNBD messages + * @type: Message type, valid values see: enum ibnbd_msg_types + */ +struct ibnbd_msg_hdr { + __le16 type; + __le16 __padding; +}; + +enum ibnbd_access_mode { + IBNBD_ACCESS_RO, + IBNBD_ACCESS_RW, + IBNBD_ACCESS_MIGRATION, +}; + +#define _IBNBD_FILEIO 0 +#define _IBNBD_BLOCKIO 1 +#define _IBNBD_AUTOIO 2 + +enum ibnbd_io_mode { + IBNBD_FILEIO = _IBNBD_FILEIO, + IBNBD_BLOCKIO = _IBNBD_BLOCKIO, + IBNBD_AUTOIO = _IBNBD_AUTOIO, +}; + +/** + * struct ibnbd_msg_sess_info - initial session info from client to server + * @hdr: message header + * @ver: IBNBD protocol version + */ +struct ibnbd_msg_sess_info { + struct ibnbd_msg_hdr hdr; + u8 ver; + u8 reserved[31]; +}; + +/** + * struct ibnbd_msg_sess_info_rsp - initial session info from server to client + * @hdr: message header + * @ver: IBNBD protocol version + */ +struct ibnbd_msg_sess_info_rsp { + struct ibnbd_msg_hdr hdr; + u8 ver; + u8 reserved[31]; +}; + +/** + * struct ibnbd_msg_open - request to open a remote device. + * @hdr: message header + * @access_mode: the mode to open remote device, valid values see: + * enum ibnbd_access_mode + * @io_mode: Open volume on server as block device or as file + * @device_name: device path on remote side + */ +struct ibnbd_msg_open { + struct ibnbd_msg_hdr hdr; + u8 access_mode; + u8 io_mode; + s8 dev_name[NAME_MAX]; + u8 __padding[3]; +}; + +/** + * struct ibnbd_msg_close - request to close a remote device. + * @hdr: message header + * @device_id: device_id on server side to identify the device + */ +struct ibnbd_msg_close { + struct ibnbd_msg_hdr hdr; + __le32 device_id; +}; + +/** + * struct ibnbd_msg_open_rsp - response message to IBNBD_MSG_OPEN + * @hdr: message header + * @nsectors: number of sectors + * @device_id: device_id on server side to identify the device + * @queue_flags: queue_flags of the device on server side + * @max_hw_sectors: max hardware sectors in the usual 512b unit + * @max_write_same_sectors: max sectors for WRITE SAME in the 512b unit + * @max_discard_sectors: max. sectors that can be discarded at once + * @discard_granularity: size of the internal discard allocation unit + * @discard_alignment: offset from internal allocation assignment + * @physical_block_size: physical block size device supports + * @logical_block_size: logical block size device supports + * @max_segments: max segments hardware support in one transfer + * @secure_discard: supports secure discard + * @rotation: is a rotational disc? + * @io_mode: io_mode device is opened. + */ +struct ibnbd_msg_open_rsp { + struct ibnbd_msg_hdr hdr; + __le32 device_id; + __le64 nsectors; + __le32 max_hw_sectors; + __le32 max_write_same_sectors; + __le32 max_discard_sectors; + __le32 discard_granularity; + __le32 discard_alignment; + __le16 physical_block_size; + __le16 logical_block_size; + __le16 max_segments; + __le16 secure_discard; + u8 rotational; + u8 io_mode; + u8 __padding[10]; +}; + +/** + * struct ibnbd_msg_io_old - message for I/O read/write for + * ver < IBNBD_PROTO_VER_MAJOR + * This structure is there only to know the size of the "old" message format + * @hdr: message header + * @device_id: device_id on server side to find the right device + * @sector: bi_sector attribute from struct bio + * @rw: bitmask, valid values are defined in enum ibnbd_io_flags + * @bi_size: number of bytes for I/O read/write + * @prio: priority + */ +struct ibnbd_msg_io_old { + struct ibnbd_msg_hdr hdr; + __le32 device_id; + __le64 sector; + __le32 rw; + __le32 bi_size; +}; + +/** + * struct ibnbd_msg_io - message for I/O read/write + * @hdr: message header + * @device_id: device_id on server side to find the right device + * @sector: bi_sector attribute from struct bio + * @rw: bitmask, valid values are defined in enum ibnbd_io_flags + * @bi_size: number of bytes for I/O read/write + * @prio: priority + */ +struct ibnbd_msg_io { + struct ibnbd_msg_hdr hdr; + __le32 device_id; + __le64 sector; + __le32 rw; + __le32 bi_size; + __le16 prio; +}; + +#define IBNBD_OP_BITS 8 +#define IBNBD_OP_MASK ((1 << IBNBD_OP_BITS) - 1) + +/** + * enum ibnbd_io_flags - IBNBD request types from rq_flag_bits + * @IBNBD_OP_READ: read sectors from the device + * @IBNBD_OP_WRITE: write sectors to the device + * @IBNBD_OP_FLUSH: flush the volatile write cache + * @IBNBD_OP_DISCARD: discard sectors + * @IBNBD_OP_SECURE_ERASE: securely erase sectors + * @IBNBD_OP_WRITE_SAME: write the same sectors many times + + * @IBNBD_F_SYNC: request is sync (sync write or read) + * @IBNBD_F_FUA: forced unit access + */ +enum ibnbd_io_flags { + + /* Operations */ + + IBNBD_OP_READ = 0, + IBNBD_OP_WRITE = 1, + IBNBD_OP_FLUSH = 2, + IBNBD_OP_DISCARD = 3, + IBNBD_OP_SECURE_ERASE = 4, + IBNBD_OP_WRITE_SAME = 5, + + IBNBD_OP_LAST, + + /* Flags */ + + IBNBD_F_SYNC = 1<<(IBNBD_OP_BITS + 0), + IBNBD_F_FUA = 1<<(IBNBD_OP_BITS + 1), + + IBNBD_F_ALL = (IBNBD_F_SYNC | IBNBD_F_FUA) + +}; + +static inline u32 ibnbd_op(u32 flags) +{ + return (flags & IBNBD_OP_MASK); +} + +static inline u32 ibnbd_flags(u32 flags) +{ + return (flags & ~IBNBD_OP_MASK); +} + +static inline bool ibnbd_flags_supported(u32 flags) +{ + u32 op; + + op = ibnbd_op(flags); + flags = ibnbd_flags(flags); + + if (op >= IBNBD_OP_LAST) + return false; + if (flags & ~IBNBD_F_ALL) + return false; + + return true; +} + +static inline u32 ibnbd_to_bio_flags(u32 ibnbd_flags) +{ + u32 bio_flags; + + switch (ibnbd_op(ibnbd_flags)) { + case IBNBD_OP_READ: + bio_flags = REQ_OP_READ; + break; + case IBNBD_OP_WRITE: + bio_flags = REQ_OP_WRITE; + break; + case IBNBD_OP_FLUSH: + bio_flags = REQ_OP_FLUSH | REQ_PREFLUSH; + break; + case IBNBD_OP_DISCARD: + bio_flags = REQ_OP_DISCARD; + break; + case IBNBD_OP_SECURE_ERASE: + bio_flags = REQ_OP_SECURE_ERASE; + break; + case IBNBD_OP_WRITE_SAME: + bio_flags = REQ_OP_WRITE_SAME; + break; + default: + WARN(1, "Unknown IBNBD type: %d (flags %d)\n", + ibnbd_op(ibnbd_flags), ibnbd_flags); + bio_flags = 0; + } + + if (ibnbd_flags & IBNBD_F_SYNC) + bio_flags |= REQ_SYNC; + + if (ibnbd_flags & IBNBD_F_FUA) + bio_flags |= REQ_FUA; + + return bio_flags; +} + +static inline u32 rq_to_ibnbd_flags(struct request *rq) +{ + u32 ibnbd_flags; + + switch (req_op(rq)) { + case REQ_OP_READ: + ibnbd_flags = IBNBD_OP_READ; + break; + case REQ_OP_WRITE: + ibnbd_flags = IBNBD_OP_WRITE; + break; + case REQ_OP_DISCARD: + ibnbd_flags = IBNBD_OP_DISCARD; + break; + case REQ_OP_SECURE_ERASE: + ibnbd_flags = IBNBD_OP_SECURE_ERASE; + break; + case REQ_OP_WRITE_SAME: + ibnbd_flags = IBNBD_OP_WRITE_SAME; + break; + case REQ_OP_FLUSH: + ibnbd_flags = IBNBD_OP_FLUSH; + break; + default: + WARN(1, "Unknown request type %d (flags %llu)\n", + req_op(rq), (unsigned long long)rq->cmd_flags); + ibnbd_flags = 0; + } + + if (op_is_sync(rq->cmd_flags)) + ibnbd_flags |= IBNBD_F_SYNC; + + if (op_is_flush(rq->cmd_flags)) + ibnbd_flags |= IBNBD_F_FUA; + + return ibnbd_flags; +} + +static inline const char *ibnbd_io_mode_str(enum ibnbd_io_mode mode) +{ + switch (mode) { + case IBNBD_FILEIO: + return "fileio"; + case IBNBD_BLOCKIO: + return "blockio"; + case IBNBD_AUTOIO: + return "autoio"; + default: + return "unknown"; + } +} + +static inline const char *ibnbd_access_mode_str(enum ibnbd_access_mode mode) +{ + switch (mode) { + case IBNBD_ACCESS_RO: + return "ro"; + case IBNBD_ACCESS_RW: + return "rw"; + case IBNBD_ACCESS_MIGRATION: + return "migration"; + default: + return "unknown"; + } +} + +#endif /* IBNBD_PROTO_H */