diff mbox series

[RFC,V3,net-next,1/5] ethtool: Allow network drivers to dump arbitrary EEPROM data

Message ID 1615828363-464-2-git-send-email-moshe@nvidia.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series ethtool: Extend module EEPROM dump API | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 15 maintainers not CCed: linux-api@vger.kernel.org petr.vorel@gmail.com amitc@mellanox.com danieller@nvidia.com johannes.berg@intel.com gustavo@embeddedor.com ayal@mellanox.com linux@rempel-privat.de linux-doc@vger.kernel.org corbet@lwn.net f.fainelli@gmail.com dalias@libc.org petrm@mellanox.com mchehab+huawei@kernel.org meirl@mellanox.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 3080 this patch: 3080
netdev/kdoc success Errors and warnings before: 71 this patch: 71
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 3198 this patch: 3198
netdev/header_inline success Link

Commit Message

Moshe Shemesh March 15, 2021, 5:12 p.m. UTC
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>

Define get_module_eeprom_data_by_page() ethtool callback and implement
netlink infrastructure.

get_module_eeprom_data_by_page() allows network drivers to dump a part
of module's EEPROM specified by page and bank numbers along with offset
and length. It is effectively a netlink replacement for
get_module_info() and get_module_eeprom() pair, which is needed due to
emergence of complex non-linear EEPROM layouts.

Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
---
 Documentation/networking/ethtool-netlink.rst |  34 ++++-
 include/linux/ethtool.h                      |   8 +-
 include/uapi/linux/ethtool.h                 |  25 +++
 include/uapi/linux/ethtool_netlink.h         |  19 +++
 net/ethtool/Makefile                         |   2 +-
 net/ethtool/eeprom.c                         | 153 +++++++++++++++++++
 net/ethtool/netlink.c                        |  10 ++
 net/ethtool/netlink.h                        |   2 +
 8 files changed, 249 insertions(+), 4 deletions(-)
 create mode 100644 net/ethtool/eeprom.c

Comments

Jakub Kicinski March 15, 2021, 9:01 p.m. UTC | #1
On Mon, 15 Mar 2021 19:12:39 +0200 Moshe Shemesh wrote:
> From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
> 
> Define get_module_eeprom_data_by_page() ethtool callback and implement
> netlink infrastructure.
> 
> get_module_eeprom_data_by_page() allows network drivers to dump a part
> of module's EEPROM specified by page and bank numbers along with offset
> and length. It is effectively a netlink replacement for
> get_module_info() and get_module_eeprom() pair, which is needed due to
> emergence of complex non-linear EEPROM layouts.
> 
> Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>

s/eeprom_data/eeprom/ everywhere?

Otherwise some of your identifiers are over 30 characters long.

> diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
> index cde753bb2093..b3e92db3ad37 100644
> --- a/include/uapi/linux/ethtool.h
> +++ b/include/uapi/linux/ethtool.h
> @@ -340,6 +340,28 @@ struct ethtool_eeprom {
>  	__u8	data[0];
>  };
>  
> +/**
> + * struct ethtool_eeprom_data - EEPROM dump from specified page
> + * @offset: Offset within the specified EEPROM page to begin read, in bytes.
> + * @length: Number of bytes to read.
> + * @page: Page number to read from.
> + * @bank: Page bank number to read from, if applicable by EEPROM spec.
> + * @i2c_address: I2C address of a page. Value less than 0x7f expected. Most
> + *	EEPROMs use 0x50 or 0x51.
> + * @data: Pointer to buffer with EEPROM data of @length size.
> + *
> + * This can be used to manage pages during EEPROM dump in ethtool and pass
> + * required information to the driver.
> + */
> +struct ethtool_eeprom_data {
> +	__u32	offset;
> +	__u32	length;
> +	__u8	page;
> +	__u8	bank;
> +	__u8	i2c_address;
> +	__u8	*data;
> +};
> +

This structure does not have to be part of uAPI, right?

> +#define ETH_MODULE_EEPROM_PAGE_LEN	256
> +#define ETH_MODULE_MAX_I2C_ADDRESS	0x7f

Not sure about these defines either.

> +static int eeprom_data_prepare_data(const struct ethnl_req_info *req_base,
> +				    struct ethnl_reply_data *reply_base,
> +				    struct genl_info *info)
> +{
> +	struct eeprom_data_reply_data *reply = EEPROM_DATA_REPDATA(reply_base);
> +	struct eeprom_data_req_info *request = EEPROM_DATA_REQINFO(req_base);
> +	struct ethtool_eeprom_data page_data = {0};
> +	struct net_device *dev = reply_base->dev;
> +	int ret;
> +
> +	if (!dev->ethtool_ops->get_module_eeprom_data_by_page)
> +		return -EOPNOTSUPP;
> +
> +	page_data.offset = request->offset;
> +	page_data.length = request->length;
> +	page_data.i2c_address = request->i2c_address;
> +	page_data.page = request->page;
> +	page_data.bank = request->bank;
> +	page_data.data = kmalloc(page_data.length, GFP_KERNEL);
> +	if (!page_data.data)
> +		return -ENOMEM;

nit: new line?

> +	ret = ethnl_ops_begin(dev);
> +	if (ret)
> +		goto err_free;
> +
> +	ret = dev->ethtool_ops->get_module_eeprom_data_by_page(dev, &page_data,
> +							       info->extack);
> +	if (ret < 0)
> +		goto err_ops;
> +
> +	reply->length = ret;
> +	reply->data = page_data.data;
> +
> +	ethnl_ops_complete(dev);
> +	return 0;
> +
> +err_ops:
> +	ethnl_ops_complete(dev);
> +err_free:
> +	kfree(page_data.data);
> +	return ret;
> +}
> +
> +static int eeprom_data_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
> +				     struct netlink_ext_ack *extack)
> +{
> +	struct eeprom_data_req_info *request = EEPROM_DATA_REQINFO(req_info);
> +	struct net_device *dev = req_info->dev;
> +
> +	if (!tb[ETHTOOL_A_EEPROM_DATA_OFFSET] ||
> +	    !tb[ETHTOOL_A_EEPROM_DATA_LENGTH] ||
> +	    !tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS])
> +		return -EINVAL;
> +
> +	request->i2c_address = nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS]);
> +	if (request->i2c_address > ETH_MODULE_MAX_I2C_ADDRESS)
> +		return -EINVAL;

Max value should be set in the netlink policy.

> +	request->offset = nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_OFFSET]);
> +	request->length = nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_LENGTH]);
> +	if (tb[ETHTOOL_A_EEPROM_DATA_PAGE] &&
> +	    dev->ethtool_ops->get_module_eeprom_data_by_page &&

Why check dev->ethtool_ops->get_module_eeprom_data_by_page here?

> +	    request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN)
> +		return -EINVAL;
> +
> +	if (tb[ETHTOOL_A_EEPROM_DATA_PAGE])
> +		request->page = nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_PAGE]);
> +	if (tb[ETHTOOL_A_EEPROM_DATA_BANK])
> +		request->bank = nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_BANK]);
> +
> +	return 0;
> +}
> +
> +static int eeprom_data_reply_size(const struct ethnl_req_info *req_base,
> +				  const struct ethnl_reply_data *reply_base)
> +{
> +	const struct eeprom_data_req_info *request = EEPROM_DATA_REQINFO(req_base);
> +
> +	return nla_total_size(sizeof(u32)) + /* _EEPROM_DATA_LENGTH */
> +	       nla_total_size(sizeof(u8) * request->length); /* _EEPROM_DATA */
> +}

Why report length? Is the netlink length of the DATA attribute not
sufficient?

Should we echo back offset in case driver wants to report re-aligned
data?

> +const struct nla_policy ethnl_eeprom_data_get_policy[] = {
> +	[ETHTOOL_A_EEPROM_DATA_HEADER]		= NLA_POLICY_NESTED(ethnl_header_policy),
> +	[ETHTOOL_A_EEPROM_DATA_OFFSET]		= { .type = NLA_U32 },
> +	[ETHTOOL_A_EEPROM_DATA_LENGTH]		= { .type = NLA_U32 },
> +	[ETHTOOL_A_EEPROM_DATA_PAGE]		= { .type = NLA_U8 },
> +	[ETHTOOL_A_EEPROM_DATA_BANK]		= { .type = NLA_U8 },
> +	[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS]	= { .type = NLA_U8 },
> +	[ETHTOOL_A_EEPROM_DATA]			= { .type = NLA_BINARY },

This is a policy for inputs, DATA should not be allowed on input, right?
Don Bollinger March 15, 2021, 10:31 p.m. UTC | #2
On Mon, 15 Mar 2021 10:12:39 +0700 Moshe Shemesh wrote:
> From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
> 
> Define get_module_eeprom_data_by_page() ethtool callback and
> implement netlink infrastructure.
> 
> get_module_eeprom_data_by_page() allows network drivers to dump a
> part of module's EEPROM specified by page and bank numbers along with
> offset and length. It is effectively a netlink replacement for
> get_module_info() and get_module_eeprom() pair, which is needed due to
> emergence of complex non-linear EEPROM layouts.
> 
> Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
> ---
>  Documentation/networking/ethtool-netlink.rst |  34 ++++-
>  include/linux/ethtool.h                      |   8 +-
>  include/uapi/linux/ethtool.h                 |  25 +++
>  include/uapi/linux/ethtool_netlink.h         |  19 +++
>  net/ethtool/Makefile                         |   2 +-
>  net/ethtool/eeprom.c                         | 153 +++++++++++++++++++
>  net/ethtool/netlink.c                        |  10 ++
>  net/ethtool/netlink.h                        |   2 +
>  8 files changed, 249 insertions(+), 4 deletions(-)  create mode 100644
> net/ethtool/eeprom.c
> 
> diff --git a/Documentation/networking/ethtool-netlink.rst
> b/Documentation/networking/ethtool-netlink.rst
> index 05073482db05..25846b97632a 100644
> --- a/Documentation/networking/ethtool-netlink.rst
> +++ b/Documentation/networking/ethtool-netlink.rst
> @@ -1280,6 +1280,36 @@ Kernel response contents:
>  For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES``
> indicates that  the table contains static entries, hard-coded by the NIC.
> 
> +EEPROM_DATA
> +===========
> +
> +Fetch module EEPROM data dump.
> +
> +Request contents:
> +
> +  =====================================  ======
> ==========================
> +  ``ETHTOOL_A_EEPROM_DATA_HEADER``       nested  request header
> +  ``ETHTOOL_A_EEPROM_DATA_OFFSET``       u32     offset within a page
> +  ``ETHTOOL_A_EEPROM_DATA_LENGTH``       u32     amount of bytes to read
> +  ``ETHTOOL_A_EEPROM_DATA_PAGE``         u8      page number
> +  ``ETHTOOL_A_EEPROM_DATA_BANK``         u8      bank number
> +  ``ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS``  u8      page I2C address
> +  =====================================  ======
> + ==========================
> +
> +Kernel response contents:
> +
> +
+---------------------------------------------+--------+--------------------
-+
> + | ``ETHTOOL_A_EEPROM_DATA_HEADER``            | nested | reply header
> |
> +
+---------------------------------------------+--------+--------------------
-+
> + | ``ETHTOOL_A_EEPROM_DATA_LENGTH``            | u32    | amount of bytes
> read|
> +
+---------------------------------------------+--------+--------------------
-+
> + | ``ETHTOOL_A_EEPROM_DATA``                   | nested | array of bytes
from |
> + |                                             |        | module EEPROM
|
> +
+---------------------------------------------+--------+--------------------
-+
> +
> +``ETHTOOL_A_EEPROM_DATA`` has an attribute length equal to the amount
> +of bytes driver actually read.
> +
>  Request translation
>  ===================
> 
> @@ -1357,8 +1387,8 @@ are netlink only.
>    ``ETHTOOL_GET_DUMP_FLAG``           n/a
>    ``ETHTOOL_GET_DUMP_DATA``           n/a
>    ``ETHTOOL_GET_TS_INFO``             ``ETHTOOL_MSG_TSINFO_GET``
> -  ``ETHTOOL_GMODULEINFO``             n/a
> -  ``ETHTOOL_GMODULEEEPROM``           n/a
> +  ``ETHTOOL_GMODULEINFO``
> ``ETHTOOL_MSG_MODULE_EEPROM_GET``
> +  ``ETHTOOL_GMODULEEEPROM``
> ``ETHTOOL_MSG_MODULE_EEPROM_GET``
>    ``ETHTOOL_GEEE``                    ``ETHTOOL_MSG_EEE_GET``
>    ``ETHTOOL_SEEE``                    ``ETHTOOL_MSG_EEE_SET``
>    ``ETHTOOL_GRSSH``                   n/a
> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index
> ec4cd3921c67..9551005b350a 100644
> --- a/include/linux/ethtool.h
> +++ b/include/linux/ethtool.h
> @@ -81,6 +81,7 @@ enum {
>  #define ETH_RSS_HASH_NO_CHANGE	0
> 
>  struct net_device;
> +struct netlink_ext_ack;
> 
>  /* Some generic methods drivers may use in their ethtool_ops */
>  u32 ethtool_op_get_link(struct net_device *dev); @@ -410,6 +411,9 @@
> struct ethtool_pause_stats {
>   * @get_ethtool_phy_stats: Return extended statistics about the PHY
> device.
>   *	This is only useful if the device maintains PHY statistics and
>   *	cannot use the standard PHY library helpers.
> + * @get_module_eeprom_data_by_page: Get a region of plug-in module
> EEPROM data
> + *	from specified page. Returns a negative error code or the amount of
> + *	bytes read.
>   *
>   * All operations are optional (i.e. the function pointer may be set
>   * to %NULL) and callers must take this into account.  Callers must @@
-515,6
> +519,9 @@ struct ethtool_ops {
>  				   const struct ethtool_tunable *, void *);
>  	int	(*set_phy_tunable)(struct net_device *,
>  				   const struct ethtool_tunable *, const
void
> *);
> +	int	(*get_module_eeprom_data_by_page)(struct net_device
> *dev,
> +						  const struct
> ethtool_eeprom_data *page,
> +						  struct netlink_ext_ack
> *extack);
>  };
> 
>  int ethtool_check_ops(const struct ethtool_ops *ops); @@ -538,7 +545,6
> @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
>  				       const struct ethtool_link_ksettings
*cmd,
>  				       u32 *dev_speed, u8 *dev_duplex);
> 
> -struct netlink_ext_ack;
>  struct phy_device;
>  struct phy_tdr_config;
> 
> diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index
> cde753bb2093..b3e92db3ad37 100644
> --- a/include/uapi/linux/ethtool.h
> +++ b/include/uapi/linux/ethtool.h
> @@ -340,6 +340,28 @@ struct ethtool_eeprom {
>  	__u8	data[0];
>  };
> 
> +/**
> + * struct ethtool_eeprom_data - EEPROM dump from specified page
> + * @offset: Offset within the specified EEPROM page to begin read, in
> bytes.
> + * @length: Number of bytes to read.
> + * @page: Page number to read from.
> + * @bank: Page bank number to read from, if applicable by EEPROM spec.
> + * @i2c_address: I2C address of a page. Value less than 0x7f expected.
> Most
> + *	EEPROMs use 0x50 or 0x51.
> + * @data: Pointer to buffer with EEPROM data of @length size.
> + *
> + * This can be used to manage pages during EEPROM dump in ethtool and
> +pass
> + * required information to the driver.
> + */
> +struct ethtool_eeprom_data {
> +	__u32	offset;
> +	__u32	length;
> +	__u8	page;
> +	__u8	bank;
> +	__u8	i2c_address;
> +	__u8	*data;
> +};
> +
>  /**
>   * struct ethtool_eee - Energy Efficient Ethernet information
>   * @cmd: ETHTOOL_{G,S}EEE
> @@ -1865,6 +1887,9 @@ static inline int ethtool_validate_duplex(__u8
> duplex)
>  #define ETH_MODULE_SFF_8636_MAX_LEN     640
>  #define ETH_MODULE_SFF_8436_MAX_LEN     640
> 
> +#define ETH_MODULE_EEPROM_PAGE_LEN	256
> +#define ETH_MODULE_MAX_I2C_ADDRESS	0x7f
> +
>  /* Reset flags */
>  /* The reset() operation must clear the flags for the components which
>   * were actually reset.  On successful return, the flags indicate the
diff --git
> a/include/uapi/linux/ethtool_netlink.h
> b/include/uapi/linux/ethtool_netlink.h
> index a286635ac9b8..e1b1b962f3da 100644
> --- a/include/uapi/linux/ethtool_netlink.h
> +++ b/include/uapi/linux/ethtool_netlink.h
> @@ -42,6 +42,7 @@ enum {
>  	ETHTOOL_MSG_CABLE_TEST_ACT,
>  	ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
>  	ETHTOOL_MSG_TUNNEL_INFO_GET,
> +	ETHTOOL_MSG_EEPROM_DATA_GET,
> 
>  	/* add new constants above here */
>  	__ETHTOOL_MSG_USER_CNT,
> @@ -80,6 +81,7 @@ enum {
>  	ETHTOOL_MSG_CABLE_TEST_NTF,
>  	ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
>  	ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
> +	ETHTOOL_MSG_EEPROM_DATA_GET_REPLY,
> 
>  	/* add new constants above here */
>  	__ETHTOOL_MSG_KERNEL_CNT,
> @@ -629,6 +631,23 @@ enum {
>  	ETHTOOL_A_TUNNEL_INFO_MAX =
> (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)  };
> 
> +/* MODULE EEPROM DATA */
> +
> +enum {
> +	ETHTOOL_A_EEPROM_DATA_UNSPEC,
> +	ETHTOOL_A_EEPROM_DATA_HEADER,			/* nest -
> _A_HEADER_* */
> +
> +	ETHTOOL_A_EEPROM_DATA_OFFSET,			/* u32 */
> +	ETHTOOL_A_EEPROM_DATA_LENGTH,			/* u32 */
> +	ETHTOOL_A_EEPROM_DATA_PAGE,			/* u8 */
> +	ETHTOOL_A_EEPROM_DATA_BANK,			/* u8 */
> +	ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS,		/* u8 */
> +	ETHTOOL_A_EEPROM_DATA,				/* nested */
> +
> +	__ETHTOOL_A_EEPROM_DATA_CNT,
> +	ETHTOOL_A_EEPROM_DATA_MAX =
> (__ETHTOOL_A_EEPROM_DATA_CNT - 1) };
> +
>  /* generic netlink info */
>  #define ETHTOOL_GENL_NAME "ethtool"
>  #define ETHTOOL_GENL_VERSION 1
> diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index
> 7a849ff22dad..d604346bc074 100644
> --- a/net/ethtool/Makefile
> +++ b/net/ethtool/Makefile
> @@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK)	+= ethtool_nl.o
>  ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
>  		   linkstate.o debug.o wol.o features.o privflags.o rings.o
\
>  		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o
\
> -		   tunnels.o
> +		   tunnels.o eeprom.o
> diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c new file mode
> 100644 index 000000000000..e110336dc231
> --- /dev/null
> +++ b/net/ethtool/eeprom.c
> @@ -0,0 +1,153 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include <linux/ethtool.h>
> +#include "netlink.h"
> +#include "common.h"
> +
> +struct eeprom_data_req_info {
> +	struct ethnl_req_info	base;
> +	u32			offset;
> +	u32			length;
> +	u8			page;
> +	u8			bank;
> +	u8			i2c_address;
> +};
> +
> +struct eeprom_data_reply_data {
> +	struct ethnl_reply_data base;
> +	u32			length;
> +	u8			*data;
> +};
> +
> +#define EEPROM_DATA_REQINFO(__req_base) \
> +	container_of(__req_base, struct eeprom_data_req_info, base)
> +
> +#define EEPROM_DATA_REPDATA(__reply_base) \
> +	container_of(__reply_base, struct eeprom_data_reply_data, base)
> +
> +static int eeprom_data_prepare_data(const struct ethnl_req_info
> *req_base,
> +				    struct ethnl_reply_data *reply_base,
> +				    struct genl_info *info)
> +{
> +	struct eeprom_data_reply_data *reply =
> EEPROM_DATA_REPDATA(reply_base);
> +	struct eeprom_data_req_info *request =
> EEPROM_DATA_REQINFO(req_base);
> +	struct ethtool_eeprom_data page_data = {0};
> +	struct net_device *dev = reply_base->dev;
> +	int ret;
> +
> +	if (!dev->ethtool_ops->get_module_eeprom_data_by_page)
> +		return -EOPNOTSUPP;
> +
> +	page_data.offset = request->offset;
> +	page_data.length = request->length;
> +	page_data.i2c_address = request->i2c_address;
> +	page_data.page = request->page;
> +	page_data.bank = request->bank;
> +	page_data.data = kmalloc(page_data.length, GFP_KERNEL);
> +	if (!page_data.data)
> +		return -ENOMEM;
> +	ret = ethnl_ops_begin(dev);
> +	if (ret)
> +		goto err_free;
> +
> +	ret = dev->ethtool_ops->get_module_eeprom_data_by_page(dev,
> &page_data,
> +
info->extack);
> +	if (ret < 0)
> +		goto err_ops;
> +
> +	reply->length = ret;
> +	reply->data = page_data.data;
> +
> +	ethnl_ops_complete(dev);
> +	return 0;
> +
> +err_ops:
> +	ethnl_ops_complete(dev);
> +err_free:
> +	kfree(page_data.data);
> +	return ret;
> +}
> +
> +static int eeprom_data_parse_request(struct ethnl_req_info *req_info,
> struct nlattr **tb,
> +				     struct netlink_ext_ack *extack) {
> +	struct eeprom_data_req_info *request =
> EEPROM_DATA_REQINFO(req_info);
> +	struct net_device *dev = req_info->dev;
> +
> +	if (!tb[ETHTOOL_A_EEPROM_DATA_OFFSET] ||
> +	    !tb[ETHTOOL_A_EEPROM_DATA_LENGTH] ||
> +	    !tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS])
> +		return -EINVAL;

Suggestion:  Consider using i2c address 0x50 as a default if none is given.
0x50 is the first 256 bytes of SFP, and all of QSFP and CMIS EEPROM.  If
there is a page given on an SFP device, then you know i2c address is 0x51.
The only thing that REQUIRES 0x51 is legacy offset 256-511 on SFP.  Keep the
i2c address, but make it optional for the non-standard devices that Andrew
has mentioned, and for that one section of SFP data that requires it.  And
document it for the user.

Note also, legacy does not have an i2c address, so passing this along allows
the fallback to legacy to cover more cases.  (Legacy ethtool addresses this
part of SFP as offset 256-511.)

> +
> +	request->i2c_address =
> nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS]);
> +	if (request->i2c_address > ETH_MODULE_MAX_I2C_ADDRESS)
> +		return -EINVAL;
> +
> +	request->offset =
> nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_OFFSET]);
> +	request->length =
> nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_LENGTH]);
> +	if (tb[ETHTOOL_A_EEPROM_DATA_PAGE] &&
> +	    dev->ethtool_ops->get_module_eeprom_data_by_page &&
> +	    request->offset + request->length >
> ETH_MODULE_EEPROM_PAGE_LEN)
> +		return -EINVAL;

If a request exceeds the length of EEPROM (in legacy), we truncate it to
EEPROM length.  I suggest if a request exceeds the length of the page (in
the new KAPI), then we truncate at the end of the page.  Thus rather than
'return -EINVAL;' I suggest

          request->length = ETH_MODULE_EEPROM_PAGE_LEN - request->offset;

> +
> +	if (tb[ETHTOOL_A_EEPROM_DATA_PAGE])
> +		request->page =
> nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_PAGE]);
> +	if (tb[ETHTOOL_A_EEPROM_DATA_BANK])
> +		request->bank =
> nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_BANK]);
> +
> +	return 0;
> +}
> +
> +static int eeprom_data_reply_size(const struct ethnl_req_info *req_base,
> +				  const struct ethnl_reply_data *reply_base)
{
> +	const struct eeprom_data_req_info *request =
> +EEPROM_DATA_REQINFO(req_base);
> +
> +	return nla_total_size(sizeof(u32)) + /* _EEPROM_DATA_LENGTH */
> +	       nla_total_size(sizeof(u8) * request->length); /* _EEPROM_DATA
> +*/ }
> +
> +static int eeprom_data_fill_reply(struct sk_buff *skb,
> +				  const struct ethnl_req_info *req_base,
> +				  const struct ethnl_reply_data *reply_base)
{
> +	struct eeprom_data_reply_data *reply =
> +EEPROM_DATA_REPDATA(reply_base);
> +
> +	if (nla_put_u32(skb, ETHTOOL_A_EEPROM_DATA_LENGTH, reply-
> >length) ||
> +	    nla_put(skb, ETHTOOL_A_EEPROM_DATA, reply->length, reply-
> >data))
> +		return -EMSGSIZE;
> +
> +	return 0;
> +}
> +
> +static void eeprom_data_cleanup_data(struct ethnl_reply_data
> +*reply_base) {
> +	struct eeprom_data_reply_data *reply =
> +EEPROM_DATA_REPDATA(reply_base);
> +
> +	kfree(reply->data);
> +}
> +
> +const struct ethnl_request_ops ethnl_eeprom_data_request_ops = {
> +	.request_cmd		= ETHTOOL_MSG_EEPROM_DATA_GET,
> +	.reply_cmd		=
> ETHTOOL_MSG_EEPROM_DATA_GET_REPLY,
> +	.hdr_attr		= ETHTOOL_A_EEPROM_DATA_HEADER,
> +	.req_info_size		= sizeof(struct eeprom_data_req_info),
> +	.reply_data_size	= sizeof(struct eeprom_data_reply_data),
> +
> +	.parse_request		= eeprom_data_parse_request,
> +	.prepare_data		= eeprom_data_prepare_data,
> +	.reply_size		= eeprom_data_reply_size,
> +	.fill_reply		= eeprom_data_fill_reply,
> +	.cleanup_data		= eeprom_data_cleanup_data,
> +};
> +
> +const struct nla_policy ethnl_eeprom_data_get_policy[] = {
> +	[ETHTOOL_A_EEPROM_DATA_HEADER]		=
> NLA_POLICY_NESTED(ethnl_header_policy),
> +	[ETHTOOL_A_EEPROM_DATA_OFFSET]		= { .type =
> NLA_U32 },
> +	[ETHTOOL_A_EEPROM_DATA_LENGTH]		= { .type =
> NLA_U32 },
> +	[ETHTOOL_A_EEPROM_DATA_PAGE]		= { .type = NLA_U8 },
> +	[ETHTOOL_A_EEPROM_DATA_BANK]		= { .type = NLA_U8 },
> +	[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS]	= { .type = NLA_U8 },
> +	[ETHTOOL_A_EEPROM_DATA]			= { .type =
> NLA_BINARY },
> +};
> +
> diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index
> 50d3c8896f91..ff2528bee192 100644
> --- a/net/ethtool/netlink.c
> +++ b/net/ethtool/netlink.c
> @@ -245,6 +245,7 @@
> ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
>  	[ETHTOOL_MSG_PAUSE_GET]		=
> &ethnl_pause_request_ops,
>  	[ETHTOOL_MSG_EEE_GET]		= &ethnl_eee_request_ops,
>  	[ETHTOOL_MSG_TSINFO_GET]	= &ethnl_tsinfo_request_ops,
> +	[ETHTOOL_MSG_EEPROM_DATA_GET]	=
> &ethnl_eeprom_data_request_ops,
>  };
> 
>  static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback
> *cb) @@ -912,6 +913,15 @@ static const struct genl_ops ethtool_genl_ops[]
> = {
>  		.policy = ethnl_tunnel_info_get_policy,
>  		.maxattr = ARRAY_SIZE(ethnl_tunnel_info_get_policy) - 1,
>  	},
> +	{
> +		.cmd	= ETHTOOL_MSG_EEPROM_DATA_GET,
> +		.doit	= ethnl_default_doit,
> +		.start	= ethnl_default_start,
> +		.dumpit	= ethnl_default_dumpit,
> +		.done	= ethnl_default_done,
> +		.policy = ethnl_eeprom_data_get_policy,
> +		.maxattr = ARRAY_SIZE(ethnl_eeprom_data_get_policy) - 1,
> +	},
>  };
> 
>  static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff
--git
> a/net/ethtool/netlink.h b/net/ethtool/netlink.h index
> 6eabd58d81bf..60954c7b4dfe 100644
> --- a/net/ethtool/netlink.h
> +++ b/net/ethtool/netlink.h
> @@ -344,6 +344,7 @@ extern const struct ethnl_request_ops
> ethnl_coalesce_request_ops;  extern const struct ethnl_request_ops
> ethnl_pause_request_ops;  extern const struct ethnl_request_ops
> ethnl_eee_request_ops;  extern const struct ethnl_request_ops
> ethnl_tsinfo_request_ops;
> +extern const struct ethnl_request_ops ethnl_eeprom_data_request_ops;
> 
>  extern const struct nla_policy
> ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];  extern const struct
> nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@
> -375,6 +376,7 @@ extern const struct nla_policy
> ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER +  extern const struct
> nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER +
> 1];  extern const struct nla_policy
> ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1];
> extern const struct nla_policy
> ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
> +extern const struct nla_policy
> +ethnl_eeprom_data_get_policy[ETHTOOL_A_EEPROM_DATA + 1];
> 
>  int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);  int
> ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
> --
> 2.26.2
Moshe Shemesh March 16, 2021, 5:59 p.m. UTC | #3
On 3/15/2021 11:01 PM, Jakub Kicinski wrote:
>
> On Mon, 15 Mar 2021 19:12:39 +0200 Moshe Shemesh wrote:
>> From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
>>
>> Define get_module_eeprom_data_by_page() ethtool callback and implement
>> netlink infrastructure.
>>
>> get_module_eeprom_data_by_page() allows network drivers to dump a part
>> of module's EEPROM specified by page and bank numbers along with offset
>> and length. It is effectively a netlink replacement for
>> get_module_info() and get_module_eeprom() pair, which is needed due to
>> emergence of complex non-linear EEPROM layouts.
>>
>> Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
> s/eeprom_data/eeprom/ everywhere?
>
> Otherwise some of your identifiers are over 30 characters long.


That can work part to one struct on the uapi file that can conflict with 
previous old KAPI struct, but we can figure it too.

Will fix.

>> diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
>> index cde753bb2093..b3e92db3ad37 100644
>> --- a/include/uapi/linux/ethtool.h
>> +++ b/include/uapi/linux/ethtool.h
>> @@ -340,6 +340,28 @@ struct ethtool_eeprom {
>>        __u8    data[0];
>>   };
>>
>> +/**
>> + * struct ethtool_eeprom_data - EEPROM dump from specified page
>> + * @offset: Offset within the specified EEPROM page to begin read, in bytes.
>> + * @length: Number of bytes to read.
>> + * @page: Page number to read from.
>> + * @bank: Page bank number to read from, if applicable by EEPROM spec.
>> + * @i2c_address: I2C address of a page. Value less than 0x7f expected. Most
>> + *   EEPROMs use 0x50 or 0x51.
>> + * @data: Pointer to buffer with EEPROM data of @length size.
>> + *
>> + * This can be used to manage pages during EEPROM dump in ethtool and pass
>> + * required information to the driver.
>> + */
>> +struct ethtool_eeprom_data {
>> +     __u32   offset;
>> +     __u32   length;
>> +     __u8    page;
>> +     __u8    bank;
>> +     __u8    i2c_address;
>> +     __u8    *data;
>> +};
>> +
> This structure does not have to be part of uAPI, right?
Right, will fix.
>> +#define ETH_MODULE_EEPROM_PAGE_LEN   256
>> +#define ETH_MODULE_MAX_I2C_ADDRESS   0x7f
> Not sure about these defines either.
Probably.
>> +static int eeprom_data_prepare_data(const struct ethnl_req_info *req_base,
>> +                                 struct ethnl_reply_data *reply_base,
>> +                                 struct genl_info *info)
>> +{
>> +     struct eeprom_data_reply_data *reply = EEPROM_DATA_REPDATA(reply_base);
>> +     struct eeprom_data_req_info *request = EEPROM_DATA_REQINFO(req_base);
>> +     struct ethtool_eeprom_data page_data = {0};
>> +     struct net_device *dev = reply_base->dev;
>> +     int ret;
>> +
>> +     if (!dev->ethtool_ops->get_module_eeprom_data_by_page)
>> +             return -EOPNOTSUPP;
>> +
>> +     page_data.offset = request->offset;
>> +     page_data.length = request->length;
>> +     page_data.i2c_address = request->i2c_address;
>> +     page_data.page = request->page;
>> +     page_data.bank = request->bank;
>> +     page_data.data = kmalloc(page_data.length, GFP_KERNEL);
>> +     if (!page_data.data)
>> +             return -ENOMEM;
> nit: new line?
Ack.
>> +     ret = ethnl_ops_begin(dev);
>> +     if (ret)
>> +             goto err_free;
>> +
>> +     ret = dev->ethtool_ops->get_module_eeprom_data_by_page(dev, &page_data,
>> +                                                            info->extack);
>> +     if (ret < 0)
>> +             goto err_ops;
>> +
>> +     reply->length = ret;
>> +     reply->data = page_data.data;
>> +
>> +     ethnl_ops_complete(dev);
>> +     return 0;
>> +
>> +err_ops:
>> +     ethnl_ops_complete(dev);
>> +err_free:
>> +     kfree(page_data.data);
>> +     return ret;
>> +}
>> +
>> +static int eeprom_data_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
>> +                                  struct netlink_ext_ack *extack)
>> +{
>> +     struct eeprom_data_req_info *request = EEPROM_DATA_REQINFO(req_info);
>> +     struct net_device *dev = req_info->dev;
>> +
>> +     if (!tb[ETHTOOL_A_EEPROM_DATA_OFFSET] ||
>> +         !tb[ETHTOOL_A_EEPROM_DATA_LENGTH] ||
>> +         !tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS])
>> +             return -EINVAL;
>> +
>> +     request->i2c_address = nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS]);
>> +     if (request->i2c_address > ETH_MODULE_MAX_I2C_ADDRESS)
>> +             return -EINVAL;
> Max value should be set in the netlink policy.
Ack
>> +     request->offset = nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_OFFSET]);
>> +     request->length = nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_LENGTH]);
>> +     if (tb[ETHTOOL_A_EEPROM_DATA_PAGE] &&
>> +         dev->ethtool_ops->get_module_eeprom_data_by_page &&
> Why check dev->ethtool_ops->get_module_eeprom_data_by_page here?
Right, actually not needed here, thanks.
>> +         request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN)
>> +             return -EINVAL;
>> +
>> +     if (tb[ETHTOOL_A_EEPROM_DATA_PAGE])
>> +             request->page = nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_PAGE]);
>> +     if (tb[ETHTOOL_A_EEPROM_DATA_BANK])
>> +             request->bank = nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_BANK]);
>> +
>> +     return 0;
>> +}
>> +
>> +static int eeprom_data_reply_size(const struct ethnl_req_info *req_base,
>> +                               const struct ethnl_reply_data *reply_base)
>> +{
>> +     const struct eeprom_data_req_info *request = EEPROM_DATA_REQINFO(req_base);
>> +
>> +     return nla_total_size(sizeof(u32)) + /* _EEPROM_DATA_LENGTH */
>> +            nla_total_size(sizeof(u8) * request->length); /* _EEPROM_DATA */
>> +}
> Why report length? Is the netlink length of the DATA attribute not
> sufficient?
Right, thanks.
> Should we echo back offset in case driver wants to report re-aligned
> data?
The driver knows the requested offset and data has to be from that offset.
>> +const struct nla_policy ethnl_eeprom_data_get_policy[] = {
>> +     [ETHTOOL_A_EEPROM_DATA_HEADER]          = NLA_POLICY_NESTED(ethnl_header_policy),
>> +     [ETHTOOL_A_EEPROM_DATA_OFFSET]          = { .type = NLA_U32 },
>> +     [ETHTOOL_A_EEPROM_DATA_LENGTH]          = { .type = NLA_U32 },
>> +     [ETHTOOL_A_EEPROM_DATA_PAGE]            = { .type = NLA_U8 },
>> +     [ETHTOOL_A_EEPROM_DATA_BANK]            = { .type = NLA_U8 },
>> +     [ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS]     = { .type = NLA_U8 },
>> +     [ETHTOOL_A_EEPROM_DATA]                 = { .type = NLA_BINARY },
> This is a policy for inputs, DATA should not be allowed on input, right?
Right, thanks.
Moshe Shemesh March 16, 2021, 6:23 p.m. UTC | #4
On 3/16/2021 12:31 AM, Don Bollinger wrote:
> On Mon, 15 Mar 2021 10:12:39 +0700 Moshe Shemesh wrote:
>> From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
>>
>> Define get_module_eeprom_data_by_page() ethtool callback and
>> implement netlink infrastructure.
>>
>> get_module_eeprom_data_by_page() allows network drivers to dump a
>> part of module's EEPROM specified by page and bank numbers along with
>> offset and length. It is effectively a netlink replacement for
>> get_module_info() and get_module_eeprom() pair, which is needed due to
>> emergence of complex non-linear EEPROM layouts.
>>
>> Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
>> ---
>>   Documentation/networking/ethtool-netlink.rst |  34 ++++-
>>   include/linux/ethtool.h                      |   8 +-
>>   include/uapi/linux/ethtool.h                 |  25 +++
>>   include/uapi/linux/ethtool_netlink.h         |  19 +++
>>   net/ethtool/Makefile                         |   2 +-
>>   net/ethtool/eeprom.c                         | 153 +++++++++++++++++++
>>   net/ethtool/netlink.c                        |  10 ++
>>   net/ethtool/netlink.h                        |   2 +
>>   8 files changed, 249 insertions(+), 4 deletions(-)  create mode 100644
>> net/ethtool/eeprom.c
>>
>> diff --git a/Documentation/networking/ethtool-netlink.rst
>> b/Documentation/networking/ethtool-netlink.rst
>> index 05073482db05..25846b97632a 100644
>> --- a/Documentation/networking/ethtool-netlink.rst
>> +++ b/Documentation/networking/ethtool-netlink.rst
>> @@ -1280,6 +1280,36 @@ Kernel response contents:
>>   For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES``
>> indicates that  the table contains static entries, hard-coded by the NIC.
>>
>> +EEPROM_DATA
>> +===========
>> +
>> +Fetch module EEPROM data dump.
>> +
>> +Request contents:
>> +
>> +  =====================================  ======
>> ==========================
>> +  ``ETHTOOL_A_EEPROM_DATA_HEADER``       nested  request header
>> +  ``ETHTOOL_A_EEPROM_DATA_OFFSET``       u32     offset within a page
>> +  ``ETHTOOL_A_EEPROM_DATA_LENGTH``       u32     amount of bytes to read
>> +  ``ETHTOOL_A_EEPROM_DATA_PAGE``         u8      page number
>> +  ``ETHTOOL_A_EEPROM_DATA_BANK``         u8      bank number
>> +  ``ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS``  u8      page I2C address
>> +  =====================================  ======
>> + ==========================
>> +
>> +Kernel response contents:
>> +
>> +
> +---------------------------------------------+--------+--------------------
> -+
>> + | ``ETHTOOL_A_EEPROM_DATA_HEADER``            | nested | reply header
>> |
>> +
> +---------------------------------------------+--------+--------------------
> -+
>> + | ``ETHTOOL_A_EEPROM_DATA_LENGTH``            | u32    | amount of bytes
>> read|
>> +
> +---------------------------------------------+--------+--------------------
> -+
>> + | ``ETHTOOL_A_EEPROM_DATA``                   | nested | array of bytes
> from |
>> + |                                             |        | module EEPROM
> |
>> +
> +---------------------------------------------+--------+--------------------
> -+
>> +
>> +``ETHTOOL_A_EEPROM_DATA`` has an attribute length equal to the amount
>> +of bytes driver actually read.
>> +
>>   Request translation
>>   ===================
>>
>> @@ -1357,8 +1387,8 @@ are netlink only.
>>     ``ETHTOOL_GET_DUMP_FLAG``           n/a
>>     ``ETHTOOL_GET_DUMP_DATA``           n/a
>>     ``ETHTOOL_GET_TS_INFO``             ``ETHTOOL_MSG_TSINFO_GET``
>> -  ``ETHTOOL_GMODULEINFO``             n/a
>> -  ``ETHTOOL_GMODULEEEPROM``           n/a
>> +  ``ETHTOOL_GMODULEINFO``
>> ``ETHTOOL_MSG_MODULE_EEPROM_GET``
>> +  ``ETHTOOL_GMODULEEEPROM``
>> ``ETHTOOL_MSG_MODULE_EEPROM_GET``
>>     ``ETHTOOL_GEEE``                    ``ETHTOOL_MSG_EEE_GET``
>>     ``ETHTOOL_SEEE``                    ``ETHTOOL_MSG_EEE_SET``
>>     ``ETHTOOL_GRSSH``                   n/a
>> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index
>> ec4cd3921c67..9551005b350a 100644
>> --- a/include/linux/ethtool.h
>> +++ b/include/linux/ethtool.h
>> @@ -81,6 +81,7 @@ enum {
>>   #define ETH_RSS_HASH_NO_CHANGE       0
>>
>>   struct net_device;
>> +struct netlink_ext_ack;
>>
>>   /* Some generic methods drivers may use in their ethtool_ops */
>>   u32 ethtool_op_get_link(struct net_device *dev); @@ -410,6 +411,9 @@
>> struct ethtool_pause_stats {
>>    * @get_ethtool_phy_stats: Return extended statistics about the PHY
>> device.
>>    *   This is only useful if the device maintains PHY statistics and
>>    *   cannot use the standard PHY library helpers.
>> + * @get_module_eeprom_data_by_page: Get a region of plug-in module
>> EEPROM data
>> + *   from specified page. Returns a negative error code or the amount of
>> + *   bytes read.
>>    *
>>    * All operations are optional (i.e. the function pointer may be set
>>    * to %NULL) and callers must take this into account.  Callers must @@
> -515,6
>> +519,9 @@ struct ethtool_ops {
>>                                   const struct ethtool_tunable *, void *);
>>        int     (*set_phy_tunable)(struct net_device *,
>>                                   const struct ethtool_tunable *, const
> void
>> *);
>> +     int     (*get_module_eeprom_data_by_page)(struct net_device
>> *dev,
>> +                                               const struct
>> ethtool_eeprom_data *page,
>> +                                               struct netlink_ext_ack
>> *extack);
>>   };
>>
>>   int ethtool_check_ops(const struct ethtool_ops *ops); @@ -538,7 +545,6
>> @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
>>                                       const struct ethtool_link_ksettings
> *cmd,
>>                                       u32 *dev_speed, u8 *dev_duplex);
>>
>> -struct netlink_ext_ack;
>>   struct phy_device;
>>   struct phy_tdr_config;
>>
>> diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
> index
>> cde753bb2093..b3e92db3ad37 100644
>> --- a/include/uapi/linux/ethtool.h
>> +++ b/include/uapi/linux/ethtool.h
>> @@ -340,6 +340,28 @@ struct ethtool_eeprom {
>>        __u8    data[0];
>>   };
>>
>> +/**
>> + * struct ethtool_eeprom_data - EEPROM dump from specified page
>> + * @offset: Offset within the specified EEPROM page to begin read, in
>> bytes.
>> + * @length: Number of bytes to read.
>> + * @page: Page number to read from.
>> + * @bank: Page bank number to read from, if applicable by EEPROM spec.
>> + * @i2c_address: I2C address of a page. Value less than 0x7f expected.
>> Most
>> + *   EEPROMs use 0x50 or 0x51.
>> + * @data: Pointer to buffer with EEPROM data of @length size.
>> + *
>> + * This can be used to manage pages during EEPROM dump in ethtool and
>> +pass
>> + * required information to the driver.
>> + */
>> +struct ethtool_eeprom_data {
>> +     __u32   offset;
>> +     __u32   length;
>> +     __u8    page;
>> +     __u8    bank;
>> +     __u8    i2c_address;
>> +     __u8    *data;
>> +};
>> +
>>   /**
>>    * struct ethtool_eee - Energy Efficient Ethernet information
>>    * @cmd: ETHTOOL_{G,S}EEE
>> @@ -1865,6 +1887,9 @@ static inline int ethtool_validate_duplex(__u8
>> duplex)
>>   #define ETH_MODULE_SFF_8636_MAX_LEN     640
>>   #define ETH_MODULE_SFF_8436_MAX_LEN     640
>>
>> +#define ETH_MODULE_EEPROM_PAGE_LEN   256
>> +#define ETH_MODULE_MAX_I2C_ADDRESS   0x7f
>> +
>>   /* Reset flags */
>>   /* The reset() operation must clear the flags for the components which
>>    * were actually reset.  On successful return, the flags indicate the
> diff --git
>> a/include/uapi/linux/ethtool_netlink.h
>> b/include/uapi/linux/ethtool_netlink.h
>> index a286635ac9b8..e1b1b962f3da 100644
>> --- a/include/uapi/linux/ethtool_netlink.h
>> +++ b/include/uapi/linux/ethtool_netlink.h
>> @@ -42,6 +42,7 @@ enum {
>>        ETHTOOL_MSG_CABLE_TEST_ACT,
>>        ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
>>        ETHTOOL_MSG_TUNNEL_INFO_GET,
>> +     ETHTOOL_MSG_EEPROM_DATA_GET,
>>
>>        /* add new constants above here */
>>        __ETHTOOL_MSG_USER_CNT,
>> @@ -80,6 +81,7 @@ enum {
>>        ETHTOOL_MSG_CABLE_TEST_NTF,
>>        ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
>>        ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
>> +     ETHTOOL_MSG_EEPROM_DATA_GET_REPLY,
>>
>>        /* add new constants above here */
>>        __ETHTOOL_MSG_KERNEL_CNT,
>> @@ -629,6 +631,23 @@ enum {
>>        ETHTOOL_A_TUNNEL_INFO_MAX =
>> (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)  };
>>
>> +/* MODULE EEPROM DATA */
>> +
>> +enum {
>> +     ETHTOOL_A_EEPROM_DATA_UNSPEC,
>> +     ETHTOOL_A_EEPROM_DATA_HEADER,                   /* nest -
>> _A_HEADER_* */
>> +
>> +     ETHTOOL_A_EEPROM_DATA_OFFSET,                   /* u32 */
>> +     ETHTOOL_A_EEPROM_DATA_LENGTH,                   /* u32 */
>> +     ETHTOOL_A_EEPROM_DATA_PAGE,                     /* u8 */
>> +     ETHTOOL_A_EEPROM_DATA_BANK,                     /* u8 */
>> +     ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS,              /* u8 */
>> +     ETHTOOL_A_EEPROM_DATA,                          /* nested */
>> +
>> +     __ETHTOOL_A_EEPROM_DATA_CNT,
>> +     ETHTOOL_A_EEPROM_DATA_MAX =
>> (__ETHTOOL_A_EEPROM_DATA_CNT - 1) };
>> +
>>   /* generic netlink info */
>>   #define ETHTOOL_GENL_NAME "ethtool"
>>   #define ETHTOOL_GENL_VERSION 1
>> diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index
>> 7a849ff22dad..d604346bc074 100644
>> --- a/net/ethtool/Makefile
>> +++ b/net/ethtool/Makefile
>> @@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o
>>   ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
>>                   linkstate.o debug.o wol.o features.o privflags.o rings.o
> \
>>                   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o
> \
>> -                tunnels.o
>> +                tunnels.o eeprom.o
>> diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c new file mode
>> 100644 index 000000000000..e110336dc231
>> --- /dev/null
>> +++ b/net/ethtool/eeprom.c
>> @@ -0,0 +1,153 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +
>> +#include <linux/ethtool.h>
>> +#include "netlink.h"
>> +#include "common.h"
>> +
>> +struct eeprom_data_req_info {
>> +     struct ethnl_req_info   base;
>> +     u32                     offset;
>> +     u32                     length;
>> +     u8                      page;
>> +     u8                      bank;
>> +     u8                      i2c_address;
>> +};
>> +
>> +struct eeprom_data_reply_data {
>> +     struct ethnl_reply_data base;
>> +     u32                     length;
>> +     u8                      *data;
>> +};
>> +
>> +#define EEPROM_DATA_REQINFO(__req_base) \
>> +     container_of(__req_base, struct eeprom_data_req_info, base)
>> +
>> +#define EEPROM_DATA_REPDATA(__reply_base) \
>> +     container_of(__reply_base, struct eeprom_data_reply_data, base)
>> +
>> +static int eeprom_data_prepare_data(const struct ethnl_req_info
>> *req_base,
>> +                                 struct ethnl_reply_data *reply_base,
>> +                                 struct genl_info *info)
>> +{
>> +     struct eeprom_data_reply_data *reply =
>> EEPROM_DATA_REPDATA(reply_base);
>> +     struct eeprom_data_req_info *request =
>> EEPROM_DATA_REQINFO(req_base);
>> +     struct ethtool_eeprom_data page_data = {0};
>> +     struct net_device *dev = reply_base->dev;
>> +     int ret;
>> +
>> +     if (!dev->ethtool_ops->get_module_eeprom_data_by_page)
>> +             return -EOPNOTSUPP;
>> +
>> +     page_data.offset = request->offset;
>> +     page_data.length = request->length;
>> +     page_data.i2c_address = request->i2c_address;
>> +     page_data.page = request->page;
>> +     page_data.bank = request->bank;
>> +     page_data.data = kmalloc(page_data.length, GFP_KERNEL);
>> +     if (!page_data.data)
>> +             return -ENOMEM;
>> +     ret = ethnl_ops_begin(dev);
>> +     if (ret)
>> +             goto err_free;
>> +
>> +     ret = dev->ethtool_ops->get_module_eeprom_data_by_page(dev,
>> &page_data,
>> +
> info->extack);
>> +     if (ret < 0)
>> +             goto err_ops;
>> +
>> +     reply->length = ret;
>> +     reply->data = page_data.data;
>> +
>> +     ethnl_ops_complete(dev);
>> +     return 0;
>> +
>> +err_ops:
>> +     ethnl_ops_complete(dev);
>> +err_free:
>> +     kfree(page_data.data);
>> +     return ret;
>> +}
>> +
>> +static int eeprom_data_parse_request(struct ethnl_req_info *req_info,
>> struct nlattr **tb,
>> +                                  struct netlink_ext_ack *extack) {
>> +     struct eeprom_data_req_info *request =
>> EEPROM_DATA_REQINFO(req_info);
>> +     struct net_device *dev = req_info->dev;
>> +
>> +     if (!tb[ETHTOOL_A_EEPROM_DATA_OFFSET] ||
>> +         !tb[ETHTOOL_A_EEPROM_DATA_LENGTH] ||
>> +         !tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS])
>> +             return -EINVAL;
> Suggestion:  Consider using i2c address 0x50 as a default if none is given.
> 0x50 is the first 256 bytes of SFP, and all of QSFP and CMIS EEPROM.  If
> there is a page given on an SFP device, then you know i2c address is 0x51.
> The only thing that REQUIRES 0x51 is legacy offset 256-511 on SFP.  Keep the
> i2c address, but make it optional for the non-standard devices that Andrew
> has mentioned, and for that one section of SFP data that requires it.  And
> document it for the user.
Agree, but thought to have that i2c address default set on userspace, so 
here we expect it.
> Note also, legacy does not have an i2c address, so passing this along allows
> the fallback to legacy to cover more cases.  (Legacy ethtool addresses this
> part of SFP as offset 256-511.)
>
>> +
>> +     request->i2c_address =
>> nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS]);
>> +     if (request->i2c_address > ETH_MODULE_MAX_I2C_ADDRESS)
>> +             return -EINVAL;
>> +
>> +     request->offset =
>> nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_OFFSET]);
>> +     request->length =
>> nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_LENGTH]);
>> +     if (tb[ETHTOOL_A_EEPROM_DATA_PAGE] &&
>> +         dev->ethtool_ops->get_module_eeprom_data_by_page &&
>> +         request->offset + request->length >
>> ETH_MODULE_EEPROM_PAGE_LEN)
>> +             return -EINVAL;
> If a request exceeds the length of EEPROM (in legacy), we truncate it to
> EEPROM length.  I suggest if a request exceeds the length of the page (in
> the new KAPI), then we truncate at the end of the page.  Thus rather than
> 'return -EINVAL;' I suggest
>
>            request->length = ETH_MODULE_EEPROM_PAGE_LEN - request->offset;

I was not sure about that, isn't it better that once user specified page 
he has to be in the page boundary and let him know the command failed ?

>> +
>> +     if (tb[ETHTOOL_A_EEPROM_DATA_PAGE])
>> +             request->page =
>> nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_PAGE]);
>> +     if (tb[ETHTOOL_A_EEPROM_DATA_BANK])
>> +             request->bank =
>> nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_BANK]);
>> +
>> +     return 0;
>> +}
>> +
>> +static int eeprom_data_reply_size(const struct ethnl_req_info *req_base,
>> +                               const struct ethnl_reply_data *reply_base)
> {
>> +     const struct eeprom_data_req_info *request =
>> +EEPROM_DATA_REQINFO(req_base);
>> +
>> +     return nla_total_size(sizeof(u32)) + /* _EEPROM_DATA_LENGTH */
>> +            nla_total_size(sizeof(u8) * request->length); /* _EEPROM_DATA
>> +*/ }
>> +
>> +static int eeprom_data_fill_reply(struct sk_buff *skb,
>> +                               const struct ethnl_req_info *req_base,
>> +                               const struct ethnl_reply_data *reply_base)
> {
>> +     struct eeprom_data_reply_data *reply =
>> +EEPROM_DATA_REPDATA(reply_base);
>> +
>> +     if (nla_put_u32(skb, ETHTOOL_A_EEPROM_DATA_LENGTH, reply-
>>> length) ||
>> +         nla_put(skb, ETHTOOL_A_EEPROM_DATA, reply->length, reply-
>>> data))
>> +             return -EMSGSIZE;
>> +
>> +     return 0;
>> +}
>> +
>> +static void eeprom_data_cleanup_data(struct ethnl_reply_data
>> +*reply_base) {
>> +     struct eeprom_data_reply_data *reply =
>> +EEPROM_DATA_REPDATA(reply_base);
>> +
>> +     kfree(reply->data);
>> +}
>> +
>> +const struct ethnl_request_ops ethnl_eeprom_data_request_ops = {
>> +     .request_cmd            = ETHTOOL_MSG_EEPROM_DATA_GET,
>> +     .reply_cmd              =
>> ETHTOOL_MSG_EEPROM_DATA_GET_REPLY,
>> +     .hdr_attr               = ETHTOOL_A_EEPROM_DATA_HEADER,
>> +     .req_info_size          = sizeof(struct eeprom_data_req_info),
>> +     .reply_data_size        = sizeof(struct eeprom_data_reply_data),
>> +
>> +     .parse_request          = eeprom_data_parse_request,
>> +     .prepare_data           = eeprom_data_prepare_data,
>> +     .reply_size             = eeprom_data_reply_size,
>> +     .fill_reply             = eeprom_data_fill_reply,
>> +     .cleanup_data           = eeprom_data_cleanup_data,
>> +};
>> +
>> +const struct nla_policy ethnl_eeprom_data_get_policy[] = {
>> +     [ETHTOOL_A_EEPROM_DATA_HEADER]          =
>> NLA_POLICY_NESTED(ethnl_header_policy),
>> +     [ETHTOOL_A_EEPROM_DATA_OFFSET]          = { .type =
>> NLA_U32 },
>> +     [ETHTOOL_A_EEPROM_DATA_LENGTH]          = { .type =
>> NLA_U32 },
>> +     [ETHTOOL_A_EEPROM_DATA_PAGE]            = { .type = NLA_U8 },
>> +     [ETHTOOL_A_EEPROM_DATA_BANK]            = { .type = NLA_U8 },
>> +     [ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS]     = { .type = NLA_U8 },
>> +     [ETHTOOL_A_EEPROM_DATA]                 = { .type =
>> NLA_BINARY },
>> +};
>> +
>> diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index
>> 50d3c8896f91..ff2528bee192 100644
>> --- a/net/ethtool/netlink.c
>> +++ b/net/ethtool/netlink.c
>> @@ -245,6 +245,7 @@
>> ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
>>        [ETHTOOL_MSG_PAUSE_GET]         =
>> &ethnl_pause_request_ops,
>>        [ETHTOOL_MSG_EEE_GET]           = &ethnl_eee_request_ops,
>>        [ETHTOOL_MSG_TSINFO_GET]        = &ethnl_tsinfo_request_ops,
>> +     [ETHTOOL_MSG_EEPROM_DATA_GET]   =
>> &ethnl_eeprom_data_request_ops,
>>   };
>>
>>   static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback
>> *cb) @@ -912,6 +913,15 @@ static const struct genl_ops ethtool_genl_ops[]
>> = {
>>                .policy = ethnl_tunnel_info_get_policy,
>>                .maxattr = ARRAY_SIZE(ethnl_tunnel_info_get_policy) - 1,
>>        },
>> +     {
>> +             .cmd    = ETHTOOL_MSG_EEPROM_DATA_GET,
>> +             .doit   = ethnl_default_doit,
>> +             .start  = ethnl_default_start,
>> +             .dumpit = ethnl_default_dumpit,
>> +             .done   = ethnl_default_done,
>> +             .policy = ethnl_eeprom_data_get_policy,
>> +             .maxattr = ARRAY_SIZE(ethnl_eeprom_data_get_policy) - 1,
>> +     },
>>   };
>>
>>   static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff
> --git
>> a/net/ethtool/netlink.h b/net/ethtool/netlink.h index
>> 6eabd58d81bf..60954c7b4dfe 100644
>> --- a/net/ethtool/netlink.h
>> +++ b/net/ethtool/netlink.h
>> @@ -344,6 +344,7 @@ extern const struct ethnl_request_ops
>> ethnl_coalesce_request_ops;  extern const struct ethnl_request_ops
>> ethnl_pause_request_ops;  extern const struct ethnl_request_ops
>> ethnl_eee_request_ops;  extern const struct ethnl_request_ops
>> ethnl_tsinfo_request_ops;
>> +extern const struct ethnl_request_ops ethnl_eeprom_data_request_ops;
>>
>>   extern const struct nla_policy
>> ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];  extern const struct
>> nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@
>> -375,6 +376,7 @@ extern const struct nla_policy
>> ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER +  extern const struct
>> nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER +
>> 1];  extern const struct nla_policy
>> ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1];
>> extern const struct nla_policy
>> ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
>> +extern const struct nla_policy
>> +ethnl_eeprom_data_get_policy[ETHTOOL_A_EEPROM_DATA + 1];
>>
>>   int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);  int
>> ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
>> --
>> 2.26.2
Don Bollinger March 16, 2021, 9 p.m. UTC | #5
On 3/16/2021 11:24 AM, Moshe Shemesh wrote:
> On 3/16/2021 12:31 AM, Don Bollinger wrote:
> > On Mon, 15 Mar 2021 10:12:39 +0700 Moshe Shemesh wrote:
> >> From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
> >>
> >> Define get_module_eeprom_data_by_page() ethtool callback and
> >> implement netlink infrastructure.
> >>

<snip>

> >> +     request->offset =
> >> nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_OFFSET]);
> >> +     request->length =
> >> nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_LENGTH]);
> >> +     if (tb[ETHTOOL_A_EEPROM_DATA_PAGE] &&
> >> +         dev->ethtool_ops->get_module_eeprom_data_by_page &&
> >> +         request->offset + request->length >
> >> ETH_MODULE_EEPROM_PAGE_LEN)
> >> +             return -EINVAL;
> > If a request exceeds the length of EEPROM (in legacy), we truncate it
> > to EEPROM length.  I suggest if a request exceeds the length of the
> > page (in the new KAPI), then we truncate at the end of the page.  Thus
> > rather than 'return -EINVAL;' I suggest
> >
> >            request->length = ETH_MODULE_EEPROM_PAGE_LEN -
> > request->offset;
> 
> I was not sure about that, isn't it better that once user specified page he has
> to be in the page boundary and let him know the command failed ?

On further review, I agree with your original code.  Reading across a page boundary will require explicit tracking of offset and page to read from.  If you get a short read, you *might* have to update the page on the next read.  If you have to keep track of where the page boundary is and adjust the offset and page together, then you should be explicitly managing not to try to read across a page boundary.  Put another way, if you try to read across a page boundary you probably have a bug in your code and will appreciate an immediate error.

I withdraw my previous suggestion, don't change it.

Don
Andrew Lunn March 18, 2021, 1:03 p.m. UTC | #6
On Mon, Mar 15, 2021 at 07:12:39PM +0200, Moshe Shemesh wrote:
> From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
> 
> Define get_module_eeprom_data_by_page() ethtool callback and implement
> netlink infrastructure.
> 
> get_module_eeprom_data_by_page() allows network drivers to dump a part
> of module's EEPROM specified by page and bank numbers along with offset
> and length. It is effectively a netlink replacement for
> get_module_info() and get_module_eeprom() pair, which is needed due to
> emergence of complex non-linear EEPROM layouts.
> 
> Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
> ---
>  Documentation/networking/ethtool-netlink.rst |  34 ++++-
>  include/linux/ethtool.h                      |   8 +-
>  include/uapi/linux/ethtool.h                 |  25 +++
>  include/uapi/linux/ethtool_netlink.h         |  19 +++
>  net/ethtool/Makefile                         |   2 +-
>  net/ethtool/eeprom.c                         | 153 +++++++++++++++++++
>  net/ethtool/netlink.c                        |  10 ++
>  net/ethtool/netlink.h                        |   2 +
>  8 files changed, 249 insertions(+), 4 deletions(-)
>  create mode 100644 net/ethtool/eeprom.c
> 
> diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
> index 05073482db05..25846b97632a 100644
> --- a/Documentation/networking/ethtool-netlink.rst
> +++ b/Documentation/networking/ethtool-netlink.rst
> @@ -1280,6 +1280,36 @@ Kernel response contents:
>  For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES`` indicates that
>  the table contains static entries, hard-coded by the NIC.
>  
> +EEPROM_DATA
> +===========
> +
> +Fetch module EEPROM data dump.
> +
> +Request contents:
> +
> +  =====================================  ======  ==========================
> +  ``ETHTOOL_A_EEPROM_DATA_HEADER``       nested  request header
> +  ``ETHTOOL_A_EEPROM_DATA_OFFSET``       u32     offset within a page
> +  ``ETHTOOL_A_EEPROM_DATA_LENGTH``       u32     amount of bytes to read

I wonder if offset and length should be u8. At most, we should only be
returning a 1/2 page, so 128 bytes. We don't need a u32.

>  Request translation
>  ===================
>  
> @@ -1357,8 +1387,8 @@ are netlink only.
>    ``ETHTOOL_GET_DUMP_FLAG``           n/a
>    ``ETHTOOL_GET_DUMP_DATA``           n/a
>    ``ETHTOOL_GET_TS_INFO``             ``ETHTOOL_MSG_TSINFO_GET``
> -  ``ETHTOOL_GMODULEINFO``             n/a
> -  ``ETHTOOL_GMODULEEEPROM``           n/a
> +  ``ETHTOOL_GMODULEINFO``             ``ETHTOOL_MSG_MODULE_EEPROM_GET``
> +  ``ETHTOOL_GMODULEEEPROM``           ``ETHTOOL_MSG_MODULE_EEPROM_GET``
>    ``ETHTOOL_GEEE``                    ``ETHTOOL_MSG_EEE_GET``
>    ``ETHTOOL_SEEE``                    ``ETHTOOL_MSG_EEE_SET``
>    ``ETHTOOL_GRSSH``                   n/a

We should check with Michal about this. It is not a direct replacement
of the old IOCTL API, it is new API. He may want it documented
differently.

> +static int eeprom_data_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
> +				     struct netlink_ext_ack *extack)
> +{
> +	struct eeprom_data_req_info *request = EEPROM_DATA_REQINFO(req_info);
> +	struct net_device *dev = req_info->dev;
> +
> +	if (!tb[ETHTOOL_A_EEPROM_DATA_OFFSET] ||
> +	    !tb[ETHTOOL_A_EEPROM_DATA_LENGTH] ||
> +	    !tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS])
> +		return -EINVAL;
> +
> +	request->i2c_address = nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS]);
> +	if (request->i2c_address > ETH_MODULE_MAX_I2C_ADDRESS)
> +		return -EINVAL;

> +
> +	request->offset = nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_OFFSET]);
> +	request->length = nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_LENGTH]);
> +	if (tb[ETHTOOL_A_EEPROM_DATA_PAGE] &&
> +	    dev->ethtool_ops->get_module_eeprom_data_by_page &&
> +	    request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN)
> +		return -EINVAL;

You need to watch out for overflows here. 0xfffffff0 + 0x20 is less
than ETH_MODULE_EEPROM_PAGE_LEN when it wraps around, but will cause
bad things to happen.

      Andrew
Moshe Shemesh March 18, 2021, 2:25 p.m. UTC | #7
On 3/18/2021 3:03 PM, Andrew Lunn wrote:
>
> On Mon, Mar 15, 2021 at 07:12:39PM +0200, Moshe Shemesh wrote:
>> From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
>>
>> Define get_module_eeprom_data_by_page() ethtool callback and implement
>> netlink infrastructure.
>>
>> get_module_eeprom_data_by_page() allows network drivers to dump a part
>> of module's EEPROM specified by page and bank numbers along with offset
>> and length. It is effectively a netlink replacement for
>> get_module_info() and get_module_eeprom() pair, which is needed due to
>> emergence of complex non-linear EEPROM layouts.
>>
>> Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
>> ---
>>   Documentation/networking/ethtool-netlink.rst |  34 ++++-
>>   include/linux/ethtool.h                      |   8 +-
>>   include/uapi/linux/ethtool.h                 |  25 +++
>>   include/uapi/linux/ethtool_netlink.h         |  19 +++
>>   net/ethtool/Makefile                         |   2 +-
>>   net/ethtool/eeprom.c                         | 153 +++++++++++++++++++
>>   net/ethtool/netlink.c                        |  10 ++
>>   net/ethtool/netlink.h                        |   2 +
>>   8 files changed, 249 insertions(+), 4 deletions(-)
>>   create mode 100644 net/ethtool/eeprom.c
>>
>> diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
>> index 05073482db05..25846b97632a 100644
>> --- a/Documentation/networking/ethtool-netlink.rst
>> +++ b/Documentation/networking/ethtool-netlink.rst
>> @@ -1280,6 +1280,36 @@ Kernel response contents:
>>   For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES`` indicates that
>>   the table contains static entries, hard-coded by the NIC.
>>
>> +EEPROM_DATA
>> +===========
>> +
>> +Fetch module EEPROM data dump.
>> +
>> +Request contents:
>> +
>> +  =====================================  ======  ==========================
>> +  ``ETHTOOL_A_EEPROM_DATA_HEADER``       nested  request header
>> +  ``ETHTOOL_A_EEPROM_DATA_OFFSET``       u32     offset within a page
>> +  ``ETHTOOL_A_EEPROM_DATA_LENGTH``       u32     amount of bytes to read
> I wonder if offset and length should be u8. At most, we should only be
> returning a 1/2 page, so 128 bytes. We don't need a u32.


That's right when page is given, but user may have commands that used to 
work on the ioctl KAPI with offset higher than one page.

>>   Request translation
>>   ===================
>>
>> @@ -1357,8 +1387,8 @@ are netlink only.
>>     ``ETHTOOL_GET_DUMP_FLAG``           n/a
>>     ``ETHTOOL_GET_DUMP_DATA``           n/a
>>     ``ETHTOOL_GET_TS_INFO``             ``ETHTOOL_MSG_TSINFO_GET``
>> -  ``ETHTOOL_GMODULEINFO``             n/a
>> -  ``ETHTOOL_GMODULEEEPROM``           n/a
>> +  ``ETHTOOL_GMODULEINFO``             ``ETHTOOL_MSG_MODULE_EEPROM_GET``
>> +  ``ETHTOOL_GMODULEEEPROM``           ``ETHTOOL_MSG_MODULE_EEPROM_GET``
>>     ``ETHTOOL_GEEE``                    ``ETHTOOL_MSG_EEE_GET``
>>     ``ETHTOOL_SEEE``                    ``ETHTOOL_MSG_EEE_SET``
>>     ``ETHTOOL_GRSSH``                   n/a
> We should check with Michal about this. It is not a direct replacement
> of the old IOCTL API, it is new API. He may want it documented
> differently.
Michal, please comment on where it should be.
>> +static int eeprom_data_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
>> +                                  struct netlink_ext_ack *extack)
>> +{
>> +     struct eeprom_data_req_info *request = EEPROM_DATA_REQINFO(req_info);
>> +     struct net_device *dev = req_info->dev;
>> +
>> +     if (!tb[ETHTOOL_A_EEPROM_DATA_OFFSET] ||
>> +         !tb[ETHTOOL_A_EEPROM_DATA_LENGTH] ||
>> +         !tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS])
>> +             return -EINVAL;
>> +
>> +     request->i2c_address = nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS]);
>> +     if (request->i2c_address > ETH_MODULE_MAX_I2C_ADDRESS)
>> +             return -EINVAL;
>> +
>> +     request->offset = nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_OFFSET]);
>> +     request->length = nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_LENGTH]);
>> +     if (tb[ETHTOOL_A_EEPROM_DATA_PAGE] &&
>> +         dev->ethtool_ops->get_module_eeprom_data_by_page &&
>> +         request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN)
>> +             return -EINVAL;
> You need to watch out for overflows here. 0xfffffff0 + 0x20 is less
> than ETH_MODULE_EEPROM_PAGE_LEN when it wraps around, but will cause
> bad things to happen.
Ack.
>        Andrew
Michal Kubecek March 18, 2021, 3:16 p.m. UTC | #8
On Thu, Mar 18, 2021 at 02:03:02PM +0100, Andrew Lunn wrote:
> On Mon, Mar 15, 2021 at 07:12:39PM +0200, Moshe Shemesh wrote:
> >  
> > +EEPROM_DATA
> > +===========
> > +
> > +Fetch module EEPROM data dump.
> > +
> > +Request contents:
> > +
> > +  =====================================  ======  ==========================
> > +  ``ETHTOOL_A_EEPROM_DATA_HEADER``       nested  request header
> > +  ``ETHTOOL_A_EEPROM_DATA_OFFSET``       u32     offset within a page
> > +  ``ETHTOOL_A_EEPROM_DATA_LENGTH``       u32     amount of bytes to read
> 
> I wonder if offset and length should be u8. At most, we should only be
> returning a 1/2 page, so 128 bytes. We don't need a u32.

There is no actual gain using NLA_U8 due to padding. Out of the
interfaces used here, kernel-userspace API is the least flexible so
I would generally prefer NLA_U32, except for bools or enumerated values
where it's absolutely obvious the number of possible values cannot grow
too much. In this case, I can't really say it's impossible we could have
devices with bigger pages in something like 20 years.

> >  Request translation
> >  ===================
> >  
> > @@ -1357,8 +1387,8 @@ are netlink only.
> >    ``ETHTOOL_GET_DUMP_FLAG``           n/a
> >    ``ETHTOOL_GET_DUMP_DATA``           n/a
> >    ``ETHTOOL_GET_TS_INFO``             ``ETHTOOL_MSG_TSINFO_GET``
> > -  ``ETHTOOL_GMODULEINFO``             n/a
> > -  ``ETHTOOL_GMODULEEEPROM``           n/a
> > +  ``ETHTOOL_GMODULEINFO``             ``ETHTOOL_MSG_MODULE_EEPROM_GET``
> > +  ``ETHTOOL_GMODULEEEPROM``           ``ETHTOOL_MSG_MODULE_EEPROM_GET``
> >    ``ETHTOOL_GEEE``                    ``ETHTOOL_MSG_EEE_GET``
> >    ``ETHTOOL_SEEE``                    ``ETHTOOL_MSG_EEE_SET``
> >    ``ETHTOOL_GRSSH``                   n/a
> 
> We should check with Michal about this. It is not a direct replacement
> of the old IOCTL API, it is new API. He may want it documented
> differently.

This table is meant to give a hint in the sense "for what you used
ioctl command in left column, use now netlink request in the right".
So IMHO it's appropriate. Perhaps it would deserve a comment explaining
this.

> > +	request->offset = nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_OFFSET]);
> > +	request->length = nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_LENGTH]);
> > +	if (tb[ETHTOOL_A_EEPROM_DATA_PAGE] &&
> > +	    dev->ethtool_ops->get_module_eeprom_data_by_page &&
> > +	    request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN)
> > +		return -EINVAL;
> 
> You need to watch out for overflows here. 0xfffffff0 + 0x20 is less
> than ETH_MODULE_EEPROM_PAGE_LEN when it wraps around, but will cause
> bad things to happen.

BtW, the ioctl code also suffers from this problem and we recently had
a report from customer (IIRC the effect was ethtool trying to allocate
~4GB of memory), upstream fix should follow soon.

Michal
Andrew Lunn March 18, 2021, 10:16 p.m. UTC | #9
> > > +Request contents:
> > > +
> > > +  =====================================  ======  ==========================
> > > +  ``ETHTOOL_A_EEPROM_DATA_HEADER``       nested  request header
> > > +  ``ETHTOOL_A_EEPROM_DATA_OFFSET``       u32     offset within a page
> > > +  ``ETHTOOL_A_EEPROM_DATA_LENGTH``       u32     amount of bytes to read
> > I wonder if offset and length should be u8. At most, we should only be
> > returning a 1/2 page, so 128 bytes. We don't need a u32.
> 
> 
> That's right when page is given, but user may have commands that
> used to work on the ioctl KAPI with offset higher than one page.

CMIS section 5.4.1 says:

  The slave maintains an internal current byte address counter
  containing the byte address accessed during the latest read or write
  operation incremented by one with roll-over as follows: The current
  byte address counter rolls-over after a read or write operation at
  the last byte address of the current 128-byte memory page (127 or
  255) to the first byte address (0 or 128) of the same 128-byte
  memory page.

This wrapping is somewhat unexpected. If the user access is for a read
starting at 120 and a length of 20, they get bytes 120-127 followed by
0-11. The user is more likely to be expecting 120-139.

We have two ways to address this:

1) We limit reads to a maximum of a 1/2 page, and the start and end
point needs to be within that 1/2 page.

2) We detect that the read is going to go across a 1/2 page boarder,
and perform two reads to the MAC driver, and glue the data back
together again in the ethtool core.

What i don't want is to leave the individual drivers to solve this,
because i expect some of them will get it wrong.

	Andrew
Andrew Lunn March 18, 2021, 10:36 p.m. UTC | #10
> > > +static int eeprom_data_parse_request(struct ethnl_req_info *req_info,
> > > struct nlattr **tb,
> > > +                                  struct netlink_ext_ack *extack) {
> > > +     struct eeprom_data_req_info *request =
> > > EEPROM_DATA_REQINFO(req_info);
> > > +     struct net_device *dev = req_info->dev;
> > > +
> > > +     if (!tb[ETHTOOL_A_EEPROM_DATA_OFFSET] ||
> > > +         !tb[ETHTOOL_A_EEPROM_DATA_LENGTH] ||
> > > +         !tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS])
> > > +             return -EINVAL;
> > Suggestion:  Consider using i2c address 0x50 as a default if none is given.
> > 0x50 is the first 256 bytes of SFP, and all of QSFP and CMIS EEPROM.  If
> > there is a page given on an SFP device, then you know i2c address is 0x51.
> > The only thing that REQUIRES 0x51 is legacy offset 256-511 on SFP.  Keep the
> > i2c address, but make it optional for the non-standard devices that Andrew
> > has mentioned, and for that one section of SFP data that requires it.  And
> > document it for the user.
> Agree, but thought to have that i2c address default set on userspace, so
> here we expect it.

I would make it mandatory. If you default it to 0x50, the current
message structure has no way of including this information in the
reply. So user space gets some data, but it has no idea from where?

       Andrew
diff mbox series

Patch

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 05073482db05..25846b97632a 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1280,6 +1280,36 @@  Kernel response contents:
 For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES`` indicates that
 the table contains static entries, hard-coded by the NIC.
 
+EEPROM_DATA
+===========
+
+Fetch module EEPROM data dump.
+
+Request contents:
+
+  =====================================  ======  ==========================
+  ``ETHTOOL_A_EEPROM_DATA_HEADER``       nested  request header
+  ``ETHTOOL_A_EEPROM_DATA_OFFSET``       u32     offset within a page
+  ``ETHTOOL_A_EEPROM_DATA_LENGTH``       u32     amount of bytes to read
+  ``ETHTOOL_A_EEPROM_DATA_PAGE``         u8      page number
+  ``ETHTOOL_A_EEPROM_DATA_BANK``         u8      bank number
+  ``ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS``  u8      page I2C address
+  =====================================  ======  ==========================
+
+Kernel response contents:
+
+ +---------------------------------------------+--------+---------------------+
+ | ``ETHTOOL_A_EEPROM_DATA_HEADER``            | nested | reply header        |
+ +---------------------------------------------+--------+---------------------+
+ | ``ETHTOOL_A_EEPROM_DATA_LENGTH``            | u32    | amount of bytes read|
+ +---------------------------------------------+--------+---------------------+
+ | ``ETHTOOL_A_EEPROM_DATA``                   | nested | array of bytes from |
+ |                                             |        | module EEPROM       |
+ +---------------------------------------------+--------+---------------------+
+
+``ETHTOOL_A_EEPROM_DATA`` has an attribute length equal to the amount of bytes
+driver actually read.
+
 Request translation
 ===================
 
@@ -1357,8 +1387,8 @@  are netlink only.
   ``ETHTOOL_GET_DUMP_FLAG``           n/a
   ``ETHTOOL_GET_DUMP_DATA``           n/a
   ``ETHTOOL_GET_TS_INFO``             ``ETHTOOL_MSG_TSINFO_GET``
-  ``ETHTOOL_GMODULEINFO``             n/a
-  ``ETHTOOL_GMODULEEEPROM``           n/a
+  ``ETHTOOL_GMODULEINFO``             ``ETHTOOL_MSG_MODULE_EEPROM_GET``
+  ``ETHTOOL_GMODULEEEPROM``           ``ETHTOOL_MSG_MODULE_EEPROM_GET``
   ``ETHTOOL_GEEE``                    ``ETHTOOL_MSG_EEE_GET``
   ``ETHTOOL_SEEE``                    ``ETHTOOL_MSG_EEE_SET``
   ``ETHTOOL_GRSSH``                   n/a
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index ec4cd3921c67..9551005b350a 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -81,6 +81,7 @@  enum {
 #define ETH_RSS_HASH_NO_CHANGE	0
 
 struct net_device;
+struct netlink_ext_ack;
 
 /* Some generic methods drivers may use in their ethtool_ops */
 u32 ethtool_op_get_link(struct net_device *dev);
@@ -410,6 +411,9 @@  struct ethtool_pause_stats {
  * @get_ethtool_phy_stats: Return extended statistics about the PHY device.
  *	This is only useful if the device maintains PHY statistics and
  *	cannot use the standard PHY library helpers.
+ * @get_module_eeprom_data_by_page: Get a region of plug-in module EEPROM data
+ *	from specified page. Returns a negative error code or the amount of
+ *	bytes read.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
@@ -515,6 +519,9 @@  struct ethtool_ops {
 				   const struct ethtool_tunable *, void *);
 	int	(*set_phy_tunable)(struct net_device *,
 				   const struct ethtool_tunable *, const void *);
+	int	(*get_module_eeprom_data_by_page)(struct net_device *dev,
+						  const struct ethtool_eeprom_data *page,
+						  struct netlink_ext_ack *extack);
 };
 
 int ethtool_check_ops(const struct ethtool_ops *ops);
@@ -538,7 +545,6 @@  int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
 				       const struct ethtool_link_ksettings *cmd,
 				       u32 *dev_speed, u8 *dev_duplex);
 
-struct netlink_ext_ack;
 struct phy_device;
 struct phy_tdr_config;
 
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index cde753bb2093..b3e92db3ad37 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -340,6 +340,28 @@  struct ethtool_eeprom {
 	__u8	data[0];
 };
 
+/**
+ * struct ethtool_eeprom_data - EEPROM dump from specified page
+ * @offset: Offset within the specified EEPROM page to begin read, in bytes.
+ * @length: Number of bytes to read.
+ * @page: Page number to read from.
+ * @bank: Page bank number to read from, if applicable by EEPROM spec.
+ * @i2c_address: I2C address of a page. Value less than 0x7f expected. Most
+ *	EEPROMs use 0x50 or 0x51.
+ * @data: Pointer to buffer with EEPROM data of @length size.
+ *
+ * This can be used to manage pages during EEPROM dump in ethtool and pass
+ * required information to the driver.
+ */
+struct ethtool_eeprom_data {
+	__u32	offset;
+	__u32	length;
+	__u8	page;
+	__u8	bank;
+	__u8	i2c_address;
+	__u8	*data;
+};
+
 /**
  * struct ethtool_eee - Energy Efficient Ethernet information
  * @cmd: ETHTOOL_{G,S}EEE
@@ -1865,6 +1887,9 @@  static inline int ethtool_validate_duplex(__u8 duplex)
 #define ETH_MODULE_SFF_8636_MAX_LEN     640
 #define ETH_MODULE_SFF_8436_MAX_LEN     640
 
+#define ETH_MODULE_EEPROM_PAGE_LEN	256
+#define ETH_MODULE_MAX_I2C_ADDRESS	0x7f
+
 /* Reset flags */
 /* The reset() operation must clear the flags for the components which
  * were actually reset.  On successful return, the flags indicate the
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index a286635ac9b8..e1b1b962f3da 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -42,6 +42,7 @@  enum {
 	ETHTOOL_MSG_CABLE_TEST_ACT,
 	ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
 	ETHTOOL_MSG_TUNNEL_INFO_GET,
+	ETHTOOL_MSG_EEPROM_DATA_GET,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -80,6 +81,7 @@  enum {
 	ETHTOOL_MSG_CABLE_TEST_NTF,
 	ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
 	ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
+	ETHTOOL_MSG_EEPROM_DATA_GET_REPLY,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -629,6 +631,23 @@  enum {
 	ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)
 };
 
+/* MODULE EEPROM DATA */
+
+enum {
+	ETHTOOL_A_EEPROM_DATA_UNSPEC,
+	ETHTOOL_A_EEPROM_DATA_HEADER,			/* nest - _A_HEADER_* */
+
+	ETHTOOL_A_EEPROM_DATA_OFFSET,			/* u32 */
+	ETHTOOL_A_EEPROM_DATA_LENGTH,			/* u32 */
+	ETHTOOL_A_EEPROM_DATA_PAGE,			/* u8 */
+	ETHTOOL_A_EEPROM_DATA_BANK,			/* u8 */
+	ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS,		/* u8 */
+	ETHTOOL_A_EEPROM_DATA,				/* nested */
+
+	__ETHTOOL_A_EEPROM_DATA_CNT,
+	ETHTOOL_A_EEPROM_DATA_MAX = (__ETHTOOL_A_EEPROM_DATA_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 7a849ff22dad..d604346bc074 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,4 +7,4 @@  obj-$(CONFIG_ETHTOOL_NETLINK)	+= ethtool_nl.o
 ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
 		   linkstate.o debug.o wol.o features.o privflags.o rings.o \
 		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
-		   tunnels.o
+		   tunnels.o eeprom.o
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
new file mode 100644
index 000000000000..e110336dc231
--- /dev/null
+++ b/net/ethtool/eeprom.c
@@ -0,0 +1,153 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool.h>
+#include "netlink.h"
+#include "common.h"
+
+struct eeprom_data_req_info {
+	struct ethnl_req_info	base;
+	u32			offset;
+	u32			length;
+	u8			page;
+	u8			bank;
+	u8			i2c_address;
+};
+
+struct eeprom_data_reply_data {
+	struct ethnl_reply_data base;
+	u32			length;
+	u8			*data;
+};
+
+#define EEPROM_DATA_REQINFO(__req_base) \
+	container_of(__req_base, struct eeprom_data_req_info, base)
+
+#define EEPROM_DATA_REPDATA(__reply_base) \
+	container_of(__reply_base, struct eeprom_data_reply_data, base)
+
+static int eeprom_data_prepare_data(const struct ethnl_req_info *req_base,
+				    struct ethnl_reply_data *reply_base,
+				    struct genl_info *info)
+{
+	struct eeprom_data_reply_data *reply = EEPROM_DATA_REPDATA(reply_base);
+	struct eeprom_data_req_info *request = EEPROM_DATA_REQINFO(req_base);
+	struct ethtool_eeprom_data page_data = {0};
+	struct net_device *dev = reply_base->dev;
+	int ret;
+
+	if (!dev->ethtool_ops->get_module_eeprom_data_by_page)
+		return -EOPNOTSUPP;
+
+	page_data.offset = request->offset;
+	page_data.length = request->length;
+	page_data.i2c_address = request->i2c_address;
+	page_data.page = request->page;
+	page_data.bank = request->bank;
+	page_data.data = kmalloc(page_data.length, GFP_KERNEL);
+	if (!page_data.data)
+		return -ENOMEM;
+	ret = ethnl_ops_begin(dev);
+	if (ret)
+		goto err_free;
+
+	ret = dev->ethtool_ops->get_module_eeprom_data_by_page(dev, &page_data,
+							       info->extack);
+	if (ret < 0)
+		goto err_ops;
+
+	reply->length = ret;
+	reply->data = page_data.data;
+
+	ethnl_ops_complete(dev);
+	return 0;
+
+err_ops:
+	ethnl_ops_complete(dev);
+err_free:
+	kfree(page_data.data);
+	return ret;
+}
+
+static int eeprom_data_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
+				     struct netlink_ext_ack *extack)
+{
+	struct eeprom_data_req_info *request = EEPROM_DATA_REQINFO(req_info);
+	struct net_device *dev = req_info->dev;
+
+	if (!tb[ETHTOOL_A_EEPROM_DATA_OFFSET] ||
+	    !tb[ETHTOOL_A_EEPROM_DATA_LENGTH] ||
+	    !tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS])
+		return -EINVAL;
+
+	request->i2c_address = nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS]);
+	if (request->i2c_address > ETH_MODULE_MAX_I2C_ADDRESS)
+		return -EINVAL;
+
+	request->offset = nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_OFFSET]);
+	request->length = nla_get_u32(tb[ETHTOOL_A_EEPROM_DATA_LENGTH]);
+	if (tb[ETHTOOL_A_EEPROM_DATA_PAGE] &&
+	    dev->ethtool_ops->get_module_eeprom_data_by_page &&
+	    request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN)
+		return -EINVAL;
+
+	if (tb[ETHTOOL_A_EEPROM_DATA_PAGE])
+		request->page = nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_PAGE]);
+	if (tb[ETHTOOL_A_EEPROM_DATA_BANK])
+		request->bank = nla_get_u8(tb[ETHTOOL_A_EEPROM_DATA_BANK]);
+
+	return 0;
+}
+
+static int eeprom_data_reply_size(const struct ethnl_req_info *req_base,
+				  const struct ethnl_reply_data *reply_base)
+{
+	const struct eeprom_data_req_info *request = EEPROM_DATA_REQINFO(req_base);
+
+	return nla_total_size(sizeof(u32)) + /* _EEPROM_DATA_LENGTH */
+	       nla_total_size(sizeof(u8) * request->length); /* _EEPROM_DATA */
+}
+
+static int eeprom_data_fill_reply(struct sk_buff *skb,
+				  const struct ethnl_req_info *req_base,
+				  const struct ethnl_reply_data *reply_base)
+{
+	struct eeprom_data_reply_data *reply = EEPROM_DATA_REPDATA(reply_base);
+
+	if (nla_put_u32(skb, ETHTOOL_A_EEPROM_DATA_LENGTH, reply->length) ||
+	    nla_put(skb, ETHTOOL_A_EEPROM_DATA, reply->length, reply->data))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static void eeprom_data_cleanup_data(struct ethnl_reply_data *reply_base)
+{
+	struct eeprom_data_reply_data *reply = EEPROM_DATA_REPDATA(reply_base);
+
+	kfree(reply->data);
+}
+
+const struct ethnl_request_ops ethnl_eeprom_data_request_ops = {
+	.request_cmd		= ETHTOOL_MSG_EEPROM_DATA_GET,
+	.reply_cmd		= ETHTOOL_MSG_EEPROM_DATA_GET_REPLY,
+	.hdr_attr		= ETHTOOL_A_EEPROM_DATA_HEADER,
+	.req_info_size		= sizeof(struct eeprom_data_req_info),
+	.reply_data_size	= sizeof(struct eeprom_data_reply_data),
+
+	.parse_request		= eeprom_data_parse_request,
+	.prepare_data		= eeprom_data_prepare_data,
+	.reply_size		= eeprom_data_reply_size,
+	.fill_reply		= eeprom_data_fill_reply,
+	.cleanup_data		= eeprom_data_cleanup_data,
+};
+
+const struct nla_policy ethnl_eeprom_data_get_policy[] = {
+	[ETHTOOL_A_EEPROM_DATA_HEADER]		= NLA_POLICY_NESTED(ethnl_header_policy),
+	[ETHTOOL_A_EEPROM_DATA_OFFSET]		= { .type = NLA_U32 },
+	[ETHTOOL_A_EEPROM_DATA_LENGTH]		= { .type = NLA_U32 },
+	[ETHTOOL_A_EEPROM_DATA_PAGE]		= { .type = NLA_U8 },
+	[ETHTOOL_A_EEPROM_DATA_BANK]		= { .type = NLA_U8 },
+	[ETHTOOL_A_EEPROM_DATA_I2C_ADDRESS]	= { .type = NLA_U8 },
+	[ETHTOOL_A_EEPROM_DATA]			= { .type = NLA_BINARY },
+};
+
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 50d3c8896f91..ff2528bee192 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -245,6 +245,7 @@  ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
 	[ETHTOOL_MSG_PAUSE_GET]		= &ethnl_pause_request_ops,
 	[ETHTOOL_MSG_EEE_GET]		= &ethnl_eee_request_ops,
 	[ETHTOOL_MSG_TSINFO_GET]	= &ethnl_tsinfo_request_ops,
+	[ETHTOOL_MSG_EEPROM_DATA_GET]	= &ethnl_eeprom_data_request_ops,
 };
 
 static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -912,6 +913,15 @@  static const struct genl_ops ethtool_genl_ops[] = {
 		.policy = ethnl_tunnel_info_get_policy,
 		.maxattr = ARRAY_SIZE(ethnl_tunnel_info_get_policy) - 1,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_EEPROM_DATA_GET,
+		.doit	= ethnl_default_doit,
+		.start	= ethnl_default_start,
+		.dumpit	= ethnl_default_dumpit,
+		.done	= ethnl_default_done,
+		.policy = ethnl_eeprom_data_get_policy,
+		.maxattr = ARRAY_SIZE(ethnl_eeprom_data_get_policy) - 1,
+	},
 };
 
 static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 6eabd58d81bf..60954c7b4dfe 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -344,6 +344,7 @@  extern const struct ethnl_request_ops ethnl_coalesce_request_ops;
 extern const struct ethnl_request_ops ethnl_pause_request_ops;
 extern const struct ethnl_request_ops ethnl_eee_request_ops;
 extern const struct ethnl_request_ops ethnl_tsinfo_request_ops;
+extern const struct ethnl_request_ops ethnl_eeprom_data_request_ops;
 
 extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
 extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -375,6 +376,7 @@  extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER +
 extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1];
 extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1];
 extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
+extern const struct nla_policy ethnl_eeprom_data_get_policy[ETHTOOL_A_EEPROM_DATA + 1];
 
 int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);