Message ID | 1717169861-15825-1-git-send-email-shradhagupta@linux.microsoft.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net-next,v3] net: mana: Allow variable size indirection table | expand |
On Fri, May 31, 2024 at 08:37:41AM -0700, Shradha Gupta wrote: > Allow variable size indirection table allocation in MANA instead > of using a constant value MANA_INDIRECT_TABLE_SIZE. > The size is now derived from the MANA_QUERY_VPORT_CONFIG and the > indirection table is allocated dynamically. > > Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com> > Reviewed-by: Dexuan Cui <decui@microsoft.com> > Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com> > --- > Changes in v3: > * Fixed the memory leak(save_table) in mana_set_rxfh() > > Changes in v2: > * Rebased to latest net-next tree > * Rearranged cleanup code in mana_probe_port to avoid extra operations > --- > drivers/infiniband/hw/mana/qp.c | 10 +-- > drivers/net/ethernet/microsoft/mana/mana_en.c | 68 ++++++++++++++++--- > .../ethernet/microsoft/mana/mana_ethtool.c | 27 +++++--- > include/net/mana/gdma.h | 4 +- > include/net/mana/mana.h | 9 +-- > 5 files changed, 89 insertions(+), 29 deletions(-) <...> > +free_indir: > + apc->indir_table_sz = 0; > + kfree(apc->indir_table); > + apc->indir_table = NULL; > + kfree(apc->rxobj_table); > + apc->rxobj_table = NULL; > reset_apc: > kfree(apc->rxqs); > apc->rxqs = NULL; > @@ -2897,6 +2936,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) > { <...> > @@ -2931,6 +2972,11 @@ void mana_remove(struct gdma_dev *gd, bool suspending) > } > > unregister_netdevice(ndev); > + apc->indir_table_sz = 0; > + kfree(apc->indir_table); > + apc->indir_table = NULL; > + kfree(apc->rxobj_table); > + apc->rxobj_table = NULL; Why do you need to NULLify here? Will apc be accessible after the call to mana_remove(), or after a port probe failure? Thanks
On Mon, Jun 03, 2024 at 11:41:22AM +0300, Leon Romanovsky wrote: > On Fri, May 31, 2024 at 08:37:41AM -0700, Shradha Gupta wrote: > > Allow variable size indirection table allocation in MANA instead > > of using a constant value MANA_INDIRECT_TABLE_SIZE. > > The size is now derived from the MANA_QUERY_VPORT_CONFIG and the > > indirection table is allocated dynamically. > > > > Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com> > > Reviewed-by: Dexuan Cui <decui@microsoft.com> > > Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com> > > --- > > Changes in v3: > > * Fixed the memory leak(save_table) in mana_set_rxfh() > > > > Changes in v2: > > * Rebased to latest net-next tree > > * Rearranged cleanup code in mana_probe_port to avoid extra operations > > --- > > drivers/infiniband/hw/mana/qp.c | 10 +-- > > drivers/net/ethernet/microsoft/mana/mana_en.c | 68 ++++++++++++++++--- > > .../ethernet/microsoft/mana/mana_ethtool.c | 27 +++++--- > > include/net/mana/gdma.h | 4 +- > > include/net/mana/mana.h | 9 +-- > > 5 files changed, 89 insertions(+), 29 deletions(-) > > <...> > > > +free_indir: > > + apc->indir_table_sz = 0; > > + kfree(apc->indir_table); > > + apc->indir_table = NULL; > > + kfree(apc->rxobj_table); > > + apc->rxobj_table = NULL; > > reset_apc: > > kfree(apc->rxqs); > > apc->rxqs = NULL; > > @@ -2897,6 +2936,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) > > { > > <...> > > > @@ -2931,6 +2972,11 @@ void mana_remove(struct gdma_dev *gd, bool suspending) > > } > > > > unregister_netdevice(ndev); > > + apc->indir_table_sz = 0; > > + kfree(apc->indir_table); > > + apc->indir_table = NULL; > > + kfree(apc->rxobj_table); > > + apc->rxobj_table = NULL; > > Why do you need to NULLify here? Will apc is going to be accessible > after call to mana_remove? or port probe failure? Right, they won't be accessed. This is just for the sake of completeness and to prevent double free in case there are code bug in other place. Regards, Shradha. 
> > Thanks
On Mon, Jun 03, 2024 at 10:36:48PM -0700, Shradha Gupta wrote: > On Mon, Jun 03, 2024 at 11:41:22AM +0300, Leon Romanovsky wrote: > > On Fri, May 31, 2024 at 08:37:41AM -0700, Shradha Gupta wrote: > > > Allow variable size indirection table allocation in MANA instead > > > of using a constant value MANA_INDIRECT_TABLE_SIZE. > > > The size is now derived from the MANA_QUERY_VPORT_CONFIG and the > > > indirection table is allocated dynamically. > > > > > > Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com> > > > Reviewed-by: Dexuan Cui <decui@microsoft.com> > > > Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com> > > > --- > > > Changes in v3: > > > * Fixed the memory leak(save_table) in mana_set_rxfh() > > > > > > Changes in v2: > > > * Rebased to latest net-next tree > > > * Rearranged cleanup code in mana_probe_port to avoid extra operations > > > --- > > > drivers/infiniband/hw/mana/qp.c | 10 +-- > > > drivers/net/ethernet/microsoft/mana/mana_en.c | 68 ++++++++++++++++--- > > > .../ethernet/microsoft/mana/mana_ethtool.c | 27 +++++--- > > > include/net/mana/gdma.h | 4 +- > > > include/net/mana/mana.h | 9 +-- > > > 5 files changed, 89 insertions(+), 29 deletions(-) > > > > <...> > > > > > +free_indir: > > > + apc->indir_table_sz = 0; > > > + kfree(apc->indir_table); > > > + apc->indir_table = NULL; > > > + kfree(apc->rxobj_table); > > > + apc->rxobj_table = NULL; > > > reset_apc: > > > kfree(apc->rxqs); > > > apc->rxqs = NULL; > > > @@ -2897,6 +2936,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) > > > { > > > > <...> > > > > > @@ -2931,6 +2972,11 @@ void mana_remove(struct gdma_dev *gd, bool suspending) > > > } > > > > > > unregister_netdevice(ndev); > > > + apc->indir_table_sz = 0; > > > + kfree(apc->indir_table); > > > + apc->indir_table = NULL; > > > + kfree(apc->rxobj_table); > > > + apc->rxobj_table = NULL; > > > > Why do you need to NULLify here? Will apc is going to be accessible > > after call to mana_remove? 
or port probe failure? > Right, they won't be accessed. This is just for the sake of completeness > and to prevent double free in case there are code bug in other place. This coding pattern is called defensive programming, which is discouraged in the kernel. You are not preventing a double free, but hiding bugs that various static analysis tools could otherwise have found. Please don't do it. Thanks > > Regards, > Shradha. > > > > Thanks
On Tue, Jun 04, 2024 at 11:32:05AM +0300, Leon Romanovsky wrote: > On Mon, Jun 03, 2024 at 10:36:48PM -0700, Shradha Gupta wrote: > > On Mon, Jun 03, 2024 at 11:41:22AM +0300, Leon Romanovsky wrote: > > > On Fri, May 31, 2024 at 08:37:41AM -0700, Shradha Gupta wrote: > > > > Allow variable size indirection table allocation in MANA instead > > > > of using a constant value MANA_INDIRECT_TABLE_SIZE. > > > > The size is now derived from the MANA_QUERY_VPORT_CONFIG and the > > > > indirection table is allocated dynamically. > > > > > > > > Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com> > > > > Reviewed-by: Dexuan Cui <decui@microsoft.com> > > > > Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com> > > > > --- > > > > Changes in v3: > > > > * Fixed the memory leak(save_table) in mana_set_rxfh() > > > > > > > > Changes in v2: > > > > * Rebased to latest net-next tree > > > > * Rearranged cleanup code in mana_probe_port to avoid extra operations > > > > --- > > > > drivers/infiniband/hw/mana/qp.c | 10 +-- > > > > drivers/net/ethernet/microsoft/mana/mana_en.c | 68 ++++++++++++++++--- > > > > .../ethernet/microsoft/mana/mana_ethtool.c | 27 +++++--- > > > > include/net/mana/gdma.h | 4 +- > > > > include/net/mana/mana.h | 9 +-- > > > > 5 files changed, 89 insertions(+), 29 deletions(-) > > > > > > <...> > > > > > > > +free_indir: > > > > + apc->indir_table_sz = 0; > > > > + kfree(apc->indir_table); > > > > + apc->indir_table = NULL; > > > > + kfree(apc->rxobj_table); > > > > + apc->rxobj_table = NULL; > > > > reset_apc: > > > > kfree(apc->rxqs); > > > > apc->rxqs = NULL; > > > > @@ -2897,6 +2936,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) > > > > { > > > > > > <...> > > > > > > > @@ -2931,6 +2972,11 @@ void mana_remove(struct gdma_dev *gd, bool suspending) > > > > } > > > > > > > > unregister_netdevice(ndev); > > > > + apc->indir_table_sz = 0; > > > > + kfree(apc->indir_table); > > > > + apc->indir_table = NULL; > > > > + 
kfree(apc->rxobj_table); > > > > + apc->rxobj_table = NULL; > > > > > > Why do you need to NULLify here? Will apc is going to be accessible > > > after call to mana_remove? or port probe failure? > > Right, they won't be accessed. This is just for the sake of completeness > > and to prevent double free in case there are code bug in other place. > > This coding patter is called defensive programming, which is discouraged > in the kernel. You are not preventing double free, but hiding bugs which > were possible to be found by various static analysis tools. > > Please don't do it. > > Thanks Understood, it makes sense. Let me fix this in the next version. Regards, Shradha > > > > > Regards, > > Shradha. > > > > > > Thanks
On Fri, May 31, 2024 at 08:37:41AM -0700, Shradha Gupta wrote: > Allow variable size indirection table allocation in MANA instead > of using a constant value MANA_INDIRECT_TABLE_SIZE. > The size is now derived from the MANA_QUERY_VPORT_CONFIG and the > indirection table is allocated dynamically. > > Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com> > Reviewed-by: Dexuan Cui <decui@microsoft.com> > Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com> ... > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c ... > @@ -2344,11 +2352,33 @@ static int mana_create_vport(struct mana_port_context *apc, > return mana_create_txq(apc, net); > } > > +static int mana_rss_table_alloc(struct mana_port_context *apc) > +{ > + if (!apc->indir_table_sz) { > + netdev_err(apc->ndev, > + "Indirection table size not set for vPort %d\n", > + apc->port_idx); > + return -EINVAL; > + } > + > + apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); > + if (!apc->indir_table) > + return -ENOMEM; > + > + apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL); > + if (!apc->rxobj_table) { > + kfree(apc->indir_table); Hi, Shradha Perhaps I am on the wrong track here, but I have some concerns about clean-up paths. Firstly, I think that apc->indir_table should be set to NULL here for consistency with other clean-up paths. Or alternatively, fields of apc should not be set to NULL elsewhere after being freed. In looking into this I noticed that mana_probe() does not call mana_remove() or return an error in the cases where mana_probe_port() or mana_attach() fail unless add_adev also fails. If so, is that intentional? In any case, I would suggest as a follow-up, arranging things so that when an error occurs in a function, anything that was allocated is unwound before returning an error. I think this would make allocation/deallocation easier to reason about.
And I suspect it would avoid both the need for fields of structures to be zeroed after being freed, and the need to call mana_remove() from mana_probe(). > + return -ENOMEM; > + } > + > + return 0; > +} > + > static void mana_rss_table_init(struct mana_port_context *apc) > { > int i; > > - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) > + for (i = 0; i < apc->indir_table_sz; i++) > apc->indir_table[i] = > ethtool_rxfh_indir_default(i, apc->num_queues); > } ... > @@ -2739,11 +2772,17 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, > err = register_netdev(ndev); > if (err) { > netdev_err(ndev, "Unable to register netdev.\n"); > - goto reset_apc; > + goto free_indir; > } > > return 0; > > +free_indir: > + apc->indir_table_sz = 0; > + kfree(apc->indir_table); > + apc->indir_table = NULL; > + kfree(apc->rxobj_table); > + apc->rxobj_table = NULL; > reset_apc: > kfree(apc->rxqs); > apc->rxqs = NULL; nit: Not strictly related to this patch, but the reset_apc code should probably be a call to mana_cleanup_port_context() as it is the dual of mana_init_port_context() which is called earlier in mana_probe_port() ... > @@ -2931,6 +2972,11 @@ void mana_remove(struct gdma_dev *gd, bool suspending) > } > > unregister_netdevice(ndev); > + apc->indir_table_sz = 0; > + kfree(apc->indir_table); > + apc->indir_table = NULL; > + kfree(apc->rxobj_table); > + apc->rxobj_table = NULL; The code to free and zero indir_table_sz and indir_table appears twice in this patch. Perhaps a helper to do this, which would be the dual of mana_rss_table_alloc is in order. > > rtnl_unlock(); > ...
On Tue, Jun 04, 2024 at 10:33:49AM +0100, Simon Horman wrote: > On Fri, May 31, 2024 at 08:37:41AM -0700, Shradha Gupta wrote: > > Allow variable size indirection table allocation in MANA instead > > of using a constant value MANA_INDIRECT_TABLE_SIZE. > > The size is now derived from the MANA_QUERY_VPORT_CONFIG and the > > indirection table is allocated dynamically. > > > > Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com> > > Reviewed-by: Dexuan Cui <decui@microsoft.com> > > Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com> > > ... > > > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c > > ... > > > @@ -2344,11 +2352,33 @@ static int mana_create_vport(struct mana_port_context *apc, > > return mana_create_txq(apc, net); > > } > > > > +static int mana_rss_table_alloc(struct mana_port_context *apc) > > +{ > > + if (!apc->indir_table_sz) { > > + netdev_err(apc->ndev, > > + "Indirection table size not set for vPort %d\n", > > + apc->port_idx); > > + return -EINVAL; > > + } > > + > > + apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); > > + if (!apc->indir_table) > > + return -ENOMEM; > > + > > + apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL); > > + if (!apc->rxobj_table) { > > + kfree(apc->indir_table); > > Hi, Shradha > > Perhaps I am on the wrong track here, but I have some concerns > about clean-up paths. > > Firstly. I think that apc->indir_table should be to NULL here for > consistency with other clean-up paths. Or alternatively, fields of apc > should not set to NULL elsewhere after being freed. Hi Simon, Thanks for the comments. This makes sense, I am planning of consistently removing the NULLify from other places too as per Leon's comments. > > In looking into this I noticed that mana_probe() does not call > mana_remove() or return an error in the cases where mana_probe_port() or > mana_attach() fail unless add_adev also fails. 
If so, is that intentional? Right, so most calls like mana_probe_port(), mana_attach() cleanup after themselves in the code if there is any error. So, not having to call mana_remove() in these cases in mana_probe() is intentional. But I do agree that an error is returned in mana_probe() only if add_adev also fails. I'll fix that too in the next version > > In any case, I would suggest as a follow-up, arranging things so that when > an error occurs in a function, anything that was allocated is unwound > before returning an error. > > I think this would make allocation/deallocation easier to reason with. > And I suspect it would avoid both the need for fields of structures to be > zeroed after being freed, and the need to call mana_remove() from > mana_probe(). Agreed > > > + return -ENOMEM; > > + } > > + > > + return 0; > > +} > > + > > static void mana_rss_table_init(struct mana_port_context *apc) > > { > > int i; > > > > - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) > > + for (i = 0; i < apc->indir_table_sz; i++) > > apc->indir_table[i] = > > ethtool_rxfh_indir_default(i, apc->num_queues); > > } > > ... > > > @@ -2739,11 +2772,17 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, > > err = register_netdev(ndev); > > if (err) { > > netdev_err(ndev, "Unable to register netdev.\n"); > > - goto reset_apc; > > + goto free_indir; > > } > > > > return 0; > > > > +free_indir: > > + apc->indir_table_sz = 0; > > + kfree(apc->indir_table); > > + apc->indir_table = NULL; > > + kfree(apc->rxobj_table); > > + apc->rxobj_table = NULL; > > reset_apc: > > kfree(apc->rxqs); > > apc->rxqs = NULL; > > nit: Not strictly related to this patch, but the reset_apc code should > probably be a call to mana_cleanup_port_context() as it is the dual of > mana_init_port_context() which is called earlier in mana_probe_port() Sure, let me do that too. > > ... 
> > > @@ -2931,6 +2972,11 @@ void mana_remove(struct gdma_dev *gd, bool suspending) > > } > > > > unregister_netdevice(ndev); > > + apc->indir_table_sz = 0; > > + kfree(apc->indir_table); > > + apc->indir_table = NULL; > > + kfree(apc->rxobj_table); > > + apc->rxobj_table = NULL; > > The code to free and zero indir_table_sz and indir_table appears twice > in this patch. Perhaps a helper to do this, which would be the dual > of mana_rss_table_alloc is in order. Makes sense, will change this too. Thanks, Shradha. > > > > > rtnl_unlock(); > > > > ...
On Wed, Jun 05, 2024 at 01:39:06AM -0700, Shradha Gupta wrote: > On Tue, Jun 04, 2024 at 10:33:49AM +0100, Simon Horman wrote: > > On Fri, May 31, 2024 at 08:37:41AM -0700, Shradha Gupta wrote: > > > Allow variable size indirection table allocation in MANA instead > > > of using a constant value MANA_INDIRECT_TABLE_SIZE. > > > The size is now derived from the MANA_QUERY_VPORT_CONFIG and the > > > indirection table is allocated dynamically. > > > > > > Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com> > > > Reviewed-by: Dexuan Cui <decui@microsoft.com> > > > Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com> > > > > ... > > > > > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c > > > > ... > > > > > @@ -2344,11 +2352,33 @@ static int mana_create_vport(struct mana_port_context *apc, > > > return mana_create_txq(apc, net); > > > } > > > > > > +static int mana_rss_table_alloc(struct mana_port_context *apc) > > > +{ > > > + if (!apc->indir_table_sz) { > > > + netdev_err(apc->ndev, > > > + "Indirection table size not set for vPort %d\n", > > > + apc->port_idx); > > > + return -EINVAL; > > > + } > > > + > > > + apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); > > > + if (!apc->indir_table) > > > + return -ENOMEM; > > > + > > > + apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL); > > > + if (!apc->rxobj_table) { > > > + kfree(apc->indir_table); > > > > Hi, Shradha > > > > Perhaps I am on the wrong track here, but I have some concerns > > about clean-up paths. > > > > Firstly. I think that apc->indir_table should be to NULL here for > > consistency with other clean-up paths. Or alternatively, fields of apc > > should not set to NULL elsewhere after being freed. > > Hi Simon, > > Thanks for the comments. This makes sense, I am planning of consistently > removing the NULLify from other places too as per Leon's comments. Great! 
> > In looking into this I noticed that mana_probe() does not call > > mana_remove() or return an error in the cases where mana_probe_port() > > or mana_attach() fail unless add_adev also fails. If so, is that > > intentional? > > Right, so most calls like mana_probe_port(), mana_attach() cleanup after > themselves in the code if there is any error. So, not having to call > mana_remove() in these cases in mana_probe() is intentional. But I do > agree that an error is returned in mana_probe() only if add_adev also > fails. I'll fix that too in the next version I'm not entirely sure, but perhaps that is a candidate for a separate patch. > > > > In any case, I would suggest as a follow-up, arranging things so that > > when an error occurs in a function, anything that was allocated is > > unwound before returning an error. > > > > I think this would make allocation/deallocation easier to reason with. > > And I suspect it would avoid both the need for fields of structures to > > be zeroed after being freed, and the need to call mana_remove() from > > mana_probe(). > > Agreed > > > > > + return -ENOMEM; > > > + } > > > + > > > + return 0; > > > +} > > > + > > > static void mana_rss_table_init(struct mana_port_context *apc) > > > { > > > int i; > > > > > > - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) > > > + for (i = 0; i < apc->indir_table_sz; i++) > > > apc->indir_table[i] = > > > ethtool_rxfh_indir_default(i, apc->num_queues); > > > } > > > > ... 
> > > > > @@ -2739,11 +2772,17 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, > > > err = register_netdev(ndev); > > > if (err) { > > > netdev_err(ndev, "Unable to register netdev.\n"); > > > - goto reset_apc; > > > + goto free_indir; > > > } > > > > > > return 0; > > > > > > +free_indir: > > > + apc->indir_table_sz = 0; > > > + kfree(apc->indir_table); > > > + apc->indir_table = NULL; > > > + kfree(apc->rxobj_table); > > > + apc->rxobj_table = NULL; > > > reset_apc: > > > kfree(apc->rxqs); > > > apc->rxqs = NULL; > > > > nit: Not strictly related to this patch, but the reset_apc code should > > probably be a call to mana_cleanup_port_context() as it is the dual of > > mana_init_port_context() which is called earlier in mana_probe_port() > > Sure, let me do that too. FWIW, I think it would be appropriate to put that change in a separate patch. > > > > ... > > > > > @@ -2931,6 +2972,11 @@ void mana_remove(struct gdma_dev *gd, bool suspending) > > > } > > > > > > unregister_netdevice(ndev); > > > + apc->indir_table_sz = 0; > > > + kfree(apc->indir_table); > > > + apc->indir_table = NULL; > > > + kfree(apc->rxobj_table); > > > + apc->rxobj_table = NULL; > > > > The code to free and zero indir_table_sz and indir_table appears twice > > in this patch. Perhaps a helper to do this, which would be the dual > > of mana_rss_table_alloc is in order. > Makes sense, will change this too. Thanks.
On Thu, Jun 06, 2024 at 05:33:34PM +0100, Simon Horman wrote: > On Wed, Jun 05, 2024 at 01:39:06AM -0700, Shradha Gupta wrote: > > On Tue, Jun 04, 2024 at 10:33:49AM +0100, Simon Horman wrote: > > > On Fri, May 31, 2024 at 08:37:41AM -0700, Shradha Gupta wrote: > > > > Allow variable size indirection table allocation in MANA instead > > > > of using a constant value MANA_INDIRECT_TABLE_SIZE. > > > > The size is now derived from the MANA_QUERY_VPORT_CONFIG and the > > > > indirection table is allocated dynamically. > > > > > > > > Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com> > > > > Reviewed-by: Dexuan Cui <decui@microsoft.com> > > > > Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com> > > > > > > ... > > > > > > > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c > > > > > > ... > > > > > > > @@ -2344,11 +2352,33 @@ static int mana_create_vport(struct mana_port_context *apc, > > > > return mana_create_txq(apc, net); > > > > } > > > > > > > > +static int mana_rss_table_alloc(struct mana_port_context *apc) > > > > +{ > > > > + if (!apc->indir_table_sz) { > > > > + netdev_err(apc->ndev, > > > > + "Indirection table size not set for vPort %d\n", > > > > + apc->port_idx); > > > > + return -EINVAL; > > > > + } > > > > + > > > > + apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); > > > > + if (!apc->indir_table) > > > > + return -ENOMEM; > > > > + > > > > + apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL); > > > > + if (!apc->rxobj_table) { > > > > + kfree(apc->indir_table); > > > > > > Hi, Shradha > > > > > > Perhaps I am on the wrong track here, but I have some concerns > > > about clean-up paths. > > > > > > Firstly. I think that apc->indir_table should be to NULL here for > > > consistency with other clean-up paths. Or alternatively, fields of apc > > > should not set to NULL elsewhere after being freed. 
> > > > Hi Simon, > > > > Thanks for the comments. This makes sense, I am planning of consistently > > removing the NULLify from other places too as per Leon's comments. > > Great! > > > > In looking into this I noticed that mana_probe() does not call > > > mana_remove() or return an error in the cases where mana_probe_port() > > > or mana_attach() fail unless add_adev also fails. If so, is that > > > intentional? > > > > Right, so most calls like mana_probe_port(), mana_attach() cleanup after > > themselves in the code if there is any error. So, not having to call > > mana_remove() in these cases in mana_probe() is intentional. But I do > > agree that an error is returned in mana_probe() only if add_adev also > > fails. I'll fix that too in the next version > > I'm not entirely sure, but perhaps that is a candidate for a separate patch. > > > > > > > In any case, I would suggest as a follow-up, arranging things so that > > > when an error occurs in a function, anything that was allocated is > > > unwound before returning an error. > > > > > > I think this would make allocation/deallocation easier to reason with. > > > And I suspect it would avoid both the need for fields of structures to > > > be zeroed after being freed, and the need to call mana_remove() from > > > mana_probe(). > > > > Agreed > > > > > > > + return -ENOMEM; > > > > + } > > > > + > > > > + return 0; > > > > +} > > > > + > > > > static void mana_rss_table_init(struct mana_port_context *apc) > > > > { > > > > int i; > > > > > > > > - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) > > > > + for (i = 0; i < apc->indir_table_sz; i++) > > > > apc->indir_table[i] = > > > > ethtool_rxfh_indir_default(i, apc->num_queues); > > > > } > > > > > > ... 
> > > > > > > @@ -2739,11 +2772,17 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, > > > > err = register_netdev(ndev); > > > > if (err) { > > > > netdev_err(ndev, "Unable to register netdev.\n"); > > > > - goto reset_apc; > > > > + goto free_indir; > > > > } > > > > > > > > return 0; > > > > > > > > +free_indir: > > > > + apc->indir_table_sz = 0; > > > > + kfree(apc->indir_table); > > > > + apc->indir_table = NULL; > > > > + kfree(apc->rxobj_table); > > > > + apc->rxobj_table = NULL; > > > > reset_apc: > > > > kfree(apc->rxqs); > > > > apc->rxqs = NULL; > > > > > > nit: Not strictly related to this patch, but the reset_apc code should > > > probably be a call to mana_cleanup_port_context() as it is the dual of > > > mana_init_port_context() which is called earlier in mana_probe_port() > > > > Sure, let me do that too. > > FWIIW, I think it would be appropriate to put that change in a separate patch. Fixing this and other similar changes in a different patch. Thanks > > > > > > > ... > > > > > > > @@ -2931,6 +2972,11 @@ void mana_remove(struct gdma_dev *gd, bool suspending) > > > > } > > > > > > > > unregister_netdevice(ndev); > > > > + apc->indir_table_sz = 0; > > > > + kfree(apc->indir_table); > > > > + apc->indir_table = NULL; > > > > + kfree(apc->rxobj_table); > > > > + apc->rxobj_table = NULL; > > > > > > The code to free and zero indir_table_sz and indir_table appears twice > > > in this patch. Perhaps a helper to do this, which would be the dual > > > of mana_rss_table_alloc is in order. > > Makes sense, will change this too. > > Thanks.
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index ba13c5abf8ef..2d411a16a127 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -21,7 +21,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, gc = mdev_to_gc(dev); - req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_SIZE); + req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) return -ENOMEM; @@ -41,18 +41,18 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, if (log_ind_tbl_size) req->rss_enable = true; - req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE; + req->num_indir_entries = MANA_INDIRECT_TABLE_DEF_SIZE; req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2, indir_tab); req->update_indir_tab = true; req->cqe_coalescing_enable = 1; /* The ind table passed to the hardware must have - * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb + * MANA_INDIRECT_TABLE_DEF_SIZE entries. 
Adjust the verb
 	 * ind_table to MANA_INDIRECT_TABLE_SIZE if required
 	 */
 	ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
-	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+	for (i = 0; i < MANA_INDIRECT_TABLE_DEF_SIZE; i++) {
 		req->indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
 		ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
 			  req->indir_tab[i]);
@@ -137,7 +137,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 	}
 
 	ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
-	if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
+	if (ind_tbl_size > MANA_INDIRECT_TABLE_DEF_SIZE) {
 		ibdev_dbg(&mdev->ib_dev,
 			  "Indirect table size %d exceeding limit\n",
 			  ind_tbl_size);
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index d087cf954f75..851e1b9761b3 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -481,7 +481,7 @@ static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb,
 	struct sock *sk = skb->sk;
 	int txq;
 
-	txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK];
+	txq = apc->indir_table[hash & (apc->indir_table_sz - 1)];
 
 	if (txq != old_q && sk && sk_fullsock(sk) &&
 	    rcu_access_pointer(sk->sk_dst_cache))
@@ -962,7 +962,16 @@ static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index,
 
 	*max_sq = resp.max_num_sq;
 	*max_rq = resp.max_num_rq;
-	*num_indir_entry = resp.num_indirection_ent;
+	if (resp.num_indirection_ent > 0 &&
+	    resp.num_indirection_ent <= MANA_INDIRECT_TABLE_MAX_SIZE &&
+	    is_power_of_2(resp.num_indirection_ent)) {
+		*num_indir_entry = resp.num_indirection_ent;
+	} else {
+		netdev_warn(apc->ndev,
+			    "Setting indirection table size to default %d for vPort %d\n",
+			    MANA_INDIRECT_TABLE_DEF_SIZE, apc->port_idx);
+		*num_indir_entry = MANA_INDIRECT_TABLE_DEF_SIZE;
+	}
 
 	apc->port_handle = resp.vport;
 	ether_addr_copy(apc->mac_addr, resp.mac_addr);
@@ -1054,14 +1063,13 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
 				   bool update_default_rxobj, bool update_key,
 				   bool update_tab)
 {
-	u16 num_entries = MANA_INDIRECT_TABLE_SIZE;
 	struct mana_cfg_rx_steer_req_v2 *req;
 	struct mana_cfg_rx_steer_resp resp = {};
 	struct net_device *ndev = apc->ndev;
 	u32 req_buf_size;
 	int err;
 
-	req_buf_size = struct_size(req, indir_tab, num_entries);
+	req_buf_size = struct_size(req, indir_tab, apc->indir_table_sz);
 	req = kzalloc(req_buf_size, GFP_KERNEL);
 	if (!req)
 		return -ENOMEM;
@@ -1072,7 +1080,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
 	req->hdr.req.msg_version = GDMA_MESSAGE_V2;
 
 	req->vport = apc->port_handle;
-	req->num_indir_entries = num_entries;
+	req->num_indir_entries = apc->indir_table_sz;
 	req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2,
 					 indir_tab);
 	req->rx_enable = rx;
@@ -1111,7 +1119,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
 	}
 
 	netdev_info(ndev, "Configured steering vPort %llu entries %u\n",
-		    apc->port_handle, num_entries);
+		    apc->port_handle, apc->indir_table_sz);
 out:
 	kfree(req);
 	return err;
@@ -2344,11 +2352,33 @@ static int mana_create_vport(struct mana_port_context *apc,
 	return mana_create_txq(apc, net);
 }
 
+static int mana_rss_table_alloc(struct mana_port_context *apc)
+{
+	if (!apc->indir_table_sz) {
+		netdev_err(apc->ndev,
+			   "Indirection table size not set for vPort %d\n",
+			   apc->port_idx);
+		return -EINVAL;
+	}
+
+	apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL);
+	if (!apc->indir_table)
+		return -ENOMEM;
+
+	apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL);
+	if (!apc->rxobj_table) {
+		kfree(apc->indir_table);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static void mana_rss_table_init(struct mana_port_context *apc)
 {
 	int i;
 
-	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+	for (i = 0; i < apc->indir_table_sz; i++)
 		apc->indir_table[i] =
 			ethtool_rxfh_indir_default(i, apc->num_queues);
 }
@@ -2361,7 +2391,7 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
 	int i;
 
 	if (update_tab) {
-		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+		for (i = 0; i < apc->indir_table_sz; i++) {
 			queue_idx = apc->indir_table[i];
 			apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj;
 		}
@@ -2466,7 +2496,6 @@ static int mana_init_port(struct net_device *ndev)
 	struct mana_port_context *apc = netdev_priv(ndev);
 	u32 max_txq, max_rxq, max_queues;
 	int port_idx = apc->port_idx;
-	u32 num_indirect_entries;
 	int err;
 
 	err = mana_init_port_context(apc);
@@ -2474,7 +2503,7 @@ static int mana_init_port(struct net_device *ndev)
 		return err;
 
 	err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq,
-				   &num_indirect_entries);
+				   &apc->indir_table_sz);
 	if (err) {
 		netdev_err(ndev, "Failed to query info for vPort %d\n",
 			   port_idx);
@@ -2723,6 +2752,10 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
 	if (err)
 		goto free_net;
 
+	err = mana_rss_table_alloc(apc);
+	if (err)
+		goto reset_apc;
+
 	netdev_lockdep_set_classes(ndev);
 
 	ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
@@ -2739,11 +2772,17 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
 	err = register_netdev(ndev);
 	if (err) {
 		netdev_err(ndev, "Unable to register netdev.\n");
-		goto reset_apc;
+		goto free_indir;
 	}
 
 	return 0;
 
+free_indir:
+	apc->indir_table_sz = 0;
+	kfree(apc->indir_table);
+	apc->indir_table = NULL;
+	kfree(apc->rxobj_table);
+	apc->rxobj_table = NULL;
 reset_apc:
 	kfree(apc->rxqs);
 	apc->rxqs = NULL;
@@ -2897,6 +2936,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
 {
 	struct gdma_context *gc = gd->gdma_context;
 	struct mana_context *ac = gd->driver_data;
+	struct mana_port_context *apc;
 	struct device *dev = gc->dev;
 	struct net_device *ndev;
 	int err;
@@ -2908,6 +2948,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
 
 	for (i = 0; i < ac->num_ports; i++) {
 		ndev = ac->ports[i];
+		apc = netdev_priv(ndev);
 		if (!ndev) {
 			if (i == 0)
 				dev_err(dev, "No net device to remove\n");
@@ -2931,6 +2972,11 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
 		}
 
 		unregister_netdevice(ndev);
+		apc->indir_table_sz = 0;
+		kfree(apc->indir_table);
+		apc->indir_table = NULL;
+		kfree(apc->rxobj_table);
+		apc->rxobj_table = NULL;
 
 		rtnl_unlock();
 
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index ab2413d71f6c..146d5db1792f 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -245,7 +245,9 @@ static u32 mana_get_rxfh_key_size(struct net_device *ndev)
 
 static u32 mana_rss_indir_size(struct net_device *ndev)
 {
-	return MANA_INDIRECT_TABLE_SIZE;
+	struct mana_port_context *apc = netdev_priv(ndev);
+
+	return apc->indir_table_sz;
 }
 
 static int mana_get_rxfh(struct net_device *ndev,
@@ -257,7 +259,7 @@ static int mana_get_rxfh(struct net_device *ndev,
 	rxfh->hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */
 
 	if (rxfh->indir) {
-		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+		for (i = 0; i < apc->indir_table_sz; i++)
 			rxfh->indir[i] = apc->indir_table[i];
 	}
 
@@ -273,8 +275,8 @@ static int mana_set_rxfh(struct net_device *ndev,
 {
 	struct mana_port_context *apc = netdev_priv(ndev);
 	bool update_hash = false, update_table = false;
-	u32 save_table[MANA_INDIRECT_TABLE_SIZE];
 	u8 save_key[MANA_HASH_KEY_SIZE];
+	u32 *save_table;
 	int i, err;
 
 	if (!apc->port_is_up)
@@ -284,13 +286,19 @@ static int mana_set_rxfh(struct net_device *ndev,
 	    rxfh->hfunc != ETH_RSS_HASH_TOP)
 		return -EOPNOTSUPP;
 
+	save_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL);
+	if (!save_table)
+		return -ENOMEM;
+
 	if (rxfh->indir) {
-		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
-			if (rxfh->indir[i] >= apc->num_queues)
-				return -EINVAL;
+		for (i = 0; i < apc->indir_table_sz; i++)
+			if (rxfh->indir[i] >= apc->num_queues) {
+				err = -EINVAL;
+				goto cleanup;
+			}
 
 		update_table = true;
-		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+		for (i = 0; i < apc->indir_table_sz; i++) {
 			save_table[i] = apc->indir_table[i];
 			apc->indir_table[i] = rxfh->indir[i];
 		}
@@ -306,7 +314,7 @@ static int mana_set_rxfh(struct net_device *ndev,
 	if (err) { /* recover to original values */
 
 		if (update_table) {
-			for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+			for (i = 0; i < apc->indir_table_sz; i++)
 				apc->indir_table[i] = save_table[i];
 		}
 
@@ -316,6 +324,9 @@ static int mana_set_rxfh(struct net_device *ndev,
 		mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table);
 	}
 
+cleanup:
+	kfree(save_table);
+
 	return err;
 }
 
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 27684135bb4d..c547756c4284 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -543,11 +543,13 @@ enum {
  */
 #define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
 #define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
+#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5)
 
 #define GDMA_DRV_CAP_FLAGS1 \
 	(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
 	 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
-	 GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG)
+	 GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
+	 GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT)
 
 #define GDMA_DRV_CAP_FLAGS2 0
 
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 561f6719fb4e..59823901b74f 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -30,8 +30,8 @@ enum TRI_STATE {
 };
 
 /* Number of entries for hardware indirection table must be in power of 2 */
-#define MANA_INDIRECT_TABLE_SIZE 64
-#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1)
+#define MANA_INDIRECT_TABLE_MAX_SIZE 512
+#define MANA_INDIRECT_TABLE_DEF_SIZE 64
 
 /* The Toeplitz hash key's length in bytes: should be multiple of 8 */
 #define MANA_HASH_KEY_SIZE 40
@@ -410,10 +410,11 @@ struct mana_port_context {
 	struct mana_tx_qp *tx_qp;
 
 	/* Indirection Table for RX & TX. The values are queue indexes */
-	u32 indir_table[MANA_INDIRECT_TABLE_SIZE];
+	u32 *indir_table;
+	u32 indir_table_sz;
 
 	/* Indirection table containing RxObject Handles */
-	mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE];
+	mana_handle_t *rxobj_table;
 
 	/* Hash key used by the NIC */
 	u8 hashkey[MANA_HASH_KEY_SIZE];