Message ID | 20240324135522.108564-6-guwen@linux.alibaba.com (mailing list archive) |
---|---|
State | RFC |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net/smc: SMC intra-OS shortcut with loopback-ism | expand |
On Sun, 2024-03-24 at 21:55 +0800, Wen Gu wrote: When I instrumented this to see, why I still see tons of my other temporary instrumentation messages from the "ism" driver, I found that in my setup loopback-ism is used rather infrequently. I suspect this is due to how the SMC proposals are constructed in net/smc/af_smc.c and net/smc/smc_pnet.c - and later evaluated in smc_check_ism_v2_match() - where there is a first-come-first-serve selection. I wonder if one should change that to favour loopback-ism over "real" ISM devices - and how this could be achieved elegantly. Just some food for thought... Probably little you can do on x86. Thanks, Gerd > +static int smc_lo_register_dmb(struct smcd_dev *smcd, struct > smcd_dmb *dmb, > + void *client_priv) > +{ > + struct smc_lo_dmb_node *dmb_node, *tmp_node; > + struct smc_lo_dev *ldev = smcd->priv; > + int sba_idx, rc; > + > + /* check space for new dmb */ > + for_each_clear_bit(sba_idx, ldev->sba_idx_mask, > SMC_LO_MAX_DMBS) { > + if (!test_and_set_bit(sba_idx, ldev->sba_idx_mask)) > + break; > + } > + if (sba_idx == SMC_LO_MAX_DMBS) > + return -ENOSPC; > + > + dmb_node = kzalloc(sizeof(*dmb_node), GFP_KERNEL); > + if (!dmb_node) { > + rc = -ENOMEM; > + goto err_bit; > + } > + > + dmb_node->sba_idx = sba_idx; > + dmb_node->len = dmb->dmb_len; > + dmb_node->cpu_addr = kzalloc(dmb_node->len, GFP_KERNEL | > + __GFP_NOWARN | __GFP_NORETRY | > + __GFP_NOMEMALLOC); > + if (!dmb_node->cpu_addr) { > + rc = -ENOMEM; > + goto err_node; > + } > + dmb_node->dma_addr = SMC_DMA_ADDR_INVALID; > + > +again: > + /* add new dmb into hash table */ > + get_random_bytes(&dmb_node->token, sizeof(dmb_node->token)); > + write_lock_bh(&ldev->dmb_ht_lock); > + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, > dmb_node->token) { > + if (tmp_node->token == dmb_node->token) { > + write_unlock_bh(&ldev->dmb_ht_lock); > + goto again; > + } > + } > + hash_add(ldev->dmb_ht, &dmb_node->list, dmb_node->token); > + write_unlock_bh(&ldev->dmb_ht_lock); 
> + > + dmb->sba_idx = dmb_node->sba_idx; > + dmb->dmb_tok = dmb_node->token; > + dmb->cpu_addr = dmb_node->cpu_addr; > + dmb->dma_addr = dmb_node->dma_addr; > + dmb->dmb_len = dmb_node->len; > + > + return 0; > + > +err_node: > + kfree(dmb_node); > +err_bit: > + clear_bit(sba_idx, ldev->sba_idx_mask); > + return rc; > +} > + > +static int smc_lo_unregister_dmb(struct smcd_dev *smcd, struct > smcd_dmb *dmb) > +{ > + struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node; > + struct smc_lo_dev *ldev = smcd->priv; > + > + /* remove dmb from hash table */ > + write_lock_bh(&ldev->dmb_ht_lock); > + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb- > >dmb_tok) { > + if (tmp_node->token == dmb->dmb_tok) { > + dmb_node = tmp_node; > + break; > + } > + } > + if (!dmb_node) { > + write_unlock_bh(&ldev->dmb_ht_lock); > + return -EINVAL; > + } > + hash_del(&dmb_node->list); > + write_unlock_bh(&ldev->dmb_ht_lock); > + > + clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask); > + kfree(dmb_node->cpu_addr); > + kfree(dmb_node); > + > + return 0; > +} > + > static int smc_lo_add_vlan_id(struct smcd_dev *smcd, u64 vlan_id) > { > return -EOPNOTSUPP; > @@ -75,6 +164,38 @@ static int smc_lo_signal_event(struct smcd_dev > *dev, struct smcd_gid *rgid, > return 0; > } > > +static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok, > + unsigned int idx, bool sf, unsigned int > offset, > + void *data, unsigned int size) > +{ > + struct smc_lo_dmb_node *rmb_node = NULL, *tmp_node; > + struct smc_lo_dev *ldev = smcd->priv; > + struct smc_connection *conn; > + > + read_lock_bh(&ldev->dmb_ht_lock); > + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, > dmb_tok) { > + if (tmp_node->token == dmb_tok) { > + rmb_node = tmp_node; > + break; > + } > + } > + if (!rmb_node) { > + read_unlock_bh(&ldev->dmb_ht_lock); > + return -EINVAL; > + } > + memcpy((char *)rmb_node->cpu_addr + offset, data, size); > + read_unlock_bh(&ldev->dmb_ht_lock); > + > + if (sf) { > + conn = 
smcd->conn[rmb_node->sba_idx]; > + if (conn && !conn->killed) > + tasklet_schedule(&conn->rx_tsklet); > + else > + return -EPIPE; > + } > + return 0; > +} > + > static int smc_lo_supports_v2(void) > { > return SMC_LO_V2_CAPABLE; > @@ -101,14 +222,14 @@ static struct device *smc_lo_get_dev(struct > smcd_dev *smcd) > > static const struct smcd_ops lo_ops = { > .query_remote_gid = smc_lo_query_rgid, > - .register_dmb = NULL, > - .unregister_dmb = NULL, > + .register_dmb = smc_lo_register_dmb, > + .unregister_dmb = smc_lo_unregister_dmb, > .add_vlan_id = smc_lo_add_vlan_id, > .del_vlan_id = smc_lo_del_vlan_id, > .set_vlan_required = smc_lo_set_vlan_required, > .reset_vlan_required = smc_lo_reset_vlan_required, > .signal_event = smc_lo_signal_event, > - .move_data = NULL, > + .move_data = smc_lo_move_data, > .supports_v2 = smc_lo_supports_v2, > .get_local_gid = smc_lo_get_local_gid, > .get_chid = smc_lo_get_chid, > @@ -173,6 +294,8 @@ static void smcd_lo_unregister_dev(struct > smc_lo_dev *ldev) > static int smc_lo_dev_init(struct smc_lo_dev *ldev) > { > smc_lo_generate_ids(ldev); > + rwlock_init(&ldev->dmb_ht_lock); > + hash_init(ldev->dmb_ht); > return smcd_lo_register_dev(ldev); > } > > diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h > index 11868e5ac732..6c4a390430f3 100644 > --- a/net/smc/smc_loopback.h > +++ b/net/smc/smc_loopback.h > @@ -20,13 +20,26 @@ > > #if IS_ENABLED(CONFIG_SMC_LO) > #define SMC_LO_MAX_DMBS 5000 > +#define SMC_LO_DMBS_HASH_BITS 12 > #define SMC_LO_RESERVED_CHID 0xFFFF > > +struct smc_lo_dmb_node { > + struct hlist_node list; > + u64 token; > + u32 len; > + u32 sba_idx; > + void *cpu_addr; > + dma_addr_t dma_addr; > +}; > + > struct smc_lo_dev { > struct smcd_dev *smcd; > struct device dev; > u16 chid; > struct smcd_gid local_gid; > + rwlock_t dmb_ht_lock; > + DECLARE_BITMAP(sba_idx_mask, SMC_LO_MAX_DMBS); > + DECLARE_HASHTABLE(dmb_ht, SMC_LO_DMBS_HASH_BITS); > }; > #endif >
On 2024/4/4 01:20, Gerd Bayer wrote: > On Sun, 2024-03-24 at 21:55 +0800, Wen Gu wrote: > > When I instrumented this to see, why I still see tons of my other > temporary instrumentation messages from the "ism" driver, I found that > in my setup loopback-ism is used rather infrequently. > > I suspect this is due to how the SMC proposals are constructed in > net/smc/af_smc.c and net/smc/smc_pnet.c - and later evaluated in > smc_check_ism_v2_match() - where there is a first-come-first-serve > selection. > > I wonder if one should change that to favour loopback-ism over "real" > ISM devices - and how this could be achieved elegantly. > > Just some food for thought... Probably little you can do on x86. > Yes, it is about the priority of the available ISM devices, which is currently decided by their order in the smcd_dev_list. ISMv2 devices (without pnetid) that are registered later will be added to the beginning of the list (see smcd_register_dev()). So there is a chance that loopback-ism will not be ranked first, since it is added to the list earlier, during smc_init(). If we have a runtime switch for loopback-ism, we can re-activate loopback-ism, which causes it to be re-added at the beginning of the dev list and be chosen first. Alternatively, a new netlink command could adjust the slot order of the available ISM devices in the list. As we discussed before, those could be tasks for stage 1 or stage 2. Thanks!
> Thanks, > Gerd > >> +static int smc_lo_register_dmb(struct smcd_dev *smcd, struct >> smcd_dmb *dmb, >> + void *client_priv) >> +{ >> + struct smc_lo_dmb_node *dmb_node, *tmp_node; >> + struct smc_lo_dev *ldev = smcd->priv; >> + int sba_idx, rc; >> + >> + /* check space for new dmb */ >> + for_each_clear_bit(sba_idx, ldev->sba_idx_mask, >> SMC_LO_MAX_DMBS) { >> + if (!test_and_set_bit(sba_idx, ldev->sba_idx_mask)) >> + break; >> + } >> + if (sba_idx == SMC_LO_MAX_DMBS) >> + return -ENOSPC; >> + >> + dmb_node = kzalloc(sizeof(*dmb_node), GFP_KERNEL); >> + if (!dmb_node) { >> + rc = -ENOMEM; >> + goto err_bit; >> + } >> + >> + dmb_node->sba_idx = sba_idx; >> + dmb_node->len = dmb->dmb_len; >> + dmb_node->cpu_addr = kzalloc(dmb_node->len, GFP_KERNEL | >> + __GFP_NOWARN | __GFP_NORETRY | >> + __GFP_NOMEMALLOC); >> + if (!dmb_node->cpu_addr) { >> + rc = -ENOMEM; >> + goto err_node; >> + } >> + dmb_node->dma_addr = SMC_DMA_ADDR_INVALID; >> + >> +again: >> + /* add new dmb into hash table */ >> + get_random_bytes(&dmb_node->token, sizeof(dmb_node->token)); >> + write_lock_bh(&ldev->dmb_ht_lock); >> + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, >> dmb_node->token) { >> + if (tmp_node->token == dmb_node->token) { >> + write_unlock_bh(&ldev->dmb_ht_lock); >> + goto again; >> + } >> + } >> + hash_add(ldev->dmb_ht, &dmb_node->list, dmb_node->token); >> + write_unlock_bh(&ldev->dmb_ht_lock); >> + >> + dmb->sba_idx = dmb_node->sba_idx; >> + dmb->dmb_tok = dmb_node->token; >> + dmb->cpu_addr = dmb_node->cpu_addr; >> + dmb->dma_addr = dmb_node->dma_addr; >> + dmb->dmb_len = dmb_node->len; >> + >> + return 0; >> + >> +err_node: >> + kfree(dmb_node); >> +err_bit: >> + clear_bit(sba_idx, ldev->sba_idx_mask); >> + return rc; >> +} >> + >> +static int smc_lo_unregister_dmb(struct smcd_dev *smcd, struct >> smcd_dmb *dmb) >> +{ >> + struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node; >> + struct smc_lo_dev *ldev = smcd->priv; >> + >> + /* remove dmb from hash table */ >> + 
write_lock_bh(&ldev->dmb_ht_lock); >> + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb- >>> dmb_tok) { >> + if (tmp_node->token == dmb->dmb_tok) { >> + dmb_node = tmp_node; >> + break; >> + } >> + } >> + if (!dmb_node) { >> + write_unlock_bh(&ldev->dmb_ht_lock); >> + return -EINVAL; >> + } >> + hash_del(&dmb_node->list); >> + write_unlock_bh(&ldev->dmb_ht_lock); >> + >> + clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask); >> + kfree(dmb_node->cpu_addr); >> + kfree(dmb_node); >> + >> + return 0; >> +} >> + >> static int smc_lo_add_vlan_id(struct smcd_dev *smcd, u64 vlan_id) >> { >> return -EOPNOTSUPP; >> @@ -75,6 +164,38 @@ static int smc_lo_signal_event(struct smcd_dev >> *dev, struct smcd_gid *rgid, >> return 0; >> } >> >> +static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok, >> + unsigned int idx, bool sf, unsigned int >> offset, >> + void *data, unsigned int size) >> +{ >> + struct smc_lo_dmb_node *rmb_node = NULL, *tmp_node; >> + struct smc_lo_dev *ldev = smcd->priv; >> + struct smc_connection *conn; >> + >> + read_lock_bh(&ldev->dmb_ht_lock); >> + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, >> dmb_tok) { >> + if (tmp_node->token == dmb_tok) { >> + rmb_node = tmp_node; >> + break; >> + } >> + } >> + if (!rmb_node) { >> + read_unlock_bh(&ldev->dmb_ht_lock); >> + return -EINVAL; >> + } >> + memcpy((char *)rmb_node->cpu_addr + offset, data, size); >> + read_unlock_bh(&ldev->dmb_ht_lock); >> + >> + if (sf) { >> + conn = smcd->conn[rmb_node->sba_idx]; >> + if (conn && !conn->killed) >> + tasklet_schedule(&conn->rx_tsklet); >> + else >> + return -EPIPE; >> + } >> + return 0; >> +} >> + >> static int smc_lo_supports_v2(void) >> { >> return SMC_LO_V2_CAPABLE; >> @@ -101,14 +222,14 @@ static struct device *smc_lo_get_dev(struct >> smcd_dev *smcd) >> >> static const struct smcd_ops lo_ops = { >> .query_remote_gid = smc_lo_query_rgid, >> - .register_dmb = NULL, >> - .unregister_dmb = NULL, >> + .register_dmb = smc_lo_register_dmb, >> + 
.unregister_dmb = smc_lo_unregister_dmb, >> .add_vlan_id = smc_lo_add_vlan_id, >> .del_vlan_id = smc_lo_del_vlan_id, >> .set_vlan_required = smc_lo_set_vlan_required, >> .reset_vlan_required = smc_lo_reset_vlan_required, >> .signal_event = smc_lo_signal_event, >> - .move_data = NULL, >> + .move_data = smc_lo_move_data, >> .supports_v2 = smc_lo_supports_v2, >> .get_local_gid = smc_lo_get_local_gid, >> .get_chid = smc_lo_get_chid, >> @@ -173,6 +294,8 @@ static void smcd_lo_unregister_dev(struct >> smc_lo_dev *ldev) >> static int smc_lo_dev_init(struct smc_lo_dev *ldev) >> { >> smc_lo_generate_ids(ldev); >> + rwlock_init(&ldev->dmb_ht_lock); >> + hash_init(ldev->dmb_ht); >> return smcd_lo_register_dev(ldev); >> } >> >> diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h >> index 11868e5ac732..6c4a390430f3 100644 >> --- a/net/smc/smc_loopback.h >> +++ b/net/smc/smc_loopback.h >> @@ -20,13 +20,26 @@ >> >> #if IS_ENABLED(CONFIG_SMC_LO) >> #define SMC_LO_MAX_DMBS 5000 >> +#define SMC_LO_DMBS_HASH_BITS 12 >> #define SMC_LO_RESERVED_CHID 0xFFFF >> >> +struct smc_lo_dmb_node { >> + struct hlist_node list; >> + u64 token; >> + u32 len; >> + u32 sba_idx; >> + void *cpu_addr; >> + dma_addr_t dma_addr; >> +}; >> + >> struct smc_lo_dev { >> struct smcd_dev *smcd; >> struct device dev; >> u16 chid; >> struct smcd_gid local_gid; >> + rwlock_t dmb_ht_lock; >> + DECLARE_BITMAP(sba_idx_mask, SMC_LO_MAX_DMBS); >> + DECLARE_HASHTABLE(dmb_ht, SMC_LO_DMBS_HASH_BITS); >> }; >> #endif >>
On Thu, 2024-04-04 at 18:20 +0800, Wen Gu wrote: > > On 2024/4/4 01:20, Gerd Bayer wrote: > > On Sun, 2024-03-24 at 21:55 +0800, Wen Gu wrote: > > > > When I instrumented this to see, why I still see tons of my other > > temporary instrumentation messages from the "ism" driver, I found that > > in my setup loopback-ism is used rather infrequently. > > > > I suspect this is due to how the SMC proposals are constructed in > > net/smc/af_smc.c and net/smc/smc_pnet.c - and later evaluated in > > smc_check_ism_v2_match() - where there is a first-come-first-serve > > selection. > > > > I wonder if one should change that to favour loopback-ism over "real" > > ISM devices - and how this could be achieved elegantly. > > > > Just some food for thought... Probably little you can do on x86. > > > > Yes, it is about the priority of available ISM devices, and now it > is decided by their order in the smcd_dev_list. The later registered > ISMv2 devices(without pnetid) will be added to the beginning of the > list (see smcd_register_dev()). So there is a probability that > loopback-ism will not be ranked first, since it is added into list > earlier during smc_init(). > > If we have the runtime switch of loopback-ism, we can re-active the > loopback-ism, that make it be re-added into the beginning of the dev > list and be chosen first. Or a new netlink command to adjust the slot > order of available ISM devices in the list. As we discussed before, > that could be tasks in stage 1 or stage 2. > > Thanks! Maybe when adding the ISM devices we could instead make sure that all ISM devices are added after loopback and that loopback is added at the beginning. I think loopback should always be preferred, and I would consider it a bug if it isn't faster, too. Between virtio-ism and ISM it may be less clear, so maybe for stage 2 we would want a priority setting and then insert devices ordered by priority. Thoughts?
On 2024/4/4 19:27, Niklas Schnelle wrote: > On Thu, 2024-04-04 at 18:20 +0800, Wen Gu wrote: >> >> On 2024/4/4 01:20, Gerd Bayer wrote: >>> On Sun, 2024-03-24 at 21:55 +0800, Wen Gu wrote: >>> >>> When I instrumented this to see, why I still see tons of my other >>> temporary instrumentation messages from the "ism" driver, I found that >>> in my setup loopback-ism is used rather infrequently. >>> >>> I suspect this is due to how the SMC proposals are constructed in >>> net/smc/af_smc.c and net/smc/smc_pnet.c - and later evaluated in >>> smc_check_ism_v2_match() - where there is a first-come-first-serve >>> selection. >>> >>> I wonder if one should change that to favour loopback-ism over "real" >>> ISM devices - and how this could be achieved elegantly. >>> >>> Just some food for thought... Probably little you can do on x86. >>> >> >> Yes, it is about the priority of available ISM devices, and now it >> is decided by their order in the smcd_dev_list. The later registered >> ISMv2 devices(without pnetid) will be added to the beginning of the >> list (see smcd_register_dev()). So there is a probability that >> loopback-ism will not be ranked first, since it is added into list >> earlier during smc_init(). >> >> If we have the runtime switch of loopback-ism, we can re-active the >> loopback-ism, that make it be re-added into the beginning of the dev >> list and be chosen first. Or a new netlink command to adjust the slot >> order of available ISM devices in the list. As we discussed before, >> that could be tasks in stage 1 or stage 2. >> >> Thanks! > > Maybe when adding the ISM devices we could instead make sure that all > ISM devices are added after loopback and loopback is added in the > beginning. I think loopback should always be preferred and would > consider it a bug if it isn't faster too. Between virtio-ism and ISM it > may be less clear so maybe for stage 2 we would want a priority setting > and then insert ordered by priority. Thoughts? I have no objection. 
If we all agree, I will keep it at the beginning of the list. Thanks!
On Thu, 2024-04-04 at 21:44 +0800, Wen Gu wrote: > > On 2024/4/4 19:27, Niklas Schnelle wrote: > > On Thu, 2024-04-04 at 18:20 +0800, Wen Gu wrote: > > > > > > On 2024/4/4 01:20, Gerd Bayer wrote: > > > > On Sun, 2024-03-24 at 21:55 +0800, Wen Gu wrote: > > > > > > > > When I instrumented this to see, why I still see tons of my other > > > > temporary instrumentation messages from the "ism" driver, I found that > > > > in my setup loopback-ism is used rather infrequently. > > > > > > > > I suspect this is due to how the SMC proposals are constructed in > > > > net/smc/af_smc.c and net/smc/smc_pnet.c - and later evaluated in > > > > smc_check_ism_v2_match() - where there is a first-come-first-serve > > > > selection. > > > > > > > > I wonder if one should change that to favour loopback-ism over "real" > > > > ISM devices - and how this could be achieved elegantly. > > > > > > > > Just some food for thought... Probably little you can do on x86. > > > > > > > > > > Yes, it is about the priority of available ISM devices, and now it > > > is decided by their order in the smcd_dev_list. The later registered > > > ISMv2 devices(without pnetid) will be added to the beginning of the > > > list (see smcd_register_dev()). So there is a probability that > > > loopback-ism will not be ranked first, since it is added into list > > > earlier during smc_init(). > > > > > > If we have the runtime switch of loopback-ism, we can re-active the > > > loopback-ism, that make it be re-added into the beginning of the dev > > > list and be chosen first. Or a new netlink command to adjust the slot > > > order of available ISM devices in the list. As we discussed before, > > > that could be tasks in stage 1 or stage 2. > > > > > > Thanks! > > > > Maybe when adding the ISM devices we could instead make sure that all > > ISM devices are added after loopback and loopback is added in the > > beginning. 
I think loopback should always be preferred and would > > consider it a bug if it isn't faster too. Between virtio-ism and ISM it > > may be less clear so maybe for stage 2 we would want a priority setting > > and then insert ordered by priority. Thoughts? > I have no objection. If we all agree, I will keep it at the beginning of the list. > > Thanks! > > I think this is a decision that needs to be made by the SMC maintainers, and Wenjia will be back next week. That said, the current code basically prioritizes ISM devices without that having been a conscious and documented decision. Also note that, due to hotplug, an LPAR could be using loopback-ism happily until an ISM device is hot-plugged and new connections suddenly switch to ISM.
diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c index 4b5e864ebca3..8ee02cfd7fb4 100644 --- a/net/smc/smc_loopback.c +++ b/net/smc/smc_loopback.c @@ -15,11 +15,13 @@ #include <linux/types.h> #include <net/smc.h> +#include "smc_cdc.h" #include "smc_ism.h" #include "smc_loopback.h" #if IS_ENABLED(CONFIG_SMC_LO) #define SMC_LO_V2_CAPABLE 0x1 /* loopback-ism acts as ISMv2 */ +#define SMC_DMA_ADDR_INVALID (~(dma_addr_t)0) static const char smc_lo_dev_name[] = "loopback-ism"; static struct smc_lo_dev *lo_dev; @@ -49,6 +51,93 @@ static int smc_lo_query_rgid(struct smcd_dev *smcd, struct smcd_gid *rgid, return 0; } +static int smc_lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb, + void *client_priv) +{ + struct smc_lo_dmb_node *dmb_node, *tmp_node; + struct smc_lo_dev *ldev = smcd->priv; + int sba_idx, rc; + + /* check space for new dmb */ + for_each_clear_bit(sba_idx, ldev->sba_idx_mask, SMC_LO_MAX_DMBS) { + if (!test_and_set_bit(sba_idx, ldev->sba_idx_mask)) + break; + } + if (sba_idx == SMC_LO_MAX_DMBS) + return -ENOSPC; + + dmb_node = kzalloc(sizeof(*dmb_node), GFP_KERNEL); + if (!dmb_node) { + rc = -ENOMEM; + goto err_bit; + } + + dmb_node->sba_idx = sba_idx; + dmb_node->len = dmb->dmb_len; + dmb_node->cpu_addr = kzalloc(dmb_node->len, GFP_KERNEL | + __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC); + if (!dmb_node->cpu_addr) { + rc = -ENOMEM; + goto err_node; + } + dmb_node->dma_addr = SMC_DMA_ADDR_INVALID; + +again: + /* add new dmb into hash table */ + get_random_bytes(&dmb_node->token, sizeof(dmb_node->token)); + write_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_node->token) { + if (tmp_node->token == dmb_node->token) { + write_unlock_bh(&ldev->dmb_ht_lock); + goto again; + } + } + hash_add(ldev->dmb_ht, &dmb_node->list, dmb_node->token); + write_unlock_bh(&ldev->dmb_ht_lock); + + dmb->sba_idx = dmb_node->sba_idx; + dmb->dmb_tok = dmb_node->token; + dmb->cpu_addr = dmb_node->cpu_addr; + 
dmb->dma_addr = dmb_node->dma_addr; + dmb->dmb_len = dmb_node->len; + + return 0; + +err_node: + kfree(dmb_node); +err_bit: + clear_bit(sba_idx, ldev->sba_idx_mask); + return rc; +} + +static int smc_lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb) +{ + struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node; + struct smc_lo_dev *ldev = smcd->priv; + + /* remove dmb from hash table */ + write_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) { + if (tmp_node->token == dmb->dmb_tok) { + dmb_node = tmp_node; + break; + } + } + if (!dmb_node) { + write_unlock_bh(&ldev->dmb_ht_lock); + return -EINVAL; + } + hash_del(&dmb_node->list); + write_unlock_bh(&ldev->dmb_ht_lock); + + clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask); + kfree(dmb_node->cpu_addr); + kfree(dmb_node); + + return 0; +} + static int smc_lo_add_vlan_id(struct smcd_dev *smcd, u64 vlan_id) { return -EOPNOTSUPP; @@ -75,6 +164,38 @@ static int smc_lo_signal_event(struct smcd_dev *dev, struct smcd_gid *rgid, return 0; } +static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok, + unsigned int idx, bool sf, unsigned int offset, + void *data, unsigned int size) +{ + struct smc_lo_dmb_node *rmb_node = NULL, *tmp_node; + struct smc_lo_dev *ldev = smcd->priv; + struct smc_connection *conn; + + read_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) { + if (tmp_node->token == dmb_tok) { + rmb_node = tmp_node; + break; + } + } + if (!rmb_node) { + read_unlock_bh(&ldev->dmb_ht_lock); + return -EINVAL; + } + memcpy((char *)rmb_node->cpu_addr + offset, data, size); + read_unlock_bh(&ldev->dmb_ht_lock); + + if (sf) { + conn = smcd->conn[rmb_node->sba_idx]; + if (conn && !conn->killed) + tasklet_schedule(&conn->rx_tsklet); + else + return -EPIPE; + } + return 0; +} + static int smc_lo_supports_v2(void) { return SMC_LO_V2_CAPABLE; @@ -101,14 +222,14 @@ static struct device *smc_lo_get_dev(struct smcd_dev *smcd) 
static const struct smcd_ops lo_ops = { .query_remote_gid = smc_lo_query_rgid, - .register_dmb = NULL, - .unregister_dmb = NULL, + .register_dmb = smc_lo_register_dmb, + .unregister_dmb = smc_lo_unregister_dmb, .add_vlan_id = smc_lo_add_vlan_id, .del_vlan_id = smc_lo_del_vlan_id, .set_vlan_required = smc_lo_set_vlan_required, .reset_vlan_required = smc_lo_reset_vlan_required, .signal_event = smc_lo_signal_event, - .move_data = NULL, + .move_data = smc_lo_move_data, .supports_v2 = smc_lo_supports_v2, .get_local_gid = smc_lo_get_local_gid, .get_chid = smc_lo_get_chid, @@ -173,6 +294,8 @@ static void smcd_lo_unregister_dev(struct smc_lo_dev *ldev) static int smc_lo_dev_init(struct smc_lo_dev *ldev) { smc_lo_generate_ids(ldev); + rwlock_init(&ldev->dmb_ht_lock); + hash_init(ldev->dmb_ht); return smcd_lo_register_dev(ldev); } diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h index 11868e5ac732..6c4a390430f3 100644 --- a/net/smc/smc_loopback.h +++ b/net/smc/smc_loopback.h @@ -20,13 +20,26 @@ #if IS_ENABLED(CONFIG_SMC_LO) #define SMC_LO_MAX_DMBS 5000 +#define SMC_LO_DMBS_HASH_BITS 12 #define SMC_LO_RESERVED_CHID 0xFFFF +struct smc_lo_dmb_node { + struct hlist_node list; + u64 token; + u32 len; + u32 sba_idx; + void *cpu_addr; + dma_addr_t dma_addr; +}; + struct smc_lo_dev { struct smcd_dev *smcd; struct device dev; u16 chid; struct smcd_gid local_gid; + rwlock_t dmb_ht_lock; + DECLARE_BITMAP(sba_idx_mask, SMC_LO_MAX_DMBS); + DECLARE_HASHTABLE(dmb_ht, SMC_LO_DMBS_HASH_BITS); }; #endif
This implements the DMB (un)registration and data-move operations of the loopback-ism device. Signed-off-by: Wen Gu <guwen@linux.alibaba.com> --- net/smc/smc_loopback.c | 129 ++++++++++++++++++++++++++++++++++++++++- net/smc/smc_loopback.h | 13 +++++ 2 files changed, 139 insertions(+), 3 deletions(-)