diff mbox series

[net-next,v2,11/11] net/smc: implement DMB-merged operations of loopback-ism

Message ID 20240307095536.29648-12-guwen@linux.alibaba.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net/smc: SMC intra-OS shortcut with loopback-ism | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 940 this patch: 940
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 10 of 10 maintainers
netdev/build_clang success Errors and warnings before: 956 this patch: 956
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 956 this patch: 956
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 226 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-03-07--15-00 (tests: 892)

Commit Message

Wen Gu March 7, 2024, 9:55 a.m. UTC
This implements operations related to merging sndbuf with peer DMB in
loopback-ism. The DMB won't be freed until no sndbuf is attached to it.

Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
---
 net/smc/smc_loopback.c | 136 +++++++++++++++++++++++++++++++++++------
 net/smc/smc_loopback.h |   3 +
 2 files changed, 119 insertions(+), 20 deletions(-)

Comments

Jan Karcher March 12, 2024, 8:06 a.m. UTC | #1
On 07/03/2024 10:55, Wen Gu wrote:
> This implements operations related to merging sndbuf with peer DMB in
> loopback-ism. The DMB won't be freed until no sndbuf is attached to it.

Hi Wen Gu,

while I'm still reviewing let me drop a lockdep finding.

> 
> Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
> ---
>   net/smc/smc_loopback.c | 136 +++++++++++++++++++++++++++++++++++------
>   net/smc/smc_loopback.h |   3 +
>   2 files changed, 119 insertions(+), 20 deletions(-)
> 
> diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c
> index 6828e0ad3e90..7e772f3772de 100644
> --- a/net/smc/smc_loopback.c
> +++ b/net/smc/smc_loopback.c

[...]

>   
> @@ -170,8 +249,22 @@ static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok,
>   {
>   	struct smc_lo_dmb_node *rmb_node = NULL, *tmp_node;
>   	struct smc_lo_dev *ldev = smcd->priv;
> -
> -	read_lock(&ldev->dmb_ht_lock);
> +	struct smc_connection *conn;
> +
> +	if (!sf)
> +		/* since sndbuf is merged with peer DMB, there is
> +		 * no need to copy data from sndbuf to peer DMB.
> +		 */
> +		return 0;
> +
> +	/* read_lock_bh() is used here just to make lockdep
> +	 * happy, because spin_(un)lock_bh(&conn->send_lock) wraps
> +	 * smc_lo_move_data() and if we use read_lock() here, lockdep
> +	 * will complain about SOFTIRQ-safe -> SOFTIRQ-unsafe lock
> +	 * order detected, but in fact ldev->dmb_ht_lock will never
> +	 * be held in bh context.
> +	 */
> +	read_lock_bh(&ldev->dmb_ht_lock);
>   	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) {
>   		if (tmp_node->token == dmb_tok) {
>   			rmb_node = tmp_node;
> @@ -182,19 +275,14 @@ static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok,
>   		read_unlock(&ldev->dmb_ht_lock);
>   		return -EINVAL;
>   	}
> -	read_unlock(&ldev->dmb_ht_lock);
> +	read_unlock_bh(&ldev->dmb_ht_lock);
>   
>   	memcpy((char *)rmb_node->cpu_addr + offset, data, size);
>   
> -	if (sf) {
> -		struct smc_connection *conn =
> -			smcd->conn[rmb_node->sba_idx];
> -
> -		if (conn && !conn->killed)
> -			smcd_cdc_rx_handler(conn);
> -		else
> -			return -EPIPE;
> -	}
> +	conn = smcd->conn[rmb_node->sba_idx];
> +	if (!conn || conn->killed)
> +		return -EPIPE;
> +	smcd_cdc_rx_handler(conn);

[ 2385.528515] ============================================
[ 2385.528517] WARNING: possible recursive locking detected
[ 2385.528519] 6.8.0-loopback_ism-g30af186e8a18-dirty #12 Not tainted
[ 2385.528521] --------------------------------------------
[ 2385.528522] smcapp/51326 is trying to acquire lock:
[ 2385.528524] 000000018707a128 (&smc->conn.send_lock){+...}-{2:2}, at: 
smc_tx_sndbuf_nonempty+0xba/0x1c0 [smc]
[ 2385.528552]
                but task is already holding lock:
[ 2385.528554] 0000000187078728 (&smc->conn.send_lock){+...}-{2:2}, at: 
smc_cdc_get_slot_and_msg_send+0x66/0xa0 [smc]
[ 2385.528568]
                other info that might help us debug this:
[ 2385.528570]  Possible unsafe locking scenario:

[ 2385.528572]        CPU0
[ 2385.528573]        ----
[ 2385.528574]   lock(&smc->conn.send_lock);
[ 2385.528576]   lock(&smc->conn.send_lock);
[ 2385.528579]
                 *** DEADLOCK ***

[ 2385.528580]  May be due to missing lock nesting notation

[ 2385.528582] 3 locks held by smcapp/51326:
[ 2385.528584]  #0: 0000000187078378 (sk_lock-AF_SMC){+.+.}-{0:0}, at: 
smc_recvmsg+0x3c/0x2b0 [smc]
[ 2385.528598]  #1: 0000000187078728 (&smc->conn.send_lock){+...}-{2:2}, 
at: smc_cdc_get_slot_and_msg_send+0x66/0xa0 [smc]
[ 2385.528613]  #2: 0000000187079ce8 (slock-AF_SMC){+...}-{2:2}, at: 
smc_cdc_msg_recv+0x56/0xe0 [smc]
[ 2385.528627]
                stack backtrace:
[ 2385.528660] CPU: 3 PID: 51326 Comm: smcapp Not tainted 
6.8.0-loopback_ism-g30af186e8a18-dirty #12
[ 2385.528663] Hardware name: IBM 3906 M04 704 (LPAR)
[ 2385.528664] Call Trace:
[ 2385.528666]  [<000000012db60788>] dump_stack_lvl+0x90/0x120
[ 2385.528671]  [<000000012cc6d088>] validate_chain+0x560/0x960
[ 2385.528677]  [<000000012cc6f644>] __lock_acquire+0x654/0xd58
[ 2385.528680]  [<000000012cc70a04>] lock_acquire.part.0+0xec/0x260
[ 2385.528683]  [<000000012cc70c24>] lock_acquire+0xac/0x170
[ 2385.528687]  [<000000012dba4ccc>] _raw_spin_lock_bh+0x5c/0xb0
[ 2385.528690]  [<000003ff80453b32>] smc_tx_sndbuf_nonempty+0xba/0x1c0 [smc]
[ 2385.528702]  [<000003ff8045428a>] smc_tx_pending+0x32/0x60 [smc]
[ 2385.528712]  [<000003ff80451f02>] smc_cdc_msg_recv_action+0x3c2/0x528 
[smc]
[ 2385.528723]  [<000003ff804520cc>] smc_cdc_msg_recv+0x64/0xe0 [smc]
[ 2385.528734]  [<000003ff80452a4c>] smcd_cdc_rx_handler+0x64/0x70 [smc]
[ 2385.528745]  [<000003ff80459f7e>] smc_lo_move_data+0xde/0x100 [smc]
[ 2385.528755]  [<000003ff804533e0>] smcd_tx_ism_write+0x68/0x90 [smc]
[ 2385.528766]  [<000003ff804528a4>] smcd_cdc_msg_send+0x74/0x118 [smc]
[ 2385.528776]  [<000003ff804529b8>] 
smc_cdc_get_slot_and_msg_send+0x70/0xa0 [smc]
[ 2385.528788]  [<000003ff804543ec>] smc_tx_consumer_update+0xe4/0x1b0 [smc]
[ 2385.528798]  [<000003ff8045458e>] smc_rx_update_consumer+0x86/0x170 [smc]
[ 2385.528809]  [<000003ff80455ba8>] smc_rx_recvmsg+0x3b8/0x6e8 [smc]
[ 2385.528820]  [<000003ff804388a4>] smc_recvmsg+0xdc/0x2b0 [smc]
[ 2385.528831]  [<000000012d8a6d58>] sock_recvmsg+0x70/0xb0
[ 2385.528837]  [<000000012d8aa0c8>] __sys_recvfrom+0xa8/0x128
[ 2385.528840]  [<000000012d8ab3ca>] __do_sys_socketcall+0x1ca/0x398
[ 2385.528844]  [<000000012db8d4c4>] __do_syscall+0x244/0x308
[ 2385.528847]  [<000000012dba6140>] system_call+0x70/0x98
[ 2385.528850] INFO: lockdep is turned off.


I did not investigate deeper, yet. Just an early heads up that there 
might be something broken.

Thank you
- Jan


>   	return 0;
>   }
>   
> @@ -226,6 +314,9 @@ static const struct smcd_ops lo_ops = {
>   	.query_remote_gid = smc_lo_query_rgid,
>   	.register_dmb = smc_lo_register_dmb,
>   	.unregister_dmb = smc_lo_unregister_dmb,
> +	.support_dmb_nocopy = smc_lo_support_dmb_nocopy,
> +	.attach_dmb = smc_lo_attach_dmb,
> +	.detach_dmb = smc_lo_detach_dmb,
>   	.add_vlan_id = smc_lo_add_vlan_id,
>   	.del_vlan_id = smc_lo_del_vlan_id,
>   	.set_vlan_required = smc_lo_set_vlan_required,
> @@ -304,12 +395,17 @@ static int smc_lo_dev_init(struct smc_lo_dev *ldev)
>   	smc_lo_generate_id(ldev);
>   	rwlock_init(&ldev->dmb_ht_lock);
>   	hash_init(ldev->dmb_ht);
> +	atomic_set(&ldev->dmb_cnt, 0);
> +	init_waitqueue_head(&ldev->ldev_release);
> +
>   	return smcd_lo_register_dev(ldev);
>   }
>   
>   static void smc_lo_dev_exit(struct smc_lo_dev *ldev)
>   {
>   	smcd_lo_unregister_dev(ldev);
> +	if (atomic_read(&ldev->dmb_cnt))
> +		wait_event(ldev->ldev_release, !atomic_read(&ldev->dmb_cnt));
>   }
>   
>   static void smc_lo_dev_release(struct device *dev)
> diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
> index 24ab9d747613..9156a6c37e65 100644
> --- a/net/smc/smc_loopback.h
> +++ b/net/smc/smc_loopback.h
> @@ -30,6 +30,7 @@ struct smc_lo_dmb_node {
>   	u32 sba_idx;
>   	void *cpu_addr;
>   	dma_addr_t dma_addr;
> +	refcount_t refcnt;
>   };
>   
>   struct smc_lo_dev {
> @@ -37,9 +38,11 @@ struct smc_lo_dev {
>   	struct device dev;
>   	u16 chid;
>   	struct smcd_gid local_gid;
> +	atomic_t dmb_cnt;
>   	rwlock_t dmb_ht_lock;
>   	DECLARE_BITMAP(sba_idx_mask, SMC_LO_MAX_DMBS);
>   	DECLARE_HASHTABLE(dmb_ht, SMC_LO_DMBS_HASH_BITS);
> +	wait_queue_head_t ldev_release;
>   };
>   #endif
>
Wen Gu March 12, 2024, 2:11 p.m. UTC | #2
On 2024/3/12 16:06, Jan Karcher wrote:
> 
> 
> On 07/03/2024 10:55, Wen Gu wrote:
>> This implements operations related to merging sndbuf with peer DMB in
>> loopback-ism. The DMB won't be freed until no sndbuf is attached to it.
> 
> Hi Wen Gu,
> 
> while I'm still reviewing let me drop a lockdep finding.
> 
>>
>> Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
>> ---
>>   net/smc/smc_loopback.c | 136 +++++++++++++++++++++++++++++++++++------
>>   net/smc/smc_loopback.h |   3 +
>>   2 files changed, 119 insertions(+), 20 deletions(-)
>>
>> diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c
>> index 6828e0ad3e90..7e772f3772de 100644
>> --- a/net/smc/smc_loopback.c
>> +++ b/net/smc/smc_loopback.c
> 
> [...]
> 
>> @@ -170,8 +249,22 @@ static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok,
>>   {
>>       struct smc_lo_dmb_node *rmb_node = NULL, *tmp_node;
>>       struct smc_lo_dev *ldev = smcd->priv;
>> -
>> -    read_lock(&ldev->dmb_ht_lock);
>> +    struct smc_connection *conn;
>> +
>> +    if (!sf)
>> +        /* since sndbuf is merged with peer DMB, there is
>> +         * no need to copy data from sndbuf to peer DMB.
>> +         */
>> +        return 0;
>> +
>> +    /* read_lock_bh() is used here just to make lockdep
>> +     * happy, because spin_(un)lock_bh(&conn->send_lock) wraps
>> +     * smc_lo_move_data() and if we use read_lock() here, lockdep
>> +     * will complain about SOFTIRQ-safe -> SOFTIRQ-unsafe lock
>> +     * order detected, but in fact ldev->dmb_ht_lock will never
>> +     * be held in bh context.
>> +     */
>> +    read_lock_bh(&ldev->dmb_ht_lock);
>>       hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) {
>>           if (tmp_node->token == dmb_tok) {
>>               rmb_node = tmp_node;
>> @@ -182,19 +275,14 @@ static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok,
>>           read_unlock(&ldev->dmb_ht_lock);
>>           return -EINVAL;
>>       }
>> -    read_unlock(&ldev->dmb_ht_lock);
>> +    read_unlock_bh(&ldev->dmb_ht_lock);
>>       memcpy((char *)rmb_node->cpu_addr + offset, data, size);
>> -    if (sf) {
>> -        struct smc_connection *conn =
>> -            smcd->conn[rmb_node->sba_idx];
>> -
>> -        if (conn && !conn->killed)
>> -            smcd_cdc_rx_handler(conn);
>> -        else
>> -            return -EPIPE;
>> -    }
>> +    conn = smcd->conn[rmb_node->sba_idx];
>> +    if (!conn || conn->killed)
>> +        return -EPIPE;
>> +    smcd_cdc_rx_handler(conn);
> 
> [ 2385.528515] ============================================
> [ 2385.528517] WARNING: possible recursive locking detected
> [ 2385.528519] 6.8.0-loopback_ism-g30af186e8a18-dirty #12 Not tainted
> [ 2385.528521] --------------------------------------------
> [ 2385.528522] smcapp/51326 is trying to acquire lock:
> [ 2385.528524] 000000018707a128 (&smc->conn.send_lock){+...}-{2:2}, at: smc_tx_sndbuf_nonempty+0xba/0x1c0 [smc]
> [ 2385.528552]
>                 but task is already holding lock:
> [ 2385.528554] 0000000187078728 (&smc->conn.send_lock){+...}-{2:2}, at: smc_cdc_get_slot_and_msg_send+0x66/0xa0 [smc]
> [ 2385.528568]
>                 other info that might help us debug this:
> [ 2385.528570]  Possible unsafe locking scenario:
> 
> [ 2385.528572]        CPU0
> [ 2385.528573]        ----
> [ 2385.528574]   lock(&smc->conn.send_lock);
> [ 2385.528576]   lock(&smc->conn.send_lock);
> [ 2385.528579]
>                  *** DEADLOCK ***
> 
> [ 2385.528580]  May be due to missing lock nesting notation
> 
> [ 2385.528582] 3 locks held by smcapp/51326:
> [ 2385.528584]  #0: 0000000187078378 (sk_lock-AF_SMC){+.+.}-{0:0}, at: smc_recvmsg+0x3c/0x2b0 [smc]
> [ 2385.528598]  #1: 0000000187078728 (&smc->conn.send_lock){+...}-{2:2}, at: smc_cdc_get_slot_and_msg_send+0x66/0xa0 [smc]
> [ 2385.528613]  #2: 0000000187079ce8 (slock-AF_SMC){+...}-{2:2}, at: smc_cdc_msg_recv+0x56/0xe0 [smc]
> [ 2385.528627]
>                 stack backtrace:
> [ 2385.528660] CPU: 3 PID: 51326 Comm: smcapp Not tainted 6.8.0-loopback_ism-g30af186e8a18-dirty #12
> [ 2385.528663] Hardware name: IBM 3906 M04 704 (LPAR)
> [ 2385.528664] Call Trace:
> [ 2385.528666]  [<000000012db60788>] dump_stack_lvl+0x90/0x120
> [ 2385.528671]  [<000000012cc6d088>] validate_chain+0x560/0x960
> [ 2385.528677]  [<000000012cc6f644>] __lock_acquire+0x654/0xd58
> [ 2385.528680]  [<000000012cc70a04>] lock_acquire.part.0+0xec/0x260
> [ 2385.528683]  [<000000012cc70c24>] lock_acquire+0xac/0x170
> [ 2385.528687]  [<000000012dba4ccc>] _raw_spin_lock_bh+0x5c/0xb0
> [ 2385.528690]  [<000003ff80453b32>] smc_tx_sndbuf_nonempty+0xba/0x1c0 [smc]
> [ 2385.528702]  [<000003ff8045428a>] smc_tx_pending+0x32/0x60 [smc]
> [ 2385.528712]  [<000003ff80451f02>] smc_cdc_msg_recv_action+0x3c2/0x528 [smc]
> [ 2385.528723]  [<000003ff804520cc>] smc_cdc_msg_recv+0x64/0xe0 [smc]
> [ 2385.528734]  [<000003ff80452a4c>] smcd_cdc_rx_handler+0x64/0x70 [smc]
> [ 2385.528745]  [<000003ff80459f7e>] smc_lo_move_data+0xde/0x100 [smc]
> [ 2385.528755]  [<000003ff804533e0>] smcd_tx_ism_write+0x68/0x90 [smc]
> [ 2385.528766]  [<000003ff804528a4>] smcd_cdc_msg_send+0x74/0x118 [smc]
> [ 2385.528776]  [<000003ff804529b8>] smc_cdc_get_slot_and_msg_send+0x70/0xa0 [smc]
> [ 2385.528788]  [<000003ff804543ec>] smc_tx_consumer_update+0xe4/0x1b0 [smc]
> [ 2385.528798]  [<000003ff8045458e>] smc_rx_update_consumer+0x86/0x170 [smc]
> [ 2385.528809]  [<000003ff80455ba8>] smc_rx_recvmsg+0x3b8/0x6e8 [smc]
> [ 2385.528820]  [<000003ff804388a4>] smc_recvmsg+0xdc/0x2b0 [smc]
> [ 2385.528831]  [<000000012d8a6d58>] sock_recvmsg+0x70/0xb0
> [ 2385.528837]  [<000000012d8aa0c8>] __sys_recvfrom+0xa8/0x128
> [ 2385.528840]  [<000000012d8ab3ca>] __do_sys_socketcall+0x1ca/0x398
> [ 2385.528844]  [<000000012db8d4c4>] __do_syscall+0x244/0x308
> [ 2385.528847]  [<000000012dba6140>] system_call+0x70/0x98
> [ 2385.528850] INFO: lockdep is turned off.
> 
> 
> I did not investigate deeper, yet. Just an early heads up that there might be something broken.
> 

Thank you for reminding, Jan. I think it is because that I used smcd_cdc_rx_handler(),
which may acquire conn->send_lock, in smc_lo_move_data() where the send_lock has been
held. I reproduced this issue and will fix it in v3.

Thanks!

> Thank you
> - Jan
> 
> 
>>       return 0;
>>   }
>> @@ -226,6 +314,9 @@ static const struct smcd_ops lo_ops = {
>>       .query_remote_gid = smc_lo_query_rgid,
>>       .register_dmb = smc_lo_register_dmb,
>>       .unregister_dmb = smc_lo_unregister_dmb,
>> +    .support_dmb_nocopy = smc_lo_support_dmb_nocopy,
>> +    .attach_dmb = smc_lo_attach_dmb,
>> +    .detach_dmb = smc_lo_detach_dmb,
>>       .add_vlan_id = smc_lo_add_vlan_id,
>>       .del_vlan_id = smc_lo_del_vlan_id,
>>       .set_vlan_required = smc_lo_set_vlan_required,
>> @@ -304,12 +395,17 @@ static int smc_lo_dev_init(struct smc_lo_dev *ldev)
>>       smc_lo_generate_id(ldev);
>>       rwlock_init(&ldev->dmb_ht_lock);
>>       hash_init(ldev->dmb_ht);
>> +    atomic_set(&ldev->dmb_cnt, 0);
>> +    init_waitqueue_head(&ldev->ldev_release);
>> +
>>       return smcd_lo_register_dev(ldev);
>>   }
>>   static void smc_lo_dev_exit(struct smc_lo_dev *ldev)
>>   {
>>       smcd_lo_unregister_dev(ldev);
>> +    if (atomic_read(&ldev->dmb_cnt))
>> +        wait_event(ldev->ldev_release, !atomic_read(&ldev->dmb_cnt));
>>   }
>>   static void smc_lo_dev_release(struct device *dev)
>> diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
>> index 24ab9d747613..9156a6c37e65 100644
>> --- a/net/smc/smc_loopback.h
>> +++ b/net/smc/smc_loopback.h
>> @@ -30,6 +30,7 @@ struct smc_lo_dmb_node {
>>       u32 sba_idx;
>>       void *cpu_addr;
>>       dma_addr_t dma_addr;
>> +    refcount_t refcnt;
>>   };
>>   struct smc_lo_dev {
>> @@ -37,9 +38,11 @@ struct smc_lo_dev {
>>       struct device dev;
>>       u16 chid;
>>       struct smcd_gid local_gid;
>> +    atomic_t dmb_cnt;
>>       rwlock_t dmb_ht_lock;
>>       DECLARE_BITMAP(sba_idx_mask, SMC_LO_MAX_DMBS);
>>       DECLARE_HASHTABLE(dmb_ht, SMC_LO_DMBS_HASH_BITS);
>> +    wait_queue_head_t ldev_release;
>>   };
>>   #endif
diff mbox series

Patch

diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c
index 6828e0ad3e90..7e772f3772de 100644
--- a/net/smc/smc_loopback.c
+++ b/net/smc/smc_loopback.c
@@ -21,6 +21,7 @@ 
 
 #if IS_ENABLED(CONFIG_SMC_LO)
 #define SMC_LO_V2_CAPABLE	0x1 /* loopback-ism acts as ISMv2 */
+#define SMC_LO_SUPPORT_NOCOPY	0x1
 #define SMC_DMA_ADDR_INVALID	(~(dma_addr_t)0)
 
 static const char smc_lo_dev_name[] = "loopback-ism";
@@ -82,6 +83,7 @@  static int smc_lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb,
 		goto err_node;
 	}
 	dmb_node->dma_addr = SMC_DMA_ADDR_INVALID;
+	refcount_set(&dmb_node->refcnt, 1);
 
 again:
 	/* add new dmb into hash table */
@@ -95,6 +97,7 @@  static int smc_lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb,
 	}
 	hash_add(ldev->dmb_ht, &dmb_node->list, dmb_node->token);
 	write_unlock(&ldev->dmb_ht_lock);
+	atomic_inc(&ldev->dmb_cnt);
 
 	dmb->sba_idx = dmb_node->sba_idx;
 	dmb->dmb_tok = dmb_node->token;
@@ -111,13 +114,29 @@  static int smc_lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb,
 	return rc;
 }
 
+static void __smc_lo_unregister_dmb(struct smc_lo_dev *ldev,
+				    struct smc_lo_dmb_node *dmb_node)
+{
+	/* remove dmb from hash table */
+	write_lock(&ldev->dmb_ht_lock);
+	hash_del(&dmb_node->list);
+	write_unlock(&ldev->dmb_ht_lock);
+
+	clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask);
+	kvfree(dmb_node->cpu_addr);
+	kfree(dmb_node);
+
+	if (atomic_dec_and_test(&ldev->dmb_cnt))
+		wake_up(&ldev->ldev_release);
+}
+
 static int smc_lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
 {
 	struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node;
 	struct smc_lo_dev *ldev = smcd->priv;
 
-	/* remove dmb from hash table */
-	write_lock(&ldev->dmb_ht_lock);
+	/* find dmb from hash table */
+	read_lock(&ldev->dmb_ht_lock);
 	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) {
 		if (tmp_node->token == dmb->dmb_tok) {
 			dmb_node = tmp_node;
@@ -125,16 +144,76 @@  static int smc_lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
 		}
 	}
 	if (!dmb_node) {
-		write_unlock(&ldev->dmb_ht_lock);
+		read_unlock(&ldev->dmb_ht_lock);
 		return -EINVAL;
 	}
-	hash_del(&dmb_node->list);
-	write_unlock(&ldev->dmb_ht_lock);
+	read_unlock(&ldev->dmb_ht_lock);
 
-	clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask);
-	kfree(dmb_node->cpu_addr);
-	kfree(dmb_node);
+	if (refcount_dec_and_test(&dmb_node->refcnt))
+		__smc_lo_unregister_dmb(ldev, dmb_node);
+	return 0;
+}
+
+static int smc_lo_support_dmb_nocopy(struct smcd_dev *smcd)
+{
+	return SMC_LO_SUPPORT_NOCOPY;
+}
+
+static int smc_lo_attach_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
+{
+	struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node;
+	struct smc_lo_dev *ldev = smcd->priv;
 
+	/* find dmb_node according to dmb->dmb_tok */
+	read_lock(&ldev->dmb_ht_lock);
+	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) {
+		if (tmp_node->token == dmb->dmb_tok) {
+			dmb_node = tmp_node;
+			break;
+		}
+	}
+	if (!dmb_node) {
+		read_unlock(&ldev->dmb_ht_lock);
+		return -EINVAL;
+	}
+	read_unlock(&ldev->dmb_ht_lock);
+
+	if (!refcount_inc_not_zero(&dmb_node->refcnt))
+		/* the dmb is being unregistered, but has
+		 * not been removed from the hash table.
+		 */
+		return -EINVAL;
+
+	/* provide dmb information */
+	dmb->sba_idx = dmb_node->sba_idx;
+	dmb->dmb_tok = dmb_node->token;
+	dmb->cpu_addr = dmb_node->cpu_addr;
+	dmb->dma_addr = dmb_node->dma_addr;
+	dmb->dmb_len = dmb_node->len;
+	return 0;
+}
+
+static int smc_lo_detach_dmb(struct smcd_dev *smcd, u64 token)
+{
+	struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node;
+	struct smc_lo_dev *ldev = smcd->priv;
+
+	/* find dmb_node according to dmb->dmb_tok */
+	read_lock(&ldev->dmb_ht_lock);
+	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, token) {
+		if (tmp_node->token == token) {
+			dmb_node = tmp_node;
+			break;
+		}
+	}
+	if (!dmb_node) {
+		read_unlock(&ldev->dmb_ht_lock);
+		return -EINVAL;
+	}
+	read_unlock(&ldev->dmb_ht_lock);
+
+	if (refcount_dec_and_test(&dmb_node->refcnt))
+		__smc_lo_unregister_dmb(ldev, dmb_node);
 	return 0;
 }
 
@@ -170,8 +249,22 @@  static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok,
 {
 	struct smc_lo_dmb_node *rmb_node = NULL, *tmp_node;
 	struct smc_lo_dev *ldev = smcd->priv;
-
-	read_lock(&ldev->dmb_ht_lock);
+	struct smc_connection *conn;
+
+	if (!sf)
+		/* since sndbuf is merged with peer DMB, there is
+		 * no need to copy data from sndbuf to peer DMB.
+		 */
+		return 0;
+
+	/* read_lock_bh() is used here just to make lockdep
+	 * happy, because spin_(un)lock_bh(&conn->send_lock) wraps
+	 * smc_lo_move_data() and if we use read_lock() here, lockdep
+	 * will complain about SOFTIRQ-safe -> SOFTIRQ-unsafe lock
+	 * order detected, but in fact ldev->dmb_ht_lock will never
+	 * be held in bh context.
+	 */
+	read_lock_bh(&ldev->dmb_ht_lock);
 	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) {
 		if (tmp_node->token == dmb_tok) {
 			rmb_node = tmp_node;
@@ -182,19 +275,14 @@  static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok,
 		read_unlock(&ldev->dmb_ht_lock);
 		return -EINVAL;
 	}
-	read_unlock(&ldev->dmb_ht_lock);
+	read_unlock_bh(&ldev->dmb_ht_lock);
 
 	memcpy((char *)rmb_node->cpu_addr + offset, data, size);
 
-	if (sf) {
-		struct smc_connection *conn =
-			smcd->conn[rmb_node->sba_idx];
-
-		if (conn && !conn->killed)
-			smcd_cdc_rx_handler(conn);
-		else
-			return -EPIPE;
-	}
+	conn = smcd->conn[rmb_node->sba_idx];
+	if (!conn || conn->killed)
+		return -EPIPE;
+	smcd_cdc_rx_handler(conn);
 	return 0;
 }
 
@@ -226,6 +314,9 @@  static const struct smcd_ops lo_ops = {
 	.query_remote_gid = smc_lo_query_rgid,
 	.register_dmb = smc_lo_register_dmb,
 	.unregister_dmb = smc_lo_unregister_dmb,
+	.support_dmb_nocopy = smc_lo_support_dmb_nocopy,
+	.attach_dmb = smc_lo_attach_dmb,
+	.detach_dmb = smc_lo_detach_dmb,
 	.add_vlan_id = smc_lo_add_vlan_id,
 	.del_vlan_id = smc_lo_del_vlan_id,
 	.set_vlan_required = smc_lo_set_vlan_required,
@@ -304,12 +395,17 @@  static int smc_lo_dev_init(struct smc_lo_dev *ldev)
 	smc_lo_generate_id(ldev);
 	rwlock_init(&ldev->dmb_ht_lock);
 	hash_init(ldev->dmb_ht);
+	atomic_set(&ldev->dmb_cnt, 0);
+	init_waitqueue_head(&ldev->ldev_release);
+
 	return smcd_lo_register_dev(ldev);
 }
 
 static void smc_lo_dev_exit(struct smc_lo_dev *ldev)
 {
 	smcd_lo_unregister_dev(ldev);
+	if (atomic_read(&ldev->dmb_cnt))
+		wait_event(ldev->ldev_release, !atomic_read(&ldev->dmb_cnt));
 }
 
 static void smc_lo_dev_release(struct device *dev)
diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
index 24ab9d747613..9156a6c37e65 100644
--- a/net/smc/smc_loopback.h
+++ b/net/smc/smc_loopback.h
@@ -30,6 +30,7 @@  struct smc_lo_dmb_node {
 	u32 sba_idx;
 	void *cpu_addr;
 	dma_addr_t dma_addr;
+	refcount_t refcnt;
 };
 
 struct smc_lo_dev {
@@ -37,9 +38,11 @@  struct smc_lo_dev {
 	struct device dev;
 	u16 chid;
 	struct smcd_gid local_gid;
+	atomic_t dmb_cnt;
 	rwlock_t dmb_ht_lock;
 	DECLARE_BITMAP(sba_idx_mask, SMC_LO_MAX_DMBS);
 	DECLARE_HASHTABLE(dmb_ht, SMC_LO_DMBS_HASH_BITS);
+	wait_queue_head_t ldev_release;
 };
 #endif