diff mbox

[05/11] IB/cm: Share listening CM IDs

Message ID 1434358036-15526-6-git-send-email-haggaie@mellanox.com (mailing list archive)
State Superseded
Headers show

Commit Message

Haggai Eran June 15, 2015, 8:47 a.m. UTC
Enabling network namespaces for RDMA CM will allow processes in different
namespaces to listen on the same port. To keep namespace support out of
the CM layer, multiple RDMA CM IDs must be able to share a single
CM ID.

This patch adds infrastructure to retrieve an existing listening ib_cm_id,
based on its device and service ID, or create a new one if one does not
already exist. It also adds a reference count for such instances
(cm_id_private.listen_sharecount), and prevents cm_destroy_id from
destroying a CM ID that is still shared. See the relevant discussion [1].

[1] Re: [PATCH v3 for-next 05/13] IB/cm: Reference count ib_cm_ids
    http://www.spinics.net/lists/netdev/msg328860.html

Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
---
 drivers/infiniband/core/cm.c | 127 +++++++++++++++++++++++++++++++++++++++++--
 include/rdma/ib_cm.h         |   4 ++
 2 files changed, 125 insertions(+), 6 deletions(-)

Comments

Hefty, Sean June 15, 2015, 10:13 p.m. UTC | #1
> @@ -722,6 +725,7 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device
> *device,
>  	INIT_LIST_HEAD(&cm_id_priv->work_list);
>  	atomic_set(&cm_id_priv->work_count, -1);
>  	atomic_set(&cm_id_priv->refcount, 1);
> +	cm_id_priv->listen_sharecount = 1;

This is setting the listen count before we know whether the cm_id will actually be used to listen.


>  	return &cm_id_priv->id;
> 
>  error:
> @@ -847,11 +851,21 @@ retest:
>  	spin_lock_irq(&cm_id_priv->lock);
>  	switch (cm_id->state) {
>  	case IB_CM_LISTEN:
> -		cm_id->state = IB_CM_IDLE;
>  		spin_unlock_irq(&cm_id_priv->lock);
> +
>  		spin_lock_irq(&cm.lock);
> +		if (--cm_id_priv->listen_sharecount > 0) {
> +			/* The id is still shared. */
> +			atomic_dec(&cm_id_priv->refcount);
> +			spin_unlock_irq(&cm.lock);
> +			return;
> +		}
>  		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
>  		spin_unlock_irq(&cm.lock);
> +
> +		spin_lock_irq(&cm_id_priv->lock);
> +		cm_id->state = IB_CM_IDLE;
> +		spin_unlock_irq(&cm_id_priv->lock);

Why is the state being changed?  The cm_id is about to be freed anyway.


>  		break;
>  	case IB_CM_SIDR_REQ_SENT:
>  		cm_id->state = IB_CM_IDLE;
> @@ -929,11 +943,32 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id)
>  }
>  EXPORT_SYMBOL(ib_destroy_cm_id);
> 
> -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64
> service_mask,
> -		 struct ib_cm_compare_data *compare_data)
> +/**
> + * __ib_cm_listen - Initiates listening on the specified service ID for
> + *   connection and service ID resolution requests.
> + * @cm_id: Connection identifier associated with the listen request.
> + * @service_id: Service identifier matched against incoming connection
> + *   and service ID resolution requests.  The service ID should be
> specified
> + *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
> + *   assign a service ID to the caller.
> + * @service_mask: Mask applied to service ID used to listen across a
> + *   range of service IDs.  If set to 0, the service ID is matched
> + *   exactly.  This parameter is ignored if %service_id is set to
> + *   IB_CM_ASSIGN_SERVICE_ID.
> + * @compare_data: This parameter is optional.  It specifies data that
> must
> + *   appear in the private data of a connection request for the specified
> + *   listen request.
> + * @lock: If set, lock the cm.lock spin-lock when adding the id to the
> + *   listener tree. When false, the caller must already hold the spin-
> lock,
> + *   and compare_data must be NULL.
> + */
> +static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
> +			  __be64 service_mask,
> +			  struct ib_cm_compare_data *compare_data,
> +			  bool lock)
>  {
>  	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
> -	unsigned long flags;
> +	unsigned long flags = 0;
>  	int ret = 0;
> 
>  	service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
> @@ -959,7 +994,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64
> service_id, __be64 service_mask,
> 
>  	cm_id->state = IB_CM_LISTEN;
> 
> -	spin_lock_irqsave(&cm.lock, flags);
> +	if (lock)
> +		spin_lock_irqsave(&cm.lock, flags);

I'm not a fan of this sort of locking structure.  Why not just move the locking into the outside calls completely?  I.e. move to ib_cm_listen() instead of passing in true.


>  	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
>  		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
>  		cm_id->service_mask = ~cpu_to_be64(0);
> @@ -968,7 +1004,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64
> service_id, __be64 service_mask,
>  		cm_id->service_mask = service_mask;
>  	}
>  	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
> -	spin_unlock_irqrestore(&cm.lock, flags);
> +	if (lock)
> +		spin_unlock_irqrestore(&cm.lock, flags);
> 
>  	if (cur_cm_id_priv) {
>  		cm_id->state = IB_CM_IDLE;
> @@ -978,8 +1015,86 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64
> service_id, __be64 service_mask,
>  	}
>  	return ret;
>  }
> +
> +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64
> service_mask,
> +		 struct ib_cm_compare_data *compare_data)
> +{
> +	return __ib_cm_listen(cm_id, service_id, service_mask, compare_data,
> +			      true);
> +}
>  EXPORT_SYMBOL(ib_cm_listen);
> 
> +/**
> + * Create a new listening ib_cm_id and listen on the given service ID.
> + *
> + * If there's an existing ID listening on that same device and service
> ID,
> + * return it.
> + *
> + * @device: Device associated with the cm_id.  All related communication
> will
> + * be associated with the specified device.
> + * @cm_handler: Callback invoked to notify the user of CM events.
> + * @service_id: Service identifier matched against incoming connection
> + *   and service ID resolution requests.  The service ID should be
> specified
> + *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
> + *   assign a service ID to the caller.
> + * @service_mask: Mask applied to service ID used to listen across a
> + *   range of service IDs.  If set to 0, the service ID is matched
> + *   exactly.  This parameter is ignored if %service_id is set to
> + *   IB_CM_ASSIGN_SERVICE_ID.
> + *
> + * Callers should call ib_destroy_cm_id when done with the listener ID.
> + */
> +struct ib_cm_id *ib_cm_id_create_and_listen(

Maybe ib_cm_insert_listen() instead?

> +		struct ib_device *device,
> +		ib_cm_handler cm_handler,
> +		__be64 service_id,
> +		__be64 service_mask)
> +{
> +	struct cm_id_private *cm_id_priv;
> +	struct ib_cm_id *cm_id;
> +	unsigned long flags;
> +	int err = 0;
> +
> +	/* Create an ID in advance, since the creation may sleep */
> +	cm_id = ib_create_cm_id(device, cm_handler, NULL);
> +	if (IS_ERR(cm_id))
> +		return cm_id;
> +
> +	spin_lock_irqsave(&cm.lock, flags);
> +
> +	if (service_id == IB_CM_ASSIGN_SERVICE_ID)
> +		goto new_id;
> +
> +	/* Find an existing ID */
> +	cm_id_priv = cm_find_listen(device, service_id, NULL);
> +	if (cm_id_priv) {
> +		atomic_inc(&cm_id_priv->refcount);
> +		++cm_id_priv->listen_sharecount;
> +		spin_unlock_irqrestore(&cm.lock, flags);
> +
> +		ib_destroy_cm_id(cm_id);
> +		cm_id = &cm_id_priv->id;
> +		if (cm_id->cm_handler != cm_handler || cm_id->context)
> +			/* Sharing an ib_cm_id with different handlers is not
> +			 * supported */
> +			return ERR_PTR(-EINVAL);

This leaks listen_sharecount references.


> +		return cm_id;
> +	}
> +
> +new_id:
> +	/* Use newly created ID */
> +	err = __ib_cm_listen(cm_id, service_id, service_mask, NULL, false);
> +
> +	spin_unlock_irqrestore(&cm.lock, flags);
> +
> +	if (err) {
> +		ib_destroy_cm_id(cm_id);
> +		return ERR_PTR(err);
> +	}
> +	return cm_id;
> +}
> +EXPORT_SYMBOL(ib_cm_id_create_and_listen);
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Haggai Eran June 16, 2015, 12:50 p.m. UTC | #2
On 16/06/2015 01:13, Hefty, Sean wrote:
>> @@ -722,6 +725,7 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device
>> *device,
>>  	INIT_LIST_HEAD(&cm_id_priv->work_list);
>>  	atomic_set(&cm_id_priv->work_count, -1);
>>  	atomic_set(&cm_id_priv->refcount, 1);
>> +	cm_id_priv->listen_sharecount = 1;
> 
> This is setting the listen count before we know whether the cm_id will actually be used to listen.

Right. I'll move it to the new_id case in ib_cm_id_create_and_listen.

> 
> 
>>  	return &cm_id_priv->id;
>>
>>  error:
>> @@ -847,11 +851,21 @@ retest:
>>  	spin_lock_irq(&cm_id_priv->lock);
>>  	switch (cm_id->state) {
>>  	case IB_CM_LISTEN:
>> -		cm_id->state = IB_CM_IDLE;
>>  		spin_unlock_irq(&cm_id_priv->lock);
>> +
>>  		spin_lock_irq(&cm.lock);
>> +		if (--cm_id_priv->listen_sharecount > 0) {
>> +			/* The id is still shared. */
>> +			atomic_dec(&cm_id_priv->refcount);
>> +			spin_unlock_irq(&cm.lock);
>> +			return;
>> +		}
>>  		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
>>  		spin_unlock_irq(&cm.lock);
>> +
>> +		spin_lock_irq(&cm_id_priv->lock);
>> +		cm_id->state = IB_CM_IDLE;
>> +		spin_unlock_irq(&cm_id_priv->lock);
> 
> Why is the state being changed?  The cm_id is about to be freed anyway.

It matches the rest of the code, but I don't think it is actually being
used for listening ids. I will drop it.

> 
> 
>>  		break;
>>  	case IB_CM_SIDR_REQ_SENT:
>>  		cm_id->state = IB_CM_IDLE;
>> @@ -929,11 +943,32 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id)
>>  }
>>  EXPORT_SYMBOL(ib_destroy_cm_id);
>>
>> -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64
>> service_mask,
>> -		 struct ib_cm_compare_data *compare_data)
>> +/**
>> + * __ib_cm_listen - Initiates listening on the specified service ID for
>> + *   connection and service ID resolution requests.
>> + * @cm_id: Connection identifier associated with the listen request.
>> + * @service_id: Service identifier matched against incoming connection
>> + *   and service ID resolution requests.  The service ID should be
>> specified
>> + *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
>> + *   assign a service ID to the caller.
>> + * @service_mask: Mask applied to service ID used to listen across a
>> + *   range of service IDs.  If set to 0, the service ID is matched
>> + *   exactly.  This parameter is ignored if %service_id is set to
>> + *   IB_CM_ASSIGN_SERVICE_ID.
>> + * @compare_data: This parameter is optional.  It specifies data that
>> must
>> + *   appear in the private data of a connection request for the specified
>> + *   listen request.
>> + * @lock: If set, lock the cm.lock spin-lock when adding the id to the
>> + *   listener tree. When false, the caller must already hold the spin-
>> lock,
>> + *   and compare_data must be NULL.
>> + */
>> +static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
>> +			  __be64 service_mask,
>> +			  struct ib_cm_compare_data *compare_data,
>> +			  bool lock)
>>  {
>>  	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
>> -	unsigned long flags;
>> +	unsigned long flags = 0;
>>  	int ret = 0;
>>
>>  	service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
>> @@ -959,7 +994,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64
>> service_id, __be64 service_mask,
>>
>>  	cm_id->state = IB_CM_LISTEN;
>>
>> -	spin_lock_irqsave(&cm.lock, flags);
>> +	if (lock)
>> +		spin_lock_irqsave(&cm.lock, flags);
> 
> I'm not a fan of this sort of locking structure.  Why not just move the locking into the outside calls completely?  I.e. move to ib_cm_listen() instead of passing in true.

The reason is that this function can sleep when called with compare_data !=
NULL, because it allocates the id's compare_data with GFP_KERNEL. But, since
the compare_data is going away in a later patch, I can actually fix the
locking at that point. I'll change the patch that removes compare_data
to also remove the lock parameter.

> 
> 
>>  	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
>>  		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
>>  		cm_id->service_mask = ~cpu_to_be64(0);
>> @@ -968,7 +1004,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64
>> service_id, __be64 service_mask,
>>  		cm_id->service_mask = service_mask;
>>  	}
>>  	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
>> -	spin_unlock_irqrestore(&cm.lock, flags);
>> +	if (lock)
>> +		spin_unlock_irqrestore(&cm.lock, flags);
>>
>>  	if (cur_cm_id_priv) {
>>  		cm_id->state = IB_CM_IDLE;
>> @@ -978,8 +1015,86 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64
>> service_id, __be64 service_mask,
>>  	}
>>  	return ret;
>>  }
>> +
>> +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64
>> service_mask,
>> +		 struct ib_cm_compare_data *compare_data)
>> +{
>> +	return __ib_cm_listen(cm_id, service_id, service_mask, compare_data,
>> +			      true);
>> +}
>>  EXPORT_SYMBOL(ib_cm_listen);
>>
>> +/**
>> + * Create a new listening ib_cm_id and listen on the given service ID.
>> + *
>> + * If there's an existing ID listening on that same device and service
>> ID,
>> + * return it.
>> + *
>> + * @device: Device associated with the cm_id.  All related communication
>> will
>> + * be associated with the specified device.
>> + * @cm_handler: Callback invoked to notify the user of CM events.
>> + * @service_id: Service identifier matched against incoming connection
>> + *   and service ID resolution requests.  The service ID should be
>> specified
>> + *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
>> + *   assign a service ID to the caller.
>> + * @service_mask: Mask applied to service ID used to listen across a
>> + *   range of service IDs.  If set to 0, the service ID is matched
>> + *   exactly.  This parameter is ignored if %service_id is set to
>> + *   IB_CM_ASSIGN_SERVICE_ID.
>> + *
>> + * Callers should call ib_destroy_cm_id when done with the listener ID.
>> + */
>> +struct ib_cm_id *ib_cm_id_create_and_listen(
> 
> Maybe ib_cm_insert_listen() instead?

Okay.

> 
>> +		struct ib_device *device,
>> +		ib_cm_handler cm_handler,
>> +		__be64 service_id,
>> +		__be64 service_mask)
>> +{
>> +	struct cm_id_private *cm_id_priv;
>> +	struct ib_cm_id *cm_id;
>> +	unsigned long flags;
>> +	int err = 0;
>> +
>> +	/* Create an ID in advance, since the creation may sleep */
>> +	cm_id = ib_create_cm_id(device, cm_handler, NULL);
>> +	if (IS_ERR(cm_id))
>> +		return cm_id;
>> +
>> +	spin_lock_irqsave(&cm.lock, flags);
>> +
>> +	if (service_id == IB_CM_ASSIGN_SERVICE_ID)
>> +		goto new_id;
>> +
>> +	/* Find an existing ID */
>> +	cm_id_priv = cm_find_listen(device, service_id, NULL);
>> +	if (cm_id_priv) {
>> +		atomic_inc(&cm_id_priv->refcount);
>> +		++cm_id_priv->listen_sharecount;
>> +		spin_unlock_irqrestore(&cm.lock, flags);
>> +
>> +		ib_destroy_cm_id(cm_id);
>> +		cm_id = &cm_id_priv->id;
>> +		if (cm_id->cm_handler != cm_handler || cm_id->context)
>> +			/* Sharing an ib_cm_id with different handlers is not
>> +			 * supported */
>> +			return ERR_PTR(-EINVAL);
> 
> This leaks listen_sharecount references.

Thanks. I'll fix that.

> 
> 
>> +		return cm_id;
>> +	}
>> +
>> +new_id:
>> +	/* Use newly created ID */
>> +	err = __ib_cm_listen(cm_id, service_id, service_mask, NULL, false);
>> +
>> +	spin_unlock_irqrestore(&cm.lock, flags);
>> +
>> +	if (err) {
>> +		ib_destroy_cm_id(cm_id);
>> +		return ERR_PTR(err);
>> +	}
>> +	return cm_id;
>> +}
>> +EXPORT_SYMBOL(ib_cm_id_create_and_listen);

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 46f99ec4080a..d124a891430b 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -212,6 +212,9 @@  struct cm_id_private {
 	spinlock_t lock;	/* Do not acquire inside cm.lock */
 	struct completion comp;
 	atomic_t refcount;
+	/* Number of clients sharing this ib_cm_id. Only valid for listeners.
+	 * Protected by the cm.lock spinlock. */
+	int listen_sharecount;
 
 	struct ib_mad_send_buf *msg;
 	struct cm_timewait_info *timewait_info;
@@ -722,6 +725,7 @@  struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
 	INIT_LIST_HEAD(&cm_id_priv->work_list);
 	atomic_set(&cm_id_priv->work_count, -1);
 	atomic_set(&cm_id_priv->refcount, 1);
+	cm_id_priv->listen_sharecount = 1;
 	return &cm_id_priv->id;
 
 error:
@@ -847,11 +851,21 @@  retest:
 	spin_lock_irq(&cm_id_priv->lock);
 	switch (cm_id->state) {
 	case IB_CM_LISTEN:
-		cm_id->state = IB_CM_IDLE;
 		spin_unlock_irq(&cm_id_priv->lock);
+
 		spin_lock_irq(&cm.lock);
+		if (--cm_id_priv->listen_sharecount > 0) {
+			/* The id is still shared. */
+			atomic_dec(&cm_id_priv->refcount);
+			spin_unlock_irq(&cm.lock);
+			return;
+		}
 		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
 		spin_unlock_irq(&cm.lock);
+
+		spin_lock_irq(&cm_id_priv->lock);
+		cm_id->state = IB_CM_IDLE;
+		spin_unlock_irq(&cm_id_priv->lock);
 		break;
 	case IB_CM_SIDR_REQ_SENT:
 		cm_id->state = IB_CM_IDLE;
@@ -929,11 +943,32 @@  void ib_destroy_cm_id(struct ib_cm_id *cm_id)
 }
 EXPORT_SYMBOL(ib_destroy_cm_id);
 
-int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
-		 struct ib_cm_compare_data *compare_data)
+/**
+ * __ib_cm_listen - Initiates listening on the specified service ID for
+ *   connection and service ID resolution requests.
+ * @cm_id: Connection identifier associated with the listen request.
+ * @service_id: Service identifier matched against incoming connection
+ *   and service ID resolution requests.  The service ID should be specified
+ *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
+ *   assign a service ID to the caller.
+ * @service_mask: Mask applied to service ID used to listen across a
+ *   range of service IDs.  If set to 0, the service ID is matched
+ *   exactly.  This parameter is ignored if %service_id is set to
+ *   IB_CM_ASSIGN_SERVICE_ID.
+ * @compare_data: This parameter is optional.  It specifies data that must
+ *   appear in the private data of a connection request for the specified
+ *   listen request.
+ * @lock: If set, lock the cm.lock spin-lock when adding the id to the
+ *   listener tree. When false, the caller must already hold the spin-lock,
+ *   and compare_data must be NULL.
+ */
+static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
+			  __be64 service_mask,
+			  struct ib_cm_compare_data *compare_data,
+			  bool lock)
 {
 	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
-	unsigned long flags;
+	unsigned long flags = 0;
 	int ret = 0;
 
 	service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
@@ -959,7 +994,8 @@  int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
 
 	cm_id->state = IB_CM_LISTEN;
 
-	spin_lock_irqsave(&cm.lock, flags);
+	if (lock)
+		spin_lock_irqsave(&cm.lock, flags);
 	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
 		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
 		cm_id->service_mask = ~cpu_to_be64(0);
@@ -968,7 +1004,8 @@  int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
 		cm_id->service_mask = service_mask;
 	}
 	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
-	spin_unlock_irqrestore(&cm.lock, flags);
+	if (lock)
+		spin_unlock_irqrestore(&cm.lock, flags);
 
 	if (cur_cm_id_priv) {
 		cm_id->state = IB_CM_IDLE;
@@ -978,8 +1015,86 @@  int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
 	}
 	return ret;
 }
+
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
+		 struct ib_cm_compare_data *compare_data)
+{
+	return __ib_cm_listen(cm_id, service_id, service_mask, compare_data,
+			      true);
+}
 EXPORT_SYMBOL(ib_cm_listen);
 
+/**
+ * Create a new listening ib_cm_id and listen on the given service ID.
+ *
+ * If there's an existing ID listening on that same device and service ID,
+ * return it.
+ *
+ * @device: Device associated with the cm_id.  All related communication will
+ * be associated with the specified device.
+ * @cm_handler: Callback invoked to notify the user of CM events.
+ * @service_id: Service identifier matched against incoming connection
+ *   and service ID resolution requests.  The service ID should be specified
+ *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
+ *   assign a service ID to the caller.
+ * @service_mask: Mask applied to service ID used to listen across a
+ *   range of service IDs.  If set to 0, the service ID is matched
+ *   exactly.  This parameter is ignored if %service_id is set to
+ *   IB_CM_ASSIGN_SERVICE_ID.
+ *
+ * Callers should call ib_destroy_cm_id when done with the listener ID.
+ */
+struct ib_cm_id *ib_cm_id_create_and_listen(
+		struct ib_device *device,
+		ib_cm_handler cm_handler,
+		__be64 service_id,
+		__be64 service_mask)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_cm_id *cm_id;
+	unsigned long flags;
+	int err = 0;
+
+	/* Create an ID in advance, since the creation may sleep */
+	cm_id = ib_create_cm_id(device, cm_handler, NULL);
+	if (IS_ERR(cm_id))
+		return cm_id;
+
+	spin_lock_irqsave(&cm.lock, flags);
+
+	if (service_id == IB_CM_ASSIGN_SERVICE_ID)
+		goto new_id;
+
+	/* Find an existing ID */
+	cm_id_priv = cm_find_listen(device, service_id, NULL);
+	if (cm_id_priv) {
+		atomic_inc(&cm_id_priv->refcount);
+		++cm_id_priv->listen_sharecount;
+		spin_unlock_irqrestore(&cm.lock, flags);
+
+		ib_destroy_cm_id(cm_id);
+		cm_id = &cm_id_priv->id;
+		if (cm_id->cm_handler != cm_handler || cm_id->context)
+			/* Sharing an ib_cm_id with different handlers is not
+			 * supported */
+			return ERR_PTR(-EINVAL);
+		return cm_id;
+	}
+
+new_id:
+	/* Use newly created ID */
+	err = __ib_cm_listen(cm_id, service_id, service_mask, NULL, false);
+
+	spin_unlock_irqrestore(&cm.lock, flags);
+
+	if (err) {
+		ib_destroy_cm_id(cm_id);
+		return ERR_PTR(err);
+	}
+	return cm_id;
+}
+EXPORT_SYMBOL(ib_cm_id_create_and_listen);
+
 static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
 			  enum cm_msg_sequence msg_seq)
 {
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 3a5d70d79790..0dc2ff983198 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -364,6 +364,10 @@  struct ib_cm_compare_data {
 int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
 		 struct ib_cm_compare_data *compare_data);
 
+struct ib_cm_id *ib_cm_id_create_and_listen(
+		struct ib_device *device, ib_cm_handler cm_handler,
+		__be64 service_id, __be64 service_mask);
+
 struct ib_cm_req_param {
 	struct ib_sa_path_rec	*primary_path;
 	struct ib_sa_path_rec	*alternate_path;