
[rdma-next,v1] IB/core: Implement a limit on UMAD receive List

Message ID e2262f827f43518e5e3a4d825a3e0514c0f7aa5f.1712668708.git.leonro@nvidia.com (mailing list archive)
State Superseded
Series [rdma-next,v1] IB/core: Implement a limit on UMAD receive List

Commit Message

Leon Romanovsky April 9, 2024, 1:26 p.m. UTC
From: Michael Guralnik <michaelgur@nvidia.com>

ib_umad currently keeps received MAD packets on an unbounded list,
which poses a risk of uncontrolled growth: user-space applications
may extract packets from the list more slowly than new packets
arrive, so the list can grow without bound.

To address this, introduce a limit on the size of the list.
Considering typical scenarios, such as an OpenSM consumer that
processes approximately 100k packets per second, and the 1-second
retry timeout applied to most packets, set the limit to 200k entries.
Packets received beyond this limit are dropped, on the assumption
that they will have timed out by the time user space handles them.

Notably, packets placed on the receive list for other reasons, such
as timed-out sends, are still queued even when the list is full.

Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
Changelog:
v1:
 * Replaced the configurable sysfs entry with a hard-coded value.
 * Rewrote the commit message.
v0: https://lore.kernel.org/all/70029b5f256fbad6efbb98458deb9c46baa2c4b3.1712051390.git.leon@kernel.org
---
 drivers/infiniband/core/user_mad.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)
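
The 200k figure follows directly from the numbers above. As a
back-of-the-envelope restatement (only MAX_UMAD_RECV_LIST_SIZE appears in
the patch; the two helper macros here are purely illustrative):

/* ~100k MADs/s of consumer throughput (OpenSM-class) times ~2 seconds
 * of buffering (1-second retry timeout plus headroom) gives 200k entries.
 */
#define UMAD_ASSUMED_CONSUMER_PPS	100000	/* illustrative only */
#define UMAD_ASSUMED_BUFFER_SECS	2	/* illustrative only */
#define MAX_UMAD_RECV_LIST_SIZE		(UMAD_ASSUMED_CONSUMER_PPS * \
					 UMAD_ASSUMED_BUFFER_SECS)	/* 200000 */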

Comments

Mark Zhang April 9, 2024, 1:50 p.m. UTC | #1
On 4/9/2024 9:26 PM, Leon Romanovsky wrote:
> [...]
> @@ -182,7 +185,8 @@ static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
> 
>   static int queue_packet(struct ib_umad_file *file,
>                          struct ib_mad_agent *agent,
> -                       struct ib_umad_packet *packet)
> +                       struct ib_umad_packet *packet,
> +                       bool is_send_mad)
>   {
>          int ret = 1;
> 
> @@ -192,7 +196,11 @@ static int queue_packet(struct ib_umad_file *file,
>               packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
>               packet->mad.hdr.id++)
>                  if (agent == __get_agent(file, packet->mad.hdr.id)) {
> +                       if (is_send_mad || atomic_read(&file->recv_list_size) >
> +                                                  MAX_UMAD_RECV_LIST_SIZE)

Should it be:

if (!is_send_mad &&
      atomic_read(&file->recv_list_size) > MAX_UMAD_RECV_LIST_SIZE)

Or maybe:

if (is_recv_mad &&
      atomic_read(&file->recv_list_size) > MAX_UMAD_RECV_LIST_SIZE)

> +                               break;
>                          list_add_tail(&packet->list, &file->recv_list);
> +                       atomic_inc(&file->recv_list_size);
>                          wake_up_interruptible(&file->recv_wait);
>                          ret = 0;
>                          break;
> [...]
Leon Romanovsky April 11, 2024, 10:22 a.m. UTC | #2
On Tue, Apr 09, 2024 at 09:50:57PM +0800, Mark Zhang wrote:
> On 4/9/2024 9:26 PM, Leon Romanovsky wrote:
> > [...]
> > @@ -192,7 +196,11 @@ static int queue_packet(struct ib_umad_file *file,
> >               packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
> >               packet->mad.hdr.id++)
> >                  if (agent == __get_agent(file, packet->mad.hdr.id)) {
> > +                       if (is_send_mad || atomic_read(&file->recv_list_size) >
> > +                                                  MAX_UMAD_RECV_LIST_SIZE)
> 
> Should it be:
> 
> if (!is_send_mad &&
>      atomic_read(&file->recv_list_size) > MAX_UMAD_RECV_LIST_SIZE)
> 
> Or maybe:
> 
> if (is_recv_mad &&
>      atomic_read(&file->recv_list_size) > MAX_UMAD_RECV_LIST_SIZE)

Of course you are right, I will fix it in the next version.

Thanks
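
As posted, the v1 check "is_send_mad || ..." unconditionally drops
timed-out send notifications while capping only receives, inverting the
commit message's stated intent for sends. A sketch of the corrected check
implied by the review above (presumably what v2 carries; v2 itself is not
shown here):

			/* Only received MADs are subject to the cap;
			 * locally queued entries such as timed-out sends
			 * always make it onto the list. */
			if (!is_send_mad &&
			    atomic_read(&file->recv_list_size) >
						MAX_UMAD_RECV_LIST_SIZE)
				break;
			list_add_tail(&packet->list, &file->recv_list);
			atomic_inc(&file->recv_list_size);
			wake_up_interruptible(&file->recv_wait);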


Patch

diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index f5feca7fa9b9..40756b573017 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -63,6 +63,8 @@  MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
 MODULE_LICENSE("Dual BSD/GPL");
 
+#define MAX_UMAD_RECV_LIST_SIZE 200000
+
 enum {
 	IB_UMAD_MAX_PORTS  = RDMA_MAX_PORTS,
 	IB_UMAD_MAX_AGENTS = 32,
@@ -113,6 +115,7 @@  struct ib_umad_file {
 	struct mutex		mutex;
 	struct ib_umad_port    *port;
 	struct list_head	recv_list;
+	atomic_t		recv_list_size;
 	struct list_head	send_list;
 	struct list_head	port_list;
 	spinlock_t		send_lock;
@@ -182,7 +185,8 @@  static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
 
 static int queue_packet(struct ib_umad_file *file,
 			struct ib_mad_agent *agent,
-			struct ib_umad_packet *packet)
+			struct ib_umad_packet *packet,
+			bool is_send_mad)
 {
 	int ret = 1;
 
@@ -192,7 +196,11 @@  static int queue_packet(struct ib_umad_file *file,
 	     packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
 	     packet->mad.hdr.id++)
 		if (agent == __get_agent(file, packet->mad.hdr.id)) {
+			if (is_send_mad || atomic_read(&file->recv_list_size) >
+						   MAX_UMAD_RECV_LIST_SIZE)
+				break;
 			list_add_tail(&packet->list, &file->recv_list);
+			atomic_inc(&file->recv_list_size);
 			wake_up_interruptible(&file->recv_wait);
 			ret = 0;
 			break;
@@ -224,7 +232,7 @@  static void send_handler(struct ib_mad_agent *agent,
 	if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
 		packet->length = IB_MGMT_MAD_HDR;
 		packet->mad.hdr.status = ETIMEDOUT;
-		if (!queue_packet(file, agent, packet))
+		if (!queue_packet(file, agent, packet, true))
 			return;
 	}
 	kfree(packet);
@@ -284,7 +292,7 @@  static void recv_handler(struct ib_mad_agent *agent,
 		rdma_destroy_ah_attr(&ah_attr);
 	}
 
-	if (queue_packet(file, agent, packet))
+	if (queue_packet(file, agent, packet, false))
 		goto err2;
 	return;
 
@@ -409,6 +417,7 @@  static ssize_t ib_umad_read(struct file *filp, char __user *buf,
 
 	packet = list_entry(file->recv_list.next, struct ib_umad_packet, list);
 	list_del(&packet->list);
+	atomic_dec(&file->recv_list_size);
 
 	mutex_unlock(&file->mutex);
 
@@ -421,6 +430,7 @@  static ssize_t ib_umad_read(struct file *filp, char __user *buf,
 		/* Requeue packet */
 		mutex_lock(&file->mutex);
 		list_add(&packet->list, &file->recv_list);
+		atomic_inc(&file->recv_list_size);
 		mutex_unlock(&file->mutex);
 	} else {
 		if (packet->recv_wc)
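
Two closing notes on the change. On the locking design: recv_list_size is
an atomic_t even though every update in this patch happens under
file->mutex; the atomic keeps the counter cheap to sample without the
lock, and the cap is a soft bound in any case, since exact enforcement is
not required.

For context on the consumer side this cap protects, here is a minimal
user-space reader sketch using libibumad (the management class, version,
and buffer sizing are assumptions for illustration, not taken from this
patch). Each successful umad_recv() reaches ib_umad_read() and pops one
packet off the kernel's per-file recv_list; when a loop like this stalls,
that list is what used to grow without bound:

/* Minimal libibumad consumer sketch (illustrative; not part of the patch).
 * Build with: cc reader.c -libumad. Class 0x03 (SA) and class version 2
 * are assumptions for demonstration.
 */
#include <infiniband/umad.h>

int main(void)
{
	int portid, agent, len;
	void *umad;

	if (umad_init() < 0)
		return 1;
	portid = umad_open_port(NULL, 0);	/* first active CA/port */
	if (portid < 0)
		return 1;
	agent = umad_register(portid, 0x03, 2, 0, NULL);
	if (agent < 0)
		goto out_port;
	umad = umad_alloc(1, umad_size() + 256);	/* umad hdr + one MAD */
	if (!umad)
		goto out_agent;

	for (;;) {
		len = 256;
		/* Blocks until a MAD arrives; each success drains one
		 * entry from the kernel-side recv_list. */
		if (umad_recv(portid, umad, &len, -1) < 0)
			break;
		/* ... process the MAD at umad_get_mad(umad) ... */
	}

	umad_free(umad);
out_agent:
	umad_unregister(portid, agent);
out_port:
	umad_close_port(portid);
	umad_done();
	return 0;
}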