diff mbox

[net,v3] xen-netback: fix race condition on XenBus disconnect

Message ID 1489088497-9027-1-git-send-email-igor.druzhinin@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Igor Druzhinin March 9, 2017, 7:41 p.m. UTC
In some cases during XenBus disconnect event handling and subsequent
queue resource release there may be some TX handlers active on
other processors. Use RCU in order to synchronize with them.

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
---
v3:
 * Fix unintended semantic change in xenvif_get_ethtool_stats
 * Dropped extra code

v2:
 * Add protection for xenvif_get_ethtool_stats
 * Additional comments and fixes
---
 drivers/net/xen-netback/interface.c | 26 +++++++++++++++++---------
 drivers/net/xen-netback/netback.c   |  2 +-
 drivers/net/xen-netback/xenbus.c    | 20 ++++++++++----------
 3 files changed, 28 insertions(+), 20 deletions(-)

Comments

Paul Durrant March 9, 2017, 8:28 p.m. UTC | #1
> -----Original Message-----
> From: Igor Druzhinin [mailto:igor.druzhinin@citrix.com]
> Sent: 09 March 2017 19:42
> To: netdev@vger.kernel.org; xen-devel@lists.xenproject.org
> Cc: Paul Durrant <Paul.Durrant@citrix.com>; jgross@suse.com; Wei Liu
> <wei.liu2@citrix.com>; Igor Druzhinin <igor.druzhinin@citrix.com>
> Subject: [PATCH net v3] xen-netback: fix race condition on XenBus
> disconnect
> 
> In some cases during XenBus disconnect event handling and subsequent
> queue resource release there may be some TX handlers active on
> other processors. Use RCU in order to synchronize with them.
> 
> Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> ---
> v3:
>  * Fix unintended semantic change in xenvif_get_ethtool_stats
>  * Dropped extra code
> 
> v2:
>  * Add protection for xenvif_get_ethtool_stats
>  * Additional comments and fixes
> ---
>  drivers/net/xen-netback/interface.c | 26 +++++++++++++++++---------
>  drivers/net/xen-netback/netback.c   |  2 +-
>  drivers/net/xen-netback/xenbus.c    | 20 ++++++++++----------
>  3 files changed, 28 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-
> netback/interface.c
> index 829b26c..a3c018e 100644
> --- a/drivers/net/xen-netback/interface.c
> +++ b/drivers/net/xen-netback/interface.c
> @@ -165,13 +165,17 @@ static int xenvif_start_xmit(struct sk_buff *skb,
> struct net_device *dev)
>  {
>  	struct xenvif *vif = netdev_priv(dev);
>  	struct xenvif_queue *queue = NULL;
> -	unsigned int num_queues = vif->num_queues;
> +	unsigned int num_queues;
>  	u16 index;
>  	struct xenvif_rx_cb *cb;
> 
>  	BUG_ON(skb->dev != dev);
> 
> -	/* Drop the packet if queues are not set up */
> +	/* Drop the packet if queues are not set up.
> +	 * This handler should be called inside an RCU read section
> +	 * so we don't need to enter it here explicitly.
> +	 */
> +	num_queues = rcu_dereference(vif)->num_queues;
>  	if (num_queues < 1)
>  		goto drop;
> 
> @@ -222,18 +226,18 @@ static struct net_device_stats
> *xenvif_get_stats(struct net_device *dev)
>  {
>  	struct xenvif *vif = netdev_priv(dev);
>  	struct xenvif_queue *queue = NULL;
> +	unsigned int num_queues;
>  	u64 rx_bytes = 0;
>  	u64 rx_packets = 0;
>  	u64 tx_bytes = 0;
>  	u64 tx_packets = 0;
>  	unsigned int index;
> 
> -	spin_lock(&vif->lock);
> -	if (vif->queues == NULL)
> -		goto out;
> +	rcu_read_lock();
> +	num_queues = rcu_dereference(vif)->num_queues;
> 
>  	/* Aggregate tx and rx stats from each queue */
> -	for (index = 0; index < vif->num_queues; ++index) {
> +	for (index = 0; index < num_queues; ++index) {
>  		queue = &vif->queues[index];
>  		rx_bytes += queue->stats.rx_bytes;
>  		rx_packets += queue->stats.rx_packets;
> @@ -241,8 +245,7 @@ static struct net_device_stats
> *xenvif_get_stats(struct net_device *dev)
>  		tx_packets += queue->stats.tx_packets;
>  	}
> 
> -out:
> -	spin_unlock(&vif->lock);
> +	rcu_read_unlock();
> 
>  	vif->dev->stats.rx_bytes = rx_bytes;
>  	vif->dev->stats.rx_packets = rx_packets;
> @@ -378,10 +381,13 @@ static void xenvif_get_ethtool_stats(struct
> net_device *dev,
>  				     struct ethtool_stats *stats, u64 * data)
>  {
>  	struct xenvif *vif = netdev_priv(dev);
> -	unsigned int num_queues = vif->num_queues;
> +	unsigned int num_queues;
>  	int i;
>  	unsigned int queue_index;
> 
> +	rcu_read_lock();
> +	num_queues = rcu_dereference(vif)->num_queues;
> +
>  	for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
>  		unsigned long accum = 0;
>  		for (queue_index = 0; queue_index < num_queues;
> ++queue_index) {
> @@ -390,6 +396,8 @@ static void xenvif_get_ethtool_stats(struct
> net_device *dev,
>  		}
>  		data[i] = accum;
>  	}
> +
> +	rcu_read_unlock();
>  }
> 
>  static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 *
> data)
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-
> netback/netback.c
> index f9bcf4a..602d408 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -214,7 +214,7 @@ static void xenvif_fatal_tx_err(struct xenvif *vif)
>  	netdev_err(vif->dev, "fatal error; disabling device\n");
>  	vif->disabled = true;
>  	/* Disable the vif from queue 0's kthread */
> -	if (vif->queues)
> +	if (vif->num_queues)
>  		xenvif_kick_thread(&vif->queues[0]);
>  }
> 
> diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-
> netback/xenbus.c
> index d2d7cd9..a56d3ea 100644
> --- a/drivers/net/xen-netback/xenbus.c
> +++ b/drivers/net/xen-netback/xenbus.c
> @@ -495,26 +495,26 @@ static void backend_disconnect(struct
> backend_info *be)
>  	struct xenvif *vif = be->vif;
> 
>  	if (vif) {
> +		unsigned int num_queues = vif->num_queues;
>  		unsigned int queue_index;
> -		struct xenvif_queue *queues;
> 
>  		xen_unregister_watchers(vif);
>  #ifdef CONFIG_DEBUG_FS
>  		xenvif_debugfs_delif(vif);
>  #endif /* CONFIG_DEBUG_FS */
>  		xenvif_disconnect_data(vif);
> -		for (queue_index = 0;
> -		     queue_index < vif->num_queues;
> -		     ++queue_index)
> -			xenvif_deinit_queue(&vif->queues[queue_index]);
> 
> -		spin_lock(&vif->lock);
> -		queues = vif->queues;
> +		/* At this point some of the handlers may still be active
> +		 * so we need to have additional synchronization here.
> +		 */
>  		vif->num_queues = 0;
> -		vif->queues = NULL;
> -		spin_unlock(&vif->lock);
> +		synchronize_net();
> 
> -		vfree(queues);
> +		for (queue_index = 0; queue_index < num_queues;
> ++queue_index)
> +			xenvif_deinit_queue(&vif->queues[queue_index]);
> +
> +		vfree(vif->queues);
> +		vif->queues = NULL;
> 
>  		xenvif_disconnect_ctrl(vif);
>  	}
> --
> 1.8.3.1
kernel test robot March 9, 2017, 11:36 p.m. UTC | #2
Hi Igor,

[auto build test WARNING on net/master]

url:    https://github.com/0day-ci/linux/commits/Igor-Druzhinin/xen-netback-fix-race-condition-on-XenBus-disconnect/20170310-055447
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

   include/linux/compiler.h:264:8: sparse: attribute 'no_sanitize_address': unknown attribute
>> drivers/net/xen-netback/interface.c:178:22: sparse: incompatible types in comparison expression (different address spaces)
   drivers/net/xen-netback/interface.c:237:22: sparse: incompatible types in comparison expression (different address spaces)
   drivers/net/xen-netback/interface.c:389:22: sparse: incompatible types in comparison expression (different address spaces)

vim +178 drivers/net/xen-netback/interface.c

   162	}
   163	
   164	static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
   165	{
   166		struct xenvif *vif = netdev_priv(dev);
   167		struct xenvif_queue *queue = NULL;
   168		unsigned int num_queues;
   169		u16 index;
   170		struct xenvif_rx_cb *cb;
   171	
   172		BUG_ON(skb->dev != dev);
   173	
   174		/* Drop the packet if queues are not set up.
   175		 * This handler should be called inside an RCU read section
   176		 * so we don't need to enter it here explicitly.
   177		 */
 > 178		num_queues = rcu_dereference(vif)->num_queues;
   179		if (num_queues < 1)
   180			goto drop;
   181	
   182		/* Obtain the queue to be used to transmit this packet */
   183		index = skb_get_queue_mapping(skb);
   184		if (index >= num_queues) {
   185			pr_warn_ratelimited("Invalid queue %hu for packet on interface %s\n.",
   186					    index, vif->dev->name);

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 829b26c..a3c018e 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -165,13 +165,17 @@  static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
 	struct xenvif_queue *queue = NULL;
-	unsigned int num_queues = vif->num_queues;
+	unsigned int num_queues;
 	u16 index;
 	struct xenvif_rx_cb *cb;
 
 	BUG_ON(skb->dev != dev);
 
-	/* Drop the packet if queues are not set up */
+	/* Drop the packet if queues are not set up.
+	 * This handler should be called inside an RCU read section
+	 * so we don't need to enter it here explicitly.
+	 */
+	num_queues = rcu_dereference(vif)->num_queues;
 	if (num_queues < 1)
 		goto drop;
 
@@ -222,18 +226,18 @@  static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
 	struct xenvif_queue *queue = NULL;
+	unsigned int num_queues;
 	u64 rx_bytes = 0;
 	u64 rx_packets = 0;
 	u64 tx_bytes = 0;
 	u64 tx_packets = 0;
 	unsigned int index;
 
-	spin_lock(&vif->lock);
-	if (vif->queues == NULL)
-		goto out;
+	rcu_read_lock();
+	num_queues = rcu_dereference(vif)->num_queues;
 
 	/* Aggregate tx and rx stats from each queue */
-	for (index = 0; index < vif->num_queues; ++index) {
+	for (index = 0; index < num_queues; ++index) {
 		queue = &vif->queues[index];
 		rx_bytes += queue->stats.rx_bytes;
 		rx_packets += queue->stats.rx_packets;
@@ -241,8 +245,7 @@  static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
 		tx_packets += queue->stats.tx_packets;
 	}
 
-out:
-	spin_unlock(&vif->lock);
+	rcu_read_unlock();
 
 	vif->dev->stats.rx_bytes = rx_bytes;
 	vif->dev->stats.rx_packets = rx_packets;
@@ -378,10 +381,13 @@  static void xenvif_get_ethtool_stats(struct net_device *dev,
 				     struct ethtool_stats *stats, u64 * data)
 {
 	struct xenvif *vif = netdev_priv(dev);
-	unsigned int num_queues = vif->num_queues;
+	unsigned int num_queues;
 	int i;
 	unsigned int queue_index;
 
+	rcu_read_lock();
+	num_queues = rcu_dereference(vif)->num_queues;
+
 	for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
 		unsigned long accum = 0;
 		for (queue_index = 0; queue_index < num_queues; ++queue_index) {
@@ -390,6 +396,8 @@  static void xenvif_get_ethtool_stats(struct net_device *dev,
 		}
 		data[i] = accum;
 	}
+
+	rcu_read_unlock();
 }
 
 static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index f9bcf4a..602d408 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -214,7 +214,7 @@  static void xenvif_fatal_tx_err(struct xenvif *vif)
 	netdev_err(vif->dev, "fatal error; disabling device\n");
 	vif->disabled = true;
 	/* Disable the vif from queue 0's kthread */
-	if (vif->queues)
+	if (vif->num_queues)
 		xenvif_kick_thread(&vif->queues[0]);
 }
 
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index d2d7cd9..a56d3ea 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -495,26 +495,26 @@  static void backend_disconnect(struct backend_info *be)
 	struct xenvif *vif = be->vif;
 
 	if (vif) {
+		unsigned int num_queues = vif->num_queues;
 		unsigned int queue_index;
-		struct xenvif_queue *queues;
 
 		xen_unregister_watchers(vif);
 #ifdef CONFIG_DEBUG_FS
 		xenvif_debugfs_delif(vif);
 #endif /* CONFIG_DEBUG_FS */
 		xenvif_disconnect_data(vif);
-		for (queue_index = 0;
-		     queue_index < vif->num_queues;
-		     ++queue_index)
-			xenvif_deinit_queue(&vif->queues[queue_index]);
 
-		spin_lock(&vif->lock);
-		queues = vif->queues;
+		/* At this point some of the handlers may still be active
+		 * so we need to have additional synchronization here.
+		 */
 		vif->num_queues = 0;
-		vif->queues = NULL;
-		spin_unlock(&vif->lock);
+		synchronize_net();
 
-		vfree(queues);
+		for (queue_index = 0; queue_index < num_queues; ++queue_index)
+			xenvif_deinit_queue(&vif->queues[queue_index]);
+
+		vfree(vif->queues);
+		vif->queues = NULL;
 
 		xenvif_disconnect_ctrl(vif);
 	}