diff mbox series

[net-next,v8,1/2] vxlan: Add nolocalbypass option to vxlan.

Message ID 20230511032210.9146-1-vladimir@nikishkin.pw (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v8,1/2] vxlan: Add nolocalbypass option to vxlan. | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4193 this patch: 4193
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 921 this patch: 921
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4414 this patch: 4414
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Vladimir Nikishkin May 11, 2023, 3:22 a.m. UTC
If a packet needs to be encapsulated towards a local destination IP,
the packet will be injected into the Rx path as if it was received by
the target VXLAN device without undergoing encapsulation. If such a
device does not exist, the packet will be dropped.

There are scenarios where we do not want to drop such packets and
instead want to let them be encapsulated and locally received by a user
space program that post-processes these VXLAN packets.

To that end, add a new VXLAN device attribute that controls whether such
packets are dropped or not. When set ("localbypass") packets are
dropped and when unset ("nolocalbypass") the packets are encapsulated
and locally delivered to the listening user space application. Default
to "localbypass" to maintain existing behavior.

Signed-off-by: Vladimir Nikishkin <vladimir@nikishkin.pw>
---
 drivers/net/vxlan/vxlan_core.c | 21 +++++++++++++++++++--
 include/net/vxlan.h            |  4 +++-
 include/uapi/linux/if_link.h   |  1 +
 3 files changed, 23 insertions(+), 3 deletions(-)

Comments

Vladimir Nikishkin May 11, 2023, 3:38 a.m. UTC | #1
Do I need to re-submit the patch to ifconfig separately, or the old one
was good enough?
Vladimir Nikishkin <vladimir@nikishkin.pw> writes:

> If a packet needs to be encapsulated towards a local destination IP,
> the packet will be injected into the Rx path as if it was received by
> the target VXLAN device without undergoing encapsulation. If such a
> device does not exist, the packet will be dropped.
>
> There are scenarios where we do not want to drop such packets and
> instead want to let them be encapsulated and locally received by a user
> space program that post-processes these VXLAN packets.
>
> To that end, add a new VXLAN device attribute that controls whether such
> packets are dropped or not. When set ("localbypass") packets are
> dropped and when unset ("nolocalbypass") the packets are encapsulated
> and locally delivered to the listening user space application. Default
> to "localbypass" to maintain existing behavior.
>
> Signed-off-by: Vladimir Nikishkin <vladimir@nikishkin.pw>
> ---
>  drivers/net/vxlan/vxlan_core.c | 21 +++++++++++++++++++--
>  include/net/vxlan.h            |  4 +++-
>  include/uapi/linux/if_link.h   |  1 +
>  3 files changed, 23 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
> index 561fe1b314f5..78744549c1b3 100644
> --- a/drivers/net/vxlan/vxlan_core.c
> +++ b/drivers/net/vxlan/vxlan_core.c
> @@ -2352,7 +2352,8 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
>  #endif
>  	/* Bypass encapsulation if the destination is local */
>  	if (rt_flags & RTCF_LOCAL &&
> -	    !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
> +	    !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
> +	    vxlan->cfg.flags & VXLAN_F_LOCALBYPASS) {
>  		struct vxlan_dev *dst_vxlan;
>  
>  		dst_release(dst);
> @@ -3172,6 +3173,7 @@ static void vxlan_raw_setup(struct net_device *dev)
>  }
>  
>  static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
> +	[IFLA_VXLAN_UNSPEC]     = { .strict_start_type = IFLA_VXLAN_LOCALBYPASS },
>  	[IFLA_VXLAN_ID]		= { .type = NLA_U32 },
>  	[IFLA_VXLAN_GROUP]	= { .len = sizeof_field(struct iphdr, daddr) },
>  	[IFLA_VXLAN_GROUP6]	= { .len = sizeof(struct in6_addr) },
> @@ -3202,6 +3204,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
>  	[IFLA_VXLAN_TTL_INHERIT]	= { .type = NLA_FLAG },
>  	[IFLA_VXLAN_DF]		= { .type = NLA_U8 },
>  	[IFLA_VXLAN_VNIFILTER]	= { .type = NLA_U8 },
> +	[IFLA_VXLAN_LOCALBYPASS]	= NLA_POLICY_MAX(NLA_U8, 1),
>  };
>  
>  static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
> @@ -4011,6 +4014,17 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
>  			conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
>  	}
>  
> +	if (data[IFLA_VXLAN_LOCALBYPASS]) {
> +		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LOCALBYPASS,
> +				    VXLAN_F_LOCALBYPASS, changelink,
> +				    true, extack);
> +		if (err)
> +			return err;
> +	} else if (!changelink) {
> +		/* default to local bypass on a new device */
> +		conf->flags |= VXLAN_F_LOCALBYPASS;
> +	}
> +
>  	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
>  		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
>  				    VXLAN_F_UDP_ZERO_CSUM6_TX, changelink,
> @@ -4232,6 +4246,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
>  		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
>  		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
>  		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
> +		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LOCALBYPASS */
>  		0;
>  }
>  
> @@ -4308,7 +4323,9 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
>  	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
>  		       !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
>  	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
> -		       !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
> +		       !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)) ||
> +	    nla_put_u8(skb, IFLA_VXLAN_LOCALBYPASS,
> +		       !!(vxlan->cfg.flags & VXLAN_F_LOCALBYPASS)))
>  		goto nla_put_failure;
>  
>  	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
> diff --git a/include/net/vxlan.h b/include/net/vxlan.h
> index 20bd7d893e10..0be91ca78d3a 100644
> --- a/include/net/vxlan.h
> +++ b/include/net/vxlan.h
> @@ -328,6 +328,7 @@ struct vxlan_dev {
>  #define VXLAN_F_TTL_INHERIT		0x10000
>  #define VXLAN_F_VNIFILTER               0x20000
>  #define VXLAN_F_MDB			0x40000
> +#define VXLAN_F_LOCALBYPASS		0x80000
>  
>  /* Flags that are used in the receive path. These flags must match in
>   * order for a socket to be shareable
> @@ -348,7 +349,8 @@ struct vxlan_dev {
>  					 VXLAN_F_UDP_ZERO_CSUM6_TX |	\
>  					 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
>  					 VXLAN_F_COLLECT_METADATA  |	\
> -					 VXLAN_F_VNIFILTER)
> +					 VXLAN_F_VNIFILTER         |    \
> +					 VXLAN_F_LOCALBYPASS)
>  
>  struct net_device *vxlan_dev_create(struct net *net, const char *name,
>  				    u8 name_assign_type, struct vxlan_config *conf);
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index 4ac1000b0ef2..0f6a0fe09bdb 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -828,6 +828,7 @@ enum {
>  	IFLA_VXLAN_TTL_INHERIT,
>  	IFLA_VXLAN_DF,
>  	IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
> +	IFLA_VXLAN_LOCALBYPASS,
>  	__IFLA_VXLAN_MAX
>  };
>  #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
> -- 
> 2.35.8
Ido Schimmel May 11, 2023, 2:53 p.m. UTC | #2
On Thu, May 11, 2023 at 11:38:33AM +0800, Vladimir Nikishkin wrote:
> Do I need to re-submit the patch to ifconfig separately, or the old one
> was good enough?

You will need to re-submit the iproute2 patches after the kernel patches
are accepted
Ido Schimmel May 11, 2023, 3:27 p.m. UTC | #3
On Thu, May 11, 2023 at 11:22:09AM +0800, Vladimir Nikishkin wrote:
> If a packet needs to be encapsulated towards a local destination IP,
> the packet will be injected into the Rx path as if it was received by
> the target VXLAN device without undergoing encapsulation. If such a
> device does not exist, the packet will be dropped.
> 
> There are scenarios where we do not want to drop such packets and
> instead want to let them be encapsulated and locally received by a user
> space program that post-processes these VXLAN packets.
> 
> To that end, add a new VXLAN device attribute that controls whether such
> packets are dropped or not. When set ("localbypass") packets are
> dropped and when unset ("nolocalbypass") the packets are encapsulated
> and locally delivered to the listening user space application. Default
> to "localbypass" to maintain existing behavior.
> 
> Signed-off-by: Vladimir Nikishkin <vladimir@nikishkin.pw>

The code looks fine to me, so:

Reviewed-by: Ido Schimmel <idosch@nvidia.com>

But the commit message needs to be aligned to the code changes made in
this version (which need to be noted the under the '---' [1]). I
suggest:

"
If a packet needs to be encapsulated towards a local destination IP, the
packet will undergo a "local bypass" and be injected into the Rx path as
if it was received by the target VXLAN device without undergoing
encapsulation. If such a device does not exist, the packet will be
dropped.

There are scenarios where we do not want to perform such a bypass, but
instead want the packet to be encapsulated and locally received by a
user space program for post-processing.

To that end, add a new VXLAN device attribute that controls whether a
"local bypass" is performed or not. Default to performing a bypass to
maintain existing behavior.
"

[1] https://www.kernel.org/doc/html/latest/process/submitting-patches.html#the-canonical-patch-format
diff mbox series

Patch

diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 561fe1b314f5..78744549c1b3 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -2352,7 +2352,8 @@  static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
 #endif
 	/* Bypass encapsulation if the destination is local */
 	if (rt_flags & RTCF_LOCAL &&
-	    !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
+	    !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
+	    vxlan->cfg.flags & VXLAN_F_LOCALBYPASS) {
 		struct vxlan_dev *dst_vxlan;
 
 		dst_release(dst);
@@ -3172,6 +3173,7 @@  static void vxlan_raw_setup(struct net_device *dev)
 }
 
 static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
+	[IFLA_VXLAN_UNSPEC]     = { .strict_start_type = IFLA_VXLAN_LOCALBYPASS },
 	[IFLA_VXLAN_ID]		= { .type = NLA_U32 },
 	[IFLA_VXLAN_GROUP]	= { .len = sizeof_field(struct iphdr, daddr) },
 	[IFLA_VXLAN_GROUP6]	= { .len = sizeof(struct in6_addr) },
@@ -3202,6 +3204,7 @@  static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_TTL_INHERIT]	= { .type = NLA_FLAG },
 	[IFLA_VXLAN_DF]		= { .type = NLA_U8 },
 	[IFLA_VXLAN_VNIFILTER]	= { .type = NLA_U8 },
+	[IFLA_VXLAN_LOCALBYPASS]	= NLA_POLICY_MAX(NLA_U8, 1),
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -4011,6 +4014,17 @@  static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 			conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
 	}
 
+	if (data[IFLA_VXLAN_LOCALBYPASS]) {
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LOCALBYPASS,
+				    VXLAN_F_LOCALBYPASS, changelink,
+				    true, extack);
+		if (err)
+			return err;
+	} else if (!changelink) {
+		/* default to local bypass on a new device */
+		conf->flags |= VXLAN_F_LOCALBYPASS;
+	}
+
 	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
 		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
 				    VXLAN_F_UDP_ZERO_CSUM6_TX, changelink,
@@ -4232,6 +4246,7 @@  static size_t vxlan_get_size(const struct net_device *dev)
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
+		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LOCALBYPASS */
 		0;
 }
 
@@ -4308,7 +4323,9 @@  static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
 		       !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
 	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
-		       !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
+		       !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)) ||
+	    nla_put_u8(skb, IFLA_VXLAN_LOCALBYPASS,
+		       !!(vxlan->cfg.flags & VXLAN_F_LOCALBYPASS)))
 		goto nla_put_failure;
 
 	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 20bd7d893e10..0be91ca78d3a 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -328,6 +328,7 @@  struct vxlan_dev {
 #define VXLAN_F_TTL_INHERIT		0x10000
 #define VXLAN_F_VNIFILTER               0x20000
 #define VXLAN_F_MDB			0x40000
+#define VXLAN_F_LOCALBYPASS		0x80000
 
 /* Flags that are used in the receive path. These flags must match in
  * order for a socket to be shareable
@@ -348,7 +349,8 @@  struct vxlan_dev {
 					 VXLAN_F_UDP_ZERO_CSUM6_TX |	\
 					 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
 					 VXLAN_F_COLLECT_METADATA  |	\
-					 VXLAN_F_VNIFILTER)
+					 VXLAN_F_VNIFILTER         |    \
+					 VXLAN_F_LOCALBYPASS)
 
 struct net_device *vxlan_dev_create(struct net *net, const char *name,
 				    u8 name_assign_type, struct vxlan_config *conf);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 4ac1000b0ef2..0f6a0fe09bdb 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -828,6 +828,7 @@  enum {
 	IFLA_VXLAN_TTL_INHERIT,
 	IFLA_VXLAN_DF,
 	IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
+	IFLA_VXLAN_LOCALBYPASS,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)