@@ -144,6 +144,7 @@ struct ovs_vport_stats {
/* Packet transfer. */
#define OVS_PACKET_FAMILY "ovs_packet"
+#define OVS_PACKET_MCGROUP "ovs_packet"
#define OVS_PACKET_VERSION 0x1
enum ovs_packet_cmd {
@@ -678,7 +679,8 @@ struct sample_arg {
/**
* enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
* @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
- * message should be sent. Required.
+ * message should be sent. Required, and must be nonzero, unless
+ * %OVS_USERSPACE_ATTR_MCAST is set (send to the "ovs_packet" mcast group).
* @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is
* copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA.
* @OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: If present, u32 output port to get
@@ -692,6 +694,8 @@ enum ovs_userspace_attr {
OVS_USERSPACE_ATTR_EGRESS_TUN_PORT, /* Optional, u32 output port
* to get tunnel info. */
OVS_USERSPACE_ATTR_ACTIONS, /* Optional flag to get actions. */
+ OVS_USERSPACE_ATTR_MCAST, /* Optional flag to send the packet to
+ the "ovs_packet" multicast group. */
__OVS_USERSPACE_ATTR_MAX
};
@@ -1004,6 +1004,11 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
break;
}
+ case OVS_USERSPACE_ATTR_MCAST: {
+ upcall.portid = MCAST_PID;
+ break;
+ }
+
} /* End of switch. */
}
@@ -70,6 +70,10 @@ static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
.name = OVS_VPORT_MCGROUP,
};
+static const struct genl_multicast_group ovs_dp_packet_multicast_group = {
+ .name = OVS_PACKET_MCGROUP,
+};
+
/* Check if need to build a reply message.
* OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
@@ -577,7 +581,13 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
- err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
+ if (upcall_info->portid == MCAST_PID)
+ err = genlmsg_multicast_netns(&dp_packet_genl_family,
+ ovs_dp_get_net(dp), user_skb, 0, 0, GFP_KERNEL);
+ else
+ err = genlmsg_unicast(ovs_dp_get_net(dp),
+ user_skb, upcall_info->portid);
+
user_skb = NULL;
out:
if (err)
@@ -717,6 +727,8 @@ static struct genl_family dp_packet_genl_family __ro_after_init = {
.small_ops = dp_packet_genl_ops,
.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
.resv_start_op = OVS_PACKET_CMD_EXECUTE + 1,
+ .mcgrps = &ovs_dp_packet_multicast_group,
+ .n_mcgrps = 1,
.module = THIS_MODULE,
};
@@ -124,6 +124,7 @@ struct ovs_skb_cb {
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
+#define MCAST_PID 0xFFFFFFFF /* portid sentinel: multicast the upcall */
/**
* struct dp_upcall - metadata to include with a packet to send to userspace
* @cmd: One of %OVS_PACKET_CMD_*.
@@ -3043,6 +3043,8 @@ static int validate_userspace(const struct nlattr *attr)
[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
[OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
+ [OVS_USERSPACE_ATTR_ACTIONS] = {.type = NLA_FLAG },
+ [OVS_USERSPACE_ATTR_MCAST] = {.type = NLA_FLAG },
};
struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
int error;
@@ -3052,8 +3054,8 @@ static int validate_userspace(const struct nlattr *attr)
if (error)
return error;
- if (!a[OVS_USERSPACE_ATTR_PID] ||
- !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
+ if (!a[OVS_USERSPACE_ATTR_MCAST] && (!a[OVS_USERSPACE_ATTR_PID] ||
+ !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])))
return -EINVAL;
return 0;
actions. Some userspace actions, such as the ones derived from the OFP_CONTROLLER action or the slow path, have to be handled by ovs-vswitchd, so they are unicast through the corresponding netlink socket. However, some other userspace actions require little processing by ovs-vswitchd and their end consumer is typically some external entity. This is the case for IPFIX sampling, which can provide very useful observability on the OVS datapath. Having these samples share the netlink socket and the userspace CPU time with flow misses can easily lead to higher latency and packet drops. This is clearly a price too high to pay for observability. In order to allow observability applications to safely consume data that includes OVN metadata, this patch makes the existing "ovs_packet" netlink family also contain a multicast group and adds a new attribute to the userspace action so that ovs-vswitchd can indicate that an action must be multicast. Signed-off-by: Adrian Moreno <amorenoz@redhat.com> --- include/uapi/linux/openvswitch.h | 6 +++++- net/openvswitch/actions.c | 5 +++++ net/openvswitch/datapath.c | 14 +++++++++++++- net/openvswitch/datapath.h | 1 + net/openvswitch/flow_netlink.c | 6 ++++-- 5 files changed, 28 insertions(+), 4 deletions(-)