Message ID | 56e17ee7750b89ff350735ff7cbaf14c63864586.1533030830.git.petrm@mellanox.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | ipv4: Control SKB reprioritization after forwarding | expand |
On 07/31/2018 12:56 PM, Petr Machata wrote: > After IPv4 packets are forwarded, the priority of the corresponding SKB > is updated according to the TOS field of IPv4 header. This overrides any > prioritization done earlier by e.g. an skbedit action or ingress-qos-map > defined at a vlan device. ... > diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h > index 661348f23ea5..e47503b4e4d1 100644 > --- a/include/net/netns/ipv4.h > +++ b/include/net/netns/ipv4.h > @@ -98,6 +98,7 @@ struct netns_ipv4 { > int sysctl_ip_default_ttl; > int sysctl_ip_no_pmtu_disc; > int sysctl_ip_fwd_use_pmtu; > + int sysctl_ip_fwd_update_priority; > int sysctl_ip_nonlocal_bind; > /* Shall we try to damage output packets if routing dev changes? */ > int sysctl_ip_dynaddr; > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c > index f2a0a3bab6b5..d3cfbd89ca3a 100644 > --- a/net/ipv4/af_inet.c > +++ b/net/ipv4/af_inet.c > @@ -1802,6 +1802,7 @@ static __net_init int inet_init_net(struct net *net) > * We set them here, in case sysctl is not compiled. > */ > net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; > + net->ipv4.sysctl_ip_fwd_update_priority = true; nit: since this is an int, and to keep the code style of inet_init_net(), I'd suggest using 1 instead. > net->ipv4.sysctl_ip_dynaddr = 0; > net->ipv4.sysctl_ip_early_demux = 1; > net->ipv4.sysctl_udp_early_demux = 1; ... -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Nikolay Aleksandrov <nikolay@cumulusnetworks.com> writes: >> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c >> index f2a0a3bab6b5..d3cfbd89ca3a 100644 >> --- a/net/ipv4/af_inet.c >> +++ b/net/ipv4/af_inet.c >> @@ -1802,6 +1802,7 @@ static __net_init int inet_init_net(struct net *net) >> * We set them here, in case sysctl is not compiled. >> */ >> net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; >> + net->ipv4.sysctl_ip_fwd_update_priority = true; > > nit: since this is an int, and to keep the code style of inet_init_net(), > I'd suggest using 1 instead. OK. Thanks, Petr -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 77c37fb0b6a6..e74515ecaa9c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -81,6 +81,15 @@ fib_multipath_hash_policy - INTEGER 0 - Layer 3 1 - Layer 4 +ip_forward_update_priority - INTEGER + Whether to update SKB priority from "TOS" field in IPv4 header after it + is forwarded. The new SKB priority is mapped from TOS field value + according to an rt_tos2priority table (see e.g. man tc-prio). + Default: 1 (Update priority.) + Possible values: + 0 - Do not update priority. + 1 - Update priority. + route/max_size - INTEGER Maximum number of routes allowed in the kernel. Increase this when using large numbers of interfaces and/or routes. diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 661348f23ea5..e47503b4e4d1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -98,6 +98,7 @@ struct netns_ipv4 { int sysctl_ip_default_ttl; int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; + int sysctl_ip_fwd_update_priority; int sysctl_ip_nonlocal_bind; /* Shall we try to damage output packets if routing dev changes? */ int sysctl_ip_dynaddr; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f2a0a3bab6b5..d3cfbd89ca3a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1802,6 +1802,7 @@ static __net_init int inet_init_net(struct net *net) * We set them here, in case sysctl is not compiled. 
*/ net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; + net->ipv4.sysctl_ip_fwd_update_priority = true; net->ipv4.sysctl_ip_dynaddr = 0; net->ipv4.sysctl_ip_early_demux = 1; net->ipv4.sysctl_udp_early_demux = 1; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index b54b948b0596..32662e9e5d21 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -143,7 +143,8 @@ int ip_forward(struct sk_buff *skb) !skb_sec_path(skb)) ip_rt_send_redirect(skb); - skb->priority = rt_tos2priority(iph->tos); + if (net->ipv4.sysctl_ip_fwd_update_priority) + skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, net, NULL, skb, skb->dev, rt->dst.dev, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5fa335fd3852..e21dda015513 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -664,6 +664,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "ip_forward_update_priority", + .data = &init_net.ipv4.sysctl_ip_fwd_update_priority, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "ip_nonlocal_bind", .data = &init_net.ipv4.sysctl_ip_nonlocal_bind, .maxlen = sizeof(int),