[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200925231159.945-2-qingtao.cao.au@gmail.com>
Date: Sat, 26 Sep 2020 09:11:59 +1000
From: Qingtao Cao <qingtao.cao.au@...il.com>
To: netdev@...r.kernel.org, davem@...emloft.net, kuba@...nel.org,
kuznet@....inr.ac.ru, yoshfuji@...ux-ipv6.org, corbet@....net
Cc: Qingtao Cao <qingtao.cao@...i.com>,
David Leonard <david.leonard@...i.com>
Subject: [PATCH 1/1] Network: support default route metric per interface
From: Qingtao Cao <qingtao.cao@...i.com>
Add /proc/sys/net/ipv[4|6]/conf/<device>/def_rt_metric sysfs attribute
file for each network interface so that userspace programs can specify
different default route metrics for each interface, which will also be
applied by the kernel when new routes are automatically created for
relevant interfaces, when userspace programs may have not specified
metrics via relevant netlink messages for example.
Signed-off-by: Qingtao Cao <qingtao.cao@...i.com>
Signed-off-by: David Leonard <david.leonard@...i.com>
---
Documentation/networking/ip-sysctl.rst | 8 +++++
include/linux/inetdevice.h | 4 +++
include/linux/ipv6.h | 3 ++
include/net/ip6_route.h | 15 ++++++++
include/uapi/linux/ip.h | 1 +
include/uapi/linux/ipv6.h | 1 +
net/ipv4/Kconfig | 13 +++++++
net/ipv4/devinet.c | 3 ++
net/ipv4/fib_frontend.c | 27 ++++++++++++++
net/ipv6/addrconf.c | 30 ++++++++++++++--
net/ipv6/route.c | 50 ++++++++++++++++++++++++--
11 files changed, 150 insertions(+), 5 deletions(-)
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 837d51f9e1fa..b3252591fc31 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1591,6 +1591,10 @@ igmp_link_local_mcast_reports - BOOLEAN
Default TRUE
+def_rt_metric - INTEGER
+ Default metric used for routes when no metric is specified.
+ 0 to use system default
+
Alexey Kuznetsov.
kuznet@....inr.ac.ru
@@ -2264,6 +2268,10 @@ enhanced_dad - BOOLEAN
Default: TRUE
+def_rt_metric - INTEGER
+ Default metric used for routes when no metric is specified.
+ 0 to use system default
+
``icmp/*``:
===========
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 3515ca64e638..2904f158e048 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -119,6 +119,10 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
#define IN_DEV_NET_ROUTE_LOCALNET(in_dev, net) \
IN_DEV_NET_ORCONF(in_dev, net, ROUTE_LOCALNET)
+#ifdef CONFIG_IP_DEF_RT_METRIC
+#define IN_DEV_DEF_RT_METRIC(in_dev) IN_DEV_CONF_GET((in_dev), DEF_RT_METRIC)
+#endif
+
#define IN_DEV_RX_REDIRECTS(in_dev) \
((IN_DEV_FORWARD(in_dev) && \
IN_DEV_ANDCONF((in_dev), ACCEPT_REDIRECTS)) \
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index a44789d027cc..be399c74c8b2 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -75,6 +75,9 @@ struct ipv6_devconf {
__s32 disable_policy;
__s32 ndisc_tclass;
__s32 rpl_seg_enabled;
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ __s32 def_rt_metric;
+#endif
struct ctl_table_header *sysctl_header;
};
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 2a5277758379..ca470729d5b9 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -336,4 +336,19 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
struct net_device *dev, struct sk_buff *skb,
const void *daddr);
+
+#ifdef CONFIG_IP_DEF_RT_METRIC
+static inline void rt6_get_dev_dflt_metric(struct net_device *dev, struct fib6_config *cfg)
+{
+ struct inet6_dev *idev = NULL;
+
+ idev = in6_dev_get(dev);
+ if (idev) {
+ if (idev->cnf.def_rt_metric)
+ cfg->fc_metric = idev->cnf.def_rt_metric;
+ in6_dev_put(idev);
+ }
+}
+#endif
+
#endif
diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
index e42d13b55cf3..de97706b900c 100644
--- a/include/uapi/linux/ip.h
+++ b/include/uapi/linux/ip.h
@@ -169,6 +169,7 @@ enum
IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
IPV4_DEVCONF_DROP_GRATUITOUS_ARP,
IPV4_DEVCONF_BC_FORWARDING,
+ IPV4_DEVCONF_DEF_RT_METRIC,
__IPV4_DEVCONF_MAX
};
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 13e8751bf24a..c4ba9ce53756 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -189,6 +189,7 @@ enum {
DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN,
DEVCONF_NDISC_TCLASS,
DEVCONF_RPL_SEG_ENABLED,
+ DEVCONF_DEF_RT_METRIC,
DEVCONF_MAX
};
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 87983e70f03f..529cd5a26e9a 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -457,6 +457,19 @@ config INET_DIAG_DESTROY
had been disconnected.
If unsure, say N.
+config IP_DEF_RT_METRIC
+ bool "IP: default route metrics support"
+ help
+ Allow userspace to specify the default metric for routes per network
+ interfaces when no metric is explicitly provided. When userspace
+ programs change routes' metrics, if they save the new metric value
+ into relevant network interface's def_rt_metric sysfs attribute file,
+ the kernel will also apply it whenever new routes are created for
+ that interface, unless the metric is explicitly specified. Leave 0
+ to use the system default values.
+
+ If unsure, say N.
+
menuconfig TCP_CONG_ADVANCED
bool "TCP: advanced congestion control"
help
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 123a6d39438f..775a358e5466 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2538,6 +2538,9 @@ static struct devinet_sysctl_table {
"ignore_routes_with_linkdown"),
DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
"drop_gratuitous_arp"),
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ DEVINET_SYSCTL_RW_ENTRY(DEF_RT_METRIC, "def_rt_metric"),
+#endif
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 86a23e4a6a50..459fdc507d50 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -724,6 +724,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
struct nlattr *attr;
int err, remaining;
struct rtmsg *rtm;
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ struct net_device *dev = NULL;
+ struct in_device *in_dev = NULL;
+#endif
err = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
rtm_ipv4_policy, extack);
@@ -828,6 +832,21 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
goto errout;
}
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ /* Apply the default route metric of the out interface if needed */
+ if (cfg->fc_priority == 0 && cfg->fc_oif) {
+ dev = dev_get_by_index(net, cfg->fc_oif);
+ if (dev) {
+ in_dev = in_dev_get(dev);
+ if (in_dev) {
+ cfg->fc_priority = IN_DEV_DEF_RT_METRIC(in_dev);
+ in_dev_put(in_dev);
+ }
+ dev_put(dev);
+ }
+ }
+#endif
+
return 0;
errout:
return err;
@@ -1081,6 +1100,14 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len,
else
cfg.fc_scope = RT_SCOPE_HOST;
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ /* If the netlink message doesn't have the IFA_RT_PRIORITY attribute,
+ * fall back on the interface's default route metric
+ */
+ if (cfg.fc_priority == 0)
+ cfg.fc_priority = IN_DEV_DEF_RT_METRIC(ifa->ifa_dev);
+#endif
+
if (cmd == RTM_NEWROUTE)
fib_table_insert(net, tb, &cfg, NULL);
else
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 01146b66d666..6480da7ae885 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2388,7 +2388,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, u32 metric,
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
- .fc_metric = metric ? : IP6_RT_PRIO_ADDRCONF,
+ .fc_metric = metric,
.fc_ifindex = dev->ifindex,
.fc_expires = expires,
.fc_dst_len = plen,
@@ -2400,6 +2400,14 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, u32 metric,
cfg.fc_dst = *pfx;
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ if (cfg.fc_metric == 0)
+ rt6_get_dev_dflt_metric(dev, &cfg);
+#endif
+
+ if (cfg.fc_metric == 0)
+ cfg.fc_metric = IP6_RT_PRIO_ADDRCONF;
+
/* Prevent useless cloning on PtP SIT.
This thing is done here expecting that the whole
class of non-broadcast devices need not cloning.
@@ -2462,7 +2470,6 @@ static void addrconf_add_mroute(struct net_device *dev)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_LOCAL,
- .fc_metric = IP6_RT_PRIO_ADDRCONF,
.fc_ifindex = dev->ifindex,
.fc_dst_len = 8,
.fc_flags = RTF_UP,
@@ -2472,6 +2479,13 @@ static void addrconf_add_mroute(struct net_device *dev)
ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ rt6_get_dev_dflt_metric(dev, &cfg);
+#endif
+
+ if (cfg.fc_metric == 0)
+ cfg.fc_metric = IP6_RT_PRIO_ADDRCONF;
+
ip6_route_add(&cfg, GFP_KERNEL, NULL);
}
@@ -5512,6 +5526,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled;
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ array[DEVCONF_DEF_RT_METRIC] = cnf->def_rt_metric;
+#endif
}
static inline size_t inet6_ifla6_size(void)
@@ -6892,6 +6909,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ {
+ .procname = "def_rt_metric",
+ .data = &ipv6_devconf.def_rt_metric,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
{
/* sentinel */
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fb075d9545b9..f3d74e0f6434 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4190,7 +4190,6 @@ static struct fib6_info *rt6_add_route_info(struct net *net,
unsigned int pref)
{
struct fib6_config cfg = {
- .fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = dev->ifindex,
.fc_dst_len = prefixlen,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
@@ -4202,6 +4201,13 @@ static struct fib6_info *rt6_add_route_info(struct net *net,
.fc_nlinfo.nl_net = net,
};
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ rt6_get_dev_dflt_metric(dev, &cfg);
+#endif
+
+ if (cfg.fc_metric == 0)
+ cfg.fc_metric = IP6_RT_PRIO_USER;
+
cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
cfg.fc_dst = *prefix;
cfg.fc_gateway = *gwaddr;
@@ -4255,7 +4261,6 @@ struct fib6_info *rt6_add_dflt_router(struct net *net,
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
- .fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
@@ -4266,6 +4271,13 @@ struct fib6_info *rt6_add_dflt_router(struct net *net,
.fc_nlinfo.nl_net = net,
};
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ rt6_get_dev_dflt_metric(dev, &cfg);
+#endif
+
+ if (cfg.fc_metric == 0)
+ cfg.fc_metric = IP6_RT_PRIO_USER;
+
cfg.fc_gateway = *gwaddr;
if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
@@ -4326,11 +4338,15 @@ static void rtmsg_to_fib6_config(struct net *net,
struct in6_rtmsg *rtmsg,
struct fib6_config *cfg)
{
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ struct net_device *dev = NULL;
+#endif
+
*cfg = (struct fib6_config){
.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
: RT6_TABLE_MAIN,
.fc_ifindex = rtmsg->rtmsg_ifindex,
- .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
+ .fc_metric = rtmsg->rtmsg_metric,
.fc_expires = rtmsg->rtmsg_info,
.fc_dst_len = rtmsg->rtmsg_dst_len,
.fc_src_len = rtmsg->rtmsg_src_len,
@@ -4343,6 +4359,19 @@ static void rtmsg_to_fib6_config(struct net *net,
.fc_src = rtmsg->rtmsg_src,
.fc_gateway = rtmsg->rtmsg_gateway,
};
+
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ if (cfg->fc_metric == 0 && cfg->fc_ifindex) {
+ dev = dev_get_by_index_rcu(net, cfg->fc_ifindex);
+ if (dev) {
+ rt6_get_dev_dflt_metric(dev, cfg);
+ dev_put(dev);
+ }
+ }
+#endif
+
+ if (cfg->fc_metric == 0)
+ cfg->fc_metric = IP6_RT_PRIO_USER;
}
int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
@@ -4886,6 +4915,10 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr *tb[RTA_MAX+1];
unsigned int pref;
int err;
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ struct net *net = NULL;
+ struct net_device *dev = NULL;
+#endif
err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_ipv6_policy, extack);
@@ -5014,6 +5047,17 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
+#ifdef CONFIG_IP_DEF_RT_METRIC
+ if (cfg->fc_metric == 0 && cfg->fc_ifindex) {
+ net = dev_net(skb->dev);
+ dev = dev_get_by_index_rcu(net, cfg->fc_ifindex);
+ if (dev) {
+ rt6_get_dev_dflt_metric(dev, cfg);
+ dev_put(dev);
+ }
+ }
+#endif
+
err = 0;
errout:
return err;
--
2.17.1
Powered by blists - more mailing lists