[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180901004954.7145-15-dsahern@kernel.org>
Date: Fri, 31 Aug 2018 17:49:49 -0700
From: dsahern@...nel.org
To: netdev@...r.kernel.org
Cc: roopa@...ulusnetworks.com, sharpd@...ulusnetworks.com,
idosch@...lanox.com, davem@...emloft.net,
David Ahern <dsahern@...il.com>
Subject: [PATCH RFC net-next 14/18] net/ipv4: Allow routes to use nexthop objects
From: David Ahern <dsahern@...il.com>
Add new RTA attribute to allow a user to specify a nexthop id to use
with a route instead of the current nexthop specification.
Signed-off-by: David Ahern <dsahern@...il.com>
---
include/net/ip_fib.h | 1 +
include/uapi/linux/rtnetlink.h | 1 +
net/ipv4/fib_frontend.c | 7 +++
net/ipv4/fib_semantics.c | 139 ++++++++++++++++++++++++++++++-----------
net/ipv4/fib_trie.c | 33 +++++++---
5 files changed, 136 insertions(+), 45 deletions(-)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index c59e0f1ba59b..d2f961de732d 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -40,6 +40,7 @@ struct fib_config {
u32 fc_flags;
u32 fc_priority;
__be32 fc_prefsrc;
+ u32 fc_nh_id;
struct nlattr *fc_mx;
struct rtnexthop *fc_mp;
int fc_mx_len;
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 4a0615797e5e..a036368798a9 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -349,6 +349,7 @@ enum rtattr_type_t {
RTA_IP_PROTO,
RTA_SPORT,
RTA_DPORT,
+ RTA_NH_ID,
__RTA_MAX
};
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c483453bf037..cf133d4e02f2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -322,6 +322,9 @@ static bool fib_info_nh_uses_dev(struct fib_info *fi,
bool dev_match = false;
int ret;
+ if (fi->nh)
+ return nexthop_uses_dev(fi->nh, dev);
+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
for (ret = 0; ret < fi->fib_nhs; ret++) {
struct fib_nh *nh = &fi->fib_nh[ret];
@@ -663,6 +666,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_IP_PROTO] = { .type = NLA_U8 },
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
+ [RTA_NH_ID] = { .type = NLA_U32 },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -746,6 +750,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
if (err < 0)
goto errout;
break;
+ case RTA_NH_ID:
+ cfg->fc_nh_id = nla_get_u32(attr);
+ break;
}
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0cd536ad1761..c91cdafd40ec 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -226,9 +226,13 @@ static void free_fib_info_rcu(struct rcu_head *head)
struct fib_info *fi = container_of(head, struct fib_info, rcu);
struct dst_metrics *m;
- change_nexthops(fi) {
- fib_nh_release(fi->fib_net, nexthop_nh);
- } endfor_nexthops(fi);
+ if (fi->nh) {
+ nexthop_put(fi->nh);
+ } else {
+ change_nexthops(fi) {
+ fib_nh_release(fi->fib_net, nexthop_nh);
+ } endfor_nexthops(fi);
+ }
m = fi->fib_metrics;
if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
@@ -260,11 +264,15 @@ void fib_release_info(struct fib_info *fi)
hlist_del(&fi->fib_hash);
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
- change_nexthops(fi) {
- if (!nexthop_nh->nh_dev)
- continue;
- hlist_del(&nexthop_nh->nh_hash);
- } endfor_nexthops(fi)
+ if (fi->nh) {
+ list_del(&fi->nh_list);
+ } else {
+ change_nexthops(fi) {
+ if (!nexthop_nh->nh_dev)
+ continue;
+ hlist_del(&nexthop_nh->nh_hash);
+ } endfor_nexthops(fi)
+ }
fi->fib_dead = 1;
fib_info_put(fi);
}
@@ -275,6 +283,12 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
{
const struct fib_nh *onh = ofi->fib_nh;
+ if (fi->nh || ofi->nh)
+ return nexthop_cmp(fi->nh, ofi->nh) ? 0 : -1;
+
+ if (ofi->fib_nhs == 0)
+ return 0;
+
for_nexthops(fi) {
if (nh->nh_oif != onh->nh_oif ||
nh->nh_gw != onh->nh_gw ||
@@ -310,10 +324,13 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
val ^= (fi->fib_protocol << 8) | fi->fib_scope;
val ^= (__force u32)fi->fib_prefsrc;
val ^= fi->fib_priority;
- for_nexthops(fi) {
- val ^= fib_devindex_hashfn(nh->nh_oif);
- } endfor_nexthops(fi)
-
+ if (fi->nh) {
+ val ^= fib_devindex_hashfn(fi->nh->id);
+ } else {
+ for_nexthops(fi) {
+ val ^= fib_devindex_hashfn(nh->nh_oif);
+ } endfor_nexthops(fi)
+ }
return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}
@@ -339,7 +356,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(u32) * RTAX_MAX) == 0 &&
!((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
- (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
+ (nh_comp(fi, nfi) == 0))
return fi;
}
@@ -349,6 +366,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
/* Check, that the gateway is already configured.
* Used only by redirect accept routine.
*/
+//TO-DO: need a nexthop version
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
@@ -381,16 +399,19 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
+ nla_total_size(4) /* RTA_PRIORITY */
+ nla_total_size(4) /* RTA_PREFSRC */
+ nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
+ size_t nhsize = 0;
/* space for nested metrics */
payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
- if (fi->fib_nhs) {
+ if (fi->nh) {
+ nhsize = nla_total_size(4); /* RTA_NH_ID */
+ } else if (fi->fib_nhs) {
size_t nh_encapsize = 0;
/* Also handles the special case fib_nhs == 1 */
/* each nexthop is packed in an attribute */
- size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+ nhsize = nla_total_size(sizeof(struct rtnexthop));
/* may contain flow and gateway attribute */
nhsize += 2 * nla_total_size(4);
@@ -539,6 +560,7 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
return nhs;
}
+/* only called when fib_nh is integrated into fib_info */
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
int remaining, struct fib_config *cfg,
struct netlink_ext_ack *extack)
@@ -625,6 +647,8 @@ static void fib_rebalance(struct fib_info *fi)
int w;
struct in_device *in_dev;
+ WARN_ON(fi->nh);
+
if (fi->fib_nhs < 2)
return;
@@ -712,6 +736,9 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
return 1;
+ if (fi->nh)
+ return cfg->fc_nh_id == fi->nh->id ? 0 : 1;
+
if (cfg->fc_oif || cfg->fc_gw) {
if (cfg->fc_encap) {
if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
@@ -1099,9 +1126,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
{
int err;
struct fib_info *fi = NULL;
+ struct nexthop *nh = NULL;
struct fib_info *ofi;
int nhs = 1;
struct net *net = cfg->fc_nlinfo.nl_net;
+ unsigned char scope;
if (cfg->fc_type > RTN_MAX)
goto err_inval;
@@ -1118,6 +1147,21 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
+ if (cfg->fc_nh_id) {
+ if (cfg->fc_oif || cfg->fc_gw || cfg->fc_encap || cfg->fc_mp) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop specification and nexthop id are mutually exclusive");
+ goto err_inval;
+ }
+
+ nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid nexthop id - nexthop does not exist");
+ goto err_inval;
+ }
+ }
+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (cfg->fc_mp) {
nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
@@ -1180,7 +1224,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
if (err)
goto failure;
- if (cfg->fc_mp) {
+ if (nh) {
+ nexthop_get(nh);
+ fi->nh = nh;
+ } else if (cfg->fc_mp) {
err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
if (err != 0)
goto failure;
@@ -1214,7 +1261,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
- if (cfg->fc_scope == RT_SCOPE_HOST) {
+ if (fi->nh) {
+ err = fib_check_nexthop(fi, cfg, extack);
+ if (err)
+ goto failure;
+ } else if (cfg->fc_scope == RT_SCOPE_HOST) {
struct fib_nh *nh = fi->fib_nh;
/* Local address is added. */
@@ -1254,12 +1305,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
- change_nexthops(fi) {
- fib_info_update_nh_saddr(net, nexthop_nh, fi->fib_scope);
- } endfor_nexthops(fi)
-
- fib_rebalance(fi);
+ if (!fi->nh) {
+ scope = fi->fib_scope;
+ change_nexthops(fi) {
+ fib_info_update_nh_saddr(net, nexthop_nh, scope);
+ } endfor_nexthops(fi)
+ fib_rebalance(fi);
+ }
link_it:
ofi = fib_find_info(fi);
if (ofi) {
@@ -1280,16 +1333,20 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
hlist_add_head(&fi->fib_lhash, head);
}
- change_nexthops(fi) {
- struct hlist_head *head;
- unsigned int hash;
+ if (fi->nh) {
+ list_add(&fi->nh_list, &nh->fi_list);
+ } else {
+ change_nexthops(fi) {
+ struct hlist_head *head;
+ unsigned int hash;
- if (!nexthop_nh->nh_dev)
- continue;
- hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
- head = &fib_info_devhash[hash];
- hlist_add_head(&nexthop_nh->nh_hash, head);
- } endfor_nexthops(fi)
+ if (!nexthop_nh->nh_dev)
+ continue;
+ hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
+ head = &fib_info_devhash[hash];
+ hlist_add_head(&nexthop_nh->nh_hash, head);
+ } endfor_nexthops(fi)
+ }
spin_unlock_bh(&fib_info_lock);
return fi;
@@ -1298,6 +1355,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
failure:
if (fi) {
+ if (fi->nh)
+ nexthop_put(fi->nh);
+
fi->fib_dead = 1;
free_fib_info(fi);
}
@@ -1344,7 +1404,11 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (fi->fib_prefsrc &&
nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
goto nla_put_failure;
- if (fi->fib_nhs == 1) {
+
+ if (fi->nh) {
+ if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id))
+ goto nla_put_failure;
+ } else if (fi->fib_nhs == 1) {
if (fi->fib_nh->nh_gw &&
nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
goto nla_put_failure;
@@ -1587,8 +1651,11 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
continue;
break;
}
- if (next_fi->fib_flags & RTNH_F_DEAD)
+
+ fnh = fib_info_nh(next_fi, 0);
+ if (fnh->nh_flags & RTNH_F_DEAD)
continue;
+
last_tos = fa->fa_tos;
last_prio = next_fi->fib_priority;
@@ -1596,7 +1663,6 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
fa->fa_type != RTN_UNICAST)
continue;
- fnh = fib_info_nh(next_fi, 0);
if (!fnh->nh_gw || fnh->nh_scope != RT_SCOPE_LINK)
continue;
@@ -1749,13 +1815,14 @@ void fib_select_multipath(struct fib_result *res, int hash)
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, const struct sk_buff *skb)
{
+ int h;
+
if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
goto check_saddr;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(net, fl4, skb, NULL);
-
+ h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h);
}
else
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c6aab049a4ac..575bb34d895f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1445,7 +1445,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
/* Step 3: Process the leaf, if that fails fall back to backtracing */
hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
struct fib_info *fi = fa->fa_info;
- int nhsel, err;
+ int nhsel, err, nhmax;
if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
if (index >= (1ul << fa->fa_slen))
@@ -1460,6 +1460,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
fib_alias_accessed(fa);
err = fib_props[fa->fa_type].error;
if (unlikely(err < 0)) {
+out_reject:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
@@ -1468,17 +1469,31 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
}
if (fi->fib_flags & RTNH_F_DEAD)
continue;
- for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
- struct fib_nh *nh = &fi->fib_nh[nhsel];
- struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev);
+
+ if (fi->nh) {
+ if (nexthop_is_blackhole(fi->nh)) {
+ err = fib_props[RTN_BLACKHOLE].error;
+ goto out_reject;
+ }
+ nhmax = nexthop_num_path(fi->nh);
+ } else {
+ nhmax = fi->fib_nhs;
+ }
+ for (nhsel = 0; nhsel < nhmax; nhsel++) {
+ struct fib_nh *nh = fib_info_nh(fi, nhsel);
+ struct in_device *in_dev;
if (nh->nh_flags & RTNH_F_DEAD)
continue;
- if (in_dev &&
- IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
- nh->nh_flags & RTNH_F_LINKDOWN &&
- !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
- continue;
+
+ if (!fi->nh) {
+ in_dev = __in_dev_get_rcu(nh->nh_dev);
+ if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ nh->nh_flags & RTNH_F_LINKDOWN &&
+ !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
+ continue;
+ }
if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
if (flp->flowi4_oif &&
flp->flowi4_oif != nh->nh_oif)
--
2.11.0
Powered by blists - more mailing lists