netdev - Re: [PATCH net-next 3/3] mpls: add multipath route support

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <55CC0F44.9030402@cumulusnetworks.com>
Date:	Wed, 12 Aug 2015 20:30:12 -0700
From:	roopa <roopa@...ulusnetworks.com>
To:	Robert Shearman <rshearma@...cade.com>
CC:	davem@...emloft.net, ebiederm@...ssion.com, netdev@...r.kernel.org
Subject: Re: [PATCH net-next 3/3] mpls: add multipath route support

On 8/12/15, 12:18 PM, Robert Shearman wrote:
> On 11/08/15 22:45, Roopa Prabhu wrote:
>> From: Roopa Prabhu <roopa@...ulusnetworks.com>
>>
>> Adds support for MPLS multipath routes.
>> supports parse/fill of RTA_MULTIPATH netlink attribute
>> for multipath routes similar to ipv4 fib. Mostly based on
>> multipath handling in ipv4 fib code.
>>
>> The multipath route nexthop selection algorithm is the same
>> code as in ipv4 fib.
>>
>> This patch also adds new functions to parse multipath attributes
>> from route config into mpls_nhlfe.
>>
>> note that it also simplifies mpls_route_update. Removes handling
>> route updates based on dev argument. The reason for
>> doing that is, the function was not being used for route updates
>> based on dev and if we do need to support route updates based
>> on dev in the future it will have to be done differently.
>>
>> Signed-off-by: Roopa Prabhu <roopa@...ulusnetworks.com>
>> ---
>>   net/mpls/af_mpls.c  |  378 
>> +++++++++++++++++++++++++++++++++++++++++----------
>>   net/mpls/internal.h |   19 +++
>>   2 files changed, 323 insertions(+), 74 deletions(-)
>>
>> diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
>> index eb089ef..de5ae29 100644
>> --- a/net/mpls/af_mpls.c
>> +++ b/net/mpls/af_mpls.c
>> @@ -19,10 +19,12 @@
>>   #include <net/ipv6.h>
>>   #include <net/addrconf.h>
>>   #endif
>> +#include <net/nexthop.h>
>>   #include "internal.h"
>>
>>   static int zero = 0;
>>   static int label_limit = (1 << 20) - 1;
>> +static DEFINE_SPINLOCK(mpls_multipath_lock);
>>
>>   static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
>>                  struct nlmsghdr *nlh, struct net *net, u32 portid,
>> @@ -51,10 +53,10 @@ bool mpls_output_possible(const struct net_device 
>> *dev)
>>   }
>>   EXPORT_SYMBOL_GPL(mpls_output_possible);
>>
>> -static unsigned int mpls_rt_header_size(const struct mpls_route *rt)
>> +static unsigned int mpls_nhlfe_header_size(const struct mpls_nhlfe 
>> *nhlfe)
>>   {
>>       /* The size of the layer 2.5 labels to be added for this route */
>> -    return rt->rt_nh->nh_labels * sizeof(struct mpls_shim_hdr);
>> +    return nhlfe->nh_labels * sizeof(struct mpls_shim_hdr);
>>   }
>>
>>   unsigned int mpls_dev_mtu(const struct net_device *dev)
>> @@ -76,7 +78,52 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, 
>> unsigned int mtu)
>>   }
>>   EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
>>
>> -static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
>> +/* This is a cut/copy/modify from fib_select_multipath */
>> +static void mpls_select_multipath(struct mpls_route *rt, int *nhidx)
>> +{
>> +    int w;
>> +
>> +    spin_lock_bh(&mpls_multipath_lock);
>> +    if (rt->rt_power <= 0) {
>> +        int power = 0;
>> +
>> +        change_nexthops(rt) {
>> +            power += nhlfe->nh_weight;
>> +            nhlfe->nh_power = nhlfe->nh_weight;
>> +        } endfor_nexthops(rt);
>> +        rt->rt_power = power;
>> +        if (power <= 0) {
>> +            spin_unlock_bh(&mpls_multipath_lock);
>> +            /* Race condition: route has just become dead. */
>> +            *nhidx = 0;
>> +            return;
>> +        }
>> +    }
>> +
>> +    /* w should be random number [0..rt->rt_power-1],
>> +     * it is pretty bad approximation.
>> +     */
>> +    w = jiffies % rt->rt_power;
>> +
>> +    change_nexthops(rt) {
>> +        if (nhlfe->nh_power) {
>> +            w -= nhlfe->nh_power;
>> +            if (w <= 0) {
>> +                nhlfe->nh_power--;
>> +                rt->rt_power--;
>> +                *nhidx = nhsel;
>> +                spin_unlock_bh(&mpls_multipath_lock);
>> +                return;
>> +            }
>> +        }
>> +    } endfor_nexthops(rt);
>> +
>> +    /* Race condition: route has just become dead. */
>> +    *nhidx = 0;
>> +    spin_unlock_bh(&mpls_multipath_lock);
>> +}
>
> If we were to do flow-based hashing then this would also avoid the 
> locking required here.
ok,
>
>> +
>> +static bool mpls_egress(struct mpls_nhlfe *nhlfe, struct sk_buff *skb,
>>               struct mpls_entry_decoded dec)
>>   {
>>       enum mpls_payload_type payload_type;
>> @@ -95,7 +142,7 @@ static bool mpls_egress(struct mpls_route *rt, 
>> struct sk_buff *skb,
>>       if (!pskb_may_pull(skb, 12))
>>           return false;
>>
>> -    payload_type = rt->rt_nh->nh_payload_type;
>> +    payload_type = nhlfe->nh_payload_type;
>>       if (payload_type == MPT_UNSPEC)
>>           payload_type = ip_hdr(skb)->version;
>>
>> @@ -130,6 +177,7 @@ static int mpls_forward(struct sk_buff *skb, 
>> struct net_device *dev,
>>       struct net *net = dev_net(dev);
>>       struct mpls_shim_hdr *hdr;
>>       struct mpls_route *rt;
>> +    struct mpls_nhlfe *nhlfe;
>>       struct mpls_entry_decoded dec;
>>       struct net_device *out_dev;
>>       struct mpls_dev *mdev;
>> @@ -137,6 +185,7 @@ static int mpls_forward(struct sk_buff *skb, 
>> struct net_device *dev,
>>       unsigned int new_header_size;
>>       unsigned int mtu;
>>       int err;
>> +    int nhidx;
>>
>>       /* Careful this entire function runs inside of an rcu critical 
>> section */
>>
>> @@ -167,9 +216,12 @@ static int mpls_forward(struct sk_buff *skb, 
>> struct net_device *dev,
>>       if (!rt)
>>           goto drop;
>>
>> +    mpls_select_multipath(rt, &nhidx);
>> +    nhlfe = &rt->rt_nh[nhidx];
>
> How about just returning the nexthop from mpls_select_multipath rather 
> than going via nhidx?
sure,
>
>> +
>>       /* Find the output device */
>> -    out_dev = rcu_dereference(rt->rt_nh->nh_dev);
>> -    if (!mpls_output_possible(out_dev))
>> +    out_dev = rcu_dereference(nhlfe->nh_dev);
>> +    if (!out_dev || !mpls_output_possible(out_dev))
>>           goto drop;
>>
>>       if (skb_warn_if_lro(skb))
> ...
>> @@ -796,6 +954,48 @@ int nla_get_labels(const struct nlattr *nla,
>>   }
>>   EXPORT_SYMBOL_GPL(nla_get_labels);
>>
>> +int nla_get_via(const struct nlattr *nla, u8 *via_alen,
>> +        u8 *via_table, u8 via_addr[])
>
> This isn't used by any other source files so couldn't this be made 
> static?
yeah, i will do that.
>
>> +{
>> +    struct rtvia *via = nla_data(nla);
>> +    int err = -EINVAL;
>> +    u8 alen;
>
> Should this be an int to avoid false negatives below for large values 
> of nla_len?
sure
>
>> +
>> +    if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
>> +        goto errout;
>> +    alen = nla_len(nla) -
>> +            offsetof(struct rtvia, rtvia_addr);
>> +    if (alen > MAX_VIA_ALEN)
>> +        goto errout;
>> +
>> +    /* Validate the address family */
>> +    switch (via->rtvia_family) {
>> +    case AF_PACKET:
>> +        *via_table = NEIGH_LINK_TABLE;
>> +        break;
>> +    case AF_INET:
>> +        *via_table = NEIGH_ARP_TABLE;
>> +        if (alen != 4)
>> +            goto errout;
>> +        break;
>> +    case AF_INET6:
>> +        *via_table = NEIGH_ND_TABLE;
>> +        if (alen != 16)
>> +            goto errout;
>> +        break;
>> +    default:
>> +        /* Unsupported address family */
>> +        goto errout;
>> +    }
>> +
>> +    memcpy(via_addr, via->rtvia_addr, alen);
>> +    *via_alen = alen;
>> +    err = 0;
>> +
>> +errout:
>> +    return err;
>> +}
>> +
>>   static int rtm_to_route_config(struct sk_buff *skb,  struct 
>> nlmsghdr *nlh,
>>                      struct mpls_route_config *cfg)
>>   {
>> @@ -872,35 +1072,15 @@ static int rtm_to_route_config(struct sk_buff 
>> *skb,  struct nlmsghdr *nlh,
>>           }
>>           case RTA_VIA:
>>           {
>> -            struct rtvia *via = nla_data(nla);
>> -            if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
>> -                goto errout;
>> -            cfg->rc_via_alen   = nla_len(nla) -
>> -                offsetof(struct rtvia, rtvia_addr);
>> -            if (cfg->rc_via_alen > MAX_VIA_ALEN)
>> -                goto errout;
>> -
>> -            /* Validate the address family */
>> -            switch(via->rtvia_family) {
>> -            case AF_PACKET:
>> -                cfg->rc_via_table = NEIGH_LINK_TABLE;
>> -                break;
>> -            case AF_INET:
>> -                cfg->rc_via_table = NEIGH_ARP_TABLE;
>> -                if (cfg->rc_via_alen != 4)
>> -                    goto errout;
>> -                break;
>> -            case AF_INET6:
>> -                cfg->rc_via_table = NEIGH_ND_TABLE;
>> -                if (cfg->rc_via_alen != 16)
>> -                    goto errout;
>> -                break;
>> -            default:
>> -                /* Unsupported address family */
>> +            if (nla_get_via(nla, &cfg->rc_via_alen,
>> +                    &cfg->rc_via_table, cfg->rc_via))
>>                   goto errout;
>> -            }
>> -
>> -            memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen);
>> +            break;
>> +        }
>> +        case RTA_MULTIPATH:
>> +        {
>> +            cfg->rc_mp = nla_data(nla);
>> +            cfg->rc_mp_len = nla_len(nla);
>>               break;
>>           }
>
> Nit: these scope brackets can be removed here and now above for the 
> RTA_VIA case.
>
ack,


thanks,
Roopa


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html