[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAF2d9jgEOsSEQXOVJWJsw=RMdr27YiTdcMPrZ6P2EopqU8wtkw@mail.gmail.com>
Date: Wed, 14 Sep 2016 09:30:10 -0700
From: Mahesh Bandewar (महेश बंडेवार)
<maheshb@...gle.com>
To: David Ahern <dsa@...ulusnetworks.com>
Cc: Mahesh Bandewar <mahesh@...dewar.net>,
netdev <netdev@...r.kernel.org>,
Eric Dumazet <edumazet@...gle.com>,
David Miller <davem@...emloft.net>
Subject: Re: [PATCHv2 next 3/3] ipvlan: Introduce l3s mode
Hi David, thanks for the comments.
On Tue, Sep 13, 2016 at 8:24 PM, David Ahern <dsa@...ulusnetworks.com> wrote:
> On 9/12/16 12:01 PM, Mahesh Bandewar wrote:
>
>> +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
>> + u16 proto)
>> +{
>> + struct ipvl_addr *addr;
>> + struct net_device *sdev;
>> +
>> + addr = ipvlan_skb_to_addr(skb, dev);
>> + if (!addr)
>> + goto out;
>> +
>> + sdev = addr->master->dev;
>> + switch (proto) {
>> + case AF_INET:
>> + {
>> + int err;
>> + struct iphdr *ip4h = ip_hdr(skb);
>> +
>> + err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
>> + ip4h->tos, sdev);
>> + if (unlikely(err))
>> + goto out;
>> + break;
>> + }
>> + case AF_INET6:
>> + {
>> + struct dst_entry *dst;
>> + struct ipv6hdr *ip6h = ipv6_hdr(skb);
>> + int flags = RT6_LOOKUP_F_HAS_SADDR;
>> + struct flowi6 fl6 = {
>> + .flowi6_iif = sdev->ifindex,
>> + .daddr = ip6h->daddr,
>> + .saddr = ip6h->saddr,
>> + .flowlabel = ip6_flowinfo(ip6h),
>> + .flowi6_mark = skb->mark,
>> + .flowi6_proto = ip6h->nexthdr,
>> + };
>> +
>> + skb_dst_drop(skb);
>> + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
>> + skb_dst_set(skb, dst);
>> + break;
>> + }
>> + default:
>> + break;
>> + }
>
> Nit: why not put the above in separate per-version functions (ipvlan_ip_rcv and ipvlan_ip6_rcv) similar to what is done for ipvlan_process_outbound?
>
I can but it's small enough to have it together. Also 'proto' is a
parameter to the handler and makes it easier However do you see any
issue having just one function?
>
>> +
>> +out:
>> + return skb;
>> +}
>> +
>> +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
>> + const struct nf_hook_state *state)
>> +{
>> + struct ipvl_addr *addr;
>> + unsigned int len;
>> +
>> + addr = ipvlan_skb_to_addr(skb, skb->dev);
>> + if (!addr)
>> + goto out;
>> +
>> + skb->dev = addr->master->dev;
>> + len = skb->len + ETH_HLEN;
>> + ipvlan_count_rx(addr->master, len, true, false);
>> +out:
>> + return NF_ACCEPT;
>> +}
>> diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
>> index 18b4e8c7f68a..d02be277e1db 100644
>> --- a/drivers/net/ipvlan/ipvlan_main.c
>> +++ b/drivers/net/ipvlan/ipvlan_main.c
>> @@ -9,24 +9,65 @@
>>
>> #include "ipvlan.h"
>>
>> +static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
>> + {
>> + .hook = ipvlan_nf_input,
>> + .pf = NFPROTO_IPV4,
>> + .hooknum = NF_INET_LOCAL_IN,
>> + .priority = INT_MAX,
>> + },
>> + {
>> + .hook = ipvlan_nf_input,
>> + .pf = NFPROTO_IPV6,
>> + .hooknum = NF_INET_LOCAL_IN,
>> + .priority = INT_MAX,
>> + },
>> +};
>> +
>> +static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = {
>> + .l3mdev_l3_rcv = ipvlan_l3_rcv,
>> +};
>> +
>> static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
>> {
>> ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
>> }
>>
>> -static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
>> +static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
>> {
>> struct ipvl_dev *ipvlan;
>> + int err = 0;
>>
>> + ASSERT_RTNL();
>> if (port->mode != nval) {
>> + if (nval == IPVLAN_MODE_L3S) {
>> + port->dev->l3mdev_ops = &ipvl_l3mdev_ops;
>> + port->dev->priv_flags |= IFF_L3MDEV_MASTER;
>> + if (!port->ipt_hook_added) {
>> + err = _nf_register_hooks(ipvl_nfops,
>> + ARRAY_SIZE(ipvl_nfops));
>
> That's clever. The hooks are not device based so why do the register for each device? Alternatively, you could use a static dst like VRF does for Tx. In the ipvlan rcv function set the dst input handler to send the packet back to the ipvlan driver via dst->input. From there send the packet through the netfilter hooks and then do the real lookup, update the dst and call its input function. I have working code for VRF driver somewhere that shows how to do this.
>
Do you mean per slave device? It's registering it for a port (so only
once) depending on the mode. If the mode != l3s it wont bother
registering to keep current modes as they are.
I'm sure dst idea could work as well (as you have suggested) but
l3mdev + ipt-hook is simpler and probably far less intrusive.
>
>> + if (!err)
>> + port->ipt_hook_added = true;
>> + else
>> + return err;
>> + }
>> + } else {
>> + port->dev->priv_flags &= ~IFF_L3MDEV_MASTER;
>> + port->dev->l3mdev_ops = NULL;
>> + if (port->ipt_hook_added)
>> + _nf_unregister_hooks(ipvl_nfops,
>> + ARRAY_SIZE(ipvl_nfops));
>> + port->ipt_hook_added = false;
>> + }
>
>
Powered by blists - more mailing lists