[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CALnjE+oRojsyuxTsDsahmJCxyK=26PgX9jS+4WJvWbXEmX-4og@mail.gmail.com>
Date: Wed, 12 Aug 2015 16:08:54 -0700
From: Pravin Shelar <pshelar@...ira.com>
To: Jesse Gross <jesse@...ira.com>
Cc: netdev <netdev@...r.kernel.org>
Subject: Re: [PATCH net-next 3/6] geneve: Add support to collect tunnel metadata.
On Wed, Aug 12, 2015 at 1:53 PM, Jesse Gross <jesse@...ira.com> wrote:
> On Tue, Aug 11, 2015 at 10:17 PM, Pravin B Shelar <pshelar@...ira.com> wrote:
>> diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
>> index 5e9bab8..a463383 100644
>> --- a/drivers/net/geneve.c
>> +++ b/drivers/net/geneve.c
>> +static struct geneve_dev *geneve_lookup(struct geneve_net *gn,
>> + struct iphdr *iph,
>> + struct genevehdr *gnvh)
>> {
>> - struct genevehdr *gnvh = geneve_hdr(skb);
>> - struct geneve_dev *dummy, *geneve = NULL;
>> - struct geneve_net *gn;
>> - struct iphdr *iph = NULL;
>> - struct pcpu_sw_netstats *stats;
>> struct hlist_head *vni_list_head;
>> - int err = 0;
>> + struct geneve_dev *geneve;
>> __u32 hash;
>>
>> - iph = ip_hdr(skb); /* Still outer IP header... */
>> -
>> - gn = gs->rcv_data;
>> -
>> /* Find the device for this VNI */
>> hash = geneve_net_vni_hash(gnvh->vni);
>> vni_list_head = &gn->vni_list[hash];
>> - hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) {
>> - if (!memcmp(gnvh->vni, dummy->vni, sizeof(dummy->vni)) &&
>> - iph->saddr == dummy->remote.sin_addr.s_addr) {
>> - geneve = dummy;
>> - break;
>> + hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
>> + if (!memcmp(gnvh->vni, geneve->vni, sizeof(geneve->vni)) &&
>> + iph->saddr == geneve->remote.sin_addr.s_addr) {
>> + return geneve;
>> }
>> }
>> +
>> + return rcu_dereference(gn->collect_md_tun);
>> +}
>
> I think this operates differently from VXLAN (and GRE I believe) where
> you can't have tunnels based on the VNI overlapping the
> collect_md_tun. VXLAN is nice because it can go straight from the
> socket to collecting metadata without having to an additional lookup
> that doesn't give any additional information and it seems a little
> simpler because we don't need to keep track of a flow-based device.
> However, at the very least the behavior should be consistent.
>
This is how GRE works. But I can check for flow based device first to
keep it consistent with vxlan.
>> +/* geneve receive/decap routine */
>> +static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
>> +{
>> + struct genevehdr *gnvh = geneve_hdr(skb);
>> + struct metadata_dst *tun_dst = NULL;
>> + struct geneve_dev *geneve = NULL;
>> + struct pcpu_sw_netstats *stats;
>> + struct geneve_net *gn;
>> + struct iphdr *iph;
>> + int err;
>> +
>> + iph = ip_hdr(skb); /* Still outer IP header... */
>> + gn = gs->rcv_data;
>> + geneve = geneve_lookup(gn, iph, gnvh);
>> if (!geneve)
>> goto drop;
>>
>> - /* Drop packets w/ critical options,
>> - * since we don't support any...
>> - */
>> - if (gnvh->critical)
>> - goto drop;
>> + if (geneve->collect_md) {
>
> Should this also check ip_tunnel_collect_metadata()? GRE has the same issue.
>
ok.
>> @@ -103,7 +146,8 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
>> skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
>>
>> /* Ignore packet loops (and multicast echo) */
>> - if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr))
>> + if (!geneve->collect_md &&
>> + ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr))
>> goto drop;
>
> Why is this different in the collect_md case?
>
>> @@ -128,10 +169,18 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
>> stats->rx_bytes += skb->len;
>> u64_stats_update_end(&stats->syncp);
>>
>> + if (tun_dst)
>> + skb_dst_set(skb, (struct dst_entry *)tun_dst);
>
> I think there is a leak if we allocate the dst but then drop the
> packet before we set it on the skb. It seems easier to just set it
> earlier.
>
ok.
>> -static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
>> +static struct rtable *geneve_get_rt(struct sk_buff *skb,
>> + struct net_device *dev,
>> + struct flowi4 *fl4,
>> + struct ip_tunnel_info *info)
>> {
> [...]
>> + if (info) {
>> + fl4->daddr = info->key.ipv4_dst;
>> + fl4->saddr = info->key.ipv4_src;
>> + fl4->flowi4_tos = RT_TOS(info->key.ipv4_tos);
>> + fl4->flowi4_mark = skb->mark;
>> + fl4->flowi4_proto = IPPROTO_UDP;
>> + } else {
>> + tos = geneve->tos;
>> + if (tos == 1) {
>> + const struct iphdr *iip = ip_hdr(skb);
>> +
>> + tos = ip_tunnel_get_dsfield(iip, skb);
>> + }
>
> Should the mark and protocol be used in both cases?
>
It is not done in current code, so did not changed the behavior.
>> +static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
>> +{
>> + struct geneve_dev *geneve = netdev_priv(dev);
>> + struct geneve_sock *gs = geneve->sock;
>> + struct ip_tunnel_info *info = NULL;
>> + struct rtable *rt = NULL;
>> + const struct iphdr *iip; /* interior IP header */
>> + struct flowi4 fl4;
>> + __u8 tos, ttl;
>> + __be16 sport;
>> + bool xnet;
>> + int err;
>>
>> - /* no need to handle local destination and encap bypass...yet... */
>> + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
>>
>> - err = geneve_xmit_skb(gs, rt, skb, fl4.saddr, fl4.daddr,
>> - tos, ttl, 0, sport, geneve->dst_port, 0,
>> - geneve->vni, 0, NULL, false,
>> - !net_eq(geneve->net, dev_net(geneve->dev)));
>> + if (geneve->collect_md) {
>> + info = skb_tunnel_info(skb, AF_INET);
>> + if (info && info->mode != IP_TUNNEL_INFO_TX)
>> + info = NULL;
>
> Should we just return at this point? Best case, it prints an error
> message that is slightly less clear than it could be. Worst case, it
> somehow keeps going and does something weird.
>
ok.
>> +struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
>> + u8 name_assign_type, u16 dst_port)
>> +{
>> + struct nlattr *tb[IFLA_MAX + 1];
>> + struct net_device *dev;
>> + int err;
>> +
>> + memset(tb, 0, sizeof(tb));
>> + dev = rtnl_create_link(net, name, name_assign_type,
>> + &geneve_link_ops, tb);
>> + if (IS_ERR(dev))
>> + return dev;
>> +
>> + err = geneve_configure(net, dev, 0, 0, 0, 0, dst_port, true);
>> + if (err) {
>> + free_netdev(dev);
>> + return ERR_PTR(err);
>> + }
>> +
>> + eth_hw_addr_random(dev);
>
> VXLAN calls eth_hw_addr_random in its setup function. Maybe that's a
> better place? I think that would also take care of the existing call
> that we have to eth_hw_addr_random.
ok, I will move it to geneve setup function.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists