[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <C4BC2AFC-E0C4-4E6D-959F-B482D8935020@redfish-solutions.com>
Date: Tue, 7 Jun 2016 21:28:42 -0600
From: Philip Prindeville <philipp_subx@...fish-solutions.com>
To: Alexander Duyck <alexander.duyck@...il.com>
Cc: Netdev <netdev@...r.kernel.org>,
Stephen Hemminger <stephen@...workplumber.org>
Subject: Re: [PATCH net-next] net: ipv4: Add ability to have GRE ignore DF bit in IPv4 payloads
> On Jun 7, 2016, at 9:22 PM, Alexander Duyck <alexander.duyck@...il.com> wrote:
>
> On Tue, Jun 7, 2016 at 12:48 PM, Philip Prindeville
> <philipp@...fish-solutions.com> wrote:
>> From: Philip Prindeville <philipp@...fish-solutions.com>
>>
>> In the presence of firewalls which improperly block ICMP Unreachable
>> (including Fragmentation Required) messages, Path MTU Discovery is
>> prevented from working.
>>
>> A workaround is to handle IPv4 payloads opaquely, ignoring the DF bit--as
>> is done for other payloads like AppleTalk--and doing transparent
>> fragmentation and reassembly.
>>
>> Reviewed-by: Stephen Hemminger <stephen@...workplumber.org>
>> Signed-off-by: Philip Prindeville <philipp@...fish-solutions.com>
>> ---
>> include/net/ip_tunnels.h | 1 +
>> include/uapi/linux/if_tunnel.h | 1 +
>> net/ipv4/ip_gre.c | 13 +++++++++++--
>> net/ipv4/ip_tunnel.c | 2 +-
>> 4 files changed, 14 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
>> index dbf4444..9222678 100644
>> --- a/include/net/ip_tunnels.h
>> +++ b/include/net/ip_tunnels.h
>> @@ -132,6 +132,7 @@ struct ip_tunnel {
>> int ip_tnl_net_id;
>> struct gro_cells gro_cells;
>> bool collect_md;
>> + bool ignore_df;
>> };
>>
>> #define TUNNEL_CSUM __cpu_to_be16(0x01)
>> diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
>> index af4de90..1046f55 100644
>> --- a/include/uapi/linux/if_tunnel.h
>> +++ b/include/uapi/linux/if_tunnel.h
>> @@ -113,6 +113,7 @@ enum {
>> IFLA_GRE_ENCAP_SPORT,
>> IFLA_GRE_ENCAP_DPORT,
>> IFLA_GRE_COLLECT_METADATA,
>> + IFLA_GRE_IGNORE_DF,
>> __IFLA_GRE_MAX,
>> };
>>
>> diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
>> index 4d2025f..cbeecfb 100644
>> --- a/net/ipv4/ip_gre.c
>> +++ b/net/ipv4/ip_gre.c
>> @@ -846,6 +846,8 @@ static void ipgre_netlink_parms(struct net_device *dev,
>> struct nlattr *tb[],
>> struct ip_tunnel_parm *parms)
>> {
>> + struct ip_tunnel *t = netdev_priv(dev);
>> +
>> memset(parms, 0, sizeof(*parms));
>>
>> parms->iph.protocol = IPPROTO_GRE;
>> @@ -884,12 +886,13 @@ static void ipgre_netlink_parms(struct net_device *dev,
>> parms->iph.frag_off = htons(IP_DF);
>>
>> if (data[IFLA_GRE_COLLECT_METADATA]) {
>> - struct ip_tunnel *t = netdev_priv(dev);
>> -
>> t->collect_md = true;
>> if (dev->type == ARPHRD_IPGRE)
>> dev->type = ARPHRD_NONE;
>> }
>> +
>> + if (data[IFLA_GRE_IGNORE_DF])
>> + t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
>
> Why are you passing this as a u8? Why not just pass it as a flag
> since it defaults to off?
>
>> }
>>
>> /* This function returns true when ENCAP attributes are present in the nl msg */
>> @@ -1024,6 +1027,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
>> nla_total_size(2) +
>> /* IFLA_GRE_COLLECT_METADATA */
>> nla_total_size(0) +
>> + /* IFLA_GRE_IGNORE_DF */
>> + nla_total_size(1) +
>> 0;
>> }
>
> Switching it to a flag would help reduce the total size.
>
>> @@ -1057,6 +1062,9 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
>> t->encap.flags))
>> goto nla_put_failure;
>>
>> + if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
>> + goto nla_put_failure;
>> +
>> if (t->collect_md) {
>> if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
>> goto nla_put_failure;
>> @@ -1084,6 +1092,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
>> [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
>> [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
>> [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
>> + [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
>> };
>>
>
> You could just copy the approach used for COLLECT_METADATA and skip
> the need to pass any data and instead just use a flag.
I looked at doing that, but decided not to because there’s no way to turn it back off once you turn it on that way.
-Philip
>
>> static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
>> diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
>> index d8f5e0a..95649eb 100644
>> --- a/net/ipv4/ip_tunnel.c
>> +++ b/net/ipv4/ip_tunnel.c
>> @@ -682,7 +682,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
>> }
>>
>> df = tnl_params->frag_off;
>> - if (skb->protocol == htons(ETH_P_IP))
>> + if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
>> df |= (inner_iph->frag_off&htons(IP_DF));
>>
>> max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
>
> Looking at this it appears that you should probably make the feature
> mutually exclusive from the pmtudisc feature. It wouldn't make much
> sense to set the tunnel to ignore the inner DF bit and then turn
> around and set it because you are enforcing path MTU discovery on the
> tunnel itself.
Powered by blists - more mailing lists