[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1434571686-5149-4-git-send-email-pch@ordbogen.com>
Date: Wed, 17 Jun 2015 22:08:06 +0200
From: Peter Nørlund <pch@...bogen.com>
To: netdev@...r.kernel.org
Cc: "David S. Miller" <davem@...emloft.net>,
Alexey Kuznetsov <kuznet@....inr.ac.ru>,
James Morris <jmorris@...ei.org>,
Hideaki YOSHIFUJI <yoshfuji@...ux-ipv6.org>,
Patrick McHardy <kaber@...sh.net>, linux-api@...r.kernel.org,
Peter Nørlund <pch@...bogen.com>
Subject: [PATCH net-next 3/3] ipv4: ICMP packet inspection for multipath
ICMP packets are inspected to let them route together with the flow they
belong to, allowing anycast environments to work with ECMP.
Signed-off-by: Peter Nørlund <pch@...bogen.com>
---
net/ipv4/icmp.c | 27 ++++++++++++++++++-
net/ipv4/route.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++----------
2 files changed, 92 insertions(+), 15 deletions(-)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 3abcfea..20f1d5e 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -447,6 +447,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
{
struct rtable *rt, *rt2;
struct flowi4 fl4_dec;
+ struct flowi4 mp_flow;
int err;
memset(fl4, 0, sizeof(*fl4));
@@ -459,7 +460,31 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
- rt = __ip_route_output_key(net, fl4, NULL);
+
+ /* Source and destination is swapped. See ip_multipath_flow */
+ mp_flow.saddr = iph->daddr;
+ mp_flow.daddr = iph->saddr;
+ mp_flow.flowi4_proto = iph->protocol;
+ mp_flow.fl4_sport = 0;
+ mp_flow.fl4_dport = 0;
+ if (!ip_is_fragment(iph)) {
+ if (iph->protocol == IPPROTO_TCP ||
+ iph->protocol == IPPROTO_UDP ||
+ iph->protocol == IPPROTO_SCTP) {
+ __be16 _ports[2];
+ const __be16 *ports;
+
+ ports = skb_header_pointer(skb_in, iph->ihl * 4,
+ sizeof(_ports),
+ &_ports);
+ if (ports) {
+ mp_flow.fl4_sport = ports[1];
+ mp_flow.fl4_dport = ports[0];
+ }
+ }
+ }
+
+ rt = __ip_route_output_key(net, fl4, &mp_flow);
if (IS_ERR(rt))
return rt;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a1ec62c..bab4318 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1635,31 +1635,83 @@ out:
/* Fill flow key data based on packet for use in multipath routing. */
static void ip_multipath_flow(const struct sk_buff *skb, struct flowi4 *flow)
{
- const struct iphdr *iph;
-
- iph = ip_hdr(skb);
-
- flow->saddr = iph->saddr;
- flow->daddr = iph->daddr;
- flow->flowi4_proto = iph->protocol;
+ struct icmphdr _icmph;
+ struct iphdr _inner_iph;
+ const struct iphdr *outer_iph;
+ const struct icmphdr *icmph;
+ const struct iphdr *inner_iph;
+ unsigned int offset;
+ __be16 _ports[2];
+ const __be16 *ports;
+
+ outer_iph = ip_hdr(skb);
+
+ flow->saddr = outer_iph->saddr;
+ flow->daddr = outer_iph->daddr;
+ flow->flowi4_proto = outer_iph->protocol;
flow->fl4_sport = 0;
flow->fl4_dport = 0;
- if (unlikely(ip_is_fragment(iph)))
+ if (unlikely(ip_is_fragment(outer_iph)))
return;
- if (iph->protocol == IPPROTO_TCP ||
- iph->protocol == IPPROTO_UDP ||
- iph->protocol == IPPROTO_SCTP) {
- __be16 _ports;
- const __be16 *ports;
+ offset = outer_iph->ihl * 4;
- ports = skb_header_pointer(skb, iph->ihl * 4, sizeof(_ports),
+ if (outer_iph->protocol == IPPROTO_TCP ||
+ outer_iph->protocol == IPPROTO_UDP ||
+ outer_iph->protocol == IPPROTO_SCTP) {
+ ports = skb_header_pointer(skb, offset, sizeof(_ports),
&_ports);
if (ports) {
flow->fl4_sport = ports[0];
flow->fl4_dport = ports[1];
}
+
+ return;
+ }
+
+ if (outer_iph->protocol != IPPROTO_ICMP)
+ return;
+
+ icmph = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (!icmph)
+ return;
+
+ if (icmph->type != ICMP_DEST_UNREACH &&
+ icmph->type != ICMP_SOURCE_QUENCH &&
+ icmph->type != ICMP_REDIRECT &&
+ icmph->type != ICMP_TIME_EXCEEDED &&
+ icmph->type != ICMP_PARAMETERPROB) {
+ return;
+ }
+
+ offset += sizeof(_icmph);
+ inner_iph = skb_header_pointer(skb, offset, sizeof(_inner_iph),
+ &_inner_iph);
+ if (!inner_iph)
+ return;
+
+ /* Since the ICMP payload contains a packet sent from the current
+ * recipient, we swap source and destination addresses and ports
+ */
+ flow->saddr = inner_iph->daddr;
+ flow->daddr = inner_iph->saddr;
+ flow->flowi4_proto = inner_iph->protocol;
+
+ if (unlikely(ip_is_fragment(inner_iph)))
+ return;
+
+ if (inner_iph->protocol != IPPROTO_TCP &&
+ inner_iph->protocol != IPPROTO_UDP &&
+ inner_iph->protocol != IPPROTO_SCTP) {
+ return;
+ }
+
+ offset += inner_iph->ihl * 4;
+ ports = skb_header_pointer(skb, offset, sizeof(_ports), &_ports);
+ if (ports) {
+ flow->fl4_sport = ports[1];
+ flow->fl4_dport = ports[0];
}
}
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
--
2.1.4
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists