lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1319426081.2517.20.camel@edumazet-laptop>
Date:	Mon, 24 Oct 2011 05:14:41 +0200
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	Dan Siemon <dan@...erfire.com>
Cc:	netdev <netdev@...r.kernel.org>
Subject: Re: [PATCH] cls_flow: Add tunnel support to the flow classifier

Le dimanche 23 octobre 2011 à 21:21 -0400, Dan Siemon a écrit :

> Thanks for the review.
> 
> Are you arguing this use case isn't worth addressing or that there is a
> more efficient way to implement this with less code?
> 

Its worth doing it, but also needs more efficient code ;)

As long as we were reading only bytes in the first 64 bytes of the
frame, existing code was probably fine and efficient.

If we want to add features and features, this is going to ask more bytes
so can trigger expensive skb head reallocs.

> > IPv6 part is also a bit limited : It assumes TCP/UDP headers are the
> > first ones. Maybe its time to use ipv6_skip_exthdr() ?
> 
> I noticed this too but the existing src-proto and dst-proto don't handle
> this case either. Maybe I can look into fixing those as well.
> 

Yes.

> > Note also that if we pull (with pskb_network_may_pull()) too many bytes,
> > we kill routing performance on paged frags devices, wich are now
> > becoming very common.
> 
> I don't know what paged frag devices means but I trust you are correct :)
> 
> The existing keys also use pskb_network_may_pull(). Should they be changed as well?
> 

A frame delivered by such device has for example 64 bytes present in skb
head, but remaining of data sits in attached fragment(s). 

For example :
drivers/net/ethernet/emulex/benet/be.h
/* Number of bytes of an RX frame that are copied to skb->data */
#define BE_HDR_LEN      ((u16) 64)

This works well if this fragment stay as is until being delivered to
userland, or forwarded.

Using pskb_network_may_pull() on data present on fragment might force to
reallocate skb head because it was too small, including a copy of struct
skb_shared_info, and all headroom (usually 64 bytes were reserved by
dev_alloc_skb()).

Adding tunnelling code definitely can increase the max offset of
inspected data from the frame beyond 64.

skb_header_pointer() can access to frag data without reallocations.

You can find many use examples in net/sched/cls_u32.c & net/netfilter

If you prefer, I can do the preliminary work 

Here is a patch to give a hint :

diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6994214..cda6bf1 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -65,19 +65,27 @@ static inline u32 addr_fold(void *addr)
 	return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
 }
 
-static u32 flow_get_src(struct sk_buff *skb)
+static u32 flow_get_src(const struct sk_buff *skb, int nhoff)
 {
+	__be32 *data = NULL, hdata;
+
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		if (pskb_network_may_pull(skb, sizeof(struct iphdr)))
-			return ntohl(ip_hdr(skb)->saddr);
+		data = skb_header_pointer(skb,
+					  nhoff + offsetof(struct iphdr,
+							   saddr),
+					  4, &hdata);
 		break;
 	case htons(ETH_P_IPV6):
-		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
-			return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
+		data = skb_header_pointer(skb,
+					 nhoff + offsetof(struct ipv6hdr,
+							  saddr.s6_addr32[3]),
+					 4, &hdata);
 		break;
 	}
 
+	if (data)
+		return ntohl(*data);
 	return addr_fold(skb->sk);
 }
 
@@ -236,7 +244,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
 })
 #endif
 
-static u32 flow_get_nfct_src(struct sk_buff *skb)
+static u32 flow_get_nfct_src(const struct sk_buff *skb, int nhoff)
 {
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
@@ -245,7 +253,7 @@ static u32 flow_get_nfct_src(struct sk_buff *skb)
 		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
 	}
 fallback:
-	return flow_get_src(skb);
+	return flow_get_src(skb, nhoff);
 }
 
 static u32 flow_get_nfct_dst(struct sk_buff *skb)
@@ -313,9 +321,11 @@ static u32 flow_get_rxhash(struct sk_buff *skb)
 
 static u32 flow_key_get(struct sk_buff *skb, int key)
 {
+	int nhoff = skb_network_offset(skb);
+
 	switch (key) {
 	case FLOW_KEY_SRC:
-		return flow_get_src(skb);
+		return flow_get_src(skb, nhoff);
 	case FLOW_KEY_DST:
 		return flow_get_dst(skb);
 	case FLOW_KEY_PROTO:
@@ -333,7 +343,7 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
 	case FLOW_KEY_NFCT:
 		return flow_get_nfct(skb);
 	case FLOW_KEY_NFCT_SRC:
-		return flow_get_nfct_src(skb);
+		return flow_get_nfct_src(skb, nhoff);
 	case FLOW_KEY_NFCT_DST:
 		return flow_get_nfct_dst(skb);
 	case FLOW_KEY_NFCT_PROTO_SRC:


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ