lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9d4fd782-896d-4a44-b596-517c84d97d5a@gmail.com>
Date:   Tue, 8 Feb 2022 23:30:03 +0900
From:   Toshiaki Makita <toshiaki.makita1@...il.com>
To:     Pablo Neira Ayuso <pablo@...filter.org>
Cc:     "David S. Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>,
        Saeed Mahameed <saeedm@...dia.com>,
        Jamal Hadi Salim <jhs@...atatu.com>,
        Cong Wang <xiyou.wangcong@...il.com>,
        Jiri Pirko <jiri@...nulli.us>,
        Jozsef Kadlecsik <kadlec@...filter.org>,
        Florian Westphal <fw@...len.de>, netdev@...r.kernel.org,
        netfilter-devel@...r.kernel.org, coreteam@...filter.org,
        Paul Blakey <paulb@...dia.com>
Subject: Re: [PATCH net-next 1/3] netfilter: flowtable: Support GRE

On 2022/02/08 2:56, Pablo Neira Ayuso wrote:
> On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote:
>> Support GREv0 without NAT.
>>
>> Signed-off-by: Toshiaki Makita <toshiaki.makita1@...il.com>
>> ---
>>   net/netfilter/nf_flow_table_core.c    | 10 +++++--
>>   net/netfilter/nf_flow_table_ip.c      | 54 ++++++++++++++++++++++++++++-------
>>   net/netfilter/nf_flow_table_offload.c | 19 +++++++-----
>>   net/netfilter/nft_flow_offload.c      | 13 +++++++++
>>   4 files changed, 77 insertions(+), 19 deletions(-)
>>
>> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
>> index b90eca7..e66a375 100644
>> --- a/net/netfilter/nf_flow_table_core.c
>> +++ b/net/netfilter/nf_flow_table_core.c
>> @@ -39,8 +39,14 @@
>>   
>>   	ft->l3proto = ctt->src.l3num;
>>   	ft->l4proto = ctt->dst.protonum;
>> -	ft->src_port = ctt->src.u.tcp.port;
>> -	ft->dst_port = ctt->dst.u.tcp.port;
>> +
>> +	switch (ctt->dst.protonum) {
>> +	case IPPROTO_TCP:
>> +	case IPPROTO_UDP:
>> +		ft->src_port = ctt->src.u.tcp.port;
>> +		ft->dst_port = ctt->dst.u.tcp.port;
>> +		break;
>> +	}
>>   }
>>   
>>   struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
>> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
>> index 889cf88..48e2f58 100644
>> --- a/net/netfilter/nf_flow_table_ip.c
>> +++ b/net/netfilter/nf_flow_table_ip.c
>> @@ -172,6 +172,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>>   	struct flow_ports *ports;
>>   	unsigned int thoff;
>>   	struct iphdr *iph;
>> +	u8 ipproto;
>>
>>   	if (!pskb_may_pull(skb, sizeof(*iph) + offset))
>>   		return -1;
>> @@ -185,13 +186,19 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>>   
>>   	thoff += offset;
>>   
>> -	switch (iph->protocol) {
>> +	ipproto = iph->protocol;
>> +	switch (ipproto) {
>>   	case IPPROTO_TCP:
>>   		*hdrsize = sizeof(struct tcphdr);
>>   		break;
>>   	case IPPROTO_UDP:
>>   		*hdrsize = sizeof(struct udphdr);
>>   		break;
>> +#ifdef CONFIG_NF_CT_PROTO_GRE
>> +	case IPPROTO_GRE:
>> +		*hdrsize = sizeof(struct gre_base_hdr);
>> +		break;
>> +#endif
>>   	default:
>>   		return -1;
>>   	}
>> @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>>   	if (!pskb_may_pull(skb, thoff + *hdrsize))
>>   		return -1;
>>   
>> +	if (ipproto == IPPROTO_GRE) {
> 
> No ifdef here? Maybe remove these ifdef everywhere?

I wanted to avoid adding many ifdefs and I expect this to be compiled out when 
CONFIG_NF_CT_PROTO_GRE=n as this block is unreachable anyway. It rather may have 
been unintuitive though.

Removing all of these ifdefs will cause inconsistent behavior between 
CONFIG_NF_CT_PROTO_GRE=n/y.
When CONFIG_NF_CT_PROTO_GRE=n, conntrack cannot determine GRE version, thus it will 
track GREv1 without key infomation, and the flow will be offloaded.
When CONFIG_NF_CT_PROTO_GRE=y, GREv1 will have key information and will not be 
offloaded.
I wanted to just refuse offloading of GRE to avoid this inconsistency.
Anyway this kind of inconsistency seems to happen in software conntrack, so if you'd 
like to remove ifdefs, I will do.

>> +		struct gre_base_hdr *greh;
>> +
>> +		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
>> +		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
>> +			return -1;
>> +	}
>> +
>>   	iph = (struct iphdr *)(skb_network_header(skb) + offset);
>> -	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>>   
>>   	tuple->src_v4.s_addr	= iph->saddr;
>>   	tuple->dst_v4.s_addr	= iph->daddr;
>> -	tuple->src_port		= ports->source;
>> -	tuple->dst_port		= ports->dest;
>> +	if (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP) {
>> +		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>> +		tuple->src_port		= ports->source;
>> +		tuple->dst_port		= ports->dest;
>> +	}
> 
> maybe:
> 
>          switch (ipproto) {
>          case IPPROTO_TCP:
>          case IPPROTO_UDP:
>                  ...
>                  break;
>          case IPPROTO_GRE:
>                  break;
>          }
> 
> ?

Sure, will fix.

>>   	tuple->l3proto		= AF_INET;
>> -	tuple->l4proto		= iph->protocol;
>> +	tuple->l4proto		= ipproto;
>>   	tuple->iifidx		= dev->ifindex;
>>   	nf_flow_tuple_encap(skb, tuple);
>>   
>> @@ -521,6 +538,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>>   	struct flow_ports *ports;
>>   	struct ipv6hdr *ip6h;
>>   	unsigned int thoff;
>> +	u8 nexthdr;
>>   
>>   	thoff = sizeof(*ip6h) + offset;
>>   	if (!pskb_may_pull(skb, thoff))
>> @@ -528,13 +546,19 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>>   
>>   	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
>>   
>> -	switch (ip6h->nexthdr) {
>> +	nexthdr = ip6h->nexthdr;
>> +	switch (nexthdr) {
>>   	case IPPROTO_TCP:
>>   		*hdrsize = sizeof(struct tcphdr);
>>   		break;
>>   	case IPPROTO_UDP:
>>   		*hdrsize = sizeof(struct udphdr);
>>   		break;
>> +#ifdef CONFIG_NF_CT_PROTO_GRE
>> +	case IPPROTO_GRE:
>> +		*hdrsize = sizeof(struct gre_base_hdr);
>> +		break;
>> +#endif
>>   	default:
>>   		return -1;
>>   	}
>> @@ -545,15 +569,25 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>>   	if (!pskb_may_pull(skb, thoff + *hdrsize))
>>   		return -1;
>>   
>> +	if (nexthdr == IPPROTO_GRE) {
>> +		struct gre_base_hdr *greh;
>> +
>> +		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
>> +		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
>> +			return -1;
>> +	}
>> +
>>   	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
>> -	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>>   
>>   	tuple->src_v6		= ip6h->saddr;
>>   	tuple->dst_v6		= ip6h->daddr;
>> -	tuple->src_port		= ports->source;
>> -	tuple->dst_port		= ports->dest;
>> +	if (nexthdr == IPPROTO_TCP || nexthdr == IPPROTO_UDP) {
>> +		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>> +		tuple->src_port		= ports->source;
>> +		tuple->dst_port		= ports->dest;
>> +	}
>>   	tuple->l3proto		= AF_INET6;
>> -	tuple->l4proto		= ip6h->nexthdr;
>> +	tuple->l4proto		= nexthdr;
>>   	tuple->iifidx		= dev->ifindex;
>>   	nf_flow_tuple_encap(skb, tuple);
>>   
>> diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
>> index b561e0a..9b81080 100644
>> --- a/net/netfilter/nf_flow_table_offload.c
>> +++ b/net/netfilter/nf_flow_table_offload.c
>> @@ -170,6 +170,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
>>   		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
>>   		break;
>>   	case IPPROTO_UDP:
>> +	case IPPROTO_GRE:
>>   		break;
>>   	default:
>>   		return -EOPNOTSUPP;
>> @@ -178,15 +179,19 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
>>   	key->basic.ip_proto = tuple->l4proto;
>>   	mask->basic.ip_proto = 0xff;
>>   
>> -	key->tp.src = tuple->src_port;
>> -	mask->tp.src = 0xffff;
>> -	key->tp.dst = tuple->dst_port;
>> -	mask->tp.dst = 0xffff;
>> -
>>   	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
>>   				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
>> -				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
>> -				      BIT(FLOW_DISSECTOR_KEY_PORTS);
>> +				      BIT(FLOW_DISSECTOR_KEY_BASIC);
>> +
>> +	if (tuple->l4proto == IPPROTO_TCP || tuple->l4proto == IPPROTO_UDP) {
>> +		key->tp.src = tuple->src_port;
>> +		mask->tp.src = 0xffff;
>> +		key->tp.dst = tuple->dst_port;
>> +		mask->tp.dst = 0xffff;
>> +
>> +		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS);
>> +	}
>> +
>>   	return 0;
>>   }
>>   
>> diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
>> index 0af34ad..731b5d8 100644
>> --- a/net/netfilter/nft_flow_offload.c
>> +++ b/net/netfilter/nft_flow_offload.c
>> @@ -298,6 +298,19 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>>   		break;
>>   	case IPPROTO_UDP:
>>   		break;
>> +#ifdef CONFIG_NF_CT_PROTO_GRE
>> +	case IPPROTO_GRE: {
>> +		struct nf_conntrack_tuple *tuple;
>> +
>> +		if (ct->status & IPS_NAT_MASK)
>> +			goto out;
> 
> Why this NAT check?

NAT requires more work. I'd like to start with a minimal GRE support.
Maybe we can add NAT support later.

Toshiaki Makita

>> +		tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
>> +		/* No support for GRE v1 */
>> +		if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
>> +			goto out;
>> +		break;
>> +	}
>> +#endif
>>   	default:
>>   		goto out;
>>   	}
>> -- 
>> 1.8.3.1
>>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ