[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230124140207.3975283-3-vladbu@nvidia.com>
Date: Tue, 24 Jan 2023 15:02:02 +0100
From: Vlad Buslov <vladbu@...dia.com>
To: <davem@...emloft.net>, <kuba@...nel.org>, <pabeni@...hat.com>,
<pablo@...filter.org>
CC: <netdev@...r.kernel.org>, <netfilter-devel@...r.kernel.org>,
<jhs@...atatu.com>, <xiyou.wangcong@...il.com>, <jiri@...nulli.us>,
<ozsh@...dia.com>, <marcelo.leitner@...il.com>,
<simon.horman@...igine.com>, Vlad Buslov <vladbu@...dia.com>
Subject: [PATCH net-next v4 2/7] netfilter: flowtable: fixup UDP timeout depending on ct state
Currently, the flow_offload_fixup_ct() function assumes that only replied
UDP connections can be offloaded and hardcodes the UDP_CT_REPLIED timeout
value. Allow users to modify the timeout calculation by implementing a new
flowtable type callback 'timeout', and use the existing algorithm otherwise.
To enable UDP NEW connection offload in the following patches, implement
the 'timeout' callback in flowtable_ct of act_ct, which extracts the actual
connection state from ct->status and sets the timeout according to it.
Signed-off-by: Vlad Buslov <vladbu@...dia.com>
---
Notes:
Changes V3 -> V4:
- Rework the patch to decouple netfilter and act_ct timeout fixup
algorithms.
include/net/netfilter/nf_flow_table.h | 6 +++-
net/netfilter/nf_flow_table_core.c | 40 +++++++++++++++++++--------
net/netfilter/nf_flow_table_ip.c | 17 ++++++------
net/sched/act_ct.c | 35 ++++++++++++++++++++++-
4 files changed, 76 insertions(+), 22 deletions(-)
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index cd982f4a0f50..a3e4b5127ad0 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -61,6 +61,9 @@ struct nf_flowtable_type {
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule);
void (*free)(struct nf_flowtable *ft);
+ bool (*timeout)(struct nf_flowtable *ft,
+ struct flow_offload *flow,
+ s32 *val);
nf_hookfn *hook;
struct module *owner;
};
@@ -278,7 +281,8 @@ void nf_flow_table_cleanup(struct net_device *dev);
int nf_flow_table_init(struct nf_flowtable *flow_table);
void nf_flow_table_free(struct nf_flowtable *flow_table);
-void flow_offload_teardown(struct flow_offload *flow);
+void flow_offload_teardown(struct nf_flowtable *flow_table,
+ struct flow_offload *flow);
void nf_flow_snat_port(const struct flow_offload *flow,
struct sk_buff *skb, unsigned int thoff,
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 81c26a96c30b..e3eeea349c8d 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -178,28 +178,43 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
tcp->seen[1].td_maxwin = 0;
}
-static void flow_offload_fixup_ct(struct nf_conn *ct)
+static bool flow_offload_timeout_default(struct nf_conn *ct, s32 *timeout)
{
struct net *net = nf_ct_net(ct);
int l4num = nf_ct_protonum(ct);
- s32 timeout;
if (l4num == IPPROTO_TCP) {
struct nf_tcp_net *tn = nf_tcp_pernet(net);
flow_offload_fixup_tcp(&ct->proto.tcp);
- timeout = tn->timeouts[ct->proto.tcp.state];
- timeout -= tn->offload_timeout;
+ *timeout = tn->timeouts[ct->proto.tcp.state];
+ *timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net);
- timeout = tn->timeouts[UDP_CT_REPLIED];
- timeout -= tn->offload_timeout;
+ *timeout = tn->timeouts[UDP_CT_REPLIED];
+ *timeout -= tn->offload_timeout;
} else {
- return;
+ return false;
}
+ return true;
+}
+
+static void flow_offload_fixup_ct(struct nf_flowtable *flow_table,
+ struct flow_offload *flow)
+{
+ struct nf_conn *ct = flow->ct;
+ bool needs_fixup;
+ s32 timeout;
+
+ needs_fixup = flow_table->type->timeout ?
+ flow_table->type->timeout(flow_table, flow, &timeout) :
+ flow_offload_timeout_default(ct, &timeout);
+ if (!needs_fixup)
+ return;
+
if (timeout < 0)
timeout = 0;
@@ -348,11 +363,12 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
flow_offload_free(flow);
}
-void flow_offload_teardown(struct flow_offload *flow)
+void flow_offload_teardown(struct nf_flowtable *flow_table,
+ struct flow_offload *flow)
{
clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
set_bit(NF_FLOW_TEARDOWN, &flow->flags);
- flow_offload_fixup_ct(flow->ct);
+ flow_offload_fixup_ct(flow_table, flow);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);
@@ -421,7 +437,7 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
{
if (nf_flow_has_expired(flow) ||
nf_ct_is_dying(flow->ct))
- flow_offload_teardown(flow);
+ flow_offload_teardown(flow_table, flow);
if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
if (test_bit(NF_FLOW_HW, &flow->flags)) {
@@ -569,14 +585,14 @@ static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table,
struct net_device *dev = data;
if (!dev) {
- flow_offload_teardown(flow);
+ flow_offload_teardown(flow_table, flow);
return;
}
if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
(flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
flow->tuplehash[1].tuple.iifidx == dev->ifindex))
- flow_offload_teardown(flow);
+ flow_offload_teardown(flow_table, flow);
}
void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 19efba1e51ef..9c97b9994a96 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -18,7 +18,8 @@
#include <linux/tcp.h>
#include <linux/udp.h>
-static int nf_flow_state_check(struct flow_offload *flow, int proto,
+static int nf_flow_state_check(struct nf_flowtable *flow_table,
+ struct flow_offload *flow, int proto,
struct sk_buff *skb, unsigned int thoff)
{
struct tcphdr *tcph;
@@ -28,7 +29,7 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
tcph = (void *)(skb_network_header(skb) + thoff);
if (unlikely(tcph->fin || tcph->rst)) {
- flow_offload_teardown(flow);
+ flow_offload_teardown(flow_table, flow);
return -1;
}
@@ -373,11 +374,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
iph = (struct iphdr *)(skb_network_header(skb) + offset);
thoff = (iph->ihl * 4) + offset;
- if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
+ if (nf_flow_state_check(flow_table, flow, iph->protocol, skb, thoff))
return NF_ACCEPT;
if (!nf_flow_dst_check(&tuplehash->tuple)) {
- flow_offload_teardown(flow);
+ flow_offload_teardown(flow_table, flow);
return NF_ACCEPT;
}
@@ -419,7 +420,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
case FLOW_OFFLOAD_XMIT_DIRECT:
ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
if (ret == NF_DROP)
- flow_offload_teardown(flow);
+ flow_offload_teardown(flow_table, flow);
break;
default:
WARN_ON_ONCE(1);
@@ -639,11 +640,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
thoff = sizeof(*ip6h) + offset;
- if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
+ if (nf_flow_state_check(flow_table, flow, ip6h->nexthdr, skb, thoff))
return NF_ACCEPT;
if (!nf_flow_dst_check(&tuplehash->tuple)) {
- flow_offload_teardown(flow);
+ flow_offload_teardown(flow_table, flow);
return NF_ACCEPT;
}
@@ -684,7 +685,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
case FLOW_OFFLOAD_XMIT_DIRECT:
ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
if (ret == NF_DROP)
- flow_offload_teardown(flow);
+ flow_offload_teardown(flow_table, flow);
break;
default:
WARN_ON_ONCE(1);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 0ca2bb8ed026..861305c9c079 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -274,8 +274,41 @@ static int tcf_ct_flow_table_fill_actions(struct net *net,
return err;
}
+static bool tcf_ct_flow_table_get_timeout(struct nf_flowtable *ft,
+ struct flow_offload *flow,
+ s32 *val)
+{
+ struct nf_conn *ct = flow->ct;
+ int l4num =
+ nf_ct_protonum(ct);
+ struct net *net =
+ nf_ct_net(ct);
+
+ if (l4num == IPPROTO_TCP) {
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+ ct->proto.tcp.seen[0].td_maxwin = 0;
+ ct->proto.tcp.seen[1].td_maxwin = 0;
+ *val = tn->timeouts[ct->proto.tcp.state];
+ *val -= tn->offload_timeout;
+ } else if (l4num == IPPROTO_UDP) {
+ struct nf_udp_net *tn = nf_udp_pernet(net);
+ enum udp_conntrack state =
+ test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ UDP_CT_REPLIED : UDP_CT_UNREPLIED;
+
+ *val = tn->timeouts[state];
+ *val -= tn->offload_timeout;
+ } else {
+ return false;
+ }
+
+ return true;
+}
+
static struct nf_flowtable_type flowtable_ct = {
.action = tcf_ct_flow_table_fill_actions,
+ .timeout = tcf_ct_flow_table_get_timeout,
.owner = THIS_MODULE,
};
@@ -622,7 +655,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
ct = flow->ct;
if (tcph && (unlikely(tcph->fin || tcph->rst))) {
- flow_offload_teardown(flow);
+ flow_offload_teardown(nf_ft, flow);
return false;
}
--
2.38.1
Powered by blists - more mailing lists