lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Tue, 7 Apr 2015 18:03:45 -0700 From: Alexei Starovoitov <ast@...mgrid.com> To: "David S. Miller" <davem@...emloft.net> Cc: Daniel Borkmann <daniel@...earbox.net>, Jiri Pirko <jiri@...nulli.us>, Jamal Hadi Salim <jhs@...atatu.com>, netdev@...r.kernel.org Subject: [PATCH v2 net-next 2/2] tc: make ingress and egress qdiscs consistent TC classifers and actions attached to ingress and egress qdiscs see inconsistent skb->data. For ingress L2 header is already pulled, whereas for egress it's present. Make them consistent by pushing L2 before calling into classifiers/actions and pulling L2 back afterwards. The following cls/act assume L2 and were broken on ingress before this commit: cls_bpf, act_bpf, cls_rsvp, cls_cgroup, act_csum, act_nat, all of ematch. All other in-tree cls/act use skb_network_offset() accessors for skb data and work regardless whether L2 was pulled or not. Since L2 is now present on ingress update act_mirred (the only action that was aware of L2 differences) and fix two bugs in it: - it was forwarding packets with L2 present to tunnel devices if used with egress qdisc - it wasn't updating skb->csum with ingress qdisc Also rename 'ok_push' to 'needs_l2' to make it more readable. Signed-off-by: Alexei Starovoitov <ast@...mgrid.com> --- V1->V2: . major refactoring: move push/pull into ingress qdisc . use dev->hard_header_len instead of hard coding ETH_HLEN . remove now obsolete hack in samples/bpf/ example cls_rsvp, act_csum, act_nat may appear to work on ingress, since they're using skb_*_offset(), but they do pskb_may_pull() assuming L2 is present. ematches are hard coding skb->data to mean L2 in few places. The fix may break out-of-tree ingress classifiers that rely on lack of L2 and which use skb->data directly. Alternative approach would be to do full push/pull with csum recompute in ingress_enqueue() which will simplify act_mirred. Not sure which is better. include/net/tc_act/tc_mirred.h | 2 +- net/sched/act_mirred.c | 30 ++++++++++++++++++++++-------- net/sched/sch_ingress.c | 8 ++++++++ samples/bpf/tcbpf1_kern.c | 6 ------ 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 4dd77a1c106b..f0cddd81bd6c 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -7,7 +7,7 @@ struct tcf_mirred { struct tcf_common common; int tcfm_eaction; int tcfm_ifindex; - int tcfm_ok_push; + int tcfm_needs_l2; struct net_device *tcfm_dev; struct list_head tcfm_list; }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 5953517ec059..c015d1c43db7 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -52,7 +52,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct tc_mirred *parm; struct tcf_mirred *m; struct net_device *dev; - int ret, ok_push = 0; + int ret, needs_l2 = 0; if (nla == NULL) return -EINVAL; @@ -80,10 +80,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, case ARPHRD_IPGRE: case ARPHRD_VOID: case ARPHRD_NONE: - ok_push = 0; + needs_l2 = 0; break; default: - ok_push = 1; + needs_l2 = 1; break; } } else { @@ -114,7 +114,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev_put(m->tcfm_dev); dev_hold(dev); m->tcfm_dev = dev; - m->tcfm_ok_push = ok_push; + m->tcfm_needs_l2 = needs_l2; } spin_unlock_bh(&m->tcf_lock); if (ret == ACT_P_CREATED) { @@ -131,7 +131,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, struct tcf_mirred *m = a->priv; struct net_device *dev; struct sk_buff *skb2; - u32 at; + u32 at, hard_header_len; int retval, err = 1; spin_lock(&m->tcf_lock); @@ -155,9 +155,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, if (skb2 == NULL) goto out; - if (!(at & AT_EGRESS)) { - if (m->tcfm_ok_push) - skb_push(skb2, skb2->dev->hard_header_len); + hard_header_len = skb2->dev->hard_header_len; + + if (!m->tcfm_needs_l2) { + /* in packets arriving on egress qdiscs skb->csum (complete) + * includes L2, whereas ingress_enqueue() only pushes L2 without + * updating skb->csum. + * So pull L2 here to undo push done by ingress_enqueue() + * and do pull_rcsum for fully checksummed packets + */ + if (at & AT_INGRESS) + skb_pull(skb2, hard_header_len); + else + skb_pull_rcsum(skb2, hard_header_len); + } else { + /* ingress_enqueue() already pushed L2, recompute csum here */ + if (at & AT_INGRESS) + skb_postpush_rcsum(skb2, skb2->data, hard_header_len); } /* mirror is always swallowed */ diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index eb5b8445fef9..658b46082269 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -61,9 +61,17 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct ingress_qdisc_data *p = qdisc_priv(sch); struct tcf_result res; struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); + unsigned int hard_header_len = skb->dev->hard_header_len; int result; + if (unlikely(skb_headroom(skb) < hard_header_len)) + /* should never happen, since L2 was just pulled on ingress */ + return TC_ACT_OK; + + /* don't recompute skb->csum back and forth while pushing/pulling L2 */ + __skb_push(skb, hard_header_len); result = tc_classify(skb, fl, &res); + __skb_pull(skb, hard_header_len); qdisc_bstats_update(sch, skb); switch (result) { diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c index 7cf3f42a6e39..76cdaab63058 100644 --- a/samples/bpf/tcbpf1_kern.c +++ b/samples/bpf/tcbpf1_kern.c @@ -14,12 +14,6 @@ static inline void set_dst_mac(struct __sk_buff *skb, char *mac) bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1); } -/* use 1 below for ingress qdisc and 0 for egress */ -#if 0 -#undef ETH_HLEN -#define ETH_HLEN 0 -#endif - #define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check)) #define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos)) -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists