lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CALrw=nEPf2sp=xmPPsWBeQcnkmx3zY9vRvcekC=obnkgVE1yuQ@mail.gmail.com>
Date:   Fri, 4 Jun 2021 10:32:21 +0100
From:   Ignat Korchagin <ignat@...udflare.com>
To:     stephen@...workplumber.org
Cc:     netdev@...r.kernel.org, kernel-team <kernel-team@...udflare.com>
Subject: Re: Strange TCP behaviour when appending data to a packet from netfilter

On Fri, Jun 4, 2021 at 7:13 AM Stephen Hemminger
<stephen@...workplumber.org> wrote:
>
> On Thu, 3 Jun 2021 20:38:16 +0100
> Ignat Korchagin <ignat@...udflare.com> wrote:
>
> > Hi,
> >
> > I was experimenting with a netfilter module (originally nftables
> > module) which appends a fixed byte string to outgoing IP packets and
> > removes it from incoming IP packets. In its simplest form the full
> > module code is below:
> >
> > #include <linux/module.h>
> > #include <linux/netfilter.h>
> > #include <linux/netfilter_ipv4.h>
> > #include <net/ip.h>
> >
> > #define TRAILER_LEN 16
> > #define TRAILER_VAL 0xfe
> >
> > static u8 trailer_pattern[TRAILER_LEN];
> >
> > static void adust_net_hdr(struct sk_buff *skb, bool out)
> > {
> >     ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) + (out ?
> > TRAILER_LEN : -TRAILER_LEN));
> >     ip_send_check(ip_hdr(skb));
> > }
> >
> > static unsigned int nf_crypt_trailer(void *priv, struct sk_buff *skb,
> > const struct nf_hook_state *state)
> > {
> >     if (state->hook == NF_INET_LOCAL_OUT) {
> >         struct sk_buff *trailer;
> >         int num_frags = skb_cow_data(skb, TRAILER_LEN, &trailer);
> >         if (num_frags < 0) {
> >             pr_err("skb_cow_data failed for NF_INET_LOCAL_OUT");
> >             return NF_DROP;
> >         }
> >         memset(pskb_put(skb, trailer, TRAILER_LEN), TRAILER_VAL, TRAILER_LEN);
> >     }
> >
> >     if (state->hook == NF_INET_LOCAL_IN) {
> >         u8 buf[TRAILER_LEN];
> >         struct sk_buff *trailer;
> >         int num_frags = skb_cow_data(skb, 0, &trailer);
> >         if (num_frags < 0) {
> >             pr_err("skb_cow_data failed for NF_INET_LOCAL_IN");
> >             return NF_DROP;
> >         }
> >
> >         if (skb_copy_bits(skb, skb->len - TRAILER_LEN, buf, TRAILER_LEN))
> >         {
> >             pr_err("skb_copy_bits failed for NF_INET_LOCAL_IN");
> >             return NF_DROP;
> >         }
> >
> >         if (memcmp(buf, trailer_pattern, TRAILER_LEN)) {
> >             pr_err("trailer pattern not found in NF_INET_LOCAL_IN");
> >             return NF_DROP;
> >         }
> >
> >         if (pskb_trim(skb, skb->len - TRAILER_LEN)) {
> >             pr_err("pskb_trim failed\n");
> >             return NF_DROP;
> >         }
> >     }
> >     /* adjust IP checksum */
> >     adust_net_hdr(skb, state->hook == NF_INET_LOCAL_OUT);
> >
> >     return NF_ACCEPT;
> > }
> >
> > static const struct nf_hook_ops nf_crypt_ops[] = {
> >     {
> >         .hook        = nf_crypt_trailer,
> >         .pf            = NFPROTO_IPV4,
> >         .hooknum    = NF_INET_LOCAL_IN,
> >         .priority    = NF_IP_PRI_RAW,
> >     },
> >     {
> >         .hook       = nf_crypt_trailer,
> >         .pf         = NFPROTO_IPV4,
> >         .hooknum    = NF_INET_LOCAL_OUT,
> >         .priority   = NF_IP_PRI_RAW,
> >     },
> > };
> >
> > static int __net_init nf_crypt_net_init(struct net *net)
> > {
> >     /* do nothing in the init namespace */
> >     if (net == &init_net)
> >         return 0;
> >
> >     return nf_register_net_hooks(net, nf_crypt_ops, ARRAY_SIZE(nf_crypt_ops));
> > }
> >
> > static void __net_exit nf_crypt_net_exit(struct net *net)
> > {
> >     /* do nothing in the init namespace */
> >     if (net == &init_net)
> >         return;
> >
> >     nf_unregister_net_hooks(net, nf_crypt_ops, ARRAY_SIZE(nf_crypt_ops));
> > }
> >
> > static struct pernet_operations nf_crypt_net_ops = {
> >     .init = nf_crypt_net_init,
> >     .exit = nf_crypt_net_exit,
> > };
> >
> > static int __init nf_crypt_init(void)
> > {
> >     memset(trailer_pattern, TRAILER_VAL, TRAILER_LEN);
> >     return register_pernet_subsys(&nf_crypt_net_ops);
> > }
> >
> > static void __exit nf_crypt_fini(void)
> > {
> >     unregister_pernet_subsys(&nf_crypt_net_ops);
> > }
> >
> > module_init(nf_crypt_init);
> > module_exit(nf_crypt_fini);
> >
> > MODULE_LICENSE("GPL");
> >
> > Then I set up a test env using two Linux network namespaces:
> > #!/bin/bash -e
> >
> > sudo ip netns add alice
> > sudo ip netns add bob
> >
> > sudo ip -netns alice link add a0 type veth peer b0 netns bob
> >
> > sudo ip -netns alice address add 192.168.13.5/24 dev a0
> > sudo ip -netns bob address add 192.168.13.7/24 dev b0
> >
> > sudo ip -netns alice link set lo up
> > sudo ip -netns alice link set a0 up
> >
> > sudo ip -netns bob link set lo up
> > sudo ip -netns bob link set b0 up
> >
> > Everything works except when I try to serve a large file over HTTP (around 5 GB):
> > $ sudo ip netns exec bob python3 -m http.server
> > and in another terminal
> > $ sudo ip netns exec alice curl -o /dev/null http://192.168.13.7:8000/test.bin
> >
> > The download starts, but the download speed almost immediately drops
> > to 0 and "stalls".
> >
> > I've explicitly added the pr_err messages for the module to notify me,
> > if it drops packets for whatever reason, but it doesn't drop any
> > packets.
> >
> > Additionally, further debugging showed - if a TCP "ack" packet to
> > "bob" gets processed on a kernel thread (and not in softirq), "# cat
> > /proc/<pid>/stack" for the thread produces:
> >
> > [<0>] wait_woken+0x1f4/0x250
> > [<0>] sk_stream_wait_memory+0x3fb/0xde0
> > [<0>] tcp_sendmsg_locked+0x94b/0x2e60
> > [<0>] tcp_sendmsg+0x28/0x40
> > [<0>] sock_sendmsg+0xdb/0x110
> > [<0>] __sys_sendto+0x1a8/0x270
> > [<0>] __x64_sys_sendto+0xdd/0x1b0
> > [<0>] do_syscall_64+0x33/0x40
> > [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xae
> >
> > It seems the server-side sending buffer is full, so one would assume
> > TCP acks from the client are somehow not getting processed, but I
> > definitely see client TCP acks at least in the netfilter module. I've
> > also tried to disable GSO on the veth interfaces as well as lower the
> > MTU to no avail.
> >
> > Additionally, if I reduce TRAILER_LEN to 0 (leaving the other
> > skb_cow_data calls in place), everything starts working.
> >
> > Are there any hints why the above code causes this strange behaviour
> > in TCP given that it seems I'm undoing everything on the incoming path
> > I did for the outgoing path, so should be totally transparent to TCP?
> >
> > Kind regards,
> > Ignat
>
> TCP segmentation offload doesn't know what you are doing

I've tried disabling this as well, but the issue persisted.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ