lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210603124601.1c260b56@hermes.local>
Date:   Thu, 3 Jun 2021 12:46:01 -0700
From:   Stephen Hemminger <stephen@...workplumber.org>
To:     Ignat Korchagin <ignat@...udflare.com>
Cc:     netdev@...r.kernel.org, kernel-team <kernel-team@...udflare.com>
Subject: Re: Strange TCP behaviour when appending data to a packet from
 netfilter

On Thu, 3 Jun 2021 20:38:16 +0100
Ignat Korchagin <ignat@...udflare.com> wrote:

> Hi,
> 
> I was experimenting with a netfilter module (originally nftables
> module) which appends a fixed byte string to outgoing IP packets and
> removes it from incoming IP packets. In its simplest form the full
> module code is below:
> 
> #include <linux/module.h>
> #include <linux/netfilter.h>
> #include <linux/netfilter_ipv4.h>
> #include <net/ip.h>
> 
> #define TRAILER_LEN 16
> #define TRAILER_VAL 0xfe
> 
> static u8 trailer_pattern[TRAILER_LEN];
> 
> static void adust_net_hdr(struct sk_buff *skb, bool out)
> {
>     ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) + (out ?
> TRAILER_LEN : -TRAILER_LEN));
>     ip_send_check(ip_hdr(skb));
> }
> 
> static unsigned int nf_crypt_trailer(void *priv, struct sk_buff *skb,
> const struct nf_hook_state *state)
> {
>     if (state->hook == NF_INET_LOCAL_OUT) {
>         struct sk_buff *trailer;
>         int num_frags = skb_cow_data(skb, TRAILER_LEN, &trailer);
>         if (num_frags < 0) {
>             pr_err("skb_cow_data failed for NF_INET_LOCAL_OUT");
>             return NF_DROP;
>         }
>         memset(pskb_put(skb, trailer, TRAILER_LEN), TRAILER_VAL, TRAILER_LEN);
>     }
> 
>     if (state->hook == NF_INET_LOCAL_IN) {
>         u8 buf[TRAILER_LEN];
>         struct sk_buff *trailer;
>         int num_frags = skb_cow_data(skb, 0, &trailer);
>         if (num_frags < 0) {
>             pr_err("skb_cow_data failed for NF_INET_LOCAL_IN");
>             return NF_DROP;
>         }
> 
>         if (skb_copy_bits(skb, skb->len - TRAILER_LEN, buf, TRAILER_LEN))
>         {
>             pr_err("skb_copy_bits failed for NF_INET_LOCAL_IN");
>             return NF_DROP;
>         }
> 
>         if (memcmp(buf, trailer_pattern, TRAILER_LEN)) {
>             pr_err("trailer pattern not found in NF_INET_LOCAL_IN");
>             return NF_DROP;
>         }
> 
>         if (pskb_trim(skb, skb->len - TRAILER_LEN)) {
>             pr_err("pskb_trim failed\n");
>             return NF_DROP;
>         }
>     }
>     /* adjust IP checksum */
>     adust_net_hdr(skb, state->hook == NF_INET_LOCAL_OUT);
> 
>     return NF_ACCEPT;
> }
> 
> static const struct nf_hook_ops nf_crypt_ops[] = {
>     {
>         .hook        = nf_crypt_trailer,
>         .pf            = NFPROTO_IPV4,
>         .hooknum    = NF_INET_LOCAL_IN,
>         .priority    = NF_IP_PRI_RAW,
>     },
>     {
>         .hook       = nf_crypt_trailer,
>         .pf         = NFPROTO_IPV4,
>         .hooknum    = NF_INET_LOCAL_OUT,
>         .priority   = NF_IP_PRI_RAW,
>     },
> };
> 
> static int __net_init nf_crypt_net_init(struct net *net)
> {
>     /* do nothing in the init namespace */
>     if (net == &init_net)
>         return 0;
> 
>     return nf_register_net_hooks(net, nf_crypt_ops, ARRAY_SIZE(nf_crypt_ops));
> }
> 
> static void __net_exit nf_crypt_net_exit(struct net *net)
> {
>     /* do nothing in the init namespace */
>     if (net == &init_net)
>         return;
> 
>     nf_unregister_net_hooks(net, nf_crypt_ops, ARRAY_SIZE(nf_crypt_ops));
> }
> 
> static struct pernet_operations nf_crypt_net_ops = {
>     .init = nf_crypt_net_init,
>     .exit = nf_crypt_net_exit,
> };
> 
> static int __init nf_crypt_init(void)
> {
>     memset(trailer_pattern, TRAILER_VAL, TRAILER_LEN);
>     return register_pernet_subsys(&nf_crypt_net_ops);
> }
> 
> static void __exit nf_crypt_fini(void)
> {
>     unregister_pernet_subsys(&nf_crypt_net_ops);
> }
> 
> module_init(nf_crypt_init);
> module_exit(nf_crypt_fini);
> 
> MODULE_LICENSE("GPL");
> 
> Then I set up a test env using two Linux network namespaces:
> #!/bin/bash -e
> 
> sudo ip netns add alice
> sudo ip netns add bob
> 
> sudo ip -netns alice link add a0 type veth peer b0 netns bob
> 
> sudo ip -netns alice address add 192.168.13.5/24 dev a0
> sudo ip -netns bob address add 192.168.13.7/24 dev b0
> 
> sudo ip -netns alice link set lo up
> sudo ip -netns alice link set a0 up
> 
> sudo ip -netns bob link set lo up
> sudo ip -netns bob link set b0 up
> 
> Everything works except when I try to serve a large file over HTTP (around 5Gb):
> $ sudo ip netns exec bob python3 -m http.server
> and in another terminal
> $ sudo ip netns exec alice curl -o /dev/null http://192.168.13.7:8000/test.bin
> 
> The download starts, but the download speed almost immediately drops
> to 0 and "stalls".
> 
> I've explicitly added the pr_err messages for the module to notify me,
> if it drops packets for whatever reason, but it doesn't drop any
> packets.
> 
> Additionally, further debugging showed - if a TCP "ack" packet to
> "bob" gets processed on a kernel thread (and not in softirq), "# cat
> /proc/<pid>/stack" for the thread produces:
> 
> [<0>] wait_woken+0x1f4/0x250
> [<0>] sk_stream_wait_memory+0x3fb/0xde0
> [<0>] tcp_sendmsg_locked+0x94b/0x2e60
> [<0>] tcp_sendmsg+0x28/0x40
> [<0>] sock_sendmsg+0xdb/0x110
> [<0>] __sys_sendto+0x1a8/0x270
> [<0>] __x64_sys_sendto+0xdd/0x1b0
> [<0>] do_syscall_64+0x33/0x40
> [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xae
> 
> It seems the server-side sending buffer is full, so one would assume
> TCP acks from the client are somehow not getting processed, but I
> definitely see client TCP acks at least in the netfilter module. I've
> also tried to disable GSO on the veth interfaces as well as lower the
> MTU to no avail.
> 
> Additionally, if I reduce TRAILER_LEN to 0 (leaving the other
> skb_cow_data calls in place) - everything starts working.
> 
> Are there any hints why the above code causes this strange behaviour
> in TCP given that it seems I'm undoing everything on the incoming path
> I did for the outgoing path, so should be totally transparent to TCP?
> 
> Kind regards,
> Ignat

TCP segmentation offload doesn't know what you are doing

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ