lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5c565b2c-85a5-9141-112f-be854cccc558@gmail.com>
Date:   Fri, 6 Aug 2021 12:08:59 +0200
From:   Eric Dumazet <eric.dumazet@...il.com>
To:     Cong Wang <xiyou.wangcong@...il.com>, netdev@...r.kernel.org
Cc:     Qitao Xu <qitao.xu@...edance.com>,
        Cong Wang <cong.wang@...edance.com>
Subject: Re: [Patch net-next 02/13] ipv4: introduce tracepoint
 trace_ip_queue_xmit()



On 8/5/21 8:57 PM, Cong Wang wrote:
> From: Qitao Xu <qitao.xu@...edance.com>
> 
> Tracepoint trace_ip_queue_xmit() is introduced to trace skb
> at the entrance of IP layer on TX side.
> 
> Reviewed-by: Cong Wang <cong.wang@...edance.com>
> Signed-off-by: Qitao Xu <qitao.xu@...edance.com>
> ---
>  include/trace/events/ip.h | 42 +++++++++++++++++++++++++++++++++++++++
>  net/ipv4/ip_output.c      | 10 +++++++++-
>  2 files changed, 51 insertions(+), 1 deletion(-)
> 
> diff --git a/include/trace/events/ip.h b/include/trace/events/ip.h
> index 008f821ebc50..553ae7276732 100644
> --- a/include/trace/events/ip.h
> +++ b/include/trace/events/ip.h
> @@ -41,6 +41,48 @@
>  	TP_STORE_V4MAPPED(__entry, saddr, daddr)
>  #endif
>  
> +TRACE_EVENT(ip_queue_xmit,
> +
> +	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
> +
> +	TP_ARGS(sk, skb),
> +
> +	TP_STRUCT__entry(
> +		__field(const void *, skbaddr)
> +		__field(const void *, skaddr)
> +		__field(__u16, sport)
> +		__field(__u16, dport)
> +		__array(__u8, saddr, 4)
> +		__array(__u8, daddr, 4)
> +		__array(__u8, saddr_v6, 16)
> +		__array(__u8, daddr_v6, 16)
> +	),
> +
> +	TP_fast_assign(
> +		struct inet_sock *inet = inet_sk(sk);
> +		__be32 *p32;
> +
> +		__entry->skbaddr = skb;
> +		__entry->skaddr = sk;
> +
> +		__entry->sport = ntohs(inet->inet_sport);
> +		__entry->dport = ntohs(inet->inet_dport);
> +
> +		p32 = (__be32 *) __entry->saddr;
> +		*p32 = inet->inet_saddr;
> +
> +		p32 = (__be32 *) __entry->daddr;
> +		*p32 =  inet->inet_daddr;
> +
> +		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
> +			      sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
> +	),
> +
> +	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c skbaddr=%px",
> +		  __entry->sport, __entry->dport, __entry->saddr, __entry->daddr,
> +		  __entry->saddr_v6, __entry->daddr_v6, __entry->skbaddr)
> +);
> +
>  #endif /* _TRACE_IP_H */
>  
>  /* This part must be outside protection */
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 6b04a88466b2..dcf94059112e 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -82,6 +82,7 @@
>  #include <linux/netfilter_bridge.h>
>  #include <linux/netlink.h>
>  #include <linux/tcp.h>
> +#include <trace/events/ip.h>
>  
>  static int
>  ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
> @@ -536,7 +537,14 @@ EXPORT_SYMBOL(__ip_queue_xmit);
>  
>  int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
>  {
> -	return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
> +	int ret;
> +
> +	ret = __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
> +	if (!ret)
> +		trace_ip_queue_xmit(sk, skb);
> +
> +	return ret;
> +
>  }
>  EXPORT_SYMBOL(ip_queue_xmit);
>  
> 

While it is useful to have stuff like this,
adding so many trace points has a certain cost.

I fear that you have not determined this cost
on workloads where we enter these functions with cold caches.

For instance, before this patch, the compiler gives us:

2e10 <ip_queue_xmit>:
    2e10:	e8 00 00 00 00       	callq  2e15 <ip_queue_xmit+0x5> (__fentry__-0x4)
    2e15:	0f b6 8f 1c 03 00 00 	movzbl 0x31c(%rdi),%ecx
    2e1c:	e9 ef fb ff ff       	jmpq   2a10 <__ip_queue_xmit>


After the patch, we see the compiler has to save/restore registers, and no longer
tail-calls (jumps to) __ip_queue_xmit. The code is bigger, even when the tracepoint is not enabled.

    2e10:	e8 00 00 00 00       	callq  2e15 <ip_queue_xmit+0x5>
			2e11: R_X86_64_PLT32	__fentry__-0x4
    2e15:	41 55                	push   %r13
    2e17:	49 89 f5             	mov    %rsi,%r13
    2e1a:	41 54                	push   %r12
    2e1c:	55                   	push   %rbp
    2e1d:	0f b6 8f 1c 03 00 00 	movzbl 0x31c(%rdi),%ecx
    2e24:	48 89 fd             	mov    %rdi,%rbp
    2e27:	e8 00 00 00 00       	callq  2e2c <ip_queue_xmit+0x1c>
			2e28: R_X86_64_PLT32	__ip_queue_xmit-0x4
    2e2c:	41 89 c4             	mov    %eax,%r12d
    2e2f:	85 c0                	test   %eax,%eax
    2e31:	74 09                	je     2e3c <ip_queue_xmit+0x2c>
    2e33:	44 89 e0             	mov    %r12d,%eax
    2e36:	5d                   	pop    %rbp
    2e37:	41 5c                	pop    %r12
    2e39:	41 5d                	pop    %r13
    2e3b:	c3                   	retq   
    2e3c:	66 90                	xchg   %ax,%ax
    2e3e:	44 89 e0             	mov    %r12d,%eax
    2e41:	5d                   	pop    %rbp
    2e42:	41 5c                	pop    %r12
    2e44:	41 5d                	pop    %r13
    2e46:	c3                   	retq  
---- tracing code --- 
    2e47:	65 8b 05 00 00 00 00 	mov    %gs:0x0(%rip),%eax        # 2e4e <ip_queue_xmit+0x3e>
			2e4a: R_X86_64_PC32	cpu_number-0x4
    2e4e:	89 c0                	mov    %eax,%eax
    2e50:	48 0f a3 05 00 00 00 	bt     %rax,0x0(%rip)        # 2e58 <ip_queue_xmit+0x48>
    2e57:	00 
			2e54: R_X86_64_PC32	__cpu_online_mask-0x4
    2e58:	73 d9                	jae    2e33 <ip_queue_xmit+0x23>
    2e5a:	48 8b 05 00 00 00 00 	mov    0x0(%rip),%rax        # 2e61 <ip_queue_xmit+0x51>
			2e5d: R_X86_64_PC32	__tracepoint_ip_queue_xmit+0x3c
    2e61:	48 85 c0             	test   %rax,%rax
    2e64:	74 0f                	je     2e75 <ip_queue_xmit+0x65>
    2e66:	48 8b 78 08          	mov    0x8(%rax),%rdi
    2e6a:	4c 89 ea             	mov    %r13,%rdx
    2e6d:	48 89 ee             	mov    %rbp,%rsi
    2e70:	e8 00 00 00 00       	callq  2e75 <ip_queue_xmit+0x65>
			2e71: R_X86_64_PLT32	__SCT__tp_func_ip_queue_xmit-0x4
    2e75:	44 89 e0             	mov    %r12d,%eax
    2e78:	5d                   	pop    %rbp
    2e79:	41 5c                	pop    %r12
    2e7b:	41 5d                	pop    %r13
    2e7d:	c3                   	retq   

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ