lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20141217153349.GG11607@kernel.org>
Date:	Wed, 17 Dec 2014 12:33:49 -0300
From:	Arnaldo Carvalho de Melo <acme@...nel.org>
To:	Martin KaFai Lau <kafai@...com>
Cc:	netdev@...r.kernel.org, "David S. Miller" <davem@...emloft.net>,
	Hannes Frederic Sowa <hannes@...essinduktion.org>,
	Steven Rostedt <rostedt@...dmis.org>,
	Lawrence Brakmo <brakmo@...com>, Josef Bacik <jbacik@...com>,
	Kernel Team <Kernel-team@...com>
Subject: Re: [RFC PATCH net-next 3/5] tcp: Add a few more tracepoints for tcp
 tracer

Em Sun, Dec 14, 2014 at 05:56:44PM -0800, Martin KaFai Lau escreveu:
> The tcp tracer, which will be added in the later patch, depends
> on them to collect statistics.

> --- a/include/trace/events/tcp.h
<SNIP>
> +DECLARE_TRACE(tcp_sacks_rcv,
> +	     TP_PROTO(struct sock *sk, int num_sacks),
> +	     TP_ARGS(sk, num_sacks)
> +);

<SNIP>

> +++ b/net/ipv4/tcp_input.c
> @@ -1650,6 +1650,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
>  	int i, j;
>  	int first_sack_index;
>  
> +	trace_tcp_sacks_rcv(sk, num_sacks);
> +

In another message someone pointed out that we want some tracepoints, but
others would imply ABI, a drag on upstream to keep tons of set in stone
tracepoints, so what I was saying was like below, where one of the above
proposed tracepoints is implemented as a "wannabe tracepoint", i.e. a
dynamic probe, that will be as optimized as the kprobes_tracer can make it,
sometimes even using, IIRC, the ftrace mechanizms, if put on some suitable
place (function entry, etc, IIRC, Steven?).

On a random RHEL7 kernel I had laying around on a test machine:

[root@...andy ~]# perf probe -L tcp_sacktag_write_queue | head -20
<tcp_sacktag_write_queue@...r/src/debug/kernel-3.10.0-123.el7/linux-3.10.0-123.el7.x86_64/net/ipv4/tcp_input.c:0>
      0  tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
         			u32 prior_snd_una)
      2  {
         	struct tcp_sock *tp = tcp_sk(sk);
      4  	const unsigned char *ptr = (skb_transport_header(ack_skb) +
         				    TCP_SKB_CB(ack_skb)->sacked);
         	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
         	struct tcp_sack_block sp[TCP_NUM_SACKS];
         	struct tcp_sack_block *cache;
         	struct tcp_sacktag_state state;
         	struct sk_buff *skb;
     11  	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
         	int used_sacks;
         	bool found_dup_sack = false;
         	int i, j;
         	int first_sack_index;
         
     17  	state.flag = 0;
     18  	state.reord = tp->packets_out;
[root@...andy ~]#

Available variables at tcp_sacktag_write_queue:17
        @<tcp_sacktag_write_queue+77>
                int     num_sacks
                struct sk_buff* ack_skb
                struct sock*    sk
                struct tcp_sack_block_wire*     sp_wire
                u32     prior_snd_una
                unsigned char*  ptr
[root@...andy ~]#

Ok, so we can insert a probe at that point and also we can collect the values of the
sk and num_sacks variables, so:

[root@...andy ~]# perf probe 'tcp_sacks_rcv=tcp_sacktag_write_queue:17 sk num_sacks'
Added new event:
  probe:tcp_sacks_rcv  (on tcp_sacktag_write_queue:17 with sk num_sacks)

You can now use it in all perf tools, such as:

	perf record -e probe:tcp_sacks_rcv -aR sleep 1

[root@...andy ~]

There you go, you have your wannabe tracepoint, dynamic:

[root@...andy ~]# perf record -a -g -e probe:tcp_sacks_rcv
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.362 MB perf.data (~15799 samples) ]

[root@...andy ~]# perf script
swapper 0 [000] 184175.932790: probe:tcp_sacks_rcv: (ffffffff8151e59d) sk=ffff880425761e00 num_sacks=0
              71e59e tcp_sacktag_write_queue (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              72216e tcp_ack (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              723cf8 tcp_rcv_state_process (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              72d158 tcp_v4_do_rcv (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              72f5c7 tcp_v4_rcv (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              709584 ip_local_deliver_finish (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              709858 ip_local_deliver (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              7091fd ip_rcv_finish (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              709ac4 ip_rcv (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              6cfdb6 __netif_receive_skb_core (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              6cffc8 __netif_receive_skb (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              6d0050 netif_receive_skb (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              6d0aa8 napi_gro_receive (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
               1b5bf e1000_receive_skb (/lib/modules/3.10.0-123.el7.x86_64/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko)
               1cbda e1000_clean_rx_irq (/lib/modules/3.10.0-123.el7.x86_64/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko)
               247dc e1000e_poll (/lib/modules/3.10.0-123.el7.x86_64/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko)
              6d041a net_rx_action (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              267047 __do_softirq (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              7f3a5c call_softirq (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              214d25 do_softirq (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              2673e5 irq_exit (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              7f4358 do_IRQ (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              7e94ad ret_from_intr (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              7c3927 rest_init (/usr/lib/debug/lib/modules/3.10.0-123.el7.x86_64/vmlinux)
              e06fa7 start_kernel ([kernel.vmlinux].init.text)
              e065ee x86_64_start_reservations ([kernel.vmlinux].init.text)
              e06742 x86_64_start_kernel ([kernel.vmlinux].init.text)
<SNIP>

[root@...andy ~]# perf script -g python
generated Python script: perf-script.py
[root@...andy ~]# vim perf-script.py # Edit it to remove callchain printing, simplify some stuff
[root@...andy ~]# mv perf-script.py tcp_sack_rcv.py
[root@...andy ~]# cat tcp_sack_rcv.py 
import os, sys

sys.path.append(os.environ['PERF_EXEC_PATH'] + \
	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')

from perf_trace_context import *
from Core import *

def probe__tcp_sacks_rcv(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, __probe_ip, sk, num_sacks):
		print_header(event_name, common_cpu, common_secs, common_nsecs,
			common_pid, common_comm)

		print "__probe_ip=%#x, sk=%#x, num_sacks=%d" % \
		(__probe_ip, sk, num_sacks)

def print_header(event_name, cpu, secs, nsecs, pid, comm):
	print "%-18s %3u %05u.%09u %1u %-8s " % \
	(event_name, cpu, secs, nsecs, pid, comm),
[root@...andy ~]#

[root@...andy ~]# perf script -s tcp_sack_rcv.py  | head -10
Failed to open 64/libfreebl3.so, continuing without symbols
probe__tcp_sacks_rcv   0 184175.932790461 0 swapper   __probe_ip=0xffffffff8151e59d, sk=0xffff880425761e00, num_sacks=0
probe__tcp_sacks_rcv   0 184177.487455369 0 swapper   __probe_ip=0xffffffff8151e59d, sk=0xffff8804047e0780, num_sacks=0
probe__tcp_sacks_rcv   0 184177.588593040 0 swapper   __probe_ip=0xffffffff8151e59d, sk=0xffff8804256af800, num_sacks=0
probe__tcp_sacks_rcv   0 184178.741298627 0 swapper   __probe_ip=0xffffffff8151e59d, sk=0xffff8804256acb00, num_sacks=0
probe__tcp_sacks_rcv   0 184179.902089365 0 swapper   __probe_ip=0xffffffff8151e59d, sk=0xffff8804256ad280, num_sacks=0
probe__tcp_sacks_rcv   0 184180.802761942 0 swapper   __probe_ip=0xffffffff8151e59d, sk=0xffff8804256acb00, num_sacks=0
probe__tcp_sacks_rcv   0 184180.961373503 0 swapper   __probe_ip=0xffffffff8151e59d, sk=0xffff8804256af800, num_sacks=0
probe__tcp_sacks_rcv   0 184182.123660739 0 swapper   __probe_ip=0xffffffff8151e59d, sk=0xffff8804256ad280, num_sacks=0
probe__tcp_sacks_rcv   0 184182.387640636 0 swapper   __probe_ip=0xffffffff8151e59d, sk=0xffff8804256acb00, num_sacks=0
probe__tcp_sacks_rcv   0 184182.859420892 0 swapper   __probe_ip=0xffffffff8151e59d, sk=0xffff8804256af800, num_sacks=0
[root@...andy ~]#


>  	state.flag = 0;
>  	state.reord = tp->packets_out;
>  	state.rtt_us = -1L;
> @@ -2932,6 +2934,9 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
>  
>  	/* RFC6298: only reset backoff on valid RTT measurement. */
>  	inet_csk(sk)->icsk_backoff = 0;
> +
> +	trace_tcp_rtt_sample(sk, seq_rtt_us);
> +
>  	return true;
>  }
>  
> @@ -4232,6 +4237,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
>  	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
>  	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
>  		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
> +	trace_tcp_ooo_rcv(sk);
>  
>  	skb1 = skb_peek_tail(&tp->out_of_order_queue);
>  	if (!skb1) {
> -- 
> 1.8.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ