lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20071206130309.GU4653@ghostprotocols.net>
Date:	Thu, 6 Dec 2007 11:03:09 -0200
From:	Arnaldo Carvalho de Melo <acme@...hat.com>
To:	Gerrit Renker <gerrit@....abdn.ac.uk>
Cc:	dccp@...r.kernel.org, netdev@...r.kernel.org
Subject: [RFC2][PATCH 7/7] [TFRC]: New rx history code

Gerrit,

	I think I got this right this time, please see if there is
anything left so that we can move on. I plan to go thru the following
patches restricting myself to namespacing and consistency issues,
leaving ideas I have for later, when we get more of your backlog merged.

	The first six patches in this series are unmodified, so if you
are OK with them please send me your Signed-off-by.

Thanks a lot,

- Arnaldo

>From 2a3b4067dd514ce0e307d165783bc561cc7f17c4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@...hat.com>
Date: Thu, 6 Dec 2007 10:56:58 -0200
Subject: [PATCH 7/7] [TFRC]: New rx history code

Credit here goes to Gerrit Renker, that provided the initial implementation for
this new codebase.

I modified it just to try to make it closer to the existing API, renaming some
functions, add namespacing and fix one bug where the tfrc_rx_hist_alloc was not
freeing the allocated ring entries on the error path.

Original changeset comment from Gerrit:
      -----------
This provides a new, self-contained and generic RX history service for TFRC
based protocols.

Details:
 * new data structure, initialisation and cleanup routines;
 * allocation of dccp_rx_hist entries local to packet_history.c,
   as a service exported by the dccp_tfrc_lib module.
 * interface to automatically track highest-received seqno;
 * receiver-based RTT estimation (needed for instance by RFC 3448, 6.3.1);
 * a generic function to test for `data packets' as per  RFC 4340, sec. 7.7.

Signed-off-by: Arnaldo Carvalho de Melo <acme@...hat.com>
---
 net/dccp/ccids/ccid3.c              |  292 +++++++++++++----------------------
 net/dccp/ccids/ccid3.h              |   14 +-
 net/dccp/ccids/lib/loss_interval.c  |   13 ++-
 net/dccp/ccids/lib/packet_history.c |  290 +++++++++++++++++++++--------------
 net/dccp/ccids/lib/packet_history.h |   83 +++++------
 5 files changed, 334 insertions(+), 358 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 5ff5aab..28a5e4d 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -641,6 +641,15 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
 /*
  *	Receiver Half-Connection Routines
  */
+
+/* CCID3 feedback types */
+enum ccid3_fback_type {
+	CCID3_FBACK_NONE = 0,
+	CCID3_FBACK_INITIAL,
+	CCID3_FBACK_PERIODIC,
+	CCID3_FBACK_PARAM_CHANGE
+};
+
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
 static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
 {
@@ -667,59 +676,60 @@ static void ccid3_hc_rx_set_state(struct sock *sk,
 	hcrx->ccid3hcrx_state = state;
 }
 
-static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len)
-{
-	if (likely(len > 0))	/* don't update on empty packets (e.g. ACKs) */
-		hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, len, 9);
-}
-
-static void ccid3_hc_rx_send_feedback(struct sock *sk)
+static void ccid3_hc_rx_send_feedback(struct sock *sk,
+				      const struct sk_buff *skb,
+				      enum ccid3_fback_type fbtype)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct tfrc_rx_hist_entry *packet;
 	ktime_t now;
-	suseconds_t delta;
+	s64 delta = 0;
 
 	ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
 
+	if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM))
+		return;
+
 	now = ktime_get_real();
 
-	switch (hcrx->ccid3hcrx_state) {
-	case TFRC_RSTATE_NO_DATA:
+	switch (fbtype) {
+	case CCID3_FBACK_INITIAL:
 		hcrx->ccid3hcrx_x_recv = 0;
+		hcrx->ccid3hcrx_pinv   = ~0U;   /* see RFC 4342, 8.5 */
 		break;
-	case TFRC_RSTATE_DATA:
-		delta = ktime_us_delta(now,
-				       hcrx->ccid3hcrx_tstamp_last_feedback);
-		DCCP_BUG_ON(delta < 0);
-		hcrx->ccid3hcrx_x_recv =
-			scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+	case CCID3_FBACK_PARAM_CHANGE:
+		/*
+		 * When parameters change (new loss or p > p_prev), we do not
+		 * have a reliable estimate for R_m of [RFC 3448, 6.2] and so
+		 * need to  reuse the previous value of X_recv. However, when
+		 * X_recv was 0 (due to early loss), this would kill X down to
+		 * s/t_mbi (i.e. one packet in 64 seconds).
+		 * To avoid such drastic reduction, we approximate X_recv as
+		 * the number of bytes since last feedback.
+		 * This is a safe fallback, since X is bounded above by X_calc.
+		 */
+		if (hcrx->ccid3hcrx_x_recv > 0)
+			break;
+		/* fall through */
+	case CCID3_FBACK_PERIODIC:
+		delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback);
+		if (delta <= 0)
+			DCCP_BUG("delta (%ld) <= 0", (long)delta);
+		else
+			hcrx->ccid3hcrx_x_recv =
+				scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
 		break;
-	case TFRC_RSTATE_TERM:
-		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
+	default:
 		return;
 	}
 
-	packet = tfrc_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
-	if (unlikely(packet == NULL)) {
-		DCCP_WARN("%s(%p), no data packet in history!\n",
-			  dccp_role(sk), sk);
-		return;
-	}
+	ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta,
+		       hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv);
 
 	hcrx->ccid3hcrx_tstamp_last_feedback = now;
-	hcrx->ccid3hcrx_ccval_last_counter   = packet->tfrchrx_ccval;
+	hcrx->ccid3hcrx_last_counter	     = dccp_hdr(skb)->dccph_ccval;
 	hcrx->ccid3hcrx_bytes_recv	     = 0;
 
-	if (hcrx->ccid3hcrx_p == 0)
-		hcrx->ccid3hcrx_pinv = ~0U;	/* see RFC 4342, 8.5 */
-	else if (hcrx->ccid3hcrx_p > 1000000) {
-		DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
-		hcrx->ccid3hcrx_pinv = 1;	/* use 100% in this case */
-	} else
-		hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
-
 	dp->dccps_hc_rx_insert_options = 1;
 	dccp_send_ack(sk);
 }
@@ -750,165 +760,78 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-static int ccid3_hc_rx_detect_loss(struct sock *sk,
-				    struct tfrc_rx_hist_entry *packet)
-{
-	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	struct tfrc_rx_hist_entry *rx_hist =
-				tfrc_rx_hist_head(&hcrx->ccid3hcrx_hist);
-	u64 seqno = packet->tfrchrx_seqno;
-	u64 tmp_seqno;
-	int loss = 0;
-	u8 ccval;
-
-
-	tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
-
-	if (!rx_hist ||
-	   follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
-		hcrx->ccid3hcrx_seqno_nonloss = seqno;
-		hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval;
-		goto detect_out;
-	}
-
-
-	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
-	   > TFRC_RECV_NUM_LATE_LOSS) {
-		loss = 1;
-		dccp_li_update_li(sk,
-				  &hcrx->ccid3hcrx_li_hist,
-				  &hcrx->ccid3hcrx_hist,
-				  hcrx->ccid3hcrx_tstamp_last_feedback,
-				  hcrx->ccid3hcrx_s,
-				  hcrx->ccid3hcrx_bytes_recv,
-				  hcrx->ccid3hcrx_x_recv,
-				  hcrx->ccid3hcrx_seqno_nonloss,
-				  hcrx->ccid3hcrx_ccval_nonloss);
-		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
-		dccp_inc_seqno(&tmp_seqno);
-		hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
-		dccp_inc_seqno(&tmp_seqno);
-		while (tfrc_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
-		   tmp_seqno, &ccval)) {
-			hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
-			hcrx->ccid3hcrx_ccval_nonloss = ccval;
-			dccp_inc_seqno(&tmp_seqno);
-		}
-	}
-
-	/* FIXME - this code could be simplified with above while */
-	/* but works at moment */
-	if (follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
-		hcrx->ccid3hcrx_seqno_nonloss = seqno;
-		hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval;
-	}
-
-detect_out:
-	tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist,
-				&hcrx->ccid3hcrx_li_hist, packet,
-				hcrx->ccid3hcrx_seqno_nonloss);
-	return loss;
-}
-
 static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	const struct dccp_options_received *opt_recv;
-	struct tfrc_rx_hist_entry *packet;
-	u32 p_prev, r_sample, rtt_prev;
-	int loss, payload_size;
-	ktime_t now;
-
-	opt_recv = &dccp_sk(sk)->dccps_options_received;
-
-	switch (DCCP_SKB_CB(skb)->dccpd_type) {
-	case DCCP_PKT_ACK:
-		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
-			return;
-	case DCCP_PKT_DATAACK:
-		if (opt_recv->dccpor_timestamp_echo == 0)
-			break;
-		r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo;
-		rtt_prev = hcrx->ccid3hcrx_rtt;
-		r_sample = dccp_sample_rtt(sk, 10 * r_sample);
-
-		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
-			hcrx->ccid3hcrx_rtt = r_sample;
-		else
-			hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
-					      r_sample / 10;
-
-		if (rtt_prev != hcrx->ccid3hcrx_rtt)
-			ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n",
-				       dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
-				       opt_recv->dccpor_elapsed_time);
-		break;
-	case DCCP_PKT_DATA:
-		break;
-	default: /* We're not interested in other packet types, move along */
-		return;
-	}
-
-	packet = tfrc_rx_hist_entry_new(opt_recv->dccpor_ndp, skb, GFP_ATOMIC);
-	if (unlikely(packet == NULL)) {
-		DCCP_WARN("%s(%p), Not enough mem to add rx packet "
-			  "to history, consider it lost!\n", dccp_role(sk), sk);
-		return;
-	}
-
-	loss = ccid3_hc_rx_detect_loss(sk, packet);
+	enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
+	const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
+	const bool is_data_packet = dccp_data_packet(skb);
+
+	if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
+		if (is_data_packet) {
+			const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+			do_feedback = CCID3_FBACK_INITIAL;
+			ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
+			hcrx->ccid3hcrx_s = payload;
+			/*
+			 * Updating ccid3hcrx_bytes_recv is in fact not needed
+			 * here and if it would be done it would not have a
+			 * useable effect. The reason is that the first data
+			 * packet will trigger the initial feedback; and in the
+			 * initial feedback packet X_recv (which is
+			 * ccid3hcrx_bytes_recv / the_time_spent) is set to 0
+			 * (RFC 3448, 6.3) - Gerrit
+			 */
+		}
+		goto update_records;
+ 	}
 
-	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
-		return;
+	if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb))
+		return; /* done receiving */
 
-	payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4;
-	ccid3_hc_rx_update_s(hcrx, payload_size);
+	if (is_data_packet) {
+		const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+		/*
+		 * Update moving-average of s and the sum of received payload bytes
+		 */
+		hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9);
+		hcrx->ccid3hcrx_bytes_recv += payload;
+ 	}
 
-	switch (hcrx->ccid3hcrx_state) {
-	case TFRC_RSTATE_NO_DATA:
-		ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial "
-			       "feedback\n", dccp_role(sk), sk,
-			       dccp_state_name(sk->sk_state), skb);
-		ccid3_hc_rx_send_feedback(sk);
-		ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
-		return;
-	case TFRC_RSTATE_DATA:
-		hcrx->ccid3hcrx_bytes_recv += payload_size;
-		if (loss)
-			break;
+	/*
+	 * Handle pending losses and otherwise check for new loss
+	 */
+	if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp))
+		goto update_records;
 
-		now = ktime_get_real();
-		if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) -
-		     (s64)hcrx->ccid3hcrx_rtt) >= 0) {
-			hcrx->ccid3hcrx_tstamp_last_ack = now;
-			ccid3_hc_rx_send_feedback(sk);
-		}
-		return;
-	case TFRC_RSTATE_TERM:
-		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
-		return;
+	/*
+	 * Handle data packets: RTT sampling and monitoring p
+	 */
+	if (unlikely(!is_data_packet))
+		goto update_records;
+
+	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {  /* no loss so far: p = 0 */
+		const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb);
+ 		/*
+ 		 * Empty loss history: no loss so far, hence p stays 0.
+ 		 * Sample RTT values, since an RTT estimate is required for the
+ 		 * computation of p when the first loss occurs; RFC 3448, 6.3.1.
+ 		 */
+		if (sample != 0)
+			hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9);
 	}
 
-	/* Dealing with packet loss */
-	ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n",
-		       dccp_role(sk), sk, dccp_state_name(sk->sk_state));
-
-	p_prev = hcrx->ccid3hcrx_p;
-
-	/* Calculate loss event rate */
-	if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
-		u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
+	/*
+	 * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
+	 */
+	if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3)
+		do_feedback = CCID3_FBACK_PERIODIC;
 
-		/* Scaling up by 1000000 as fixed decimal */
-		if (i_mean != 0)
-			hcrx->ccid3hcrx_p = 1000000 / i_mean;
-	} else
-		DCCP_BUG("empty loss history");
+update_records:	
+	tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp);
 
-	if (hcrx->ccid3hcrx_p > p_prev) {
-		ccid3_hc_rx_send_feedback(sk);
-		return;
-	}
+	if (do_feedback)
+		ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
 }
 
 static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
@@ -918,11 +841,8 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
 	ccid3_pr_debug("entry\n");
 
 	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
-	INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
 	INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
-	hcrx->ccid3hcrx_tstamp_last_feedback =
-		hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real();
-	return 0;
+	return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist);
 }
 
 static void ccid3_hc_rx_exit(struct sock *sk)
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index b842a7d..3c33dc6 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -1,7 +1,8 @@
 /*
  *  net/dccp/ccids/ccid3.h
  *
- *  Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  *
  *  An implementation of the DCCP protocol
  *
@@ -135,9 +136,7 @@ enum ccid3_hc_rx_states {
  *  @ccid3hcrx_x_recv  -  Receiver estimate of send rate (RFC 3448 4.3)
  *  @ccid3hcrx_rtt  -  Receiver estimate of rtt (non-standard)
  *  @ccid3hcrx_p  -  current loss event rate (RFC 3448 5.4)
- *  @ccid3hcrx_seqno_nonloss  -  Last received non-loss sequence number
- *  @ccid3hcrx_ccval_nonloss  -  Last received non-loss Window CCVal
- *  @ccid3hcrx_ccval_last_counter  -  Tracks window counter (RFC 4342, 8.1)
+ *  @ccid3hcrx_last_counter  -  Tracks window counter (RFC 4342, 8.1)
  *  @ccid3hcrx_state  -  receiver state, one of %ccid3_hc_rx_states
  *  @ccid3hcrx_bytes_recv  -  Total sum of DCCP payload bytes
  *  @ccid3hcrx_tstamp_last_feedback  -  Time at which last feedback was sent
@@ -152,14 +151,11 @@ struct ccid3_hc_rx_sock {
 #define ccid3hcrx_x_recv		ccid3hcrx_tfrc.tfrcrx_x_recv
 #define ccid3hcrx_rtt			ccid3hcrx_tfrc.tfrcrx_rtt
 #define ccid3hcrx_p			ccid3hcrx_tfrc.tfrcrx_p
-	u64				ccid3hcrx_seqno_nonloss:48,
-					ccid3hcrx_ccval_nonloss:4,
-					ccid3hcrx_ccval_last_counter:4;
+	u8				ccid3hcrx_last_counter:4;
 	enum ccid3_hc_rx_states		ccid3hcrx_state:8;
 	u32				ccid3hcrx_bytes_recv;
 	ktime_t				ccid3hcrx_tstamp_last_feedback;
-	ktime_t				ccid3hcrx_tstamp_last_ack;
-	struct list_head		ccid3hcrx_hist;
+	struct tfrc_rx_hist		ccid3hcrx_hist;
 	struct list_head		ccid3hcrx_li_hist;
 	u16				ccid3hcrx_s;
 	u32				ccid3hcrx_pinv;
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index a5f59af..7e0714a 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -129,6 +129,13 @@ static u32 dccp_li_calc_first_li(struct sock *sk,
 				 u16 s, u32 bytes_recv,
 				 u32 previous_x_recv)
 {
+/*
+ * FIXME:
+ * Will be rewritten in the upcoming new loss intervals code. 
+ * Has to be commented ou because it relies on the old rx history
+ * data structures
+ */
+#if 0
 	struct tfrc_rx_hist_entry *entry, *next, *tail = NULL;
 	u32 x_recv, p;
 	suseconds_t rtt, delta;
@@ -216,10 +223,10 @@ found:
 	dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
 		      "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
 
-	if (p == 0)
-		return ~0;
-	else
+	if (p != 0)
 		return 1000000 / p;
+#endif
+	return ~0;
 }
 
 void dccp_li_update_li(struct sock *sk,
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 255cca1..e197389 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -36,7 +36,9 @@
  */
 
 #include <linux/string.h>
+#include <linux/slab.h>
 #include "packet_history.h"
+#include "../../dccp.h"
 
 /**
  *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
@@ -111,154 +113,214 @@ u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
 }
 EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
 
+
+/**
+ * tfrc_rx_hist_index - index to reach n-th entry after loss_start
+ */
+static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n)
+{
+	return (h->loss_start + n) & TFRC_NDUPACK;
+}
+
+/**
+ * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h)
+{
+	return h->ring[tfrc_rx_hist_index(h, h->loss_count)];
+}
+
 /*
  * 	Receiver History Routines
  */
 static struct kmem_cache *tfrc_rx_hist_slab;
 
-struct tfrc_rx_hist_entry *tfrc_rx_hist_entry_new(const u32 ndp,
-						  const struct sk_buff *skb,
-						  const gfp_t prio)
+void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
+			     const struct sk_buff *skb,
+			     const u32 ndp)
 {
-	struct tfrc_rx_hist_entry *entry = kmem_cache_alloc(tfrc_rx_hist_slab,
-							    prio);
-
-	if (entry != NULL) {
-		const struct dccp_hdr *dh = dccp_hdr(skb);
-
-		entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-		entry->tfrchrx_ccval = dh->dccph_ccval;
-		entry->tfrchrx_type  = dh->dccph_type;
-		entry->tfrchrx_ndp   = ndp;
-		entry->tfrchrx_tstamp = ktime_get_real();
-	}
-
-	return entry;
+	struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);
+	const struct dccp_hdr *dh = dccp_hdr(skb);
+
+	entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+	entry->tfrchrx_ccval = dh->dccph_ccval;
+	entry->tfrchrx_type  = dh->dccph_type;
+	entry->tfrchrx_ndp   = ndp;
+	entry->tfrchrx_tstamp = ktime_get_real();
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_entry_new);
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
 
 static inline void tfrc_rx_hist_entry_delete(struct tfrc_rx_hist_entry *entry)
 {
 	kmem_cache_free(tfrc_rx_hist_slab, entry);
 }
 
-int tfrc_rx_hist_find_entry(const struct list_head *list, const u64 seq,
-			    u8 *ccval)
+/**
+ * tfrc_rx_hist_entry - return the n-th history entry after loss_start
+ */
+static inline struct tfrc_rx_hist_entry *
+		tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n)
 {
-	struct tfrc_rx_hist_entry *packet = NULL, *entry;
+	return h->ring[tfrc_rx_hist_index(h, n)];
+}
 
-	list_for_each_entry(entry, list, tfrchrx_node)
-		if (entry->tfrchrx_seqno == seq) {
-			packet = entry;
-			break;
-		}
+/**
+ * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h)
+{
+	return h->ring[h->loss_start];
+}
+
+/* has the packet contained in skb been seen before? */
+int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
+{
+	const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq;
+	int i;
+
+	if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0)
+		return 1;
 
-	if (packet)
-		*ccval = packet->tfrchrx_ccval;
+	for (i = 1; i <= h->loss_count; i++)
+		if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq)
+			return 1;
 
-	return packet != NULL;
+	return 0;
 }
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
 
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_find_entry);
-struct tfrc_rx_hist_entry *
-		tfrc_rx_hist_find_data_packet(const struct list_head *list)
+/* initialise loss detection and disable RTT sampling */
+static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h)
 {
-	struct tfrc_rx_hist_entry *entry, *packet = NULL;
+	h->loss_count = 1;
+}
 
-	list_for_each_entry(entry, list, tfrchrx_node)
-		if (entry->tfrchrx_type == DCCP_PKT_DATA ||
-		    entry->tfrchrx_type == DCCP_PKT_DATAACK) {
-			packet = entry;
-			break;
-		}
+/* indicate whether previously a packet was detected missing */
+static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
+{
+	return h->loss_count;
+}
+
+/* any data packets missing between last reception and skb ? */
+int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
+				    const struct sk_buff *skb, u32 ndp)
+{
+	int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno,
+				     DCCP_SKB_CB(skb)->dccpd_seq);
+
+	if (delta > 1 && ndp < delta)
+		tfrc_rx_hist_loss_indicated(h);
 
-	return packet;
+	return tfrc_rx_hist_loss_pending(h);
 }
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_new_loss_indicated);
 
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_find_data_packet);
+int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
+{
+	int i;
+
+	for (i = 0; i <= TFRC_NDUPACK; i++) {
+		h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
+		if (h->ring[i] == NULL)
+			goto out_free;
+	}
+
+	h->loss_count = h->loss_start = 0;
+	return 0;
+
+out_free:
+	while (i-- != 0) {
+		kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
+		h->ring[i] = NULL;
+	}
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
 
-void tfrc_rx_hist_add_packet(struct list_head *rx_list,
-			     struct list_head *li_list,
-			     struct tfrc_rx_hist_entry *packet,
-			     u64 nonloss_seqno)
+void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 {
-	struct tfrc_rx_hist_entry *entry, *next;
-	u8 num_later = 0;
-
-	list_add(&packet->tfrchrx_node, rx_list);
-
-	num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
-
-	if (!list_empty(li_list)) {
-		list_for_each_entry_safe(entry, next, rx_list, tfrchrx_node) {
-			if (num_later == 0) {
-				if (after48(nonloss_seqno,
-				   entry->tfrchrx_seqno)) {
-					list_del_init(&entry->tfrchrx_node);
-					tfrc_rx_hist_entry_delete(entry);
-				}
-			} else if (tfrc_rx_hist_entry_data_packet(entry))
-				--num_later;
-		}
-	} else {
-		int step = 0;
-		u8 win_count = 0; /* Not needed, but lets shut up gcc */
-		int tmp;
-		/*
-		 * We have no loss interval history so we need at least one
-		 * rtt:s of data packets to approximate rtt.
-		 */
-		list_for_each_entry_safe(entry, next, rx_list, tfrchrx_node) {
-			if (num_later == 0) {
-				switch (step) {
-				case 0:
-					step = 1;
-					/* OK, find next data packet */
-					num_later = 1;
-					break;
-				case 1:
-					step = 2;
-					/* OK, find next data packet */
-					num_later = 1;
-					win_count = entry->tfrchrx_ccval;
-					break;
-				case 2:
-					tmp = win_count - entry->tfrchrx_ccval;
-					if (tmp < 0)
-						tmp += TFRC_WIN_COUNT_LIMIT;
-					if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
-						/*
-						 * We have found a packet older
-						 * than one rtt remove the rest
-						 */
-						step = 3;
-					} else /* OK, find next data packet */
-						num_later = 1;
-					break;
-				case 3:
-					list_del_init(&entry->tfrchrx_node);
-					tfrc_rx_hist_entry_delete(entry);
-					break;
-				}
-			} else if (tfrc_rx_hist_entry_data_packet(entry))
-				--num_later;
+	int i;
+
+	for (i = 0; i <= TFRC_NDUPACK; ++i)
+		if (h->ring[i] != NULL) {
+			kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
+			h->ring[i] = NULL;
 		}
-	}
 }
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
 
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
+/**
+ * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
+{
+	return h->ring[0];
+}
 
-void tfrc_rx_hist_purge(struct list_head *list)
+/**
+ * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
 {
-	struct tfrc_rx_hist_entry *entry, *next;
+	return h->ring[h->rtt_sample_prev];
+}
 
-	list_for_each_entry_safe(entry, next, list, tfrchrx_node) {
-		list_del_init(&entry->tfrchrx_node);
-		tfrc_rx_hist_entry_delete(entry);
+/**
+ * tfrc_rx_hist_sample_rtt  -  Sample RTT from timestamp / CCVal
+ * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
+ * to compute a sample with given data - calling function should check this.
+ */
+u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
+{
+	u32 sample = 0,
+	    delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
+	    		    tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+
+	if (delta_v < 1 || delta_v > 4) {	/* unsuitable CCVal delta */
+		if (h->rtt_sample_prev == 2) {	/* previous candidate stored */
+			sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
+				       tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+			if (sample)
+				sample = 4 / sample *
+				         ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
+							tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
+			else    /*
+				 * FIXME: This condition is in principle not
+				 * possible but occurs when CCID is used for
+				 * two-way data traffic. I have tried to trace
+				 * it, but the cause does not seem to be here.
+				 */
+				DCCP_BUG("please report to dccp@...r.kernel.org"
+					 " => prev = %u, last = %u",
+					 tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
+					 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+		} else if (delta_v < 1) {
+			h->rtt_sample_prev = 1;
+			goto keep_ref_for_next_time;
+		}
+
+	} else if (delta_v == 4) /* optimal match */
+		sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
+	else {			 /* suboptimal match */
+		h->rtt_sample_prev = 2;
+		goto keep_ref_for_next_time;
 	}
-}
 
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
+	if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
+		DCCP_WARN("RTT sample %u too large, using max\n", sample);
+		sample = DCCP_SANE_RTT_MAX;
+	}
+
+	h->rtt_sample_prev = 0;	       /* use current entry as next reference */
+keep_ref_for_next_time:
+
+	return sample;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
 
 __init int packet_history_init(void)
 {
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 5b0b983..3dfd182 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -37,15 +37,9 @@
 #define _DCCP_PKT_HIST_
 
 #include <linux/ktime.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include "tfrc.h"
+#include <linux/types.h>
 
-/* Number of later packets received before one is considered lost */
-#define TFRC_RECV_NUM_LATE_LOSS	 3
-
-#define TFRC_WIN_COUNT_PER_RTT	 4
-#define TFRC_WIN_COUNT_LIMIT	16
+struct sk_buff;
 
 struct tfrc_tx_hist_entry;
 
@@ -54,11 +48,20 @@ extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
 extern u32  tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
 			     const u64 seqno, const ktime_t now);
 
-/*
- * 	Receiver History data structures and declarations
+/* Subtraction a-b modulo-16, respects circular wrap-around */
+#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
+
+/* Number of packets to wait after a missing packet (RFC 4342, 6.1) */
+#define TFRC_NDUPACK 3
+
+/**
+ * tfrc_rx_hist_entry - Store information about a single received packet
+ * @tfrchrx_seqno:	DCCP packet sequence number
+ * @tfrchrx_ccval:	window counter value of packet (RFC 4342, 8.1)
+ * @tfrchrx_ndp:	the NDP count (if any) of the packet
+ * @tfrchrx_tstamp:	actual receive time of packet
  */
 struct tfrc_rx_hist_entry {
-	struct list_head tfrchrx_node;
 	u64		 tfrchrx_seqno:48,
 			 tfrchrx_ccval:4,
 			 tfrchrx_type:4;
@@ -66,42 +69,30 @@ struct tfrc_rx_hist_entry {
 	ktime_t		 tfrchrx_tstamp;
 };
 
-extern struct tfrc_rx_hist_entry *
-			tfrc_rx_hist_entry_new(const u32 ndp,
-					       const struct sk_buff *skb,
-					       const gfp_t prio);
-
-static inline struct tfrc_rx_hist_entry *
-			tfrc_rx_hist_head(struct list_head *list)
-{
-	struct tfrc_rx_hist_entry *head = NULL;
-
-	if (!list_empty(list))
-		head = list_entry(list->next, struct tfrc_rx_hist_entry,
-				  tfrchrx_node);
-	return head;
-}
-
-extern int tfrc_rx_hist_find_entry(const struct list_head *list, const u64 seq,
-				   u8 *ccval);
-extern struct tfrc_rx_hist_entry *
-		tfrc_rx_hist_find_data_packet(const struct list_head *list);
-
-extern void tfrc_rx_hist_add_packet(struct list_head *rx_list,
-				    struct list_head *li_list,
-				    struct tfrc_rx_hist_entry *packet,
-				    u64 nonloss_seqno);
-
-extern void tfrc_rx_hist_purge(struct list_head *list);
+/**
+ * tfrc_rx_hist  -  RX history structure for TFRC-based protocols
+ *
+ * @ring:		Packet history for RTT sampling and loss detection
+ * @loss_count:		Number of entries in circular history
+ * @loss_start:		Movable index (for loss detection)
+ * @rtt_sample_prev:	Used during RTT sampling, points to candidate entry
+ */
+struct tfrc_rx_hist {
+	struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
+	u8			  loss_count:2,
+				  loss_start:2;
+#define rtt_sample_prev		  loss_start
+};
 
-static inline int
-	tfrc_rx_hist_entry_data_packet(const struct tfrc_rx_hist_entry *entry)
-{
-	return entry->tfrchrx_type == DCCP_PKT_DATA ||
-	       entry->tfrchrx_type == DCCP_PKT_DATAACK;
-}
+extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
+				    const struct sk_buff *skb, const u32 ndp);
 
-extern u64 tfrc_rx_hist_detect_loss(struct list_head *rx_list,
-				    struct list_head *li_list, u8 *win_loss);
+extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
+extern int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
+					   const struct sk_buff *skb, u32 ndp);
+extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
+				   const struct sk_buff *skb);
+extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
+extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
 
 #endif /* _DCCP_PKT_HIST_ */
-- 
1.5.3.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ