[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1196974974-31531-8-git-send-email-acme@redhat.com>
Date: Thu, 6 Dec 2007 19:02:54 -0200
From: Arnaldo Carvalho de Melo <acme@...hat.com>
To: "David S. Miller" <davem@...emloft.net>
Cc: netdev@...r.kernel.org, dccp@...r.kernel.org,
Arnaldo Carvalho de Melo <acme@...hat.com>,
Gerrit Renker <gerrit@....abdn.ac.uk>
Subject: [PATCH 7/7] [TFRC]: New rx history code
Credit here goes to Gerrit Renker, that provided the initial implementation for
this new codebase.
I modified it just to try to make it closer to the existing API, renaming some
functions, add namespacing and fix one bug where the tfrc_rx_hist_alloc was not
freeing the allocated ring entries on the error path.
Original changeset comment from Gerrit:
-----------
This provides a new, self-contained and generic RX history service for TFRC
based protocols.
Details:
* new data structure, initialisation and cleanup routines;
* allocation of dccp_rx_hist entries local to packet_history.c,
as a service exported by the dccp_tfrc_lib module.
* interface to automatically track highest-received seqno;
* receiver-based RTT estimation (needed for instance by RFC 3448, 6.3.1);
* a generic function to test for `data packets' as per RFC 4340, sec. 7.7.
Signed-off-by: Gerrit Renker <gerrit@....abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@...hat.com>
---
net/dccp/ccids/ccid3.c | 288 ++++++++++++----------------------
net/dccp/ccids/ccid3.h | 14 +-
net/dccp/ccids/lib/loss_interval.c | 13 ++-
net/dccp/ccids/lib/packet_history.c | 290 +++++++++++++++++++++--------------
net/dccp/ccids/lib/packet_history.h | 83 +++++------
5 files changed, 330 insertions(+), 358 deletions(-)
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 5ff5aab..faacffa 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -641,6 +641,15 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
/*
* Receiver Half-Connection Routines
*/
+
+/* CCID3 feedback types */
+enum ccid3_fback_type {
+ CCID3_FBACK_NONE = 0,
+ CCID3_FBACK_INITIAL,
+ CCID3_FBACK_PERIODIC,
+ CCID3_FBACK_PARAM_CHANGE
+};
+
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
@@ -667,59 +676,60 @@ static void ccid3_hc_rx_set_state(struct sock *sk,
hcrx->ccid3hcrx_state = state;
}
-static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len)
-{
- if (likely(len > 0)) /* don't update on empty packets (e.g. ACKs) */
- hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, len, 9);
-}
-
-static void ccid3_hc_rx_send_feedback(struct sock *sk)
+static void ccid3_hc_rx_send_feedback(struct sock *sk,
+ const struct sk_buff *skb,
+ enum ccid3_fback_type fbtype)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
- struct tfrc_rx_hist_entry *packet;
ktime_t now;
- suseconds_t delta;
+ s64 delta = 0;
ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
+ if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM))
+ return;
+
now = ktime_get_real();
- switch (hcrx->ccid3hcrx_state) {
- case TFRC_RSTATE_NO_DATA:
+ switch (fbtype) {
+ case CCID3_FBACK_INITIAL:
hcrx->ccid3hcrx_x_recv = 0;
+ hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */
break;
- case TFRC_RSTATE_DATA:
- delta = ktime_us_delta(now,
- hcrx->ccid3hcrx_tstamp_last_feedback);
- DCCP_BUG_ON(delta < 0);
- hcrx->ccid3hcrx_x_recv =
- scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+ case CCID3_FBACK_PARAM_CHANGE:
+ /*
+ * When parameters change (new loss or p > p_prev), we do not
+ * have a reliable estimate for R_m of [RFC 3448, 6.2] and so
+ * need to reuse the previous value of X_recv. However, when
+ * X_recv was 0 (due to early loss), this would kill X down to
+ * s/t_mbi (i.e. one packet in 64 seconds).
+ * To avoid such drastic reduction, we approximate X_recv as
+ * the number of bytes since last feedback.
+ * This is a safe fallback, since X is bounded above by X_calc.
+ */
+ if (hcrx->ccid3hcrx_x_recv > 0)
+ break;
+ /* fall through */
+ case CCID3_FBACK_PERIODIC:
+ delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback);
+ if (delta <= 0)
+ DCCP_BUG("delta (%ld) <= 0", (long)delta);
+ else
+ hcrx->ccid3hcrx_x_recv =
+ scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
break;
- case TFRC_RSTATE_TERM:
- DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
+ default:
return;
}
- packet = tfrc_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
- if (unlikely(packet == NULL)) {
- DCCP_WARN("%s(%p), no data packet in history!\n",
- dccp_role(sk), sk);
- return;
- }
+ ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta,
+ hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv);
hcrx->ccid3hcrx_tstamp_last_feedback = now;
- hcrx->ccid3hcrx_ccval_last_counter = packet->tfrchrx_ccval;
+ hcrx->ccid3hcrx_last_counter = dccp_hdr(skb)->dccph_ccval;
hcrx->ccid3hcrx_bytes_recv = 0;
- if (hcrx->ccid3hcrx_p == 0)
- hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */
- else if (hcrx->ccid3hcrx_p > 1000000) {
- DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
- hcrx->ccid3hcrx_pinv = 1; /* use 100% in this case */
- } else
- hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
-
dp->dccps_hc_rx_insert_options = 1;
dccp_send_ack(sk);
}
@@ -750,165 +760,74 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
return 0;
}
-static int ccid3_hc_rx_detect_loss(struct sock *sk,
- struct tfrc_rx_hist_entry *packet)
-{
- struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
- struct tfrc_rx_hist_entry *rx_hist =
- tfrc_rx_hist_head(&hcrx->ccid3hcrx_hist);
- u64 seqno = packet->tfrchrx_seqno;
- u64 tmp_seqno;
- int loss = 0;
- u8 ccval;
-
-
- tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
-
- if (!rx_hist ||
- follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
- hcrx->ccid3hcrx_seqno_nonloss = seqno;
- hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval;
- goto detect_out;
- }
-
-
- while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
- > TFRC_RECV_NUM_LATE_LOSS) {
- loss = 1;
- dccp_li_update_li(sk,
- &hcrx->ccid3hcrx_li_hist,
- &hcrx->ccid3hcrx_hist,
- hcrx->ccid3hcrx_tstamp_last_feedback,
- hcrx->ccid3hcrx_s,
- hcrx->ccid3hcrx_bytes_recv,
- hcrx->ccid3hcrx_x_recv,
- hcrx->ccid3hcrx_seqno_nonloss,
- hcrx->ccid3hcrx_ccval_nonloss);
- tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
- dccp_inc_seqno(&tmp_seqno);
- hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
- dccp_inc_seqno(&tmp_seqno);
- while (tfrc_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
- tmp_seqno, &ccval)) {
- hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
- hcrx->ccid3hcrx_ccval_nonloss = ccval;
- dccp_inc_seqno(&tmp_seqno);
- }
- }
-
- /* FIXME - this code could be simplified with above while */
- /* but works at moment */
- if (follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
- hcrx->ccid3hcrx_seqno_nonloss = seqno;
- hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval;
- }
-
-detect_out:
- tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist,
- &hcrx->ccid3hcrx_li_hist, packet,
- hcrx->ccid3hcrx_seqno_nonloss);
- return loss;
-}
-
static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
- const struct dccp_options_received *opt_recv;
- struct tfrc_rx_hist_entry *packet;
- u32 p_prev, r_sample, rtt_prev;
- int loss, payload_size;
- ktime_t now;
-
- opt_recv = &dccp_sk(sk)->dccps_options_received;
-
- switch (DCCP_SKB_CB(skb)->dccpd_type) {
- case DCCP_PKT_ACK:
- if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
- return;
- case DCCP_PKT_DATAACK:
- if (opt_recv->dccpor_timestamp_echo == 0)
- break;
- r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo;
- rtt_prev = hcrx->ccid3hcrx_rtt;
- r_sample = dccp_sample_rtt(sk, 10 * r_sample);
-
- if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
- hcrx->ccid3hcrx_rtt = r_sample;
- else
- hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
- r_sample / 10;
-
- if (rtt_prev != hcrx->ccid3hcrx_rtt)
- ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n",
- dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
- opt_recv->dccpor_elapsed_time);
- break;
- case DCCP_PKT_DATA:
- break;
- default: /* We're not interested in other packet types, move along */
- return;
- }
-
- packet = tfrc_rx_hist_entry_new(opt_recv->dccpor_ndp, skb, GFP_ATOMIC);
- if (unlikely(packet == NULL)) {
- DCCP_WARN("%s(%p), Not enough mem to add rx packet "
- "to history, consider it lost!\n", dccp_role(sk), sk);
- return;
- }
-
- loss = ccid3_hc_rx_detect_loss(sk, packet);
+ enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
+ const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
+ const bool is_data_packet = dccp_data_packet(skb);
+
+ if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
+ if (is_data_packet) {
+ const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+ do_feedback = CCID3_FBACK_INITIAL;
+ ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
+ hcrx->ccid3hcrx_s = payload;
+ /*
+ * Not necessary to update ccid3hcrx_bytes_recv here,
+ * since X_recv = 0 for the first feedback packet (cf.
+ * RFC 3448, 6.3) -- gerrit
+ */
+ }
+ goto update_records;
+ }
- if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
- return;
+ if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb))
+ return; /* done receiving */
- payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4;
- ccid3_hc_rx_update_s(hcrx, payload_size);
+ if (is_data_packet) {
+ const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
+ /*
+ * Update moving-average of s and the sum of received payload bytes
+ */
+ hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9);
+ hcrx->ccid3hcrx_bytes_recv += payload;
+ }
- switch (hcrx->ccid3hcrx_state) {
- case TFRC_RSTATE_NO_DATA:
- ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial "
- "feedback\n", dccp_role(sk), sk,
- dccp_state_name(sk->sk_state), skb);
- ccid3_hc_rx_send_feedback(sk);
- ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
- return;
- case TFRC_RSTATE_DATA:
- hcrx->ccid3hcrx_bytes_recv += payload_size;
- if (loss)
- break;
+ /*
+ * Handle pending losses and otherwise check for new loss
+ */
+ if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp))
+ goto update_records;
- now = ktime_get_real();
- if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) -
- (s64)hcrx->ccid3hcrx_rtt) >= 0) {
- hcrx->ccid3hcrx_tstamp_last_ack = now;
- ccid3_hc_rx_send_feedback(sk);
- }
- return;
- case TFRC_RSTATE_TERM:
- DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
- return;
+ /*
+ * Handle data packets: RTT sampling and monitoring p
+ */
+ if (unlikely(!is_data_packet))
+ goto update_records;
+
+ if (list_empty(&hcrx->ccid3hcrx_li_hist)) { /* no loss so far: p = 0 */
+ const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb);
+ /*
+ * Empty loss history: no loss so far, hence p stays 0.
+ * Sample RTT values, since an RTT estimate is required for the
+ * computation of p when the first loss occurs; RFC 3448, 6.3.1.
+ */
+ if (sample != 0)
+ hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9);
}
- /* Dealing with packet loss */
- ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n",
- dccp_role(sk), sk, dccp_state_name(sk->sk_state));
-
- p_prev = hcrx->ccid3hcrx_p;
-
- /* Calculate loss event rate */
- if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
- u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
+ /*
+ * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
+ */
+ if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3)
+ do_feedback = CCID3_FBACK_PERIODIC;
- /* Scaling up by 1000000 as fixed decimal */
- if (i_mean != 0)
- hcrx->ccid3hcrx_p = 1000000 / i_mean;
- } else
- DCCP_BUG("empty loss history");
+update_records:
+ tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp);
- if (hcrx->ccid3hcrx_p > p_prev) {
- ccid3_hc_rx_send_feedback(sk);
- return;
- }
+ if (do_feedback)
+ ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
}
static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
@@ -918,11 +837,8 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
ccid3_pr_debug("entry\n");
hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
- INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
- hcrx->ccid3hcrx_tstamp_last_feedback =
- hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real();
- return 0;
+ return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist);
}
static void ccid3_hc_rx_exit(struct sock *sk)
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index b842a7d..3c33dc6 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -1,7 +1,8 @@
/*
* net/dccp/ccids/ccid3.h
*
- * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
+ * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
+ * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
*
* An implementation of the DCCP protocol
*
@@ -135,9 +136,7 @@ enum ccid3_hc_rx_states {
* @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3)
* @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard)
* @ccid3hcrx_p - current loss event rate (RFC 3448 5.4)
- * @ccid3hcrx_seqno_nonloss - Last received non-loss sequence number
- * @ccid3hcrx_ccval_nonloss - Last received non-loss Window CCVal
- * @ccid3hcrx_ccval_last_counter - Tracks window counter (RFC 4342, 8.1)
+ * @ccid3hcrx_last_counter - Tracks window counter (RFC 4342, 8.1)
* @ccid3hcrx_state - receiver state, one of %ccid3_hc_rx_states
* @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes
* @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent
@@ -152,14 +151,11 @@ struct ccid3_hc_rx_sock {
#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv
#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt
#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p
- u64 ccid3hcrx_seqno_nonloss:48,
- ccid3hcrx_ccval_nonloss:4,
- ccid3hcrx_ccval_last_counter:4;
+ u8 ccid3hcrx_last_counter:4;
enum ccid3_hc_rx_states ccid3hcrx_state:8;
u32 ccid3hcrx_bytes_recv;
ktime_t ccid3hcrx_tstamp_last_feedback;
- ktime_t ccid3hcrx_tstamp_last_ack;
- struct list_head ccid3hcrx_hist;
+ struct tfrc_rx_hist ccid3hcrx_hist;
struct list_head ccid3hcrx_li_hist;
u16 ccid3hcrx_s;
u32 ccid3hcrx_pinv;
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index a5f59af..7e0714a 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -129,6 +129,13 @@ static u32 dccp_li_calc_first_li(struct sock *sk,
u16 s, u32 bytes_recv,
u32 previous_x_recv)
{
+/*
+ * FIXME:
+ * Will be rewritten in the upcoming new loss intervals code.
+ * Has to be commented ou because it relies on the old rx history
+ * data structures
+ */
+#if 0
struct tfrc_rx_hist_entry *entry, *next, *tail = NULL;
u32 x_recv, p;
suseconds_t rtt, delta;
@@ -216,10 +223,10 @@ found:
dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
"loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
- if (p == 0)
- return ~0;
- else
+ if (p != 0)
return 1000000 / p;
+#endif
+ return ~0;
}
void dccp_li_update_li(struct sock *sk,
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 255cca1..e197389 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -36,7 +36,9 @@
*/
#include <linux/string.h>
+#include <linux/slab.h>
#include "packet_history.h"
+#include "../../dccp.h"
/**
* tfrc_tx_hist_entry - Simple singly-linked TX history list
@@ -111,154 +113,214 @@ u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
}
EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
+
+/**
+ * tfrc_rx_hist_index - index to reach n-th entry after loss_start
+ */
+static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n)
+{
+ return (h->loss_start + n) & TFRC_NDUPACK;
+}
+
+/**
+ * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far
+ */
+static inline struct tfrc_rx_hist_entry *
+ tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h)
+{
+ return h->ring[tfrc_rx_hist_index(h, h->loss_count)];
+}
+
/*
* Receiver History Routines
*/
static struct kmem_cache *tfrc_rx_hist_slab;
-struct tfrc_rx_hist_entry *tfrc_rx_hist_entry_new(const u32 ndp,
- const struct sk_buff *skb,
- const gfp_t prio)
+void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
+ const struct sk_buff *skb,
+ const u32 ndp)
{
- struct tfrc_rx_hist_entry *entry = kmem_cache_alloc(tfrc_rx_hist_slab,
- prio);
-
- if (entry != NULL) {
- const struct dccp_hdr *dh = dccp_hdr(skb);
-
- entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
- entry->tfrchrx_ccval = dh->dccph_ccval;
- entry->tfrchrx_type = dh->dccph_type;
- entry->tfrchrx_ndp = ndp;
- entry->tfrchrx_tstamp = ktime_get_real();
- }
-
- return entry;
+ struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);
+ const struct dccp_hdr *dh = dccp_hdr(skb);
+
+ entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+ entry->tfrchrx_ccval = dh->dccph_ccval;
+ entry->tfrchrx_type = dh->dccph_type;
+ entry->tfrchrx_ndp = ndp;
+ entry->tfrchrx_tstamp = ktime_get_real();
}
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_entry_new);
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
static inline void tfrc_rx_hist_entry_delete(struct tfrc_rx_hist_entry *entry)
{
kmem_cache_free(tfrc_rx_hist_slab, entry);
}
-int tfrc_rx_hist_find_entry(const struct list_head *list, const u64 seq,
- u8 *ccval)
+/**
+ * tfrc_rx_hist_entry - return the n-th history entry after loss_start
+ */
+static inline struct tfrc_rx_hist_entry *
+ tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n)
{
- struct tfrc_rx_hist_entry *packet = NULL, *entry;
+ return h->ring[tfrc_rx_hist_index(h, n)];
+}
- list_for_each_entry(entry, list, tfrchrx_node)
- if (entry->tfrchrx_seqno == seq) {
- packet = entry;
- break;
- }
+/**
+ * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected
+ */
+static inline struct tfrc_rx_hist_entry *
+ tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h)
+{
+ return h->ring[h->loss_start];
+}
+
+/* has the packet contained in skb been seen before? */
+int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
+{
+ const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq;
+ int i;
+
+ if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0)
+ return 1;
- if (packet)
- *ccval = packet->tfrchrx_ccval;
+ for (i = 1; i <= h->loss_count; i++)
+ if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq)
+ return 1;
- return packet != NULL;
+ return 0;
}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_find_entry);
-struct tfrc_rx_hist_entry *
- tfrc_rx_hist_find_data_packet(const struct list_head *list)
+/* initialise loss detection and disable RTT sampling */
+static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h)
{
- struct tfrc_rx_hist_entry *entry, *packet = NULL;
+ h->loss_count = 1;
+}
- list_for_each_entry(entry, list, tfrchrx_node)
- if (entry->tfrchrx_type == DCCP_PKT_DATA ||
- entry->tfrchrx_type == DCCP_PKT_DATAACK) {
- packet = entry;
- break;
- }
+/* indicate whether previously a packet was detected missing */
+static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
+{
+ return h->loss_count;
+}
+
+/* any data packets missing between last reception and skb ? */
+int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
+ const struct sk_buff *skb, u32 ndp)
+{
+ int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno,
+ DCCP_SKB_CB(skb)->dccpd_seq);
+
+ if (delta > 1 && ndp < delta)
+ tfrc_rx_hist_loss_indicated(h);
- return packet;
+ return tfrc_rx_hist_loss_pending(h);
}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_new_loss_indicated);
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_find_data_packet);
+int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
+{
+ int i;
+
+ for (i = 0; i <= TFRC_NDUPACK; i++) {
+ h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
+ if (h->ring[i] == NULL)
+ goto out_free;
+ }
+
+ h->loss_count = h->loss_start = 0;
+ return 0;
+
+out_free:
+ while (i-- != 0) {
+ kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
+ h->ring[i] = NULL;
+ }
+ return -ENOBUFS;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
-void tfrc_rx_hist_add_packet(struct list_head *rx_list,
- struct list_head *li_list,
- struct tfrc_rx_hist_entry *packet,
- u64 nonloss_seqno)
+void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
{
- struct tfrc_rx_hist_entry *entry, *next;
- u8 num_later = 0;
-
- list_add(&packet->tfrchrx_node, rx_list);
-
- num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
-
- if (!list_empty(li_list)) {
- list_for_each_entry_safe(entry, next, rx_list, tfrchrx_node) {
- if (num_later == 0) {
- if (after48(nonloss_seqno,
- entry->tfrchrx_seqno)) {
- list_del_init(&entry->tfrchrx_node);
- tfrc_rx_hist_entry_delete(entry);
- }
- } else if (tfrc_rx_hist_entry_data_packet(entry))
- --num_later;
- }
- } else {
- int step = 0;
- u8 win_count = 0; /* Not needed, but lets shut up gcc */
- int tmp;
- /*
- * We have no loss interval history so we need at least one
- * rtt:s of data packets to approximate rtt.
- */
- list_for_each_entry_safe(entry, next, rx_list, tfrchrx_node) {
- if (num_later == 0) {
- switch (step) {
- case 0:
- step = 1;
- /* OK, find next data packet */
- num_later = 1;
- break;
- case 1:
- step = 2;
- /* OK, find next data packet */
- num_later = 1;
- win_count = entry->tfrchrx_ccval;
- break;
- case 2:
- tmp = win_count - entry->tfrchrx_ccval;
- if (tmp < 0)
- tmp += TFRC_WIN_COUNT_LIMIT;
- if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
- /*
- * We have found a packet older
- * than one rtt remove the rest
- */
- step = 3;
- } else /* OK, find next data packet */
- num_later = 1;
- break;
- case 3:
- list_del_init(&entry->tfrchrx_node);
- tfrc_rx_hist_entry_delete(entry);
- break;
- }
- } else if (tfrc_rx_hist_entry_data_packet(entry))
- --num_later;
+ int i;
+
+ for (i = 0; i <= TFRC_NDUPACK; ++i)
+ if (h->ring[i] != NULL) {
+ kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
+ h->ring[i] = NULL;
}
- }
}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
+/**
+ * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
+ */
+static inline struct tfrc_rx_hist_entry *
+ tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
+{
+ return h->ring[0];
+}
-void tfrc_rx_hist_purge(struct list_head *list)
+/**
+ * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
+ */
+static inline struct tfrc_rx_hist_entry *
+ tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
{
- struct tfrc_rx_hist_entry *entry, *next;
+ return h->ring[h->rtt_sample_prev];
+}
- list_for_each_entry_safe(entry, next, list, tfrchrx_node) {
- list_del_init(&entry->tfrchrx_node);
- tfrc_rx_hist_entry_delete(entry);
+/**
+ * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal
+ * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
+ * to compute a sample with given data - calling function should check this.
+ */
+u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
+{
+ u32 sample = 0,
+ delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
+ tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+
+ if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */
+ if (h->rtt_sample_prev == 2) { /* previous candidate stored */
+ sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
+ tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+ if (sample)
+ sample = 4 / sample *
+ ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
+ tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
+ else /*
+ * FIXME: This condition is in principle not
+ * possible but occurs when CCID is used for
+ * two-way data traffic. I have tried to trace
+ * it, but the cause does not seem to be here.
+ */
+ DCCP_BUG("please report to dccp@...r.kernel.org"
+ " => prev = %u, last = %u",
+ tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
+ tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+ } else if (delta_v < 1) {
+ h->rtt_sample_prev = 1;
+ goto keep_ref_for_next_time;
+ }
+
+ } else if (delta_v == 4) /* optimal match */
+ sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
+ else { /* suboptimal match */
+ h->rtt_sample_prev = 2;
+ goto keep_ref_for_next_time;
}
-}
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
+ if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
+ DCCP_WARN("RTT sample %u too large, using max\n", sample);
+ sample = DCCP_SANE_RTT_MAX;
+ }
+
+ h->rtt_sample_prev = 0; /* use current entry as next reference */
+keep_ref_for_next_time:
+
+ return sample;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
__init int packet_history_init(void)
{
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 5b0b983..3dfd182 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -37,15 +37,9 @@
#define _DCCP_PKT_HIST_
#include <linux/ktime.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include "tfrc.h"
+#include <linux/types.h>
-/* Number of later packets received before one is considered lost */
-#define TFRC_RECV_NUM_LATE_LOSS 3
-
-#define TFRC_WIN_COUNT_PER_RTT 4
-#define TFRC_WIN_COUNT_LIMIT 16
+struct sk_buff;
struct tfrc_tx_hist_entry;
@@ -54,11 +48,20 @@ extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
const u64 seqno, const ktime_t now);
-/*
- * Receiver History data structures and declarations
+/* Subtraction a-b modulo-16, respects circular wrap-around */
+#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
+
+/* Number of packets to wait after a missing packet (RFC 4342, 6.1) */
+#define TFRC_NDUPACK 3
+
+/**
+ * tfrc_rx_hist_entry - Store information about a single received packet
+ * @tfrchrx_seqno: DCCP packet sequence number
+ * @tfrchrx_ccval: window counter value of packet (RFC 4342, 8.1)
+ * @tfrchrx_ndp: the NDP count (if any) of the packet
+ * @tfrchrx_tstamp: actual receive time of packet
*/
struct tfrc_rx_hist_entry {
- struct list_head tfrchrx_node;
u64 tfrchrx_seqno:48,
tfrchrx_ccval:4,
tfrchrx_type:4;
@@ -66,42 +69,30 @@ struct tfrc_rx_hist_entry {
ktime_t tfrchrx_tstamp;
};
-extern struct tfrc_rx_hist_entry *
- tfrc_rx_hist_entry_new(const u32 ndp,
- const struct sk_buff *skb,
- const gfp_t prio);
-
-static inline struct tfrc_rx_hist_entry *
- tfrc_rx_hist_head(struct list_head *list)
-{
- struct tfrc_rx_hist_entry *head = NULL;
-
- if (!list_empty(list))
- head = list_entry(list->next, struct tfrc_rx_hist_entry,
- tfrchrx_node);
- return head;
-}
-
-extern int tfrc_rx_hist_find_entry(const struct list_head *list, const u64 seq,
- u8 *ccval);
-extern struct tfrc_rx_hist_entry *
- tfrc_rx_hist_find_data_packet(const struct list_head *list);
-
-extern void tfrc_rx_hist_add_packet(struct list_head *rx_list,
- struct list_head *li_list,
- struct tfrc_rx_hist_entry *packet,
- u64 nonloss_seqno);
-
-extern void tfrc_rx_hist_purge(struct list_head *list);
+/**
+ * tfrc_rx_hist - RX history structure for TFRC-based protocols
+ *
+ * @ring: Packet history for RTT sampling and loss detection
+ * @loss_count: Number of entries in circular history
+ * @loss_start: Movable index (for loss detection)
+ * @rtt_sample_prev: Used during RTT sampling, points to candidate entry
+ */
+struct tfrc_rx_hist {
+ struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
+ u8 loss_count:2,
+ loss_start:2;
+#define rtt_sample_prev loss_start
+};
-static inline int
- tfrc_rx_hist_entry_data_packet(const struct tfrc_rx_hist_entry *entry)
-{
- return entry->tfrchrx_type == DCCP_PKT_DATA ||
- entry->tfrchrx_type == DCCP_PKT_DATAACK;
-}
+extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
+ const struct sk_buff *skb, const u32 ndp);
-extern u64 tfrc_rx_hist_detect_loss(struct list_head *rx_list,
- struct list_head *li_list, u8 *win_loss);
+extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
+extern int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
+ const struct sk_buff *skb, u32 ndp);
+extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
+ const struct sk_buff *skb);
+extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
+extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
#endif /* _DCCP_PKT_HIST_ */
--
1.5.3.4
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists