Message-Id: <20070228.114912.89063615.davem@davemloft.net>
Date: Wed, 28 Feb 2007 11:49:12 -0800 (PST)
From: David Miller <davem@...emloft.net>
To: netdev@...r.kernel.org
Subject: [PATCH 2/4]: Store retransmit queue packets in RB tree.

commit c387760826bd71103220e06ca7b0bf90a785567e
Author: David S. Miller <davem@...set.davemloft.net>
Date:   Tue Feb 27 16:44:42 2007 -0800

    [TCP]: Store retransmit queue packets in RB tree.

    Signed-off-by: David S. Miller <davem@...emloft.net>

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4ff3940..b70fd21 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -18,6 +18,7 @@
 #include <linux/compiler.h>
 #include <linux/time.h>
 #include <linux/cache.h>
+#include <linux/rbtree.h>

 #include <asm/atomic.h>
 #include <asm/types.h>
@@ -232,6 +233,8 @@ struct sk_buff {
 	struct sk_buff		*next;
 	struct sk_buff		*prev;

+	struct rb_node		rb;
+
 	struct sock		*sk;
 	struct skb_timeval	tstamp;
 	struct net_device	*dev;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 18a468d..b73687a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -174,6 +174,7 @@ struct tcp_md5sig {

 #include <linux/skbuff.h>
 #include <linux/dmaengine.h>
+#include <linux/rbtree.h>
 #include <net/sock.h>
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -306,6 +307,7 @@ struct tcp_sock {
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;

+	struct rb_root		write_queue_rb;
 	struct sk_buff_head	out_of_order_queue; /* Out of order segments go here */

 	u32	rcv_wnd;	/* Current receiver window */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 571faa1..cce6b0e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1169,6 +1169,7 @@ static inline void tcp_write_queue_purge(struct sock *sk)

 	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
 		sk_stream_free_skb(sk, skb);
+	tcp_sk(sk)->write_queue_rb = RB_ROOT;
 	sk_stream_mem_reclaim(sk);
 }

@@ -1193,16 +1194,14 @@ static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_buff *skb)
 	return skb->next;
 }

-#define tcp_for_write_queue(skb, sk)					\
-	for (skb = (sk)->sk_write_queue.next;				\
-	     (skb != (sk)->sk_send_head) &&				\
-	     (skb != (struct sk_buff *)&(sk)->sk_write_queue);		\
-	     skb = skb->next)
+#define tcp_for_write_queue(skb, sk)					\
+	for (skb = (sk)->sk_write_queue.next;				\
+	     (skb != (struct sk_buff *)&(sk)->sk_write_queue);		\
+	     skb = skb->next)

-#define tcp_for_write_queue_from(skb, sk)				\
-	for (; (skb != (sk)->sk_send_head) &&				\
-	     (skb != (struct sk_buff *)&(sk)->sk_write_queue);		\
-	     skb = skb->next)
+#define tcp_for_write_queue_from(skb, sk)				\
+	for (; (skb != (struct sk_buff *)&(sk)->sk_write_queue);	\
+	     skb = skb->next)

 static inline struct sk_buff *tcp_send_head(struct sock *sk)
 {
@@ -1211,7 +1210,7 @@ static inline struct sk_buff *tcp_send_head(struct sock *sk)

 static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb)
 {
-	sk->sk_send_head = skb->next;
+	sk->sk_send_head = tcp_write_queue_next(sk, skb);
 	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
 		sk->sk_send_head = NULL;
 }
@@ -1227,9 +1226,54 @@ static inline void tcp_init_send_head(struct sock *sk)
 	sk->sk_send_head = NULL;
 }

+static inline struct sk_buff *tcp_write_queue_find(struct sock *sk, __u32 seq)
+{
+	struct rb_node *rb_node = tcp_sk(sk)->write_queue_rb.rb_node;
+	struct sk_buff *skb = NULL;
+
+	while (rb_node) {
+		struct sk_buff *tmp = rb_entry(rb_node, struct sk_buff, rb);
+
+		if (TCP_SKB_CB(tmp)->end_seq > seq) {
+			skb = tmp;
+			if (TCP_SKB_CB(tmp)->seq <= seq)
+				break;
+			rb_node = rb_node->rb_left;
+		} else
+			rb_node = rb_node->rb_right;
+	}
+	return skb;
+}
+
+static inline void tcp_rb_insert(struct sk_buff *skb, struct rb_root *root)
+{
+	struct rb_node **rb_link, *rb_parent;
+	__u32 seq = TCP_SKB_CB(skb)->seq;
+
+	rb_link = &root->rb_node;
+	rb_parent = NULL;
+	while (*rb_link != NULL) {
+		struct sk_buff *tmp;
+
+		rb_parent = *rb_link;
+		tmp = rb_entry(rb_parent, struct sk_buff, rb);
+		if (TCP_SKB_CB(tmp)->end_seq > seq) {
+			BUG_ON(TCP_SKB_CB(tmp)->seq <= seq);
+			rb_link = &rb_parent->rb_left;
+		} else {
+			rb_link = &rb_parent->rb_right;
+		}
+	}
+	rb_link_node(&skb->rb, rb_parent, rb_link);
+	rb_insert_color(&skb->rb, root);
+}
+
+static inline void tcp_rb_unlink(struct sk_buff *skb, struct rb_root *root)
+{
+	rb_erase(&skb->rb, root);
+}
+
 static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
 {
 	__skb_queue_tail(&sk->sk_write_queue, skb);
+	tcp_rb_insert(skb, &tcp_sk(sk)->write_queue_rb);
 }

 static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
@@ -1244,6 +1288,7 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
 static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
 {
 	__skb_queue_head(&sk->sk_write_queue, skb);
+	tcp_rb_insert(skb, &tcp_sk(sk)->write_queue_rb);
 }

 /* Insert buff after skb on the write queue of sk. */
@@ -1252,19 +1297,22 @@ static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
						 struct sk_buff *buff,
						 struct sock *sk)
 {
 	__skb_append(skb, buff, &sk->sk_write_queue);
+	tcp_rb_insert(buff, &tcp_sk(sk)->write_queue_rb);
 }

-/* Insert skb between prev and next on the write queue of sk. */
+/* Insert new before skb on the write queue of sk. */
 static inline void tcp_insert_write_queue_before(struct sk_buff *new,
						  struct sk_buff *skb,
						  struct sock *sk)
 {
 	__skb_insert(new, skb->prev, skb, &sk->sk_write_queue);
+	tcp_rb_insert(new, &tcp_sk(sk)->write_queue_rb);
 }

 static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
 {
 	__skb_unlink(skb, &sk->sk_write_queue);
+	tcp_rb_unlink(skb, &tcp_sk(sk)->write_queue_rb);
 }

 static inline int tcp_skb_is_last(const struct sock *sk,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 219255f..b919cd7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1065,6 +1065,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
 			int in_sack, pcount;
 			u8 sacked;

+			if (skb == tcp_send_head(sk))
+				break;
+
 			cached_skb = skb;
 			cached_fack_count = fack_count;
 			if (i == first_sack_index) {
@@ -1214,6 +1217,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
 		struct sk_buff *skb;

 		tcp_for_write_queue(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
 				break;
 			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
@@ -1281,6 +1286,8 @@ int tcp_use_frto(struct sock *sk)
 	skb = tcp_write_queue_head(sk);
 	skb = tcp_write_queue_next(sk, skb);	/* Skips head */
 	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
 			return 0;
 		/* Short-circuit when first non-SACKed skb has been checked */
@@ -1382,6 +1389,8 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 	tp->retrans_out = 0;

 	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		cnt += tcp_skb_pcount(skb);
 		/*
 		 * Count the retransmission made on RTO correctly (only when
@@ -1470,6 +1479,8 @@ void tcp_enter_loss(struct sock *sk, int how)
 		tp->undo_marker = tp->snd_una;

 	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		cnt += tcp_skb_pcount(skb);
 		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
 			tp->undo_marker = 0;
@@ -1732,6 +1743,8 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 	}

 	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		/* TODO: do this better */
 		/* this is not the most efficient way to do this... */
 		tp->lost_skb_hint = skb;
@@ -1781,6 +1794,8 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
 			: tcp_write_queue_head(sk);

 		tcp_for_write_queue_from(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			if (!tcp_skb_timedout(sk, skb))
 				break;

@@ -1972,6 +1987,8 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
 	if (tcp_may_undo(tp)) {
 		struct sk_buff *skb;
 		tcp_for_write_queue(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 19ba048..1343e2f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1838,6 +1838,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);

+	tp->write_queue_rb = RB_ROOT;
 	skb_queue_head_init(&tp->out_of_order_queue);
 	tcp_init_xmit_timers(sk);
 	tcp_prequeue_init(tp);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1d25565..d5e3cbe 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -421,6 +421,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
+		newtp->write_queue_rb = RB_ROOT;
 		skb_queue_head_init(&newtp->out_of_order_queue);
 		newtp->write_seq = treq->snt_isn + 1;
 		newtp->pushed_seq = newtp->write_seq;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0adef4a..bc2d477 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1267,11 +1267,11 @@ static int tcp_mtu_probe(struct sock *sk)
 	sk_charge_skb(sk, nskb);

 	skb = tcp_send_head(sk);
+	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
+	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
 	tcp_insert_write_queue_before(nskb, skb, sk);
 	tcp_advance_send_head(sk, skb);

-	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
-	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
 	TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
 	TCP_SKB_CB(nskb)->sacked = 0;
 	nskb->csum = 0;
@@ -1691,6 +1691,8 @@ void tcp_simple_retransmit(struct sock *sk)
 	int lost = 0;

 	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		if (skb->len > mss &&
 		    !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
 			if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
@@ -1865,6 +1867,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		tcp_for_write_queue_from(skb, sk) {
 			__u8 sacked = TCP_SKB_CB(skb)->sacked;

+			if (skb == tcp_send_head(sk))
+				break;
 			/* we could do better than to assign each time */
 			tp->retransmit_skb_hint = skb;
 			tp->retransmit_cnt_hint = packet_cnt;
@@ -1932,6 +1936,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		}

 		tcp_for_write_queue_from(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			tp->forward_cnt_hint = packet_cnt;
 			tp->forward_skb_hint = skb;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f57a9ba..21706a1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1890,6 +1890,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);

+	tp->write_queue_rb = RB_ROOT;
 	skb_queue_head_init(&tp->out_of_order_queue);
 	tcp_init_xmit_timers(sk);
 	tcp_prequeue_init(tp);
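
The heart of the change is that every skb on the write queue is also keyed
into tp->write_queue_rb by TCP_SKB_CB(skb)->seq, so tcp_write_queue_find()
can locate the segment covering a given sequence number in O(log n) instead
of walking sk_write_queue; in exchange, the loop macros no longer stop at
sk_send_head, so each caller now checks tcp_send_head() explicitly.  Below
is a minimal standalone sketch of the same search/insert ordering.  All
names in it are illustrative, not from the patch: an unbalanced binary
search tree and a simplified "struct seg" stand in for the kernel rb-tree
API and sk_buff, and plain compares stand in for the kernel's
wraparound-safe before()/after().

/*
 * Sketch of the seq-keyed lookup/insert logic used by
 * tcp_write_queue_find() and tcp_rb_insert() above.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct seg {
	uint32_t seq;		/* first sequence number covered */
	uint32_t end_seq;	/* one past the last sequence number */
	struct seg *left, *right;
};

/* Insert keyed by seq.  The assert is the partial overlap sanity
 * check mirroring the BUG_ON() in tcp_rb_insert(). */
static void seg_insert(struct seg **link, struct seg *ins)
{
	while (*link) {
		struct seg *cur = *link;

		if (cur->end_seq > ins->seq) {
			assert(cur->seq > ins->seq);
			link = &cur->left;
		} else {
			link = &cur->right;
		}
	}
	ins->left = ins->right = NULL;
	*link = ins;
}

/* Return the segment containing seq, else the first one past it,
 * else NULL -- the same walk as tcp_write_queue_find(). */
static struct seg *seg_find(struct seg *root, uint32_t seq)
{
	struct seg *best = NULL;

	while (root) {
		if (root->end_seq > seq) {
			best = root;		/* candidate */
			if (root->seq <= seq)
				break;		/* seq inside this one */
			root = root->left;
		} else {
			root = root->right;	/* ends at or before seq */
		}
	}
	return best;
}

int main(void)
{
	struct seg segs[] = { { 1000, 1500 }, { 1500, 2000 }, { 2000, 3000 } };
	struct seg *root = NULL, *s;
	size_t i;

	for (i = 0; i < sizeof(segs) / sizeof(segs[0]); i++)
		seg_insert(&root, &segs[i]);

	s = seg_find(root, 1700);
	printf("1700 falls in [%u, %u)\n",
	       (unsigned)s->seq, (unsigned)s->end_seq);	/* [1500, 2000) */
	return 0;
}

The candidate-then-descend-left step in seg_find() is what lets the lookup
return either the segment covering seq or, failing that, the next segment
in sequence order -- the natural starting point for retransmit processing.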