include/linux/sockios.h | 6 ++ include/linux/tcp.h | 2 net/ipv4/tcp.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/tcp_ipv4.c | 16 ++++-- 4 files changed, 130 insertions(+), 7 deletions(-) diff --git a/include/linux/sockios.h b/include/linux/sockios.h index 7997a50..f5c3e41 100644 --- a/include/linux/sockios.h +++ b/include/linux/sockios.h @@ -127,6 +127,12 @@ /* hardware time stamping: parameters in linux/net_tstamp.h */ #define SIOCSHWTSTAMP 0x89b0 +#define SIOCGINSEQ 0x89b1 /* get copied_seq */ +#define SIOCGOUTSEQS 0x89b2 /* get seqs for pending tx pkts */ +#define SIOCSOUTSEQ 0x89b3 /* set write_seq */ +#define SIOCPEEKOUTQ 0x89b4 /* peek output queue */ +#define SIOCFORCEOUTBD 0x89b5 /* force output packet boundary */ + /* Device private ioctl calls */ /* diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 531ede8..c0945fe 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -365,6 +365,8 @@ struct tcp_sock { u32 snd_up; /* Urgent pointer */ u8 keepalive_probes; /* num of allowed keep alive probes */ + u8 wseq_set : 1;/* Write sequence set via setsockopt */ + u8 force_outbd : 1;/* force packet boundary on next send */ /* * Options received (usually on last packet, some only on SYN packets). */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46febca..3389827 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -464,12 +464,118 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } EXPORT_SYMBOL(tcp_poll); +static int tcp_get_out_seqs(struct sock *sk, u32 __user *p, int size) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + int pos = 0, cnt = size / sizeof(u32); + + if (pos < cnt && put_user(tp->write_seq, &p[pos++])) + return -EFAULT; + + skb_queue_reverse_walk(&sk->sk_write_queue, skb) { + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + + if (pos < cnt && put_user(tcb->seq, &p[pos++])) + return -EFAULT; + } + return pos * sizeof(u32); +} + +static int tcp_peek_outq(struct sock *sk, void __user *arg, int size) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct iovec iov = { .iov_base = arg, .iov_len = size }; + struct sk_buff *skb; + int copied = 0, err = 0; + int outq, skip; + + lock_sock(sk); + + /* XXX: why doesn't SIOCOUTQ[NSD] account for queued fin? */ + outq = tp->write_seq - tp->snd_una; + skb = skb_peek_tail(&sk->sk_write_queue); + if (outq && skb) + outq -= tcp_hdr(skb)->fin; + + skip = outq - min(size, outq); + + skb_queue_walk(&sk->sk_write_queue, skb) { + int off = 0, todo; + + if (skip) { + off = min_t(int, skip, skb->len); + skip -= off; + } + + if (!(todo = skb->len - off)) + continue; + + if (WARN_ON_ONCE(iov.iov_len < todo)) { + err = -EINVAL; + break; + } + + err = skb_copy_datagram_iovec(skb, off, &iov, todo); + if (err) + break; + copied += todo; + } + + release_sock(sk); + + return err ?: copied; +} + int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) { struct tcp_sock *tp = tcp_sk(sk); int answ; switch (cmd) { + case SIOCGOUTSEQS: { + s32 size; + + if (get_user(size, (s32 __user *)arg)) + return -EFAULT; + if (size < 0) + return -EINVAL; + return tcp_get_out_seqs(sk, (u32 __user *)arg, size); + } + case SIOCSOUTSEQ: { + u32 seq; + + if (get_user(seq, (u32 __user *)arg)) + return -EFAULT; + + lock_sock(sk); + answ = -EISCONN; + if ((sk->sk_socket->state == SS_UNCONNECTED && + sk->sk_state == TCP_CLOSE) || sk->sk_state == TCP_LISTEN) { + tp->write_seq = seq; + tp->wseq_set = true; + answ = 0; + } + release_sock(sk); + return answ; + } + case SIOCPEEKOUTQ: { + u32 size; + + if (get_user(size, (u32 __user *)arg)) + return -EFAULT; + if ((int)size < size) + return -EINVAL; + return tcp_peek_outq(sk, (void __user *)arg, size); + } + case SIOCFORCEOUTBD: + lock_sock(sk); + tp->force_outbd = true; + release_sock(sk); + return 0; + } + + switch (cmd) { case SIOCINQ: if (sk->sk_state == TCP_LISTEN) return -EINVAL; @@ -514,6 +620,9 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) else answ = tp->write_seq - tp->snd_nxt; break; + case SIOCGINSEQ: + answ = tp->copied_seq; + break; default: return -ENOIOCTLCMD; } @@ -965,7 +1074,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, copy = max - skb->len; } - if (copy <= 0) { + if (copy <= 0 || unlikely(tp->force_outbd)) { new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. @@ -979,6 +1088,8 @@ new_segment: if (!skb) goto wait_for_memory; + tp->force_outbd = false; + /* * Check whether we can use HW checksum. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 955b8e6..579234c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -201,7 +201,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* Reset inherited state */ tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; - tp->write_seq = 0; + if (!tp->wseq_set) + tp->write_seq = 0; } if (tcp_death_row.sysctl_tw_recycle && @@ -252,12 +253,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->dst); - if (!tp->write_seq) + if (!tp->write_seq && !tp->wseq_set) tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, inet->inet_daddr, inet->inet_sport, usin->sin_port); - + tp->wseq_set = false; inet->inet_id = tp->write_seq ^ jiffies; err = tcp_connect(sk); @@ -1252,7 +1253,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (net_ratelimit()) syn_flood_warning(skb); #ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { + if (sysctl_tcp_syncookies && !tp->wseq_set) { want_cookie = 1; } else #endif @@ -1334,7 +1335,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, tcp_hdr(skb)); - if (want_cookie) { + if (unlikely(tp->wseq_set)) { + isn = tp->write_seq; + tp->wseq_set = false; + } else if (want_cookie) { isn = cookie_v4_init_sequence(sk, skb, &req->mss); req->cookie_ts = tmp_opt.tstamp_ok; } else if (!isn) { @@ -1526,7 +1530,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) } #ifdef CONFIG_SYN_COOKIES - if (!th->syn) + if (!th->syn && !tcp_sk(sk)->wseq_set) sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); #endif return sk;