lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090120083726.GA13806@ff.dom.local>
Date:	Tue, 20 Jan 2009 08:37:26 +0000
From:	Jarek Poplawski <jarkao2@...il.com>
To:	David Miller <davem@...emloft.net>
Cc:	herbert@...dor.apana.org.au, w@....eu, zbr@...emap.net,
	dada1@...mosbay.com, ben@...s.com, mingo@...e.hu,
	linux-kernel@...r.kernel.org, netdev@...r.kernel.org,
	jens.axboe@...cle.com
Subject: Re: [PATCH] tcp: splice as many packets as possible at once

On Thu, Jan 15, 2009 at 03:26:08PM -0800, David Miller wrote:
...
> net: Fix data corruption when splicing from sockets.
> 
> From: Jarek Poplawski <jarkao2@...il.com>
> 
> The trick in socket splicing where we try to convert the skb->data
> into a page based reference using virt_to_page() does not work so
> well.
...
> Signed-off-by: David S. Miller <davem@...emloft.net>
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 65eac77..56272ac 100644
...

Here is a tiny upgrade to save some memory by reusing a page for more
chunks if possible, which I think could be considered, after the
testing of the main patch is finished. (There could be also added an
additional freeing of this cached page before socket destruction,
maybe in tcp_splice_read(), if somebody finds good place.)

Thanks,
Jarek P.

---

 include/net/sock.h  |    4 ++++
 net/core/skbuff.c   |   32 ++++++++++++++++++++++++++------
 net/core/sock.c     |    2 ++
 net/ipv4/tcp_ipv4.c |    8 ++++++++
 4 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 5a3a151..4ded741 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -190,6 +190,8 @@ struct sock_common {
   *	@sk_user_data: RPC layer private data
   *	@sk_sndmsg_page: cached page for sendmsg
   *	@sk_sndmsg_off: cached offset for sendmsg
+  *	@sk_splice_page: cached page for splice
+  *	@sk_splice_off: cached offset for splice
   *	@sk_send_head: front of stuff to transmit
   *	@sk_security: used by security modules
   *	@sk_mark: generic packet mark
@@ -279,6 +281,8 @@ struct sock {
 	struct page		*sk_sndmsg_page;
 	struct sk_buff		*sk_send_head;
 	__u32			sk_sndmsg_off;
+	struct page		*sk_splice_page;
+	__u32			sk_splice_off;
 	int			sk_write_pending;
 #ifdef CONFIG_SECURITY
 	void			*sk_security;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 56272ac..74adc79 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1334,13 +1334,33 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
 }
 
 static inline struct page *linear_to_page(struct page *page, unsigned int len,
-					  unsigned int offset)
+					  unsigned int *offset,
+					  struct sk_buff *skb)
 {
-	struct page *p = alloc_pages(GFP_KERNEL, 0);
+	struct sock *sk = skb->sk;
+	struct page *p = sk->sk_splice_page;
+	unsigned int off;
 
-	if (!p)
-		return NULL;
-	memcpy(page_address(p) + offset, page_address(page) + offset, len);
+	if (!p) {
+new_page:
+		p = sk->sk_splice_page = alloc_pages(sk->sk_allocation, 0);
+		if (!p)
+			return NULL;
+
+		off = sk->sk_splice_off = 0;
+		/* hold this page until it's full or unneeded */
+		get_page(p);
+	} else {
+		off = sk->sk_splice_off;
+		if (off + len > PAGE_SIZE) {
+			put_page(p);
+			goto new_page;
+		}
+	}
+
+	memcpy(page_address(p) + off, page_address(page) + *offset, len);
+	sk->sk_splice_off += len;
+	*offset = off;
 
 	return p;
 }
@@ -1356,7 +1376,7 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
 		return 1;
 
 	if (linear) {
-		page = linear_to_page(page, len, offset);
+		page = linear_to_page(page, len, &offset, skb);
 		if (!page)
 			return 1;
 	} else
diff --git a/net/core/sock.c b/net/core/sock.c
index f3a0d08..6b258a9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1732,6 +1732,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	sk->sk_sndmsg_page	=	NULL;
 	sk->sk_sndmsg_off	=	0;
+	sk->sk_splice_page	=	NULL;
+	sk->sk_splice_off	=	0;
 
 	sk->sk_peercred.pid 	=	0;
 	sk->sk_peercred.uid	=	-1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 19d7b42..cf3d367 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1848,6 +1848,14 @@ void tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
+	/*
+	 * If splice cached page exists, toss it.
+	 */
+	if (sk->sk_splice_page) {
+		__free_page(sk->sk_splice_page);
+		sk->sk_splice_page = NULL;
+	}
+
 	percpu_counter_dec(&tcp_sockets_allocated);
 }
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ