lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 22 Aug 2019 13:24:23 +0100
From:   David Howells <dhowells@...hat.com>
To:     netdev@...r.kernel.org
Cc:     dhowells@...hat.com, linux-afs@...ts.infradead.org,
        linux-kernel@...r.kernel.org
Subject: [PATCH net 8/9] rxrpc: Use shadow refcount for packets in the RxTx
 ring

Use the previously added shadow refcount for packets that are in the Rx/Tx
ring so that the ring itself only ever holds a single ref on the skbuff.

This allows skb_cow_data() to be used by the recvmsg code to make the data
modifyable for in-place decryption without triggering the assertion in
pskb_expand_head:

	BUG_ON(skb_shared(skb));

This *should* be okay as:

 (1) Once rxrpc_input_data() starts attaching the sk_buff to the ring, it
     no longer looks inside the packet (all the parsing was done previously
     and notes were taken in struct rxrpc_skb_priv).

 (2) rxrpc_recvmsg_data() may not run in parallel for a particular call.

 (3) rxrpc_recvmsg_data() cow's the sk_buff the first time it sees it and
     then steps through each pointer from the buffer in order, unpinning as
     it goes.

     Each subpacket is individually and sequentially decrypted in place in
     the sk_buff, hence the need for skb_cow_data().

 (4) No one else can be looking in a packet in the Rx ring once it's there.

The problem was occuring because the softirq handler may be holding a ref
or the ring may be holding multiple refs when skb_cow_data() is called in
rxkad_verify_packet(), and so skb_shared() returns true and
__pskb_pull_tail() dislikes that.  If this occurs, something like the
following report will be generated.

	kernel BUG at net/core/skbuff.c:1463!
	...
	RIP: 0010:pskb_expand_head+0x253/0x2b0
	...
	Call Trace:
	 __pskb_pull_tail+0x49/0x460
	 skb_cow_data+0x6f/0x300
	 rxkad_verify_packet+0x18b/0xb10 [rxrpc]
	 rxrpc_recvmsg_data.isra.11+0x4a8/0xa10 [rxrpc]
	 rxrpc_kernel_recv_data+0x126/0x240 [rxrpc]
	 afs_extract_data+0x51/0x2d0 [kafs]
	 afs_deliver_fs_fetch_data+0x188/0x400 [kafs]
	 afs_deliver_to_call+0xac/0x430 [kafs]
	 afs_wait_for_call_to_complete+0x22f/0x3d0 [kafs]
	 afs_make_call+0x282/0x3f0 [kafs]
	 afs_fs_fetch_data+0x164/0x300 [kafs]
	 afs_fetch_data+0x54/0x130 [kafs]
	 afs_readpages+0x20d/0x340 [kafs]
	 read_pages+0x66/0x180
	 __do_page_cache_readahead+0x188/0x1a0
	 ondemand_readahead+0x17d/0x2e0
	 generic_file_read_iter+0x740/0xc10
	 __vfs_read+0x145/0x1a0
	 vfs_read+0x8c/0x140
	 ksys_read+0x4a/0xb0
	 do_syscall_64+0x43/0xf0
	 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code")
Reported-by: Julian Wollrath <jwollrath@....de>
Signed-off-by: David Howells <dhowells@...hat.com>
---

 net/rxrpc/call_object.c |    2 +-
 net/rxrpc/input.c       |   22 ++++++++++------------
 net/rxrpc/recvmsg.c     |    2 +-
 net/rxrpc/sendmsg.c     |    1 +
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 014548c259ce..830b6152dfa3 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -429,7 +429,7 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call)
 	int i;
 
 	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
-		rxrpc_free_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned);
+		rxrpc_unpin_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned);
 		call->rxtx_buffer[i] = NULL;
 	}
 }
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 31090bdf1fae..660b7eed39b7 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -258,7 +258,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
 		skb = list;
 		list = skb->next;
 		skb_mark_not_on_list(skb);
-		rxrpc_free_skb(skb, rxrpc_skb_freed);
+		rxrpc_unpin_skb(skb, rxrpc_skb_unpin);
 	}
 
 	return rot_last;
@@ -447,6 +447,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
 		return;
 	}
 
+	atomic_set(&sp->nr_ring_pins, 1);
+
 	if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) {
 		unsigned long timo = READ_ONCE(call->next_req_timo);
 		unsigned long now, expect_req_by;
@@ -550,6 +552,12 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
 			ack_serial = serial;
 		}
 
+		/* Each insertion into the rxtx_buffer holds a ring pin.  This
+		 * allows a single ref on the buffer to be shared, thereby
+		 * allowing skb_cow_data() to be used.
+		 */
+		rxrpc_pin_skb(skb, rxrpc_skb_pin);
+
 		/* Queue the packet.  We use a couple of memory barriers here as need
 		 * to make sure that rx_top is perceived to be set after the buffer
 		 * pointer and that the buffer pointer is set after the annotation and
@@ -558,8 +566,6 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
 		 * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window()
 		 * and also rxrpc_fill_out_ack().
 		 */
-		if (!terminal)
-			rxrpc_get_skb(skb, rxrpc_skb_got);
 		call->rxtx_annotations[ix] = annotation;
 		smp_wmb();
 		call->rxtx_buffer[ix] = skb;
@@ -574,14 +580,6 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
 			immediate_ack = true;
 		}
 
-		if (terminal) {
-			/* From this point on, we're not allowed to touch the
-			 * packet any longer as its ref now belongs to the Rx
-			 * ring.
-			 */
-			skb = NULL;
-		}
-
 		if (last) {
 			set_bit(RXRPC_CALL_RX_LAST, &call->flags);
 			if (!ack) {
@@ -620,7 +618,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
 
 unlock:
 	spin_unlock(&call->input_lock);
-	rxrpc_free_skb(skb, rxrpc_skb_freed);
+	rxrpc_unpin_skb(skb, rxrpc_skb_unpin);
 	_leave(" [queued]");
 }
 
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 3b0becb12041..82bb48d96526 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -205,7 +205,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
 	/* Barrier against rxrpc_input_data(). */
 	smp_store_release(&call->rx_hard_ack, hard_ack);
 
-	rxrpc_free_skb(skb, rxrpc_skb_freed);
+	rxrpc_unpin_skb(skb, rxrpc_skb_unpin);
 
 	trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack);
 	if (last) {
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 6a1547b270fe..ba0e2aa268b1 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -175,6 +175,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
 	 */
 	skb->tstamp = ktime_get_real();
 
+	atomic_set(&sp->nr_ring_pins, 1);
 	ix = seq & RXRPC_RXTX_BUFF_MASK;
 	rxrpc_get_skb(skb, rxrpc_skb_got);
 	call->rxtx_annotations[ix] = annotation;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ