lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 12 Nov 2019 09:32:10 -0800
From:   "Jonathan Lemon" <jonathan.lemon@...il.com>
To:     "Alexei Starovoitov" <ast@...com>
Cc:     "Jesper Dangaard Brouer" <brouer@...hat.com>,
        netdev@...r.kernel.org, davem@...emloft.net,
        "Kernel Team" <Kernel-team@...com>, ilias.apalodimas@...aro.org
Subject: Re: [net-next PATCH] page_pool: do not release pool until inflight ==
 0.



On 12 Nov 2019, at 9:23, Alexei Starovoitov wrote:

> On 11/12/19 8:48 AM, Jesper Dangaard Brouer wrote:
>>> The trace_page_pool_state_release() does not dereference pool, it 
>>> just
>>> reports the pointer value, so there shouldn't be any use-after-free.
>> In the tracepoint we can still dereference the pool object pointer.
>> This is made easier via using bpftrace for example see[1] (and with 
>> BTF
>> this will become more common to do so).
>
> bpf tracing progs cannot assume that the pointer is valid.
> The program can remember a kernel pointer in a map and then
> access it days later.
> Like kretprobe on kfree_skb(). The skb is freed. 100% use-after-free.
> Such bpf program is broken and won't be reading meaningful values,
> but it won't crash the kernel.
>
> On the other side we should not be passing pointers to freed objects
> into tracepoints. That just wrong.
> May be simply move that questionable tracepoint?

Yes, move and simplify it.  I believe this patch should resolve the 
issue,
it just reports pages entering/exiting the pool, without trying to 
access
the counters - the counters are reported through the inflight 
tracepoint.
-- 
Jonathan

diff --git a/include/trace/events/page_pool.h 
b/include/trace/events/page_pool.h
index 47b5ee880aa9..0adf9aed9f5b 100644
--- a/include/trace/events/page_pool.h
+++ b/include/trace/events/page_pool.h
@@ -35,50 +35,46 @@ TRACE_EVENT(page_pool_inflight,
  	  __entry->pool, __entry->inflight, __entry->hold, __entry->release)
  );

-TRACE_EVENT(page_pool_state_release,
+TRACE_EVENT(page_pool_page_release,

  	TP_PROTO(const struct page_pool *pool,
-		 const struct page *page, u32 release),
+		 const struct page *page)

-	TP_ARGS(pool, page, release),
+	TP_ARGS(pool, page),

  	TP_STRUCT__entry(
  		__field(const struct page_pool *,	pool)
  		__field(const struct page *,		page)
-		__field(u32,				release)
  	),

  	TP_fast_assign(
  		__entry->pool		= pool;
  		__entry->page		= page;
-		__entry->release	= release;
  	),

-	TP_printk("page_pool=%p page=%p release=%u",
-		  __entry->pool, __entry->page, __entry->release)
+	TP_printk("page_pool=%p page=%p",
+		  __entry->pool, __entry->page)
  );

-TRACE_EVENT(page_pool_state_hold,
+TRACE_EVENT(page_pool_page_hold,

  	TP_PROTO(const struct page_pool *pool,
-		 const struct page *page, u32 hold),
+		 const struct page *page),

-	TP_ARGS(pool, page, hold),
+	TP_ARGS(pool, page),

  	TP_STRUCT__entry(
  		__field(const struct page_pool *,	pool)
  		__field(const struct page *,		page)
-		__field(u32,				hold)
  	),

  	TP_fast_assign(
  		__entry->pool	= pool;
  		__entry->page	= page;
-		__entry->hold	= hold;
  	),

-	TP_printk("page_pool=%p page=%p hold=%u",
-		  __entry->pool, __entry->page, __entry->hold)
+	TP_printk("page_pool=%p page=%p",
+		  __entry->pool, __entry->page)
  );

  #endif /* _TRACE_PAGE_POOL_H */
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index bfe96326335d..05b93ea67ac5 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -163,7 +163,7 @@ static struct page 
*__page_pool_alloc_pages_slow(struct page_pool *pool,
  	/* Track how many pages are held 'in-flight' */
  	pool->pages_state_hold_cnt++;

-	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
+	trace_page_pool_page_hold(pool, page);

  	/* When page just alloc'ed is should/must have refcnt 1. */
  	return page;
@@ -222,9 +223,11 @@ static void __page_pool_clean_page(struct page_pool 
*pool,
  			     DMA_ATTR_SKIP_CPU_SYNC);
  	page->dma_addr = 0;
  skip_dma_unmap:
+	trace_page_pool_page_release(pool, page);
+	/* This may be the last page returned, releasing the pool, so
+	 * it is not safe to reference pool afterwards.
+	 */
  	atomic_inc(&pool->pages_state_release_cnt);
-	trace_page_pool_state_release(pool, page,
-			      atomic_read(&pool->pages_state_release_cnt));
  }

  /* unmap the page and clean our state */

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ