Message-ID: <A370C7C4-FC02-4FC7-B215-585F6909A72A@gmail.com>
Date: Fri, 18 Oct 2019 16:37:34 -0700
From: "Jonathan Lemon" <jonathan.lemon@...il.com>
To: "Saeed Mahameed" <saeedm@...lanox.com>
Cc: ilias.apalodimas@...aro.org, "Tariq Toukan" <tariqt@...lanox.com>,
brouer@...hat.com, kernel-team@...com, netdev@...r.kernel.org
Subject: Re: [PATCH 08/10 net-next] page_pool: Add statistics
On 18 Oct 2019, at 14:29, Saeed Mahameed wrote:
> On Wed, 2019-10-16 at 15:50 -0700, Jonathan Lemon wrote:
>> Add statistics to the page pool, providing visibility into its
>> operation.
>>
>> Callers can provide a location where the stats are stored, otherwise
>> the page pool will allocate a statistic area.
>>
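For illustration, a caller that wants the stats stored externally could wire it up roughly as below. This is a minimal sketch using only the fields added by this patch; the mydrv_* names and the parameter values are hypothetical.

/* Sketch only: assumes the page_pool API as modified by this series. */
struct mydrv_rq {
	struct page_pool *page_pool;
	struct page_pool_stats pp_stats;	/* e.g. reported via ethtool -S */
};

static int mydrv_create_page_pool(struct mydrv_rq *rq, struct device *dev,
				  int nid)
{
	struct page_pool_params pp_params = {
		.flags		= PP_FLAG_DMA_MAP,
		.order		= 0,
		.pool_size	= PP_ALLOC_POOL_DEFAULT,
		.nid		= nid,
		.dev		= dev,
		.dma_dir	= DMA_FROM_DEVICE,
		.stats		= &rq->pp_stats,	/* stats stored externally */
	};

	rq->page_pool = page_pool_create(&pp_params);
	if (IS_ERR(rq->page_pool))
		return PTR_ERR(rq->page_pool);
	return 0;
}

If .stats is left NULL, page_pool_init() below allocates the area itself and marks the pool with PP_FLAG_INTERNAL_STATS so the allocation is freed again in __page_pool_free().
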
>> Signed-off-by: Jonathan Lemon <jonathan.lemon@...il.com>
>> ---
>> include/net/page_pool.h | 21 +++++++++++++---
>> net/core/page_pool.c | 55 +++++++++++++++++++++++++++++++++++------
>> 2 files changed, 65 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
>> index fc340db42f9a..4f383522b141 100644
>> --- a/include/net/page_pool.h
>> +++ b/include/net/page_pool.h
>> @@ -34,8 +34,11 @@
>> #include <linux/ptr_ring.h>
>> #include <linux/dma-direction.h>
>>
>> -#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */
>> -#define PP_FLAG_ALL PP_FLAG_DMA_MAP
>> +#define PP_FLAG_DMA_MAP BIT(0) /* page_pool does the DMA map/unmap */
>> +#define PP_FLAG_ALL (PP_FLAG_DMA_MAP)
>> +
>> +/* internal flags, not exposed to the user */
>> +#define PP_FLAG_INTERNAL_STATS BIT(8)
>>
>> /*
>> * Fast allocation side cache array/stack
>> @@ -57,6 +60,17 @@
>> #define PP_ALLOC_POOL_DEFAULT 1024
>> #define PP_ALLOC_POOL_LIMIT 32768
>>
>> +struct page_pool_stats {
>> + u64 cache_hit;
>> + u64 cache_full;
>> + u64 cache_empty;
>> + u64 ring_produce;
>> + u64 ring_consume;
>> + u64 ring_return;
>> + u64 flush;
>> + u64 node_change;
>> +};
>> +
>> struct page_pool_params {
>> unsigned int flags;
>> unsigned int order;
>> @@ -65,6 +79,7 @@ struct page_pool_params {
>> int nid; /* Numa node id to allocate from pages from */
>> enum dma_data_direction dma_dir; /* DMA mapping direction */
>> struct device *dev; /* device, for DMA pre-mapping purposes */
>> + struct page_pool_stats *stats; /* pool stats stored externally */
>> };
>>
>> struct page_pool {
>> @@ -230,8 +245,8 @@ static inline bool page_pool_put(struct page_pool *pool)
>> static inline void page_pool_update_nid(struct page_pool *pool, int new_nid)
>> {
>> if (unlikely(pool->p.nid != new_nid)) {
>> - /* TODO: Add statistics/trace */
>> pool->p.nid = new_nid;
>> + pool->p.stats->node_change++;
>> }
>> }
>> #endif /* _NET_PAGE_POOL_H */
>> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
>> index f8fedecddb6f..ea6202813584 100644
>> --- a/net/core/page_pool.c
>> +++ b/net/core/page_pool.c
>> @@ -20,9 +20,10 @@
>>
>> static int page_pool_init(struct page_pool *pool)
>> {
>> + int size;
>>
>> /* Validate only known flags were used */
>> - if (pool->p.flags & ~(PP_FLAG_ALL))
>> + if (pool->p.flags & ~PP_FLAG_ALL)
>> return -EINVAL;
>>
>> if (!pool->p.pool_size)
>> @@ -40,8 +41,16 @@ static int page_pool_init(struct page_pool *pool)
>> (pool->p.dma_dir != DMA_BIDIRECTIONAL))
>> return -EINVAL;
>>
>> + if (!pool->p.stats) {
>> + size = sizeof(struct page_pool_stats);
>> + pool->p.stats = kzalloc_node(size, GFP_KERNEL, pool->p.nid);
>> + if (!pool->p.stats)
>> + return -ENOMEM;
>> + pool->p.flags |= PP_FLAG_INTERNAL_STATS;
>> + }
>> +
>> if (ptr_ring_init(&pool->ring, pool->p.pool_size, GFP_KERNEL) < 0)
>> - return -ENOMEM;
>> + goto fail;
>>
>> atomic_set(&pool->pages_state_release_cnt, 0);
>>
>> @@ -52,6 +61,12 @@ static int page_pool_init(struct page_pool *pool)
>> get_device(pool->p.dev);
>>
>> return 0;
>> +
>> +fail:
>> + if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
>> + kfree(pool->p.stats);
>> +
>> + return -ENOMEM;
>> }
>>
>> struct page_pool *page_pool_create(const struct page_pool_params *params)
>> @@ -98,9 +113,11 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
>> if (likely(in_serving_softirq())) {
>> if (likely(pool->alloc_count)) {
>> /* Fast-path */
>> + pool->p.stats->cache_hit++;
>> page = pool->alloc_cache[--pool->alloc_count];
>> return page;
>> }
>> + pool->p.stats->cache_empty++;
>
> This is problematic for 32-bit SMP archs; you need to use the
> u64_stats_sync API.
> In mlx5 we only support 64-bit, unlike the page pool, which must be
> protected here.
Oooh, hmm.
I think Apple had the right idea and dropped 32-bit support
along with the iPhone 5.
Tempted to just make stats a NOP on 32-bit machines.
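Otherwise, a rough sketch of the u64_stats_sync route might look like the
following; the syncp field and the two helpers are illustrative only, not
part of the posted patch:

#include <linux/u64_stats_sync.h>

struct page_pool_stats {
	struct u64_stats_sync syncp;	/* seqcount on 32-bit SMP, empty on 64-bit */
	u64 cache_hit;
	u64 cache_empty;
	/* ... remaining counters ... */
};

/* Writer side: assumes updates are already serialized (NAPI context or
 * under the ring locks), as in the patch; begin/end only guards against
 * a concurrent reader on 32-bit.
 */
static inline void page_pool_stat_inc(struct page_pool_stats *stats,
				      u64 *field)
{
	u64_stats_update_begin(&stats->syncp);
	(*field)++;
	u64_stats_update_end(&stats->syncp);
}

/* Reader side (e.g. an ethtool stats dump): retry until a consistent
 * 64-bit snapshot is observed.
 */
static inline u64 page_pool_stat_read(struct page_pool_stats *stats,
				      const u64 *field)
{
	unsigned int start;
	u64 val;

	do {
		start = u64_stats_fetch_begin(&stats->syncp);
		val = *field;
	} while (u64_stats_fetch_retry(&stats->syncp, start));

	return val;
}

page_pool_init() would also need a u64_stats_init(&pool->p.stats->syncp),
and the NOP-on-32-bit variant would just compile the increments out when
CONFIG_64BIT is not set.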
--
Jonathan
>
>> refill = true;
>> }
>>
>> @@ -113,10 +130,13 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
>> */
>> spin_lock(&r->consumer_lock);
>> page = __ptr_ring_consume(r);
>> - if (refill)
>> + if (refill) {
>> pool->alloc_count = __ptr_ring_consume_batched(r, pool->alloc_cache, PP_ALLOC_CACHE_REFILL);
>> + pool->p.stats->ring_consume += pool->alloc_count;
>> + }
>> + pool->p.stats->ring_consume += !!page;
>> spin_unlock(&r->consumer_lock);
>> return page;
>> }
>> @@ -266,15 +286,23 @@ static void __page_pool_return_page(struct page_pool *pool, struct page *page)
>> static bool __page_pool_recycle_into_ring(struct page_pool *pool,
>> struct page *page)
>> {
>> + struct ptr_ring *r = &pool->ring;
>> int ret;
>>
>> - /* BH protection not needed if current is serving softirq */
>> if (in_serving_softirq())
>> - ret = ptr_ring_produce(&pool->ring, page);
>> + spin_lock(&r->producer_lock);
>> else
>> - ret = ptr_ring_produce_bh(&pool->ring, page);
>> + spin_lock_bh(&r->producer_lock);
>>
>> - return (ret == 0) ? true : false;
>> + ret = __ptr_ring_produce(r, page);
>> + pool->p.stats->ring_produce++;
>> +
>> + if (in_serving_softirq())
>> + spin_unlock(&r->producer_lock);
>> + else
>> + spin_unlock_bh(&r->producer_lock);
>> +
>> + return ret == 0;
>> }
>>
>> /* Only allow direct recycling in special circumstances, into the
>> @@ -285,8 +313,10 @@ static bool __page_pool_recycle_into_ring(struct page_pool *pool,
>> static bool __page_pool_recycle_into_cache(struct page *page,
>> struct page_pool *pool)
>> {
>> - if (unlikely(pool->alloc_count == pool->p.cache_size))
>> + if (unlikely(pool->alloc_count == pool->p.cache_size)) {
>> + pool->p.stats->cache_full++;
>> return false;
>> + }
>>
>> /* Caller MUST have verified/know (page_ref_count(page) == 1) */
>> pool->alloc_cache[pool->alloc_count++] = page;
>> @@ -343,6 +373,7 @@ EXPORT_SYMBOL(__page_pool_put_page);
>> static void __page_pool_empty_ring(struct page_pool *pool)
>> {
>> struct page *page;
>> + int count = 0;
>>
>> /* Empty recycle ring */
>> while ((page = ptr_ring_consume_bh(&pool->ring))) {
>> @@ -351,8 +382,11 @@ static void __page_pool_empty_ring(struct page_pool *pool)
>> pr_crit("%s() page_pool refcnt %d violation\n",
>> __func__, page_ref_count(page));
>>
>> + count++;
>> __page_pool_return_page(pool, page);
>> }
>> +
>> + pool->p.stats->ring_return += count;
>> }
>>
>> static void __warn_in_flight(struct page_pool *pool)
>> @@ -381,6 +415,9 @@ void __page_pool_free(struct page_pool *pool)
>> if (!__page_pool_safe_to_destroy(pool))
>> __warn_in_flight(pool);
>>
>> + if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
>> + kfree(pool->p.stats);
>> +
>> ptr_ring_cleanup(&pool->ring, NULL);
>>
>> if (pool->p.flags & PP_FLAG_DMA_MAP)
>> @@ -394,6 +431,8 @@ static void page_pool_flush(struct page_pool *pool)
>> {
>> struct page *page;
>>
>> + pool->p.stats->flush++;
>> +
>> /* Empty alloc cache, assume caller made sure this is
>> * no-longer in use, and page_pool_alloc_pages() cannot be
>> * called concurrently.