[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200305221753.GA66450@google.com>
Date: Thu, 5 Mar 2020 17:17:53 -0500
From: Joel Fernandes <joel@...lfernandes.org>
To: linux-kernel@...r.kernel.org
Cc: urezki@...il.com, Davidlohr Bueso <dave@...olabs.net>,
Josh Triplett <josh@...htriplett.org>,
Lai Jiangshan <jiangshanlai@...il.com>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
"Paul E. McKenney" <paulmck@...nel.org>, rcu@...r.kernel.org,
Steven Rostedt <rostedt@...dmis.org>
Subject: Re: [PATCH linus/master 2/2] rcu/tree: Add a shrinker to prevent OOM
due to kfree_rcu() batching
On Thu, Mar 05, 2020 at 05:13:23PM -0500, Joel Fernandes (Google) wrote:
> To reduce grace periods and improve kfree() performance, we recently added
> batching, which dramatically brings down the number of grace periods
> while giving us the ability to use kfree_bulk() for efficient kfree'ing.
>
> However, this has increased the likelihood of an OOM condition under a heavy
> kfree_rcu() flood on small-memory systems. This patch introduces a
> shrinker which starts grace periods right away if the system is under
> memory pressure due to the existence of objects for which a grace period
> has not yet been started.
>
> With this patch, I do not observe an OOM anymore on a system with 512MB
> RAM and 8 CPUs, with the following rcuperf options:
>
> rcuperf.kfree_loops=20000 rcuperf.kfree_alloc_num=8000
> rcuperf.kfree_rcu_test=1 rcuperf.kfree_mult=2
Paul,
I may have to rebase this patch on top of Vlad's kfree_bulk() work. But let
us discuss the patch first; I can rebase and repost it once it looks OK to
you. (The kfree_bulk() work should not affect the patch.)
thanks,
- Joel
>
> NOTE:
> On systems with no memory pressure, the patch has no effect, as intended.
>
> Cc: urezki@...il.com
> Signed-off-by: Joel Fernandes (Google) <joel@...lfernandes.org>
>
> ---
> kernel/rcu/tree.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 58 insertions(+)
>
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index d91c9156fab2e..28ec35e15529d 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -2723,6 +2723,8 @@ struct kfree_rcu_cpu {
> struct delayed_work monitor_work;
> bool monitor_todo;
> bool initialized;
> + // Number of objects for which GP not started
> + int count;
> };
>
> static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
> @@ -2791,6 +2793,7 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
>
> krwp->head_free = krcp->head;
> krcp->head = NULL;
> + krcp->count = 0;
> INIT_RCU_WORK(&krwp->rcu_work, kfree_rcu_work);
> queue_rcu_work(system_wq, &krwp->rcu_work);
> return true;
> @@ -2864,6 +2867,7 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> head->func = func;
> head->next = krcp->head;
> krcp->head = head;
> + krcp->count++;
>
> // Set timer to drain after KFREE_DRAIN_JIFFIES.
> if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
> @@ -2879,6 +2883,58 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> }
> EXPORT_SYMBOL_GPL(kfree_call_rcu);
>
> +static unsigned long
> +kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + int cpu;
> + unsigned long flags, count = 0;
> +
> + /* Snapshot count of all CPUs */
> + for_each_online_cpu(cpu) {
> + struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
> +
> + spin_lock_irqsave(&krcp->lock, flags);
> + count += krcp->count;
> + spin_unlock_irqrestore(&krcp->lock, flags);
> + }
> +
> + return count;
> +}
> +
> +static unsigned long
> +kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + int cpu, freed = 0;
> + unsigned long flags;
> +
> + for_each_online_cpu(cpu) {
> + int count;
> + struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
> +
> + count = krcp->count;
> + spin_lock_irqsave(&krcp->lock, flags);
> + if (krcp->monitor_todo)
> + kfree_rcu_drain_unlock(krcp, flags);
> + else
> + spin_unlock_irqrestore(&krcp->lock, flags);
> +
> + sc->nr_to_scan -= count;
> + freed += count;
> +
> + if (sc->nr_to_scan <= 0)
> + break;
> + }
> +
> + return freed;
> +}
> +
> +static struct shrinker kfree_rcu_shrinker = {
> + .count_objects = kfree_rcu_shrink_count,
> + .scan_objects = kfree_rcu_shrink_scan,
> + .batch = 0,
> + .seeks = DEFAULT_SEEKS,
> +};
> +
> void __init kfree_rcu_scheduler_running(void)
> {
> int cpu;
> @@ -3774,6 +3830,8 @@ static void __init kfree_rcu_batch_init(void)
> INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
> krcp->initialized = true;
> }
> + if (register_shrinker(&kfree_rcu_shrinker))
> + pr_err("Failed to register kfree_rcu() shrinker!\n");
> }
>
> void __init rcu_init(void)
> --
> 2.25.0.265.gbab2e86ba0-goog
>
Powered by blists - more mailing lists