Message-Id: <20210310200757.32331-1-urezki@gmail.com>
Date: Wed, 10 Mar 2021 21:07:57 +0100
From: "Uladzislau Rezki (Sony)" <urezki@...il.com>
To: LKML <linux-kernel@...r.kernel.org>, RCU <rcu@...r.kernel.org>,
"Paul E . McKenney" <paulmck@...nel.org>
Cc: Michal Hocko <mhocko@...e.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Daniel Axtens <dja@...ens.net>,
Frederic Weisbecker <frederic@...nel.org>,
Neeraj Upadhyay <neeraju@...eaurora.org>,
Joel Fernandes <joel@...lfernandes.org>,
Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>,
"Theodore Y . Ts'o" <tytso@....edu>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
Uladzislau Rezki <urezki@...il.com>,
Oleksiy Avramchenko <oleksiy.avramchenko@...ymobile.com>,
Zhang Qiang <qiang.zhang@...driver.com>
Subject: [PATCH v2 1/1] kvfree_rcu: Release a page cache under memory pressure

From: Zhang Qiang <qiang.zhang@...driver.com>

Add a drain_page_cache() function to drain the per-cpu page cache.
The reason behind it is that a system can run into a low-memory
condition; in that case the page shrinker asks its users to free
their caches in order to make extra memory available for other
needs in the system.

When the system hits such a condition, the page cache is drained
on all CPUs. Apart from that, refilling of the page cache is
deferred by a 5-second interval until the memory pressure
disappears.

Co-developed-by: Uladzislau Rezki (Sony) <urezki@...il.com>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@...il.com>
Signed-off-by: Zqiang <qiang.zhang@...driver.com>
---
kernel/rcu/tree.c | 59 ++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 51 insertions(+), 8 deletions(-)
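
A note for reviewers on the shrinker/filler handshake added here.
The sketch below is a minimal userspace model of it, not kernel
code: xchg()/WRITE_ONCE() are approximated with C11 atomics, the
delayed workqueue is reduced to a printf(), and the page cache
itself is left out. The function names mirror the patch, but
everything else is an assumption made for illustration only.

  #include <stdatomic.h>
  #include <stdio.h>

  /* Models the global backoff_page_cache_fill flag. */
  static atomic_ulong backoff_fill;

  /* Models kfree_rcu_shrink_count(): besides reporting the
   * object count, it asks the filler to back off. */
  static unsigned long shrink_count(unsigned long nr_cached)
  {
          /* WRITE_ONCE(backoff_page_cache_fill, 1); */
          atomic_store(&backoff_fill, 1);
          return nr_cached;
  }

  /* Models run_page_cache_worker(): refill right away unless
   * the shrinker recently signalled memory pressure. */
  static void run_fill_worker(void)
  {
          /* if (xchg(&backoff_page_cache_fill, 0UL)) */
          if (atomic_exchange(&backoff_fill, 0))
                  printf("low memory: delay the refill by 5 seconds\n");
          else
                  printf("no pressure: refill the page cache now\n");
  }

  int main(void)
  {
          run_fill_worker();   /* no pressure: immediate refill */
          shrink_count(16);    /* shrinker ran: backoff flag set */
          run_fill_worker();   /* flag seen: refill is deferred  */
          run_fill_worker();   /* flag consumed: immediate again */
          return 0;
  }

Note that xchg() consumes the flag, so one shrinker invocation
delays at most the next refill rather than stalling the cache
indefinitely. For context, the shrinker whose count/scan hooks are
touched at the bottom of the diff is defined outside these hunks;
in tree.c it follows the usual pattern, roughly:

  static struct shrinker kfree_rcu_shrinker = {
          .count_objects = kfree_rcu_shrink_count,
          .scan_objects = kfree_rcu_shrink_scan,
          .batch = 0,
          .seeks = DEFAULT_SEEKS,
  };
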
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2c9cf4df942c..46b8a98ca077 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3163,7 +3163,7 @@ struct kfree_rcu_cpu {
bool initialized;
int count;
- struct work_struct page_cache_work;
+ struct delayed_work page_cache_work;
atomic_t work_in_progress;
struct hrtimer hrtimer;
@@ -3175,6 +3175,17 @@ static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc) = {
.lock = __RAW_SPIN_LOCK_UNLOCKED(krc.lock),
};
+// A page shrinker can ask to free extra pages to make
+// them available for other needs in the system. This
+// usually happens under a low-memory condition, in
+// which case hold off on filling the page cache.
+static unsigned long backoff_page_cache_fill;
+
+// A 5-second delay. That is long enough to reduce
+// interference and racing with the shrinker, which
+// is what drains the cache.
+#define PAGE_CACHE_FILL_DELAY (5 * HZ)
+
static __always_inline void
debug_rcu_bhead_unqueue(struct kvfree_rcu_bulk_data *bhead)
{
@@ -3229,6 +3240,26 @@ put_cached_bnode(struct kfree_rcu_cpu *krcp,
}
+static int
+drain_page_cache(struct kfree_rcu_cpu *krcp)
+{
+ unsigned long flags;
+ struct llist_node *page_list, *pos, *n;
+ int freed = 0;
+
+ raw_spin_lock_irqsave(&krcp->lock, flags);
+ page_list = llist_del_all(&krcp->bkvcache);
+ krcp->nr_bkv_objs = 0;
+ raw_spin_unlock_irqrestore(&krcp->lock, flags);
+
+ llist_for_each_safe(pos, n, page_list) {
+ free_page((unsigned long)pos);
+ freed++;
+ }
+
+ return freed;
+}
+
/*
* This function is invoked in workqueue context after a grace period.
* It frees all the objects queued on ->bhead_free or ->head_free.
@@ -3419,7 +3450,7 @@ schedule_page_work_fn(struct hrtimer *t)
struct kfree_rcu_cpu *krcp =
container_of(t, struct kfree_rcu_cpu, hrtimer);
- queue_work(system_highpri_wq, &krcp->page_cache_work);
+ queue_delayed_work(system_highpri_wq, &krcp->page_cache_work, 0);
return HRTIMER_NORESTART;
}
@@ -3428,7 +3459,7 @@ static void fill_page_cache_func(struct work_struct *work)
struct kvfree_rcu_bulk_data *bnode;
struct kfree_rcu_cpu *krcp =
container_of(work, struct kfree_rcu_cpu,
- page_cache_work);
+ page_cache_work.work);
unsigned long flags;
bool pushed;
int i;
@@ -3457,10 +3488,14 @@ run_page_cache_worker(struct kfree_rcu_cpu *krcp)
{
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
!atomic_xchg(&krcp->work_in_progress, 1)) {
- hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
- krcp->hrtimer.function = schedule_page_work_fn;
- hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
+ if (xchg(&backoff_page_cache_fill, 0UL)) {
+ queue_delayed_work(system_wq,
+ &krcp->page_cache_work, PAGE_CACHE_FILL_DELAY);
+ } else {
+ hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ krcp->hrtimer.function = schedule_page_work_fn;
+ hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
+ }
}
}
@@ -3612,14 +3647,20 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
int cpu;
unsigned long count = 0;
+ unsigned long flags;
/* Snapshot count of all CPUs */
for_each_possible_cpu(cpu) {
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
count += READ_ONCE(krcp->count);
+
+ raw_spin_lock_irqsave(&krcp->lock, flags);
+ count += krcp->nr_bkv_objs;
+ raw_spin_unlock_irqrestore(&krcp->lock, flags);
}
+ WRITE_ONCE(backoff_page_cache_fill, 1);
return count;
}
@@ -3634,6 +3675,8 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
count = krcp->count;
+ count += drain_page_cache(krcp);
+
raw_spin_lock_irqsave(&krcp->lock, flags);
if (krcp->monitor_todo)
kfree_rcu_drain_unlock(krcp, flags);
@@ -4608,7 +4651,7 @@ static void __init kfree_rcu_batch_init(void)
}
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
- INIT_WORK(&krcp->page_cache_work, fill_page_cache_func);
+ INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func);
krcp->initialized = true;
}
if (register_shrinker(&kfree_rcu_shrinker))
--
2.20.1