[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240515125342.1069999-2-haakon.bugge@oracle.com>
Date: Wed, 15 May 2024 14:53:37 +0200
From: Håkon Bugge <haakon.bugge@...cle.com>
To: linux-rdma@...r.kernel.org, linux-kernel@...r.kernel.org,
netdev@...r.kernel.org, rds-devel@....oracle.com
Cc: Jason Gunthorpe <jgg@...pe.ca>, Leon Romanovsky <leon@...nel.org>,
Saeed Mahameed <saeedm@...dia.com>, Tariq Toukan <tariqt@...dia.com>,
"David S . Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Tejun Heo <tj@...nel.org>,
Lai Jiangshan <jiangshanlai@...il.com>,
Allison Henderson <allison.henderson@...cle.com>,
Manjunath Patil <manjunath.b.patil@...cle.com>,
Mark Zhang <markzhang@...dia.com>,
Håkon Bugge <haakon.bugge@...cle.com>,
Chuck Lever <chuck.lever@...cle.com>,
Shiraz Saleem <shiraz.saleem@...el.com>,
Yang Li <yang.lee@...ux.alibaba.com>
Subject: [PATCH v2 1/6] workqueue: Inherit NOIO and NOFS alloc flags
For drivers/modules running inside a
memalloc_{noio,nofs}_{save,restore} region, if a work-queue is
created, we make sure work executed on the work-queue inherits the
same flag(s).
This is done in order to conditionally enable drivers to work aligned
with block I/O devices. This commit makes sure that any work queued
later on work-queues created during module initialization, when
current's flags have PF_MEMALLOC_{NOIO,NOFS} set, will inherit the
same flags.
We do this in order to enable drivers to be used as a network block
I/O device. This is in order to support XFS or other file-systems on
top of a raw block device which uses said drivers as the network
transport layer.
Under intense memory pressure, we get memory reclaims. Assume the
file-system reclaims memory, goes to the raw block device, which calls
into said drivers. Now, if regular GFP_KERNEL allocations in the
drivers require reclaims to be fulfilled, we end up in a circular
dependency.
We break this circular dependency by:
1. Force all allocations in the drivers to use GFP_NOIO, by bracketing
all relevant entry points with a memalloc_noio_{save,restore}
pair.
2. Make sure work-queues inherit current->flags
wrt. PF_MEMALLOC_{NOIO,NOFS}, such that work executed on the
work-queue inherits the same flag(s). That is what this commit
contributes.
Signed-off-by: Håkon Bugge <haakon.bugge@...cle.com>
---
v1 -> v2:
* Added missing hunk in alloc_workqueue()
---
include/linux/workqueue.h | 2 ++
kernel/workqueue.c | 21 +++++++++++++++++++++
2 files changed, 23 insertions(+)
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 158784dd189ab..09ecc692ffcae 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -398,6 +398,8 @@ enum wq_flags {
__WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */
__WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */
__WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */
+ __WQ_NOIO = 1 << 19, /* internal: execute work with NOIO */
+ __WQ_NOFS = 1 << 20, /* internal: execute work with NOFS */
/* BH wq only allows the following flags */
__WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d2dbe099286b9..8eb7562372ce2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -51,6 +51,7 @@
#include <linux/uaccess.h>
#include <linux/sched/isolation.h>
#include <linux/sched/debug.h>
+#include <linux/sched/mm.h>
#include <linux/nmi.h>
#include <linux/kvm_para.h>
#include <linux/delay.h>
@@ -3172,6 +3173,10 @@ __acquires(&pool->lock)
unsigned long work_data;
int lockdep_start_depth, rcu_start_depth;
bool bh_draining = pool->flags & POOL_BH_DRAINING;
+ bool use_noio_allocs = pwq->wq->flags & __WQ_NOIO;
+ bool use_nofs_allocs = pwq->wq->flags & __WQ_NOFS;
+ unsigned long noio_flags;
+ unsigned long nofs_flags;
#ifdef CONFIG_LOCKDEP
/*
* It is permissible to free the struct work_struct from
@@ -3184,6 +3189,12 @@ __acquires(&pool->lock)
lockdep_copy_map(&lockdep_map, &work->lockdep_map);
#endif
+ /* Set inherited alloc flags */
+ if (use_noio_allocs)
+ noio_flags = memalloc_noio_save();
+ if (use_nofs_allocs)
+ nofs_flags = memalloc_nofs_save();
+
/* ensure we're on the correct CPU */
WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
raw_smp_processor_id() != pool->cpu);
@@ -3320,6 +3331,12 @@ __acquires(&pool->lock)
/* must be the last step, see the function comment */
pwq_dec_nr_in_flight(pwq, work_data);
+
+ /* Restore alloc flags */
+ if (use_nofs_allocs)
+ memalloc_nofs_restore(nofs_flags);
+ if (use_noio_allocs)
+ memalloc_noio_restore(noio_flags);
}
/**
@@ -5583,6 +5600,10 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
/* init wq */
wq->flags = flags;
+ if (current->flags & PF_MEMALLOC_NOIO)
+ wq->flags |= __WQ_NOIO;
+ if (current->flags & PF_MEMALLOC_NOFS)
+ wq->flags |= __WQ_NOFS;
wq->max_active = max_active;
wq->min_active = min(max_active, WQ_DFL_MIN_ACTIVE);
wq->saved_max_active = wq->max_active;
--
2.45.0
Powered by blists - more mailing lists