lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20250819114932.597600-7-dev@lankhorst.se>
Date: Tue, 19 Aug 2025 13:49:35 +0200
From: Maarten Lankhorst <dev@...khorst.se>
To: Lucas De Marchi <lucas.demarchi@...el.com>,
	'Thomas Hellström' <thomas.hellstrom@...ux.intel.com>,
	Rodrigo Vivi <rodrigo.vivi@...el.com>,
	David Airlie <airlied@...il.com>,
	Simona Vetter <simona@...ll.ch>,
	Maarten Lankhorst <dev@...khorst.se>,
	Maxime Ripard <mripard@...nel.org>,
	Natalie Vock <natalie.vock@....de>,
	Tejun Heo <tj@...nel.org>,
	Johannes Weiner <hannes@...xchg.org>,
	'Michal Koutný' <mkoutny@...e.com>,
	Michal Hocko <mhocko@...nel.org>,
	Roman Gushchin <roman.gushchin@...ux.dev>,
	Shakeel Butt <shakeel.butt@...ux.dev>,
	Muchun Song <muchun.song@...ux.dev>,
	Andrew Morton <akpm@...ux-foundation.org>,
	David Hildenbrand <david@...hat.com>,
	Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
	"'Liam R . Howlett'" <Liam.Howlett@...cle.com>,
	Vlastimil Babka <vbabka@...e.cz>,
	Mike Rapoport <rppt@...nel.org>,
	Suren Baghdasaryan <surenb@...gle.com>,
	Thomas Zimmermann <tzimmermann@...e.de>
Cc: Michal Hocko <mhocko@...e.com>,
	intel-xe@...ts.freedesktop.org,
	dri-devel@...ts.freedesktop.org,
	linux-kernel@...r.kernel.org,
	cgroups@...r.kernel.org,
	linux-mm@...ck.org
Subject: [RFC 2/3] cgroup/dmem: Implement pinning device memory

Add functions to pin and unpin device memory, and adjust the
calculations in dmem_cgroup_state_evict_valuable() to take pinned
memory into account.

Signed-off-by: Maarten Lankhorst <dev@...khorst.se>
---
 include/linux/cgroup_dmem.h |  2 ++
 kernel/cgroup/dmem.c        | 57 +++++++++++++++++++++++++++++++++++--
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/include/linux/cgroup_dmem.h b/include/linux/cgroup_dmem.h
index dd4869f1d736e..a981bb692ba22 100644
--- a/include/linux/cgroup_dmem.h
+++ b/include/linux/cgroup_dmem.h
@@ -21,6 +21,8 @@ int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size,
 			   struct dmem_cgroup_pool_state **ret_pool,
 			   struct dmem_cgroup_pool_state **ret_limit_pool);
 void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size);
+int dmem_cgroup_try_pin(struct dmem_cgroup_pool_state *pool, u64 size);
+void dmem_cgroup_unpin(struct dmem_cgroup_pool_state *pool, u64 size);
 bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool,
 				      struct dmem_cgroup_pool_state *test_pool,
 				      bool ignore_low, bool *ret_hit_low);
diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c
index 10b63433f0573..ec8b1ffec78de 100644
--- a/kernel/cgroup/dmem.c
+++ b/kernel/cgroup/dmem.c
@@ -147,6 +147,11 @@ static u64 get_resource_current(struct dmem_cgroup_pool_state *pool)
 	return pool ? page_counter_read(&pool->cnt) : 0;
 }
 
+static u64 get_resource_pinned(struct dmem_cgroup_pool_state *pool)
+{
+	return pool ? page_counter_pinned(&pool->cnt) : 0;
+}
+
 static void reset_all_resource_limits(struct dmem_cgroup_pool_state *rpool)
 {
 	set_resource_min(rpool, 0);
@@ -270,7 +275,7 @@ bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool,
 {
 	struct dmem_cgroup_pool_state *pool = test_pool;
 	struct page_counter *ctest;
-	u64 used, min, low;
+	u64 used, min, low, pinned;
 
 	/* Can always evict from current pool, despite limits */
 	if (limit_pool == test_pool)
@@ -296,16 +301,18 @@ bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool,
 
 	ctest = &test_pool->cnt;
 
+	/* Protection is calculated without pinned memory */
 	dmem_cgroup_calculate_protection(limit_pool, test_pool);
 
 	used = page_counter_read(ctest);
-	min = READ_ONCE(ctest->emin);
+	pinned = page_counter_pinned(ctest);
+	min = READ_ONCE(ctest->emin) + pinned;
 
 	if (used <= min)
 		return false;
 
 	if (!ignore_low) {
-		low = READ_ONCE(ctest->elow);
+		low = READ_ONCE(ctest->elow) + pinned;
 		if (used > low)
 			return true;
 
@@ -641,6 +648,41 @@ int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size,
 }
 EXPORT_SYMBOL_GPL(dmem_cgroup_try_charge);
 
+/**
+ * dmem_cgroup_unpin() - Unpin from a pool.
+ * @pool: Pool to unpin.
+ * @size: Size to unpin.
+ *
+ * Undoes the effects of dmem_cgroup_try_pin().
+ * Must be called with the same @pool and @size that were
+ * passed to dmem_cgroup_try_pin().
+ */
+void dmem_cgroup_unpin(struct dmem_cgroup_pool_state *pool, u64 size)
+{
+	if (pool)
+		page_counter_unpin(&pool->cnt, size);
+}
+EXPORT_SYMBOL_GPL(dmem_cgroup_unpin);
+
+/**
+ * dmem_cgroup_try_pin() - Try pinning an existing allocation to a region.
+ * @pool: Pool in which to account the pinned memory.
+ * @size: Size (in bytes) to pin.
+ *
+ * This function pins in @pool for a size of @size bytes.
+ *
+ * If the function succeeds, the memory is successfully accounted as being pinned.
+ * The memory may not be uncharged before unpin is called.
+ *
+ * Return: 0 on success, or -EAGAIN on hitting a limit.
+ */
+int dmem_cgroup_try_pin(struct dmem_cgroup_pool_state *pool, u64 size)
+{
+	return page_counter_try_pin(&pool->cnt, size) ? 0 : -EAGAIN;
+
+}
+EXPORT_SYMBOL_GPL(dmem_cgroup_try_pin);
+
 static int dmem_cgroup_region_capacity_show(struct seq_file *sf, void *v)
 {
 	struct dmem_cgroup_region *region;
@@ -756,6 +798,11 @@ static int dmem_cgroup_region_current_show(struct seq_file *sf, void *v)
 	return dmemcg_limit_show(sf, v, get_resource_current);
 }
 
+static int dmem_cgroup_region_pinned_show(struct seq_file *sf, void *v)
+{
+	return dmemcg_limit_show(sf, v, get_resource_pinned);
+}
+
 static int dmem_cgroup_region_min_show(struct seq_file *sf, void *v)
 {
 	return dmemcg_limit_show(sf, v, get_resource_min);
@@ -799,6 +846,10 @@ static struct cftype files[] = {
 		.name = "current",
 		.seq_show = dmem_cgroup_region_current_show,
 	},
+	{
+		.name = "pinned",
+		.seq_show = dmem_cgroup_region_pinned_show,
+	},
 	{
 		.name = "min",
 		.write = dmem_cgroup_region_min_write,
-- 
2.50.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ