Message-Id: <20210402191638.3249835-4-schatzberg.dan@gmail.com>
Date: Fri, 2 Apr 2021 12:16:34 -0700
From: Dan Schatzberg <schatzberg.dan@...il.com>
To: unlisted-recipients:; (no To-header on input)
Cc: Jens Axboe <axboe@...nel.dk>, Tejun Heo <tj@...nel.org>,
Zefan Li <lizefan.x@...edance.com>,
Johannes Weiner <hannes@...xchg.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Michal Hocko <mhocko@...nel.org>,
Vladimir Davydov <vdavydov.dev@...il.com>,
Hugh Dickins <hughd@...gle.com>,
Shakeel Butt <shakeelb@...gle.com>,
Roman Gushchin <guro@...com>,
Muchun Song <songmuchun@...edance.com>,
Yang Shi <shy828301@...il.com>,
Alex Shi <alex.shi@...ux.alibaba.com>,
Alexander Duyck <alexander.h.duyck@...ux.intel.com>,
Wei Yang <richard.weiyang@...il.com>,
linux-block@...r.kernel.org (open list:BLOCK LAYER),
linux-kernel@...r.kernel.org (open list),
cgroups@...r.kernel.org (open list:CONTROL GROUP (CGROUP)),
linux-mm@...ck.org (open list:MEMORY MANAGEMENT)
Subject: [PATCH 3/3] loop: Charge i/o to mem and blk cg
The current code only associates with the existing blkcg when aio is
used to access the backing file. This patch covers all types of i/o to
the backing file and also associates the memcg so that, if the backing
file is on tmpfs, memory is charged appropriately.
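For review convenience, the net per-request flow (a condensed sketch of
the loop_queue_rq() and loop_handle_cmd() hunks below, with NULL checks,
ifdefs and error handling elided) is roughly:

	/* loop_queue_rq(): record the submitter's cgroups on the command */
	cmd->blkcg_css = &bio_blkcg(rq->bio)->css;
	cmd->memcg_css = cgroup_get_e_css(cmd->blkcg_css->cgroup,
					  &memory_cgrp_subsys);

	/* loop_handle_cmd(): issue the backing-file i/o as those cgroups */
	kthread_associate_blkcg(cmd->blkcg_css);
	old_memcg = set_active_memcg(mem_cgroup_from_css(cmd->memcg_css));
	ret = do_req_filebacked(lo, rq);
	kthread_associate_blkcg(NULL);
	set_active_memcg(old_memcg);
	css_put(cmd->memcg_css);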
This patch also exports cgroup_get_e_css and int_active_memcg so they
can be used by the loop module.
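cgroup_get_e_css() is called directly from loop.c in the loop_queue_rq()
hunk below, hence its EXPORT_SYMBOL_GPL. int_active_memcg needs the
per-CPU export because set_active_memcg() is a static inline, so a
modular loop build ends up referencing the per-CPU variable directly;
in this kernel version it looks roughly like:

	static inline struct mem_cgroup *
	set_active_memcg(struct mem_cgroup *memcg)
	{
		struct mem_cgroup *old;

		if (in_interrupt()) {
			old = this_cpu_read(int_active_memcg);
			this_cpu_write(int_active_memcg, memcg);
		} else {
			old = current->active_memcg;
			current->active_memcg = memcg;
		}
		return old;
	}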
Signed-off-by: Dan Schatzberg <schatzberg.dan@...il.com>
Acked-by: Johannes Weiner <hannes@...xchg.org>
---
drivers/block/loop.c | 61 +++++++++++++++++++++++++-------------
drivers/block/loop.h | 3 +-
include/linux/memcontrol.h | 6 ++++
kernel/cgroup/cgroup.c | 1 +
mm/memcontrol.c | 1 +
5 files changed, 51 insertions(+), 21 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 4750b373d4bb..d2759f8a7c2a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -78,6 +78,7 @@
#include <linux/uio.h>
#include <linux/ioprio.h>
#include <linux/blk-cgroup.h>
+#include <linux/sched/mm.h>
#include "loop.h"
@@ -516,8 +517,6 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
{
struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
- if (cmd->css)
- css_put(cmd->css);
cmd->ret = ret;
lo_rw_aio_do_completion(cmd);
}
@@ -578,8 +577,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
cmd->iocb.ki_complete = lo_rw_aio_complete;
cmd->iocb.ki_flags = IOCB_DIRECT;
cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
- if (cmd->css)
- kthread_associate_blkcg(cmd->css);
if (rw == WRITE)
ret = call_write_iter(file, &cmd->iocb, &iter);
@@ -587,7 +584,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
ret = call_read_iter(file, &cmd->iocb, &iter);
lo_rw_aio_do_completion(cmd);
- kthread_associate_blkcg(NULL);
if (ret != -EIOCBQUEUED)
cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
@@ -928,7 +924,7 @@ struct loop_worker {
struct list_head cmd_list;
struct list_head idle_list;
struct loop_device *lo;
- struct cgroup_subsys_state *css;
+ struct cgroup_subsys_state *blkcg_css;
unsigned long last_ran_at;
};
@@ -945,7 +941,7 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
spin_lock_irq(&lo->lo_work_lock);
- if (!cmd->css)
+ if (!cmd->blkcg_css)
goto queue_work;
node = &lo->worker_tree.rb_node;
@@ -953,10 +949,10 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
while (*node) {
parent = *node;
cur_worker = container_of(*node, struct loop_worker, rb_node);
- if (cur_worker->css == cmd->css) {
+ if (cur_worker->blkcg_css == cmd->blkcg_css) {
worker = cur_worker;
break;
- } else if ((long)cur_worker->css < (long)cmd->css) {
+ } else if ((long)cur_worker->blkcg_css < (long)cmd->blkcg_css) {
node = &(*node)->rb_left;
} else {
node = &(*node)->rb_right;
@@ -968,13 +964,18 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
worker = kzalloc(sizeof(struct loop_worker), GFP_NOWAIT | __GFP_NOWARN);
/*
* In the event we cannot allocate a worker, just queue on the
- * rootcg worker
+ * rootcg worker and issue the I/O as the rootcg
*/
- if (!worker)
+ if (!worker) {
+ cmd->blkcg_css = NULL;
+ if (cmd->memcg_css)
+ css_put(cmd->memcg_css);
+ cmd->memcg_css = NULL;
goto queue_work;
+ }
- worker->css = cmd->css;
- css_get(worker->css);
+ worker->blkcg_css = cmd->blkcg_css;
+ css_get(worker->blkcg_css);
INIT_WORK(&worker->work, loop_workfn);
INIT_LIST_HEAD(&worker->cmd_list);
INIT_LIST_HEAD(&worker->idle_list);
@@ -1294,7 +1295,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
idle_list) {
list_del(&worker->idle_list);
rb_erase(&worker->rb_node, &lo->worker_tree);
- css_put(worker->css);
+ css_put(worker->blkcg_css);
kfree(worker);
}
spin_unlock_irq(&lo->lo_work_lock);
@@ -2099,13 +2100,18 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
}
/* always use the first bio's css */
+ cmd->blkcg_css = NULL;
+ cmd->memcg_css = NULL;
#ifdef CONFIG_BLK_CGROUP
- if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
- cmd->css = &bio_blkcg(rq->bio)->css;
- css_get(cmd->css);
- } else
+ if (rq->bio && rq->bio->bi_blkg) {
+ cmd->blkcg_css = &bio_blkcg(rq->bio)->css;
+#ifdef CONFIG_MEMCG
+ cmd->memcg_css =
+ cgroup_get_e_css(cmd->blkcg_css->cgroup,
+ &memory_cgrp_subsys);
+#endif
+ }
#endif
- cmd->css = NULL;
loop_queue_work(lo, cmd);
return BLK_STS_OK;
@@ -2117,13 +2123,28 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
const bool write = op_is_write(req_op(rq));
struct loop_device *lo = rq->q->queuedata;
int ret = 0;
+ struct mem_cgroup *old_memcg = NULL;
if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
ret = -EIO;
goto failed;
}
+ if (cmd->blkcg_css)
+ kthread_associate_blkcg(cmd->blkcg_css);
+ if (cmd->memcg_css)
+ old_memcg = set_active_memcg(
+ mem_cgroup_from_css(cmd->memcg_css));
+
ret = do_req_filebacked(lo, rq);
+
+ if (cmd->blkcg_css)
+ kthread_associate_blkcg(NULL);
+
+ if (cmd->memcg_css) {
+ set_active_memcg(old_memcg);
+ css_put(cmd->memcg_css);
+ }
failed:
/* complete non-aio request */
if (!cmd->use_aio || ret) {
@@ -2202,7 +2223,7 @@ static void loop_free_idle_workers(struct timer_list *timer)
break;
list_del(&worker->idle_list);
rb_erase(&worker->rb_node, &lo->worker_tree);
- css_put(worker->css);
+ css_put(worker->blkcg_css);
kfree(worker);
}
if (!list_empty(&lo->idle_worker_list))
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 9289c1cd6374..cd24a81e00e6 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -76,7 +76,8 @@ struct loop_cmd {
long ret;
struct kiocb iocb;
struct bio_vec *bvec;
- struct cgroup_subsys_state *css;
+ struct cgroup_subsys_state *blkcg_css;
+ struct cgroup_subsys_state *memcg_css;
};
/* Support for loadable transfer modules */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b8b0a802852c..a92500734f90 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1249,6 +1249,12 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
return NULL;
}
+static inline
+struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
+{
+ return NULL;
+}
+
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index e049edd66776..8c84a5374238 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -577,6 +577,7 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
rcu_read_unlock();
return css;
}
+EXPORT_SYMBOL_GPL(cgroup_get_e_css);
static void cgroup_get_live(struct cgroup *cgrp)
{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d2939d6602b3..f12886a85e8b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -78,6 +78,7 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
/* Active memory cgroup to use from an interrupt context */
DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
+EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
/* Socket memory accounting disabled? */
static bool cgroup_memory_nosocket;
--
2.30.2