lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAGWkznGW+W+x0JCAmJJBYznSAWqnYDPTpe_p=8ubFx8C+V9oNQ@mail.gmail.com>
Date: Fri, 26 Jan 2024 17:49:34 +0800
From: Zhaoyang Huang <huangzhaoyang@...il.com>
To: Matthew Wilcox <willy@...radead.org>
Cc: "zhaoyang.huang" <zhaoyang.huang@...soc.com>, Alexander Viro <viro@...iv.linux.org.uk>, 
	Christian Brauner <brauner@...nel.org>, linux-fsdevel@...r.kernel.org, 
	Andrew Morton <akpm@...ux-foundation.org>, Jens Axboe <axboe@...nel.dk>, Yu Zhao <yuzhao@...gle.com>, 
	Damien Le Moal <dlemoal@...nel.org>, Niklas Cassel <niklas.cassel@....com>, 
	"Martin K . Petersen" <martin.petersen@...cle.com>, Hannes Reinecke <hare@...e.de>, 
	Linus Walleij <linus.walleij@...aro.org>, linux-mm@...ck.org, linux-block@...r.kernel.org, 
	linux-kernel@...r.kernel.org, steve.kang@...soc.com
Subject: Re: [PATCHv3 1/1] block: introduce content activity based ioprio

On Fri, Jan 26, 2024 at 5:36 PM Matthew Wilcox <willy@...radead.org> wrote:
>
> On Fri, Jan 26, 2024 at 05:28:58PM +0800, Zhaoyang Huang wrote:
> > On Fri, Jan 26, 2024 at 4:55 PM Matthew Wilcox <willy@...radead.org> wrote:
> > >
> > > On Fri, Jan 26, 2024 at 03:59:48PM +0800, Zhaoyang Huang wrote:
> > > > loop more mm and fs guys for more comments
> > >
> > > I agree with everything Damien said.  But also ...
> > ok, I will find a way to solve this problem.
> > >
> > > > > +bool BIO_ADD_FOLIO(struct bio *bio, struct folio *folio, size_t len,
> > > > > +               size_t off)
> > >
> > > You don't add any users of these functions.  It's hard to assess whether
> > > this is the right API when there are no example users.
> > > Actually, the code was tested on ext4 and f2fs with patch v2 on a
> > > v6.6 6GB Android system, which is where the test results posted in the
> > > commit message come from. These APIs are meant to keep the block layer
> > > clean and wrap things up for the fs.
>
> well, where's patch v2?  i don't see it in my inbox.  i'm not going
> to go hunting around the email lists for it.  this is not good enough.
>
> > > why are BIO_ADD_PAGE and BIO_ADD_FOLIO so very different from each
> > > other?
> > These two APIs just repeat the same thing that bio_add_page and
> > bio_add_folio do.
>
> what?
>
> here's the patch you sent.  these two functions do wildly different
> things:
>
> +bool BIO_ADD_FOLIO(struct bio *bio, struct folio *folio, size_t len,
> +               size_t off)
> +{
> +       int class, level, hint, activity;
> +
> +       if (len > UINT_MAX || off > UINT_MAX)
> +               return false;
> +
> +       class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
> +       level = IOPRIO_PRIO_LEVEL(bio->bi_ioprio);
> +       hint = IOPRIO_PRIO_HINT(bio->bi_ioprio);
> +       activity = IOPRIO_PRIO_ACTIVITY(bio->bi_ioprio);
> +
> +       activity += (bio->bi_vcnt + 1 <= IOPRIO_NR_ACTIVITY &&
> +                       PageWorkingset(&folio->page)) ? 1 : 0;
> +       if (activity >= bio->bi_vcnt / 2)
> +               class = IOPRIO_CLASS_RT;
> +       else if (activity >= bio->bi_vcnt / 4)
> +               class = max(IOPRIO_PRIO_CLASS(get_current_ioprio()), IOPRIO_CLASS_BE);
> +
> +       bio->bi_ioprio = IOPRIO_PRIO_VALUE_ACTIVITY(class, level, hint, activity);
> +
> +       return bio_add_page(bio, &folio->page, len, off) > 0;
> +}
> +
> +int BIO_ADD_PAGE(struct bio *bio, struct page *page,
> +               unsigned int len, unsigned int offset)
> +{
> +       int class, level, hint, activity;
> +
> +       if (bio_add_page(bio, page, len, offset) > 0) {
> +               class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
> +               level = IOPRIO_PRIO_LEVEL(bio->bi_ioprio);
> +               hint = IOPRIO_PRIO_HINT(bio->bi_ioprio);
> +               activity = IOPRIO_PRIO_ACTIVITY(bio->bi_ioprio);
> +               activity += (bio->bi_vcnt <= IOPRIO_NR_ACTIVITY && PageWorkingset(page)) ? 1 : 0;
> +               bio->bi_ioprio = IOPRIO_PRIO_VALUE_ACTIVITY(class, level, hint, activity);
> +       }
> +
> +       return len;
> +}
>
> did you change one and forget to change the other?
Sorry for leaving you off the recipient list. Please find patch v2 below,
where all activity calculation is located within __bio_add_page, which aims
at avoiding iterating the bio->bvec before submit_bio. This was rejected
by Jens as it introduces page operations in the block layer.

block/Kconfig               |  8 ++++++++
 block/bio.c                 | 10 ++++++++++
 block/blk-mq.c              | 21 +++++++++++++++++++++
 fs/buffer.c                 |  6 ++++++
 include/linux/buffer_head.h |  1 +
 include/uapi/linux/ioprio.h | 20 +++++++++++++++-----
 6 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/block/Kconfig b/block/Kconfig
index f1364d1c0d93..8d6075575eae 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -228,6 +228,14 @@ config BLOCK_HOLDER_DEPRECATED
 config BLK_MQ_STACKING
        bool

+config CONTENT_ACT_BASED_IOPRIO
+       bool "Enable content activity based ioprio"
+       depends on LRU_GEN
+       default y
+       help
+       This option enables adjusting a bio's priority by
+       calculating the activity of its content.
+
 source "block/Kconfig.iosched"

 endif # BLOCK
diff --git a/block/bio.c b/block/bio.c
index 816d412c06e9..1228e2a4940f 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -24,6 +24,7 @@
 #include "blk.h"
 #include "blk-rq-qos.h"
 #include "blk-cgroup.h"
+#include "blk-ioprio.h"

 #define ALLOC_CACHE_THRESHOLD  16
 #define ALLOC_CACHE_MAX                256
@@ -1069,12 +1070,21 @@ EXPORT_SYMBOL_GPL(bio_add_zone_append_page);
 void __bio_add_page(struct bio *bio, struct page *page,
                unsigned int len, unsigned int off)
 {
+       int class, level, hint, activity;
+
+       class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
+       level = IOPRIO_PRIO_LEVEL(bio->bi_ioprio);
+       hint = IOPRIO_PRIO_HINT(bio->bi_ioprio);
+       activity = IOPRIO_PRIO_ACTIVITY(bio->bi_ioprio);
+
        WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
        WARN_ON_ONCE(bio_full(bio, len));

        bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, off);
        bio->bi_iter.bi_size += len;
        bio->bi_vcnt++;
+       activity += bio_page_if_active(bio, page, IOPRIO_NR_ACTIVITY);
+       bio->bi_ioprio = IOPRIO_PRIO_VALUE_ACTIVITY(class, level,
hint, activity);
 }
 EXPORT_SYMBOL_GPL(__bio_add_page);

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1fafd54dce3c..05cdd3adde94 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2939,6 +2939,26 @@ static inline struct request
*blk_mq_get_cached_request(struct request_queue *q,
        return rq;
 }

+#ifdef CONFIG_CONTENT_ACT_BASED_IOPRIO
+static void bio_set_ioprio(struct bio *bio)
+{
+       int class, level, hint, activity;
+
+       class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
+       level = IOPRIO_PRIO_LEVEL(bio->bi_ioprio);
+       hint = IOPRIO_PRIO_HINT(bio->bi_ioprio);
+       activity = IOPRIO_PRIO_ACTIVITY(bio->bi_ioprio);
+
+       if (activity >= bio->bi_vcnt / 2)
+               class = IOPRIO_CLASS_RT;
+       else if (activity >= bio->bi_vcnt / 4)
+               class = max(IOPRIO_PRIO_CLASS(get_current_ioprio()),
IOPRIO_CLASS_BE);
+
+       bio->bi_ioprio = IOPRIO_PRIO_VALUE_ACTIVITY(class, level,
hint, activity);
+
+       blkcg_set_ioprio(bio);
+}
+#else
 static void bio_set_ioprio(struct bio *bio)
 {
        /* Nobody set ioprio so far? Initialize it based on task's nice value */
@@ -2946,6 +2966,7 @@ static void bio_set_ioprio(struct bio *bio)
                bio->bi_ioprio = get_current_ioprio();
        blkcg_set_ioprio(bio);
 }
+#endif

 /**
  * blk_mq_submit_bio - Create and send a request to block device.
diff --git a/fs/buffer.c b/fs/buffer.c
index 12e9a71c693d..b15bff481706 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2832,6 +2832,12 @@ void submit_bh(blk_opf_t opf, struct buffer_head *bh)
 }
 EXPORT_SYMBOL(submit_bh);

+int bio_page_if_active(struct bio *bio, struct page *page, unsigned
short limit)
+{
+       return (bio->bi_vcnt <= limit && PageWorkingset(page)) ? 1 : 0;
+}
+EXPORT_SYMBOL(bio_page_if_active);
+
 void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
 {
        lock_buffer(bh);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 44e9de51eedf..9a374f5965ec 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -248,6 +248,7 @@ int bh_uptodate_or_lock(struct buffer_head *bh);
 int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
 void __bh_read_batch(int nr, struct buffer_head *bhs[],
                     blk_opf_t op_flags, bool force_lock);
+int bio_page_if_active(struct bio *bio, struct page *page, unsigned
short limit);

 /*
  * Generic address_space_operations implementations for buffer_head-backed
diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h
index bee2bdb0eedb..d1c6081e796b 100644
--- a/include/uapi/linux/ioprio.h
+++ b/include/uapi/linux/ioprio.h
@@ -71,12 +71,18 @@ enum {
  * class and level.
  */
 #define IOPRIO_HINT_SHIFT              IOPRIO_LEVEL_NR_BITS
-#define IOPRIO_HINT_NR_BITS            10
+#define IOPRIO_HINT_NR_BITS            3
 #define IOPRIO_NR_HINTS                        (1 << IOPRIO_HINT_NR_BITS)
 #define IOPRIO_HINT_MASK               (IOPRIO_NR_HINTS - 1)
 #define IOPRIO_PRIO_HINT(ioprio)       \
        (((ioprio) >> IOPRIO_HINT_SHIFT) & IOPRIO_HINT_MASK)

+#define IOPRIO_ACTIVITY_SHIFT          (IOPRIO_HINT_NR_BITS + IOPRIO_LEVEL_NR_BITS)
+#define IOPRIO_ACTIVITY_NR_BITS                7
+#define IOPRIO_NR_ACTIVITY             (1 << IOPRIO_ACTIVITY_NR_BITS)
+#define IOPRIO_ACTIVITY_MASK           (IOPRIO_NR_ACTIVITY - 1)
+#define IOPRIO_PRIO_ACTIVITY(ioprio)   \
+       (((ioprio) >> IOPRIO_ACTIVITY_SHIFT) & IOPRIO_ACTIVITY_MASK)
 /*
  * I/O hints.
  */
@@ -108,20 +114,24 @@ enum {
  * Return an I/O priority value based on a class, a level and a hint.
  */
 static __always_inline __u16 ioprio_value(int prioclass, int priolevel,
-                                         int priohint)
+                                         int priohint, int activity)
 {
        if (IOPRIO_BAD_VALUE(prioclass, IOPRIO_NR_CLASSES) ||
            IOPRIO_BAD_VALUE(priolevel, IOPRIO_NR_LEVELS) ||
-           IOPRIO_BAD_VALUE(priohint, IOPRIO_NR_HINTS))
+           IOPRIO_BAD_VALUE(priohint, IOPRIO_NR_HINTS) ||
+           IOPRIO_BAD_VALUE(activity, IOPRIO_NR_ACTIVITY))
                return IOPRIO_CLASS_INVALID << IOPRIO_CLASS_SHIFT;

        return (prioclass << IOPRIO_CLASS_SHIFT) |
+               (activity << IOPRIO_ACTIVITY_SHIFT) |
                (priohint << IOPRIO_HINT_SHIFT) | priolevel;
 }

 #define IOPRIO_PRIO_VALUE(prioclass, priolevel)                        \
-       ioprio_value(prioclass, priolevel, IOPRIO_HINT_NONE)
+       ioprio_value(prioclass, priolevel, IOPRIO_HINT_NONE, 0)
 #define IOPRIO_PRIO_VALUE_HINT(prioclass, priolevel, priohint) \
-       ioprio_value(prioclass, priolevel, priohint)
+       ioprio_value(prioclass, priolevel, priohint, 0)
+#define IOPRIO_PRIO_VALUE_ACTIVITY(prioclass, priolevel, priohint, activity)   \
+       ioprio_value(prioclass, priolevel, priohint, activity)

 #endif /* _UAPI_LINUX_IOPRIO_H */


>
> > These white spaces are trimmed by vim, I will change them back in next version.
>
> vim doesn't do that by default.
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ