[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <02ff6c8e2199487b9d43ad2aa1fd813a@honor.com>
Date: Wed, 18 Sep 2024 06:19:28 +0000
From: gaoxu <gaoxu2@...or.com>
To: Barry Song <21cnbao@...il.com>, "akpm@...ux-foundation.org"
<akpm@...ux-foundation.org>, "linux-mm@...ck.org" <linux-mm@...ck.org>
CC: "mhocko@...e.com" <mhocko@...e.com>, fengbaopeng <fengbaopeng@...or.com>,
"hailong.liu@...o.com" <hailong.liu@...o.com>, "kaleshsingh@...gle.com"
<kaleshsingh@...gle.com>, "linux-kernel@...r.kernel.org"
<linux-kernel@...r.kernel.org>, "lokeshgidra@...gle.com"
<lokeshgidra@...gle.com>, "ngeoffray@...gle.com" <ngeoffray@...gle.com>,
"shli@...com" <shli@...com>, "surenb@...gle.com" <surenb@...gle.com>,
yipengxiang <yipengxiang@...or.com>, "david@...hat.com" <david@...hat.com>,
"yuzhao@...gle.com" <yuzhao@...gle.com>, "minchan@...nel.org"
<minchan@...nel.org>, Barry Song <v-songbaohua@...o.com>
Subject: 回复: [PATCH RFC] mm: mglru: provide a separate list for lazyfree anon folios
>
> From: Barry Song <v-songbaohua@...o.com>
>
> This follows up on the discussion regarding Gaoxu's work[1]. It's
> unclear if there's still interest in implementing a separate LRU
> list for lazyfree folios, but I decided to explore it out of
> curiosity.
>
> According to Lokesh, MADV_FREE'd anon folios are expected to be
> released earlier than file folios. One option, as implemented
> by Gao Xu, is to place lazyfree anon folios at the tail of the
> file's `min_seq` generation. However, this approach results in
> lazyfree folios being released in a LIFO manner, which conflicts
> with LRU behavior, as noted by Michal.
>
> To address this, this patch proposes maintaining a separate list
> for lazyfree anon folios while keeping them classified under the
> "file" LRU type to minimize code changes. These lazyfree anon
> folios will still be counted as file folios and share the same
> generation with regular files. In the eviction path, the lazyfree
> list will be prioritized for scanning before the actual file
> LRU list.
This looks like a very feasible solution. I will run comparative tests
based on this patch and share the results (it will take some time).
Thanks to Barry for providing the patch!
>
> [1]
> https://lore.kernel.org/linux-mm/f29f64e29c08427b95e3df30a5770056@honor
> .com/
>
> Signed-off-by: Barry Song <v-songbaohua@...o.com>
> ---
> include/linux/mm_inline.h | 5 +-
> include/linux/mmzone.h | 2 +-
> mm/vmscan.c | 97 +++++++++++++++++++++++----------------
> 3 files changed, 61 insertions(+), 43 deletions(-)
>
> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
> index f4fe593c1400..118d70ed3120 100644
> --- a/include/linux/mm_inline.h
> +++ b/include/linux/mm_inline.h
> @@ -225,6 +225,7 @@ static inline bool lru_gen_add_folio(struct lruvec
> *lruvec, struct folio *folio,
> int gen = folio_lru_gen(folio);
> int type = folio_is_file_lru(folio);
> int zone = folio_zonenum(folio);
> + int lazyfree = type ? folio_test_anon(folio) : 0;
> struct lru_gen_folio *lrugen = &lruvec->lrugen;
>
> VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
> @@ -262,9 +263,9 @@ static inline bool lru_gen_add_folio(struct lruvec
> *lruvec, struct folio *folio,
> lru_gen_update_size(lruvec, folio, -1, gen);
> /* for folio_rotate_reclaimable() */
> if (reclaiming)
> - list_add_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> + list_add_tail(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> else
> - list_add(&folio->lru, &lrugen->folios[gen][type][zone]);
> + list_add(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
>
> return true;
> }
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 17506e4a2835..5d2331778528 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -434,7 +434,7 @@ struct lru_gen_folio {
> /* the birth time of each generation in jiffies */
> unsigned long timestamps[MAX_NR_GENS];
> /* the multi-gen LRU lists, lazily sorted on eviction */
> - struct list_head
> folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
> + struct list_head folios[MAX_NR_GENS][ANON_AND_FILE +
> 1][MAX_NR_ZONES];
For better readability and future extensibility, could we use an enum
instead of bare numbers here — e.g. introduce a new type such as
enum folio_type?
> /* the multi-gen LRU sizes, eventually consistent */
> long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
> /* the exponential moving average of refaulted */
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 96abf4a52382..9dc665dc6ba9 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3725,21 +3725,25 @@ static bool inc_min_seq(struct lruvec *lruvec, int
> type, bool can_swap)
>
> /* prevent cold/hot inversion if force_scan is true */
> for (zone = 0; zone < MAX_NR_ZONES; zone++) {
> - struct list_head *head = &lrugen->folios[old_gen][type][zone];
> + int list_num = type ? 2 : 1;
> + struct list_head *head;
>
> - while (!list_empty(head)) {
> - struct folio *folio = lru_to_folio(head);
> + for (int i = list_num - 1; i >= 0; i--) {
> + head = &lrugen->folios[old_gen][type + i][zone];
> + while (!list_empty(head)) {
> + struct folio *folio = lru_to_folio(head);
>
> - VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> folio);
> - VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> - VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> folio);
> - VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> folio);
> + VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> folio);
> + VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> + VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> folio);
> + VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> folio);
>
> - new_gen = folio_inc_gen(lruvec, folio, false);
> - list_move_tail(&folio->lru,
> &lrugen->folios[new_gen][type][zone]);
> + new_gen = folio_inc_gen(lruvec, folio, false);
> + list_move_tail(&folio->lru, &lrugen->folios[new_gen][type +
> i][zone]);
>
> - if (!--remaining)
> - return false;
> + if (!--remaining)
> + return false;
> + }
> }
> }
> done:
> @@ -4291,6 +4295,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
> int refs = folio_lru_refs(folio);
> int tier = lru_tier_from_refs(refs);
> struct lru_gen_folio *lrugen = &lruvec->lrugen;
> + int lazyfree = type ? folio_test_anon(folio) : 0;
>
> VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
>
> @@ -4306,7 +4311,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
>
> /* promoted */
> if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
> - list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> + list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> return true;
> }
>
> @@ -4315,7 +4320,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
> int hist = lru_hist_from_seq(lrugen->min_seq[type]);
>
> gen = folio_inc_gen(lruvec, folio, false);
> - list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> + list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> lazyfree][zone]);
>
> WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
> lrugen->protected[hist][type][tier - 1] + delta);
> @@ -4325,7 +4330,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
> /* ineligible */
> if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
> gen = folio_inc_gen(lruvec, folio, false);
> - list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> + list_move_tail(&folio->lru, &lrugen->folios[gen][type +
> lazyfree][zone]);
> return true;
> }
>
> @@ -4333,7 +4338,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio
> *folio, struct scan_c
> if (folio_test_locked(folio) || folio_test_writeback(folio) ||
> (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
> gen = folio_inc_gen(lruvec, folio, true);
> - list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> + list_move(&folio->lru, &lrugen->folios[gen][type + lazyfree][zone]);
> return true;
> }
>
> @@ -4377,7 +4382,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct
> folio *folio, struct sca
> static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> int type, int tier, struct list_head *list)
> {
> - int i;
> + int i, j;
> int gen;
> enum vm_event_item item;
> int sorted = 0;
> @@ -4399,33 +4404,38 @@ static int scan_folios(struct lruvec *lruvec, struct
> scan_control *sc,
> LIST_HEAD(moved);
> int skipped_zone = 0;
> int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
> - struct list_head *head = &lrugen->folios[gen][type][zone];
> -
> - while (!list_empty(head)) {
> - struct folio *folio = lru_to_folio(head);
> - int delta = folio_nr_pages(folio);
> -
> - VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> folio);
> - VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> - VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> folio);
> - VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> folio);
> -
> - scanned += delta;
> + int list_num = type ? 2 : 1;
> + struct list_head *head;
> +
> + for (j = list_num - 1; j >= 0; j--) {
> + head = &lrugen->folios[gen][type + j][zone];
> + while (!list_empty(head)) {
> + struct folio *folio = lru_to_folio(head);
> + int delta = folio_nr_pages(folio);
> +
> + VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio),
> folio);
> + VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> + VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type,
> folio);
> + VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone,
> folio);
> +
> + scanned += delta;
> +
> + if (sort_folio(lruvec, folio, sc, tier))
> + sorted += delta;
> + else if (isolate_folio(lruvec, folio, sc)) {
> + list_add(&folio->lru, list);
> + isolated += delta;
> + } else {
> + list_move(&folio->lru, &moved);
> + skipped_zone += delta;
> + }
>
> - if (sort_folio(lruvec, folio, sc, tier))
> - sorted += delta;
> - else if (isolate_folio(lruvec, folio, sc)) {
> - list_add(&folio->lru, list);
> - isolated += delta;
> - } else {
> - list_move(&folio->lru, &moved);
> - skipped_zone += delta;
> + if (!--remaining || max(isolated, skipped_zone) >=
> MIN_LRU_BATCH)
> + goto isolate_done;
> }
> -
> - if (!--remaining || max(isolated, skipped_zone) >=
> MIN_LRU_BATCH)
> - break;
> }
>
> +isolate_done:
> if (skipped_zone) {
> list_splice(&moved, head);
> __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
> @@ -5586,8 +5596,15 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
> for (i = 0; i <= MIN_NR_GENS + 1; i++)
> lrugen->timestamps[i] = jiffies;
>
> - for_each_gen_type_zone(gen, type, zone)
> + for_each_gen_type_zone(gen, type, zone) {
> INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
> + /*
> + * lazyfree anon folios have a separate list while using
> + * file as type
> + */
> + if (type)
> + INIT_LIST_HEAD(&lrugen->folios[gen][type + 1][zone]);
> + }
>
> if (mm_state)
> mm_state->seq = MIN_NR_GENS;
> --
> 2.39.3 (Apple Git-146)
Powered by blists - more mailing lists