linux-kernel - Re: [PATCH] mm: Add Kcompressd for accelerated memory compression

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aBqzcIteOzC9mRjY@harry>
Date: Wed, 7 May 2025 10:12:16 +0900
From: Harry Yoo <harry.yoo@...cle.com>
To: Qun-Wei Lin <qun-wei.lin@...iatek.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>, Mike Rapoport <rppt@...nel.org>,
        Matthias Brugger <matthias.bgg@...il.com>,
        AngeloGioacchino Del Regno <angelogioacchino.delregno@...labora.com>,
        Nhat Pham <nphamcs@...il.com>,
        Sergey Senozhatsky <senozhatsky@...omium.org>,
        Minchan Kim <minchan@...nel.org>, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
        linux-mediatek@...ts.infradead.org, Casper Li <casper.li@...iatek.com>,
        Chinwen Chang <chinwen.chang@...iatek.com>,
        Andrew Yang <andrew.yang@...iatek.com>,
        James Hsu <james.hsu@...iatek.com>, Barry Song <21cnbao@...il.com>,
        Zi Yan <ziy@...dia.com>
Subject: Re: [PATCH] mm: Add Kcompressd for accelerated memory compression

On Wed, Apr 30, 2025 at 04:26:41PM +0800, Qun-Wei Lin wrote:
> This patch introduces a new mechanism called kcompressd to improve the
> efficiency of memory reclaim in the operating system.
> 
> Problem:
>   In the current system, the kswapd thread is responsible for both scanning
>   the LRU pages and handling memory compression tasks (such as those
>   involving ZSWAP/ZRAM, if enabled). This combined responsibility can lead
>   to significant performance bottlenecks, especially under high memory
>   pressure. The kswapd thread becomes a single point of contention, causing
>   delays in memory reclaiming and overall system performance degradation.
> 
> Solution:
>   Introduce kcompressd to handle asynchronous compression during memory
>   reclaim, improving efficiency by offloading compression tasks from
>   kswapd. This allows kswapd to focus on its primary task of page reclaim
>   without being burdened by the additional overhead of compression.
> 
> On our handheld devices, we found that applying this mechanism under high
> memory pressure can increase pgsteal_anon per second by over 260%
> compared with using kswapd alone. Additionally, we
> observed a reduction of over 50% in page allocation stall occurrences,
> further demonstrating the effectiveness of kcompressd in alleviating memory
> pressure and improving system responsiveness.
> 
> Co-developed-by: Barry Song <21cnbao@...il.com>
> Signed-off-by: Barry Song <21cnbao@...il.com>
> Signed-off-by: Qun-Wei Lin <qun-wei.lin@...iatek.com>
> Reference: Re: [PATCH 0/2] Improve Zram by separating compression context from kswapd - Barry Song
>            https://lore.kernel.org/lkml/20250313093005.13998-1-21cnbao@gmail.com/
> ---

+Cc Zi Yan, who might be interested in writing a framework (or improving
the existing one, padata) for parallelizing jobs (e.g. migration/compression).

>  include/linux/mmzone.h |  6 ++++
>  mm/mm_init.c           |  1 +
>  mm/page_io.c           | 71 ++++++++++++++++++++++++++++++++++++++++++
>  mm/swap.h              |  6 ++++
>  mm/vmscan.c            | 25 +++++++++++++++
>  5 files changed, 109 insertions(+)
> 
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 6ccec1bf2896..93c9195a54ae 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -23,6 +23,7 @@
>  #include <linux/page-flags.h>
>  #include <linux/local_lock.h>
>  #include <linux/zswap.h>
> +#include <linux/kfifo.h>
>  #include <asm/page.h>
>  
>  /* Free memory management - zoned buddy allocator.  */
> @@ -1398,6 +1399,11 @@ typedef struct pglist_data {
>  
>  	int kswapd_failures;		/* Number of 'reclaimed == 0' runs */
>  
> +#define KCOMPRESS_FIFO_SIZE 256
> +	wait_queue_head_t kcompressd_wait;
> +	struct task_struct *kcompressd;
> +	struct kfifo kcompress_fifo;
> +
>  #ifdef CONFIG_COMPACTION
>  	int kcompactd_max_order;
>  	enum zone_type kcompactd_highest_zoneidx;
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 9659689b8ace..49bae1dd4584 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -1410,6 +1410,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
>  	pgdat_init_kcompactd(pgdat);
>  
>  	init_waitqueue_head(&pgdat->kswapd_wait);
> +	init_waitqueue_head(&pgdat->kcompressd_wait);
>  	init_waitqueue_head(&pgdat->pfmemalloc_wait);
>  
>  	for (i = 0; i < NR_VMSCAN_THROTTLE; i++)
> diff --git a/mm/page_io.c b/mm/page_io.c
> index 4bce19df557b..d85deb494a6a 100644
> --- a/mm/page_io.c
> +++ b/mm/page_io.c
> @@ -233,6 +233,38 @@ static void swap_zeromap_folio_clear(struct folio *folio)
>  	}
>  }
>  
> +static bool swap_sched_async_compress(struct folio *folio)
> +{
> +	struct swap_info_struct *sis = swp_swap_info(folio->swap);
> +	int nid = numa_node_id();
> +	pg_data_t *pgdat = NODE_DATA(nid);
> +
> +	if (unlikely(!pgdat->kcompressd))
> +		return false;
> +
> +	if (!current_is_kswapd())
> +		return false;
> +
> +	if (!folio_test_anon(folio))
> +		return false;
> +	/*
> +	 * This case needs to synchronously return AOP_WRITEPAGE_ACTIVATE
> +	 */
> +	if (!mem_cgroup_zswap_writeback_enabled(folio_memcg(folio)))
> +		return false;
> +
> +	sis = swp_swap_info(folio->swap);
> +	if (zswap_is_enabled() || data_race(sis->flags & SWP_SYNCHRONOUS_IO)) {
> +		if (kfifo_avail(&pgdat->kcompress_fifo) >= sizeof(folio) &&
> +			kfifo_in(&pgdat->kcompress_fifo, &folio, sizeof(folio))) {
> +			wake_up_interruptible(&pgdat->kcompressd_wait);
> +			return true;
> +		}
> +	}
> +
> +	return false;
> +}
> +
>  /*
>   * We may have stale swap cache pages in memory: notice
>   * them here and get rid of the unnecessary final write.
> @@ -275,6 +307,15 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
>  		 */
>  		swap_zeromap_folio_clear(folio);
>  	}
> +
> +	/*
> +	 * Compression within zswap and zram might block rmap and unmap
> +	 * of both file and anon pages, so try to do the compression
> +	 * asynchronously if possible.
> +	 */
> +	if (swap_sched_async_compress(folio))
> +		return 0;
> +
>  	if (zswap_store(folio)) {
>  		count_mthp_stat(folio_order(folio), MTHP_STAT_ZSWPOUT);
>  		folio_unlock(folio);
> @@ -289,6 +330,36 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
>  	return 0;
>  }
>  
> +int kcompressd(void *p)
> +{
> +	pg_data_t *pgdat = (pg_data_t *)p;
> +	struct folio *folio;
> +	struct writeback_control wbc = {
> +		.sync_mode = WB_SYNC_NONE,
> +		.nr_to_write = SWAP_CLUSTER_MAX,
> +		.range_start = 0,
> +		.range_end = LLONG_MAX,
> +		.for_reclaim = 1,
> +	};
> +
> +	while (!kthread_should_stop()) {
> +		wait_event_interruptible(pgdat->kcompressd_wait,
> +				!kfifo_is_empty(&pgdat->kcompress_fifo));
> +
> +		while (!kfifo_is_empty(&pgdat->kcompress_fifo)) {
> +			if (kfifo_out(&pgdat->kcompress_fifo, &folio, sizeof(folio))) {
> +				if (zswap_store(folio)) {
> +					count_mthp_stat(folio_order(folio), MTHP_STAT_ZSWPOUT);
> +					folio_unlock(folio);
> +					continue;
> +				}
> +				__swap_writepage(folio, &wbc);
> +			}
> +		}
> +	}
> +	return 0;
> +}
> +
>  static inline void count_swpout_vm_event(struct folio *folio)
>  {
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> diff --git a/mm/swap.h b/mm/swap.h
> index 6f4a3f927edb..3579da413dc2 100644
> --- a/mm/swap.h
> +++ b/mm/swap.h
> @@ -22,6 +22,7 @@ static inline void swap_read_unplug(struct swap_iocb *plug)
>  void swap_write_unplug(struct swap_iocb *sio);
>  int swap_writepage(struct page *page, struct writeback_control *wbc);
>  void __swap_writepage(struct folio *folio, struct writeback_control *wbc);
> +int kcompressd(void *p);
>  
>  /* linux/mm/swap_state.c */
>  /* One swap address space for each 64M swap space */
> @@ -199,6 +200,11 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
>  	return 0;
>  }
>  
> +static inline int kcompressd(void *p)
> +{
> +	return 0;
> +}
> +
>  #endif /* CONFIG_SWAP */
>  
>  #endif /* _MM_SWAP_H */
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 3783e45bfc92..2d7b9167bfd6 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -7420,6 +7420,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
>  void __meminit kswapd_run(int nid)
>  {
>  	pg_data_t *pgdat = NODE_DATA(nid);
> +	int ret;
>  
>  	pgdat_kswapd_lock(pgdat);
>  	if (!pgdat->kswapd) {
> @@ -7433,7 +7434,26 @@ void __meminit kswapd_run(int nid)
>  		} else {
>  			wake_up_process(pgdat->kswapd);
>  		}
> +		ret = kfifo_alloc(&pgdat->kcompress_fifo,
> +				KCOMPRESS_FIFO_SIZE * sizeof(struct folio *),
> +				GFP_KERNEL);
> +		if (ret) {
> +			pr_err("%s: fail to kfifo_alloc\n", __func__);
> +			goto out;
> +		}
> +
> +		pgdat->kcompressd = kthread_create_on_node(kcompressd, pgdat, nid,
> +				"kcompressd%d", nid);
> +		if (IS_ERR(pgdat->kcompressd)) {
> +			pr_err("Failed to start kcompressd on node %d，ret=%ld\n",
> +					nid, PTR_ERR(pgdat->kcompressd));
> +			pgdat->kcompressd = NULL;
> +			kfifo_free(&pgdat->kcompress_fifo);
> +		} else {
> +			wake_up_process(pgdat->kcompressd);
> +		}
>  	}
> +out:
>  	pgdat_kswapd_unlock(pgdat);
>  }
>  
> @@ -7452,6 +7472,11 @@ void __meminit kswapd_stop(int nid)
>  		kthread_stop(kswapd);
>  		pgdat->kswapd = NULL;
>  	}
> +	if (pgdat->kcompressd) {
> +		kthread_stop(pgdat->kcompressd);
> +		pgdat->kcompressd = NULL;
> +		kfifo_free(&pgdat->kcompress_fifo);
> +	}
>  	pgdat_kswapd_unlock(pgdat);
>  }
>  
> -- 
> 2.45.2
> 
> 

-- 
Cheers,
Harry / Hyeonggon