Message-ID: <20250407234223.1059191-5-nphamcs@gmail.com>
Date: Mon, 7 Apr 2025 16:42:05 -0700
From: Nhat Pham <nphamcs@...il.com>
To: linux-mm@...ck.org
Cc: akpm@...ux-foundation.org,
hannes@...xchg.org,
hughd@...gle.com,
yosry.ahmed@...ux.dev,
mhocko@...nel.org,
roman.gushchin@...ux.dev,
shakeel.butt@...ux.dev,
muchun.song@...ux.dev,
len.brown@...el.com,
chengming.zhou@...ux.dev,
kasong@...cent.com,
chrisl@...nel.org,
huang.ying.caritas@...il.com,
ryan.roberts@....com,
viro@...iv.linux.org.uk,
baohua@...nel.org,
osalvador@...e.de,
lorenzo.stoakes@...cle.com,
christophe.leroy@...roup.eu,
pavel@...nel.org,
kernel-team@...a.com,
linux-kernel@...r.kernel.org,
cgroups@...r.kernel.org,
linux-pm@...r.kernel.org
Subject: [RFC PATCH 04/14] mm: swap: swap cache support for virtualized swap
Currently, the swap cache code assumes that the swap space has a fixed
size. The virtual swap space is dynamically sized, so the existing
partitioning code cannot be easily reused. Dynamic partitioning is
planned, but for now keep the design simple and just use a flat swap
cache for vswap.
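To illustrate, the swap cache lookup key differs between the two designs
roughly as in the sketch below. The lookup_space()/lookup_index() helpers
are made up for this example; the expressions mirror the mm/swap.h hunk
further down:

	/* Illustrative sketch only -- relies on definitions in mm/swap.h. */
	#ifdef CONFIG_VIRTUAL_SWAP
	/* Flat swap cache: one global address_space, keyed by the raw entry. */
	static struct address_space *lookup_space(swp_entry_t entry)
	{
		return &swapper_space;	/* single global cache (static in swap_state.c) */
	}

	static pgoff_t lookup_index(swp_entry_t entry)
	{
		return entry.val;
	}
	#else
	/* Range-partitioned: one address_space per 64M of each swap device. */
	static struct address_space *lookup_space(swp_entry_t entry)
	{
		return &swapper_spaces[swp_type(entry)]
				      [swp_offset(entry) >> SWAP_ADDRESS_SPACE_SHIFT];
	}

	static pgoff_t lookup_index(swp_entry_t entry)
	{
		return swp_offset(entry) & SWAP_ADDRESS_SPACE_MASK;
	}
	#endif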
Since the vswap implementation has begun to diverge from the old
implementation, we also introduce a new build config
(CONFIG_VIRTUAL_SWAP). Users who do not select this config will get the
old implementation, with no behavioral change.
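As a rough caller-side sketch (the peek_swap_cache() helper below is
hypothetical; real callers go through the filemap helpers), existing
swap cache users keep going through swap_address_space() and
swap_cache_index(), so they compile and behave the same under either
configuration:

	/* Hypothetical caller, for illustration only. */
	static void *peek_swap_cache(swp_entry_t entry)
	{
		struct address_space *as = swap_address_space(entry);

		/* Works whether the cache is flat or range-partitioned. */
		return xa_load(&as->i_pages, swap_cache_index(entry));
	}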
Signed-off-by: Nhat Pham <nphamcs@...il.com>
---
mm/Kconfig | 13 ++++++++++
mm/swap.h | 22 ++++++++++------
mm/swap_state.c | 68 +++++++++++++++++++++++++++++++++++++++++--------
3 files changed, 85 insertions(+), 18 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index 1b501db06417..1a6acdb64333 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -22,6 +22,19 @@ menuconfig SWAP
used to provide more virtual memory than the actual RAM present
in your computer. If unsure say Y.
+config VIRTUAL_SWAP
+ bool "Swap space virtualization"
+ depends on SWAP
+ default n
+ help
+ When this is selected, the kernel is built with the new swap
+ design. This will allow us to decouple the swap backends
+ (zswap, on-disk swapfile, etc.), and save disk space when we
+ use zswap (or the zero-filled swap page optimization).
+
+ There might be more lock contention with heavy swap use, since
+ the swap cache is no longer range partitioned.
+
config ZSWAP
bool "Compressed cache for swap pages"
depends on SWAP
diff --git a/mm/swap.h b/mm/swap.h
index d5f8effa8015..06e20b1d79c4 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -22,22 +22,27 @@ void swap_write_unplug(struct swap_iocb *sio);
int swap_writepage(struct page *page, struct writeback_control *wbc);
void __swap_writepage(struct folio *folio, struct writeback_control *wbc);
-/* linux/mm/swap_state.c */
-/* One swap address space for each 64M swap space */
+/* Return the swap device position of the swap slot. */
+static inline loff_t swap_slot_pos(swp_slot_t slot)
+{
+ return ((loff_t)swp_slot_offset(slot)) << PAGE_SHIFT;
+}
+
#define SWAP_ADDRESS_SPACE_SHIFT 14
#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT)
#define SWAP_ADDRESS_SPACE_MASK (SWAP_ADDRESS_SPACE_PAGES - 1)
+
+/* linux/mm/swap_state.c */
+#ifdef CONFIG_VIRTUAL_SWAP
+extern struct address_space *swap_address_space(swp_entry_t entry);
+#define swap_cache_index(entry) entry.val
+#else
+/* One swap address space for each 64M swap space */
extern struct address_space *swapper_spaces[];
#define swap_address_space(entry) \
(&swapper_spaces[swp_type(entry)][swp_offset(entry) \
>> SWAP_ADDRESS_SPACE_SHIFT])
-/* Return the swap device position of the swap slot. */
-static inline loff_t swap_slot_pos(swp_slot_t slot)
-{
- return ((loff_t)swp_slot_offset(slot)) << PAGE_SHIFT;
-}
-
/*
* Return the swap cache index of the swap entry.
*/
@@ -46,6 +51,7 @@ static inline pgoff_t swap_cache_index(swp_entry_t entry)
BUILD_BUG_ON((SWP_OFFSET_MASK | SWAP_ADDRESS_SPACE_MASK) != SWP_OFFSET_MASK);
return swp_offset(entry) & SWAP_ADDRESS_SPACE_MASK;
}
+#endif
void show_swap_cache_info(void);
bool add_to_swap(struct folio *folio);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 055e555d3382..268338a0ea57 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -38,10 +38,19 @@ static const struct address_space_operations swap_aops = {
#endif
};
+#ifdef CONFIG_VIRTUAL_SWAP
+static struct address_space swapper_space __read_mostly;
+
+struct address_space *swap_address_space(swp_entry_t entry)
+{
+ return &swapper_space;
+}
+#else
struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
-static bool enable_vma_readahead __read_mostly = true;
+#endif
+static bool enable_vma_readahead __read_mostly = true;
#define SWAP_RA_ORDER_CEILING 5
#define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2)
@@ -260,6 +269,28 @@ void delete_from_swap_cache(struct folio *folio)
folio_ref_sub(folio, folio_nr_pages(folio));
}
+#ifdef CONFIG_VIRTUAL_SWAP
+void clear_shadow_from_swap_cache(int type, unsigned long begin,
+ unsigned long end)
+{
+ swp_slot_t slot = swp_slot(type, begin);
+ swp_entry_t entry = swp_slot_to_swp_entry(slot);
+ unsigned long index = swap_cache_index(entry);
+ struct address_space *address_space = swap_address_space(entry);
+ void *old;
+ XA_STATE(xas, &address_space->i_pages, index);
+
+ xas_set_update(&xas, workingset_update_node);
+
+ xa_lock_irq(&address_space->i_pages);
+ xas_for_each(&xas, old, entry.val + end - begin) {
+ if (!xa_is_value(old))
+ continue;
+ xas_store(&xas, NULL);
+ }
+ xa_unlock_irq(&address_space->i_pages);
+}
+#else
void clear_shadow_from_swap_cache(int type, unsigned long begin,
unsigned long end)
{
@@ -290,6 +321,7 @@ void clear_shadow_from_swap_cache(int type, unsigned long begin,
break;
}
}
+#endif
/*
* If we are the only user, then try to free up the swap cache.
@@ -718,23 +750,34 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
return folio;
}
+static void init_swapper_space(struct address_space *space)
+{
+ xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
+ atomic_set(&space->i_mmap_writable, 0);
+ space->a_ops = &swap_aops;
+ /* swap cache doesn't use writeback related tags */
+ mapping_set_no_writeback_tags(space);
+}
+
+#ifdef CONFIG_VIRTUAL_SWAP
int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
- struct address_space *spaces, *space;
+ return 0;
+}
+
+void exit_swap_address_space(unsigned int type) {}
+#else
+int init_swap_address_space(unsigned int type, unsigned long nr_pages)
+{
+ struct address_space *spaces;
unsigned int i, nr;
nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL);
if (!spaces)
return -ENOMEM;
- for (i = 0; i < nr; i++) {
- space = spaces + i;
- xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
- atomic_set(&space->i_mmap_writable, 0);
- space->a_ops = &swap_aops;
- /* swap cache doesn't use writeback related tags */
- mapping_set_no_writeback_tags(space);
- }
+ for (i = 0; i < nr; i++)
+ init_swapper_space(spaces + i);
nr_swapper_spaces[type] = nr;
swapper_spaces[type] = spaces;
@@ -752,6 +795,7 @@ void exit_swap_address_space(unsigned int type)
nr_swapper_spaces[type] = 0;
swapper_spaces[type] = NULL;
}
+#endif
static int swap_vma_ra_win(struct vm_fault *vmf, unsigned long *start,
unsigned long *end)
@@ -930,6 +974,10 @@ static int __init swap_init_sysfs(void)
int err;
struct kobject *swap_kobj;
+#ifdef CONFIG_VIRTUAL_SWAP
+ init_swapper_space(&swapper_space);
+#endif
+
swap_kobj = kobject_create_and_add("swap", mm_kobj);
if (!swap_kobj) {
pr_err("failed to create swap kobject\n");
--
2.47.1