Message-ID: <Z71AnDX+LQuXhY1Y@MiWiFi-R3L-srv>
Date: Tue, 25 Feb 2025 12:01:32 +0800
From: Baoquan He <bhe@...hat.com>
To: Kairui Song <kasong@...cent.com>
Cc: linux-mm@...ck.org, Andrew Morton <akpm@...ux-foundation.org>,
Chris Li <chrisl@...nel.org>, Barry Song <v-songbaohua@...o.com>,
Hugh Dickins <hughd@...gle.com>,
Yosry Ahmed <yosryahmed@...gle.com>,
"Huang, Ying" <ying.huang@...ux.alibaba.com>,
Nhat Pham <nphamcs@...il.com>, Johannes Weiner <hannes@...xchg.org>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
Kalesh Singh <kaleshsingh@...gle.com>,
Matthew Wilcox <willy@...radead.org>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 5/7] mm, swap: use percpu cluster as allocation fast
path
On 02/25/25 at 02:02am, Kairui Song wrote:
......snip...
> @@ -1204,19 +1252,36 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
> int n_ret = 0;
> int node;
>
> + /* Fast path using percpu cluster */
> + local_lock(&percpu_swap_cluster.lock);
> + n_ret = swap_alloc_fast(swp_entries,
> + SWAP_HAS_CACHE,
> + order, n_goal);
Since we always pass SWAP_HAS_CACHE as the 2nd parameter, we can hard
code it inside the function swap_alloc_fast(), unless there will be
another call site that passes a different value for 'usage'. The same
applies to the 2nd parameter of swap_alloc_slow(). Anyway, it's not a
big deal, and could just be my personal preference.
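For illustration, the cleanup I have in mind is roughly the following
(just a sketch against your patch; the actual body of swap_alloc_fast()
stays as you wrote it):

/*
 * 'usage' dropped from the signature; SWAP_HAS_CACHE is used directly
 * inside, since get_swap_pages() is the only caller.
 */
static int swap_alloc_fast(swp_entry_t entries[], int order, int n_goal)
{
	/* ... body unchanged, using SWAP_HAS_CACHE where 'usage' was ... */
}

and the call site in get_swap_pages() then becomes:

	n_ret = swap_alloc_fast(swp_entries, order, n_goal);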
Other than this nit, this looks good to me.
> + if (n_ret == n_goal)
> + goto out;
> +
> + n_goal = min_t(int, n_goal - n_ret, SWAP_BATCH);
> + /* Rotate the device and switch to a new cluster */
> spin_lock(&swap_avail_lock);
> start_over:
> node = numa_node_id();
> plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) {
> - /* requeue si to after same-priority siblings */
> plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
> spin_unlock(&swap_avail_lock);
> if (get_swap_device_info(si)) {
> - n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
> - n_goal, swp_entries, order);
> + /*
> + * For order 0 allocation, try our best to fill the request,
> + * as it's used by the slot cache.
> + *
> + * For mTHP allocation, n_goal is always 1, and a failed
> + * mTHP swapin will just make the caller fall back to
> + * order 0 allocation, so just bail out.
> + */
> + n_ret += scan_swap_map_slots(si, SWAP_HAS_CACHE, n_goal,
> + swp_entries + n_ret, order);
> put_swap_device(si);
> if (n_ret || size > 1)
> - goto check_out;
> + goto out;
> }
>
> spin_lock(&swap_avail_lock);
> @@ -1234,12 +1299,10 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
> if (plist_node_empty(&next->avail_lists[node]))
> goto start_over;
> }
> -
> spin_unlock(&swap_avail_lock);
> -
> -check_out:
> +out:
> + local_unlock(&percpu_swap_cluster.lock);
> atomic_long_sub(n_ret * size, &nr_swap_pages);
> -
> return n_ret;
> }
>
> @@ -2725,8 +2788,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
> arch_swap_invalidate_area(p->type);
> zswap_swapoff(p->type);
> mutex_unlock(&swapon_mutex);
> - free_percpu(p->percpu_cluster);
> - p->percpu_cluster = NULL;
> kfree(p->global_cluster);
> p->global_cluster = NULL;
> vfree(swap_map);
> @@ -3125,7 +3186,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
> unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
> struct swap_cluster_info *cluster_info;
> unsigned long i, j, idx;
> - int cpu, err = -ENOMEM;
> + int err = -ENOMEM;
>
> cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL);
> if (!cluster_info)
> @@ -3134,20 +3195,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
> for (i = 0; i < nr_clusters; i++)
> spin_lock_init(&cluster_info[i].lock);
>
> - if (si->flags & SWP_SOLIDSTATE) {
> - si->percpu_cluster = alloc_percpu(struct percpu_cluster);
> - if (!si->percpu_cluster)
> - goto err_free;
> -
> - for_each_possible_cpu(cpu) {
> - struct percpu_cluster *cluster;
> -
> - cluster = per_cpu_ptr(si->percpu_cluster, cpu);
> - for (i = 0; i < SWAP_NR_ORDERS; i++)
> - cluster->next[i] = SWAP_ENTRY_INVALID;
> - local_lock_init(&cluster->lock);
> - }
> - } else {
> + if (!(si->flags & SWP_SOLIDSTATE)) {
> si->global_cluster = kmalloc(sizeof(*si->global_cluster),
> GFP_KERNEL);
> if (!si->global_cluster)
> @@ -3424,8 +3472,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
> bad_swap_unlock_inode:
> inode_unlock(inode);
> bad_swap:
> - free_percpu(si->percpu_cluster);
> - si->percpu_cluster = NULL;
> kfree(si->global_cluster);
> si->global_cluster = NULL;
> inode = NULL;
> --
> 2.48.1
>