Message-Id: <1589538614-24048-7-git-send-email-avri.altman@wdc.com>
Date: Fri, 15 May 2020 13:30:07 +0300
From: Avri Altman <avri.altman@....com>
To: "James E . J . Bottomley" <jejb@...ux.vnet.ibm.com>,
"Martin K . Petersen" <martin.petersen@...cle.com>,
linux-scsi@...r.kernel.org, linux-kernel@...r.kernel.org
Cc: Bart Van Assche <bvanassche@....org>, alim.akhtar@...sung.com,
asutoshd@...eaurora.org, Zang Leigang <zangleigang@...ilicon.com>,
Avi Shchislowski <avi.shchislowski@....com>,
Bean Huo <beanhuo@...ron.com>, cang@...eaurora.org,
stanley.chu@...iatek.com,
MOHAMMED RAFIQ KAMAL BASHA <md.rafiq@...sung.com>,
Sang-yoon Oh <sangyoon.oh@...sung.com>,
yongmyung lee <ymhungry.lee@...sung.com>,
Jinyoung CHOI <j-young.choi@...sung.com>,
Avri Altman <avri.altman@....com>
Subject: [RFC PATCH 06/13] scsi: scsi_dh: ufshpb: Prepare for L2P cache management
In preparation for L2P cache management, this patch introduces the
supporting memory pool structures and constants.
The logical unit address space is divided into equally-sized slots
called regions. Each region is further divided into equally-sized
subregions. Subregions whose L2P mapping entries are cached in host
system memory are called active subregions. An active region is a
region that includes at least one active subregion. An HPB entry
provides the physical block address of a specific LBA; the entry size
is 8 bytes. The HPB entries of all LBAs included in a subregion are
transferred with a single HPB_READ_BUFFER command.
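For illustration only (the actual geometry is read from the device
descriptor), assuming 4KiB logical blocks, a 16MiB region and a 4MiB
subregion: a subregion then covers 1024 blocks, so a single
HPB_READ_BUFFER transfers 1024 * 8 = 8KiB of HPB entries, and each
region spans 4 such subregions.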
Each lun is limited in the number of regions it may keep active. This
limit determines the size of the L2P cache on the one hand, and the
nature of its per-lun management on the other. The second critical
factor that determines how the L2P cache is managed is the fact that
the subregion is the atomic unit that can be activated and updated.
We also want to manage the L2P mapping using pages, so that we can
utilize the kernel's bio submission constructs should we need to
update the L2P database.
Each subregion is designated by a mapping context, which is an array
of pages, and those contexts are arranged in a linked list. We also
map all subregions using a regions-subregions table. Given the
sequential nature of LBAs, such a structure allows us to retrieve the
applicable HPB entry in O(1), as sketched below. The actual page
allocation takes place only upon subregion activation.
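A minimal sketch of what such a lookup might look like follows; the
helper name ufshpb_get_entry is hypothetical, and treating the mapping
context pages as one contiguous buffer is an assumption of this sketch -
the actual lookup is not part of this patch:

  /* illustrative only: resolve the HPB entry of a 4KiB logical block */
  static u64 ufshpb_get_entry(struct ufshpb_dh_lun *hpb, unsigned long lpn)
  {
          struct ufshpb_region *r =
                  hpb->region_tbl + lpn / entries_per_region;
          struct ufshpb_subregion *s = r->subregion_tbl +
                  (lpn % entries_per_region) / entries_per_subregion;
          unsigned int offset = lpn % entries_per_subregion;
          u64 entry;

          if (!s->mctx)
                  return 0;       /* subregion is not active */

          memcpy(&entry, s->mctx->pages + offset * HPB_ENTRY_SIZE,
                 HPB_ENTRY_SIZE);
          return entry;
  }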
Should the allocation of any of the memory pool components fail, HPB
is disabled for that lun; this is accomplished by the ->detach()
handler. HPB may also be disabled for a lun if the allocation of its
pinned regions fails.
All these allocations and preparations are done as part of the
handler's activation phase.
Signed-off-by: Avri Altman <avri.altman@....com>
---
drivers/scsi/device_handler/scsi_dh_ufshpb.c | 314 +++++++++++++++++++++++++++
1 file changed, 314 insertions(+)
diff --git a/drivers/scsi/device_handler/scsi_dh_ufshpb.c b/drivers/scsi/device_handler/scsi_dh_ufshpb.c
index f26208c..9b9b338 100644
--- a/drivers/scsi/device_handler/scsi_dh_ufshpb.c
+++ b/drivers/scsi/device_handler/scsi_dh_ufshpb.c
@@ -17,11 +17,279 @@
#define UFSHPB_NAME "ufshpb"
+#define HPB_ENTRY_SIZE (8)
+
+/* subregion activation threshold - % of subregion size */
+#define ACTIVATION_READS (10)
+/* write/discard requests in blocks to consider a subregion as dirty */
+#define SET_AS_DIRTY (1)
+/* subregion inactivation threshold - % of subregion size */
+#define INACTIVATION_WRITES (30)
+/* to control the amount of HPB_READ_BUFFER commands */
+#define READS_READ_BUFFER_SENT ACTIVATION_READS
+/* not to hold on to "cold" subregions */
+#define READ_TIMEOUT_MSEC (500)
+/* allowed rewinds of the read_timeout for "clean" subregion */
+#define READ_TIMEOUT_EXPIRIES (100)
+/* region eviction threshold: 3 x subregions_per_region x ACTIVATION_READS */
+#define EVICTION_READS (3 * ACTIVATION_READS)
+
+/**
+ * struct ufshpb_map_ctx - a mapping context
+ * @pages - array of pages
+ * @list - to hang on lh_map_ctx
+ */
+struct ufshpb_map_ctx {
+ char *pages;
+ struct list_head list;
+};
+
+/**
+ * struct ufshpb_subregion
+ * @mctx - mapping context
+ * @hpb - the lun this subregion belongs to
+ * @r - the region this subregion belongs to
+ * @reads - cumulative read request in blocks
+ * @writes - cumulative write/discard requests in blocks
+ * @reads_last_trial - read request blocks since last activation trial
+ * @read_timeout_expiries - allowed read timeout rewinds for a "clean" subregion
+ * @region - region index
+ * @subregion - subregion index
+ */
+struct ufshpb_subregion {
+ struct ufshpb_map_ctx *mctx;
+ struct ufshpb_dh_lun *hpb;
+ struct ufshpb_region *r;
+ atomic64_t reads;
+ atomic64_t writes;
+ atomic64_t reads_last_trial;
+ atomic_t read_timeout_expiries;
+ unsigned int region;
+ unsigned int subregion;
+};
+
+/**
+ * struct ufshpb_region
+ * @subregion_tbl - this region's array of subregions
+ * @hpb - lun
+ * @reads - sum over subregions @reads
+ * @writes - sum over subregions @writes
+ * @region - region index
+ * @active_subregions - actual active subregions
+ */
+struct ufshpb_region {
+ struct ufshpb_subregion *subregion_tbl;
+ struct ufshpb_dh_lun *hpb;
+ atomic64_t reads;
+ atomic64_t writes;
+ unsigned int region;
+
+ atomic_t active_subregions;
+};
+
+/**
+ * struct ufshpb_dh_lun - hpb lun
+ * @list - to hang on to the hpb luns list
+ * @lh_map_ctx - list head of mapping context
+ * @map_list_lock - to protect mapping context list operations
+ * @region_tbl - regions/subregions table
+ * @sdev - scsi device for that lun
+ * @regions_per_lun - number of regions this lun spans
+ * @subregions_per_lun - number of subregions this lun spans; the lun size
+ *                       is not guaranteed to be region/subregion aligned
+ * @last_subregion_size - 0 if the lun is subregion aligned, otherwise the
+ *                        size of the trailing partial subregion
+ * @max_active_regions - Max allowable active regions at any given time
+ * @active_regions - current active regions
+ */
+struct ufshpb_dh_lun {
+ struct list_head list;
+ struct list_head lh_map_ctx;
+ spinlock_t map_list_lock;
+ struct ufshpb_region *region_tbl;
+ struct scsi_device *sdev;
+
+ unsigned int regions_per_lun;
+ unsigned int subregions_per_lun;
+ unsigned int last_subregion_size;
+ unsigned int max_active_regions;
+
+ atomic_t active_regions;
+};
+
struct ufshpb_dh_data {
const struct ufshpb_config *ufshpb_conf;
const struct ufshpb_lun_config *ufshpb_lun_conf;
+ /* keep those first - ufshpb lld expects it */
+ struct ufshpb_dh_lun *hpb;
};
+static struct list_head lh_hpb_lunp;
+/* to support device level configuration */
+static bool is_configured;
+
+/* L2P constants */
+static unsigned int entries_per_region;
+static unsigned int entries_per_subregion;
+static unsigned char order;
+static unsigned long region_size;
+static unsigned long subregion_size;
+static unsigned int subregions_per_region;
+
+static int ufshpb_mempool_init(struct ufshpb_dh_lun *hpb)
+{
+ unsigned int max_active_subregions = hpb->max_active_regions *
+ subregions_per_region;
+ int i;
+
+ INIT_LIST_HEAD(&hpb->lh_map_ctx);
+ spin_lock_init(&hpb->map_list_lock);
+
+ for (i = 0 ; i < max_active_subregions; i++) {
+ struct ufshpb_map_ctx *mctx =
+ kzalloc(sizeof(struct ufshpb_map_ctx), GFP_KERNEL);
+
+ if (!mctx) {
+ /*
+ * mctxs already added in lh_map_ctx will be removed in
+ * detach
+ */
+ return -ENOMEM;
+ }
+
+ /* actual page allocation is done upon subregion activation */
+
+ INIT_LIST_HEAD(&mctx->list);
+ list_add(&mctx->list, &hpb->lh_map_ctx);
+ }
+
+ return 0;
+}
+
+static int ufshpb_region_tbl_init(struct ufshpb_dh_lun *hpb)
+{
+ struct ufshpb_region *regions;
+ int i, j;
+
+ regions = kcalloc(hpb->regions_per_lun, sizeof(*regions), GFP_KERNEL);
+ if (!regions)
+ return -ENOMEM;
+
+ atomic_set(&hpb->active_regions, 0);
+
+ for (i = 0 ; i < hpb->regions_per_lun; i++) {
+ struct ufshpb_region *r = regions + i;
+ struct ufshpb_subregion *subregions;
+
+ subregions = kcalloc(subregions_per_region, sizeof(*subregions),
+ GFP_KERNEL);
+ if (!subregions)
+ goto release_mem;
+
+ for (j = 0; j < subregions_per_region; j++) {
+ struct ufshpb_subregion *s = subregions + j;
+
+ s->hpb = hpb;
+ s->r = r;
+ s->region = i;
+ s->subregion = j;
+ }
+
+ r->subregion_tbl = subregions;
+ r->hpb = hpb;
+ r->region = i;
+ }
+
+ hpb->region_tbl = regions;
+
+ return 0;
+
+release_mem:
+ for (i = 0 ; i < hpb->regions_per_lun; i++) {
+ struct ufshpb_region *r = regions + i;
+
+ if (!r->subregion_tbl)
+ break;
+ kfree(r->subregion_tbl);
+ }
+
+ kfree(regions);
+ /* hpb is released in detach */
+ return -ENOMEM;
+}
+
+static int ufshpb_lun_configuration(struct scsi_device *sdev,
+ struct ufshpb_dh_data *h)
+{
+ const struct ufshpb_lun_config *lun_conf = h->ufshpb_lun_conf;
+ struct ufshpb_dh_lun *hpb = NULL;
+ int ret;
+
+ if (h->hpb)
+ return -EBUSY;
+
+ hpb = kzalloc(sizeof(*hpb), GFP_KERNEL);
+ if (!hpb)
+ return -ENOMEM;
+
+ hpb->max_active_regions = lun_conf->max_active_regions;
+ hpb->regions_per_lun = DIV_ROUND_UP(lun_conf->size, region_size);
+ hpb->subregions_per_lun = DIV_ROUND_UP(lun_conf->size, subregion_size);
+ hpb->last_subregion_size = lun_conf->size % subregion_size;
+
+ ret = ufshpb_mempool_init(hpb);
+ if (ret)
+ goto out_free;
+
+ ret = ufshpb_region_tbl_init(hpb);
+ if (ret)
+ goto out_free;
+
+ INIT_LIST_HEAD(&hpb->list);
+ list_add(&hpb->list, &lh_hpb_lunp);
+
+ hpb->sdev = sdev;
+ h->hpb = hpb;
+
+out_free:
+ if (ret)
+ kfree(hpb);
+ return ret;
+}
+
+static void ufshpb_get_config(const struct ufshpb_config *c)
+{
+ /* get hpb constants */
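+	/*
+	 * Note: the size exponents are relative to 512 bytes, hence the
+	 * '+ 9' below.  One HPB entry maps a single 4KiB logical block,
+	 * so the number of entries per (sub)region is 2^(x + 9 - 12),
+	 * hence the '- 3'.
+	 */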
+ region_size = BIT(c->region_size + 9);
+ subregion_size = BIT(c->subregion_size + 9);
+ subregions_per_region = BIT(c->region_size - c->subregion_size);
+ entries_per_region = BIT(c->region_size - 3);
+ entries_per_subregion = BIT(c->subregion_size - 3);
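+	/* allocation order of one subregion's worth of HPB entries */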
+ order = get_order(entries_per_subregion * HPB_ENTRY_SIZE);
+
+ INIT_LIST_HEAD(&lh_hpb_lunp);
+ is_configured = true;
+}
+
+static int ufshpb_activate(struct scsi_device *sdev, activate_complete fn,
+ void *data)
+{
+ int ret;
+ struct ufshpb_dh_data *h = sdev->handler_data;
+
+ /* device level configuration - only at the 1st time */
+ if (!is_configured)
+ ufshpb_get_config(h->ufshpb_conf);
+
+ ret = ufshpb_lun_configuration(sdev, h);
+ if (ret)
+ return ret;
+
+ if (fn)
+ fn(data, ret);
+
+ return 0;
+}
+
static int ufshpb_attach(struct scsi_device *sdev)
{
struct ufshpb_dh_data *h;
@@ -38,12 +306,57 @@ static int ufshpb_attach(struct scsi_device *sdev)
return SCSI_DH_OK;
}
+static void ufshpb_mempool_remove(struct ufshpb_dh_lun *hpb)
+{
+ struct ufshpb_map_ctx *mctx, *next;
+
+ if (!hpb)
+ return;
+
+ spin_lock(&hpb->map_list_lock);
+
+ list_for_each_entry_safe(mctx, next, &hpb->lh_map_ctx, list) {
+ list_del(&mctx->list);
+ kfree(mctx);
+ }
+
+ spin_unlock(&hpb->map_list_lock);
+}
+
+static void ufshpb_region_tbl_remove(struct ufshpb_dh_lun *hpb)
+{
+ struct ufshpb_region *regions;
+
+ if (!hpb)
+ return;
+
+ regions = hpb->region_tbl;
+ if (regions) {
+ int i;
+
+ for (i = 0 ; i < hpb->regions_per_lun; i++) {
+ struct ufshpb_region *r = regions + i;
+
+ kfree(r->subregion_tbl);
+ }
+
+ kfree(regions);
+ }
+
+ list_del(&hpb->list);
+ kfree(hpb);
+}
+
/*
* scsi_dh_detach will be called from the ufs driver upon failure and on
* remove.
*/
static void ufshpb_detach(struct scsi_device *sdev)
{
+ struct ufshpb_dh_data *h = sdev->handler_data;
+
+ ufshpb_mempool_remove(h->hpb);
+ ufshpb_region_tbl_remove(h->hpb);
kfree(sdev->handler_data);
sdev->handler_data = NULL;
}
@@ -53,6 +366,7 @@ static struct scsi_device_handler ufshpb_dh = {
.module = THIS_MODULE,
.attach = ufshpb_attach,
.detach = ufshpb_detach,
+ .activate = ufshpb_activate,
};
static int __init ufshpb_init(void)
--
2.7.4