Message-ID: <20200105090537.19979-1-oshpigelman@habana.ai>
Date:   Sun, 5 Jan 2020 09:05:45 +0000
From:   Omer Shpigelman <oshpigelman@...ana.ai>
To:     "oded.gabbay@...il.com" <oded.gabbay@...il.com>
CC:     "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: [PATCH 1/2] habanalabs: split the host MMU properties

Host memory may be allocated with huge pages, in which case a different
virtual range may be used for mapping it. Add Huge PCI MMU (HPMMU)
properties to support this.
This patch is a prerequisite for supporting future ASICs and has no effect
on the Goya ASIC, which currently uses a single virtual host range for all
page sizes.

Signed-off-by: Omer Shpigelman <oshpigelman@...ana.ai>
---
 drivers/misc/habanalabs/debugfs.c             |  21 +-
 drivers/misc/habanalabs/goya/goya.c           |  25 +-
 drivers/misc/habanalabs/goya/goya_coresight.c |   4 +-
 drivers/misc/habanalabs/habanalabs.h          |  31 ++-
 drivers/misc/habanalabs/memory.c              | 213 ++++++++++++------
 drivers/misc/habanalabs/mmu.c                 |  68 +++---
 6 files changed, 225 insertions(+), 137 deletions(-)
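
A minimal sketch, not part of the patch itself, of how the split properties
are consumed by the map/unmap paths below: host mappings now pick their MMU
translation properties, and therefore their virtual range, by page size.

	/* illustration only: choose translation properties for a mapping */
	if (is_dram_addr)
		mmu_prop = &prop->dmmu;
	else if ((page_size % prop->pmmu_huge.page_size) == 0)
		mmu_prop = &prop->pmmu_huge;	/* host memory backed by huge pages */
	else
		mmu_prop = &prop->pmmu;		/* regular host pages */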

diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index 20413e350343..599d17dfd542 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data)
 	}
 
 	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-				prop->va_space_dram_start_address,
-				prop->va_space_dram_end_address);
+						prop->dmmu.start_addr,
+						prop->dmmu.end_addr);
 
+	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
 	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
 	mutex_lock(&ctx->mmu_lock);
@@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
 		goto out;
 
 	if (hdev->dram_supports_virtual_memory &&
-			addr >= prop->va_space_dram_start_address &&
-			addr < prop->va_space_dram_end_address)
+		(addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
 		return true;
 
-	if (addr >= prop->va_space_host_start_address &&
-			addr < prop->va_space_host_end_address)
+	if (addr >= prop->pmmu.start_addr &&
+		addr < prop->pmmu.end_addr)
+		return true;
+
+	if (addr >= prop->pmmu_huge.start_addr &&
+		addr < prop->pmmu_huge.end_addr)
 		return true;
 out:
 	return false;
@@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
 	}
 
 	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-				prop->va_space_dram_start_address,
-				prop->va_space_dram_end_address);
+						prop->dmmu.start_addr,
+						prop->dmmu.end_addr);
 
+	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
 	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
 	mutex_lock(&ctx->mmu_lock);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 5750294400f4..382331cc00d3 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -393,19 +393,21 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 	prop->dmmu.hop2_mask = HOP2_MASK;
 	prop->dmmu.hop3_mask = HOP3_MASK;
 	prop->dmmu.hop4_mask = HOP4_MASK;
-	prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
+	prop->dmmu.start_addr = VA_DDR_SPACE_START;
+	prop->dmmu.end_addr = VA_DDR_SPACE_END;
+	prop->dmmu.page_size = PAGE_SIZE_2MB;
 
-	/* No difference between PMMU and DMMU except of page size */
+	/* shifts and masks are the same in PMMU and DMMU */
 	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
-	prop->dmmu.page_size = PAGE_SIZE_2MB;
+	prop->pmmu.start_addr = VA_HOST_SPACE_START;
+	prop->pmmu.end_addr = VA_HOST_SPACE_END;
 	prop->pmmu.page_size = PAGE_SIZE_4KB;
 
-	prop->va_space_host_start_address = VA_HOST_SPACE_START;
-	prop->va_space_host_end_address = VA_HOST_SPACE_END;
-	prop->va_space_dram_start_address = VA_DDR_SPACE_START;
-	prop->va_space_dram_end_address = VA_DDR_SPACE_END;
-	prop->dram_size_for_default_page_mapping =
-			prop->va_space_dram_end_address;
+	/* PMMU and HPMMU are the same except for the page size */
+	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
+	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
+
+	prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
 	prop->cfg_size = CFG_SIZE;
 	prop->max_asid = MAX_ASID;
 	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
@@ -3443,12 +3445,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
 	/*
 	 * WA for HW-23.
 	 * We can't allow user to read from Host using QMANs other than 1.
+	 * PMMU and HPMMU addresses are equal, check only one of them.
 	 */
 	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
 		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
 				le32_to_cpu(user_dma_pkt->tsize),
-				hdev->asic_prop.va_space_host_start_address,
-				hdev->asic_prop.va_space_host_end_address)) {
+				hdev->asic_prop.pmmu.start_addr,
+				hdev->asic_prop.pmmu.end_addr)) {
 		dev_err(hdev->dev,
 			"Can't DMA from host on queue other then 1\n");
 		return -EFAULT;
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
index c1ee6e2b5dff..a1bc930d904f 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -364,8 +364,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr,
 	u64 range_start, range_end;
 
 	if (hdev->mmu_enable) {
-		range_start = prop->va_space_dram_start_address;
-		range_end = prop->va_space_dram_end_address;
+		range_start = prop->dmmu.start_addr;
+		range_end = prop->dmmu.end_addr;
 	} else {
 		range_start = prop->dram_user_base_address;
 		range_end = prop->dram_end_address;
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index df34227dea31..5c751b9517c0 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -132,6 +132,8 @@ enum hl_device_hw_state {
 
 /**
  * struct hl_mmu_properties - ASIC specific MMU address translation properties.
+ * @start_addr: virtual start address of the memory region.
+ * @end_addr: virtual end address of the memory region.
  * @hop0_shift: shift of hop 0 mask.
  * @hop1_shift: shift of hop 1 mask.
  * @hop2_shift: shift of hop 2 mask.
@@ -143,9 +145,10 @@ enum hl_device_hw_state {
  * @hop3_mask: mask to get the PTE address in hop 3.
  * @hop4_mask: mask to get the PTE address in hop 4.
  * @page_size: default page size used to allocate memory.
- * @huge_page_size: page size used to allocate memory with huge pages.
  */
 struct hl_mmu_properties {
+	u64	start_addr;
+	u64	end_addr;
 	u64	hop0_shift;
 	u64	hop1_shift;
 	u64	hop2_shift;
@@ -157,7 +160,6 @@ struct hl_mmu_properties {
 	u64	hop3_mask;
 	u64	hop4_mask;
 	u32	page_size;
-	u32	huge_page_size;
 };
 
 /**
@@ -169,6 +171,8 @@ struct hl_mmu_properties {
  * @preboot_ver: F/W Preboot version.
  * @dmmu: DRAM MMU address translation properties.
  * @pmmu: PCI (host) MMU address translation properties.
+ * @pmmu_huge: PCI (host) MMU address translation properties for memory
+ *              allocated with huge pages.
  * @sram_base_address: SRAM physical start address.
  * @sram_end_address: SRAM physical end address.
  * @sram_user_base_address - SRAM physical start address for user access.
@@ -178,14 +182,6 @@ struct hl_mmu_properties {
  * @dram_size: DRAM total size.
  * @dram_pci_bar_size: size of PCI bar towards DRAM.
  * @max_power_default: max power of the device after reset
- * @va_space_host_start_address: base address of virtual memory range for
- *                               mapping host memory.
- * @va_space_host_end_address: end address of virtual memory range for
- *                             mapping host memory.
- * @va_space_dram_start_address: base address of virtual memory range for
- *                               mapping DRAM memory.
- * @va_space_dram_end_address: end address of virtual memory range for
- *                             mapping DRAM memory.
  * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
  *                                      fault.
  * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
@@ -218,6 +214,7 @@ struct asic_fixed_properties {
 	char				preboot_ver[VERSION_MAX_LEN];
 	struct hl_mmu_properties	dmmu;
 	struct hl_mmu_properties	pmmu;
+	struct hl_mmu_properties	pmmu_huge;
 	u64				sram_base_address;
 	u64				sram_end_address;
 	u64				sram_user_base_address;
@@ -227,10 +224,6 @@ struct asic_fixed_properties {
 	u64				dram_size;
 	u64				dram_pci_bar_size;
 	u64				max_power_default;
-	u64				va_space_host_start_address;
-	u64				va_space_host_end_address;
-	u64				va_space_dram_start_address;
-	u64				va_space_dram_end_address;
 	u64				dram_size_for_default_page_mapping;
 	u64				pcie_dbi_base_address;
 	u64				pcie_aux_dbi_reg_addr;
@@ -658,6 +651,8 @@ struct hl_va_range {
  *		this hits 0l. It is incremented on CS and CS_WAIT.
  * @cs_pending: array of DMA fence objects representing pending CS.
  * @host_va_range: holds available virtual addresses for host mappings.
+ * @host_huge_va_range: holds available virtual addresses for host mappings
+ *                      with huge pages.
  * @dram_va_range: holds available virtual addresses for DRAM mappings.
  * @mem_hash_lock: protects the mem_hash.
  * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifing the
@@ -688,8 +683,9 @@ struct hl_ctx {
 	struct hl_device	*hdev;
 	struct kref		refcount;
 	struct dma_fence	*cs_pending[HL_MAX_PENDING_CS];
-	struct hl_va_range	host_va_range;
-	struct hl_va_range	dram_va_range;
+	struct hl_va_range	*host_va_range;
+	struct hl_va_range	*host_huge_va_range;
+	struct hl_va_range	*dram_va_range;
 	struct mutex		mem_hash_lock;
 	struct mutex		mmu_lock;
 	struct list_head	debugfs_list;
@@ -1291,6 +1287,8 @@ struct hl_device_idle_busy_ts {
  *                   otherwise.
  * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
  * @dram_default_page_mapping: is DRAM default page mapping enabled.
+ * @pmmu_huge_range: is a different virtual addresses range used for PMMU with
+ *                   huge pages.
  * @init_done: is the initialization of the device done.
  * @mmu_enable: is MMU enabled.
  * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
@@ -1372,6 +1370,7 @@ struct hl_device {
 	u8				reset_on_lockup;
 	u8				dram_supports_virtual_memory;
 	u8				dram_default_page_mapping;
+	u8				pmmu_huge_range;
 	u8				init_done;
 	u8				device_cpu_disabled;
 	u8				dma_mask;
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index b612b1ad0aac..a72f766ca470 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -530,7 +530,7 @@ static u64 get_va_block(struct hl_device *hdev,
 		 * or not, hence we continue with the biggest possible
 		 * granularity.
 		 */
-		page_size = hdev->asic_prop.pmmu.huge_page_size;
+		page_size = hdev->asic_prop.pmmu_huge.page_size;
 	else
 		page_size = hdev->asic_prop.dmmu.page_size;
 
@@ -638,13 +638,12 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 				struct hl_userptr *userptr,
 				struct hl_vm_phys_pg_pack **pphys_pg_pack)
 {
-	struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu;
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	struct scatterlist *sg;
 	dma_addr_t dma_addr;
 	u64 page_mask, total_npages;
 	u32 npages, page_size = PAGE_SIZE,
-		huge_page_size = mmu_prop->huge_page_size;
+		huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
 	bool first = true, is_huge_page_opt = true;
 	int rc, i, j;
 	u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
@@ -856,6 +855,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	struct hl_userptr *userptr = NULL;
 	struct hl_vm_hash_node *hnode;
+	struct hl_va_range *va_range;
 	enum vm_type_t *vm_type;
 	u64 ret_vaddr, hint_addr;
 	u32 handle = 0;
@@ -927,9 +927,16 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		goto hnode_err;
 	}
 
-	ret_vaddr = get_va_block(hdev,
-			is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
-			phys_pg_pack->total_size, hint_addr, is_userptr);
+	if (is_userptr)
+		if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
+			va_range = ctx->host_va_range;
+		else
+			va_range = ctx->host_huge_va_range;
+	else
+		va_range = ctx->dram_va_range;
+
+	ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
+					hint_addr, is_userptr);
 	if (!ret_vaddr) {
 		dev_err(hdev->dev, "no available va block for handle %u\n",
 				handle);
@@ -968,10 +975,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	return 0;
 
 map_err:
-	if (add_va_block(hdev,
-			is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
-			ret_vaddr,
-			ret_vaddr + phys_pg_pack->total_size - 1))
+	if (add_va_block(hdev, va_range, ret_vaddr,
+				ret_vaddr + phys_pg_pack->total_size - 1))
 		dev_warn(hdev->dev,
 			"release va block failed for handle 0x%x, vaddr: 0x%llx\n",
 				handle, ret_vaddr);
@@ -1033,7 +1038,6 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
 
 	if (*vm_type == VM_TYPE_USERPTR) {
 		is_userptr = true;
-		va_range = &ctx->host_va_range;
 		userptr = hnode->ptr;
 		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
 							&phys_pg_pack);
@@ -1043,9 +1047,15 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
 				vaddr);
 			goto vm_type_err;
 		}
+
+		if (phys_pg_pack->page_size ==
+					hdev->asic_prop.pmmu.page_size)
+			va_range = ctx->host_va_range;
+		else
+			va_range = ctx->host_huge_va_range;
 	} else if (*vm_type == VM_TYPE_PHYS_PACK) {
 		is_userptr = false;
-		va_range = &ctx->dram_va_range;
+		va_range = ctx->dram_va_range;
 		phys_pg_pack = hnode->ptr;
 	} else {
 		dev_warn(hdev->dev,
@@ -1441,19 +1451,18 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
 }
 
 /*
- * hl_va_range_init - initialize virtual addresses range
- *
- * @hdev                : pointer to the habanalabs device structure
- * @va_range            : pointer to the range to initialize
- * @start               : range start address
- * @end                 : range end address
+ * va_range_init - initialize virtual addresses range
+ * @hdev: pointer to the habanalabs device structure
+ * @va_range: pointer to the range to initialize
+ * @start: range start address
+ * @end: range end address
  *
  * This function does the following:
  * - Initializes the virtual addresses list of the given range with the given
  *   addresses.
  */
-static int hl_va_range_init(struct hl_device *hdev,
-		struct hl_va_range *va_range, u64 start, u64 end)
+static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
+				u64 start, u64 end)
 {
 	int rc;
 
@@ -1488,47 +1497,105 @@ static int hl_va_range_init(struct hl_device *hdev,
 }
 
 /*
- * hl_vm_ctx_init_with_ranges - initialize virtual memory for context
+ * va_range_fini() - clear a virtual addresses range
+ * @hdev: pointer to the habanalabs structure
+ * @va_range: pointer to virtual addresses range
  *
- * @ctx                 : pointer to the habanalabs context structure
- * @host_range_start    : host virtual addresses range start
- * @host_range_end      : host virtual addresses range end
- * @dram_range_start    : dram virtual addresses range start
- * @dram_range_end      : dram virtual addresses range end
+ * This function does the following:
+ * - Frees the virtual addresses block list and its lock
+ */
+static void va_range_fini(struct hl_device *hdev,
+		struct hl_va_range *va_range)
+{
+	mutex_lock(&va_range->lock);
+	clear_va_list_locked(hdev, &va_range->list);
+	mutex_unlock(&va_range->lock);
+
+	mutex_destroy(&va_range->lock);
+	kfree(va_range);
+}
+
+/*
+ * vm_ctx_init_with_ranges() - initialize virtual memory for context
+ * @ctx: pointer to the habanalabs context structure
+ * @host_range_start: host virtual addresses range start.
+ * @host_range_end: host virtual addresses range end.
+ * @host_huge_range_start: host virtual addresses range start for memory
+ *                          allocated with huge pages.
+ * @host_huge_range_end: host virtual addresses range end for memory allocated
+ *                        with huge pages.
+ * @dram_range_start: dram virtual addresses range start.
+ * @dram_range_end: dram virtual addresses range end.
  *
  * This function initializes the following:
  * - MMU for context
  * - Virtual address to area descriptor hashtable
  * - Virtual block list of available virtual memory
  */
-static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
-				u64 host_range_end, u64 dram_range_start,
-				u64 dram_range_end)
+static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
+					u64 host_range_start,
+					u64 host_range_end,
+					u64 host_huge_range_start,
+					u64 host_huge_range_end,
+					u64 dram_range_start,
+					u64 dram_range_end)
 {
 	struct hl_device *hdev = ctx->hdev;
 	int rc;
 
+	ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
+	if (!ctx->host_va_range)
+		return -ENOMEM;
+
+	ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
+						GFP_KERNEL);
+	if (!ctx->host_huge_va_range) {
+		rc =  -ENOMEM;
+		goto host_huge_va_range_err;
+	}
+
+	ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
+	if (!ctx->dram_va_range) {
+		rc = -ENOMEM;
+		goto dram_va_range_err;
+	}
+
 	rc = hl_mmu_ctx_init(ctx);
 	if (rc) {
 		dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
-		return rc;
+		goto mmu_ctx_err;
 	}
 
 	mutex_init(&ctx->mem_hash_lock);
 	hash_init(ctx->mem_hash);
 
-	mutex_init(&ctx->host_va_range.lock);
+	mutex_init(&ctx->host_va_range->lock);
 
-	rc = hl_va_range_init(hdev, &ctx->host_va_range, host_range_start,
-			host_range_end);
+	rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
+				host_range_end);
 	if (rc) {
 		dev_err(hdev->dev, "failed to init host vm range\n");
-		goto host_vm_err;
+		goto host_page_range_err;
+	}
+
+	if (hdev->pmmu_huge_range) {
+		mutex_init(&ctx->host_huge_va_range->lock);
+
+		rc = va_range_init(hdev, ctx->host_huge_va_range,
+					host_huge_range_start,
+					host_huge_range_end);
+		if (rc) {
+			dev_err(hdev->dev,
+				"failed to init host huge vm range\n");
+			goto host_hpage_range_err;
+		}
+	} else {
+		ctx->host_huge_va_range = ctx->host_va_range;
 	}
 
-	mutex_init(&ctx->dram_va_range.lock);
+	mutex_init(&ctx->dram_va_range->lock);
 
-	rc = hl_va_range_init(hdev, &ctx->dram_va_range, dram_range_start,
+	rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
 			dram_range_end);
 	if (rc) {
 		dev_err(hdev->dev, "failed to init dram vm range\n");
@@ -1540,15 +1607,29 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
 	return 0;
 
 dram_vm_err:
-	mutex_destroy(&ctx->dram_va_range.lock);
+	mutex_destroy(&ctx->dram_va_range->lock);
 
-	mutex_lock(&ctx->host_va_range.lock);
-	clear_va_list_locked(hdev, &ctx->host_va_range.list);
-	mutex_unlock(&ctx->host_va_range.lock);
-host_vm_err:
-	mutex_destroy(&ctx->host_va_range.lock);
+	if (hdev->pmmu_huge_range) {
+		mutex_lock(&ctx->host_huge_va_range->lock);
+		clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
+		mutex_unlock(&ctx->host_huge_va_range->lock);
+	}
+host_hpage_range_err:
+	if (hdev->pmmu_huge_range)
+		mutex_destroy(&ctx->host_huge_va_range->lock);
+	mutex_lock(&ctx->host_va_range->lock);
+	clear_va_list_locked(hdev, &ctx->host_va_range->list);
+	mutex_unlock(&ctx->host_va_range->lock);
+host_page_range_err:
+	mutex_destroy(&ctx->host_va_range->lock);
 	mutex_destroy(&ctx->mem_hash_lock);
 	hl_mmu_ctx_fini(ctx);
+mmu_ctx_err:
+	kfree(ctx->dram_va_range);
+dram_va_range_err:
+	kfree(ctx->host_huge_va_range);
+host_huge_va_range_err:
+	kfree(ctx->host_va_range);
 
 	return rc;
 }
@@ -1556,8 +1637,8 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
 int hl_vm_ctx_init(struct hl_ctx *ctx)
 {
 	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
-	u64 host_range_start, host_range_end, dram_range_start,
-		dram_range_end;
+	u64 host_range_start, host_range_end, host_huge_range_start,
+		host_huge_range_end, dram_range_start, dram_range_end;
 
 	atomic64_set(&ctx->dram_phys_mem, 0);
 
@@ -1569,38 +1650,26 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
 	 *   address of the memory related to the given handle.
 	 */
 	if (ctx->hdev->mmu_enable) {
-		dram_range_start = prop->va_space_dram_start_address;
-		dram_range_end = prop->va_space_dram_end_address;
-		host_range_start = prop->va_space_host_start_address;
-		host_range_end = prop->va_space_host_end_address;
+		dram_range_start = prop->dmmu.start_addr;
+		dram_range_end = prop->dmmu.end_addr;
+		host_range_start = prop->pmmu.start_addr;
+		host_range_end = prop->pmmu.end_addr;
+		host_huge_range_start = prop->pmmu_huge.start_addr;
+		host_huge_range_end = prop->pmmu_huge.end_addr;
 	} else {
 		dram_range_start = prop->dram_user_base_address;
 		dram_range_end = prop->dram_end_address;
 		host_range_start = prop->dram_user_base_address;
 		host_range_end = prop->dram_end_address;
+		host_huge_range_start = prop->dram_user_base_address;
+		host_huge_range_end = prop->dram_end_address;
 	}
 
-	return hl_vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
-			dram_range_start, dram_range_end);
-}
-
-/*
- * hl_va_range_fini     - clear a virtual addresses range
- *
- * @hdev                : pointer to the habanalabs structure
- * va_range             : pointer to virtual addresses range
- *
- * This function does the following:
- * - Frees the virtual addresses block list and its lock
- */
-static void hl_va_range_fini(struct hl_device *hdev,
-		struct hl_va_range *va_range)
-{
-	mutex_lock(&va_range->lock);
-	clear_va_list_locked(hdev, &va_range->list);
-	mutex_unlock(&va_range->lock);
-
-	mutex_destroy(&va_range->lock);
+	return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
+					host_huge_range_start,
+					host_huge_range_end,
+					dram_range_start,
+					dram_range_end);
 }
 
 /*
@@ -1667,8 +1736,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 		}
 	spin_unlock(&vm->idr_lock);
 
-	hl_va_range_fini(hdev, &ctx->dram_va_range);
-	hl_va_range_fini(hdev, &ctx->host_va_range);
+	va_range_fini(hdev, ctx->dram_va_range);
+	if (hdev->pmmu_huge_range)
+		va_range_fini(hdev, ctx->host_huge_va_range);
+	va_range_fini(hdev, ctx->host_va_range);
 
 	mutex_destroy(&ctx->mem_hash_lock);
 	hl_mmu_ctx_fini(ctx);
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 006eee47909d..a290d6b49d78 100644
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -254,6 +254,15 @@ static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
 	return phys_hop_addr + pte_offset;
 }
 
+static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+	return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+					prop->dmmu.start_addr,
+					prop->dmmu.end_addr);
+}
+
 static int dram_default_mapping_init(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
@@ -548,6 +557,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
 		curr_pte;
 	bool is_huge, clear_hop3 = true;
 
+	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
 	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
 	hop0_addr = get_hop0_addr(ctx);
@@ -702,26 +712,25 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 	if (!hdev->mmu_enable)
 		return 0;
 
-	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-				prop->va_space_dram_start_address,
-				prop->va_space_dram_end_address);
+	is_dram_addr = is_dram_va(hdev, virt_addr);
 
-	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+	if (is_dram_addr)
+		mmu_prop = &prop->dmmu;
+	else if ((page_size % prop->pmmu_huge.page_size) == 0)
+		mmu_prop = &prop->pmmu_huge;
+	else
+		mmu_prop = &prop->pmmu;
 
 	/*
 	 * The H/W handles mapping of specific page sizes. Hence if the page
 	 * size is bigger, we break it to sub-pages and unmap them separately.
 	 */
-	if ((page_size % mmu_prop->huge_page_size) == 0) {
-		real_page_size = mmu_prop->huge_page_size;
-	} else if ((page_size % mmu_prop->page_size) == 0) {
+	if ((page_size % mmu_prop->page_size) == 0) {
 		real_page_size = mmu_prop->page_size;
 	} else {
 		dev_err(hdev->dev,
-			"page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
-			page_size,
-			mmu_prop->page_size >> 10,
-			mmu_prop->huge_page_size >> 20);
+			"page size of %u is not %uKB aligned, can't unmap\n",
+			page_size, mmu_prop->page_size >> 10);
 
 		return -EFAULT;
 	}
@@ -759,8 +768,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		hop4_new = false, is_huge;
 	int rc = -ENOMEM;
 
-	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
 	/*
 	 * This mapping function can map a page or a huge page. For huge page
 	 * there are only 3 hops rather than 4. Currently the DRAM allocation
@@ -768,11 +775,15 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 	 * one of the two page sizes. Since this is a common code for all the
 	 * three cases, we need this hugs page check.
 	 */
-	is_huge = page_size == mmu_prop->huge_page_size;
-
-	if (is_dram_addr && !is_huge) {
-		dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
-		return -EFAULT;
+	if (is_dram_addr) {
+		mmu_prop = &prop->dmmu;
+		is_huge = true;
+	} else if (page_size == prop->pmmu_huge.page_size) {
+		mmu_prop = &prop->pmmu_huge;
+		is_huge = true;
+	} else {
+		mmu_prop = &prop->pmmu;
+		is_huge = false;
 	}
 
 	hop0_addr = get_hop0_addr(ctx);
@@ -942,26 +953,25 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
 	if (!hdev->mmu_enable)
 		return 0;
 
-	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-				prop->va_space_dram_start_address,
-				prop->va_space_dram_end_address);
+	is_dram_addr = is_dram_va(hdev, virt_addr);
 
-	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+	if (is_dram_addr)
+		mmu_prop = &prop->dmmu;
+	else if ((page_size % prop->pmmu_huge.page_size) == 0)
+		mmu_prop = &prop->pmmu_huge;
+	else
+		mmu_prop = &prop->pmmu;
 
 	/*
 	 * The H/W handles mapping of specific page sizes. Hence if the page
 	 * size is bigger, we break it to sub-pages and map them separately.
 	 */
-	if ((page_size % mmu_prop->huge_page_size) == 0) {
-		real_page_size = mmu_prop->huge_page_size;
-	} else if ((page_size % mmu_prop->page_size) == 0) {
+	if ((page_size % mmu_prop->page_size) == 0) {
 		real_page_size = mmu_prop->page_size;
 	} else {
 		dev_err(hdev->dev,
-			"page size of %u is not %dKB nor %dMB aligned, can't unmap\n",
-			page_size,
-			mmu_prop->page_size >> 10,
-			mmu_prop->huge_page_size >> 20);
+			"page size of %u is not %uKB aligned, can't unmap\n",
+			page_size, mmu_prop->page_size >> 10);
 
 		return -EFAULT;
 	}
-- 
2.17.1
