[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1605686780-17886-7-git-send-email-wendy.liang@xilinx.com>
Date: Wed, 18 Nov 2020 00:06:17 -0800
From: Wendy Liang <wendy.liang@...inx.com>
To: <robh+dt@...nel.org>, <michal.simek@...inx.com>, <arnd@...db.de>,
<gregkh@...uxfoundation.org>, <sumit.semwal@...aro.org>,
<christian.koenig@....com>, <derek.kiernan@...inx.com>,
<dragan.cvetic@...inx.com>, <rajan.vaja@...inx.com>,
<tejas.patel@...inx.com>, <manish.narani@...inx.com>,
<ravi.patel@...inx.com>
CC: <devicetree@...r.kernel.org>,
<linux-arm-kernel@...ts.infradead.org>,
<linux-kernel@...r.kernel.org>, <linux-media@...r.kernel.org>,
<dri-devel@...ts.freedesktop.org>,
<linaro-mm-sig@...ts.linaro.org>,
"Wendy Liang" <wendy.liang@...inx.com>
Subject: [PATCH 6/9] misc: xilinx-ai-engine: add request and release tiles
Add request/release and related clock gating functions to AI engine
driver:
* scanning when the partition is being requested to know which tiles
are in use.
* check if a tile is gated or not
* tiles requesting and releasing ioctl so that user application can
enable/disable tiles at runtime.
Signed-off-by: Wendy Liang <wendy.liang@...inx.com>
Reviewed-by: Hyun Kwon <hyun.kwon@...inx.com>
---
drivers/misc/xilinx-ai-engine/Makefile | 1 +
drivers/misc/xilinx-ai-engine/ai-engine-aie.c | 227 ++++++++++++++++++-
drivers/misc/xilinx-ai-engine/ai-engine-clock.c | 244 +++++++++++++++++++++
drivers/misc/xilinx-ai-engine/ai-engine-dev.c | 19 +-
drivers/misc/xilinx-ai-engine/ai-engine-internal.h | 34 +++
drivers/misc/xilinx-ai-engine/ai-engine-part.c | 32 +++
drivers/misc/xilinx-ai-engine/ai-engine-res.c | 51 +++++
include/uapi/linux/xlnx-ai-engine.h | 31 +++
8 files changed, 631 insertions(+), 8 deletions(-)
create mode 100644 drivers/misc/xilinx-ai-engine/ai-engine-clock.c
diff --git a/drivers/misc/xilinx-ai-engine/Makefile b/drivers/misc/xilinx-ai-engine/Makefile
index 1b743fa..2e67b25 100644
--- a/drivers/misc/xilinx-ai-engine/Makefile
+++ b/drivers/misc/xilinx-ai-engine/Makefile
@@ -6,6 +6,7 @@
obj-$(CONFIG_XILINX_AIE) += xilinx-aie.o
xilinx-aie-$(CONFIG_XILINX_AIE) := ai-engine-aie.o \
+ ai-engine-clock.o \
ai-engine-dev.o \
ai-engine-dma.o \
ai-engine-mem.o \
diff --git a/drivers/misc/xilinx-ai-engine/ai-engine-aie.c b/drivers/misc/xilinx-ai-engine/ai-engine-aie.c
index 19c262d..ff721b3 100644
--- a/drivers/misc/xilinx-ai-engine/ai-engine-aie.c
+++ b/drivers/misc/xilinx-ai-engine/ai-engine-aie.c
@@ -41,6 +41,9 @@
#define AIE_SHIMPL_SHIMRST_MASK 0x1U
#define AIE_SHIMPL_COLRST_MASK 0x1U
#define AIE_SHIMPL_CLKCNTR_COLBUF_MASK 0x1U
+#define AIE_SHIMPL_CLKCNTR_NEXTCLK_MASK BIT(1)
+#define AIE_TILE_CLKCNTR_COLBUF_MASK BIT(0)
+#define AIE_TILE_CLKCNTR_NEXTCLK_MASK BIT(1)
/*
* AI engine SHIM reset ID.
@@ -221,10 +224,232 @@ static int aie_reset_shim(struct aie_device *adev, struct aie_range *range)
return 0;
}
+static int aie_init_part_clk_state(struct aie_partition *apart)
+{
+ int ret, num_tiles;
+
+ num_tiles = apart->range.size.col * (apart->range.size.row - 1);
+
+ ret = aie_resource_initialize(&apart->cores_clk_state, num_tiles);
+ if (ret) {
+ dev_err(&apart->dev,
+ "failed to initialize cores clock state resource.\n");
+ return ret;
+ }
+
+ ret = aie_resource_initialize(&apart->tiles_inuse, num_tiles);
+ if (ret) {
+ dev_err(&apart->dev,
+ "failed to initialize tiles in use resource.\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int aie_scan_part_clocks(struct aie_partition *apart)
+{
+ struct aie_device *adev = apart->adev;
+ struct aie_range *range = &apart->range;
+ struct aie_location loc;
+
+ /* Clear the bitmap of cores and memories clock state */
+ aie_resource_put_region(&apart->cores_clk_state, 0,
+ apart->cores_clk_state.total);
+
+ for (loc.col = range->start.col;
+ loc.col < range->start.col + range->size.col;
+ loc.col++) {
+ for (loc.row = range->start.row;
+ loc.row < range->start.row + range->size.row - 1;
+ loc.row++) {
+ void __iomem *va;
+ u32 val, nbitpos;
+
+ /*
+ * Reading registers of the current tile to see the next
+ * tile is clock gated.
+ */
+ nbitpos = loc.col * (range->size.row - 1) + loc.row;
+
+ if (aie_get_tile_type(&loc) != AIE_TILE_TYPE_TILE) {
+ /* Checks shim tile for next core tile */
+ va = adev->base +
+ aie_cal_regoff(adev, loc,
+ AIE_SHIMPL_CLKCNTR_REGOFF);
+ val = ioread32(va);
+
+ /*
+ * check if the clock buffer and the next clock
+ * tile is set, if one of them is not set, the
+ * tiles of the column are clock gated.
+ */
+ if (!(val & AIE_SHIMPL_CLKCNTR_COLBUF_MASK) ||
+ !(val & AIE_SHIMPL_CLKCNTR_NEXTCLK_MASK))
+ break;
+
+ /* Set next tile in the row clock state on */
+ aie_resource_set(&apart->cores_clk_state,
+ nbitpos, 1);
+ continue;
+ }
+
+ /* Checks core tile for next tile */
+ va = adev->base +
+ aie_cal_regoff(adev, loc,
+ AIE_TILE_CORE_CLKCNTR_REGOFF);
+ val = ioread32(va);
+
+ /*
+ * If the next tile is gated, skip the rest of the
+ * column.
+ */
+ if (!(val & AIE_TILE_CLKCNTR_NEXTCLK_MASK))
+ break;
+
+ aie_resource_set(&apart->cores_clk_state, nbitpos, 1);
+ }
+ }
+
+ /*
+ * Set the tiles in use bitmap.
+ * In case of scanning, tiles which are powered on are considered as
+ * tiles in use.
+ */
+ bitmap_copy(apart->tiles_inuse.bitmap, apart->cores_clk_state.bitmap,
+ apart->tiles_inuse.total);
+
+ return 0;
+}
+
+/* aie_set_col_clocks() - set clocks of a range of tiles of a column
+ * @apart: AI engine partition
+ * @range: range of tiles of a column
+ * @enable: true to enable the clock, false to disable
+ * @return: 0 for success, negative value of errors.
+ */
+static int aie_set_col_clocks(struct aie_partition *apart,
+ struct aie_range *range, bool enable)
+{
+ struct aie_location ploc;
+ u32 startbit;
+
+ /*
+ * check if the range is of single column. only single column is allowed.
+ * check if the start row is tile row, only tile rows are allowed.
+ */
+ if (range->size.col != 1 || range->start.row < 1)
+ return -EINVAL;
+
+ ploc.col = range->start.col;
+ for (ploc.row = range->start.row - 1;
+ ploc.row < range->start.row + range->size.row - 1;
+ ploc.row++) {
+ struct aie_device *adev = apart->adev;
+
+ if (!ploc.row) {
+ void __iomem *va;
+ u32 val = 0;
+
+ /*
+ * Configure SHIM clock registers to gate or
+ * ungate next tile.
+ */
+ if (enable)
+ val = AIE_SHIMPL_CLKCNTR_COLBUF_MASK |
+ AIE_SHIMPL_CLKCNTR_NEXTCLK_MASK;
+ va = adev->base +
+ aie_cal_regoff(adev, ploc,
+ AIE_SHIMPL_CLKCNTR_REGOFF);
+ iowrite32(val, va);
+ } else {
+ void __iomem *va;
+ u32 val = 0;
+
+ /*
+ * Configure core tile clock registers to gate
+ * or ungate next tile.
+ */
+ if (enable)
+ val = AIE_TILE_CLKCNTR_COLBUF_MASK |
+ AIE_TILE_CLKCNTR_NEXTCLK_MASK;
+ va = adev->base +
+ aie_cal_regoff(adev, ploc,
+ AIE_TILE_CORE_CLKCNTR_REGOFF);
+ iowrite32(val, va);
+ }
+
+ /* If the tile clock is not on, jump to next column */
+ if (!enable)
+ break;
+ }
+
+ /* Update clock state bitmap */
+ startbit = range->start.col * (apart->range.size.row - 1) +
+ range->start.row - 1;
+ if (enable)
+ aie_resource_set(&apart->cores_clk_state, startbit,
+ range->size.row);
+ else
+ aie_resource_clear(&apart->cores_clk_state, startbit,
+ range->size.row);
+
+ return 0;
+}
+
+static int aie_set_part_clocks(struct aie_partition *apart)
+{
+ struct aie_range *range = &apart->range, lrange;
+ struct aie_location loc;
+
+ /*
+ * The tiles below the highest tile whose clock is on, need to have the
+ * clock on. The first for loop is to scan the clock states bitmap to
+ * see which tiles are required to be clocked on, and update the bitmap
+ * to make sure the tiles below are also required to be clocked on.
+ */
+ for (loc.col = range->start.col;
+ loc.col < range->start.col + range->size.col;
+ loc.col++) {
+ u32 startbit, inuse_toprow = 0, clk_toprow = 0;
+
+ startbit = loc.col * (range->size.row - 1);
+
+ for (loc.row = range->start.row + 1;
+ loc.row < range->start.row + range->size.row;
+ loc.row++) {
+ u32 bit = startbit + loc.row - 1;
+
+ if (aie_resource_testbit(&apart->tiles_inuse, bit))
+ inuse_toprow = loc.row;
+ if (aie_resource_testbit(&apart->cores_clk_state, bit))
+ clk_toprow = loc.row;
+ }
+
+ /* Update clock states of a column */
+ lrange.start.col = loc.col;
+ lrange.size.col = 1;
+ if (inuse_toprow < clk_toprow) {
+ lrange.start.row = inuse_toprow + 1;
+ lrange.size.row = clk_toprow - inuse_toprow;
+ aie_set_col_clocks(apart, &lrange, false);
+ } else if (inuse_toprow > clk_toprow) {
+ lrange.start.row = clk_toprow + 1;
+ lrange.size.row = inuse_toprow - clk_toprow;
+ aie_set_col_clocks(apart, &lrange, true);
+ }
+ }
+
+ return 0;
+}
+
static const struct aie_tile_operations aie_ops = {
.get_tile_type = aie_get_tile_type,
.get_mem_info = aie_get_mem_info,
.reset_shim = aie_reset_shim,
+ .init_part_clk_state = aie_init_part_clk_state,
+ .scan_part_clocks = aie_scan_part_clocks,
+ .set_part_clocks = aie_set_part_clocks,
};
/**
@@ -250,7 +475,7 @@ int aie_device_init(struct aie_device *adev)
adev->kernel_regs = aie_kernel_regs;
adev->col_rst = &aie_col_rst;
adev->col_clkbuf = &aie_col_clkbuf;
- adev->shim_dma = &aiev1_shimdma;
+ adev->shim_dma = &aie_shimdma;
/* Get the columns resource */
/* Get number of columns from AI engine memory resource */
diff --git a/drivers/misc/xilinx-ai-engine/ai-engine-clock.c b/drivers/misc/xilinx-ai-engine/ai-engine-clock.c
new file mode 100644
index 0000000..d490ad5
--- /dev/null
+++ b/drivers/misc/xilinx-ai-engine/ai-engine-clock.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx AI Engine device driver
+ *
+ * Copyright (C) 2020 Xilinx, Inc.
+ */
+
+#include "ai-engine-internal.h"
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+/**
+ * aie_part_get_clk_state_bit() - return bit position of the clock state of a
+ * tile
+ * @apart: AI engine partition
+ * @loc: AI engine tile location
+ * @return: bit position for success, negative value for failure
+ */
+static int aie_part_get_clk_state_bit(struct aie_partition *apart,
+ struct aie_location *loc)
+{
+ if (apart->adev->ops->get_tile_type(loc) != AIE_TILE_TYPE_TILE)
+ return -EINVAL;
+
+ return loc->col * (apart->range.size.row - 1) + loc->row - 1;
+}
+
+/**
+ * aie_part_scan_clk_state() - scan the clock states of tiles of the AI engine
+ * partition
+ * @apart: AI engine partition
+ * @return: 0 for success, negative value for failure.
+ *
+ * This function will scan the clock status of both the memory and core
+ * modules.
+ */
+int aie_part_scan_clk_state(struct aie_partition *apart)
+{
+ return apart->adev->ops->scan_part_clocks(apart);
+}
+
+/**
+ * aie_part_check_clk_enable_loc() - return if clock of a tile is enabled
+ * @apart: AI engine partition
+ * @loc: AI engine tile location
+ * @return: true for enabled, false for disabled
+ */
+bool aie_part_check_clk_enable_loc(struct aie_partition *apart,
+ struct aie_location *loc)
+{
+ int bit;
+
+ if (apart->adev->ops->get_tile_type(loc) != AIE_TILE_TYPE_TILE)
+ return true;
+
+ bit = aie_part_get_clk_state_bit(apart, loc);
+ return aie_resource_testbit(&apart->cores_clk_state, bit);
+}
+
+/**
+ * aie_part_request_tiles() - request tiles from an AI engine partition.
+ * @apart: AI engine partition
+ * @num_tiles: number of tiles to request. If it is 0, it means all tiles
+ * @locs: the AI engine tiles locations array which will be requested
+ * @return: 0 for success, negative value for failure.
+ *
+ * This function will enable clocks of the specified tiles.
+ */
+static int aie_part_request_tiles(struct aie_partition *apart, int num_tiles,
+ struct aie_location *locs)
+{
+ if (num_tiles == 0) {
+ aie_resource_set(&apart->tiles_inuse, 0,
+ apart->tiles_inuse.total);
+ } else {
+ u32 n;
+
+ if (!locs)
+ return -EINVAL;
+
+ for (n = 0; n < num_tiles; n++) {
+ int bit = aie_part_get_clk_state_bit(apart, &locs[n]);
+
+ if (bit >= 0)
+ aie_resource_set(&apart->tiles_inuse, bit, 1);
+ }
+ }
+
+ return apart->adev->ops->set_part_clocks(apart);
+}
+
+/**
+ * aie_part_release_tiles() - release tiles from an AI engine partition.
+ * @apart: AI engine partition
+ * @num_tiles: number of tiles to release. If it is 0, it means all tiles
+ * @locs: the AI engine tiles locations array which will be released
+ * @return: 0 for success, negative value for failure.
+ *
+ * This function will disable clocks of the specified tiles.
+ */
+static int aie_part_release_tiles(struct aie_partition *apart, int num_tiles,
+ struct aie_location *locs)
+{
+ if (num_tiles == 0) {
+ aie_resource_clear(&apart->tiles_inuse, 0,
+ apart->tiles_inuse.total);
+ } else {
+ u32 n;
+
+ if (!locs)
+ return -EINVAL;
+
+ for (n = 0; n < num_tiles; n++) {
+ int bit = aie_part_get_clk_state_bit(apart, &locs[n]);
+
+ if (bit >= 0)
+ aie_resource_clear(&apart->tiles_inuse, bit, 1);
+ }
+ }
+
+ return apart->adev->ops->set_part_clocks(apart);
+}
+
+/**
+ * aie_part_request_tiles_from_user() - request tiles from an AI engine
+ * partition from user
+ * @apart: AI engine partition
+ * @user_args: user AI engine request tiles argument
+ * @return: 0 for success, negative value for failure.
+ *
+ * This function will request tiles from user request.
+ */
+int aie_part_request_tiles_from_user(struct aie_partition *apart,
+ void __user *user_args)
+{
+ struct aie_tiles_array args;
+ struct aie_location *locs = NULL;
+ int ret;
+
+ if (copy_from_user(&args, user_args, sizeof(args)))
+ return -EFAULT;
+
+ if (args.num_tiles) {
+ u32 i;
+
+ locs = kmalloc_array(args.num_tiles, sizeof(*locs),
+ GFP_KERNEL);
+ if (!locs)
+ return -ENOMEM;
+
+ if (copy_from_user(locs, (void __user *)args.locs,
+ args.num_tiles * sizeof(*locs))) {
+ kfree(locs);
+ return -EFAULT;
+ }
+
+ /* update the location to absolute location */
+ for (i = 0; i < args.num_tiles; i++) {
+ if (locs[i].col > apart->range.size.col ||
+ locs[i].row > apart->range.size.row) {
+ dev_err(&apart->dev,
+ "failed to request tiles, invalid tile(%u,%u).\n",
+ locs[i].col, locs[i].row);
+ kfree(locs);
+ return -EINVAL;
+ }
+ locs[i].col += apart->range.start.col;
+ locs[i].row += apart->range.start.row;
+ }
+ }
+
+ ret = mutex_lock_interruptible(&apart->mlock);
+ if (ret) {
+ kfree(locs);
+ return ret;
+ }
+
+ ret = aie_part_request_tiles(apart, args.num_tiles, locs);
+ mutex_unlock(&apart->mlock);
+
+ kfree(locs);
+ return ret;
+}
+
+/**
+ * aie_part_release_tiles_from_user() - release tiles from an AI engine
+ * partition from user
+ * @apart: AI engine partition
+ * @user_args: user AI engine request tiles argument
+ * @return: 0 for success, negative value for failure.
+ *
+ * This function will release tiles from user request.
+ */
+int aie_part_release_tiles_from_user(struct aie_partition *apart,
+ void __user *user_args)
+{
+ struct aie_tiles_array args;
+ struct aie_location *locs = NULL;
+ int ret;
+
+ if (copy_from_user(&args, user_args, sizeof(args)))
+ return -EFAULT;
+
+ if (args.num_tiles) {
+ int i;
+
+ locs = kmalloc_array(args.num_tiles, sizeof(*locs),
+ GFP_KERNEL);
+ if (!locs)
+ return -ENOMEM;
+
+ if (copy_from_user(locs, (void __user *)args.locs,
+ args.num_tiles * sizeof(*locs))) {
+ kfree(locs);
+ return -EFAULT;
+ }
+
+ /* update the location to absolute location */
+ for (i = 0; i < args.num_tiles; i++) {
+ if (locs[i].col > apart->range.size.col ||
+ locs[i].row > apart->range.size.row) {
+ dev_err(&apart->dev,
+ "failed to release tiles, invalid tile(%u,%u).\n",
+ locs[i].col, locs[i].row);
+ kfree(locs);
+ return -EINVAL;
+ }
+ locs[i].col += apart->range.start.col;
+ locs[i].row += apart->range.start.row;
+ }
+ }
+
+ ret = mutex_lock_interruptible(&apart->mlock);
+ if (ret) {
+ kfree(locs);
+ return ret;
+ }
+
+ ret = aie_part_release_tiles(apart, args.num_tiles, locs);
+ mutex_unlock(&apart->mlock);
+
+ kfree(locs);
+ return ret;
+}
diff --git a/drivers/misc/xilinx-ai-engine/ai-engine-dev.c b/drivers/misc/xilinx-ai-engine/ai-engine-dev.c
index 38a1ded..7e69ff4 100644
--- a/drivers/misc/xilinx-ai-engine/ai-engine-dev.c
+++ b/drivers/misc/xilinx-ai-engine/ai-engine-dev.c
@@ -200,17 +200,22 @@ struct aie_partition *aie_request_partition(struct aie_device *adev,
} else {
/*
* TBD:
- * 1. setup NOC AXI MM config to only generate error events
- * for slave error and decode error.
- * 2. scan to see which tiles have been clock gated.
+ * setup NOC AXI MM config to only generate error events
+ * for slave error and decode error.
*
* This needs to be done before the AI engine partition is
* exported for user to access.
*/
- apart->status = XAIE_PART_STATUS_INUSE;
- apart->cntrflag = req->flag;
-
- mutex_unlock(&apart->mlock);
+ /* scan to setup the initial clock state for tiles */
+ ret = aie_part_scan_clk_state(apart);
+ if (ret) {
+ mutex_unlock(&apart->mlock);
+ apart = ERR_PTR(ret);
+ } else {
+ apart->status = XAIE_PART_STATUS_INUSE;
+ apart->cntrflag = req->flag;
+ mutex_unlock(&apart->mlock);
+ }
}
mutex_unlock(&adev->mlock);
diff --git a/drivers/misc/xilinx-ai-engine/ai-engine-internal.h b/drivers/misc/xilinx-ai-engine/ai-engine-internal.h
index bf3a09c..131d22a 100644
--- a/drivers/misc/xilinx-ai-engine/ai-engine-internal.h
+++ b/drivers/misc/xilinx-ai-engine/ai-engine-internal.h
@@ -112,6 +112,22 @@ struct aie_dma_attr {
* @get_tile_type: get type of tile based on tile operation
* @get_mem_info: get different types of memories information
* @reset_shim: reset shim, it will assert and then release SHIM reset
+ * @init_part_clk_state: initialize clock states software structure which is a
+ * bitmap for the AI engine partition. The clock states
+ * structure is the structure used to keep track of if
+ * the modules in the AI engine partition are gated.
+ * @scan_part_clocks: scan partition modules to check whether the modules are
+ * clock gated or not, and update the soft clock states
+ * structure. It is required to be called when the partition
+ * is requested so that the driver knows which modules are
+ * clock gated when the partition is requested. This function
+ * expects the caller to apply partition lock before calling
+ * this function.
+ * @set_part_clocks: set partition modules clocks gate registers based on the
+ * partition clock states bitmap. This function expects the
+ * caller to apply partition lock before calling this
+ * function. The caller function will need to set the bitmap
+ * on which tiles are required to be clocked on.
*
* Different AI engine device version has its own device
* operation.
@@ -121,6 +137,9 @@ struct aie_tile_operations {
unsigned int (*get_mem_info)(struct aie_range *range,
struct aie_part_mem *pmem);
int (*reset_shim)(struct aie_device *adev, struct aie_range *range);
+ int (*init_part_clk_state)(struct aie_partition *apart);
+ int (*scan_part_clocks)(struct aie_partition *apart);
+ int (*set_part_clocks)(struct aie_partition *apart);
};
/**
@@ -185,6 +204,8 @@ struct aie_device {
* @range: range of partition
* @mlock: protection for AI engine partition operations
* @dev: device for the AI engine partition
+ * @cores_clk_state: bitmap to indicate the power state of core modules
+ * @tiles_inuse: bitmap to indicate if a tile is in use
* @partition_id: partition id. Partition ID is the identifier
* of the AI engine partition in the system.
* @status: indicate if the partition is in use
@@ -199,6 +220,8 @@ struct aie_partition {
struct aie_range range;
struct mutex mlock; /* protection for AI engine partition operations */
struct device dev;
+ struct aie_resource cores_clk_state;
+ struct aie_resource tiles_inuse;
u32 partition_id;
u32 status;
u32 cntrflag;
@@ -308,6 +331,9 @@ int aie_resource_check_region(struct aie_resource *res, u32 start,
int aie_resource_get_region(struct aie_resource *res, u32 start,
u32 count);
void aie_resource_put_region(struct aie_resource *res, int start, u32 count);
+int aie_resource_set(struct aie_resource *res, u32 start, u32 count);
+int aie_resource_clear(struct aie_resource *res, u32 start, u32 count);
+bool aie_resource_testbit(struct aie_resource *res, u32 bit);
const struct file_operations *aie_part_get_fops(void);
u8 aie_part_in_use(struct aie_partition *apart);
@@ -331,5 +357,13 @@ long aie_part_set_dmabuf_bd(struct aie_partition *apart,
void __user *user_args);
void aie_part_release_dmabufs(struct aie_partition *apart);
+int aie_part_scan_clk_state(struct aie_partition *apart);
+bool aie_part_check_clk_enable_loc(struct aie_partition *apart,
+ struct aie_location *loc);
+int aie_part_request_tiles_from_user(struct aie_partition *apart,
+ void __user *user_args);
+int aie_part_release_tiles_from_user(struct aie_partition *apart,
+ void __user *user_args);
+
int aie_device_init(struct aie_device *adev);
#endif /* AIE_INTERNAL_H */
diff --git a/drivers/misc/xilinx-ai-engine/ai-engine-part.c b/drivers/misc/xilinx-ai-engine/ai-engine-part.c
index dcfb9ec..54450b6 100644
--- a/drivers/misc/xilinx-ai-engine/ai-engine-part.c
+++ b/drivers/misc/xilinx-ai-engine/ai-engine-part.c
@@ -94,6 +94,27 @@ static int aie_part_reg_validation(struct aie_partition *apart, size_t offset,
return -EINVAL;
}
+ /*
+ * We check if a tile is gated before trying to access the tile.
+ * As we mmap() the registers as read only to enable faster status
+ * enquiry, and mmap() memories as write/read to faster memory access,
+ * user can still access the clock gated tiles from userspace by
+ * accessing the mmapped space.
+ * Accessing the gated tiles can cause decode error. With PDI flow,
+ * the PDI sets up the SHIM NOC AXI MM to only generate AI engine error
+ * even instead of generating the NSU error. but for non PDI flow, as
+ * the AXI MM register are protected register, until we have EEMI API
+ * to update the AXI MM register, access the gated tiles can cause NSU
+ * errors.
+ * TODO: To solve this, we need to either request EEMI to configure
+ * AXI MM or split the mmapped space into tiles based lists.
+ */
+ if (!aie_part_check_clk_enable_loc(apart, &loc)) {
+ dev_err(&apart->dev,
+ "Tile(%u,%d) is gated.\n", loc.col, loc.row);
+ return -EINVAL;
+ }
+
if (!is_write)
return 0;
@@ -304,6 +325,10 @@ static long aie_part_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
return aie_part_detach_dmabuf_req(apart, argp);
case AIE_SET_SHIMDMA_DMABUF_BD_IOCTL:
return aie_part_set_dmabuf_bd(apart, argp);
+ case AIE_REQUEST_TILES_IOCTL:
+ return aie_part_request_tiles_from_user(apart, argp);
+ case AIE_RELEASE_TILES_IOCTL:
+ return aie_part_release_tiles_from_user(apart, argp);
default:
dev_err(&apart->dev, "Invalid ioctl command %u.\n", cmd);
ret = -EINVAL;
@@ -343,6 +368,7 @@ static void aie_part_release_device(struct device *dev)
apart->range.size.col);
list_del(&apart->node);
mutex_unlock(&adev->mlock);
+ aie_resource_uninitialize(&apart->cores_clk_state);
put_device(apart->dev.parent);
}
@@ -466,6 +492,12 @@ static struct aie_partition *aie_create_partition(struct aie_device *adev,
return ERR_PTR(ret);
}
+ ret = adev->ops->init_part_clk_state(apart);
+ if (ret) {
+ put_device(dev);
+ return ERR_PTR(ret);
+ }
+
ret = mutex_lock_interruptible(&adev->mlock);
if (ret) {
put_device(dev);
diff --git a/drivers/misc/xilinx-ai-engine/ai-engine-res.c b/drivers/misc/xilinx-ai-engine/ai-engine-res.c
index 36f08bf..b0c0741 100644
--- a/drivers/misc/xilinx-ai-engine/ai-engine-res.c
+++ b/drivers/misc/xilinx-ai-engine/ai-engine-res.c
@@ -112,3 +112,54 @@ void aie_resource_put_region(struct aie_resource *res, int start, u32 count)
return;
bitmap_clear(res->bitmap, start, count);
}
+
+/**
+ * aie_resource_set() - set the AI engine resource bits
+ * @res: pointer to AI engine resource
+ * @start: start bit to set
+ * @count: number of bits to set
+ * @return: 0 for success and negative value for failure
+ *
+ * This function sets the specified number bits in the resource.
+ */
+int aie_resource_set(struct aie_resource *res, u32 start, u32 count)
+{
+ if (!res || !res->bitmap || !count || start + count > res->total)
+ return -EINVAL;
+
+ bitmap_set(res->bitmap, start, count);
+ return 0;
+}
+
+/**
+ * aie_resource_clear() - clear the AI engine resource bits
+ * @res: pointer to AI engine resource
+ * @start: start bit to set
+ * @count: number of bits to clear
+ * @return: 0 for success and negative value for failure
+ *
+ * This function clears the specified number bits in the resource.
+ */
+int aie_resource_clear(struct aie_resource *res, u32 start, u32 count)
+{
+ if (!res || !res->bitmap || !count || start + count > res->total)
+ return -EINVAL;
+
+ bitmap_clear(res->bitmap, start, count);
+ return 0;
+}
+
+/**
+ * aie_resource_testbit() - test if a bit is set in a AI engine resource
+ * @res: pointer to AI engine resource
+ * @bit: bit to check
+ * @return: true for set, false for not set
+ */
+bool aie_resource_testbit(struct aie_resource *res, u32 bit)
+{
+ if (!res || !res->bitmap || bit >= res->total)
+ return false;
+
+ /* Locate the unsigned long the required bit belongs to */
+ return test_bit(bit, res->bitmap);
+}
diff --git a/include/uapi/linux/xlnx-ai-engine.h b/include/uapi/linux/xlnx-ai-engine.h
index 9080f57..5db5e31f 100644
--- a/include/uapi/linux/xlnx-ai-engine.h
+++ b/include/uapi/linux/xlnx-ai-engine.h
@@ -144,6 +144,16 @@ struct aie_dmabuf_bd_args {
__u32 bd_id;
};
+/**
+ * struct aie_tiles_array - AIE tiles array
+ * @locs: tiles locations array
+ * @num_tiles: number of tiles in the tiles locations array
+ */
+struct aie_tiles_array {
+ struct aie_location *locs;
+ __u32 num_tiles;
+};
+
#define AIE_IOCTL_BASE 'A'
/* AI engine device IOCTL operations */
@@ -202,4 +212,25 @@ struct aie_dmabuf_bd_args {
#define AIE_SET_SHIMDMA_DMABUF_BD_IOCTL _IOW(AIE_IOCTL_BASE, 0x10, \
struct aie_dmabuf_bd_args)
+/**
+ * DOC: AIE_REQUEST_TILES_IOCTL - request AI engine tiles
+ *
+ * This ioctl is used to request tiles.
+ * When requested the AI engine partition, the kernel driver will scan the
+ * partition to track which tiles are enabled or not. After that, if user
+ * want to request for more tiles, it will use this ioctl to request more
+ * tiles.
+ * If the aie_tiles_array is empty, it means it will request for all tiles
+ * in the partition.
+ */
+#define AIE_REQUEST_TILES_IOCTL _IOW(AIE_IOCTL_BASE, 0xe, \
+ struct aie_tiles_array)
+
+/**
+ * DOC: AIE_RELEASE_TILES_IOCTL - release AI engine tiles
+ *
+ * This ioctl is used to release tiles
+ */
+#define AIE_RELEASE_TILES_IOCTL _IOW(AIE_IOCTL_BASE, 0xf, \
+ struct aie_tiles_array)
#endif
--
2.7.4
Powered by blists - more mailing lists