Message-Id: <20211215103609.9268-2-dejia.shang@armchina.com>
Date:   Wed, 15 Dec 2021 18:36:06 +0800
From:   Dejia Shang <dejia.shang@...china.com>
To:     gregkh@...uxfoundation.org, robh+dt@...nel.org,
        linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
        devicetree@...r.kernel.org
Cc:     dejia.shang@...china.com, toby.shang@...china.com
Subject: [PATCH 1/4] misc: add ArmChina Zhouyi NPU driver

This kernel driver provides basic support for the Zhouyi Z1 and Z2 NPU IP.
It works together with the Zhouyi user mode driver to schedule AI inference
tasks on Zhouyi NPUs.
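
For illustration, a user mode driver might drive the char device roughly as
below. This is a minimal sketch only: the device node name and the way the
job descriptor gets filled are assumptions; see include/uapi/misc/armchina_aipu.h
for the actual UAPI.

#include <fcntl.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <misc/armchina_aipu.h>      /* installed UAPI header path assumed */

static int submit_and_wait(void)
{
	struct aipu_job_desc desc = { 0 };   /* filled in by the UMD in practice */
	struct pollfd pfd;
	int fd = open("/dev/aipu", O_RDWR);  /* device node name is an assumption */

	if (fd < 0 || ioctl(fd, AIPU_IOCTL_SCHEDULE_JOB, &desc))
		return -1;
	pfd.fd = fd;
	pfd.events = POLLIN;                 /* driver reports end-of-job via poll */
	return poll(&pfd, 1, -1) > 0 ? 0 : -1;
}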

Vendors of SoC chips with a Zhouyi NPU inside should implement their own
SoC-level operations and replace aipu_soc_default.c if their chips have
specific clock or other settings.
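
A minimal vendor glue sketch under that model might look as below (the xx_*
names are placeholders; passing NULL soc and ops falls back to the default
SoC behavior, as documented at armchina_aipu_probe()):

#include <linux/module.h>
#include <linux/platform_device.h>
#include "armchina_aipu_soc.h"

/* Hypothetical vendor probe/remove; "xx" is a placeholder prefix. */
static int xx_aipu_probe(struct platform_device *p_dev)
{
	/* no vendor-specific clocks: use the driver's default SoC ops */
	return armchina_aipu_probe(p_dev, NULL, NULL);
}

static int xx_aipu_remove(struct platform_device *p_dev)
{
	return armchina_aipu_remove(p_dev);
}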

Signed-off-by: Dejia Shang <dejia.shang@...china.com>
---
 drivers/misc/Kconfig                          |   1 +
 drivers/misc/Makefile                         |   1 +
 drivers/misc/armchina-npu/Kconfig             |  15 +
 drivers/misc/armchina-npu/Makefile            |  11 +
 drivers/misc/armchina-npu/aipu.c              | 312 ++++++++
 drivers/misc/armchina-npu/aipu_core.c         | 418 ++++++++++
 drivers/misc/armchina-npu/aipu_core.h         | 100 +++
 drivers/misc/armchina-npu/aipu_io.c           |  74 ++
 drivers/misc/armchina-npu/aipu_io.h           |  27 +
 drivers/misc/armchina-npu/aipu_irq.c          | 113 +++
 drivers/misc/armchina-npu/aipu_irq.h          |  36 +
 drivers/misc/armchina-npu/aipu_job_manager.c  | 689 ++++++++++++++++
 drivers/misc/armchina-npu/aipu_job_manager.h  | 110 +++
 drivers/misc/armchina-npu/aipu_mm.c           | 740 ++++++++++++++++++
 drivers/misc/armchina-npu/aipu_mm.h           | 127 +++
 drivers/misc/armchina-npu/aipu_priv.c         | 280 +++++++
 drivers/misc/armchina-npu/aipu_priv.h         |  58 ++
 drivers/misc/armchina-npu/aipu_soc_default.c  |  82 ++
 drivers/misc/armchina-npu/config.h            |  12 +
 .../armchina-npu/include/armchina_aipu_soc.h  |  52 ++
 drivers/misc/armchina-npu/zhouyi/Makefile     |   4 +
 drivers/misc/armchina-npu/zhouyi/z1.c         | 244 ++++++
 drivers/misc/armchina-npu/zhouyi/z1.h         |  33 +
 drivers/misc/armchina-npu/zhouyi/z2.c         | 311 ++++++++
 drivers/misc/armchina-npu/zhouyi/z2.h         |  47 ++
 drivers/misc/armchina-npu/zhouyi/zhouyi.c     | 142 ++++
 drivers/misc/armchina-npu/zhouyi/zhouyi.h     |  73 ++
 include/uapi/misc/armchina_aipu.h             | 335 ++++++++
 28 files changed, 4447 insertions(+)
 create mode 100644 drivers/misc/armchina-npu/Kconfig
 create mode 100644 drivers/misc/armchina-npu/Makefile
 create mode 100644 drivers/misc/armchina-npu/aipu.c
 create mode 100644 drivers/misc/armchina-npu/aipu_core.c
 create mode 100644 drivers/misc/armchina-npu/aipu_core.h
 create mode 100644 drivers/misc/armchina-npu/aipu_io.c
 create mode 100644 drivers/misc/armchina-npu/aipu_io.h
 create mode 100644 drivers/misc/armchina-npu/aipu_irq.c
 create mode 100644 drivers/misc/armchina-npu/aipu_irq.h
 create mode 100644 drivers/misc/armchina-npu/aipu_job_manager.c
 create mode 100644 drivers/misc/armchina-npu/aipu_job_manager.h
 create mode 100644 drivers/misc/armchina-npu/aipu_mm.c
 create mode 100644 drivers/misc/armchina-npu/aipu_mm.h
 create mode 100644 drivers/misc/armchina-npu/aipu_priv.c
 create mode 100644 drivers/misc/armchina-npu/aipu_priv.h
 create mode 100644 drivers/misc/armchina-npu/aipu_soc_default.c
 create mode 100644 drivers/misc/armchina-npu/config.h
 create mode 100644 drivers/misc/armchina-npu/include/armchina_aipu_soc.h
 create mode 100644 drivers/misc/armchina-npu/zhouyi/Makefile
 create mode 100644 drivers/misc/armchina-npu/zhouyi/z1.c
 create mode 100644 drivers/misc/armchina-npu/zhouyi/z1.h
 create mode 100644 drivers/misc/armchina-npu/zhouyi/z2.c
 create mode 100644 drivers/misc/armchina-npu/zhouyi/z2.h
 create mode 100644 drivers/misc/armchina-npu/zhouyi/zhouyi.c
 create mode 100644 drivers/misc/armchina-npu/zhouyi/zhouyi.h
 create mode 100644 include/uapi/misc/armchina_aipu.h

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 0f5a49fc7c9e..a27fec2891d6 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -487,4 +487,5 @@ source "drivers/misc/cardreader/Kconfig"
 source "drivers/misc/habanalabs/Kconfig"
 source "drivers/misc/uacce/Kconfig"
 source "drivers/misc/pvpanic/Kconfig"
+source "drivers/misc/armchina-npu/Kconfig"
 endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index a086197af544..cc18c3eaefc2 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -59,3 +59,4 @@ obj-$(CONFIG_UACCE)           += uacce/
 obj-$(CONFIG_XILINX_SDFEC)     += xilinx_sdfec.o
 obj-$(CONFIG_HISI_HIKEY_USB)   += hisi_hikey_usb.o
 obj-$(CONFIG_HI6421V600_IRQ)   += hi6421v600-irq.o
+obj-$(CONFIG_ARMCHINA_NPU)     += armchina-npu/
diff --git a/drivers/misc/armchina-npu/Kconfig b/drivers/misc/armchina-npu/Kconfig
new file mode 100644
index 000000000000..9c1b67d88d66
--- /dev/null
+++ b/drivers/misc/armchina-npu/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# ArmChina NPU driver
+#
+
+config ARMCHINA_NPU
+       tristate "ArmChina NPU"
+       help
+         Enables the driver for ArmChina's AI accelerator, which is designed
+         to accelerate AI inference tasks on Arm SoCs.
+
+         The driver manages the NPU char device and provides an IOCTL interface
+         for the user to submit workloads to the NPU cores.
+
+         If unsure, say N.
diff --git a/drivers/misc/armchina-npu/Makefile b/drivers/misc/armchina-npu/Makefile
new file mode 100644
index 000000000000..935ead214a27
--- /dev/null
+++ b/drivers/misc/armchina-npu/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+subdir-ccflags-y += -I$(src)/include
+subdir-ccflags-y += -I$(src)/zhouyi
+
+obj-$(CONFIG_ARMCHINA_NPU)  += armchina_npu.o
+armchina_npu-y       := aipu.o aipu_core.o aipu_io.o aipu_irq.o  \
+                       aipu_job_manager.o aipu_mm.o aipu_priv.o \
+                       aipu_soc_default.o
+
+include $(src)/zhouyi/Makefile
+armchina_npu-y       += $(ZHOUYI_FILES)
diff --git a/drivers/misc/armchina-npu/aipu.c b/drivers/misc/armchina-npu/aipu.c
new file mode 100644
index 000000000000..6add10b00b98
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/poll.h>
+#include <linux/compat.h>
+#include <uapi/misc/armchina_aipu.h>
+#include "include/armchina_aipu_soc.h"
+#include "aipu_mm.h"
+#include "aipu_job_manager.h"
+#include "aipu_priv.h"
+#include "zhouyi.h"
+#include "config.h"
+
+static struct aipu_priv *aipu;
+
+static int aipu_open(struct inode *inode, struct file *filp)
+{
+       filp->private_data = aipu;
+       return aipu_priv_check_status(aipu);
+}
+
+static int aipu_release(struct inode *inode, struct file *filp)
+{
+       int ret = 0;
+       struct aipu_priv *aipu = filp->private_data;
+
+       ret = aipu_job_manager_cancel_jobs(&aipu->job_manager, filp);
+       if (ret)
+               return ret;
+
+       aipu_mm_free_buffers(&aipu->mm, filp);
+       return 0;
+}
+
+static long aipu_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+       int ret = 0;
+       struct aipu_priv *aipu = filp->private_data;
+
+       u32 core_cnt = 0;
+       struct aipu_core_cap *core_cap = NULL;
+       struct aipu_cap cap;
+       struct aipu_buf_request buf_req;
+       struct aipu_job_desc user_job;
+       struct aipu_buf_desc desc;
+       struct aipu_io_req io_req;
+       struct aipu_job_status_query status;
+       u32 job_id;
+
+       switch (cmd) {
+       case AIPU_IOCTL_QUERY_CAP:
+               ret = aipu_priv_query_capability(aipu, &cap);
+               if (!ret && copy_to_user((struct aipu_cap __user *)arg, &cap, sizeof(cap)))
+                       ret = -EFAULT;
+               break;
+       case AIPU_IOCTL_QUERY_CORE_CAP:
+               core_cnt = aipu_priv_get_core_cnt(aipu);
+               core_cap = kcalloc(core_cnt, sizeof(*core_cap), GFP_KERNEL);
+               if (core_cap) {
+                       ret = aipu_priv_query_core_capability(aipu, core_cap);
+                       if (!ret &&
+                           copy_to_user((struct aipu_core_cap __user *)arg, core_cap,
+                                        core_cnt * sizeof(*core_cap)))
+                               ret = -EFAULT;
+                       kfree(core_cap);
+                       core_cap = NULL;
+               } else {
+                       ret = -ENOMEM;
+               }
+               break;
+       case AIPU_IOCTL_REQ_BUF:
+               if (!copy_from_user(&buf_req, (struct aipu_buf_request __user *)arg,
+                                   sizeof(buf_req))) {
+                       ret = aipu_mm_alloc(&aipu->mm, &buf_req, filp);
+                       if (!ret &&
+                           copy_to_user((struct aipu_buf_request __user *)arg, &buf_req,
+                                        sizeof(buf_req)))
+                               ret = -EFAULT;
+               } else {
+                       ret = -EFAULT;
+               }
+               break;
+       case AIPU_IOCTL_FREE_BUF:
+               if (!copy_from_user(&desc, (struct aipu_buf_desc __user *)arg, sizeof(desc)))
+                       ret = aipu_mm_free(&aipu->mm, &desc, filp);
+               else
+                       ret = -EFAULT;
+               break;
+       case AIPU_IOCTL_DISABLE_SRAM:
+               ret = aipu_mm_disable_sram_allocation(&aipu->mm, filp);
+               break;
+       case AIPU_IOCTL_ENABLE_SRAM:
+               ret = aipu_mm_enable_sram_allocation(&aipu->mm, filp);
+               break;
+       case AIPU_IOCTL_SCHEDULE_JOB:
+               if (!copy_from_user(&user_job, (struct aipu_job_desc __user *)arg,
+                                   sizeof(user_job)))
+                       ret = aipu_job_manager_scheduler(&aipu->job_manager, &user_job, filp);
+               else
+                       ret = -EFAULT;
+               break;
+       case AIPU_IOCTL_QUERY_STATUS:
+               if (!copy_from_user(&status, (struct aipu_job_status_query __user *)arg,
+                                   sizeof(status))) {
+                       ret = aipu_job_manager_get_job_status(&aipu->job_manager, &status, filp);
+                       if (!ret &&
+                           copy_to_user((struct aipu_job_status_query __user *)arg, &status,
+                                        sizeof(status)))
+                               ret = -EFAULT;
+               }
+               break;
+       case AIPU_IOCTL_KILL_TIMEOUT_JOB:
+               if (!copy_from_user(&job_id, (u32 __user *)arg, sizeof(job_id)))
+                       ret = aipu_job_manager_invalidate_timeout_job(&aipu->job_manager, job_id);
+               else
+                       ret = -EFAULT;
+               break;
+       case AIPU_IOCTL_REQ_IO:
+               if (!copy_from_user(&io_req, (struct aipu_io_req __user *)arg, sizeof(io_req))) {
+                       ret = aipu_priv_io_rw(aipu, &io_req);
+                       if (!ret &&
+                           copy_to_user((struct aipu_io_req __user *)arg, &io_req,
+                                        sizeof(io_req)))
+                               ret = -EFAULT;
+               } else {
+                       ret = -EFAULT;
+               }
+               break;
+       default:
+               ret = -ENOTTY;
+               break;
+       }
+
+       return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long aipu_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+       arg = (unsigned long)compat_ptr(arg);
+
+       return aipu_ioctl(filp, cmd, arg);
+}
+#endif
+
+static int aipu_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+       struct aipu_priv *aipu = filp->private_data;
+
+       return aipu_mm_mmap_buf(&aipu->mm, vma, filp);
+}
+
+static __poll_t aipu_poll(struct file *filp, struct poll_table_struct *wait)
+{
+       __poll_t mask = 0;
+       struct aipu_priv *aipu = filp->private_data;
+
+       if (aipu_job_manager_has_end_job(&aipu->job_manager, filp, wait, task_pid_nr(current)))
+               mask |= EPOLLIN | EPOLLRDNORM;
+
+       return mask;
+}
+
+static const struct file_operations aipu_fops = {
+       .owner = THIS_MODULE,
+       .open = aipu_open,
+       .poll = aipu_poll,
+       .unlocked_ioctl = aipu_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl = aipu_compat_ioctl,
+#endif
+       .mmap = aipu_mmap,
+       .release = aipu_release,
+};
+
+/**
+ * armchina_aipu_probe() - probe operation of the platform driver provided by ArmChina
+ * @p_dev: pointer to the AIPU platform device struct.
+ * @soc:   pointer to the AIPU SoC struct containing SoC-specific data.
+ *         this argument can be NULL if the SoC vendor has no AIPU related SoC data structure.
+ * @ops:   pointer to the AIPU SoC operations struct.
+ *         this argument can be NULL if the SoC vendor does not provide any SoC operation. In
+ *         that case the SoC related operations in the AIPU driver are unavailable.
+ *
+ * This function should be called in a SoC vendor provided xx_aipu_probe() function,
+ * which is registered to the platform_driver struct; if no such xx_aipu_probe() is provided,
+ * the SoC vendor should register this function directly in the platform_driver struct.
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int armchina_aipu_probe(struct platform_device *p_dev, struct aipu_soc *soc,
+                       struct aipu_soc_operations *ops)
+{
+       int ret = 0;
+       struct device *dev = &p_dev->dev;
+       struct aipu_core *core = NULL;
+       int version = 0;
+       int config = 0;
+       int id = 0;
+
+       /* create & init aipu priv struct shared by all cores */
+       if (!aipu) {
+               aipu = devm_kzalloc(dev, sizeof(*aipu), GFP_KERNEL);
+               if (!aipu)
+                       return -ENOMEM;
+
+               dev_info(dev, "AIPU KMD probe start...\n");
+               ret = init_aipu_priv(aipu, p_dev, &aipu_fops, soc, ops);
+               if (ret)
+                       return ret;
+       }
+
+       zhouyi_detect_aipu_version(p_dev, &version, &config);
+       of_property_read_u32(dev->of_node, "core-id", &id);
+       if (version == AIPU_ISA_VERSION_ZHOUYI_V1 ||
+           version == AIPU_ISA_VERSION_ZHOUYI_V2 ||
+           version == AIPU_ISA_VERSION_ZHOUYI_V3) {
+               dev_info(dev, "AIPU core #%d detected: zhouyi-v%d-%04d\n", id, version, config);
+       } else {
+               dev_err(dev, "unsupported AIPU core detected (id %d, version 0x%x)\n",
+                       id, version);
+               ret = -EINVAL;
+               goto out_clean;
+       }
+
+       core = devm_kzalloc(dev, sizeof(*core), GFP_KERNEL);
+       if (!core) {
+               ret = -ENOMEM;
+               goto out_clean;
+       }
+
+       ret = aipu_priv_add_core(aipu, core, version, id, p_dev);
+       if (ret)
+               goto out_clean;
+
+       dev_info(dev, "initialize AIPU core #%d done\n", id);
+       platform_set_drvdata(p_dev, core);
+       goto finish;
+
+out_clean:
+       armchina_aipu_remove(p_dev);
+
+finish:
+       return ret;
+}
+EXPORT_SYMBOL(armchina_aipu_probe);
+
+/**
+ * armchina_aipu_remove() - remove operation of the platform driver provided by ArmChina
+ * @p_dev: pointer to the AIPU platform device struct
+ *
+ * This function should be called in a SoC vendor provided xx_aipu_remove() function,
+ * which is registered to the platform_driver struct; if no such xx_aipu_remove() is provided,
+ * this function should be registered directly in the platform_driver struct.
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int armchina_aipu_remove(struct platform_device *p_dev)
+{
+       if (!aipu || !aipu->is_init)
+               return 0;
+       return deinit_aipu_priv(aipu);
+}
+EXPORT_SYMBOL(armchina_aipu_remove);
+
+/**
+ * armchina_aipu_suspend() - suspend operation of the platform driver provided by ArmChina
+ * @p_dev: pointer to the AIPU platform device struct.
+ * @state: power state the device is entering.
+ *
+ * This function disables the AIPU clock at SoC level with the disable_clk method
+ * registered at SoC probing time. The SoC vendor can register this function directly
+ * in the platform_driver struct or implement a private xx_aipu_suspend() as a replacement.
+ */
+int armchina_aipu_suspend(struct platform_device *p_dev, pm_message_t state)
+{
+       struct aipu_core *core = platform_get_drvdata(p_dev);
+
+       if (aipu && aipu->soc_ops && aipu->soc_ops->disable_clk)
+               aipu->soc_ops->disable_clk(core->dev, aipu->soc);
+       return 0;
+}
+EXPORT_SYMBOL(armchina_aipu_suspend);
+
+/**
+ * armchina_aipu_resume() - resume operation of the platform driver provided by ArmChina
+ * @p_dev: pointer to the AIPU platform device struct.
+ *
+ * This function enables the AIPU clock at SoC level with the enable_clk method
+ * registered at SoC probing time, and enables the AIPU interrupts. The SoC vendor
+ * should register this function directly in the platform_driver struct, or implement
+ * a private xx_aipu_resume() that calls this function.
+ */
+int armchina_aipu_resume(struct platform_device *p_dev)
+{
+       struct aipu_core *core = platform_get_drvdata(p_dev);
+
+       if (aipu && aipu->soc_ops && aipu->soc_ops->enable_clk)
+               aipu->soc_ops->enable_clk(core->dev, aipu->soc);
+
+       core->ops->enable_interrupt(core);
+       return 0;
+}
+EXPORT_SYMBOL(armchina_aipu_resume);
diff --git a/drivers/misc/armchina-npu/aipu_core.c b/drivers/misc/armchina-npu/aipu_core.c
new file mode 100644
index 000000000000..3ed989801f7b
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_core.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#include <linux/slab.h>
+#include <linux/string.h>
+#include "aipu_priv.h"
+#include "config.h"
+#include "z1.h"
+#include "z2.h"
+
+#define MAX_CHAR_SYSFS 4096
+
+inline struct aipu_soc *get_soc(struct aipu_core *core)
+{
+       if (core && core->priv)
+               return core->priv->soc;
+       return NULL;
+}
+
+inline struct aipu_soc_operations *get_soc_ops(struct aipu_core *core)
+{
+       if (core && core->priv)
+               return core->priv->soc_ops;
+       return NULL;
+}
+
+inline struct aipu_job_manager *get_job_manager(struct aipu_core *core)
+{
+       if (core && core->priv)
+               return &core->priv->job_manager;
+       return NULL;
+}
+
+#ifdef CONFIG_SYSFS
+static ssize_t aipu_core_ext_register_sysfs_show(struct device *dev,
+                                                struct device_attribute *attr,
+                                                char *buf)
+{
+       int ret = 0;
+       char tmp[512];
+       struct platform_device *p_dev = container_of(dev, struct platform_device, dev);
+       struct aipu_core *core = platform_get_drvdata(p_dev);
+
+       if (unlikely(!core))
+               return 0;
+
+       if (get_soc_ops(core) &&
+           get_soc_ops(core)->is_clk_enabled &&
+           !get_soc_ops(core)->is_clk_enabled(dev, get_soc(core))) {
+               return snprintf(buf, MAX_CHAR_SYSFS,
+                   "AIPU is suspended and external registers cannot be read!\n");
+       }
+
+       ret += snprintf(tmp, sizeof(tmp), "----------------------------------------\n");
+       strcat(buf, tmp);
+       ret += snprintf(tmp, sizeof(tmp), "   AIPU Core%d External Register Values\n", core->id);
+       strcat(buf, tmp);
+       ret += snprintf(tmp, sizeof(tmp), "----------------------------------------\n");
+       strcat(buf, tmp);
+       ret += snprintf(tmp, sizeof(tmp), "%-*s%-*s%-*s\n", 8, "Offset", 22, "Name", 10, "Value");
+       strcat(buf, tmp);
+       ret += snprintf(tmp, sizeof(tmp), "----------------------------------------\n");
+       strcat(buf, tmp);
+       ret += core->ops->sysfs_show(core, buf);
+       ret += snprintf(tmp, sizeof(tmp), "----------------------------------------\n");
+       strcat(buf, tmp);
+
+       return ret;
+}
+
+static ssize_t aipu_core_ext_register_sysfs_store(struct device *dev,
+                                                 struct device_attribute *attr,
+                                                 const char *buf, size_t count)
+{
+       int i = 0;
+       int ret = 0;
+       char *token = NULL;
+       char *buf_dup = NULL;
+       int value[3] = { 0 };
+       struct aipu_io_req io_req;
+       struct platform_device *p_dev = container_of(dev, struct platform_device, dev);
+       struct aipu_core *core = platform_get_drvdata(p_dev);
+
+       if (get_soc_ops(core) &&
+           get_soc_ops(core)->is_clk_enabled &&
+           !get_soc_ops(core)->is_clk_enabled(dev, get_soc(core)))
+               return -ENODEV;
+
+       buf_dup = kzalloc(1024, GFP_KERNEL);
+       if (!buf_dup)
+               return -ENOMEM;
+       snprintf(buf_dup, 1024, "%s", buf);
+
+       for (i = 0; i < 3; i++) {
+               token = strsep(&buf_dup, "-");
+               if (!token) {
+                       dev_err(dev, "[SYSFS] please echo as this format: <reg_offset>-<write time>-<write value>");
+                       goto out_free_buffer;
+               }
+
+               dev_dbg(dev, "[SYSFS] to convert str: %s", token);
+
+               ret = kstrtouint(token, 0, &value[i]);
+               if (ret) {
+                       dev_err(dev, "[SYSFS] convert str to int failed (%d): %s", ret, token);
+                       goto out_free_buffer;
+               }
+       }
+
+       dev_dbg(dev, "[SYSFS] offset 0x%x, time 0x%x, value 0x%x",
+               value[0], value[1], value[2]);
+
+       io_req.rw = AIPU_IO_WRITE;
+       io_req.offset = value[0];
+       io_req.value = value[2];
+       for (i = 0; i < value[1]; i++) {
+               dev_dbg(dev, "[SYSFS] writing register 0x%x with value 0x%x", value[0], value[2]);
+               core->ops->io_rw(core, &io_req);
+       }
+
+out_free_buffer:
+       kfree(buf_dup);
+       return count;
+}
+
+static ssize_t aipu_core_clock_sysfs_show(struct device *dev,
+                                         struct device_attribute *attr,
+                                         char *buf)
+{
+       struct platform_device *p_dev = container_of(dev, struct platform_device, dev);
+       struct aipu_core *core = platform_get_drvdata(p_dev);
+
+       /*
+        * If SoC level provides no clock operations,
+        * the state of AIPU is by default treated as normal.
+        */
+       if (get_soc_ops(core) &&
+           get_soc_ops(core)->is_clk_enabled &&
+           !get_soc_ops(core)->is_clk_enabled(dev, get_soc(core)))
+               return snprintf(buf, MAX_CHAR_SYSFS,
+                               "AIPU is in clock gating state and suspended.\n");
+       else
+               return snprintf(buf, MAX_CHAR_SYSFS, "AIPU is in normal working state.\n");
+}
+
+static ssize_t aipu_core_clock_sysfs_store(struct device *dev,
+                                          struct device_attribute *attr,
+                                          const char *buf, size_t count)
+{
+       int do_suspend = 0;
+       int do_resume = 0;
+       struct platform_device *p_dev = container_of(dev, struct platform_device, dev);
+       struct aipu_core *core = platform_get_drvdata(p_dev);
+
+       if (unlikely(!core))
+               return count;
+
+       if (!get_soc_ops(core) ||
+           !get_soc_ops(core)->enable_clk ||
+           !get_soc_ops(core)->disable_clk ||
+           !get_soc_ops(core)->is_clk_enabled) {
+               dev_info(dev, "operation is not supported.\n");
+               return count;
+       }
+
+       if ((strncmp(buf, "1", 1) == 0))
+               do_suspend = 1;
+       else if ((strncmp(buf, "0", 1) == 0))
+               do_resume = 1;
+
+       if (get_soc_ops(core)->is_clk_enabled(dev, get_soc(core)) &&
+           core->ops->is_idle(core) && do_suspend) {
+               dev_info(dev, "disable clock\n");
+               get_soc_ops(core)->disable_clk(core->dev, get_soc(core));
+       } else if (!get_soc_ops(core)->is_clk_enabled(dev, get_soc(core)) && do_resume) {
+               dev_info(dev, "enable clock\n");
+               get_soc_ops(core)->enable_clk(core->dev, get_soc(core));
+       } else {
+               dev_err(dev, "operation cannot be completed!\n");
+       }
+
+       return count;
+}
+
+static ssize_t aipu_core_disable_sysfs_show(struct device *dev, struct device_attribute *attr,
+                                           char *buf)
+{
+       struct platform_device *p_dev = container_of(dev, struct platform_device, dev);
+       struct aipu_core *core = platform_get_drvdata(p_dev);
+
+       if (atomic_read(&core->disable)) {
+               return snprintf(buf, MAX_CHAR_SYSFS,
+                   "AIPU core #%d is disabled (echo 0 > /sys/devices/platform/[dev]/disable to enable it).\n",
+                   core->id);
+       } else {
+               return snprintf(buf, MAX_CHAR_SYSFS,
+                   "AIPU core #%d is enabled (echo 1 > /sys/devices/platform/[dev]/disable to disable it).\n",
+                   core->id);
+       }
+}
+
+static ssize_t aipu_core_disable_sysfs_store(struct device *dev, struct device_attribute *attr,
+                                            const char *buf, size_t count)
+{
+       int do_disable = 0;
+       struct platform_device *p_dev = container_of(dev, struct platform_device, dev);
+       struct aipu_core *core = platform_get_drvdata(p_dev);
+
+       if ((strncmp(buf, "1", 1) == 0))
+               do_disable = 1;
+       else if ((strncmp(buf, "0", 1) == 0))
+               do_disable = 0;
+       else
+               do_disable = -1;
+
+       if (atomic_read(&core->disable) && !do_disable) {
+               dev_info(dev, "enable core...\n");
+               atomic_set(&core->disable, 0);
+       } else if (!atomic_read(&core->disable) && do_disable) {
+               dev_info(dev, "disable core...\n");
+               atomic_set(&core->disable, 1);
+       }
+
+       return count;
+}
+
+typedef ssize_t (*sysfs_show_t)(struct device *dev, struct device_attribute *attr, char *buf);
+typedef ssize_t (*sysfs_store_t)(struct device *dev, struct device_attribute *attr,
+                                const char *buf, size_t count);
+
+static struct device_attribute *aipu_core_create_attr(struct device *dev,
+                                                     struct device_attribute **attr,
+                                                     const char *name, int mode,
+                                                     sysfs_show_t show, sysfs_store_t store)
+{
+       if (!dev || !attr || !name)
+               return ERR_PTR(-EINVAL);
+
+       *attr = kzalloc(sizeof(**attr), GFP_KERNEL);
+       if (!*attr)
+               return ERR_PTR(-ENOMEM);
+
+       (*attr)->attr.name = name;
+       (*attr)->attr.mode = mode;
+       (*attr)->show = show;
+       (*attr)->store = store;
+       device_create_file(dev, *attr);
+
+       return *attr;
+}
+
+static void aipu_core_destroy_attr(struct device *dev, struct device_attribute **attr)
+{
+       if (!dev || !attr || !*attr)
+               return;
+
+       device_remove_file(dev, *attr);
+       kfree(*attr);
+       *attr = NULL;
+}
+#endif
+
+/**
+ * init_aipu_core() - init an AIPU core struct in the driver probe phase
+ * @core:     AIPU hardware core created in a calling function
+ * @version:  AIPU core hardware version
+ * @id:       AIPU core ID
+ * @priv:     pointer to aipu_private struct
+ * @p_dev:    platform device struct pointer
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int init_aipu_core(struct aipu_core *core, int version, int id, struct aipu_priv *priv,
+                  struct platform_device *p_dev)
+{
+       int ret = 0;
+       struct resource *res = NULL;
+       u64 base = 0;
+       u64 size = 0;
+
+       if (!core || !p_dev || !priv)
+               return -EINVAL;
+
+       WARN_ON(core->is_init);
+       WARN_ON(version != AIPU_ISA_VERSION_ZHOUYI_V1 &&
+               version != AIPU_ISA_VERSION_ZHOUYI_V2 &&
+               version != AIPU_ISA_VERSION_ZHOUYI_V3);
+
+       core->version = version;
+       core->id = id;
+       core->dev = &p_dev->dev;
+       core->priv = priv;
+       atomic_set(&core->disable, 0);
+       snprintf(core->core_name, sizeof(core->core_name), "aipu%d", id);
+
+       if (version == AIPU_ISA_VERSION_ZHOUYI_V1) {
+               core->max_sched_num = ZHOUYI_V1_MAX_SCHED_JOB_NUM;
+               core->ops = get_zhouyi_v1_ops();
+       } else if (version == AIPU_ISA_VERSION_ZHOUYI_V2 ||
+                  version == AIPU_ISA_VERSION_ZHOUYI_V3) {
+               core->max_sched_num = ZHOUYI_V2_MAX_SCHED_JOB_NUM;
+               core->ops = get_zhouyi_v2_ops();
+       }
+
+       res = platform_get_resource(p_dev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(core->dev, "get aipu core #%d IO region failed\n", id);
+               ret = -EINVAL;
+               goto init_reg_fail;
+       }
+       base = res->start;
+       size = resource_size(res);
+       dev_dbg(core->dev, "get aipu core #%d IO region: [0x%llx, 0x%llx]\n",
+               id, base, res->end);
+
+       ret = init_aipu_ioregion(&core->reg, base, size);
+       if (ret) {
+               dev_err(core->dev,
+                       "create aipu core #%d IO region failed: base 0x%llx, size 0x%llx\n",
+                       id, base, size);
+               goto init_reg_fail;
+       }
+       dev_dbg(core->dev, "init aipu core #%d IO region done: [0x%llx, 0x%llx]\n",
+               id, base, res->end);
+
+       res = platform_get_resource(p_dev, IORESOURCE_IRQ, 0);
+       if (!res) {
+               dev_err(core->dev, "get aipu core #%d IRQ number failed\n", id);
+               ret = -EINVAL;
+               goto init_irq_fail;
+       }
+       dev_dbg(core->dev, "get aipu core #%d IRQ number: 0x%x\n", id, (int)res->start);
+
+       core->irq_obj = aipu_create_irq_object(res->start, core, core->core_name);
+       if (!core->irq_obj) {
+               dev_err(core->dev, "create IRQ object for core #%d failed: IRQ 0x%x\n",
+                       id, (int)res->start);
+               ret = -EFAULT;
+               goto init_irq_fail;
+       }
+       dev_dbg(core->dev, "init aipu core #%d IRQ done\n", id);
+
+#ifdef CONFIG_SYSFS
+       if (IS_ERR(aipu_core_create_attr(core->dev, &core->reg_attr, "ext_registers", 0644,
+                                        aipu_core_ext_register_sysfs_show,
+                                        aipu_core_ext_register_sysfs_store))) {
+               ret = -EFAULT;
+               goto init_sysfs_fail;
+       }
+
+       if (priv->soc_ops &&
+           priv->soc_ops->enable_clk && priv->soc_ops->disable_clk &&
+           IS_ERR(aipu_core_create_attr(core->dev, &core->clk_attr, "soc_clock", 0644,
+                                        aipu_core_clock_sysfs_show,
+                                        aipu_core_clock_sysfs_store))) {
+               ret = -EFAULT;
+               goto init_sysfs_fail;
+       }
+
+       if (IS_ERR(aipu_core_create_attr(core->dev, &core->disable_attr, "disable", 0644,
+                                        aipu_core_disable_sysfs_show,
+                                        aipu_core_disable_sysfs_store))) {
+               ret = -EFAULT;
+               goto init_sysfs_fail;
+       }
+#else
+       core->reg_attr = NULL;
+       core->clk_attr = NULL;
+       core->disable_attr = NULL;
+#endif
+
+       core->arch = AIPU_ARCH_ZHOUYI;
+       core->config = core->ops->get_config(core);
+       core->ops->enable_interrupt(core);
+       core->ops->print_hw_id_info(core);
+
+       core->is_init = 1;
+       goto finish;
+
+#ifdef CONFIG_SYSFS
+init_sysfs_fail:
+       aipu_core_destroy_attr(core->dev, &core->reg_attr);
+       aipu_core_destroy_attr(core->dev, &core->clk_attr);
+       aipu_core_destroy_attr(core->dev, &core->disable_attr);
+#endif
+       aipu_destroy_irq_object(core->irq_obj);
+init_irq_fail:
+init_reg_fail:
+       deinit_aipu_ioregion(&core->reg);
+
+finish:
+       return ret;
+}
+
+/**
+ * deinit_aipu_core() - deinit a created aipu_core struct
+ * @core: pointer to struct aipu_core initialized in init_aipu_core()
+ */
+void deinit_aipu_core(struct aipu_core *core)
+{
+       if (!core)
+               return;
+
+       core->ops->disable_interrupt(core);
+       deinit_aipu_ioregion(&core->reg);
+
+       if (core->irq_obj) {
+               aipu_destroy_irq_object(core->irq_obj);
+               core->irq_obj = NULL;
+       }
+
+#ifdef CONFIG_SYSFS
+       aipu_core_destroy_attr(core->dev, &core->reg_attr);
+       aipu_core_destroy_attr(core->dev, &core->clk_attr);
+       aipu_core_destroy_attr(core->dev, &core->disable_attr);
+#endif
+       core->is_init = 0;
+}
diff --git a/drivers/misc/armchina-npu/aipu_core.h b/drivers/misc/armchina-npu/aipu_core.h
new file mode 100644
index 000000000000..306253a4b841
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_core.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#ifndef __AIPU_CORE_H__
+#define __AIPU_CORE_H__
+
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/atomic.h>
+#include <uapi/misc/armchina_aipu.h>
+#include "aipu_irq.h"
+#include "aipu_io.h"
+#include "zhouyi/zhouyi.h"
+#include "config.h"
+
+struct aipu_core;
+struct aipu_priv;
+
+/**
+ * struct aipu_core_operations - a struct containing AIPU hardware operation methods
+ * @get_version:        get hardware version number
+ * @get_config:         get hardware configuration number
+ * @enable_interrupt:   enable all AIPU interrupts
+ * @disable_interrupt:  disable all AIPU interrupts
+ * @trigger:            trigger a deferred-job to run on a reserved core
+ * @reserve:            reserve AIPU core for a job/deferred-job
+ * @is_idle:            is AIPU hardware idle or not
+ * @read_status_reg:    read status register value
+ * @print_hw_id_info:   print AIPU version ID registers information
+ * @io_rw:              direct IO read/write operations
+ * @upper_half:         interrupt upper half handler
+ * @bottom_half:        interrupt bottom half handler
+ * @sysfs_show:         show core external register values
+ */
+struct aipu_core_operations {
+       int (*get_version)(struct aipu_core *core);
+       int (*get_config)(struct aipu_core *core);
+       void (*enable_interrupt)(struct aipu_core *core);
+       void (*disable_interrupt)(struct aipu_core *core);
+       void (*trigger)(struct aipu_core *core);
+       int (*reserve)(struct aipu_core *core, struct aipu_job_desc *udesc,
+                      int do_trigger);
+       bool (*is_idle)(struct aipu_core *core);
+       int (*read_status_reg)(struct aipu_core *core);
+       void (*print_hw_id_info)(struct aipu_core *core);
+       int (*io_rw)(struct aipu_core *core, struct aipu_io_req *io_req);
+       int (*upper_half)(void *data);
+       void (*bottom_half)(void *data);
+#ifdef CONFIG_SYSFS
+       int (*sysfs_show)(struct aipu_core *core, char *buf);
+#endif
+};
+
+/**
+ * struct aipu_core - a general struct describing a hardware AIPU core
+ * @id:              AIPU core ID
+ * @arch:            AIPU architecture number
+ * @version:         AIPU hardware version number
+ * @config:          AIPU hardware configuration number
+ * @core_name:       AIPU core name string
+ * @max_sched_num:   maximum number of jobs that can be scheduled in the pipeline
+ * @dev:             device struct pointer
+ * @reg:             IO region array of this AIPU core
+ * @ops:             operations of this core
+ * @irq_obj:         interrupt object of this core
+ * @priv:            pointer to aipu private struct
+ * @reg_attr:        external register attribute
+ * @clk_attr:        clock attribute
+ * @disable_attr:    disable core attribute
+ * @disable:         core disable flag (for debug usage)
+ * @is_init:         init flag
+ */
+struct aipu_core {
+       int id;
+       int arch;
+       int version;
+       int config;
+       char core_name[10];
+       int max_sched_num;
+       struct device *dev;
+       struct io_region reg;
+       struct aipu_core_operations *ops;
+       struct aipu_irq_object *irq_obj;
+       struct aipu_priv *priv;
+       struct device_attribute *reg_attr;
+       struct device_attribute *clk_attr;
+       struct device_attribute *disable_attr;
+       atomic_t disable;
+       int is_init;
+};
+
+int init_aipu_core(struct aipu_core *core, int version, int id, struct aipu_priv *priv,
+                  struct platform_device *p_dev);
+void deinit_aipu_core(struct aipu_core *core);
+struct aipu_soc *get_soc(struct aipu_core *core);
+struct aipu_soc_operations *get_soc_ops(struct aipu_core *core);
+struct aipu_job_manager *get_job_manager(struct aipu_core *core);
+
+#endif /* __AIPU_CORE_H__ */
diff --git a/drivers/misc/armchina-npu/aipu_io.c b/drivers/misc/armchina-npu/aipu_io.c
new file mode 100644
index 000000000000..df95fc0a4cf9
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_io.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#include <linux/ioport.h>
+#include "aipu_io.h"
+
+/**
+ * init_aipu_ioregion() - initialize struct io_region using physical base address
+ * @region:    pointer to struct io_region to be initialized
+ * @phys_base: base address of this region
+ * @size:      size of this region
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int init_aipu_ioregion(struct io_region *region, u64 phys_base, u32 size)
+{
+       int ret = 0;
+
+       if (!region || !size)
+               return -EINVAL;
+
+       if (!request_mem_region(phys_base, size, "aipu"))
+               return -ENOMEM;
+
+       region->kern = ioremap(phys_base, size);
+       if (!region->kern) {
+               release_mem_region(phys_base, size);
+               return -ENOMEM;
+       }
+
+       region->phys = phys_base;
+       region->size = size;
+       return ret;
+}
+
+/**
+ * deinit_aipu_ioregion() - destroy an AIPU IO region
+ * @region: pointer to struct io_region initialized in init_aipu_ioregion()
+ */
+void deinit_aipu_ioregion(struct io_region *region)
+{
+       if (region && region->kern) {
+               iounmap(region->kern);
+               release_mem_region(region->phys, region->size);
+               region->kern = NULL;
+               region->phys = 0;
+               region->size = 0;
+       }
+}
+
+/**
+ * aipu_read32() - read AIPU register in word
+ * @region: pointer to struct io_region initialized in init_aipu_ioregion()
+ * @offset: AIPU register offset
+ * Return: u32 value in the register or error code
+ */
+int aipu_read32(struct io_region *region, int offset)
+{
+       if (region && region->kern && offset < region->size)
+               return readl(region->kern + offset);
+       return -EINVAL;
+}
+
+/**
+ * aipu_write32() - write AIPU register in word
+ * @region: pointer to struct io_region initialized in init_aipu_ioregion()
+ * @offset: AIPU register offset
+ * @data:   u32 data to be written
+ */
+void aipu_write32(struct io_region *region, int offset, unsigned int data)
+{
+       if (region && region->kern && offset < region->size)
+               writel(data, region->kern + offset);
+}
diff --git a/drivers/misc/armchina-npu/aipu_io.h b/drivers/misc/armchina-npu/aipu_io.h
new file mode 100644
index 000000000000..8d5e8102f93d
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_io.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#ifndef __AIPU_IO_H__
+#define __AIPU_IO_H__
+
+#include <linux/io.h>
+#include <asm/types.h>
+
+/**
+ * struct io_region - a general struct describing an IO region
+ * @phys: physical address base of an IO region
+ * @kern: kernel virtual address base remapped from phys
+ * @size: size of an IO region in bytes
+ */
+struct io_region {
+       u64  phys;
+       void __iomem *kern;
+       u32  size;
+};
+
+int init_aipu_ioregion(struct io_region *region, u64 phys_base, u32 size);
+void deinit_aipu_ioregion(struct io_region *region);
+int aipu_read32(struct io_region *region, int offset);
+void aipu_write32(struct io_region *region, int offset, unsigned int data);
+
+#endif /* __AIPU_IO_H__ */
diff --git a/drivers/misc/armchina-npu/aipu_irq.c b/drivers/misc/armchina-npu/aipu_irq.c
new file mode 100644
index 000000000000..efa0569fdb97
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_irq.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+#include "aipu_irq.h"
+#include "aipu_core.h"
+
+static irqreturn_t aipu_irq_handler_upper_half(int irq, void *dev_id)
+{
+       struct aipu_core *core = (struct aipu_core *)(((struct device *)dev_id)->driver_data);
+
+       return core->ops->upper_half(core);
+}
+
+static void aipu_irq_handler_bottom_half(struct work_struct *work)
+{
+       struct aipu_irq_object *irq_obj = NULL;
+       struct aipu_core *core = NULL;
+
+       if (work) {
+               irq_obj = container_of(work, struct aipu_irq_object, work);
+               core = irq_obj->core;
+               core->ops->bottom_half(core);
+       }
+}
+
+/**
+ * aipu_create_irq_object() - initialize an AIPU IRQ object
+ * @irqnum:      interrupt number
+ * @core:        aipu core struct pointer
+ * @description: irq object description string
+ *
+ * Return: pointer to the created irq_object on success and NULL otherwise.
+ */
+struct aipu_irq_object *aipu_create_irq_object(u32 irqnum, void *core, char *description)
+{
+       int ret = 0;
+       struct aipu_irq_object *irq_obj = NULL;
+
+       if (!core || !description)
+               return NULL;
+
+       irq_obj = kzalloc(sizeof(*irq_obj), GFP_KERNEL);
+       if (!irq_obj)
+               return NULL;
+
+       irq_obj->aipu_wq = NULL;
+       irq_obj->irqnum = 0;
+       irq_obj->dev = ((struct aipu_core *)core)->dev;
+
+       irq_obj->aipu_wq = create_singlethread_workqueue(description);
+       if (!irq_obj->aipu_wq)
+               goto err_handle;
+
+       INIT_WORK(&irq_obj->work, aipu_irq_handler_bottom_half);
+
+       ret = request_irq(irqnum, aipu_irq_handler_upper_half, IRQF_SHARED,
+                         description, irq_obj->dev);
+       if (ret)
+               goto err_handle;
+
+       irq_obj->irqnum = irqnum;
+       irq_obj->core = core;
+
+       goto finish;
+
+err_handle:
+       aipu_destroy_irq_object(irq_obj);
+       irq_obj = NULL;
+
+finish:
+       return irq_obj;
+}
+
+/**
+ * aipu_destroy_irq_object() - destroy a created aipu_irq_object
+ * @irq_obj: interrupt object created in aipu_create_irq_object()
+ */
+void aipu_destroy_irq_object(struct aipu_irq_object *irq_obj)
+{
+       if (irq_obj) {
+               if (irq_obj->aipu_wq) {
+                       flush_workqueue(irq_obj->aipu_wq);
+                       destroy_workqueue(irq_obj->aipu_wq);
+                       irq_obj->aipu_wq = NULL;
+               }
+               if (irq_obj->irqnum)
+                       free_irq(irq_obj->irqnum, irq_obj->dev);
+               kfree(irq_obj);
+               flush_scheduled_work();
+       }
+}
+
+/**
+ * aipu_irq_schedulework() - workqueue schedule API
+ * @irq_obj: interrupt object created in aipu_create_irq_object()
+ */
+void aipu_irq_schedulework(struct aipu_irq_object *irq_obj)
+{
+       if (irq_obj)
+               queue_work(irq_obj->aipu_wq, &irq_obj->work);
+}
+
+/**
+ * aipu_irq_flush_workqueue() - workqueue flush API
+ * @irq_obj: interrupt object created in aipu_create_irq_object()
+ */
+void aipu_irq_flush_workqueue(struct aipu_irq_object *irq_obj)
+{
+       flush_workqueue(irq_obj->aipu_wq);
+}
diff --git a/drivers/misc/armchina-npu/aipu_irq.h b/drivers/misc/armchina-npu/aipu_irq.h
new file mode 100644
index 000000000000..25a426810d8c
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_irq.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#ifndef __AIPU_IRQ_H__
+#define __AIPU_IRQ_H__
+
+#include <linux/device.h>
+#include <linux/workqueue.h>
+
+typedef int  (*aipu_irq_uhandler_t) (void *arg);
+typedef void (*aipu_irq_bhandler_t) (void *arg);
+typedef void (*aipu_irq_trigger_t) (void *arg);
+typedef void (*aipu_irq_ack_t) (void *arg);
+
+/**
+ * struct aipu_irq_object - interrupt object for every single AIPU core
+ * @irqnum:  interrupt number used to request IRQ
+ * @core:    aipu core struct pointer
+ * @work:    work struct
+ * @dev:     device pointer
+ * @aipu_wq: workqueue struct pointer
+ */
+struct aipu_irq_object {
+       u32 irqnum;
+       void *core;
+       struct work_struct  work;
+       struct device *dev;
+       struct workqueue_struct *aipu_wq;
+};
+
+struct aipu_irq_object *aipu_create_irq_object(u32 irqnum, void *core, char *description);
+void aipu_irq_schedulework(struct aipu_irq_object *irq_obj);
+void aipu_irq_flush_workqueue(struct aipu_irq_object *irq_obj);
+void aipu_destroy_irq_object(struct aipu_irq_object *irq_obj);
+
+#endif /* __AIPU_IRQ_H__ */
diff --git a/drivers/misc/armchina-npu/aipu_job_manager.c b/drivers/misc/armchina-npu/aipu_job_manager.c
new file mode 100644
index 000000000000..d6b11b73af29
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_job_manager.c
@@ -0,0 +1,689 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#include <linux/string.h>
+#include <linux/time.h>
+#include <linux/uaccess.h>
+#include <linux/poll.h>
+#include "aipu_job_manager.h"
+#include "aipu_priv.h"
+
+static struct aipu_thread_wait_queue *do_create_thread_wait_queue(int uthread_id, struct file *filp)
+{
+       struct aipu_thread_wait_queue *new_wait_queue =
+               kzalloc(sizeof(*new_wait_queue), GFP_KERNEL);
+
+       if (unlikely(!new_wait_queue))
+               return ERR_PTR(-ENOMEM);
+       new_wait_queue->ref_cnt = 0;
+       new_wait_queue->uthread_id = uthread_id;
+       new_wait_queue->filp = filp;
+       init_waitqueue_head(&new_wait_queue->p_wait);
+       INIT_LIST_HEAD(&new_wait_queue->node);
+       return new_wait_queue;
+}
+
+static struct aipu_thread_wait_queue *get_thread_wait_queue(struct aipu_thread_wait_queue *head,
+                                                           int uthread_id, struct file *filp)
+{
+       struct aipu_thread_wait_queue *curr = NULL;
+
+       if (unlikely(!head))
+               return ERR_PTR(-EINVAL);
+
+       list_for_each_entry(curr, &head->node, node) {
+               if ((curr->uthread_id == uthread_id && uthread_id) ||
+                   (curr->filp == filp && filp))
+                       return curr;
+       }
+       return ERR_PTR(-EINVAL);
+}
+
+static struct aipu_thread_wait_queue *create_thread_wait_queue(struct aipu_thread_wait_queue *head,
+                                                              int uthread_id, struct file *filp)
+{
+       struct aipu_thread_wait_queue *queue = get_thread_wait_queue(head, uthread_id, filp);
+
+       if (IS_ERR(queue)) {
+               queue = do_create_thread_wait_queue(uthread_id, filp);
+               if (!IS_ERR(queue) && head)
+                       list_add_tail(&queue->node, &head->node);
+               else
+                       return queue;
+       }
+
+       queue->ref_cnt++;
+       return queue;
+}
+
+static void delete_wait_queue(struct aipu_thread_wait_queue **wait_queue_head)
+{
+       struct aipu_thread_wait_queue *curr = NULL;
+       struct aipu_thread_wait_queue *next = NULL;
+
+       if (wait_queue_head && *wait_queue_head) {
+               list_for_each_entry_safe(curr, next, &(*wait_queue_head)->node, node) {
+                       list_del(&curr->node);
+                       kfree(curr);
+               }
+               kfree(*wait_queue_head);
+               *wait_queue_head = NULL;
+       }
+}
+
+static int init_aipu_job(struct aipu_job *job, struct aipu_job_desc *desc,
+                        struct aipu_thread_wait_queue *queue, struct file *filp)
+{
+       if (unlikely(!job))
+               return -EINVAL;
+
+       job->uthread_id = task_pid_nr(current);
+       job->filp = filp;
+       if (likely(desc))
+               job->desc = *desc;
+       else
+               memset(&job->desc, 0, sizeof(job->desc));
+       job->core_id = -1;
+       job->thread_queue = &queue->p_wait;
+       job->state = AIPU_JOB_STATE_IDLE;
+       INIT_LIST_HEAD(&job->node);
+       job->sched_time = ns_to_ktime(0);
+       job->done_time = ns_to_ktime(0);
+       job->wake_up = 0;
+
+       return 0;
+}
+
+static void destroy_aipu_job(struct aipu_job_manager *manager, struct aipu_job *job)
+{
+       struct aipu_thread_wait_queue *job_aipu_wait_queue = NULL;
+
+       WARN_ON(!job);
+
+       if (likely(job->thread_queue)) {
+               job_aipu_wait_queue =
+                       container_of(job->thread_queue, struct aipu_thread_wait_queue, p_wait);
+               job_aipu_wait_queue->ref_cnt--;
+       }
+       kmem_cache_free(manager->job_cache, job);
+}
+
+static struct aipu_job *create_aipu_job(struct aipu_job_manager *manager,
+                                       struct aipu_job_desc *desc,
+                                       struct aipu_thread_wait_queue *queue, struct file *filp)
+{
+       int ret = 0;
+       struct aipu_job *new_aipu_job = NULL;
+
+       new_aipu_job = kmem_cache_alloc(manager->job_cache, GFP_KERNEL);
+       if (unlikely(!new_aipu_job))
+               return ERR_PTR(-ENOMEM);
+
+       ret = init_aipu_job(new_aipu_job, desc, queue, filp);
+       if (unlikely(ret)) {
+               destroy_aipu_job(manager, new_aipu_job);
+               new_aipu_job = NULL;
+               return ERR_PTR(ret);
+       }
+
+       return new_aipu_job;
+}
+
+static void remove_aipu_job(struct aipu_job_manager *manager, struct aipu_job *job)
+{
+       WARN_ON(!job);
+       list_del(&job->node);
+       destroy_aipu_job(manager, job);
+}
+
+static void delete_job_queue(struct aipu_job_manager *manager, struct aipu_job **head)
+{
+       struct aipu_job *curr = NULL;
+       struct aipu_job *next = NULL;
+
+       if (head && *head) {
+               list_for_each_entry_safe(curr, next, &(*head)->node, node) {
+                       remove_aipu_job(manager, curr);
+               }
+               kmem_cache_free(manager->job_cache, *head);
+               *head = NULL;
+       }
+}
+
+inline bool is_job_version_match(struct aipu_core *core, struct aipu_job_desc *user_job)
+{
+       if (core->arch == user_job->aipu_arch && user_job->version_compatible)
+               return true;
+
+       return (core->arch == user_job->aipu_arch) &&
+               (core->version == user_job->aipu_version) &&
+               (core->config == user_job->aipu_config);
+}
+
+static bool is_user_job_valid(struct aipu_job_manager *manager, struct aipu_job_desc *user_job)
+{
+       int id = 0;
+       struct aipu_core *core = NULL;
+
+       if (unlikely(!manager || !user_job))
+               return false;
+
+       if (user_job->is_defer_run) {
+               id = user_job->core_id;
+               if (id < manager->core_cnt)
+                       return is_job_version_match(manager->cores[id], user_job);
+               return false;
+       }
+
+       for (id = 0; id < manager->core_cnt; id++) {
+               core = manager->cores[id];
+               if (is_job_version_match(core, user_job))
+                       return true;
+       }
+
+       return false;
+}
+
+static int get_available_core_no_lock(struct aipu_job_manager *manager, struct aipu_job *job)
+{
+       int id = 0;
+       struct aipu_core *core = NULL;
+
+       if (unlikely(!manager))
+               return -1;
+
+       for (id = 0; id < manager->core_cnt; id++) {
+               core = manager->cores[id];
+               if (!atomic_read(&core->disable) && manager->idle_bmap[id] &&
+                   is_job_version_match(core, &job->desc))
+                       return id;
+       }
+
+       return -1;
+}
+
+static void reserve_core_for_job_no_lock(struct aipu_job_manager *manager, struct aipu_job *job,
+                                        int do_trigger)
+{
+       struct aipu_core *sched_core = NULL;
+
+       WARN_ON(job->core_id < 0);
+       WARN_ON(job->core_id >= manager->core_cnt);
+
+       sched_core = manager->cores[job->core_id];
+       manager->idle_bmap[job->core_id] = 0;
+       if (job->desc.enable_prof) {
+               get_soc_ops(sched_core)->start_bw_profiling(sched_core->dev, get_soc(sched_core));
+               job->sched_time = ktime_get();
+       }
+
+       if (do_trigger)
+               job->state = AIPU_JOB_STATE_RUNNING;
+
+       sched_core->ops->reserve(sched_core, &job->desc, do_trigger);
+
+       if (do_trigger)
+               dev_dbg(sched_core->dev, "[Job %d of Thread %d] trigger job running done\n",
+                       job->desc.job_id, job->uthread_id);
+       else
+               dev_dbg(sched_core->dev, "[Job %d of Thread %d] reserve for deferred job done\n",
+                       job->desc.job_id, job->uthread_id);
+}
+
+static int schedule_new_job(struct aipu_job_manager *manager, struct aipu_job_desc *user_job,
+                           struct file *filp, int do_trigger)
+{
+       int ret = 0;
+       struct aipu_job *kern_job = NULL;
+       struct aipu_thread_wait_queue *queue = NULL;
+       unsigned long flags;
+
+       mutex_lock(&manager->wq_lock);
+       if (user_job->enable_poll_opt)
+               queue = create_thread_wait_queue(manager->wait_queue_head, 0, filp);
+       else
+               queue = create_thread_wait_queue(manager->wait_queue_head,
+                                                task_pid_nr(current), NULL);
+       mutex_unlock(&manager->wq_lock);
+
+       WARN_ON(IS_ERR(queue));
+
+       kern_job = create_aipu_job(manager, user_job, queue, filp);
+       if (IS_ERR(kern_job))
+               return PTR_ERR(kern_job);
+
+       spin_lock_irqsave(&manager->lock, flags);
+       if (do_trigger) {
+               kern_job->state = AIPU_JOB_STATE_PENDING;
+               list_add_tail(&kern_job->node, &manager->scheduled_head->node);
+
+               /*
+                * A job using SRAM managed by the AIPU Gbuilder must be executed
+                * serially and exclusively with other Gbuilder-managed-SRAM jobs;
+                * parallel scheduling of them is not allowed.
+                *
+                * Leave it pending if another Gbuilder-managed-SRAM job is running;
+                * otherwise set the flag and reserve a core for running.
+                */
+               if (kern_job->desc.exec_flag & AIPU_JOB_EXEC_FLAG_SRAM_MUTEX) {
+                       if (manager->exec_flag & AIPU_JOB_EXEC_FLAG_SRAM_MUTEX)
+                               goto unlock;
+                       else
+                               manager->exec_flag |= AIPU_JOB_EXEC_FLAG_SRAM_MUTEX;
+               }
+               kern_job->core_id = get_available_core_no_lock(manager, kern_job);
+               if (kern_job->core_id >= 0)
+                       reserve_core_for_job_no_lock(manager, kern_job, do_trigger);
+       } else {
+               if (user_job->core_id >= manager->core_cnt ||
+                   !manager->idle_bmap[user_job->core_id]) {
+                       ret = -EINVAL;
+                       goto unlock;
+               }
+               kern_job->state = AIPU_JOB_STATE_DEFERRED;
+               kern_job->core_id = user_job->core_id;
+               list_add_tail(&kern_job->node, &manager->scheduled_head->node);
+               reserve_core_for_job_no_lock(manager, kern_job, do_trigger);
+       }
+unlock:
+       spin_unlock_irqrestore(&manager->lock, flags);
+
+       return ret;
+}
+
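+/*
+ * Trigger a previously deferred job: look up the caller's job by thread ID
+ * and job ID, then start it on the core reserved at schedule time.
+ */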
+static int trigger_deferred_job_run(struct aipu_job_manager *manager,
+                                   struct aipu_job_desc *user_job)
+{
+       unsigned long flags;
+       struct aipu_job *curr = NULL;
+       struct aipu_core *sched_core = NULL;
+       int triggered = 0;
+
+       spin_lock_irqsave(&manager->lock, flags);
+       list_for_each_entry(curr, &manager->scheduled_head->node, node) {
+               if (curr->uthread_id == task_pid_nr(current) &&
+                   curr->desc.job_id == user_job->job_id &&
+                   curr->state == AIPU_JOB_STATE_DEFERRED) {
+                       curr->state = AIPU_JOB_STATE_RUNNING;
+                       sched_core = manager->cores[curr->core_id];
+                       sched_core->ops->trigger(sched_core);
+                       triggered = 1;
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&manager->lock, flags);
+
+       if (!triggered)
+               return -EINVAL;
+
+       dev_dbg(sched_core->dev, "[Job %d of Thread %d] trigger deferred job running done\n",
+               user_job->job_id, task_pid_nr(current));
+       return 0;
+}
+
+/**
+ * init_aipu_job_manager() - initialize an existing job manager struct during the driver probe phase
+ * @manager: pointer to the aipu_job_manager struct to be initialized
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int init_aipu_job_manager(struct aipu_job_manager *manager)
+{
+       if (!manager)
+               return -EINVAL;
+
+       manager->is_init = 0;
+       manager->core_cnt = 0;
+       manager->cores = NULL;
+       manager->idle_bmap = NULL;
+       manager->job_cache =
+               kmem_cache_create("aipu_job_cache", sizeof(struct aipu_job), 0, SLAB_PANIC, NULL);
+       manager->scheduled_head = create_aipu_job(manager, NULL, NULL, NULL);
+       INIT_LIST_HEAD(&manager->scheduled_head->node);
+       spin_lock_init(&manager->lock);
+       manager->wait_queue_head = create_thread_wait_queue(NULL, 0, NULL);
+       mutex_init(&manager->wq_lock);
+       manager->exec_flag = 0;
+
+       WARN_ON(IS_ERR(manager->scheduled_head));
+       WARN_ON(IS_ERR(manager->wait_queue_head));
+
+       manager->is_init = 1;
+       return 0;
+}
+
+/**
+ * deinit_aipu_job_manager() - de-init the job manager
+ * @manager: pointer to the struct job_manager initialized in init_aipu_job_manager()
+ */
+void deinit_aipu_job_manager(struct aipu_job_manager *manager)
+{
+       if (!manager || !manager->is_init)
+               return;
+
+       kfree(manager->idle_bmap);
+       manager->idle_bmap = NULL;
+       delete_job_queue(manager, &manager->scheduled_head);
+       delete_wait_queue(&manager->wait_queue_head);
+       mutex_destroy(&manager->wq_lock);
+       kmem_cache_destroy(manager->job_cache);
+       manager->job_cache = NULL;
+       manager->is_init = 0;
+}
+
+/**
+ * aipu_job_manager_set_cores_info() - set multicore info while probing
+ * @manager:  pointer to the struct job_manager initialized in init_aipu_job_manager()
+ * @core_cnt: AIPU core count
+ * @cores:    pointer to AIPU core struct array
+ */
+void aipu_job_manager_set_cores_info(struct aipu_job_manager *manager, int core_cnt,
+                                    struct aipu_core **cores)
+{
+       WARN_ON(!manager || !core_cnt || !cores);
+       manager->core_cnt = core_cnt;
+       manager->cores = cores;
+       kfree(manager->idle_bmap);
+       manager->idle_bmap = kmalloc_array(core_cnt, sizeof(bool), GFP_KERNEL);
+       if (!WARN_ON(!manager->idle_bmap))
+               memset(manager->idle_bmap, 1, core_cnt);
+}
+
+/**
+ * aipu_job_manager_scheduler() - schedule a job flushed from userland
+ * @manager:  pointer to the struct job_manager initialized in init_aipu_job_manager()
+ * @user_job: pointer to the userspace job descriptor
+ * @filp:     pointer to the device char file
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_job_manager_scheduler(struct aipu_job_manager *manager, struct aipu_job_desc *user_job,
+                              struct file *filp)
+{
+       int ret = 0;
+
+       if (unlikely(!manager || !user_job || !filp))
+               return -EINVAL;
+
+       if (unlikely(!is_user_job_valid(manager, user_job)))
+               return -EINVAL;
+
+       if (!user_job->is_defer_run)
+               ret = schedule_new_job(manager, user_job, filp, 1);
+       else if (!user_job->do_trigger)
+               ret = schedule_new_job(manager, user_job, filp, 0);
+       else
+               ret = trigger_deferred_job_run(manager, user_job);
+
+       return ret;
+}
+
+/**
+ * aipu_job_manager_irq_upper_half() - aipu interrupt upper half handler
+ * @core:           pointer to the aipu core struct
+ * @exception_flag: exception flag
+ */
+void aipu_job_manager_irq_upper_half(struct aipu_core *core, int exception_flag)
+{
+       struct aipu_job *curr = NULL;
+       struct aipu_job_manager *manager = NULL;
+       int handled = 0;
+       int triggered = 0;
+
+       if (unlikely(!core))
+               return;
+
+       manager = get_job_manager(core);
+
+       spin_lock(&manager->lock);
+       list_for_each_entry(curr, &manager->scheduled_head->node, node) {
+               if (curr->core_id == core->id && curr->state == AIPU_JOB_STATE_RUNNING) {
+                       if (unlikely(exception_flag))
+                               curr->state = AIPU_JOB_STATE_EXCEP;
+                       else
+                               curr->state = AIPU_JOB_STATE_SUCCESS;
+
+                       if (curr->desc.enable_prof) {
+                               curr->done_time = ktime_get();
+                               get_soc_ops(core)->stop_bw_profiling(core->dev, get_soc(core));
+                               get_soc_ops(core)->read_profiling_reg(core->dev,
+                                                                     get_soc(core), &curr->pdata);
+                       }
+
+                       if (curr->desc.exec_flag & AIPU_JOB_EXEC_FLAG_SRAM_MUTEX)
+                               manager->exec_flag &= ~AIPU_JOB_EXEC_FLAG_SRAM_MUTEX;
+
+                       handled = 1;
+                       break;
+               }
+       }
+
+       /* handled == 0 means the job was invalidated before it completed */
+
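+       /*
+        * The core just went idle: try to pick a matching pending job and
+        * trigger it directly from IRQ context; only when nothing can be
+        * scheduled is the core marked idle in the bitmap.
+        */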
+       if (!atomic_read(&core->disable)) {
+               list_for_each_entry(curr, &manager->scheduled_head->node, node) {
+                       if (curr->state == AIPU_JOB_STATE_PENDING &&
+                           is_job_version_match(core, &curr->desc)) {
+                               if (curr->desc.exec_flag & AIPU_JOB_EXEC_FLAG_SRAM_MUTEX) {
+                                       if (manager->exec_flag & AIPU_JOB_EXEC_FLAG_SRAM_MUTEX)
+                                               continue;
+                                       else
+                                               manager->exec_flag |= AIPU_JOB_EXEC_FLAG_SRAM_MUTEX;
+                               }
+                               curr->core_id = core->id;
+                               reserve_core_for_job_no_lock(manager, curr, 1);
+                               triggered = 1;
+                               break;
+                       }
+               }
+       }
+
+       if (!triggered)
+               manager->idle_bmap[core->id] = 1;
+       spin_unlock(&manager->lock);
+}
+
+/**
+ * aipu_job_manager_irq_bottom_half() - aipu interrupt bottom half handler
+ * @core: pointer to the aipu core struct
+ */
+void aipu_job_manager_irq_bottom_half(struct aipu_core *core)
+{
+       struct aipu_job *curr = NULL;
+       struct aipu_job *next = NULL;
+       struct aipu_job_manager *manager = NULL;
+       unsigned long flags;
+
+       if (unlikely(!core))
+               return;
+
+       manager = get_job_manager(core);
+
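+       /*
+        * Finalize profiling data for done/exception jobs on this core and
+        * wake up the waiting user thread exactly once per job (the wake_up
+        * flag guards against repeated wake-ups).
+        */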
+       spin_lock_irqsave(&manager->lock, flags);
+       list_for_each_entry_safe(curr, next, &manager->scheduled_head->node, node) {
+               if (curr->state >= AIPU_JOB_STATE_EXCEP && !curr->wake_up &&
+                   curr->core_id == core->id) {
+                       if (curr->desc.enable_prof)
+                               curr->pdata.execution_time_ns =
+                                 (long)ktime_to_ns(ktime_sub(curr->done_time, curr->sched_time));
+                       wake_up_interruptible(curr->thread_queue);
+                       curr->wake_up = 1;
+               }
+       }
+       spin_unlock_irqrestore(&manager->lock, flags);
+}
+
+/**
+ * aipu_job_manager_cancel_jobs() - cancel all jobs flushed by a user closing its fd
+ * @manager: pointer to the struct job_manager initialized in init_aipu_job_manager()
+ * @filp: file struct pointer
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_job_manager_cancel_jobs(struct aipu_job_manager *manager, struct file *filp)
+{
+       unsigned long flags;
+       struct aipu_job *curr = NULL;
+       struct aipu_job *next = NULL;
+       struct aipu_thread_wait_queue *curr_wq = NULL;
+       struct aipu_thread_wait_queue *next_wq = NULL;
+
+       if (!manager || !filp)
+               return -EINVAL;
+
+       /* jobs should be cleaned first */
+       spin_lock_irqsave(&manager->lock, flags);
+       list_for_each_entry_safe(curr, next, &manager->scheduled_head->node, node) {
+               if (curr->filp == filp) {
+                       if (curr->state == AIPU_JOB_STATE_DEFERRED)
+                               manager->idle_bmap[curr->core_id] = 1;
+                       remove_aipu_job(manager, curr);
+               }
+       }
+       spin_unlock_irqrestore(&manager->lock, flags);
+
+       mutex_lock(&manager->wq_lock);
+       list_for_each_entry_safe(curr_wq, next_wq, &manager->wait_queue_head->node, node) {
+               if (!curr_wq->ref_cnt) {
+                       list_del(&curr_wq->node);
+                       kfree(curr_wq);
+               }
+       }
+       mutex_unlock(&manager->wq_lock);
+
+       return 0;
+}
+
+/**
+ * aipu_job_manager_invalidate_timeout_job() - invalidate/kill a timeout job
+ * @manager: pointer to the struct job_manager initialized in init_aipu_job_manager()
+ * @job_id:  job ID
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_job_manager_invalidate_timeout_job(struct aipu_job_manager *manager, int job_id)
+{
+       int ret = 0;
+       struct aipu_job *curr = NULL;
+       struct aipu_job *next = NULL;
+       unsigned long flags;
+
+       if (!manager)
+               return -EINVAL;
+
+       spin_lock_irqsave(&manager->lock, flags);
+       list_for_each_entry_safe(curr, next, &manager->scheduled_head->node, node) {
+               if (curr->uthread_id == task_pid_nr(current) &&
+                   curr->desc.job_id == job_id) {
+                       remove_aipu_job(manager, curr);
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&manager->lock, flags);
+
+       return ret;
+}
+
+/**
+ * aipu_job_manager_get_job_status() - get AIPU jobs' statuses after a polling event returns
+ * @manager: pointer to the struct job_manager initialized in init_aipu_job_manager()
+ * @job_status: job status array storing status info filled by this API
+ * @filp: file struct pointer
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_job_manager_get_job_status(struct aipu_job_manager *manager,
+                                   struct aipu_job_status_query *job_status, struct file *filp)
+{
+       int ret = 0;
+       struct aipu_job_status_desc *status = NULL;
+       struct aipu_job *curr = NULL;
+       struct aipu_job *next = NULL;
+       int poll_iter = 0;
+       unsigned long flags;
+
+       if (unlikely(!manager || !job_status || job_status->max_cnt < 1))
+               return -EINVAL;
+
+       status = kcalloc(job_status->max_cnt, sizeof(*status), GFP_KERNEL);
+       if (!status)
+               return -ENOMEM;
+
+       job_status->poll_cnt = 0;
+       spin_lock_irqsave(&manager->lock, flags);
+       list_for_each_entry_safe(curr, next, &manager->scheduled_head->node, node) {
+               if (job_status->poll_cnt == job_status->max_cnt)
+                       break;
+
+               if (curr->state < AIPU_JOB_STATE_EXCEP)
+                       continue;
+
+               if (curr->filp != filp)
+                       continue;
+
+               if ((job_status->of_this_thread && curr->uthread_id == task_pid_nr(current)) ||
+                   !job_status->of_this_thread) {
+                       status[poll_iter].job_id = curr->desc.job_id;
+                       status[poll_iter].thread_id = curr->uthread_id;
+                       status[poll_iter].state = (curr->state == AIPU_JOB_STATE_SUCCESS) ?
+                           AIPU_JOB_STATE_DONE : AIPU_JOB_STATE_EXCEPTION;
+                       if (curr->desc.enable_prof)
+                               status[poll_iter].pdata = curr->pdata;
+
+                       remove_aipu_job(manager, curr);
+                       curr = NULL;
+
+                       job_status->poll_cnt++;
+                       poll_iter++;
+               }
+       }
+       spin_unlock_irqrestore(&manager->lock, flags);
+
+       if (copy_to_user((struct aipu_job_status_desc __user *)job_status->status, status,
+                        job_status->poll_cnt * sizeof(*status)))
+               ret = -EFAULT;
+
+       kfree(status);
+       return ret;
+}
+
+/**
+ * aipu_job_manager_has_end_job() - check if a user thread has end job(s) to query
+ * @manager: pointer to the struct job_manager initialized in init_aipu_job_manager()
+ * @filp: file struct pointer
+ * @wait: wait table struct from kernel
+ * @uthread_id: thread ID
+ *
+ * Return: true if there is/are AIPU job(s) done and false otherwise.
+ */
+bool aipu_job_manager_has_end_job(struct aipu_job_manager *manager, struct file *filp,
+                                 struct poll_table_struct *wait, int uthread_id)
+{
+       bool ret = false;
+       struct aipu_job *curr = NULL;
+       struct aipu_thread_wait_queue *wq = NULL;
+       unsigned long flags;
+
+       if (unlikely(!manager || !filp))
+               return false;
+
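+       /*
+        * Register the poller on its wait queue first (matched by thread ID,
+        * or by filp when the poll optimization is enabled), then scan the
+        * scheduled list for a finished job belonging to this caller.
+        */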
+       mutex_lock(&manager->wq_lock);
+       list_for_each_entry(wq, &manager->wait_queue_head->node, node) {
+               if (wq->uthread_id == uthread_id || wq->filp == filp) {
+                       poll_wait(filp, &wq->p_wait, wait);
+                       break;
+               }
+       }
+       mutex_unlock(&manager->wq_lock);
+
+       spin_lock_irqsave(&manager->lock, flags);
+       list_for_each_entry(curr, &manager->scheduled_head->node, node) {
+               if (curr->filp == filp &&
+                   curr->state >= AIPU_JOB_STATE_EXCEP &&
+                   (curr->desc.enable_poll_opt || curr->uthread_id == uthread_id)) {
+                       ret = true;
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&manager->lock, flags);
+
+       return ret;
+}
diff --git a/drivers/misc/armchina-npu/aipu_job_manager.h b/drivers/misc/armchina-npu/aipu_job_manager.h
new file mode 100644
index 000000000000..7a36464986e5
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_job_manager.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#ifndef __AIPU_JOB_MANAGER_H__
+#define __AIPU_JOB_MANAGER_H__
+
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <uapi/misc/armchina_aipu.h>
+#include "aipu_core.h"
+
+enum aipu_state_kern {
+       AIPU_JOB_STATE_IDLE,
+       AIPU_JOB_STATE_PENDING,
+       AIPU_JOB_STATE_DEFERRED,
+       AIPU_JOB_STATE_RUNNING,
+       AIPU_JOB_STATE_EXCEP,
+       AIPU_JOB_STATE_SUCCESS
+};
+
+/**
+ * struct aipu_thread_wait_queue - maintain the wait queue for a user thread
+ * @uthread_id: ID of the user thread that owns this wait queue
+ * @filp: file struct pointer
+ * @ref_cnt: struct reference count
+ * @p_wait: wait queue head for polling
+ * @node: list head struct
+ */
+struct aipu_thread_wait_queue {
+       int uthread_id;
+       struct file *filp;
+       int ref_cnt;
+       wait_queue_head_t p_wait;
+       struct list_head node;
+};
+
+/**
+ * struct aipu_job - job struct describing a job under scheduling in the job manager
+ *        Job status is tracked as soon as interrupts or user events come in.
+ * @uthread_id: ID of the user thread that scheduled this job
+ * @filp: file struct pointer used when scheduling this job
+ * @desc: job descriptor from userland
+ * @core_id: ID of the aipu core this job is scheduled on
+ * @thread_queue: wait queue on which this job's thread waits to be woken up
+ * @state: job state
+ * @node: list node
+ * @sched_time: job scheduled time (enabled by profiling flag in desc)
+ * @done_time: job termination time (enabled by profiling flag in desc)
+ * @pdata: profiling data (enabled by profiling flag in desc)
+ * @wake_up: wake up flag
+ */
+struct aipu_job {
+       int uthread_id;
+       struct file *filp;
+       struct aipu_job_desc desc;
+       int core_id;
+       wait_queue_head_t *thread_queue;
+       int state;
+       struct list_head node;
+       ktime_t sched_time;
+       ktime_t done_time;
+       struct aipu_ext_profiling_data pdata;
+       int wake_up;
+};
+
+/**
+ * struct aipu_job_manager - job manager
+ *        Maintain all jobs and update their statuses
+ * @core_cnt: aipu core count
+ * @cores: aipu core struct pointer array
+ * @idle_bmap: idle flag bitmap for every core
+ * @scheduled_head: scheduled job list head
+ * @lock: spinlock
+ * @wait_queue_head: wait queue list head
+ * @wq_lock: waitqueue lock
+ * @job_cache: slab cache of aipu_job
+ * @is_init: init flag
+ * @exec_flag: execution flags propagated to all jobs
+ */
+struct aipu_job_manager {
+       int core_cnt;
+       struct aipu_core **cores;
+       bool *idle_bmap;
+       struct aipu_job *scheduled_head;
+       spinlock_t lock; /* Protect cores and jobs status */
+       struct aipu_thread_wait_queue *wait_queue_head;
+       struct mutex wq_lock; /* Protect thread wait queue */
+       struct kmem_cache *job_cache;
+       int is_init;
+       int exec_flag;
+};
+
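+/*
+ * Expected call flow of the job manager (as suggested by the kernel-doc of
+ * the functions below):
+ *   probe:     init_aipu_job_manager() -> aipu_job_manager_set_cores_info()
+ *   runtime:   aipu_job_manager_scheduler() schedules jobs from userland;
+ *              aipu_job_manager_irq_upper_half()/_bottom_half() update job
+ *              states and wake up waiters; userland polls with
+ *              aipu_job_manager_has_end_job() and collects results with
+ *              aipu_job_manager_get_job_status()
+ *   fd close:  aipu_job_manager_cancel_jobs()
+ *   remove:    deinit_aipu_job_manager()
+ */
+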
+int init_aipu_job_manager(struct aipu_job_manager *manager);
+void deinit_aipu_job_manager(struct aipu_job_manager *manager);
+void aipu_job_manager_set_cores_info(struct aipu_job_manager *manager, int core_cnt,
+                                    struct aipu_core **cores);
+int aipu_job_manager_scheduler(struct aipu_job_manager *manager, struct aipu_job_desc *user_job,
+                              struct file *filp);
+void aipu_job_manager_irq_upper_half(struct aipu_core *core, int exception_flag);
+void aipu_job_manager_irq_bottom_half(struct aipu_core *core);
+int aipu_job_manager_cancel_jobs(struct aipu_job_manager *manager, struct file *filp);
+int aipu_job_manager_invalidate_timeout_job(struct aipu_job_manager *manager, int job_id);
+int aipu_job_manager_get_job_status(struct aipu_job_manager *manager,
+                                   struct aipu_job_status_query *job_status, struct file *filp);
+bool aipu_job_manager_has_end_job(struct aipu_job_manager *manager, struct file *filp,
+                                 struct poll_table_struct *wait, int uthread_id);
+
+#endif /* __AIPU_JOB_MANAGER_H__ */
diff --git a/drivers/misc/armchina-npu/aipu_mm.c b/drivers/misc/armchina-npu/aipu_mm.c
new file mode 100644
index 000000000000..ce6e31b95107
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_mm.c
@@ -0,0 +1,740 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/of_iommu.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/iommu.h>
+#include <linux/bitmap.h>
+#include <linux/version.h>
+#include "config.h"
+#include "aipu_priv.h"
+#include "aipu_mm.h"
+
+static struct device *aipu_mm_create_child_sramdev(struct device *dev)
+{
+       struct device *child = NULL;
+
+       child = devm_kzalloc(dev, sizeof(*child), GFP_KERNEL);
+       if (!child)
+               return NULL;
+
+       device_initialize(child);
+       dev_set_name(child, "%s:%s", dev_name(dev), "sram-child");
+       child->parent = dev;
+       child->coherent_dma_mask = dev->coherent_dma_mask;
+       child->dma_mask = dev->dma_mask;
+       child->dma_parms = devm_kzalloc(dev, sizeof(*child->dma_parms),
+                                       GFP_KERNEL);
+       if (!child->dma_parms)
+               goto err;
+       child->bus = dev->bus;
+
+       if (!device_add(child))
+               return child;
+       /* device_add() failed: only put_device() below is needed, not device_del() */
+
+err:
+       put_device(child);
+       return NULL;
+}
+
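+/*
+ * Each region tracks allocations with a bitmap of page-sized slots plus a
+ * parallel array of per-page bookkeeping structs (see struct aipu_virt_page).
+ */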
+static int aipu_mm_init_pages(struct aipu_memory_manager *mm, int id)
+{
+       struct aipu_mem_region *reg = NULL;
+
+       if (!mm || id >= AIPU_MEM_REGION_MAX_ID)
+               return -EINVAL;
+
+       reg = &mm->reg[id];
+
+       reg->count = reg->bytes >> PAGE_SHIFT;
+       reg->bitmap = devm_kzalloc(reg->dev,
+                                  BITS_TO_LONGS(reg->count) * sizeof(long), GFP_KERNEL);
+       if (!reg->bitmap)
+               return -ENOMEM;
+
+       reg->pages = vzalloc(reg->count * sizeof(struct aipu_virt_page *));
+       if (!reg->pages)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static int aipu_mm_init_mem_region(struct aipu_memory_manager *mm, int id)
+{
+       int ret = 0;
+       void *va = NULL;
+       struct aipu_mem_region *reg = NULL;
+       bool enable_iommu = false;
+
+       if (!mm || id >= AIPU_MEM_REGION_MAX_ID)
+               return -EINVAL;
+
+       reg = &mm->reg[id];
+
+       if (!reg->bytes &&
+           reg->type != AIPU_MEM_TYPE_CMA_DEFAULT &&
+           reg->type != AIPU_MEM_TYPE_KERNEL)
+               return 0;
+
+       if (id == AIPU_MEM_REGION_DRAM_ID)
+               reg->dev = mm->dev;
+       else
+               reg->dev = aipu_mm_create_child_sramdev(mm->dev);
+
+       if (!reg->dev)
+               return -ENODEV;
+
+       if (reg->type == AIPU_MEM_TYPE_DEV_RESERVED ||
+           reg->type == AIPU_MEM_TYPE_DMA_RESERVED ||
+           (!mm->has_iommu && reg->type == AIPU_MEM_TYPE_CMA_RESERVED)) {
+               u64 upper = reg->base_pa + reg->bytes - mm->host_aipu_offset;
+
+               /*
+                * Z1 only accepts a 0~3GB region;
+                * Z2/Z3 have ASE registers and therefore accept 0~3GB in the
+                * lower 32 bits;
+                */
+               if (mm->version == AIPU_ISA_VERSION_ZHOUYI_V2 ||
+                   mm->version == AIPU_ISA_VERSION_ZHOUYI_V3)
+                       upper &= U32_MAX;
+
+               if (upper > mm->limit) {
+                       dev_err(reg->dev,
+                               "region is beyond valid region used by AIPU (0x%llx > 0x%llx)\n",
+                               upper, mm->limit);
+                       ret = -EINVAL;
+                       goto err;
+               }
+       }
+
+       /* allocate an iova for userland regardless of whether an IOMMU is present */
+
+       /* Native reserved */
+       if (reg->type == AIPU_MEM_TYPE_DEV_RESERVED) {
+               va = memremap(reg->base_pa, reg->bytes, MEMREMAP_WT);
+               if (!va) {
+                       ret = -EINVAL;
+                       goto err;
+               }
+
+               reg->base_va = va;
+               reg->base_iova = reg->base_pa;
+               goto init_page;
+       }
+
+       /* DMA/CMA reserved */
+       if (reg->type == AIPU_MEM_TYPE_DMA_RESERVED || reg->type == AIPU_MEM_TYPE_CMA_RESERVED) {
+               ret = of_reserved_mem_device_init_by_idx(reg->dev, mm->dev->of_node, id);
+               if (ret) {
+                       dev_err(mm->dev, "init reserved mem failed: idx %d, ret %d\n",
+                               id, ret);
+                       goto err;
+               }
+       }
+
+       if (mm->has_iommu &&
+           (reg->type == AIPU_MEM_TYPE_CMA_RESERVED || reg->type == AIPU_MEM_TYPE_KERNEL)) {
+               ret = dma_set_coherent_mask(reg->dev, DMA_BIT_MASK(31));
+               if (ret) {
+                       dev_err(mm->dev, "DMA set coherent mask failed: %d!\n", ret);
+                       goto err;
+               }
+               enable_iommu = true;
+       }
+
+       if (mm->has_iommu && reg->type == AIPU_MEM_TYPE_CMA_RESERVED)
+               reg->attrs = DMA_ATTR_FORCE_CONTIGUOUS;
+       else
+               reg->attrs = 0;
+
+       if (reg->type == AIPU_MEM_TYPE_KERNEL ||
+           reg->type == AIPU_MEM_TYPE_CMA_DEFAULT ||
+           AIPU_CONFIG_USE_DRAM_DEFAULT_SIZE == 1)
+               reg->bytes = AIPU_CONFIG_DRAM_DEFAULT_SIZE;
+
+       va = dma_alloc_attrs(reg->dev, reg->bytes, &reg->base_iova, GFP_KERNEL, reg->attrs);
+       if (!va) {
+               dev_err(reg->dev, "dma_alloc_attrs failed (bytes: 0x%llx, attrs %ld)\n",
+                       reg->bytes, reg->attrs);
+               ret = -EINVAL;
+               goto err;
+       }
+       reg->base_va = va;
+
+init_page:
+       ret = aipu_mm_init_pages(mm, id);
+       if (ret)
+               goto err;
+
+       reg->base_pfn = PFN_DOWN(reg->base_iova);
+
+       dev_info(reg->dev, "init %s region done: %s [0x%llx, 0x%llx]\n",
+                id ? "SRAM" : "DRAM",
+                enable_iommu ? "iova" : "pa",
+                reg->base_iova, reg->base_iova + reg->bytes - 1);
+       goto finish;
+
+err:
+       if (reg->base_va) {
+               if (reg->type == AIPU_MEM_TYPE_DEV_RESERVED)
+                       memunmap(reg->base_va);
+               else
+                       dma_free_attrs(reg->dev, reg->bytes, reg->base_va, reg->base_iova,
+                                      reg->attrs);
+               reg->base_va = NULL;
+       }
+       if (reg->type == AIPU_MEM_TYPE_DMA_RESERVED || reg->type == AIPU_MEM_TYPE_CMA_RESERVED)
+               of_reserved_mem_device_release(reg->dev);
+       if (reg->dev && reg->dev != mm->dev) {
+               device_del(reg->dev);
+               reg->dev = NULL;
+       }
+
+finish:
+       return ret;
+}
+
+static void aipu_mm_deinit_mem_region(struct aipu_memory_manager *mm, int id)
+{
+       struct aipu_mem_region *reg = &mm->reg[id];
+
+       if (!reg->bytes || !reg->base_va)
+               return;
+
+       if (reg->type == AIPU_MEM_TYPE_DEV_RESERVED)
+               memunmap(reg->base_va);
+       else
+               dma_free_attrs(reg->dev, reg->bytes, reg->base_va, reg->base_iova, reg->attrs);
+
+       if (reg->type == AIPU_MEM_TYPE_DMA_RESERVED || reg->type == AIPU_MEM_TYPE_CMA_RESERVED)
+               of_reserved_mem_device_release(reg->dev);
+
+       vfree(reg->pages);
+
+       reg->bytes = 0;
+       reg->base_va = NULL;
+
+       if (reg->dev && reg->dev != mm->dev) {
+               device_del(reg->dev);
+               reg->dev = NULL;
+       }
+}
+
+static int aipu_mm_alloc_in_region_no_lock(struct aipu_memory_manager *mm,
+                                          struct aipu_buf_request *buf_req,
+                                          struct aipu_mem_region *reg, struct file *filp)
+{
+       unsigned long align_order = 0;
+       unsigned long mask = 0;
+       unsigned long offset = 0;
+       unsigned long bitmap_no = 0;
+       unsigned long alloc_nr = 0;
+
+       if (!mm || !buf_req || !reg || !filp)
+               return -EINVAL;
+
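+       /*
+        * Allocation is page-granular: find a run of zero bits in the region
+        * bitmap whose start, offset by the region base pfn, satisfies the
+        * requested page alignment.
+        */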
+       alloc_nr = ALIGN(buf_req->bytes, PAGE_SIZE) >> PAGE_SHIFT;
+       align_order = order_base_2(buf_req->align_in_page);
+       mask = (1UL << align_order) - 1;
+       offset = reg->base_pfn & ((1UL << align_order) - 1);
+       bitmap_no =
+               bitmap_find_next_zero_area_off(reg->bitmap, reg->count, 0, alloc_nr, mask, offset);
+       if (bitmap_no >= reg->count)
+               return -ENOMEM;
+
+       bitmap_set(reg->bitmap, bitmap_no, alloc_nr);
+       if (!reg->pages[bitmap_no]) {
+               reg->pages[bitmap_no] =
+                       devm_kzalloc(reg->dev, sizeof(struct aipu_virt_page), GFP_KERNEL);
+               if (!reg->pages[bitmap_no])
+                       return -ENOMEM;
+       }
+       reg->pages[bitmap_no]->contiguous_alloc_len = alloc_nr;
+       reg->pages[bitmap_no]->filp = filp;
+       reg->pages[bitmap_no]->tid = task_pid_nr(current);
+
+       buf_req->desc.dev_offset = reg->base_iova + (bitmap_no << PAGE_SHIFT);
+       buf_req->desc.pa = buf_req->desc.dev_offset - mm->host_aipu_offset;
+       buf_req->desc.bytes = alloc_nr * PAGE_SIZE;
+
+       dev_dbg(reg->dev,
+               "[MM] allocation done: iova 0x%llx, bytes 0x%llx, align_pages %lu, map_num = %d\n",
+               buf_req->desc.pa, buf_req->desc.bytes, align_order,
+               reg->pages[bitmap_no]->map_num);
+
+       return 0;
+}
+
+static int aipu_mm_free_in_region_no_lock(struct aipu_memory_manager *mm,
+                                         struct aipu_buf_desc *buf, struct aipu_mem_region *reg,
+                                         struct file *filp)
+{
+       unsigned long bitmap_no = 0;
+       unsigned long alloc_nr = 0;
+       struct aipu_virt_page *page = NULL;
+
+       if (!mm || !buf || !reg || !filp)
+               return -EINVAL;
+
+       bitmap_no = (buf->pa + mm->host_aipu_offset - reg->base_iova) >> PAGE_SHIFT;
+       if (bitmap_no >= reg->count)
+               return -EINVAL;
+
+       page = reg->pages[bitmap_no];
+       if (!page)
+               return -EINVAL;
+
+       alloc_nr = page->contiguous_alloc_len;
+       if (page->filp != filp || !alloc_nr)
+               return -EINVAL;
+
+       bitmap_clear(reg->bitmap, bitmap_no, alloc_nr);
+       memset(page, 0, sizeof(struct aipu_virt_page));
+
+       dev_dbg(reg->dev, "[MM] free done: iova 0x%llx, bytes 0x%llx\n", buf->pa, buf->bytes);
+
+       return 0;
+}
+
+static struct aipu_mem_region *aipu_mm_find_region(struct aipu_memory_manager *mm, u64 iova)
+{
+       int i = 0;
+
+       for (i = AIPU_MEM_REGION_DRAM_ID; i < AIPU_MEM_REGION_MAX_ID; i++) {
+               if (iova >= mm->reg[i].base_iova &&
+                   (iova < mm->reg[i].base_iova + mm->reg[i].bytes))
+                       return &mm->reg[i];
+       }
+
+       return NULL;
+}
+
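+/*
+ * Release every allocation in a region owned by a given filp; the search
+ * offset jumps over each contiguous allocation found in the bitmap.
+ */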
+static void aipu_mm_free_filp_in_region(struct aipu_memory_manager *mm,
+                                       struct aipu_mem_region *reg, struct file *filp)
+{
+       unsigned long i = 0;
+       unsigned long offset = 0;
+
+       if (!mm || !reg || !reg->bitmap || !filp)
+               return;
+
+       mutex_lock(&mm->lock);
+       while ((i = find_next_bit(reg->bitmap, reg->count, offset)) != reg->count) {
+               /* the page struct must be checked before dereferencing it */
+               if (!reg->pages[i]) {
+                       offset = i + 1;
+                       continue;
+               }
+               offset = i + reg->pages[i]->contiguous_alloc_len;
+               if (reg->pages[i]->filp == filp) {
+                       bitmap_clear(reg->bitmap, i, reg->pages[i]->contiguous_alloc_len);
+                       memset(reg->pages[i], 0, sizeof(struct aipu_virt_page));
+               }
+       }
+       mutex_unlock(&mm->lock);
+}
+
+static struct aipu_virt_page *aipu_mm_find_page(struct aipu_memory_manager *mm,
+                                               struct aipu_mem_region *reg,
+                                               struct file *filp, u64 iova)
+{
+       unsigned long page_no = 0;
+       struct aipu_virt_page *page = NULL;
+
+       if (!mm || !reg || !filp || (iova % PAGE_SIZE))
+               return NULL;
+
+       page_no = (iova - reg->base_iova) >> PAGE_SHIFT;
+       if (page_no >= reg->count)
+               return NULL;
+
+       page = reg->pages[page_no];
+       if (!page || page->map_num || page->filp != filp)
+               return NULL;
+
+       return page;
+}
+
+/**
+ * @aipu_init_mm() - initialize mm module during driver probe phase
+ * @mm:      pointer to memory manager struct to be initialized
+ * @p_dev:   pointer to the platform device struct
+ * @version: AIPU ISA version
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_init_mm(struct aipu_memory_manager *mm, struct platform_device *p_dev, int version)
+{
+       int ret = 0;
+       int reg_id = 0;
+       struct iommu_group *group = NULL;
+       struct device_node *np = NULL;
+       struct resource res;
+       bool bypass_iommu = false;
+
+       if (!mm || !p_dev)
+               return -EINVAL;
+
+       memset(mm, 0, sizeof(*mm));
+       mm->version = version;
+       mm->limit = 0xC0000000;
+       mm->dev = &p_dev->dev;
+       mutex_init(&mm->lock);
+       mm->sram_dft_dtype = AIPU_MM_DATA_TYPE_NONE;
+       mm->sram_disable = false;
+       mm->sram_disable_head = devm_kzalloc(mm->dev, sizeof(*mm->sram_disable_head), GFP_KERNEL);
+       if (!mm->sram_disable_head)
+               return -ENOMEM;
+       INIT_LIST_HEAD(&mm->sram_disable_head->list);
+
+       if (of_property_read_u64(mm->dev->of_node, "host-aipu-offset", &mm->host_aipu_offset))
+               mm->host_aipu_offset = 0;
+
+       group = iommu_group_get(mm->dev);
+       if (group)
+               mm->has_iommu = true;
+       iommu_group_put(group);
+       dev_info(mm->dev, "AIPU is%s behind an IOMMU\n", mm->has_iommu ? "" : " not");
+
+       /*
+        * If AIPU is behind an IOMMU, the memory-region attribute of DRAM in
+        * the devicetree is optional; otherwise DRAM must be specified;
+        *
+        * SRAM is always optional and, if present, should be specified after DRAM;
+        *
+        * KMD accepts at most one DRAM memory-region and one SRAM region;
+        */
+       for (reg_id = 0; reg_id < AIPU_MEM_REGION_MAX_ID; reg_id++) {
+               np = of_parse_phandle(mm->dev->of_node, "memory-region", reg_id);
+               if (!np)
+                       continue;
+
+               if (of_device_is_compatible(np, "shared-dma-pool")) {
+                       if (IS_ENABLED(CONFIG_CMA) && of_property_read_bool(np, "reusable")) {
+                               mm->reg[reg_id].type = AIPU_MEM_TYPE_CMA_RESERVED;
+                               dev_info(mm->dev, "AIPU %s mem type is [CMA reserved]\n",
+                                        reg_id ? "SRAM" : "DRAM");
+                       } else if (of_property_read_bool(np, "no-map")) {
+                               mm->reg[reg_id].type = AIPU_MEM_TYPE_DMA_RESERVED;
+                               dev_info(mm->dev, "AIPU %s mem type is [DMA reserved]\n",
+                                        reg_id ? "SRAM" : "DRAM");
+                       }
+               } else {
+                       mm->reg[reg_id].type = AIPU_MEM_TYPE_DEV_RESERVED;
+                       dev_info(mm->dev, "AIPU %s mem type is [Reserved]\n",
+                                reg_id ? "SRAM" : "DRAM");
+               }
+
+               if (of_address_to_resource(np, 0, &res)) {
+                       of_node_put(np);
+                       return -EINVAL;
+               }
+
+               mm->reg[reg_id].base_pa = res.start;
+               mm->reg[reg_id].bytes = resource_size(&res);
+               of_node_put(np);
+       }
+
+       if (!mm->reg[AIPU_MEM_REGION_DRAM_ID].bytes) {
+               if (mm->has_iommu) {
+                       mm->reg[AIPU_MEM_REGION_DRAM_ID].type = AIPU_MEM_TYPE_KERNEL;
+                       dev_info(mm->dev, "AIPU DRAM mem type is [Kernel]\n");
+               } else {
+                       mm->reg[AIPU_MEM_REGION_DRAM_ID].type = AIPU_MEM_TYPE_CMA_DEFAULT;
+                       dev_info(mm->dev, "AIPU DRAM mem type is [CMA default]\n");
+               }
+       }
+
+       if (mm->has_iommu &&
+           mm->reg[AIPU_MEM_REGION_DRAM_ID].type == AIPU_MEM_TYPE_CMA_RESERVED &&
+           (mm->reg[AIPU_MEM_REGION_SRAM_ID].type == AIPU_MEM_TYPE_DEV_RESERVED ||
+            mm->reg[AIPU_MEM_REGION_SRAM_ID].type == AIPU_MEM_TYPE_DMA_RESERVED)) {
+               dev_err(mm->dev, "AIPU is behind an IOMMU and cannot issue SRAM PA\n");
+               return -EINVAL;
+       }
+
+       bypass_iommu = mm->has_iommu &&
+               (mm->reg[AIPU_MEM_REGION_DRAM_ID].type == AIPU_MEM_TYPE_DEV_RESERVED ||
+                mm->reg[AIPU_MEM_REGION_DRAM_ID].type == AIPU_MEM_TYPE_DMA_RESERVED);
+       if (bypass_iommu) {
+               dev_info(mm->dev, "%s reserved memory is used and IOMMU will be bypassed\n",
+                        (mm->reg[AIPU_MEM_REGION_DRAM_ID].type == AIPU_MEM_TYPE_DEV_RESERVED) ?
+                        "Native" : "DMA");
+       }
+
+       if ((!mm->has_iommu || bypass_iommu) && mm->reg[AIPU_MEM_REGION_SRAM_ID].bytes &&
+           ((mm->reg[AIPU_MEM_REGION_SRAM_ID].base_pa >> 32) !=
+            (mm->reg[AIPU_MEM_REGION_DRAM_ID].base_pa >> 32))) {
+               mm->reg[AIPU_MEM_REGION_SRAM_ID].bytes = 0;
+               mm->reg[AIPU_MEM_REGION_SRAM_ID].base_pa = 0;
+               dev_err(mm->dev, "SRAM is not in the same 4GB region with DRAM and cannot be used\n");
+       }
+
+       ret = aipu_mm_init_mem_region(mm, AIPU_MEM_REGION_DRAM_ID);
+       if (ret)
+               return ret;
+
+       ret = aipu_mm_init_mem_region(mm, AIPU_MEM_REGION_SRAM_ID);
+       if (ret)
+               goto err;
+
+       goto finish;
+
+err:
+       aipu_mm_deinit_mem_region(mm, AIPU_MEM_REGION_DRAM_ID);
+
+finish:
+       return ret;
+}
+
+/**
+ * aipu_deinit_mm() - de-initialize the mm module during kernel module unloading
+ * @mm: pointer to memory manager struct initialized in aipu_init_mm()
+ */
+void aipu_deinit_mm(struct aipu_memory_manager *mm)
+{
+       aipu_mm_deinit_mem_region(mm, AIPU_MEM_REGION_SRAM_ID);
+       aipu_mm_deinit_mem_region(mm, AIPU_MEM_REGION_DRAM_ID);
+}
+
+/**
+ * aipu_mm_alloc() - alloc memory buffer for user request
+ * @mm:      pointer to memory manager struct initialized in aipu_init_mm()
+ * @buf_req: pointer to buffer request struct from userland
+ * @filp:    pointer to the file struct
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_mm_alloc(struct aipu_memory_manager *mm, struct aipu_buf_request *buf_req,
+                 struct file *filp)
+{
+       int ret = 0;
+
+       if (!mm || !buf_req || !filp)
+               return -EINVAL;
+
+       if (!buf_req->bytes || !is_power_of_2(buf_req->align_in_page))
+               return -EINVAL;
+
+       WARN_ON(!mm->reg[AIPU_MEM_REGION_DRAM_ID].bytes &&
+               !mm->reg[AIPU_MEM_REGION_SRAM_ID].bytes);
+
+       mutex_lock(&mm->lock);
+       /*
+        * Try to allocate from SRAM first if and only if:
+        * 1. The system has an SRAM region;
+        * 2. SRAM is in the enabled state;
+        * 3. The data types match;
+        */
+       if (mm->reg[AIPU_MEM_REGION_SRAM_ID].bytes && !mm->sram_disable &&
+           mm->sram_dft_dtype == buf_req->data_type) {
+               ret = aipu_mm_alloc_in_region_no_lock(mm, buf_req,
+                                                     &mm->reg[AIPU_MEM_REGION_SRAM_ID], filp);
+               if (!ret)
+                       goto unlock;
+       }
+
+       ret = aipu_mm_alloc_in_region_no_lock(mm, buf_req,
+                                             &mm->reg[AIPU_MEM_REGION_DRAM_ID], filp);
+       if (ret) {
+               dev_err(mm->dev,
+                       "[MM] buffer allocation failed for: bytes 0x%llx, page align %d\n",
+                       buf_req->bytes, buf_req->align_in_page);
+               goto unlock;
+       }
+
+       WARN_ON(buf_req->desc.pa % (buf_req->align_in_page << PAGE_SHIFT));
+
+unlock:
+       mutex_unlock(&mm->lock);
+       return ret;
+}
+
+/**
+ * aipu_mm_free() - free buffer allocated by aipu_mm_alloc()
+ * @mm:   pointer to memory manager struct initialized in aipu_init_mm()
+ * @buf:  pointer to the buffer descriptor to be released
+ * @filp: pointer to the file struct
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_mm_free(struct aipu_memory_manager *mm, struct aipu_buf_desc *buf, struct file *filp)
+{
+       int ret = 0;
+       struct aipu_mem_region *reg = NULL;
+
+       if (!mm || !buf || !filp)
+               return -EINVAL;
+
+       reg = aipu_mm_find_region(mm, buf->pa);
+       if (!reg)
+               return -EINVAL;
+
+       mutex_lock(&mm->lock);
+       ret = aipu_mm_free_in_region_no_lock(mm, buf, reg, filp);
+       mutex_unlock(&mm->lock);
+
+       return ret;
+}
+
+/**
+ * aipu_mm_free_buffers() - free all the buffers allocated from one fd
+ * @mm:   pointer to memory manager struct initialized in aipu_init_mm()
+ * @filp: pointer to the file struct
+ */
+void aipu_mm_free_buffers(struct aipu_memory_manager *mm, struct file *filp)
+{
+       aipu_mm_free_filp_in_region(mm, &mm->reg[AIPU_MEM_REGION_DRAM_ID], filp);
+       aipu_mm_free_filp_in_region(mm, &mm->reg[AIPU_MEM_REGION_SRAM_ID], filp);
+}
+
+/**
+ * aipu_mm_mmap_buf() - mmap an allocated buffer for user thread
+ * @mm: pointer to memory manager struct initialized in aipu_init_mm()
+ * @vma: pointer to the vm_area_struct
+ * @filp: pointer to the file struct
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_mm_mmap_buf(struct aipu_memory_manager *mm, struct vm_area_struct *vma,
+                    struct file *filp)
+{
+       int ret = 0;
+       u64 offset = 0;
+       size_t mmap_size = 0;
+       unsigned long vm_pgoff = 0;
+       struct aipu_mem_region *reg = NULL;
+       struct aipu_virt_page *first_page = NULL;
+
+       if (!mm || !vma)
+               return -EINVAL;
+
+       offset = vma->vm_pgoff * PAGE_SIZE;
+
+       reg = aipu_mm_find_region(mm, offset);
+       if (!reg)
+               return -EINVAL;
+
+       first_page = aipu_mm_find_page(mm, reg, filp, offset);
+       if (!first_page)
+               return -EINVAL;
+
+       vm_pgoff = vma->vm_pgoff;
+       vma->vm_pgoff = 0;
+       vma->vm_flags |= VM_IO;
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
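+       /*
+        * Natively reserved memory is mapped with remap_pfn_range(); DMA/CMA
+        * backed regions go through dma_mmap_attrs(). For the kernel-type
+        * region the whole region is mapped at the buffer's page offset.
+        */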
+       if (reg->type == AIPU_MEM_TYPE_DEV_RESERVED) {
+               ret = remap_pfn_range(vma, vma->vm_start, offset >> PAGE_SHIFT,
+                                     vma->vm_end - vma->vm_start, vma->vm_page_prot);
+       } else {
+               if (reg->type == AIPU_MEM_TYPE_KERNEL) {
+                       vma->vm_pgoff = (offset - reg->base_iova) >> PAGE_SHIFT;
+                       mmap_size = reg->bytes;
+               } else {
+                       mmap_size = first_page->contiguous_alloc_len << PAGE_SHIFT;
+               }
+               ret = dma_mmap_attrs(reg->dev, vma,
+                                    (void *)((u64)reg->base_va + offset - reg->base_iova),
+                                    (dma_addr_t)offset, mmap_size, reg->attrs);
+       }
+
+       vma->vm_pgoff = vm_pgoff;
+       if (!ret)
+               first_page->map_num++;
+
+       return ret;
+}
+
+/**
+ * aipu_mm_disable_sram_allocation() - disable buffer allocations from SoC SRAM
+ * @mm: pointer to memory manager struct initialized in aipu_init_mm()
+ * @filp: pointer to the file struct
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_mm_disable_sram_allocation(struct aipu_memory_manager *mm, struct file *filp)
+{
+       int ret = 0;
+       struct aipu_sram_disable_per_fd *sram_disable_per_fd = NULL;
+
+       if (!mm)
+               return -EINVAL;
+
+       /* If there is no SRAM in this system, it cannot be disabled. */
+       if (!mm->reg[AIPU_MEM_REGION_SRAM_ID].bytes)
+               return -EPERM;
+
+       mutex_lock(&mm->lock);
+       /* If SRAM is in use by the driver & AIPU, it cannot be disabled. */
+       if (!bitmap_empty(mm->reg[AIPU_MEM_REGION_SRAM_ID].bitmap,
+                         mm->reg[AIPU_MEM_REGION_SRAM_ID].count))
+               ret = -EPERM;
+
+       if (!ret) {
+               int found = 0;
+
+               list_for_each_entry(sram_disable_per_fd, &mm->sram_disable_head->list, list) {
+                       if (sram_disable_per_fd->filp == filp) {
+                               sram_disable_per_fd->cnt++;
+                               found = 1;
+                               break;
+                       }
+               }
+               if (!found) {
+                       sram_disable_per_fd = kzalloc(sizeof(*sram_disable_per_fd), GFP_KERNEL);
+                       if (!sram_disable_per_fd) {
+                               ret = -ENOMEM;
+                               goto unlock;
+                       }
+                       sram_disable_per_fd->cnt++;
+                       sram_disable_per_fd->filp = filp;
+                       list_add(&sram_disable_per_fd->list, &mm->sram_disable_head->list);
+               }
+               mm->sram_disable++;
+       }
+unlock:
+       mutex_unlock(&mm->lock);
+       return ret;
+}
+
+/**
+ * aipu_mm_enable_sram_allocation() - enable buffer allocations from SoC SRAM (disabled before)
+ * @mm:   pointer to memory manager struct initialized in aipu_init_mm()
+ * @filp: pointer to the file struct
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_mm_enable_sram_allocation(struct aipu_memory_manager *mm, struct file *filp)
+{
+       int ret = 0;
+       struct aipu_sram_disable_per_fd *sram_disable_per_fd = NULL;
+
+       if (!mm)
+               return -EINVAL;
+
+       if (!mm->reg[AIPU_MEM_REGION_SRAM_ID].bytes)
+               return -EPERM;
+
+       mutex_lock(&mm->lock);
+       if (mm->sram_disable == 0) {
+               ret = -EPERM;
+               goto unlock;
+       }
+
+       list_for_each_entry(sram_disable_per_fd, &mm->sram_disable_head->list, list) {
+               if (sram_disable_per_fd->filp == filp) {
+                       if (sram_disable_per_fd->cnt)
+                               sram_disable_per_fd->cnt--;
+                       break;
+               }
+       }
+       mm->sram_disable--;
+unlock:
+       mutex_unlock(&mm->lock);
+       return ret;
+}
diff --git a/drivers/misc/armchina-npu/aipu_mm.h b/drivers/misc/armchina-npu/aipu_mm.h
new file mode 100644
index 000000000000..f7e0018a5cec
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_mm.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#ifndef __AIPU_MM_H__
+#define __AIPU_MM_H__
+
+#include <linux/platform_device.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <uapi/misc/armchina_aipu.h>
+
+/**
+ * enum aipu_mem_type - AIPU memory type (used for both DRAM & SRAM)
+ * @AIPU_MEM_TYPE_NONE: no type
+ * @AIPU_MEM_TYPE_DEV_RESERVED: device specific native reservation
+ * @AIPU_MEM_TYPE_DMA_RESERVED: device specific DMA reservation
+ * @AIPU_MEM_TYPE_CMA_RESERVED: device specific CMA reservation
+ * @AIPU_MEM_TYPE_CMA_DEFAULT: default CMA pool
+ * @AIPU_MEM_TYPE_KERNEL: kernel mapped memory
+ */
+enum aipu_mem_type {
+       AIPU_MEM_TYPE_NONE         = 0,
+       AIPU_MEM_TYPE_DEV_RESERVED = 1,
+       AIPU_MEM_TYPE_DMA_RESERVED = 2,
+       AIPU_MEM_TYPE_CMA_RESERVED = 3,
+       AIPU_MEM_TYPE_CMA_DEFAULT  = 4,
+       AIPU_MEM_TYPE_KERNEL = 5,
+};
+
+enum aipu_mem_region_id {
+       AIPU_MEM_REGION_DRAM_ID = 0,
+       AIPU_MEM_REGION_SRAM_ID = 1,
+       AIPU_MEM_REGION_MAX_ID = 2,
+};
+
+/**
+ * struct aipu_virt_page - virtual page
+ * @tid: ID of the thread that requested this page (and the following pages)
+ * @filp: filp that requested this page
+ * @map_num: number of mmaps to userspace
+ * @contiguous_alloc_len: count of contiguous pages allocated together, including this one
+ */
+struct aipu_virt_page {
+       int tid;
+       struct file *filp;
+       int map_num;
+       unsigned long contiguous_alloc_len;
+};
+
+/**
+ * struct aipu_mem_region - AIPU memory region
+ * @base_iova: region base iova (bus address)
+ * @base_pa: region base physical address
+ * @base_va: region base virtual address
+ * @bytes: total bytes of this region
+ * @base_pfn: region base page frame number
+ * @type: region type (aipu_mem_type)
+ * @pages: page array
+ * @bitmap: region bitmap
+ * @count: bitmap bit count/page count
+ * @dev: region specific device (for multiple DMA/CMA regions)
+ * @attrs: attributes for DMA API
+ */
+struct aipu_mem_region {
+       dma_addr_t base_iova;
+       dma_addr_t base_pa;
+       void *base_va;
+       u64 bytes;
+       unsigned long base_pfn;
+       enum aipu_mem_type type;
+       struct aipu_virt_page **pages;
+       unsigned long *bitmap;
+       unsigned long count;
+       struct device *dev;
+       unsigned long attrs;
+};
+
+/**
+ * struct aipu_sram_disable_per_fd - per-fd record of SRAM disable operations
+ * @cnt: current total disable operation count
+ * @filp: file pointer
+ * @list: list node
+ */
+struct aipu_sram_disable_per_fd {
+       int cnt;
+       struct file *filp;
+       struct list_head list;
+};
+
+/**
+ * struct aipu_memory_manager - AIPU memory management struct (MM)
+ * @version: AIPU ISA version number
+ * @limit: AIPU device address space upper bound
+ * @has_iommu: whether the system has an IOMMU for the AIPU to use
+ * @host_aipu_offset: offset between CPU address space and AIPU device address space
+ * @dev: device struct pointer (AIPU core 0)
+ * @lock: lock for reg and sram_disable_head
+ * @reg: memory region, contains DRAM and/or SRAM
+ * @sram_dft_dtype: default data type allocated from SRAM
+ * @sram_disable: non-zero when SRAM allocations are disabled (disable call counter)
+ * @sram_disable_head: sram disable list
+ */
+struct aipu_memory_manager {
+       int version;
+       u64 limit;
+       bool has_iommu;
+       u64 host_aipu_offset;
+       struct device *dev;
+       struct mutex lock; /* Protect sram disabled head struct */
+       struct aipu_mem_region reg[AIPU_MEM_REGION_MAX_ID];
+       int sram_dft_dtype;
+       int sram_disable;
+       struct aipu_sram_disable_per_fd *sram_disable_head;
+};
+
+int aipu_init_mm(struct aipu_memory_manager *mm, struct platform_device *p_dev, int version);
+void aipu_deinit_mm(struct aipu_memory_manager *mm);
+int aipu_mm_alloc(struct aipu_memory_manager *mm, struct aipu_buf_request *buf_req,
+                 struct file *filp);
+int aipu_mm_free(struct aipu_memory_manager *mm, struct aipu_buf_desc *buf, struct file *filp);
+void aipu_mm_free_buffers(struct aipu_memory_manager *mm, struct file *filp);
+int aipu_mm_mmap_buf(struct aipu_memory_manager *mm, struct vm_area_struct *vma,
+                    struct file *filp);
+int aipu_mm_disable_sram_allocation(struct aipu_memory_manager *mm, struct file *filp);
+int aipu_mm_enable_sram_allocation(struct aipu_memory_manager *mm, struct file *filp);
+
+#endif /* __AIPU_MM_H__ */
diff --git a/drivers/misc/armchina-npu/aipu_priv.c b/drivers/misc/armchina-npu/aipu_priv.c
new file mode 100644
index 000000000000..e1ae5b951618
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_priv.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#include <linux/slab.h>
+#include <linux/of_address.h>
+#include "aipu_priv.h"
+#include "config.h"
+
+static int init_misc_dev(struct aipu_priv *aipu)
+{
+       aipu->misc.minor = MISC_DYNAMIC_MINOR;
+       aipu->misc.name = "aipu";
+       aipu->misc.fops = aipu->aipu_fops;
+       aipu->misc.mode = 0666;
+       return misc_register(&aipu->misc);
+}
+
+static void deinit_misc_dev(struct aipu_priv *aipu)
+{
+       if (aipu && aipu->misc.fops) {
+               misc_deregister(&aipu->misc);
+               memset(&aipu->misc, 0, sizeof(aipu->misc));
+       }
+}
+
+/**
+ * init_aipu_priv() - initialize an input AIPU private data struct
+ * @aipu:  pointer to the aipu private struct to be initialized
+ * @p_dev: pointer to the platform device struct
+ * @fops:  pointer to the file_operations struct
+ * @soc:   pointer to the SoC private data structure
+ * @soc_ops: pointer to the SoC operations struct
+ *
+ * This function should be called during driver probing, and it should be
+ * called only once.
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int init_aipu_priv(struct aipu_priv *aipu, struct platform_device *p_dev,
+                  const struct file_operations *fops, struct aipu_soc *soc,
+                  struct aipu_soc_operations *soc_ops)
+{
+       int ret = 0;
+       int version = 0;
+       int config = 0;
+
+       if (!aipu || !p_dev || !fops)
+               return -EINVAL;
+
+       if (aipu->is_init)
+               return 0;
+
+       aipu->core_cnt = 0;
+       aipu->cores = NULL;
+       aipu->dev = &p_dev->dev;
+       aipu->aipu_fops = fops;
+       aipu->soc = soc;
+       aipu->soc_ops = soc_ops;
+
+       zhouyi_detect_aipu_version(p_dev, &version, &config);
+       aipu->version = version;
+       dev_dbg(aipu->dev, "AIPU core0 ISA version %d, configuration %d\n", version, config);
+
+       ret = init_misc_dev(aipu);
+       if (ret)
+               goto err_handle;
+
+       ret = init_aipu_job_manager(&aipu->job_manager);
+       if (ret)
+               goto err_handle;
+
+       ret = aipu_init_mm(&aipu->mm, p_dev, version);
+       if (ret)
+               goto err_handle;
+
+       aipu->is_init = true;
+       goto finish;
+
+err_handle:
+       deinit_aipu_priv(aipu);
+
+finish:
+       return ret;
+}
+
+/**
+ * deinit_aipu_priv() - de-initialize an AIPU private data struct
+ * @aipu: pointer to the aipu private struct initialized in init_aipu_priv()
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int deinit_aipu_priv(struct aipu_priv *aipu)
+{
+       int core_iter = 0;
+
+       if (!aipu)
+               return 0;
+
+       for (core_iter = 0; core_iter < aipu->core_cnt; core_iter++)
+               deinit_aipu_core(aipu->cores[core_iter]);
+
+       kfree(aipu->cores);
+       aipu->core_cnt = 0;
+
+       aipu_deinit_mm(&aipu->mm);
+       deinit_aipu_job_manager(&aipu->job_manager);
+       deinit_misc_dev(aipu);
+       aipu->is_init = 0;
+
+       return 0;
+}
+
+/**
+ * aipu_priv_add_core() - add a newly detected core into the aipu_priv struct in probe phase
+ * @aipu: pointer to the aipu private struct initialized in init_aipu_priv()
+ * @core:    pointer to an aipu core struct
+ * @version: aipu core hardware version number
+ * @id:      aipu core ID
+ * @p_dev:   pointer to the platform device struct
+ *
+ * This function is called when a new AIPU core is probed by the driver.
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_priv_add_core(struct aipu_priv *aipu, struct aipu_core *core,
+                      int version, int id, struct platform_device *p_dev)
+{
+       int ret = 0;
+       struct aipu_core **new_core_arr = NULL;
+
+       if (!aipu || !core || !p_dev)
+               return -EINVAL;
+
+       WARN_ON(!aipu->is_init);
+
+       ret = init_aipu_core(core, version, id, aipu, p_dev);
+       if (ret)
+               return ret;
+
+       new_core_arr = kcalloc(aipu->core_cnt + 1, sizeof(*new_core_arr), GFP_KERNEL);
+       if (!new_core_arr)
+               return -ENOMEM;
+
+       if (aipu->core_cnt) {
+               WARN_ON(!aipu->cores);
+               memcpy(new_core_arr, aipu->cores, aipu->core_cnt * sizeof(*new_core_arr));
+               kfree(aipu->cores);
+               aipu->cores = NULL;
+       }
+
+       new_core_arr[aipu->core_cnt] = core;
+       aipu->cores = new_core_arr;
+       aipu->core_cnt++;
+
+       aipu_job_manager_set_cores_info(&aipu->job_manager, aipu->core_cnt, aipu->cores);
+       return ret;
+}
+
+/**
+ * aipu_priv_get_version() - get AIPU hardware version number wrapper
+ * @aipu: pointer to the aipu private struct initialized in init_aipu_priv()
+ *
+ * Return: AIPU ISA version
+ */
+int aipu_priv_get_version(struct aipu_priv *aipu)
+{
+       if (likely(aipu))
+               return aipu->version;
+       return 0;
+}
+
+/**
+ * aipu_priv_get_core_cnt() - get AIPU core count
+ * @aipu: pointer to the aipu private struct initialized in init_aipu_priv()
+ *
+ * Return: AIPU core count
+ */
+int aipu_priv_get_core_cnt(struct aipu_priv *aipu)
+{
+       if (likely(aipu))
+               return aipu->core_cnt;
+       return 0;
+}
+
+/**
+ * aipu_priv_query_core_capability() - query AIPU capability wrapper (per core capability)
+ * @aipu: pointer to the aipu private struct initialized in init_aipu_priv()
+ * @cap:  pointer to the capability struct
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_priv_query_core_capability(struct aipu_priv *aipu, struct aipu_core_cap *cap)
+{
+       int id = 0;
+       struct aipu_core *core = NULL;
+
+       if (unlikely(!aipu || !cap))
+               return -EINVAL;
+
+       for (id = 0; id < aipu->core_cnt; id++) {
+               core = aipu->cores[id];
+               cap[id].core_id = id;
+               cap[id].arch = core->arch;
+               cap[id].version = core->version;
+               cap[id].config = core->config;
+               cap[id].info.reg_base = core->reg.phys;
+       }
+
+       return 0;
+}
+
+/**
+ * @aipu_priv_query_capability() - query AIPU capability wrapper (multicore common capability)
+ * @aipu: pointer to the aipu private struct initialized in init_aipu_priv()
+ * @cap:  pointer to the capability struct
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_priv_query_capability(struct aipu_priv *aipu, struct aipu_cap *cap)
+{
+       int id = 0;
+       struct aipu_core_cap *core_cap = NULL;
+
+       if (unlikely(!aipu || !cap))
+               return -EINVAL;
+
+       cap->core_cnt = aipu->core_cnt;
+       cap->is_homogeneous = 1;
+
+       core_cap = kcalloc(aipu->core_cnt, sizeof(*core_cap), GFP_KERNEL);
+       if (!core_cap)
+               return -ENOMEM;
+
+       aipu_priv_query_core_capability(aipu, core_cap);
+       for (id = 1; id < aipu->core_cnt; id++) {
+               if (core_cap[id].arch != core_cap[id - 1].arch ||
+                   core_cap[id].version != core_cap[id - 1].version ||
+                   core_cap[id].config != core_cap[id - 1].config) {
+                       cap->is_homogeneous = 0;
+                       break;
+               }
+       }
+
+       if (cap->is_homogeneous)
+               cap->core_cap = core_cap[0];
+
+       kfree(core_cap);
+       return 0;
+}
+
+/**
+ * @aipu_priv_io_rw() - AIPU external register read/write wrapper
+ * @aipu:   pointer to the aipu private struct initialized in init_aipu_priv()
+ * @io_req: pointer to the io_req struct
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_priv_io_rw(struct aipu_priv *aipu, struct aipu_io_req *io_req)
+{
+       int ret = -EINVAL;
+       int id = 0;
+
+       if (!aipu || !io_req || io_req->core_id >= aipu->core_cnt)
+               return ret;
+
+       id = io_req->core_id;
+       return aipu->cores[id]->ops->io_rw(aipu->cores[id], io_req);
+}
+
+/**
+ * @aipu_priv_check_status() - check if aipu status is ready for usage
+ * @aipu: pointer to the aipu private struct initialized in init_aipu_priv()
+ *
+ * Return: 0 on success and error code otherwise.
+ */
+int aipu_priv_check_status(struct aipu_priv *aipu)
+{
+       if (aipu && aipu->is_init)
+               return 0;
+       return -EINVAL;
+}
diff --git a/drivers/misc/armchina-npu/aipu_priv.h b/drivers/misc/armchina-npu/aipu_priv.h
new file mode 100644
index 000000000000..ae9d596ff84c
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_priv.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#ifndef __AIPU_PRIV_H__
+#define __AIPU_PRIV_H__
+
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/types.h>
+#include "include/armchina_aipu_soc.h"
+#include "aipu_irq.h"
+#include "aipu_io.h"
+#include "aipu_core.h"
+#include "aipu_job_manager.h"
+#include "aipu_mm.h"
+
+/**
+ * struct aipu_priv - AIPU private struct contains all core info and shared resources
+ * @version:     AIPU hardware version
+ * @core_cnt:    AIPU core count in system
+ * @cores:       core pointer array
+ * @dev:         device struct pointer of core 0
+ * @soc_ops:     SoC operation pointer
+ * @aipu_fops:   file operation struct
+ * @misc:        misc driver struct
+ * @job_manager: job manager struct
+ * @mm:          memory manager
+ * @is_init:     init flag
+ */
+struct aipu_priv {
+       int version;
+       int core_cnt;
+       struct aipu_core **cores;
+       struct device *dev;
+       struct aipu_soc              *soc;
+       struct aipu_soc_operations   *soc_ops;
+       const struct file_operations *aipu_fops;
+       struct miscdevice            misc;
+       struct aipu_job_manager      job_manager;
+       struct aipu_memory_manager   mm;
+       bool is_init;
+};
+
+int init_aipu_priv(struct aipu_priv *aipu, struct platform_device *p_dev,
+                  const struct file_operations *fops, struct aipu_soc *soc,
+                  struct aipu_soc_operations *soc_ops);
+int deinit_aipu_priv(struct aipu_priv *aipu);
+int aipu_priv_add_core(struct aipu_priv *aipu, struct aipu_core *core,
+                      int version, int id, struct platform_device *p_dev);
+int aipu_priv_get_version(struct aipu_priv *aipu);
+int aipu_priv_get_core_cnt(struct aipu_priv *aipu);
+int aipu_priv_query_core_capability(struct aipu_priv *aipu, struct aipu_core_cap *cap);
+int aipu_priv_query_capability(struct aipu_priv *aipu, struct aipu_cap *cap);
+int aipu_priv_io_rw(struct aipu_priv *aipu, struct aipu_io_req *io_req);
+int aipu_priv_check_status(struct aipu_priv *aipu);
+
+#endif /* __AIPU_PRIV_H__ */
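
For reference, the probe-time sequence these entry points imply is: initialize
the shared private struct once, then attach each detected core. A minimal
sketch of that flow for a single-core system (the function and variable names
are illustrative; the real detection loop lives in aipu.c, and error unwinding
is omitted):

static int example_bringup(struct aipu_priv *aipu, struct aipu_core *core,
                           struct platform_device *p_dev, struct aipu_soc *soc,
                           struct aipu_soc_operations *soc_ops,
                           const struct file_operations *fops)
{
        int version = 0, config = 0;
        int ret;

        /* read the ISA version/config registers before touching the core */
        ret = zhouyi_detect_aipu_version(p_dev, &version, &config);
        if (ret)
                return ret;

        /* set up the misc device, job manager and memory manager once */
        ret = init_aipu_priv(aipu, p_dev, fops, soc, soc_ops);
        if (ret)
                return ret;

        /* attach core 0; multi-core systems repeat this per probed core */
        return aipu_priv_add_core(aipu, core, version, 0, p_dev);
}
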
diff --git a/drivers/misc/armchina-npu/aipu_soc_default.c b/drivers/misc/armchina-npu/aipu_soc_default.c
new file mode 100644
index 000000000000..a61f2f5d14c3
--- /dev/null
+++ b/drivers/misc/armchina-npu/aipu_soc_default.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+/*
+ * Default SoC implementation using the ArmChina platform driver methods
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include "include/armchina_aipu_soc.h"
+
+static struct aipu_soc default_soc = {
+       .priv = NULL,
+};
+
+static struct aipu_soc_operations default_ops = {
+       .start_bw_profiling = NULL,
+       .stop_bw_profiling = NULL,
+       .read_profiling_reg = NULL,
+       .enable_clk = NULL,
+       .disable_clk = NULL,
+       .is_clk_enabled = NULL,
+       .is_aipu_irq = NULL,
+};
+
+static int default_probe(struct platform_device *p_dev)
+{
+       return armchina_aipu_probe(p_dev, &default_soc, &default_ops);
+}
+
+static int default_remove(struct platform_device *p_dev)
+{
+       return armchina_aipu_remove(p_dev);
+}
+
+static int default_suspend(struct platform_device *p_dev, pm_message_t state)
+{
+       return armchina_aipu_suspend(p_dev, state);
+}
+
+static int default_resume(struct platform_device *p_dev)
+{
+       return armchina_aipu_resume(p_dev);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id aipu_of_match[] = {
+       {
+               .compatible = "armchina,zhouyi-v1",
+       },
+       {
+               .compatible = "armchina,zhouyi-v2",
+       },
+       {
+               .compatible = "armchina,zhouyi",
+       },
+       { }
+};
+
+MODULE_DEVICE_TABLE(of, aipu_of_match);
+#endif
+
+static struct platform_driver aipu_platform_driver = {
+       .probe = default_probe,
+       .remove = default_remove,
+       .suspend = default_suspend,
+       .resume  = default_resume,
+       .driver = {
+               .name = "armchina",
+               .owner = THIS_MODULE,
+#ifdef CONFIG_OF
+               .of_match_table = of_match_ptr(aipu_of_match),
+#endif
+       },
+};
+
+module_platform_driver(aipu_platform_driver);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Dejia Shang");
+MODULE_AUTHOR("Toby Huang");
+MODULE_DESCRIPTION("ArmChina Zhouyi AI accelerator driver");
diff --git a/drivers/misc/armchina-npu/config.h b/drivers/misc/armchina-npu/config.h
new file mode 100644
index 000000000000..68e993ca6e3b
--- /dev/null
+++ b/drivers/misc/armchina-npu/config.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#ifndef __CONFIG_H__
+#define __CONFIG_H__
+
+#include <linux/sizes.h>
+
+#define AIPU_CONFIG_USE_DRAM_DEFAULT_SIZE   0
+#define AIPU_CONFIG_DRAM_DEFAULT_SIZE       (64 * SZ_1M)
+
+#endif /* __CONFIG_H__ */
diff --git a/drivers/misc/armchina-npu/include/armchina_aipu_soc.h b/drivers/misc/armchina-npu/include/armchina_aipu_soc.h
new file mode 100644
index 000000000000..a726a90e0feb
--- /dev/null
+++ b/drivers/misc/armchina-npu/include/armchina_aipu_soc.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#ifndef __AIPU_SOC_H__
+#define __AIPU_SOC_H__
+
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <uapi/misc/armchina_aipu.h>
+
+/**
+ * struct aipu_soc - a struct contains AIPU SoC specific information
+ * @priv: SoC private data structure
+ *
+ * This struct contains a reference to SoC-level private data, which is registered
+ * at probe time and passed as an argument to the corresponding SoC operation methods.
+ */
+struct aipu_soc {
+       void *priv;
+};
+
+/**
+ * struct aipu_soc_operations - a struct contains SoC operation methods
+ * @start_bw_profiling: start bandwidth profiling
+ * @stop_bw_profiling:  stop bandwidth profiling
+ * @read_profiling_reg: read profiling register values
+ * @enable_clk:         enable clock/disable clock gating
+ * @disable_clk:        disable clock/enable clock gating
+ * @is_clk_enabled:     check whether the clock is enabled or disabled
+ * @is_aipu_irq:        check whether a shared interrupt is for an AIPU core or not
+ *
+ * SoC vendors should register their SoC operations into struct aipu_priv at probe
+ * time if they would like to implement and use private SoC operation methods.
+ */
+struct aipu_soc_operations {
+       void (*start_bw_profiling)(struct device *dev, struct aipu_soc *soc);
+       void (*stop_bw_profiling)(struct device *dev, struct aipu_soc *soc);
+       void (*read_profiling_reg)(struct device *dev, struct aipu_soc *soc,
+                                  struct aipu_ext_profiling_data *pdata);
+       int (*enable_clk)(struct device *dev, struct aipu_soc *soc);
+       int (*disable_clk)(struct device *dev, struct aipu_soc *soc);
+       bool (*is_clk_enabled)(struct device *dev, struct aipu_soc *soc);
+       bool (*is_aipu_irq)(struct device *dev, struct aipu_soc *soc, int core_id);
+};
+
+int armchina_aipu_probe(struct platform_device *p_dev, struct aipu_soc *soc,
+                       struct aipu_soc_operations *ops);
+int armchina_aipu_remove(struct platform_device *p_dev);
+int armchina_aipu_suspend(struct platform_device *p_dev, pm_message_t state);
+int armchina_aipu_resume(struct platform_device *p_dev);
+
+#endif /* __AIPU_SOC_H__ */
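
As the comment above says, a vendor with SoC-specific clock handling registers
its own aipu_soc_operations instead of shipping aipu_soc_default.c unchanged.
A minimal sketch, assuming the vendor exposes one clock through the common
clock framework (all myvendor_* names are hypothetical):

#include <linux/clk.h>
#include <linux/platform_device.h>
#include "include/armchina_aipu_soc.h"

static struct aipu_soc myvendor_soc;

static int myvendor_enable_clk(struct device *dev, struct aipu_soc *soc)
{
        /* soc->priv holds the struct clk * stashed at probe time */
        return clk_prepare_enable(soc->priv);
}

static int myvendor_disable_clk(struct device *dev, struct aipu_soc *soc)
{
        clk_disable_unprepare(soc->priv);
        return 0;
}

static struct aipu_soc_operations myvendor_ops = {
        .enable_clk  = myvendor_enable_clk,
        .disable_clk = myvendor_disable_clk,
};

static int myvendor_probe(struct platform_device *p_dev)
{
        struct clk *clk = devm_clk_get(&p_dev->dev, NULL);

        if (IS_ERR(clk))
                return PTR_ERR(clk);

        myvendor_soc.priv = clk;
        return armchina_aipu_probe(p_dev, &myvendor_soc, &myvendor_ops);
}
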
diff --git a/drivers/misc/armchina-npu/zhouyi/Makefile b/drivers/misc/armchina-npu/zhouyi/Makefile
new file mode 100644
index 000000000000..37975e220769
--- /dev/null
+++ b/drivers/misc/armchina-npu/zhouyi/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+subdir-ccflags-y += -I$(src)
+
+ZHOUYI_FILES := zhouyi/zhouyi.o zhouyi/z1.o zhouyi/z2.o
\ No newline at end of file
diff --git a/drivers/misc/armchina-npu/zhouyi/z1.c b/drivers/misc/armchina-npu/zhouyi/z1.c
new file mode 100644
index 000000000000..9abdd88d755e
--- /dev/null
+++ b/drivers/misc/armchina-npu/zhouyi/z1.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#include <linux/irqreturn.h>
+#include <linux/bitops.h>
+#include "aipu_priv.h"
+#include "z1.h"
+#include "aipu_io.h"
+#include "config.h"
+
+static int zhouyi_v1_get_hw_version_number(struct aipu_core *core)
+{
+       if (likely(core))
+               return zhouyi_get_hw_version_number(&core->reg);
+       return 0;
+}
+
+static int zhouyi_v1_get_hw_config_number(struct aipu_core *core)
+{
+       if (likely(core))
+               return zhouyi_get_hw_config_number(&core->reg);
+       return 0;
+}
+
+static void zhouyi_v1_enable_interrupt(struct aipu_core *core)
+{
+       if (likely(core))
+               aipu_write32(&core->reg, ZHOUYI_CTRL_REG_OFFSET,
+                            ZHOUYIV1_IRQ_ENABLE_FLAG);
+}
+
+static void zhouyi_v1_disable_interrupt(struct aipu_core *core)
+{
+       if (likely(core))
+               aipu_write32(&core->reg, ZHOUYI_CTRL_REG_OFFSET,
+                            ZHOUYIV1_IRQ_DISABLE_FLAG);
+}
+
+static void zhouyi_v1_clear_qempty_interrupt(struct aipu_core *core)
+{
+       if (likely(core))
+               zhouyi_clear_qempty_interrupt(&core->reg);
+}
+
+static void zhouyi_v1_clear_done_interrupt(struct aipu_core *core)
+{
+       if (likely(core))
+               zhouyi_clear_done_interrupt(&core->reg);
+}
+
+static void zhouyi_v1_clear_excep_interrupt(struct aipu_core *core)
+{
+       if (likely(core))
+               zhouyi_clear_excep_interrupt(&core->reg);
+}
+
+static void zhouyi_v1_trigger(struct aipu_core *core)
+{
+       int start_pc = 0;
+
+       if (likely(core)) {
+               start_pc = aipu_read32(&core->reg, ZHOUYI_START_PC_REG_OFFSET) & 0xFFFFFFF0;
+               aipu_write32(&core->reg, ZHOUYI_START_PC_REG_OFFSET, start_pc | 0xD);
+       }
+}
+
+static int zhouyi_v1_reserve(struct aipu_core *core, struct aipu_job_desc *udesc, int do_trigger)
+{
+       unsigned int phys_addr = 0;
+       unsigned int phys_addr0 = 0;
+       unsigned int phys_addr1 = 0;
+       unsigned int start_pc = 0;
+
+       if (unlikely(!core || !udesc))
+               return -EINVAL;
+
+       /* Load data addr 0 register */
+       phys_addr0 = (unsigned int)udesc->data_0_addr;
+       aipu_write32(&core->reg, ZHOUYI_DATA_ADDR_0_REG_OFFSET, phys_addr0);
+
+       /* Load data addr 1 register */
+       phys_addr1 = (unsigned int)udesc->data_1_addr;
+       aipu_write32(&core->reg, ZHOUYI_DATA_ADDR_1_REG_OFFSET, phys_addr1);
+
+       /* Load interrupt PC */
+       aipu_write32(&core->reg, ZHOUYI_INTR_PC_REG_OFFSET,
+                    (unsigned int)udesc->intr_handler_addr);
+
+       /* Load start PC register */
+       phys_addr = (unsigned int)udesc->start_pc_addr;
+       if (do_trigger)
+               start_pc = phys_addr | 0xD;
+       else
+               start_pc = phys_addr;
+       aipu_write32(&core->reg, ZHOUYI_START_PC_REG_OFFSET, start_pc);
+
+       dev_dbg(core->dev, "[Job %d] trigger done: start pc = 0x%x, dreg0 = 0x%x, dreg1 = 0x%x\n",
+               udesc->job_id, start_pc, phys_addr0, phys_addr1);
+
+       return 0;
+}
+
+static bool zhouyi_v1_is_idle(struct aipu_core *core)
+{
+       unsigned long val = 0;
+
+       if (unlikely(!core))
+               return false;
+
+       val = aipu_read32(&core->reg, ZHOUYI_STAT_REG_OFFSET);
+       return test_bit(16, &val) && test_bit(17, &val) && test_bit(18, &val);
+}
+
+static int zhouyi_v1_read_status_reg(struct aipu_core *core)
+{
+       if (unlikely(!core))
+               return 0;
+       return zhouyi_read_status_reg(&core->reg);
+}
+
+static void zhouyi_v1_print_hw_id_info(struct aipu_core *core)
+{
+       if (unlikely(!core))
+               return;
+
+       dev_info(core->dev, "AIPU Initial Status: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_STAT_REG_OFFSET));
+
+       dev_info(core->dev, "########## AIPU CORE %d: ZHOUYI V1 ##########", core->id);
+       dev_info(core->dev, "# ISA Version Register: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_ISA_VERSION_REG_OFFSET));
+       dev_info(core->dev, "# TPC Feature Register: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_TPC_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# SPU Feature Register: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_SPU_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# HWA Feature Register: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_HWA_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# Revision ID Register: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_REVISION_ID_REG_OFFSET));
+       dev_info(core->dev, "# Memory Hierarchy Feature Register: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_MEM_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# Instruction RAM Feature Register:  0x%x",
+                aipu_read32(&core->reg, ZHOUYI_INST_RAM_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# TEC Local SRAM Feature Register:   0x%x",
+                aipu_read32(&core->reg, ZHOUYI_LOCAL_SRAM_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# Global SRAM Feature Register:      0x%x",
+                aipu_read32(&core->reg, ZHOUYI_GLOBAL_SRAM_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# Instruction Cache Feature Register:0x%x",
+                aipu_read32(&core->reg, ZHOUYI_INST_CACHE_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# Data Cache Feature Register:       0x%x",
+                aipu_read32(&core->reg, ZHOUYI_DATA_CACHE_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# L2 Cache Feature Register:         0x%x",
+                aipu_read32(&core->reg, ZHOUYI_L2_CACHE_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "############################################");
+}
+
+static int zhouyi_v1_io_rw(struct aipu_core *core, struct aipu_io_req *io_req)
+{
+       if (unlikely(!io_req))
+               return -EINVAL;
+
+       if (!core || io_req->offset > ZHOUYI_V1_MAX_REG_OFFSET)
+               return -EINVAL;
+
+       zhouyi_io_rw(&core->reg, io_req);
+       return 0;
+}
+
+static int zhouyi_v1_upper_half(void *data)
+{
+       int ret = 0;
+       struct aipu_core *core = (struct aipu_core *)data;
+
+       if (get_soc_ops(core) &&
+           get_soc_ops(core)->is_aipu_irq &&
+           !get_soc_ops(core)->is_aipu_irq(core->dev, get_soc(core), core->id))
+               return IRQ_NONE;
+
+       ret = zhouyi_v1_read_status_reg(core);
+       if (ret & ZHOUYI_IRQ_QEMPTY)
+               zhouyi_v1_clear_qempty_interrupt(core);
+
+       if (ret & ZHOUYI_IRQ_DONE) {
+               zhouyi_v1_clear_done_interrupt(core);
+               aipu_job_manager_irq_upper_half(core, 0);
+               aipu_irq_schedulework(core->irq_obj);
+       }
+
+       if (ret & ZHOUYI_IRQ_EXCEP) {
+               zhouyi_v1_clear_excep_interrupt(core);
+               aipu_job_manager_irq_upper_half(core,
+                                               aipu_read32(&core->reg,
+                                                           ZHOUYI_INTR_CAUSE_REG_OFFSET));
+               aipu_irq_schedulework(core->irq_obj);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static void zhouyi_v1_bottom_half(void *data)
+{
+       aipu_job_manager_irq_bottom_half(data);
+}
+
+#ifdef CONFIG_SYSFS
+static int zhouyi_v1_sysfs_show(struct aipu_core *core, char *buf)
+{
+       int ret = 0;
+       char tmp[512];
+
+       if (unlikely(!core || !buf))
+               return -EINVAL;
+
+       ret += zhouyi_sysfs_show(&core->reg, buf);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "Intr Cause Reg",
+                                    ZHOUYI_INTR_CAUSE_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "Intr Status Reg",
+           ZHOUYI_INTR_STAT_REG_OFFSET);
+       strcat(buf, tmp);
+       return ret;
+}
+#endif
+
+static struct aipu_core_operations zhouyi_v1_ops = {
+       .get_version = zhouyi_v1_get_hw_version_number,
+       .get_config = zhouyi_v1_get_hw_config_number,
+       .enable_interrupt = zhouyi_v1_enable_interrupt,
+       .disable_interrupt = zhouyi_v1_disable_interrupt,
+       .trigger = zhouyi_v1_trigger,
+       .reserve = zhouyi_v1_reserve,
+       .is_idle = zhouyi_v1_is_idle,
+       .read_status_reg = zhouyi_v1_read_status_reg,
+       .print_hw_id_info = zhouyi_v1_print_hw_id_info,
+       .io_rw = zhouyi_v1_io_rw,
+       .upper_half = zhouyi_v1_upper_half,
+       .bottom_half = zhouyi_v1_bottom_half,
+#ifdef CONFIG_SYSFS
+       .sysfs_show = zhouyi_v1_sysfs_show,
+#endif
+};
+
+struct aipu_core_operations *get_zhouyi_v1_ops(void)
+{
+       return &zhouyi_v1_ops;
+}
diff --git a/drivers/misc/armchina-npu/zhouyi/z1.h b/drivers/misc/armchina-npu/zhouyi/z1.h
new file mode 100644
index 000000000000..63e9c5b2ee7f
--- /dev/null
+++ b/drivers/misc/armchina-npu/zhouyi/z1.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#ifndef __Z1_H__
+#define __Z1_H__
+
+#include "zhouyi.h"
+
+/*
+ * Zhouyi V1 AIPU Interrupts
+ */
+#define ZHOUYIV1_IRQ              (ZHOUYI_IRQ)
+#define ZHOUYIV1_IRQ_ENABLE_FLAG  (ZHOUYIV1_IRQ)
+#define ZHOUYIV1_IRQ_DISABLE_FLAG (ZHOUYI_IRQ_NONE)
+
+#define ZHOUYI_V1_MAX_SCHED_JOB_NUM  1
+
+/*
+ * Zhouyi V1 AIPU Specific Host Control Register Map
+ */
+#define ZHOUYI_INTR_CAUSE_REG_OFFSET          0x20
+#define ZHOUYI_INTR_STAT_REG_OFFSET           0x24
+#define ZHOUYI_INTR_BACKUP_STAT_REG_OFFSET    0x28
+#define ZHOUYI_INTR_BACKUP_PC_REG_OFFSET      0x2C
+#define ZHOUYI_DBG_ERR_CAUSE_REG_OFFSET       0x30
+#define ZHOUYI_DBG_DATA_REG_0_OFFSET          0x34
+#define ZHOUYI_DBG_DATA_REG_1_OFFSET          0x38
+#define ZHOUYI_L2_CACHE_FEATURE_REG_OFFSET    0x6C
+#define ZHOUYI_V1_MAX_REG_OFFSET              0x6C
+
+struct aipu_core_operations *get_zhouyi_v1_ops(void);
+
+#endif /* __Z1_H__ */
diff --git a/drivers/misc/armchina-npu/zhouyi/z2.c b/drivers/misc/armchina-npu/zhouyi/z2.c
new file mode 100644
index 000000000000..d9010b639528
--- /dev/null
+++ b/drivers/misc/armchina-npu/zhouyi/z2.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#include <linux/irqreturn.h>
+#include <linux/bitops.h>
+#include "aipu_priv.h"
+#include "z2.h"
+#include "aipu_io.h"
+#include "config.h"
+
+static int zhouyi_v2_get_hw_version_number(struct aipu_core *core)
+{
+       if (likely(core))
+               return zhouyi_get_hw_version_number(&core->reg);
+       return 0;
+}
+
+static int zhouyi_v2_get_hw_config_number(struct aipu_core *core)
+{
+       if (likely(core))
+               return zhouyi_get_hw_config_number(&core->reg);
+       return 0;
+}
+
+static void zhouyi_v2_enable_interrupt(struct aipu_core *core)
+{
+       if (likely(core))
+               aipu_write32(&core->reg, ZHOUYI_CTRL_REG_OFFSET, ZHOUYIV2_IRQ_ENABLE_FLAG);
+}
+
+static void zhouyi_v2_disable_interrupt(struct aipu_core *core)
+{
+       if (likely(core))
+               aipu_write32(&core->reg, ZHOUYI_CTRL_REG_OFFSET, ZHOUYIV2_IRQ_DISABLE_FLAG);
+}
+
+static void zhouyi_v2_clear_qempty_interrupt(struct aipu_core *core)
+{
+       if (likely(core))
+               zhouyi_clear_qempty_interrupt(&core->reg);
+}
+
+static void zhouyi_v2_clear_done_interrupt(struct aipu_core *core)
+{
+       if (likely(core))
+               zhouyi_clear_done_interrupt(&core->reg);
+}
+
+static void zhouyi_v2_clear_excep_interrupt(struct aipu_core *core)
+{
+       if (likely(core))
+               zhouyi_clear_excep_interrupt(&core->reg);
+}
+
+static void zhouyi_v2_clear_fault_interrupt(struct aipu_core *core)
+{
+       if (likely(core))
+               aipu_write32(&core->reg, ZHOUYI_STAT_REG_OFFSET, ZHOUYI_IRQ_FAULT);
+}
+
+static void zhouyi_v2_trigger(struct aipu_core *core)
+{
+       int start_pc = 0;
+
+       if (likely(core)) {
+               start_pc = aipu_read32(&core->reg, ZHOUYI_START_PC_REG_OFFSET) & 0xFFFFFFF0;
+               aipu_write32(&core->reg, ZHOUYI_START_PC_REG_OFFSET, start_pc | 0xD);
+       }
+}
+
+static int zhouyi_v2_reserve(struct aipu_core *core, struct aipu_job_desc *udesc, int do_trigger)
+{
+       u32 start_pc = 0;
+       u32 ase0_base_high = 0;
+
+       if (unlikely(!core || !udesc))
+               return -EINVAL;
+
+       start_pc = (u32)udesc->start_pc_addr;
+       ase0_base_high = udesc->start_pc_addr >> 32;
+
+       /* Load data addr 0 register */
+       aipu_write32(&core->reg, ZHOUYI_DATA_ADDR_0_REG_OFFSET, (u32)udesc->data_0_addr);
+
+       /* Load data addr 1 register */
+       aipu_write32(&core->reg, ZHOUYI_DATA_ADDR_1_REG_OFFSET, (u32)udesc->data_1_addr);
+
+       /* Load interrupt PC */
+       aipu_write32(&core->reg, ZHOUYI_INTR_PC_REG_OFFSET, (u32)udesc->intr_handler_addr);
+
+       /* Load ASE registers */
+       /* ASE 0 */
+       aipu_write32(&core->reg, AIPU_ADDR_EXT0_CTRL_REG_OFFSET, ZHOUYI_V2_ASE_RW_ENABLE);
+       aipu_write32(&core->reg, AIPU_ADDR_EXT0_HIGH_BASE_REG_OFFSET, ase0_base_high);
+       aipu_write32(&core->reg, AIPU_ADDR_EXT0_LOW_BASE_REG_OFFSET, 0);
+       dev_dbg(core->dev, "ASE 0 Ctrl 0x%x, ASE 0 PA 0x%llx",
+               aipu_read32(&core->reg, AIPU_ADDR_EXT0_CTRL_REG_OFFSET),
+               ((u64)aipu_read32(&core->reg, AIPU_ADDR_EXT0_HIGH_BASE_REG_OFFSET) << 32) +
+                aipu_read32(&core->reg, AIPU_ADDR_EXT0_LOW_BASE_REG_OFFSET));
+       /* ASE 1 */
+       aipu_write32(&core->reg, AIPU_ADDR_EXT1_CTRL_REG_OFFSET, ZHOUYI_V2_ASE_READ_ENABLE);
+       aipu_write32(&core->reg, AIPU_ADDR_EXT1_HIGH_BASE_REG_OFFSET, ase0_base_high);
+       aipu_write32(&core->reg, AIPU_ADDR_EXT1_LOW_BASE_REG_OFFSET, 0);
+       dev_dbg(core->dev, "ASE 1 Ctrl 0x%x, ASE 1 PA 0x%llx",
+               aipu_read32(&core->reg, AIPU_ADDR_EXT1_CTRL_REG_OFFSET),
+               ((u64)aipu_read32(&core->reg, AIPU_ADDR_EXT1_HIGH_BASE_REG_OFFSET) << 32) +
+                aipu_read32(&core->reg, AIPU_ADDR_EXT1_LOW_BASE_REG_OFFSET));
+       /* ASE 2 */
+       if (!udesc->enable_asid) {
+               aipu_write32(&core->reg, AIPU_ADDR_EXT2_CTRL_REG_OFFSET,
+                            ZHOUYI_V2_ASE_RW_ENABLE);
+               aipu_write32(&core->reg, AIPU_ADDR_EXT2_HIGH_BASE_REG_OFFSET, ase0_base_high);
+               aipu_write32(&core->reg, AIPU_ADDR_EXT2_LOW_BASE_REG_OFFSET, 0);
+               dev_dbg(core->dev, "Default: ASE 2 Ctrl 0x%x, ASE 0 PA 0x%llx",
+                       aipu_read32(&core->reg, AIPU_ADDR_EXT2_CTRL_REG_OFFSET),
+                       ((u64)aipu_read32(&core->reg,
+                                         AIPU_ADDR_EXT2_HIGH_BASE_REG_OFFSET) << 32) +
+                        aipu_read32(&core->reg, AIPU_ADDR_EXT2_LOW_BASE_REG_OFFSET));
+       }
+
+       /* Load start PC register */
+       if (do_trigger)
+               start_pc |= 0xD;
+       aipu_write32(&core->reg, ZHOUYI_START_PC_REG_OFFSET, start_pc);
+
+       dev_dbg(core->dev, "[Job %d] trigger done: start pc = 0x%x, dreg0 = 0x%x, dreg1 = 0x%x\n",
+               udesc->job_id, start_pc, (u32)udesc->data_0_addr, (u32)udesc->data_1_addr);
+
+       return 0;
+}
+
+static bool zhouyi_v2_is_idle(struct aipu_core *core)
+{
+       unsigned long val = 0;
+
+       if (unlikely(!core))
+               return false;
+
+       val = aipu_read32(&core->reg, ZHOUYI_STAT_REG_OFFSET);
+       return test_bit(16, &val) && test_bit(17, &val) && test_bit(18, &val);
+}
+
+static int zhouyi_v2_read_status_reg(struct aipu_core *core)
+{
+       if (unlikely(!core))
+               return 0;
+       return zhouyi_read_status_reg(&core->reg);
+}
+
+static void zhouyi_v2_print_hw_id_info(struct aipu_core *core)
+{
+       if (unlikely(!core))
+               return;
+
+       dev_info(core->dev, "AIPU Initial Status: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_STAT_REG_OFFSET));
+
+       dev_info(core->dev, "########## AIPU CORE %d: ZHOUYI V%d ##########",
+                core->id, core->version);
+       dev_info(core->dev, "# ISA Version Register: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_ISA_VERSION_REG_OFFSET));
+       dev_info(core->dev, "# TPC Feature Register: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_TPC_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# SPU Feature Register: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_SPU_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# HWA Feature Register: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_HWA_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# Revision ID Register: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_REVISION_ID_REG_OFFSET));
+       dev_info(core->dev, "# Memory Hierarchy Feature Register: 0x%x",
+                aipu_read32(&core->reg, ZHOUYI_MEM_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# Instruction RAM Feature Register:  0x%x",
+                aipu_read32(&core->reg, ZHOUYI_INST_RAM_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# TEC Local SRAM Feature Register:   0x%x",
+                aipu_read32(&core->reg, ZHOUYI_LOCAL_SRAM_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# Global SRAM Feature Register:      0x%x",
+                aipu_read32(&core->reg, ZHOUYI_GLOBAL_SRAM_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# Instruction Cache Feature Register:0x%x",
+                aipu_read32(&core->reg, ZHOUYI_INST_CACHE_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "# Data Cache Feature Register:       0x%x",
+                aipu_read32(&core->reg, ZHOUYI_DATA_CACHE_FEATURE_REG_OFFSET));
+       dev_info(core->dev, "############################################");
+}
+
+static int zhouyi_v2_io_rw(struct aipu_core *core, struct aipu_io_req *io_req)
+{
+       if (unlikely(!io_req))
+               return -EINVAL;
+
+       if (!core || io_req->offset > ZHOUYI_V2_MAX_REG_OFFSET)
+               return -EINVAL;
+
+       zhouyi_io_rw(&core->reg, io_req);
+       return 0;
+}
+
+static int zhouyi_v2_upper_half(void *data)
+{
+       int ret = 0;
+       struct aipu_core *core = (struct aipu_core *)data;
+
+       if (get_soc_ops(core) &&
+           get_soc_ops(core)->is_aipu_irq &&
+           !get_soc_ops(core)->is_aipu_irq(core->dev, get_soc(core), core->id))
+               return IRQ_NONE;
+
+       ret = zhouyi_v2_read_status_reg(core);
+       if (ret & ZHOUYI_IRQ_QEMPTY)
+               zhouyi_v2_clear_qempty_interrupt(core);
+
+       if (ret & ZHOUYI_IRQ_DONE) {
+               zhouyi_v2_clear_done_interrupt(core);
+               aipu_job_manager_irq_upper_half(core, 0);
+               aipu_irq_schedulework(core->irq_obj);
+       }
+
+       if (ret & ZHOUYI_IRQ_EXCEP) {
+               zhouyi_v2_clear_excep_interrupt(core);
+               aipu_job_manager_irq_upper_half(core, 1);
+               aipu_irq_schedulework(core->irq_obj);
+       }
+
+       if (ret & ZHOUYI_IRQ_FAULT)
+               zhouyi_v2_clear_fault_interrupt(core);
+
+       return IRQ_HANDLED;
+}
+
+static void zhouyi_v2_bottom_half(void *data)
+{
+       aipu_job_manager_irq_bottom_half(data);
+}
+
+#ifdef CONFIG_SYSFS
+static int zhouyi_v2_sysfs_show(struct aipu_core *core, char *buf)
+{
+       int ret = 0;
+       char tmp[512];
+
+       if (unlikely(!core || !buf))
+               return -EINVAL;
+
+       ret += zhouyi_sysfs_show(&core->reg, buf);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "Data Addr 2 Reg",
+           AIPU_DATA_ADDR_2_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "Data Addr 3 Reg",
+           AIPU_DATA_ADDR_3_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "ASE0 Ctrl Reg",
+           AIPU_ADDR_EXT0_CTRL_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "ASE0 High Base Reg",
+           AIPU_ADDR_EXT0_HIGH_BASE_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "ASE0 Low Base Reg",
+           AIPU_ADDR_EXT0_LOW_BASE_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "ASE1 Ctrl Reg",
+           AIPU_ADDR_EXT1_CTRL_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "ASE1 High Base Reg",
+           AIPU_ADDR_EXT1_HIGH_BASE_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "ASE1 Low Base Reg",
+           AIPU_ADDR_EXT1_LOW_BASE_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "ASE2 Ctrl Reg",
+           AIPU_ADDR_EXT2_CTRL_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "ASE2 High Base Reg",
+           AIPU_ADDR_EXT2_HIGH_BASE_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "ASE2 Low Base Reg",
+           AIPU_ADDR_EXT2_LOW_BASE_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "ASE3 Ctrl Reg",
+           AIPU_ADDR_EXT3_CTRL_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "ASE3 High Base Reg",
+           AIPU_ADDR_EXT3_HIGH_BASE_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(&core->reg, tmp, "ASE3 Low Base Reg",
+           AIPU_ADDR_EXT3_LOW_BASE_REG_OFFSET);
+       strcat(buf, tmp);
+       return ret;
+}
+#endif
+
+static struct aipu_core_operations zhouyi_v2_ops = {
+       .get_version = zhouyi_v2_get_hw_version_number,
+       .get_config = zhouyi_v2_get_hw_config_number,
+       .enable_interrupt = zhouyi_v2_enable_interrupt,
+       .disable_interrupt = zhouyi_v2_disable_interrupt,
+       .trigger = zhouyi_v2_trigger,
+       .reserve = zhouyi_v2_reserve,
+       .is_idle = zhouyi_v2_is_idle,
+       .read_status_reg = zhouyi_v2_read_status_reg,
+       .print_hw_id_info = zhouyi_v2_print_hw_id_info,
+       .io_rw = zhouyi_v2_io_rw,
+       .upper_half = zhouyi_v2_upper_half,
+       .bottom_half = zhouyi_v2_bottom_half,
+#ifdef CONFIG_SYSFS
+       .sysfs_show = zhouyi_v2_sysfs_show,
+#endif
+};
+
+struct aipu_core_operations *get_zhouyi_v2_ops(void)
+{
+       return &zhouyi_v2_ops;
+}
diff --git a/drivers/misc/armchina-npu/zhouyi/z2.h b/drivers/misc/armchina-npu/zhouyi/z2.h
new file mode 100644
index 000000000000..f303bcd6f8ab
--- /dev/null
+++ b/drivers/misc/armchina-npu/zhouyi/z2.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#ifndef __Z2_H__
+#define __Z2_H__
+
+#include "zhouyi.h"
+
+/*
+ * Zhouyi v2 AIPU Specific Interrupts
+ */
+#define ZHOUYI_IRQ_FAULT  0x8
+
+#define ZHOUYIV2_IRQ  (ZHOUYI_IRQ | ZHOUYI_IRQ_FAULT)
+#define ZHOUYIV2_IRQ_ENABLE_FLAG  (ZHOUYIV2_IRQ)
+#define ZHOUYIV2_IRQ_DISABLE_FLAG (ZHOUYI_IRQ_NONE)
+
+#define ZHOUYI_V2_MAX_SCHED_JOB_NUM  1
+
+#define ZHOUYI_V2_ASE_READ_ENABLE        BIT(31)
+#define ZHOUYI_V2_ASE_WRITE_ENABLE       BIT(30)
+#define ZHOUYI_V2_ASE_RW_ENABLE          (ZHOUYI_V2_ASE_READ_ENABLE | ZHOUYI_V2_ASE_WRITE_ENABLE)
+
+/*
+ * Zhouyi v2 AIPU Specific Host Control Register Map
+ */
+#define AIPU_DATA_ADDR_2_REG_OFFSET         0x1C
+#define AIPU_DATA_ADDR_3_REG_OFFSET         0x20
+#define AIPU_SECURE_CONFIG_REG_OFFSET       0x30
+#define AIPU_POWER_CTRL_REG_OFFSET          0x38
+#define AIPU_ADDR_EXT0_CTRL_REG_OFFSET      0xC0
+#define AIPU_ADDR_EXT0_HIGH_BASE_REG_OFFSET 0xC4
+#define AIPU_ADDR_EXT0_LOW_BASE_REG_OFFSET  0xC8
+#define AIPU_ADDR_EXT1_CTRL_REG_OFFSET      0xCC
+#define AIPU_ADDR_EXT1_HIGH_BASE_REG_OFFSET 0xD0
+#define AIPU_ADDR_EXT1_LOW_BASE_REG_OFFSET  0xD4
+#define AIPU_ADDR_EXT2_CTRL_REG_OFFSET      0xD8
+#define AIPU_ADDR_EXT2_HIGH_BASE_REG_OFFSET 0xDC
+#define AIPU_ADDR_EXT2_LOW_BASE_REG_OFFSET  0xE0
+#define AIPU_ADDR_EXT3_CTRL_REG_OFFSET      0xE4
+#define AIPU_ADDR_EXT3_HIGH_BASE_REG_OFFSET 0xE8
+#define AIPU_ADDR_EXT3_LOW_BASE_REG_OFFSET  0xEC
+#define ZHOUYI_V2_MAX_REG_OFFSET            0xEC
+
+struct aipu_core_operations *get_zhouyi_v2_ops(void);
+
+#endif /* __Z2_H__ */
diff --git a/drivers/misc/armchina-npu/zhouyi/zhouyi.c b/drivers/misc/armchina-npu/zhouyi/zhouyi.c
new file mode 100644
index 000000000000..370d497feab5
--- /dev/null
+++ b/drivers/misc/armchina-npu/zhouyi/zhouyi.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#include <linux/platform_device.h>
+#include "zhouyi.h"
+
+int zhouyi_read_status_reg(struct io_region *io)
+{
+       return aipu_read32(io, ZHOUYI_STAT_REG_OFFSET);
+}
+
+void zhouyi_clear_qempty_interrupt(struct io_region *io)
+{
+       aipu_write32(io, ZHOUYI_STAT_REG_OFFSET, ZHOUYI_IRQ_QEMPTY);
+}
+
+void zhouyi_clear_done_interrupt(struct io_region *io)
+{
+       aipu_write32(io, ZHOUYI_STAT_REG_OFFSET, ZHOUYI_IRQ_DONE);
+}
+
+void zhouyi_clear_excep_interrupt(struct io_region *io)
+{
+       aipu_write32(io, ZHOUYI_STAT_REG_OFFSET, ZHOUYI_IRQ_EXCEP);
+}
+
+void zhouyi_io_rw(struct io_region *io, struct aipu_io_req *io_req)
+{
+       if (unlikely(!io || !io_req))
+               return;
+
+       if (io_req->rw == AIPU_IO_READ)
+               io_req->value = aipu_read32(io, io_req->offset);
+       else if (io_req->rw == AIPU_IO_WRITE)
+               aipu_write32(io, io_req->offset, io_req->value);
+}
+
+int zhouyi_detect_aipu_version(struct platform_device *p_dev, int *version, int *config)
+{
+       struct resource *res = NULL;
+       struct io_region io;
+
+       if (!p_dev || !version || !config)
+               return -EINVAL;
+
+       res = platform_get_resource(p_dev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -EINVAL;
+
+       if (init_aipu_ioregion(&io, res->start, resource_size(res)))
+               return -EINVAL;
+
+       *version = zhouyi_get_hw_version_number(&io);
+       *config = zhouyi_get_hw_config_number(&io);
+       deinit_aipu_ioregion(&io);
+       return 0;
+}
+
+#ifdef CONFIG_SYSFS
+int zhouyi_print_reg_info(struct io_region *io, char *buf, const char *name, int offset)
+{
+       if (unlikely(!io || !buf || !name))
+               return -EINVAL;
+
+       /* callers pass a 512-byte scratch buffer; bound the write accordingly */
+       return snprintf(buf, 512, "0x%-*x%-*s0x%08x\n", 6, offset, 22, name,
+           aipu_read32(io, offset));
+}
+#endif
+
+#ifdef CONFIG_SYSFS
+int zhouyi_sysfs_show(struct io_region *io, char *buf)
+{
+       int ret = 0;
+       char tmp[512];
+
+       if (unlikely(!io || !buf))
+               return -EINVAL;
+
+       ret += zhouyi_print_reg_info(io, tmp, "Ctrl Reg", ZHOUYI_CTRL_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(io, tmp, "Status Reg", ZHOUYI_STAT_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(io, tmp, "Start PC Reg", ZHOUYI_START_PC_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(io, tmp, "Intr PC Reg", ZHOUYI_INTR_PC_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(io, tmp, "IPI Ctrl Reg", ZHOUYI_IPI_CTRL_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(io, tmp, "Data Addr 0 Reg", ZHOUYI_DATA_ADDR_0_REG_OFFSET);
+       strcat(buf, tmp);
+       ret += zhouyi_print_reg_info(io, tmp, "Data Addr 1 Reg", ZHOUYI_DATA_ADDR_1_REG_OFFSET);
+       strcat(buf, tmp);
+       return ret;
+}
+#endif
+
+int zhouyi_get_hw_version_number(struct io_region *io)
+{
+       int isa_version = 0;
+       int revision_id = 0;
+
+       if (!io)
+               return 0;
+
+       isa_version = aipu_read32(io, ZHOUYI_ISA_VERSION_REG_OFFSET);
+       revision_id = aipu_read32(io, ZHOUYI_REVISION_ID_REG_OFFSET);
+       if (isa_version == ZHOUYI_V1_ISA_VERSION_ID)
+               return AIPU_ISA_VERSION_ZHOUYI_V1;
+       else if ((isa_version == ZHOUYI_V2_V3_ISA_VERSION_ID) &&
+                (revision_id == ZHOUYI_V2_REVISION_ID))
+               return AIPU_ISA_VERSION_ZHOUYI_V2;
+       else if ((isa_version == ZHOUYI_V2_V3_ISA_VERSION_ID) &&
+                (revision_id == ZHOUYI_V3_REVISION_ID))
+               return AIPU_ISA_VERSION_ZHOUYI_V3;
+       else
+               return 0;
+}
+
+int zhouyi_get_hw_config_number(struct io_region *io)
+{
+       int high = 0;
+       int low = 0;
+       int isa_version = 0;
+       int aiff_feature = 0;
+       int tpc_feature = 0;
+
+       if (!io)
+               return 0;
+
+       isa_version = aipu_read32(io, ZHOUYI_ISA_VERSION_REG_OFFSET);
+       aiff_feature = aipu_read32(io, ZHOUYI_HWA_FEATURE_REG_OFFSET);
+       tpc_feature = aipu_read32(io, ZHOUYI_TPC_FEATURE_REG_OFFSET);
+
+       if (isa_version == 0)
+               high = (aiff_feature & 0xF) + 6;
+       else if (isa_version == 1)
+               high = (aiff_feature & 0xF) + 8;
+
+       low = (tpc_feature) & 0x1F;
+
+       return high * 100 + low;
+}
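
The config number packs the AIFF and TPC feature fields into the decimal
part-number suffix. Working the arithmetic for a Z2-1104 (the register values
below are assumed purely for illustration):

        /* isa_version = 1 (Zhouyi v2/v3 encoding), aiff_feature low
         * nibble = 3, tpc_feature low 5 bits = 4
         */
        int high = (0x03 & 0xF) + 8;    /* 3 + 8 = 11 */
        int low  = 0x04 & 0x1F;         /* 4 */
        int cfg  = high * 100 + low;    /* 1104, i.e. a "Z2-1104" */
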
diff --git a/drivers/misc/armchina-npu/zhouyi/zhouyi.h b/drivers/misc/armchina-npu/zhouyi/zhouyi.h
new file mode 100644
index 000000000000..6c49ca2e7063
--- /dev/null
+++ b/drivers/misc/armchina-npu/zhouyi/zhouyi.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#ifndef __AIPU_ZHOUYI_H__
+#define __AIPU_ZHOUYI_H__
+
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <uapi/misc/armchina_aipu.h>
+#include "aipu_io.h"
+#include "config.h"
+
+/*
+ * Zhouyi AIPU Common Interrupts
+ */
+#define ZHOUYI_IRQ_NONE                       0x0
+#define ZHOUYI_IRQ_QEMPTY                     0x1
+#define ZHOUYI_IRQ_DONE                       0x2
+#define ZHOUYI_IRQ_EXCEP                      0x4
+
+#define ZHOUYI_IRQ  (ZHOUYI_IRQ_QEMPTY | ZHOUYI_IRQ_DONE | ZHOUYI_IRQ_EXCEP)
+
+#define ZHOUYI_AIPU_IDLE_STATUS               0x70000
+
+/*
+ * ISA version IDs for Z1/Z2/Z3
+ */
+#define ZHOUYI_V1_ISA_VERSION_ID              0x0
+#define ZHOUYI_V2_V3_ISA_VERSION_ID           0x1
+
+/*
+ * Revision ID for Z2/Z3
+ */
+#define ZHOUYI_V2_REVISION_ID                 0x100
+#define ZHOUYI_V3_REVISION_ID                 0x200
+
+/*
+ * Zhouyi AIPU Common Host Control Register Map
+ */
+#define ZHOUYI_CTRL_REG_OFFSET                0x0
+#define ZHOUYI_STAT_REG_OFFSET                0x4
+#define ZHOUYI_START_PC_REG_OFFSET            0x8
+#define ZHOUYI_INTR_PC_REG_OFFSET             0xC
+#define ZHOUYI_IPI_CTRL_REG_OFFSET            0x10
+#define ZHOUYI_DATA_ADDR_0_REG_OFFSET         0x14
+#define ZHOUYI_DATA_ADDR_1_REG_OFFSET         0x18
+#define ZHOUYI_CLK_CTRL_REG_OFFSET            0x3C
+#define ZHOUYI_ISA_VERSION_REG_OFFSET         0x40
+#define ZHOUYI_TPC_FEATURE_REG_OFFSET         0x44
+#define ZHOUYI_SPU_FEATURE_REG_OFFSET         0x48
+#define ZHOUYI_HWA_FEATURE_REG_OFFSET         0x4C
+#define ZHOUYI_REVISION_ID_REG_OFFSET         0x50
+#define ZHOUYI_MEM_FEATURE_REG_OFFSET         0x54
+#define ZHOUYI_INST_RAM_FEATURE_REG_OFFSET    0x58
+#define ZHOUYI_LOCAL_SRAM_FEATURE_REG_OFFSET  0x5C
+#define ZHOUYI_GLOBAL_SRAM_FEATURE_REG_OFFSET 0x60
+#define ZHOUYI_INST_CACHE_FEATURE_REG_OFFSET  0x64
+#define ZHOUYI_DATA_CACHE_FEATURE_REG_OFFSET  0x68
+
+int zhouyi_read_status_reg(struct io_region *io);
+void zhouyi_clear_qempty_interrupt(struct io_region *io);
+void zhouyi_clear_done_interrupt(struct io_region *io);
+void zhouyi_clear_excep_interrupt(struct io_region *io);
+void zhouyi_io_rw(struct io_region *io, struct aipu_io_req *io_req);
+int zhouyi_detect_aipu_version(struct platform_device *p_dev, int *version, int *config);
+#ifdef CONFIG_SYSFS
+int zhouyi_print_reg_info(struct io_region *io, char *buf, const char *name, int offset);
+int zhouyi_sysfs_show(struct io_region *io, char *buf);
+#endif
+int zhouyi_get_hw_version_number(struct io_region *io);
+int zhouyi_get_hw_config_number(struct io_region *io);
+
+#endif /* __AIPU_ZHOUYI_H__ */
diff --git a/include/uapi/misc/armchina_aipu.h b/include/uapi/misc/armchina_aipu.h
new file mode 100644
index 000000000000..bc314febf194
--- /dev/null
+++ b/include/uapi/misc/armchina_aipu.h
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018-2021 Arm Technology (China) Co., Ltd. All rights reserved. */
+
+#ifndef __UAPI_MISC_ARMCHINA_AIPU_H__
+#define __UAPI_MISC_ARMCHINA_AIPU_H__
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/*
+ * In the following member descriptions,
+ * [must]  means that the field must be filled by the user mode driver in all cases.
+ * [alloc] means that the buffer(s) represented by the field must be allocated
+ *         by the user mode driver before the ioctl is called.
+ * [kmd]   means that the field is filled by the kernel mode driver
+ *         if the call succeeds.
+ */
+
+/**
+ * enum aipu_arch - AIPU architecture number
+ * @AIPU_ARCH_ZHOUYI: AIPU architecture is Zhouyi.
+ *
+ * This enum is used to indicate the architecture of an AIPU core in the system.
+ */
+enum aipu_arch {
+       AIPU_ARCH_ZHOUYI = 0,
+};
+
+/**
+ * enum aipu_isa_version - AIPU ISA version number
+ * @AIPU_ISA_VERSION_ZHOUYI_V1: AIPU ISA version is Zhouyi v1.
+ * @AIPU_ISA_VERSION_ZHOUYI_V2: AIPU ISA version is Zhouyi v2.
+ * @AIPU_ISA_VERSION_ZHOUYI_V3: AIPU ISA version is Zhouyi v3.
+ *
+ * Zhouyi architecture has multiple ISA versions released.
+ * This enum is used to indicate the ISA version of an AIPU core in the system.
+ */
+enum aipu_isa_version {
+       AIPU_ISA_VERSION_ZHOUYI_V1 = 1,
+       AIPU_ISA_VERSION_ZHOUYI_V2 = 2,
+       AIPU_ISA_VERSION_ZHOUYI_V3 = 3,
+};
+
+/**
+ * struct aipu_core_cap - Capability of an AIPU core
+ * @core_id: [kmd] Core ID
+ * @arch:    [kmd] Architecture number
+ * @version: [kmd] ISA version number
+ * @config:  [kmd] Configuration number
+ * @info:    [kmd] Debugging information
+ *
+ * For example, Z2-1104:
+ *    arch == AIPU_ARCH_ZHOUYI (0)
+ *    version == AIPU_ISA_VERSION_ZHOUYI_V2 (2)
+ *    config == 1104
+ */
+struct aipu_core_cap {
+       __u32 core_id;
+       __u32 arch;
+       __u32 version;
+       __u32 config;
+       struct aipu_debugger_info {
+               __u64 reg_base; /* External register base address (physical) */
+       } info;
+};
+
+/**
+ * struct aipu_cap - Common capability of the AIPU core(s)
+ * @core_cnt:            [kmd] Count of AIPU core(s) in the system
+ * @host_to_aipu_offset: [kmd] Address space offset between host and AIPU
+ * @is_homogeneous:      [kmd] Is the AIPU system homogeneous or not (1/0)
+ * @core_cap:            [kmd] Capability of the single AIPU core
+ *
+ * AIPU driver supports the management of multiple AIPU cores in the system.
+ * This struct is used to indicate the common capability of all AIPU core(s).
+ * User mode driver should get this capability via the AIPU_IOCTL_QUERY_CAP command.
+ * If the core count is 1, the per-core capability is in the core_cap member;
+ * otherwise user mode driver should get all the per-core capabilities, as
+ * core_cnt indicates, via the AIPU_IOCTL_QUERY_CORE_CAP command.
+ */
+struct aipu_cap {
+       __u32 core_cnt;
+       __u64 host_to_aipu_offset;
+       __u32 is_homogeneous;
+       struct aipu_core_cap core_cap;
+};
+
+/**
+ * enum aipu_mm_data_type - Data/Buffer type
+ * @AIPU_MM_DATA_TYPE_NONE:   No type
+ * @AIPU_MM_DATA_TYPE_TEXT:   Text (instructions)
+ * @AIPU_MM_DATA_TYPE_RODATA: Read-only data (parameters)
+ * @AIPU_MM_DATA_TYPE_STACK:  Stack
+ * @AIPU_MM_DATA_TYPE_STATIC: Static data (weights)
+ * @AIPU_MM_DATA_TYPE_REUSE:  Reuse data (feature maps)
+ */
+enum aipu_mm_data_type {
+       AIPU_MM_DATA_TYPE_NONE,
+       AIPU_MM_DATA_TYPE_TEXT,
+       AIPU_MM_DATA_TYPE_RODATA,
+       AIPU_MM_DATA_TYPE_STACK,
+       AIPU_MM_DATA_TYPE_STATIC,
+       AIPU_MM_DATA_TYPE_REUSE,
+};
+
+/**
+ * struct aipu_buf_desc - Buffer description.
+ * @pa:         [kmd] Buffer physical base address
+ * @dev_offset: [kmd] Device offset used in mmap
+ * @bytes:      [kmd] Buffer size in bytes
+ */
+struct aipu_buf_desc {
+       __u64 pa;
+       __u64 dev_offset;
+       __u64 bytes;
+};
+
+/**
+ * struct aipu_buf_request - Buffer allocation request structure.
+ * @bytes:         [must] Buffer size to allocate (in bytes)
+ * @align_in_page: [must] Buffer address alignment (must be a power of 2)
+ * @data_type:     [must] Type of data in this buffer/Type of this buffer
+ * @desc:          [kmd]  Descriptor of the successfully allocated buffer
+ */
+struct aipu_buf_request {
+       __u64 bytes;
+       __u32 align_in_page;
+       __u32 data_type;
+       struct aipu_buf_desc desc;
+};
+
+/**
+ * enum aipu_job_execution_flag - Flags for AIPU's executions
+ * @AIPU_JOB_EXEC_FLAG_NONE:       No flag
+ * @AIPU_JOB_EXEC_FLAG_SRAM_MUTEX: The job uses SoC SRAM exclusively.
+ */
+enum aipu_job_execution_flag {
+       AIPU_JOB_EXEC_FLAG_NONE = 0x0,
+       AIPU_JOB_EXEC_FLAG_SRAM_MUTEX = 0x1,
+};
+
+/**
+ * struct aipu_job_desc - Description of a job to be scheduled.
+ * @is_defer_run:      Reserve an AIPU core for this job and defer the running of it
+ * @version_compatible: Is this job compatible with AIPUs of different ISA versions
+ * @core_id:           ID of the core requested to reserve for the deferred job
+ * @do_trigger:        Trigger the previously scheduled deferred job to run
+ * @aipu_arch:         [must] Target device architecture
+ * @aipu_version:      [must] Target device ISA version
+ * @aipu_config:       [must] Target device configuration
+ * @start_pc_addr:     [must] Address of the start PC
+ * @intr_handler_addr: [must] Address of the AIPU interrupt handler
+ * @data_0_addr:       [must] Address of the 0th data buffer
+ * @data_1_addr:       [must] Address of the 1st data buffer
+ * @job_id:            [must] ID of this job
+ * @enable_prof:       Enable performance profiling counters in SoC (if any)
+ * @enable_asid:       Enable ASID feature
+ * @enable_poll_opt:   Enable optimizations for job status polling
+ * @exec_flag:         Combinations of execution flags
+ *
+ * For the fields is_defer_run/do_trigger/enable_prof/enable_asid/enable_poll_opt,
+ * set them to 1/0 to enable/disable the corresponding operations.
+ */
+struct aipu_job_desc {
+       __u32 is_defer_run;
+       __u32 version_compatible;
+       __u32 core_id;
+       __u32 do_trigger;
+       __u32 aipu_arch;
+       __u32 aipu_version;
+       __u32 aipu_config;
+       __u64 start_pc_addr;
+       __u64 intr_handler_addr;
+       __u64 data_0_addr;
+       __u64 data_1_addr;
+       __u32 job_id;
+       __u32 enable_prof;
+       __u32 enable_asid;
+       __u32 enable_poll_opt;
+       __u32 exec_flag;
+};
+
+/**
+ * struct aipu_job_status_desc - Job execution status.
+ * @job_id:    [kmd] Job ID
+ * @thread_id: [kmd] ID of the thread that scheduled this job
+ * @state:     [kmd] Execution state: done or exception
+ * @pdata:     [kmd] External profiling results
+ */
+struct aipu_job_status_desc {
+       __u32 job_id;
+       __u32 thread_id;
+#define AIPU_JOB_STATE_DONE      0x1
+#define AIPU_JOB_STATE_EXCEPTION 0x2
+       __u32 state;
+       struct aipu_ext_profiling_data {
+               __s64 execution_time_ns; /* [kmd] Execution time */
+               __u32 rdata_tot_msb;     /* [kmd] Total read transactions (MSB) */
+               __u32 rdata_tot_lsb;     /* [kmd] Total read transactions (LSB) */
+               __u32 wdata_tot_msb;     /* [kmd] Total write transactions (MSB) */
+               __u32 wdata_tot_lsb;     /* [kmd] Total write transactions (LSB) */
+               __u32 tot_cycle_msb;     /* [kmd] Total cycle counts (MSB) */
+               __u32 tot_cycle_lsb;     /* [kmd] Total cycle counts (LSB) */
+       } pdata;
+};
+
+/**
+ * struct aipu_job_status_query - Query status of (a) job(s) scheduled before.
+ * @max_cnt:        [must] Maximum number of job status to query
+ * @of_this_thread: [must] Query jobs scheduled by this thread only, or by all threads sharing this fd (1/0)
+ * @status:         [alloc] Pointer to an array (length is max_cnt) to store the status
+ * @poll_cnt:       [kmd] Count of the successfully polled job(s)
+ */
+struct aipu_job_status_query {
+       __u32 max_cnt;
+       __u32 of_this_thread;
+       struct aipu_job_status_desc *status;
+       __u32 poll_cnt;
+};
+
+/**
+ * struct aipu_io_req - AIPU core IO operations request.
+ * @core_id: [must] Core ID
+ * @offset:  [must] Register offset
+ * @rw:      [must] Read or write operation
+ * @value:   [must]/[kmd] Value to be written/value readback
+ */
+struct aipu_io_req {
+       __u32 core_id;
+       __u32 offset;
+       enum aipu_rw_attr {
+               AIPU_IO_READ,
+               AIPU_IO_WRITE
+       } rw;
+       __u32 value;
+};
+
+/*
+ * AIPU IOCTL List
+ */
+#define AIPU_IOCTL_MAGIC 'A'
+/**
+ * DOC: AIPU_IOCTL_QUERY_CAP
+ *
+ * @Description
+ *
+ * ioctl to query the common capability of AIPUs
+ *
+ * User mode driver should call this before calling AIPU_IOCTL_QUERY_CORE_CAP.
+ */
+#define AIPU_IOCTL_QUERY_CAP _IOR(AIPU_IOCTL_MAGIC,  0, struct aipu_cap)
+/**
+ * DOC: AIPU_IOCTL_QUERY_CORE_CAP
+ *
+ * @Description
+ *
+ * ioctl to query the capability of an AIPU core
+ *
+ * User mode driver only needs to call this when the core count returned by
+ * AIPU_IOCTL_QUERY_CAP is greater than 1.
+ */
+#define AIPU_IOCTL_QUERY_CORE_CAP _IOR(AIPU_IOCTL_MAGIC,  1, struct aipu_core_cap)
+/**
+ * DOC: AIPU_IOCTL_REQ_BUF
+ *
+ * @Description
+ *
+ * ioctl to request to allocate a coherent buffer
+ *
+ * This fails if the kernel driver cannot find a free buffer that meets the size/alignment request.
+ */
+#define AIPU_IOCTL_REQ_BUF _IOWR(AIPU_IOCTL_MAGIC, 2, struct aipu_buf_request)
+/**
+ * DOC: AIPU_IOCTL_FREE_BUF
+ *
+ * @Description
+ *
+ * ioctl to request to free a coherent buffer allocated by AIPU_IOCTL_REQ_BUF
+ *
+ */
+#define AIPU_IOCTL_FREE_BUF _IOW(AIPU_IOCTL_MAGIC,  3, struct aipu_buf_desc)
+/**
+ * DOC: AIPU_IOCTL_DISABLE_SRAM
+ *
+ * @Description
+ *
+ * ioctl to disable the management of SoC SRAM in kernel driver
+ *
+ * This fails if there is no SRAM in the system or the SRAM has already been allocated.
+ */
+#define AIPU_IOCTL_DISABLE_SRAM _IO(AIPU_IOCTL_MAGIC,  4)
+/**
+ * DOC: AIPU_IOCTL_ENABLE_SRAM
+ *
+ * @Description
+ *
+ * ioctl to enable the management of SoC SRAM in kernel driver disabled by AIPU_IOCTL_DISABLE_SRAM
+ */
+#define AIPU_IOCTL_ENABLE_SRAM _IO(AIPU_IOCTL_MAGIC,  5)
+/**
+ * DOC: AIPU_IOCTL_SCHEDULE_JOB
+ *
+ * @Description
+ *
+ * ioctl to schedule a user job to kernel mode driver for execution
+ *
+ * This is a non-blocking operation; the user mode driver should therefore check
+ * the job status via AIPU_IOCTL_QUERY_STATUS.
+ */
+#define AIPU_IOCTL_SCHEDULE_JOB _IOW(AIPU_IOCTL_MAGIC,  6, struct aipu_job_desc)
+/**
+ * DOC: AIPU_IOCTL_QUERY_STATUS
+ *
+ * @Description
+ *
+ * ioctl to query the execution status of one or multiple scheduled job(s)
+ */
+#define AIPU_IOCTL_QUERY_STATUS _IOWR(AIPU_IOCTL_MAGIC, 7, struct aipu_job_status_query)
+/**
+ * DOC: AIPU_IOCTL_KILL_TIMEOUT_JOB
+ *
+ * @Description
+ *
+ * ioctl to kill a timeout job and clean it from kernel mode driver.
+ */
+#define AIPU_IOCTL_KILL_TIMEOUT_JOB _IOW(AIPU_IOCTL_MAGIC,  8, __u32)
+/**
+ * DOC: AIPU_IOCTL_REQ_IO
+ *
+ * @Description
+ *
+ * ioctl to read/write an external register of an AIPU core.
+ */
+#define AIPU_IOCTL_REQ_IO _IOWR(AIPU_IOCTL_MAGIC, 9, struct aipu_io_req)
+
+#endif /* __UAPI_MISC_ARMCHINA_AIPU_H__ */
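
To tie the UAPI together, here is a minimal user-space sketch of the intended
call sequence: query capability, allocate and map a buffer, then schedule and
poll a job. The device node path is an assumption (it comes from the misc
device registration, not from this header):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <misc/armchina_aipu.h>

int main(void)
{
        struct aipu_cap cap = { 0 };
        struct aipu_buf_request buf = { 0 };
        void *va;
        int fd;

        fd = open("/dev/aipu", O_RDWR);         /* node name assumed */
        if (fd < 0)
                return 1;

        /* query the common capability first, as required above */
        if (ioctl(fd, AIPU_IOCTL_QUERY_CAP, &cap) < 0)
                goto out;
        printf("cores: %u, homogeneous: %u\n", cap.core_cnt, cap.is_homogeneous);

        /* allocate one page of text memory and map it into user space */
        buf.bytes = 4096;
        buf.align_in_page = 1;
        buf.data_type = AIPU_MM_DATA_TYPE_TEXT;
        if (ioctl(fd, AIPU_IOCTL_REQ_BUF, &buf) < 0)
                goto out;

        va = mmap(NULL, buf.desc.bytes, PROT_READ | PROT_WRITE,
                  MAP_SHARED, fd, buf.desc.dev_offset);
        if (va != MAP_FAILED) {
                /* fill in the graph binary here, build an aipu_job_desc,
                 * schedule it with AIPU_IOCTL_SCHEDULE_JOB, then poll for
                 * completion with AIPU_IOCTL_QUERY_STATUS (non-blocking)
                 */
                munmap(va, buf.desc.bytes);
        }

        ioctl(fd, AIPU_IOCTL_FREE_BUF, &buf.desc);
out:
        close(fd);
        return 0;
}
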
--
2.17.1
