[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <1442562390-91051-1-git-send-email-linyongting@huawei.com>
Date: Fri, 18 Sep 2015 15:46:30 +0800
From: Lin Yongting <linyongting@...wei.com>
To: <ross.zwisler@...ux.intel.com>, <dan.j.williams@...el.com>,
<axboe@...com>, <akpm@...ux-foundation.org>, <richard@....at>,
<linyongting@...wei.com>, <willy@...ux.intel.com>,
<wangxiaozhe@...wei.com>, <mingo@...nel.org>,
<gregkh@...uxfoundation.org>
CC: <linux-kernel@...r.kernel.org>
Subject: [PATCH] pramdisk: new block disk driver to perform persistent storage
In embedded devices, user space applications use reserved memory
(i.e. persistent memory) to store business data; the data is kept
in this memory region across a system reboot or panic.
pramdisk is a block disk driver based on persistent memory. It provides
a file system interface for applications to read/write data in persistent
memory. Applications can use pramdisk to store log files or business data
in persistent memory through file system operations, instead of operating
on or managing the memory directly.
pramdisk supports multiple persistent memory regions, and each one is a
block device named /dev/pram<N>.
Usage:
modprobe pramdisk.ko pmem=<size1>@<addr1> [ pmem=<size2>@<addr2> ... ]
For example:
modprobe pramdisk.ko pmem=20M@1000M pmem=20M@1020M
mkfs.ext3 /dev/pram1
mkdir /tmp/test1
mount -t ext3 /dev/pram1 /tmp/test1
Signed-off-by: Lin Yongting <linyongting@...wei.com>
Signed-off-by: Wang xiaozhe <wangxiaozhe@...wei.com>
---
drivers/block/Kconfig | 18 +++
drivers/block/Makefile | 1 +
drivers/block/pramdisk.c | 305 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 324 insertions(+)
create mode 100644 drivers/block/pramdisk.c
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 1b8094d..9bd68a5 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -404,6 +404,24 @@ config BLK_DEV_RAM_DAX
and will prevent RAM block device backing store memory from being
allocated from highmem (only a problem for highmem systems).
+config BLK_DEV_PRAM_DISK
+ tristate "Persistent RAM Disk device support"
+ ---help---
+ Saying Y here will allow you to use a portion of your persistent
+ RAM as a disk (i.e. a block device), so that you can create file
+ systems on it and read and write to it. The RAM region is
+ persistent, and its data is kept across a system reboot, panic
+ or NMI.
+
+ This functionality is very useful in embedded devices, where it can
+ be used to access log data or business data through a file system.
+
+ Say M here to compile this driver as a module.
+ Say N here if you don't need this Persistent RAM Block Device
+ functionality.
+
+ Use devices /dev/pram$N.
+
config CDROM_PKTCDVD
tristate "Packet writing on CD/DVD media"
depends on !UML
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 02b688d..3cab7e5 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
+obj-$(CONFIG_BLK_DEV_PRAM_DISK) += pramdisk.o
obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
diff --git a/drivers/block/pramdisk.c b/drivers/block/pramdisk.c
new file mode 100644
index 0000000..36e7bfe
--- /dev/null
+++ b/drivers/block/pramdisk.c
@@ -0,0 +1,305 @@
+/*
+ * Persistent RAM Disk device
+ *
+ * Copyright (C), 2001-2015, Huawei Tech. Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/types.h>
+
+#define SECTOR_SHIFT 9
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+
+#define PRAM_DEVICE_NAME "pramdisk"
+
+struct pram_dev { /* one persistent RAM region exposed as a block disk */
+ struct request_queue *queue; /* queue served by pram_make_request() */
+ struct gendisk *gendisk; /* disk named "pram<minor>" */
+ struct list_head dev_list; /* link in the global pram_devices list */
+ /* Region geometry, copied from the arguments of pram_alloc(). */
+ unsigned long capability; /* NOTE(review): never referenced in this file */
+ unsigned long pram_addr; /* physical start address of the region */
+ unsigned long pram_size; /* length of the region in bytes */
+};
+
+static LIST_HEAD(pram_devices); /* all devices created by pmem= parameters */
+static int pram_major; /* block major obtained in prbd_init() */
+/* Only the owning module is set; no other block operation is provided. */
+static const struct block_device_operations prbd_fops = {
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Copy @len bytes between @page (starting at @off) and the region of @dev
+ * at byte offset @pram_ofs.  @rw == READ copies region -> page, any other
+ * value copies page -> region.  Returns 0, -ERANGE if the segment does not
+ * fit inside the region, or -EADDRNOTAVAIL if ioremap() fails.
+ */
+static int pram_do_bvec(struct pram_dev *dev, struct page *page,
+			unsigned int len, unsigned int off, int rw,
+			unsigned long pram_ofs)
+{
+	void __iomem *buf;
+	void *mem;
+
+	/* Validate the end of the segment as well, not only its start. */
+	if (pram_ofs >= dev->pram_size || len > dev->pram_size - pram_ofs) {
+		pr_debug("Access address %lx exceeds the disk size %lx.\n",
+			 pram_ofs, dev->pram_size);
+		return -ERANGE;
+	}
+	/* Map only this segment; buf is __iomem, so use the I/O copy helpers. */
+	buf = ioremap(dev->pram_addr + pram_ofs, len);
+	if (!buf) {
+		pr_debug("ioremap fault\n");
+		return -EADDRNOTAVAIL;
+	}
+	mem = kmap_atomic(page);
+	if (rw == READ) {
+		memcpy_fromio(mem + off, buf, len);
+		flush_dcache_page(page);
+	} else {
+		flush_dcache_page(page);
+		memcpy_toio(buf, mem + off, len);
+	}
+	kunmap_atomic(mem);
+	iounmap(buf);
+	return 0;
+}
+
+/*
+ * make_request entry point: service @bio synchronously by copying every
+ * segment to or from the persistent region.  A bio reaching past the disk
+ * capacity, or any failing segment, completes the bio with an I/O error.
+ */
+static void pram_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct gendisk *disk = bio->bi_bdev->bd_disk;
+	struct pram_dev *dev = disk->private_data;
+	sector_t sector = bio->bi_iter.bi_sector;
+	sector_t end = sector + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
+	unsigned long pram_ofs;
+	struct bvec_iter iter;
+	struct bio_vec bvec;
+	int rw;
+
+	if (end > get_capacity(disk)) {
+		/* sector_t may be 64-bit: print it as unsigned long long. */
+		pr_debug("sector %llx is out of range.\n",
+			 (unsigned long long)end);
+		goto fail;
+	}
+
+	rw = bio_rw(bio);
+	if (rw == READA)
+		rw = READ;	/* read-ahead is served like a plain read */
+
+	pram_ofs = sector << SECTOR_SHIFT;
+	bio_for_each_segment(bvec, bio, iter) {
+		if (pram_do_bvec(dev, bvec.bv_page, bvec.bv_len,
+				 bvec.bv_offset, rw, pram_ofs))
+			goto fail;
+		pram_ofs += bvec.bv_len;
+	}
+
+	bio_endio(bio);
+	return;
+fail:
+	bio_io_error(bio);
+}
+/* Build (without add_disk()) the device for region [addr, addr + size). */
+static struct pram_dev *pram_alloc(phys_addr_t addr, phys_addr_t size)
+{
+ struct pram_dev *dev;
+ struct gendisk *disk;
+ static int minor; /* last minor handed out; the first disk gets 1 */
+ int ret;
+ /* Returns the new device, or ERR_PTR(-ENOMEM / -EBUSY) on failure. */
+ ret = -ENOMEM;
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ goto out;
+
+ dev->queue = blk_alloc_queue(GFP_KERNEL);
+ if (!dev->queue)
+ goto out_free_dev;
+
+ dev->pram_addr = addr;
+ dev->pram_size = size;
+ blk_queue_make_request(dev->queue, pram_make_request);
+
+ disk = dev->gendisk = alloc_disk(1);
+ if (!disk)
+ goto out_free_queue;
+
+ disk->first_minor = ++minor;
+ disk->fops = &prbd_fops;
+ disk->private_data = dev;
+ disk->queue = dev->queue;
+ snprintf(disk->disk_name, DISK_NAME_LEN, "pram%d", minor);
+ /* Reserve the physical range under the disk's name. */
+ ret = -EBUSY;
+ if (!request_mem_region(addr, size, disk->disk_name)) {
+ pr_err("request memory region [%llx - %llx] fail.\n",
+ (unsigned long long)addr,
+ (unsigned long long)(addr + size));
+ goto out_free_disk;
+ }
+ /* Capacity is counted in 512-byte sectors (SECTOR_SHIFT == 9). */
+ set_capacity(disk, size >> SECTOR_SHIFT);
+
+ return dev;
+ /* Error unwind: undo the steps above in reverse order. */
+out_free_disk:
+ put_disk(dev->gendisk);
+out_free_queue:
+ blk_cleanup_queue(dev->queue);
+out_free_dev:
+ kfree(dev);
+out:
+ return ERR_PTR(ret);
+}
+/* Undo pram_alloc(): drop the memory region, disk, queue and @dev itself. */
+static void pram_free(struct pram_dev *dev)
+{
+ release_mem_region(dev->pram_addr, dev->pram_size);
+ put_disk(dev->gendisk);
+ blk_cleanup_queue(dev->queue);
+ kfree(dev);
+}
+
+/* pmem "get" handler: emits one hex "<size>@<addr>" line per region. */
+static int pmem_cmdline_get(char *buffer, const struct kernel_param *kp)
+{
+	struct pram_dev *dev;
+	int len = 0;
+
+	list_for_each_entry(dev, &pram_devices, dev_list) {
+		len += scnprintf(buffer + len, PAGE_SIZE - len,
+				 "%llx@%llx\n",
+				 (unsigned long long)dev->pram_size,
+				 (unsigned long long)dev->pram_addr);
+	}
+	return len;	/* number of bytes placed in @buffer */
+}
+
+/*
+ * Module param format:
+ *   pmem=<size1>@<addr1> [pmem=<size2>@<addr2> ... ]
+ *
+ * Each pmem param describes one persistent memory region and sets up one
+ * memory disk.  <sizeX> and <addrX> can be octal, decimal or hexadecimal;
+ * if followed by "K", "M" or "G" the number is interpreted as kilo-, mega-
+ * or gigabytes.  Both must be non-zero multiples of SECTOR_SIZE.
+ *
+ * Example: pmem=100M@1000M pmem=20M@1100M
+ *
+ * Returns 0 on success.  On failure, every region registered by earlier pmem
+ * params is released too and a negative errno is returned.
+ */
+static int pmem_cmdline_set(const char *val, const struct kernel_param *kp)
+{
+	struct pram_dev *dev, *next;
+	phys_addr_t addr, size;
+	int ret = -EINVAL;
+	char *p;
+
+	size = memparse(val, &p);
+	if (*p != '@')
+		goto fail;
+
+	addr = memparse(p + 1, &p);
+	/* Reject trailing garbage such as "20M@1000Mxyz". */
+	if (*p != '\0' && *p != '\n')
+		goto fail;
+	if (!addr || !size || (addr % SECTOR_SIZE) || (size % SECTOR_SIZE))
+		goto fail;
+
+	dev = pram_alloc(addr, size);
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		goto fail;
+	}
+
+	list_add_tail(&dev->dev_list, &pram_devices);
+	return 0;
+
+fail:
+	list_for_each_entry_safe(dev, next, &pram_devices, dev_list) {
+		list_del(&dev->dev_list);
+		pram_free(dev);
+	}
+	return ret;
+}
+
+static const struct kernel_param_ops pmem_cmdline_param_ops = {
+ .set = pmem_cmdline_set, /* parses one "<size>@<addr>" value */
+ .get = pmem_cmdline_get, /* lists the registered regions */
+};
+/* Register the handlers as the "pmem" parameter with permission 0444. */
+device_param_cb(pmem, &pmem_cmdline_param_ops, NULL, 0444);
+MODULE_PARM_DESC(pmem, "size and start address of continuous Presistent RAM region.");
+
+/* Unlink @dev from pram_devices, delete its gendisk, then free it. */
+static void pram_del_one(struct pram_dev *dev)
+{
+ list_del(&dev->dev_list);
+ del_gendisk(dev->gendisk);
+ pram_free(dev);
+}
+
+/* Register the block major, then publish every disk built from pmem=. */
+static int __init prbd_init(void)
+{
+	struct pram_dev *pram;
+	int major = register_blkdev(0, PRAM_DEVICE_NAME);
+
+	if (major < 0)
+		return major;
+	pram_major = major;
+
+	list_for_each_entry(pram, &pram_devices, dev_list) {
+		struct gendisk *disk = pram->gendisk;
+
+		disk->major = pram_major;
+		add_disk(disk);
+	}
+	return 0;
+}
+
+/* Tear down every registered disk, then give the block major back. */
+static void __exit prbd_exit(void)
+{
+	struct pram_dev *cur, *tmp;
+
+	list_for_each_entry_safe(cur, tmp, &pram_devices, dev_list)
+		pram_del_one(cur);
+
+	unregister_blkdev(pram_major, PRAM_DEVICE_NAME);
+}
+/* Module entry/exit hooks and metadata. */
+module_init(prbd_init);
+module_exit(prbd_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Persistent RAM Disk device");
+MODULE_AUTHOR("Lin Yongting <linyongting@...wei.com>");
+MODULE_AUTHOR("Wang xiaozhe <wangxiaozhe@...wei.com>");
--
1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists