lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ef1cd66f0808250522s5b661e21k772ce208afa33489@mail.gmail.com>
Date:	Mon, 25 Aug 2008 13:22:16 +0100
From:	"Jochen Voß" <jochen.voss@...glemail.com>
To:	jassi_singh_brar@...oo.com
Cc:	linux-kernel@...r.kernel.org
Subject: Re: An idea .... with code

Hi,

I like this approach: it is nice that you don't require a special tool
like losetup with it, and that virblk can handle partitioned disks is
a nice bonus.

Some minor remarks below.
[ disclaimer: I am not an expert in this and might talk nonsense ]

2008/8/24 jassi brar <jassi_singh_brar@...oo.com>:
> [root@...alhost jmod]# cat /sys/devices/virblk/manage
> /home/jassi/jmod/image' as 'v0blk'

Shouldn't this output be replaced by some format which is easier to
parse?  This would make it easier for tools like mount to
automatically allocated virblk devices.

> /*
>  * Kernel module to emulate a user space file as a block device transparently.
>  *
>  * Copyright (C) 2008 Jaswinder Singh Brar <jassi_singh_brar AT yahoo DOT com>
>  *
>  * This program is free software; you can redistribute it and/or
>  * modify it under the terms of the GNU General Public License
>  * as published by the Free Software Foundation; either version 2
>  * of the License, or (at your option) any later version.
>  *
>  * This program is distributed in the hope that it will be useful,
>  * but WITHOUT ANY WARRANTY; without even the implied warranty of
>  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>  * GNU General Public License for more details.
>  *
>  * You should have received a copy of the GNU General Public License
>  * along with this program; if not, write to the Free Software
>  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
>  */
>
> #include <linux/module.h>
> #include <linux/init.h>
> #include <linux/moduleparam.h>
> #include <linux/init.h>
> #include <linux/hdreg.h>
> #include <linux/blkdev.h>
> #include <linux/kthread.h>
>
> #define VBD_MAJOR       121 /* Just some available */
> #define VBD_NAME        "vblk"
>
> #define NUM_SPC_BITS    5 /* == 32 */
> #define NUM_HEADS_BITS  6 /* == 64 */
> #define SECT_SIZE_BITS  9 /* == 512 */
> #define NUM_MINORS      16
>
> #define REGBLKDEV  (1<<0)
> #define REGPDEV    (1<<1)
> #define CRTDEVFL   (1<<2)
>
> /*
>  * Various parameters could be configured at module load time or 'echo +' command.
>  */
> struct vir_blk_dev{
>        int                    index;
>        int                    heads_bits;
>        int                    cylinders;
>        int                    sectspercyl_bits;
>        int                    sect_size_bits;
>        int                    use_count;
>        int                    md_change;
>        int                    ro;
>        loff_t                 sects;
>        struct file           *filp;
>        struct gendisk        *gd;
>        struct request_queue  *rq;
>        struct bio            *vbd_biotail;
>        struct bio            *vbd_bio;
>        struct list_head       vbd_list;
>        struct task_struct    *vbd_thread;
>        struct semaphore       sem;
>        spinlock_t             vbd_lock;
>        wait_queue_head_t      vbd_event;
> };
>
> static LIST_HEAD(vbd_head);
>
> static struct device vir_dev = {
>        .bus_id = "virblk",
> };
>
> static int data_xfer(struct vir_blk_dev *vbd, struct page *page, unsigned int len,
>                                unsigned int off, int wr, sector_t sector)

strange indentation

> {
>        ssize_t ret;
>        loff_t pos = sector * (1<<(vbd->sect_size_bits));
>        void *buf = kmap(page) + off;
>
>        if(sector + (len >> vbd->sect_size_bits) > get_capacity(vbd->gd)){

Kernel coding style seems to prefer spaces after "if" and before "{"
(more instances below).

>                printk("Trying to move past the end!\n");
>                kunmap(page);
>                return -EIO;
>        }
>
>        if(wr)
>           ret = vbd->filp->f_op->write(vbd->filp, buf, len, &pos);
>        else
>           ret = vbd->filp->f_op->read(vbd->filp, buf, len, &pos);
>
>        kunmap(page);
>
>        if(likely(ret == len))
>                return 0;
>
>        if(ret >= 0)
>                ret = -EIO;
>
>        return ret;
> }
>
> static void handle_bio(struct vir_blk_dev *vbd, struct bio *bio)
> {
>        int i, ret = -EIO;
>        struct bio_vec *bvec;
>        sector_t sector;
>
>        sector = bio->bi_sector;
>        bio_for_each_segment(bvec, bio, i){
>
>                ret = data_xfer(vbd, bvec->bv_page, bvec->bv_len, bvec->bv_offset,
>                                                        bio_data_dir(bio) == WRITE, sector);

strange indentation

>                if(ret){
>                        break;

unnecessary brackets

>                }
>                sector += (bvec->bv_len >> vbd->sect_size_bits);
>        }
>
>        bio_endio(bio, ret);
> }
>
> static int vbd_make_request(struct request_queue *rq, struct bio *bio)
> {
>        struct vir_blk_dev *vbd = (struct vir_blk_dev *)rq->queuedata;
>
>        spin_lock_irq(&vbd->vbd_lock);
>
>        if(vbd->vbd_biotail != NULL){
>                vbd->vbd_biotail->bi_next = bio;
>                vbd->vbd_biotail = bio;
>        }else{
>                vbd->vbd_bio = vbd->vbd_biotail = bio;
>        }
>
>        wake_up(&vbd->vbd_event);
>        spin_unlock_irq(&vbd->vbd_lock);
>        return 0;
> }
>
> static int jmod_open(struct inode *inode, struct file *filp)
> {
>        struct vir_blk_dev *vbd = (struct vir_blk_dev *)inode->i_bdev->bd_disk->private_data;
>
>        down(&vbd->sem);
>        filp->private_data = (void *)vbd;
>        vbd->use_count++;
>        up(&vbd->sem);
>
>        return 0;
> }
>
> static int jmod_release(struct inode *inode, struct file *filp)
> {
>        struct vir_blk_dev *vbd = (struct vir_blk_dev *)inode->i_bdev->bd_disk->private_data;
>
>        down(&vbd->sem);
>        vbd->filp->f_op->fsync(vbd->filp, vbd->filp->f_path.dentry, 1);
>        vbd->use_count--;
>        up(&vbd->sem);
>
>        return 0;
> }
>
> static int jmod_getgeo(struct block_device *bdev, struct hd_geometry *gm)
> {
>        struct vir_blk_dev *vbd = bdev->bd_disk->private_data;
>
>        down(&vbd->sem);
>        gm->heads = 1<<(vbd->heads_bits);
>        gm->sectors = 1<<(vbd->sectspercyl_bits);
>        gm->cylinders = get_capacity(bdev->bd_disk) >> (vbd->heads_bits + vbd->sectspercyl_bits);
>        up(&vbd->sem);

Is locking required here?

>
>        return 0;
> }
>
> static int jmod_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
> {
>        return 0;
> }

Probably you can just omit this method?

>
>
> static int jmod_mediachanged(struct gendisk *gd)
> {
>        struct vir_blk_dev *vbd = gd->private_data;
>
>        return vbd->md_change;
> }
>
> static int jmod_revalidatedisk(struct gendisk *gd)
> {
>        struct vir_blk_dev *vbd = gd->private_data;
>
>        down(&vbd->sem);
>        if(vbd->md_change)
>                vbd->md_change = 0;

Maybe just set "vbd->md_change = 0" unconditionally?

>        up(&vbd->sem);
>
>        return 0;
> }
>
> static struct block_device_operations vbd_fops = {
>        .owner = THIS_MODULE,
>        .open = jmod_open,
>        .release = jmod_release,
>        .ioctl = jmod_ioctl,
>        .getgeo = jmod_getgeo,
>        .media_changed = jmod_mediachanged,
>        .revalidate_disk = jmod_revalidatedisk,
> };
>
> /*
>  * ind : First minor number of the disk
>  * */
> static struct vir_blk_dev *vblk_alloc(int ind)
> {
>        struct gendisk *disk;
>        struct vir_blk_dev *vbd;
>
>        vbd = kzalloc(sizeof(*vbd), GFP_KERNEL);
>        if (vbd == NULL)
>                goto out;
>
>        vbd->filp = NULL;
>        vbd->vbd_thread = NULL;
>        vbd->use_count = 0;
>        vbd->md_change = 1;
>        vbd->index = ind;
>        vbd->vbd_bio = NULL;
>        vbd->vbd_biotail = NULL;
>
>        vbd->rq = blk_alloc_queue(GFP_KERNEL);
>        if(vbd->rq == NULL){
>                goto out;
>        }
>        blk_queue_make_request(vbd->rq, vbd_make_request);
>        blk_queue_bounce_limit(vbd->rq, BLK_BOUNCE_ANY);
>
>        disk = vbd->gd = alloc_disk(NUM_MINORS);
>        if(disk == NULL){
>                goto out;
>        }
>        disk->major             = VBD_MAJOR;
>        disk->first_minor       = ind*NUM_MINORS;
>        disk->fops              = &vbd_fops;
>        disk->private_data      = (void *)vbd;
>        vbd->rq->queuedata      = (void *)vbd;
>        disk->queue             = vbd->rq;
>        disk->flags |= GENHD_FL_REMOVABLE;

inconsistent indentation in this block

>        sprintf(disk->disk_name, "v%dblk", ind);

Maybe snprintf?

>        init_waitqueue_head(&vbd->vbd_event);
>
>        return vbd;
>
> out:
>        if(vbd && vbd->rq){
>                blk_cleanup_queue(vbd->rq);
>        }
>        if(vbd){
>                kfree(vbd);
>        }

The "if(vbd)" is not neede, kfree can deal with NULL pointers.

>        return NULL;
> }
>
> static struct file* open_backing_file(const char *filename, int *ro, loff_t *size)
> {
>        struct file      *filp = NULL;
>        struct inode     *inode = NULL;
>
>        *ro = 0;
>        *size = 0;
>
>        filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0);
>        if (-EROFS == PTR_ERR(filp))
>                *ro = 1;
>        if (*ro)
>                filp = filp_open(filename, O_RDONLY | O_LARGEFILE, 0);
>        if (IS_ERR(filp)) {
>                printk("unable to open backing file: %s\n", filename);
>                return NULL;
>        }
>
>        if(!(filp->f_mode & FMODE_WRITE))
>                *ro = 1;
>
>        if(filp->f_path.dentry)
>                inode = filp->f_path.dentry->d_inode;
>
>        if(inode && S_ISBLK(inode->i_mode)){
>                if (bdev_read_only(inode->i_bdev))
>                        *ro = 1;
>        }else if(!inode || !S_ISREG(inode->i_mode)){
>                printk("invalid file type: %s\n", filename);
>                filp_close(filp, current->files);
>                return NULL;
>        }
>
>        if (!filp->f_op || !(filp->f_op->read || filp->f_op->aio_read)) {
>                printk("file not readable: %s\n", filename);
>                filp_close(filp, current->files);
>                return NULL;
>        }
>        if (!(filp->f_op->write || filp->f_op->aio_write))
>                *ro = 1;
>
>        *size = i_size_read(inode->i_mapping->host);
>        if (*size < 0) {
>                printk("unable to find file size: %s\n", filename);
>                filp_close(filp, current->files);
>                return NULL;
>        }
>
>        get_file(filp);
>
>        filp_close(filp, current->files);
>        return filp;
> }
>
> static int io_handler(void *data)
> {
>        struct vir_blk_dev* vbd = (struct vir_blk_dev *)data;
>        struct bio *bio;
>
>        allow_signal(SIGINT);
>        allow_signal(SIGTERM);
>        allow_signal(SIGKILL);
>        allow_signal(SIGUSR1);
>
>        set_user_nice(current, -20);
>        set_fs(get_ds());
>
>        while(!kthread_should_stop()){
>
>                down(&vbd->sem);
>                while(vbd->vbd_bio != NULL){
>                        spin_lock_irq(&vbd->vbd_lock);
>                        bio = vbd->vbd_bio;
>                        if(bio == vbd->vbd_biotail){
>                                vbd->vbd_biotail = NULL;
>                        }

unneeded brackets

>                        vbd->vbd_bio = bio->bi_next;
>                        bio->bi_next = NULL;
>                        spin_unlock_irq(&vbd->vbd_lock);
>
>                        handle_bio(vbd, bio);
>                }
>                up(&vbd->sem);
>
>                wait_event_interruptible(vbd->vbd_event, (vbd->vbd_bio!=NULL) || kthread_should_stop());

Maybe break long lines into two (in this case after the comma)?

>        }
>
>        down(&vbd->sem);
>
>        /* Flush any pending bio's */
>        while(vbd->vbd_bio != NULL){
>                spin_lock_irq(&vbd->vbd_lock);
>
>                bio = vbd->vbd_bio;
>                if(bio == vbd->vbd_biotail)
>                        vbd->vbd_biotail = NULL;
>
>                vbd->vbd_bio = bio->bi_next;
>                bio->bi_next = NULL;
>
>                spin_unlock_irq(&vbd->vbd_lock);
>
>                bio_endio(bio, -EIO);
>        }
>
>        if(vbd->gd)
>                del_gendisk(vbd->gd);
>
>        if(vbd->rq)
>                blk_cleanup_queue(vbd->rq);
>
>        if(vbd->gd)
>                put_disk(vbd->gd);
>
>        vbd->vbd_thread = NULL;
>
>        up(&vbd->sem);
>
>        return 0;
> }
>
> static struct vir_blk_dev* create_vbd(int ind, const char *filename)
> {
>        int ro = 0;
>        loff_t size = 0;
>        struct file* fl;
>        struct vir_blk_dev *vbd = NULL;
>
>        fl = open_backing_file(filename, &ro, &size);
>        if(fl == NULL)
>                goto retnull;
>
>        /* Check if the file is already being emulated */
>        list_for_each_entry(vbd, &vbd_head, vbd_list){
>                if(vbd->filp->f_path.dentry->d_name.hash == fl->f_path.dentry->d_name.hash){
>                        printk("File already being emulated\n");

Use KERN_ERR?

>                        return NULL;
>                }
>        }
>
>        vbd = vblk_alloc(ind);
>        if(vbd == NULL){
>                printk(KERN_ALERT "Couldn't create anymore VBD!\n");

Is KERN_ALERT ("action must be taken immediately") appropriate here?

>                goto retnull;
>        }
>
>        vbd->filp = fl;
>        vbd->ro = ro;
>
>        vbd->sectspercyl_bits = NUM_SPC_BITS;
>        vbd->heads_bits = NUM_HEADS_BITS;
>        vbd->sect_size_bits = SECT_SIZE_BITS;
>        vbd->sects = size >> (vbd->sect_size_bits);
>
>        set_capacity(vbd->gd, vbd->sects);
>        vbd->cylinders = get_capacity(vbd->gd) >> (vbd->heads_bits + vbd->sectspercyl_bits);
>        spin_lock_init(&vbd->vbd_lock);
>        init_MUTEX(&vbd->sem);
>
>        /* Create a thread to handle Read/Write */
>        vbd->vbd_thread = kthread_create(io_handler, vbd, "vblk%d_thread", vbd->index);
>        if(IS_ERR(vbd->vbd_thread)){
>                printk(KERN_ALERT "Unable to create thread for %s!\n", vbd->gd->disk_name);
>                goto retnull;
>        }
>        wake_up_process(vbd->vbd_thread);
>        printk("File(%s) is being emulated as block device %s\n", filename, vbd->gd->disk_name);

Use KERN_INFO?

>        add_disk(vbd->gd);
>
>        return vbd;
>
> retnull:
>        if(vbd != NULL){
>                if(vbd->rq != NULL)
>                        blk_cleanup_queue(vbd->rq);
>
>                if(vbd->gd != NULL)
>                        put_disk(vbd->gd);
>
>                kfree(vbd);
>        }
>        return NULL;
> }
>
> static void pdev_release(struct device *dev)
> {
>        return;
> }
>
> static int detach_vbd(const char *filename)
> {
>        struct file *filp;
>        struct vir_blk_dev *next, *vbd;
>
>        filp = filp_open(filename, O_RDONLY | O_LARGEFILE, 0);
>        if(IS_ERR(filp))
>                return 0;
>
>        list_for_each_entry_safe(vbd, next, &vbd_head, vbd_list){
>                if(vbd->filp->f_path.dentry->d_name.hash == filp->f_path.dentry->d_name.hash){
>                        if(vbd->use_count != 0){
>                                printk("Can't unload busy device(%s)\n", vbd->gd->disk_name);

KERN_ERR?

>                                filp_close(filp, current->files);
>                                return 0;
>                        }
>                        kthread_stop(vbd->vbd_thread);
>                        while(vbd->vbd_thread != NULL)
>                                schedule();
>                        printk("Killed thread 'vblk%d_thread'\n", vbd->index);

KERN_DEBUG?

>                        list_del(&vbd->vbd_list);

Is some kind of locking needed here?

>                        kfree(vbd);
>                        filp_close(filp, current->files);
>                        return 1;
>                }
>        }
>        printk("File(%s) not being emulated\n", filename);

KERN_DEBUG?

>        filp_close(filp, current->files);
>        return 0;
> }
>
> static int attach_vbd(char *filename)
> {
>        int ind, i;
>        struct vir_blk_dev *vbd = NULL;
>
>        /* Find an available Ind */
>        ind = i = 0;
>        do{
>                i = ind;
>                list_for_each_entry(vbd, &vbd_head, vbd_list){

Can the list change while we are iterating over it?  If so, do wee
need locking here?

>                        if(vbd->index == ind){
>                                ind++;
>                                break;
>                        }
>                }
>        }while(ind != i);
>
>        vbd = create_vbd(ind, filename);
>        if(vbd != NULL){
>                list_add_tail(&vbd->vbd_list, &vbd_head);
>                return 1;
>        }else{
>                printk("Unable to load(%s)\n", filename);

KERN_ERR?

>                return 0;
>        }
> }
>
> static ssize_t nodes_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)

Maybe break the long line?

> {
>        if(count>0 && buf[count-1] == '\n')
>                ((char *) buf)[count-1] = 0;
>
>        if(buf[0] == '-' && detach_vbd(buf+1))
>                return count;
>        else if(buf[0] == '+' && attach_vbd(buf+1))
>                return count;
>        else
>                return -EINVAL;
> }

Maybe return more informative error codes here, e.g. -EBUSY if
detach_vdb fails due to a busy device?

>
> static ssize_t nodes_show(struct device *dev, struct device_attribute *attr, char *buf)
> {
>        ssize_t ret = 0;
>        struct vir_blk_dev *vbd;
>        char *fname;
>
>        list_for_each_entry(vbd, &vbd_head, vbd_list){

Can the list change while we are iterating over it?  If so, do we need
to take the lock outside the loop?

>                down(&vbd->sem);
>                fname = d_path(&vbd->filp->f_path, buf, PAGE_SIZE - 1);
>
>                if(IS_ERR(fname)){
>                        ret = PTR_ERR(fname);
>                        up(&vbd->sem);
>                        break;
>                }
>                ret += sprintf(buf+ret, "%s' as '%s'\n", fname, vbd->gd->disk_name);

Can buf overflow?

>                up(&vbd->sem);
>        }
>
>        return ret;
> }
>
> static DEVICE_ATTR(manage, S_IRUGO|S_IWUSR, nodes_show, nodes_store);
>
> static int __init vb_init(void)
> {
>        int ret = 0;
>        int op_done = 0;
>
>        ret = register_blkdev(VBD_MAJOR, VBD_NAME);
>        if(ret){
>                printk(KERN_NOTICE "Could not register_blkdev!\n");
>                ret = -EIO;
>                goto cleanup;

The example code in "Linux Device Drivers", third edition, p.469 uses
KERN_WARNING and returns -EBUSY in this case.  No idea whether this is
important.

>        }else{
>                op_done |= REGBLKDEV;
>        }
>
>        vir_dev.release = pdev_release;

Should this be in the initialiser alongside .bus_id?

>        if(device_register(&vir_dev)){
>                ret = -EIO;
>                printk(KERN_NOTICE "Could not device_register!\n");
>                goto cleanup;
>        }else{
>                op_done |= REGPDEV;
>        }
>
>        if(device_create_file(&vir_dev, &dev_attr_manage)){
>                ret = -EIO;
>                printk(KERN_NOTICE "Could not device_create_file!\n");
>                goto cleanup;
>        }else{
>                op_done |= CRTDEVFL;
>        }
>
>        return ret;
>
> cleanup:
>        if(op_done & CRTDEVFL){
>                device_remove_file(&vir_dev, &dev_attr_manage);

Can this ever be called?

>                op_done &= ~CRTDEVFL;
>        }
>
>        if(op_done & REGPDEV){
>                device_unregister(&vir_dev);
>                op_done &= ~REGPDEV;
>        }
>
>        if(op_done & REGBLKDEV){
>                unregister_blkdev(VBD_MAJOR, VBD_NAME);
>                op_done &= ~REGBLKDEV;
>        }
>
>        return ret;
> }
>
> static void __exit vb_exit(void)
> {
>        struct vir_blk_dev *vbd, *next;
>
>        list_for_each_entry_safe(vbd, next, &vbd_head, vbd_list){
>                kthread_stop(vbd->vbd_thread);
>                while(vbd->vbd_thread != NULL)
>                        schedule();
>                printk("Killed thread 'vblk%d_thread'\n", vbd->index);

KERN_INFO?

>                list_del(&vbd->vbd_list);
>                kfree(vbd);
>        }
>
>        device_remove_file(&vir_dev, &dev_attr_manage);
>        device_unregister(&vir_dev);
>
>        unregister_blkdev(VBD_MAJOR, VBD_NAME);
> }
>
> module_init(vb_init);
> module_exit(vb_exit);
>
> MODULE_LICENSE("GPL");
> MODULE_DESCRIPTION("A Block Device Emulator");

I hope this helps,
Jochen
-- 
http://seehuhn.de/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ