[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6d6a94c50704181759w12eee19agcb56058b8261dded@mail.gmail.com>
Date: Thu, 19 Apr 2007 08:59:05 +0800
From: "Aubrey Li" <aubreylee@...il.com>
To: "David Howells" <dhowells@...hat.com>
Cc: "Robin Getz" <rgetz@...ckfin.uclinux.org>, uaca@...mni.uv.es,
bryan.wu@...log.com, "Alan Cox" <alan@...rguk.ukuu.org.uk>,
waltje@...lt.nl.mugnet.org, netdev@...r.kernel.org,
"Andrew Morton" <akpm@...l.org>,
"Linux Kernel" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH] CONFIG_PACKET_MMAP should depend on MMU
On 4/18/07, David Howells <dhowells@...hat.com> wrote:
> Aubrey Li <aubreylee@...il.com> wrote:
>
> > Here, in the attachment I wrote a small test app. Please correct if
> > there is anything wrong, and feel free to improve it.
>
> Okay... I have that working... probably. I don't know what output it's
> supposed to produce, but I see this:
>
> # /packet-mmap/sample_packet_mmap
> 00-00-00-01-00-00-00-8a-00-00-00-8a-00-42-00-50-
> 38-43-13-a0-00-07-ff-3c-00-00-00-00-00-00-00-00-
> 00-11-08-00-00-00-00-01-00-01-00-06-00-d0-b7-de-
> 32-7b-00-00-00-00-00-00-00-00-00-00-00-00-00-00-
> 00-00-00-90-cc-a2-75-6b-00-d0-b7-de-32-7b-08-00-
> 45-00-00-7c-00-00-40-00-40-11-b4-13-c0-a8-02-80-
> c0-a8-02-8d-08-01-03-20-00-68-8e-65-7f-5b-7e-03-
> 00-00-00-01-00-00-00-00-00-00-00-00-00-00-00-00-
> 00-00-00-00-00-00-00-00-00-00-00-01-00-00-81-a4-
> 00-00-00-01-00-00-00-00-00-00-00-00-00-1d-b8-86-
> 00-00-10-00-ff-ff-ff-ff-00-00-0e-f0-00-00-09-02-
> 01-cb-03-16-46-26-38-0d-00-00-00-00-46-26-38-1e-
> 00-00-00-00-46-26-38-1e-00-00-00-00-00-00-00-00-
> 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00- [repeated]
>
> Does that look reasonable?
Yes, it's reasonable for me, as long as your
host IP is 192.168.2.128
and
target IP is 192.168.2.141
See below
> 00-90-cc-a2-75-6b-|___ MAC Address
> 00-d0-b7-de-32-7b-|
> 08-00------------------------Type: IP
> 45-00------------------------Ver, IHL, TOS
> 00-7c------------------------IP.total.length
> 00-00-
> 40-00-
> 40----------------------------TTL
> 11----------------------------UDP protocol
> b4-13------------------------Checksum
> c0-a8-02-80---------------Source IP: 192.168.2.128
> c0-a8-02-8d---------------Dest IP: 192.168.2.141
------------snip--------------
>
> I've attached the preliminary patch.
Thanks, I'll take a look and try to see if I can give some feedback.
-Aubrey
> Note four things about it:
>
> (1) I've had to add the get_unmapped_area() op to the proto_ops struct, but
> I've only done it for CONFIG_MMU=n as making it available for CONFIG_MMU=y
> could cause problems.
>
> (2) There's a race between packet_get_unmapped_area() being called and
> packet_mmap() being called.
>
> (3) I've added an extra check into packet_set_ring() to make sure the caller
> isn't asking for a combination of buffer size and count that will exceed
> ULONG_MAX. This protects a multiply done elsewhere.
>
> (4) The entire data buffer is allocated as one contiguous lump in NOMMU-mode.
>
> David
>
> ---
> [PATCH] NOMMU: Support mmap() on AF_PACKET sockets
>
> From: David Howells <dhowells@...hat.com>
>
> Support mmap() on AF_PACKET sockets in NOMMU-mode kernels.
>
> Signed-Off-By: David Howells <dhowells@...hat.com>
> ---
>
> include/linux/net.h | 7 +++
> include/net/sock.h | 8 +++
> net/core/sock.c | 10 ++++
> net/packet/af_packet.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++
> net/socket.c | 77 +++++++++++++++++++++++++++++++
> 5 files changed, 219 insertions(+), 1 deletions(-)
>
> diff --git a/include/linux/net.h b/include/linux/net.h
> index 4db21e6..9e77cf6 100644
> --- a/include/linux/net.h
> +++ b/include/linux/net.h
> @@ -161,6 +161,11 @@ struct proto_ops {
> int (*recvmsg) (struct kiocb *iocb, struct socket *sock,
> struct msghdr *m, size_t total_len,
> int flags);
> +#ifndef CONFIG_MMU
> + unsigned long (*get_unmapped_area)(struct file *file, struct socket *sock,
> + unsigned long addr, unsigned long len,
> + unsigned long pgoff, unsigned long flags);
> +#endif
> int (*mmap) (struct file *file, struct socket *sock,
> struct vm_area_struct * vma);
> ssize_t (*sendpage) (struct socket *sock, struct page *page,
> @@ -191,6 +196,8 @@ extern int sock_sendmsg(struct socket *sock, struct msghdr *msg,
> extern int sock_recvmsg(struct socket *sock, struct msghdr *msg,
> size_t size, int flags);
> extern int sock_map_fd(struct socket *sock);
> +extern void sock_make_mappable(struct socket *sock,
> + unsigned long prot);
> extern struct socket *sockfd_lookup(int fd, int *err);
> #define sockfd_put(sock) fput(sock->file)
> extern int net_ratelimit(void);
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 2c7d60c..d91edea 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -841,6 +841,14 @@ extern int sock_no_sendmsg(struct kiocb *, struct socket *,
> struct msghdr *, size_t);
> extern int sock_no_recvmsg(struct kiocb *, struct socket *,
> struct msghdr *, size_t, int);
> +#ifndef CONFIG_MMU
> +extern unsigned long sock_no_get_unmapped_area(struct file *,
> + struct socket *,
> + unsigned long,
> + unsigned long,
> + unsigned long,
> + unsigned long);
> +#endif
> extern int sock_no_mmap(struct file *file,
> struct socket *sock,
> struct vm_area_struct *vma);
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 27c4f62..b288799 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -1364,6 +1364,15 @@ int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
> return -EOPNOTSUPP;
> }
>
> +#ifndef CONFIG_MMU
> +unsigned long sock_no_get_unmapped_area(struct file *file, struct socket *sock,
> + unsigned long addr, unsigned long len,
> + unsigned long pgoff, unsigned long flags)
> +{
> + return (unsigned long) -ENODEV;
> +}
> +#endif
> +
> int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
> {
> /* Mirror missing mmap method error code */
> @@ -1943,6 +1952,7 @@ EXPORT_SYMBOL(sock_no_getname);
> EXPORT_SYMBOL(sock_no_getsockopt);
> EXPORT_SYMBOL(sock_no_ioctl);
> EXPORT_SYMBOL(sock_no_listen);
> +EXPORT_SYMBOL(sock_no_get_unmapped_area);
> EXPORT_SYMBOL(sock_no_mmap);
> EXPORT_SYMBOL(sock_no_poll);
> EXPORT_SYMBOL(sock_no_recvmsg);
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 28d47e8..12d970a 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -78,6 +78,7 @@
> #include <linux/poll.h>
> #include <linux/module.h>
> #include <linux/init.h>
> +#include <asm/mman.h>
>
> #ifdef CONFIG_INET
> #include <net/inet_common.h>
> @@ -1023,6 +1024,7 @@ static int packet_create(struct socket *sock, int protocol)
> sock->ops = &packet_ops_spkt;
> #endif
> sock_init_data(sock, sk);
> + sock_make_mappable(sock, PROT_READ | PROT_WRITE);
>
> po = pkt_sk(sk);
> sk->sk_family = PF_PACKET;
> @@ -1629,6 +1631,7 @@ static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
> return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
> }
>
> +#ifdef CONFIG_MMU
> static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
> {
> int i;
> @@ -1671,6 +1674,45 @@ out_free_pgvec:
> goto out;
> }
>
> +#else
> +/*
> + * free the buffer pointer block and all the buffers for NOMMU
> + */
> +static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
> +{
> + kfree(pg_vec[0]);
> + kfree(pg_vec);
> +}
> +
> +/*
> + * allocate the buffer pointer block and all the buffer blocks for the mappable
> + * AF_PACKET buffers for NOMMU mode
> + * - in NOMMU mode the buffers must be contiguous with each other
> + */
> +static char **alloc_pg_vec(struct tpacket_req *req, int order)
> +{
> + char *buffers;
> + char **ptrblock;
> + int loop;
> +
> + buffers = kcalloc(req->tp_block_size, req->tp_block_nr, GFP_KERNEL);
> + if (!buffers)
> + return ERR_PTR(-ENOMEM);
> + ptrblock = kmalloc(sizeof(char *) * req->tp_block_nr, GFP_KERNEL);
> + if (!ptrblock) {
> + kfree(buffers);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + for (loop = 0; loop < req->tp_block_nr; loop++) {
> + ptrblock[loop] = buffers;
> + buffers += req->tp_block_size;
> + }
> +
> + return ptrblock;
> +}
> +#endif
> +
> static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
> {
> char **pg_vec = NULL;
> @@ -1695,6 +1737,8 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
> return -EINVAL;
> if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
> return -EINVAL;
> + if (unlikely(ULONG_MAX / req->tp_block_size < req->tp_block_nr))
> + return -ENOMEM;
>
> po->frames_per_block = req->tp_block_size/req->tp_frame_size;
> if (unlikely(po->frames_per_block <= 0))
> @@ -1783,6 +1827,7 @@ out:
> return err;
> }
>
> +#ifdef CONFIG_MMU
> static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
> {
> struct sock *sk = sock->sk;
> @@ -1823,7 +1868,72 @@ out:
> release_sock(sk);
> return err;
> }
> -#endif
> +
> +#else
> +/*
> + * find out where the socket buffers are so that NOMMU mmap can return the
> + * address directly
> + */
> +static unsigned long packet_get_unmapped_area(struct file *file,
> + struct socket *sock,
> + unsigned long addr,
> + unsigned long len,
> + unsigned long pgoff,
> + unsigned long flags)
> +{
> + struct sock *sk = sock->sk;
> + struct packet_sock *po = pkt_sk(sk);
> +
> + /* check that they want to map the whole buffer */
> + if (pgoff)
> + return (unsigned long) -EINVAL;
> +
> + lock_sock(sk);
> + if (!po->pg_vec ||
> + len != po->pg_vec_len * po->pg_vec_pages * PAGE_SIZE)
> + addr = (unsigned long) -EINVAL;
> + else
> + addr = (unsigned long) po->pg_vec[0];
> + release_sock(sk);
> + return addr;
> +}
> +
> +/*
> + * set the mapping of a packet socket buffer for NOMMU mmap
> + */
> +static int packet_mmap(struct file *file, struct socket *sock,
> + struct vm_area_struct *vma)
> +{
> + struct sock *sk = sock->sk;
> + struct packet_sock *po = pkt_sk(sk);
> + unsigned long len;
> + int ret;
> +
> + if (vma->vm_pgoff)
> + return -EINVAL;
> +
> + lock_sock(sk);
> +
> + /* check that they want to map the whole buffer */
> + len = vma->vm_end - vma->vm_start;
> + if (!po->pg_vec ||
> + len != po->pg_vec_len * po->pg_vec_pages * PAGE_SIZE) {
> + ret = -EINVAL;
> + } else if (vma->vm_start != (unsigned long) po->pg_vec[0]) {
> + /* someone found the gap between get_unmapped_area and mmap */
> + ret = -ESTALE;
> + } else {
> + atomic_inc(&po->mapped);
> + vma->vm_ops = &packet_mmap_ops;
> + ret = 0;
> + }
> +
> + release_sock(sk);
> + return ret;
> +}
> +
> +#endif /* CONFIG_MMU */
> +#endif /* CONFIG_PACKET_MMAP */
>
>
> #ifdef CONFIG_SOCK_PACKET
> @@ -1844,6 +1954,9 @@ static const struct proto_ops packet_ops_spkt = {
> .getsockopt = sock_no_getsockopt,
> .sendmsg = packet_sendmsg_spkt,
> .recvmsg = packet_recvmsg,
> +#ifndef CONFIG_MMU
> + .get_unmapped_area = sock_no_get_unmapped_area,
> +#endif
> .mmap = sock_no_mmap,
> .sendpage = sock_no_sendpage,
> };
> @@ -1866,6 +1979,9 @@ static const struct proto_ops packet_ops = {
> .getsockopt = packet_getsockopt,
> .sendmsg = packet_sendmsg,
> .recvmsg = packet_recvmsg,
> +#ifndef CONFIG_MMU
> + .get_unmapped_area = packet_get_unmapped_area,
> +#endif
> .mmap = packet_mmap,
> .sendpage = sock_no_sendpage,
> };
> diff --git a/net/socket.c b/net/socket.c
> index ea8f81a..4109db6 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -87,6 +87,7 @@
>
> #include <asm/uaccess.h>
> #include <asm/unistd.h>
> +#include <asm/mman.h>
>
> #include <net/compat.h>
>
> @@ -98,6 +99,13 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
> unsigned long nr_segs, loff_t pos);
> static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
> unsigned long nr_segs, loff_t pos);
> +#ifndef CONFIG_MMU
> +static unsigned long sock_get_unmapped_area(struct file *file,
> + unsigned long addr,
> + unsigned long len,
> + unsigned long pgoff,
> + unsigned long flags);
> +#endif
> static int sock_mmap(struct file *file, struct vm_area_struct *vma);
>
> static int sock_close(struct inode *inode, struct file *file);
> @@ -127,6 +135,9 @@ static const struct file_operations socket_file_ops = {
> #ifdef CONFIG_COMPAT
> .compat_ioctl = compat_sock_ioctl,
> #endif
> +#ifndef CONFIG_MMU
> + .get_unmapped_area = sock_get_unmapped_area,
> +#endif
> .mmap = sock_mmap,
> .open = sock_no_open, /* special open code to disallow open via /proc */
> .release = sock_close,
> @@ -136,6 +147,26 @@ static const struct file_operations socket_file_ops = {
> };
>
> /*
> + * capabilities for non-mappable sockets
> + * - does not permit mmap at all
> + * - no readahead or I/O queue unplugging required
> + */
> +struct backing_dev_info non_mappable_socket_bdi = {
> + .capabilities = 0,
> +};
> +
> +/*
> + * capabilities for read/write mappable sockets
> + * - permits shared-mmap for read and write only
> + * - does not permit private mmap
> + * - no readahead or I/O queue unplugging required
> + */
> +struct backing_dev_info rw_mappable_socket_bdi = {
> + .capabilities = (BDI_CAP_MAP_DIRECT | BDI_CAP_READ_MAP |
> + BDI_CAP_WRITE_MAP),
> +};
> +
> +/*
> * The protocol list. Each protocol is registered in here.
> */
>
> @@ -415,6 +446,34 @@ static struct socket *sock_from_file(struct file *file, int *err)
> }
>
> /**
> + * sock_make_mappable - Change the mmappability of a socket
> + * @sock: The socket to alter
> + * @prot: The mmap protection to permit
> + *
> + * The nominated socket has its backing device information selection
> + * changed to indicate to mmap() what sort of mappings are available on
> + * this socket.
> + */
> +void sock_make_mappable(struct socket *sock, unsigned long prot)
> +{
> + struct address_space *mapping = &SOCK_INODE(sock)->i_data;
> +
> + switch (prot) {
> + case 0:
> + mapping->backing_dev_info = &non_mappable_socket_bdi;
> + break;
> + case PROT_READ | PROT_WRITE:
> + mapping->backing_dev_info = &rw_mappable_socket_bdi;
> + break;
> + case PROT_READ:
> + case PROT_WRITE:
> + default:
> + BUG();
> + break;
> + }
> +}
> +
> +/**
> * sockfd_lookup - Go from a file number to its socket slot
> * @fd: file handle
> * @err: pointer to an error code return
> @@ -482,6 +541,7 @@ static struct socket *sock_alloc(void)
> inode->i_mode = S_IFSOCK | S_IRWXUGO;
> inode->i_uid = current->fsuid;
> inode->i_gid = current->fsgid;
> + inode->i_data.backing_dev_info = &non_mappable_socket_bdi;
>
> get_cpu_var(sockets_in_use)++;
> put_cpu_var(sockets_in_use);
> @@ -918,6 +978,22 @@ static unsigned int sock_poll(struct file *file, poll_table *wait)
> return sock->ops->poll(file, sock, wait);
> }
>
> +#ifndef CONFIG_MMU
> +static unsigned long sock_get_unmapped_area(struct file *file,
> + unsigned long addr,
> + unsigned long len,
> + unsigned long pgoff,
> + unsigned long flags)
> +{
> + struct socket *sock = file->private_data;
> +
> + if (!sock->ops->get_unmapped_area)
> + return -ENOSYS;
> + return sock->ops->get_unmapped_area(file, sock,
> + addr, len, pgoff, flags);
> +}
> +#endif
> +
> static int sock_mmap(struct file *file, struct vm_area_struct *vma)
> {
> struct socket *sock = file->private_data;
> @@ -2288,6 +2364,7 @@ EXPORT_SYMBOL(sock_create);
> EXPORT_SYMBOL(sock_create_kern);
> EXPORT_SYMBOL(sock_create_lite);
> EXPORT_SYMBOL(sock_map_fd);
> +EXPORT_SYMBOL(sock_make_mappable);
> EXPORT_SYMBOL(sock_recvmsg);
> EXPORT_SYMBOL(sock_register);
> EXPORT_SYMBOL(sock_release);
>
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists