[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <530E5A22.5050000@redhat.com>
Date: Wed, 26 Feb 2014 22:18:26 +0100
From: Daniel Borkmann <dborkman@...hat.com>
To: Stephen Hemminger <stephen@...workplumber.org>
CC: netdev@...r.kernel.org, karl@....com
Subject: Re: Bug 70021 - Call to munmap() causes system to partially hang;
power cycle needed to recover.
On 02/26/2014 10:05 PM, Stephen Hemminger wrote:
> This bug appears to have been stuck in MM bugzilla and never
> addressed...
Sorry, just noticed your email to netdev right now.
The mlock() fix for THP, which indirectly affects PF_PACKET, currently sits in Andrew Morton's (akpm) mmotm tree:
http://ozlabs.org/~akpm/mmotm/broken-out/mm-include-vm_mixedmap-flag-in-the-vm_special-list-to-avoid-munlocking.patch
Thanks,
Daniel
> https://bugzilla.kernel.org/show_bug.cgi?id=70021
>
>
> [reply] [−] Private Description Karl Auerbach 2014-02-05 00:51:02 UTC
> Created attachment 124531 [details]
> This is a small program that triggers the fault. It requires root privilege to run.
>
> Before kernel 3.12.1 one could mmap() the RX and TX ring buffers for a network socket and reliably release them with munmap().
>
> Starting with kernel 3.12.1 and running through the latest kernel I tested (3.1.14 [sic; presumably a later 3.12.x/3.13.x release, since 3.1.14 predates 3.12.1]) this no longer works. The call to munmap() never returns.
>
> Parts of the system may continue to operate, but the system can not be shut down by normal means. It takes a hardware reset or power cycle to recover.
>
> I've got a short program, extracted from something we've been running for several years, that triggers the problem.
>
> I believe that every kernel from 3.12.1 and forward faults when this is run.
>
> This has been reported to the Fedora crew, and it was suggested that I kick this upstream. So here I am.
>
> ---
> #include <assert.h>
> #include <errno.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <stdarg.h>
> #include <unistd.h>
>
> #include <arpa/inet.h>
> #include <sys/ioctl.h>
> #include <sys/mman.h>
> #include <sys/socket.h>
> #include <sys/un.h>
> #include <sys/user.h>
>
> #include <features.h> /* for the glibc version number */
> #include <net/ethernet.h>
> #include <linux/if_packet.h>
> #include <linux/if_ether.h>
>
> #include <net/if.h>
> #include <netinet/in.h>
> #include <sys/ioctl.h>
> #include <sys/types.h>
> #include <linux/if_tun.h>
>
> #define NIL 0 // used below as the address argument passed to mmap()
>
> typedef int SOCKET; // type of the descriptor returned by socket() below
>
> int main(int argc, char *argv) // Reproducer: set up PACKET_RX_RING and PACKET_TX_RING on a raw packet socket, mmap() both rings, munmap() them, close the socket. NOTE(review): argv is declared `char *` rather than the standard `char **`; neither argc nor argv is used.
> {
> size_t RxMmap_Size; // bytes occupied by the RX ring
> size_t TxMmap_Size; // bytes occupied by the TX ring (set equal to the RX ring size)
> unsigned int Ether_Sz; // payload size used when computing the frame size
> unsigned int Block_Sz_Order; // shift applied to the page size to obtain the block size
> unsigned int Block_Sz; // bytes per ring block
> unsigned int Block_Cnt; // number of blocks per ring
> unsigned int Frame_Sz; // bytes per ring frame
> unsigned int Frame_Cnt; // total frames per ring
> unsigned int Frames_Per_Block; // whole frames that fit in one block
> int rcode; // NOTE(review): never used in this function
>
> void * Mmap_Addr; // start of the combined RX+TX mapping
> size_t Mmap_Size; // length of the combined RX+TX mapping
> size_t TXDataOffset; // NOTE(review): assigned below but never read
>
> SOCKET Socket;
>
> struct tpacket_req ring_req; // ring geometry handed to setsockopt() for both rings
>
> Ether_Sz = 1518;
> Block_Sz = Ether_Sz; // NOTE(review): dead store; Block_Sz is overwritten from the page size before it is read
> Block_Sz_Order = 2; // block size is page size << 2, i.e. 16384 bytes when pages are 4096 bytes
> Block_Cnt = 1000;
>
> Socket = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); // the report states the program requires root privilege to run
> if (Socket == -1)
> {
> perror("socket failed");
> return 1;
> }
>
> Frame_Sz = TPACKET_ALIGN(TPACKET_ALIGN(TPACKET2_HDRLEN) + Ether_Sz); // aligned header length plus Ether_Sz, aligned again
> TXDataOffset = TPACKET2_HDRLEN - sizeof(struct sockaddr_ll);
>
> Block_Sz = getpagesize() << Block_Sz_Order;
>
> Frames_Per_Block = Block_Sz / Frame_Sz; // integer division: any remainder of the block is left unused
> Frame_Cnt = Frames_Per_Block * Block_Cnt;
>
> RxMmap_Size = Block_Sz * Block_Cnt; // product is computed in unsigned int before being stored in size_t
> TxMmap_Size = RxMmap_Size;
>
> Mmap_Size = RxMmap_Size + TxMmap_Size; // one mapping covers both rings
>
> // Establish the receive ring.
> // For convenience it is given the same size as the TX ring.
> // The mmap size calculations above assume that the
> // rings are the same size.
> ring_req.tp_block_nr = Block_Cnt;
> ring_req.tp_frame_size = Frame_Sz;
> ring_req.tp_block_size = Block_Sz;
> ring_req.tp_frame_nr = Frame_Cnt;
> if (setsockopt(Socket, SOL_PACKET, PACKET_RX_RING,
> (char *)&ring_req, sizeof(ring_req)) < 0)
> {
> perror("Setsockopt RX_RING failed");
> close(Socket);
> return -1; // NOTE(review): returns -1 here while the socket/mmap/munmap/close failure paths return 1
> }
>
> // Establish the transmit ring.
> // For convenience it is given the same size as the RX ring.
> // The mmap size calculations above assume that the
> // rings are the same size.
> ring_req.tp_block_nr = Block_Cnt;
> ring_req.tp_frame_size = Frame_Sz;
> ring_req.tp_block_size = Block_Sz;
> ring_req.tp_frame_nr = Frame_Cnt;
> if (setsockopt(Socket, SOL_PACKET, PACKET_TX_RING,
> (char *)&ring_req, sizeof(ring_req)) < 0)
> {
> perror("Setsockopt TX_RING failed");
> close(Socket);
> return -1; // NOTE(review): same -1 vs 1 inconsistency as the RX ring failure path
> }
>
> fprintf(stderr, "Calling mmap\n");
> Mmap_Addr = mmap(NIL, Mmap_Size, PROT_READ | PROT_WRITE,
> MAP_SHARED | MAP_LOCKED, Socket, 0); // shared, locked mapping of Mmap_Size bytes on the packet socket
>
> if (Mmap_Addr == MAP_FAILED)
> {
> perror("mmap failed");
> return 1; // NOTE(review): this path returns without closing Socket
> }
>
> fprintf(stderr, "Calling munmap\n");
> if (Mmap_Addr != MAP_FAILED) // always true here: the MAP_FAILED case has already returned above
> {
> if (munmap(Mmap_Addr, Mmap_Size) != 0) // per the accompanying report, this is the call that never returns on affected kernels
> {
> perror("munmap failed");
> return 1;
> }
> }
>
> fprintf(stderr, "Closing socket\n");
> if (close(Socket) != 0)
> {
> perror("close failed");
> return 1;
> }
>
> fprintf(stderr, "Program returning\n");
> return 0;
> }
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists