diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt deleted file mode 100644 index c6af4bac5aa8..000000000000 --- a/Documentation/networking/netlink_mmap.txt +++ /dev/null @@ -1,339 +0,0 @@ -This file documents how to use memory mapped I/O with netlink. - -Author: Patrick McHardy - -Overview --------- - -Memory mapped netlink I/O can be used to increase throughput and decrease -overhead of unicast receive and transmit operations. Some netlink subsystems -require high throughput, these are mainly the netfilter subsystems -nfnetlink_queue and nfnetlink_log, but it can also help speed up large -dump operations of f.i. the routing database. - -Memory mapped netlink I/O used two circular ring buffers for RX and TX which -are mapped into the processes address space. - -The RX ring is used by the kernel to directly construct netlink messages into -user-space memory without copying them as done with regular socket I/O, -additionally as long as the ring contains messages no recvmsg() or poll() -syscalls have to be issued by user-space to get more message. - -The TX ring is used to process messages directly from user-space memory, the -kernel processes all messages contained in the ring using a single sendmsg() -call. - -Usage overview --------------- - -In order to use memory mapped netlink I/O, user-space needs three main changes: - -- ring setup -- conversion of the RX path to get messages from the ring instead of recvmsg() -- conversion of the TX path to construct messages into the ring - -Ring setup is done using setsockopt() to provide the ring parameters to the -kernel, then a call to mmap() to map the ring into the processes address space: - -- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, ¶ms, sizeof(params)); -- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, ¶ms, sizeof(params)); -- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) - -Usage of either ring is optional, but even if only the RX ring is used the -mapping still needs to be writable in order to update the frame status after -processing. - -Conversion of the reception path involves calling poll() on the file -descriptor, once the socket is readable the frames from the ring are -processed in order until no more messages are available, as indicated by -a status word in the frame header. - -On kernel side, in order to make use of memory mapped I/O on receive, the -originating netlink subsystem needs to support memory mapped I/O, otherwise -it will use an allocated socket buffer as usual and the contents will be - copied to the ring on transmission, nullifying most of the performance gains. -Dumps of kernel databases automatically support memory mapped I/O. - -Conversion of the transmit path involves changing message construction to -use memory from the TX ring instead of (usually) a buffer declared on the -stack and setting up the frame header appropriately. Optionally poll() can -be used to wait for free frames in the TX ring. - -Structured and definitions for using memory mapped I/O are contained in -. - -RX and TX rings ----------------- - -Each ring contains a number of continuous memory blocks, containing frames of -fixed size dependent on the parameters used for ring setup. - -Ring: [ block 0 ] - [ frame 0 ] - [ frame 1 ] - [ block 1 ] - [ frame 2 ] - [ frame 3 ] - ... - [ block n ] - [ frame 2 * n ] - [ frame 2 * n + 1 ] - -The blocks are only visible to the kernel, from the point of view of user-space -the ring just contains the frames in a continuous memory zone. - -The ring parameters used for setting up the ring are defined as follows: - -struct nl_mmap_req { - unsigned int nm_block_size; - unsigned int nm_block_nr; - unsigned int nm_frame_size; - unsigned int nm_frame_nr; -}; - -Frames are grouped into blocks, where each block is a continuous region of memory -and holds nm_block_size / nm_frame_size frames. The total number of frames in -the ring is nm_frame_nr. The following invariants hold: - -- frames_per_block = nm_block_size / nm_frame_size - -- nm_frame_nr = frames_per_block * nm_block_nr - -Some parameters are constrained, specifically: - -- nm_block_size must be a multiple of the architectures memory page size. - The getpagesize() function can be used to get the page size. - -- nm_frame_size must be equal or larger to NL_MMAP_HDRLEN, IOW a frame must be - able to hold at least the frame header - -- nm_frame_size must be smaller or equal to nm_block_size - -- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT - -- nm_frame_nr must equal the actual number of frames as specified above. - -When the kernel can't allocate physically continuous memory for a ring block, -it will fall back to use physically discontinuous memory. This might affect -performance negatively, in order to avoid this the nm_frame_size parameter -should be chosen to be as small as possible for the required frame size and -the number of blocks should be increased instead. - -Ring frames ------------- - -Each frames contain a frame header, consisting of a synchronization word and some -meta-data, and the message itself. - -Frame: [ header message ] - -The frame header is defined as follows: - -struct nl_mmap_hdr { - unsigned int nm_status; - unsigned int nm_len; - __u32 nm_group; - /* credentials */ - __u32 nm_pid; - __u32 nm_uid; - __u32 nm_gid; -}; - -- nm_status is used for synchronizing processing between the kernel and user- - space and specifies ownership of the frame as well as the operation to perform - -- nm_len contains the length of the message contained in the data area - -- nm_group specified the destination multicast group of message - -- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending - process. These values correspond to the data available using SOCK_PASSCRED in - the SCM_CREDENTIALS cmsg. - -The possible values in the status word are: - -- NL_MMAP_STATUS_UNUSED: - RX ring: frame belongs to the kernel and contains no message - for user-space. Approriate action is to invoke poll() - to wait for new messages. - - TX ring: frame belongs to user-space and can be used for - message construction. - -- NL_MMAP_STATUS_RESERVED: - RX ring only: frame is currently used by the kernel for message - construction and contains no valid message yet. - Appropriate action is to invoke poll() to wait for - new messages. - -- NL_MMAP_STATUS_VALID: - RX ring: frame contains a valid message. Approriate action is - to process the message and release the frame back to - the kernel by setting the status to - NL_MMAP_STATUS_UNUSED or queue the frame by setting the - status to NL_MMAP_STATUS_SKIP. - - TX ring: the frame contains a valid message from user-space to - be processed by the kernel. After completing processing - the kernel will release the frame back to user-space by - setting the status to NL_MMAP_STATUS_UNUSED. - -- NL_MMAP_STATUS_COPY: - RX ring only: a message is ready to be processed but could not be - stored in the ring, either because it exceeded the - frame size or because the originating subsystem does - not support memory mapped I/O. Appropriate action is - to invoke recvmsg() to receive the message and release - the frame back to the kernel by setting the status to - NL_MMAP_STATUS_UNUSED. - -- NL_MMAP_STATUS_SKIP: - RX ring only: user-space queued the message for later processing, but - processed some messages following it in the ring. The - kernel should skip this frame when looking for unused - frames. - -The data area of a frame begins at a offset of NL_MMAP_HDRLEN relative to the -frame header. - -TX limitations --------------- - -Kernel processing usually involves validation of the message received by -user-space, then processing its contents. The kernel must assure that -userspace is not able to modify the message contents after they have been -validated. In order to do so, the message is copied from the ring frame -to an allocated buffer if either of these conditions is false: - -- only a single mapping of the ring exists -- the file descriptor is not shared between processes - -This means that for threaded programs, the kernel will fall back to copying. - -Example -------- - -Ring setup: - - unsigned int block_size = 16 * getpagesize(); - struct nl_mmap_req req = { - .nm_block_size = block_size, - .nm_block_nr = 64, - .nm_frame_size = 16384, - .nm_frame_nr = 64 * block_size / 16384, - }; - unsigned int ring_size; - void *rx_ring, *tx_ring; - - /* Configure ring parameters */ - if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0) - exit(1); - if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0) - exit(1) - - /* Calculate size of each individual ring */ - ring_size = req.nm_block_nr * req.nm_block_size; - - /* Map RX/TX rings. The TX ring is located after the RX ring */ - rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - if ((long)rx_ring == -1L) - exit(1); - tx_ring = rx_ring + ring_size: - -Message reception: - -This example assumes some ring parameters of the ring setup are available. - - unsigned int frame_offset = 0; - struct nl_mmap_hdr *hdr; - struct nlmsghdr *nlh; - unsigned char buf[16384]; - ssize_t len; - - while (1) { - struct pollfd pfds[1]; - - pfds[0].fd = fd; - pfds[0].events = POLLIN | POLLERR; - pfds[0].revents = 0; - - if (poll(pfds, 1, -1) < 0 && errno != -EINTR) - exit(1); - - /* Check for errors. Error handling omitted */ - if (pfds[0].revents & POLLERR) - - - /* If no new messages, poll again */ - if (!(pfds[0].revents & POLLIN)) - continue; - - /* Process all frames */ - while (1) { - /* Get next frame header */ - hdr = rx_ring + frame_offset; - - if (hdr->nm_status == NL_MMAP_STATUS_VALID) { - /* Regular memory mapped frame */ - nlh = (void *)hdr + NL_MMAP_HDRLEN; - len = hdr->nm_len; - - /* Release empty message immediately. May happen - * on error during message construction. - */ - if (len == 0) - goto release; - } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) { - /* Frame queued to socket receive queue */ - len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); - if (len <= 0) - break; - nlh = buf; - } else - /* No more messages to process, continue polling */ - break; - - process_msg(nlh); -release: - /* Release frame back to the kernel */ - hdr->nm_status = NL_MMAP_STATUS_UNUSED; - - /* Advance frame offset to next frame */ - frame_offset = (frame_offset + frame_size) % ring_size; - } - } - -Message transmission: - -This example assumes some ring parameters of the ring setup are available. -A single message is constructed and transmitted, to send multiple messages -at once they would be constructed in consecutive frames before a final call -to sendto(). - - unsigned int frame_offset = 0; - struct nl_mmap_hdr *hdr; - struct nlmsghdr *nlh; - struct sockaddr_nl addr = { - .nl_family = AF_NETLINK, - }; - - hdr = tx_ring + frame_offset; - if (hdr->nm_status != NL_MMAP_STATUS_UNUSED) - /* No frame available. Use poll() to avoid. */ - exit(1); - - nlh = (void *)hdr + NL_MMAP_HDRLEN; - - /* Build message */ - build_message(nlh); - - /* Fill frame header: length and status need to be set */ - hdr->nm_len = nlh->nlmsg_len; - hdr->nm_status = NL_MMAP_STATUS_VALID; - - if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0) - exit(1); - - /* Advance frame offset to next frame */ - frame_offset = (frame_offset + frame_size) % ring_size; diff --git a/Makefile b/Makefile index 9deaac9255ff..ad5dae4a309e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 16 -SUBLEVEL = 42 -EXTRAVERSION = +SUBLEVEL = 43 +EXTRAVERSION = -rc2 NAME = Museum of Fishiegoodies # *DOCUMENTATION* diff --git a/arch/mips/Makefile b/arch/mips/Makefile index a8521de14791..d964dcf0762b 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -93,6 +93,15 @@ LDFLAGS_vmlinux += -G 0 -static -n -nostdlib KBUILD_AFLAGS_MODULE += -mlong-calls KBUILD_CFLAGS_MODULE += -mlong-calls +# +# pass -msoft-float to GAS if it supports it. However on newer binutils +# (specifically newer than 2.24.51.20140728) we then also need to explicitly +# set ".set hardfloat" in all files which manipulate floating point registers. +# +ifneq ($(call as-option,-Wa$(comma)-msoft-float,),) + cflags-y += -DGAS_HAS_SET_HARDFLOAT -Wa,-msoft-float +endif + cflags-y += -ffreestanding # diff --git a/arch/mips/include/asm/asmmacro-32.h b/arch/mips/include/asm/asmmacro-32.h index e38c2811d4e2..cdac7b3eeaf7 100644 --- a/arch/mips/include/asm/asmmacro-32.h +++ b/arch/mips/include/asm/asmmacro-32.h @@ -13,6 +13,8 @@ #include .macro fpu_save_single thread tmp=t0 + .set push + SET_HARDFLOAT cfc1 \tmp, fcr31 swc1 $f0, THREAD_FPR0_LS64(\thread) swc1 $f1, THREAD_FPR1_LS64(\thread) @@ -47,9 +49,12 @@ swc1 $f30, THREAD_FPR30_LS64(\thread) swc1 $f31, THREAD_FPR31_LS64(\thread) sw \tmp, THREAD_FCR31(\thread) + .set pop .endm .macro fpu_restore_single thread tmp=t0 + .set push + SET_HARDFLOAT lw \tmp, THREAD_FCR31(\thread) lwc1 $f0, THREAD_FPR0_LS64(\thread) lwc1 $f1, THREAD_FPR1_LS64(\thread) @@ -84,6 +89,7 @@ lwc1 $f30, THREAD_FPR30_LS64(\thread) lwc1 $f31, THREAD_FPR31_LS64(\thread) ctc1 \tmp, fcr31 + .set pop .endm .macro cpu_save_nonscratch thread diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 9dbc454ad14e..5f7d529daef3 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -10,6 +10,7 @@ #include #include +#include #ifdef CONFIG_32BIT #include @@ -56,6 +57,8 @@ #endif /* CONFIG_CPU_MIPSR2 */ .macro fpu_save_16even thread tmp=t0 + .set push + SET_HARDFLOAT cfc1 \tmp, fcr31 sdc1 $f0, THREAD_FPR0_LS64(\thread) sdc1 $f2, THREAD_FPR2_LS64(\thread) @@ -74,11 +77,13 @@ sdc1 $f28, THREAD_FPR28_LS64(\thread) sdc1 $f30, THREAD_FPR30_LS64(\thread) sw \tmp, THREAD_FCR31(\thread) + .set pop .endm .macro fpu_save_16odd thread .set push .set mips64r2 + SET_HARDFLOAT sdc1 $f1, THREAD_FPR1_LS64(\thread) sdc1 $f3, THREAD_FPR3_LS64(\thread) sdc1 $f5, THREAD_FPR5_LS64(\thread) @@ -109,6 +114,8 @@ .endm .macro fpu_restore_16even thread tmp=t0 + .set push + SET_HARDFLOAT lw \tmp, THREAD_FCR31(\thread) ldc1 $f0, THREAD_FPR0_LS64(\thread) ldc1 $f2, THREAD_FPR2_LS64(\thread) @@ -132,6 +139,7 @@ .macro fpu_restore_16odd thread .set push .set mips64r2 + SET_HARDFLOAT ldc1 $f1, THREAD_FPR1_LS64(\thread) ldc1 $f3, THREAD_FPR3_LS64(\thread) ldc1 $f5, THREAD_FPR5_LS64(\thread) @@ -201,6 +209,22 @@ .endm #ifdef TOOLCHAIN_SUPPORTS_MSA + .macro _cfcmsa rd, cs + .set push + .set mips32r2 + .set msa + cfcmsa \rd, $\cs + .set pop + .endm + + .macro _ctcmsa cd, rs + .set push + .set mips32r2 + .set msa + ctcmsa $\cd, \rs + .set pop + .endm + .macro ld_d wd, off, base .set push .set mips32r2 @@ -217,35 +241,35 @@ .set pop .endm - .macro copy_u_w rd, ws, n + .macro copy_u_w ws, n .set push .set mips32r2 .set msa - copy_u.w \rd, $w\ws[\n] + copy_u.w $1, $w\ws[\n] .set pop .endm - .macro copy_u_d rd, ws, n + .macro copy_u_d ws, n .set push .set mips64r2 .set msa - copy_u.d \rd, $w\ws[\n] + copy_u.d $1, $w\ws[\n] .set pop .endm - .macro insert_w wd, n, rs + .macro insert_w wd, n .set push .set mips32r2 .set msa - insert.w $w\wd[\n], \rs + insert.w $w\wd[\n], $1 .set pop .endm - .macro insert_d wd, n, rs + .macro insert_d wd, n .set push .set mips64r2 .set msa - insert.d $w\wd[\n], \rs + insert.d $w\wd[\n], $1 .set pop .endm #else @@ -273,18 +297,20 @@ /* * Temporary until all toolchains in use include MSA support. */ - .macro cfcmsa rd, cs + .macro _cfcmsa rd, cs .set push .set noat + SET_HARDFLOAT .insn .word CFC_MSA_INSN | (\cs << 11) move \rd, $1 .set pop .endm - .macro ctcmsa cd, rs + .macro _ctcmsa cd, rs .set push .set noat + SET_HARDFLOAT move $1, \rs .word CTC_MSA_INSN | (\cd << 6) .set pop @@ -293,6 +319,7 @@ .macro ld_d wd, off, base .set push .set noat + SET_HARDFLOAT addu $1, \base, \off .word LDD_MSA_INSN | (\wd << 6) .set pop @@ -301,45 +328,42 @@ .macro st_d wd, off, base .set push .set noat + SET_HARDFLOAT addu $1, \base, \off .word STD_MSA_INSN | (\wd << 6) .set pop .endm - .macro copy_u_w rd, ws, n + .macro copy_u_w ws, n .set push .set noat + SET_HARDFLOAT .insn .word COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11) - /* move triggers an assembler bug... */ - or \rd, $1, zero .set pop .endm - .macro copy_u_d rd, ws, n + .macro copy_u_d ws, n .set push .set noat + SET_HARDFLOAT .insn .word COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11) - /* move triggers an assembler bug... */ - or \rd, $1, zero .set pop .endm - .macro insert_w wd, n, rs + .macro insert_w wd, n .set push .set noat - /* move triggers an assembler bug... */ - or $1, \rs, zero + SET_HARDFLOAT .word INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6) .set pop .endm - .macro insert_d wd, n, rs + .macro insert_d wd, n .set push .set noat - /* move triggers an assembler bug... */ - or $1, \rs, zero + SET_HARDFLOAT .word INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6) .set pop .endm @@ -378,9 +402,21 @@ st_d 29, THREAD_FPR29, \thread st_d 30, THREAD_FPR30, \thread st_d 31, THREAD_FPR31, \thread + .set push + .set noat + SET_HARDFLOAT + _cfcmsa $1, MSA_CSR + sw $1, THREAD_MSA_CSR(\thread) + .set pop .endm .macro msa_restore_all thread + .set push + .set noat + SET_HARDFLOAT + lw $1, THREAD_MSA_CSR(\thread) + _ctcmsa MSA_CSR, $1 + .set pop ld_d 0, THREAD_FPR0, \thread ld_d 1, THREAD_FPR1, \thread ld_d 2, THREAD_FPR2, \thread @@ -415,4 +451,53 @@ ld_d 31, THREAD_FPR31, \thread .endm + .macro msa_init_upper wd +#ifdef CONFIG_64BIT + insert_d \wd, 1 +#else + insert_w \wd, 2 + insert_w \wd, 3 +#endif + .endm + + .macro msa_init_all_upper + .set push + .set noat + SET_HARDFLOAT + not $1, zero + msa_init_upper 0 + msa_init_upper 1 + msa_init_upper 2 + msa_init_upper 3 + msa_init_upper 4 + msa_init_upper 5 + msa_init_upper 6 + msa_init_upper 7 + msa_init_upper 8 + msa_init_upper 9 + msa_init_upper 10 + msa_init_upper 11 + msa_init_upper 12 + msa_init_upper 13 + msa_init_upper 14 + msa_init_upper 15 + msa_init_upper 16 + msa_init_upper 17 + msa_init_upper 18 + msa_init_upper 19 + msa_init_upper 20 + msa_init_upper 21 + msa_init_upper 22 + msa_init_upper 23 + msa_init_upper 24 + msa_init_upper 25 + msa_init_upper 26 + msa_init_upper 27 + msa_init_upper 28 + msa_init_upper 29 + msa_init_upper 30 + msa_init_upper 31 + .set pop + .endm + #endif /* _ASM_ASMMACRO_H */ diff --git a/arch/mips/include/asm/fpregdef.h b/arch/mips/include/asm/fpregdef.h index 429481f9028d..f184ba088532 100644 --- a/arch/mips/include/asm/fpregdef.h +++ b/arch/mips/include/asm/fpregdef.h @@ -14,6 +14,20 @@ #include +/* + * starting with binutils 2.24.51.20140729, MIPS binutils warn about mixing + * hardfloat and softfloat object files. The kernel build uses soft-float by + * default, so we also need to pass -msoft-float along to GAS if it supports it. + * But this in turn causes assembler errors in files which access hardfloat + * registers. We detect if GAS supports "-msoft-float" in the Makefile and + * explicitly put ".set hardfloat" where floating point registers are touched. + */ +#ifdef GAS_HAS_SET_HARDFLOAT +#define SET_HARDFLOAT .set hardfloat +#else +#define SET_HARDFLOAT +#endif + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index a939574f8293..9256467b2a6c 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef CONFIG_MIPS_MT_FPAFF #include @@ -141,13 +142,21 @@ static inline int own_fpu(int restore) static inline void lose_fpu(int save) { preempt_disable(); - if (is_fpu_owner()) { + if (is_msa_enabled()) { + if (save) { + save_msa(current); + current->thread.fpu.fcr31 = + read_32bit_cp1_register(CP1_STATUS); + } + disable_msa(); + clear_thread_flag(TIF_USEDMSA); + } else if (is_fpu_owner()) { if (save) _save_fp(current); - KSTK_STATUS(current) &= ~ST0_CU1; - clear_thread_flag(TIF_USEDFPU); __disable_fpu(); } + KSTK_STATUS(current) &= ~ST0_CU1; + clear_thread_flag(TIF_USEDFPU); preempt_enable(); } diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 6ad0208b50fb..e2377c7765a6 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -1270,7 +1270,7 @@ do { \ /* * Macros to access the floating point coprocessor control registers */ -#define read_32bit_cp1_register(source) \ +#define _read_32bit_cp1_register(source, gas_hardfloat) \ ({ \ int __res; \ \ @@ -1280,12 +1280,21 @@ do { \ " # gas fails to assemble cfc1 for some archs, \n" \ " # like Octeon. \n" \ " .set mips1 \n" \ + " "STR(gas_hardfloat)" \n" \ " cfc1 %0,"STR(source)" \n" \ " .set pop \n" \ : "=r" (__res)); \ __res; \ }) +#ifdef GAS_HAS_SET_HARDFLOAT +#define read_32bit_cp1_register(source) \ + _read_32bit_cp1_register(source, .set hardfloat) +#else +#define read_32bit_cp1_register(source) \ + _read_32bit_cp1_register(source, ) +#endif + #ifdef HAVE_AS_DSP #define rddsp(mask) \ ({ \ diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h index d2227285383f..af5638b12c75 100644 --- a/arch/mips/include/asm/msa.h +++ b/arch/mips/include/asm/msa.h @@ -12,8 +12,11 @@ #include +#ifndef __ASSEMBLY__ + extern void _save_msa(struct task_struct *); extern void _restore_msa(struct task_struct *); +extern void _init_msa_upper(void); static inline void enable_msa(void) { @@ -133,15 +136,6 @@ static inline void write_msa_##name(unsigned int val) \ #endif /* !TOOLCHAIN_SUPPORTS_MSA */ -#define MSA_IR 0 -#define MSA_CSR 1 -#define MSA_ACCESS 2 -#define MSA_SAVE 3 -#define MSA_MODIFY 4 -#define MSA_REQUEST 5 -#define MSA_MAP 6 -#define MSA_UNMAP 7 - __BUILD_MSA_CTL_REG(ir, 0) __BUILD_MSA_CTL_REG(csr, 1) __BUILD_MSA_CTL_REG(access, 2) @@ -151,6 +145,17 @@ __BUILD_MSA_CTL_REG(request, 5) __BUILD_MSA_CTL_REG(map, 6) __BUILD_MSA_CTL_REG(unmap, 7) +#endif /* !__ASSEMBLY__ */ + +#define MSA_IR 0 +#define MSA_CSR 1 +#define MSA_ACCESS 2 +#define MSA_SAVE 3 +#define MSA_MODIFY 4 +#define MSA_REQUEST 5 +#define MSA_MAP 6 +#define MSA_UNMAP 7 + /* MSA Implementation Register (MSAIR) */ #define MSA_IR_REVB 0 #define MSA_IR_REVF (_ULCAST_(0xff) << MSA_IR_REVB) diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index cb4a3ee31983..a752de97840f 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -302,7 +302,8 @@ do { \ __get_kernel_common((x), size, __gu_ptr); \ else \ __get_user_common((x), size, __gu_ptr); \ - } \ + } else \ + (x) = 0; \ \ __gu_err; \ }) @@ -317,6 +318,7 @@ do { \ " .insn \n" \ " .section .fixup,\"ax\" \n" \ "3: li %0, %4 \n" \ + " move %1, $0 \n" \ " j 2b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ @@ -631,6 +633,7 @@ do { \ " .insn \n" \ " .section .fixup,\"ax\" \n" \ "3: li %0, %4 \n" \ + " move %1, $0 \n" \ " j 2b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index 4bb5107511e2..b1d84bd4efb3 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -234,6 +234,7 @@ void output_thread_fpu_defines(void) thread.fpu.fpr[31].val64[FPR_IDX(64, 0)]); OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31); + OFFSET(THREAD_MSA_CSR, task_struct, thread.fpu.msacsr); BLANK(); } diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 7b2df224f041..4d7d99d601cc 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -144,7 +144,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case mm_bc1t_op: preempt_disable(); if (is_fpu_owner()) - asm volatile("cfc1\t%0,$31" : "=r" (fcr31)); + fcr31 = read_32bit_cp1_register(CP1_STATUS); else fcr31 = current->thread.fpu.fcr31; preempt_enable(); @@ -562,11 +562,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case cop1_op: preempt_disable(); if (is_fpu_owner()) - asm volatile( - ".set push\n" - "\t.set mips1\n" - "\tcfc1\t%0,$31\n" - "\t.set pop" : "=r" (fcr31)); + fcr31 = read_32bit_cp1_register(CP1_STATUS); else fcr31 = current->thread.fpu.fcr31; preempt_enable(); diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index ac35e12cb1f3..a5e26dd90592 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -358,6 +358,7 @@ NESTED(nmi_handler, PT_SIZE, sp) .set push /* gas fails to assemble cfc1 for some archs (octeon).*/ \ .set mips1 + SET_HARDFLOAT cfc1 a1, fcr31 li a2, ~(0x3f << 12) and a2, a1 diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index f31063dbdaeb..5ce3b746cedc 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -28,6 +28,8 @@ .set mips1 /* Save floating point context */ LEAF(_save_fp_context) + .set push + SET_HARDFLOAT li v0, 0 # assume success cfc1 t1,fcr31 EX(swc1 $f0,(SC_FPREGS+0)(a0)) @@ -65,6 +67,7 @@ LEAF(_save_fp_context) EX(sw t1,(SC_FPC_CSR)(a0)) cfc1 t0,$0 # implementation/version jr ra + .set pop .set nomacro EX(sw t0,(SC_FPC_EIR)(a0)) .set macro @@ -80,6 +83,8 @@ LEAF(_save_fp_context) * stack frame which might have been changed by the user. */ LEAF(_restore_fp_context) + .set push + SET_HARDFLOAT li v0, 0 # assume success EX(lw t0,(SC_FPC_CSR)(a0)) EX(lwc1 $f0,(SC_FPREGS+0)(a0)) @@ -116,6 +121,7 @@ LEAF(_restore_fp_context) EX(lwc1 $f31,(SC_FPREGS+248)(a0)) jr ra ctc1 t0,fcr31 + .set pop END(_restore_fp_context) .set reorder diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S index 20b7b040e76f..435ea652f5fa 100644 --- a/arch/mips/kernel/r2300_switch.S +++ b/arch/mips/kernel/r2300_switch.S @@ -120,6 +120,9 @@ LEAF(_restore_fp) #define FPU_DEFAULT 0x00000000 + .set push + SET_HARDFLOAT + LEAF(_init_fpu) mfc0 t0, CP0_STATUS li t1, ST0_CU1 @@ -165,3 +168,5 @@ LEAF(_init_fpu) mtc1 t0, $f31 jr ra END(_init_fpu) + + .set pop diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index 8352523568e6..02872df35ff2 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -19,8 +19,12 @@ #include #include +/* preprocessor replaces the fp in ".set fp=64" with $30 otherwise */ +#undef fp + .macro EX insn, reg, src .set push + SET_HARDFLOAT .set nomacro .ex\@: \insn \reg, \src .set pop @@ -30,15 +34,19 @@ .endm .set noreorder - .set arch=r4000 LEAF(_save_fp_context) + .set push + SET_HARDFLOAT cfc1 t1, fcr31 + .set pop #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2) .set push + SET_HARDFLOAT #ifdef CONFIG_CPU_MIPS32_R2 - .set mips64r2 + .set mips32r2 + .set fp=64 mfc0 t0, CP0_STATUS sll t0, t0, 5 bgez t0, 1f # skip storing odd if FR=0 @@ -64,6 +72,8 @@ LEAF(_save_fp_context) 1: .set pop #endif + .set push + SET_HARDFLOAT /* Store the 16 even double precision registers */ EX sdc1 $f0, SC_FPREGS+0(a0) EX sdc1 $f2, SC_FPREGS+16(a0) @@ -84,11 +94,15 @@ LEAF(_save_fp_context) EX sw t1, SC_FPC_CSR(a0) jr ra li v0, 0 # success + .set pop END(_save_fp_context) #ifdef CONFIG_MIPS32_COMPAT /* Save 32-bit process floating point context */ LEAF(_save_fp_context32) + .set push + .set arch=r4000 + SET_HARDFLOAT cfc1 t1, fcr31 mfc0 t0, CP0_STATUS @@ -134,6 +148,7 @@ LEAF(_save_fp_context32) EX sw t1, SC32_FPC_CSR(a0) cfc1 t0, $0 # implementation/version EX sw t0, SC32_FPC_EIR(a0) + .set pop jr ra li v0, 0 # success @@ -150,8 +165,10 @@ LEAF(_restore_fp_context) #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2) .set push + SET_HARDFLOAT #ifdef CONFIG_CPU_MIPS32_R2 - .set mips64r2 + .set mips32r2 + .set fp=64 mfc0 t0, CP0_STATUS sll t0, t0, 5 bgez t0, 1f # skip loading odd if FR=0 @@ -175,6 +192,8 @@ LEAF(_restore_fp_context) EX ldc1 $f31, SC_FPREGS+248(a0) 1: .set pop #endif + .set push + SET_HARDFLOAT EX ldc1 $f0, SC_FPREGS+0(a0) EX ldc1 $f2, SC_FPREGS+16(a0) EX ldc1 $f4, SC_FPREGS+32(a0) @@ -192,6 +211,7 @@ LEAF(_restore_fp_context) EX ldc1 $f28, SC_FPREGS+224(a0) EX ldc1 $f30, SC_FPREGS+240(a0) ctc1 t1, fcr31 + .set pop jr ra li v0, 0 # success END(_restore_fp_context) @@ -199,6 +219,8 @@ LEAF(_restore_fp_context) #ifdef CONFIG_MIPS32_COMPAT LEAF(_restore_fp_context32) /* Restore an o32 sigcontext. */ + .set push + SET_HARDFLOAT EX lw t1, SC32_FPC_CSR(a0) mfc0 t0, CP0_STATUS @@ -242,6 +264,7 @@ LEAF(_restore_fp_context32) ctc1 t1, fcr31 jr ra li v0, 0 # success + .set pop END(_restore_fp_context32) #endif diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S index 81ca3f70fe29..64591e671878 100644 --- a/arch/mips/kernel/r4k_switch.S +++ b/arch/mips/kernel/r4k_switch.S @@ -22,6 +22,9 @@ #include +/* preprocessor replaces the fp in ".set fp=64" with $30 otherwise */ +#undef fp + /* * Offset to the current process status flags, the first 32 bytes of the * stack are not used. @@ -64,8 +67,14 @@ /* Check whether we're saving scalar or vector context. */ bgtz a3, 1f - /* Save 128b MSA vector context. */ + /* Save 128b MSA vector context + scalar FP control & status. */ + .set push + SET_HARDFLOAT + cfc1 t1, fcr31 msa_save_all a0 + .set pop /* SET_HARDFLOAT */ + + sw t1, THREAD_FCR31(a0) b 2f 1: /* Save 32b/64b scalar FP context. */ @@ -142,6 +151,11 @@ LEAF(_restore_msa) jr ra END(_restore_msa) +LEAF(_init_msa_upper) + msa_init_all_upper + jr ra + END(_init_msa_upper) + #endif /* @@ -154,6 +168,9 @@ LEAF(_restore_msa) #define FPU_DEFAULT 0x00000000 + .set push + SET_HARDFLOAT + LEAF(_init_fpu) mfc0 t0, CP0_STATUS li t1, ST0_CU1 @@ -225,7 +242,8 @@ LEAF(_init_fpu) #ifdef CONFIG_CPU_MIPS32_R2 .set push - .set mips64r2 + .set mips32r2 + .set fp=64 sll t0, t0, 5 # is Status.FR set? bgez t0, 1f # no: skip setting upper 32b @@ -284,3 +302,5 @@ LEAF(_init_fpu) #endif jr ra END(_init_fpu) + + .set pop /* SET_HARDFLOAT */ diff --git a/arch/mips/kernel/r6000_fpu.S b/arch/mips/kernel/r6000_fpu.S index da0fbe46d83b..47077380c15c 100644 --- a/arch/mips/kernel/r6000_fpu.S +++ b/arch/mips/kernel/r6000_fpu.S @@ -18,6 +18,9 @@ .set noreorder .set mips2 + .set push + SET_HARDFLOAT + /* Save floating point context */ LEAF(_save_fp_context) mfc0 t0,CP0_STATUS @@ -85,3 +88,5 @@ 1: jr ra nop END(_restore_fp_context) + + .set pop /* SET_HARDFLOAT */ diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 2868b3927cb4..e6e055d19f6c 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1094,13 +1094,15 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action, static int enable_restore_fp_context(int msa) { - int err, was_fpu_owner; + int err, was_fpu_owner, prior_msa; if (!used_math()) { /* First time FP context user. */ err = init_fpu(); - if (msa && !err) + if (msa && !err) { enable_msa(); + _init_msa_upper(); + } if (!err) set_used_math(); return err; @@ -1152,13 +1154,38 @@ static int enable_restore_fp_context(int msa) /* * If this is the first time that the task is using MSA and it has * previously used scalar FP in this time slice then we already nave - * FP context which we shouldn't clobber. + * FP context which we shouldn't clobber. We do however need to clear + * the upper 64b of each vector register so that this task has no + * opportunity to see data left behind by another. */ - if (!test_and_set_thread_flag(TIF_MSA_CTX_LIVE) && was_fpu_owner) + prior_msa = test_and_set_thread_flag(TIF_MSA_CTX_LIVE); + if (!prior_msa && was_fpu_owner) { + _init_msa_upper(); return 0; + } + + if (!prior_msa) { + /* + * Restore the least significant 64b of each vector register + * from the existing scalar FP context. + */ + _restore_fp(current); - /* We need to restore the vector context. */ - restore_msa(current); + /* + * The task has not formerly used MSA, so clear the upper 64b + * of each vector register such that it cannot see data left + * behind by another task. + */ + _init_msa_upper(); + } else { + /* We need to restore the vector context. */ + restore_msa(current); + + /* Restore the scalar FP control & status register */ + if (!was_fpu_owner) + write_32bit_cp1_register(CP1_STATUS, + current->thread.fpu.fcr31); + } return 0; } diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index bc5ab0dbdf91..22a2e15bd91b 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -586,11 +586,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, if (insn.i_format.rs == bc_op) { preempt_disable(); if (is_fpu_owner()) - asm volatile( - ".set push\n" - "\t.set mips1\n" - "\tcfc1\t%0,$31\n" - "\t.set pop" : "=r" (fcr31)); + fcr31 = read_32bit_cp1_register(CP1_STATUS); else fcr31 = current->thread.fpu.fcr31; preempt_enable(); diff --git a/arch/mips/ralink/prom.c b/arch/mips/ralink/prom.c index 9c64f029d047..87312dfcee38 100644 --- a/arch/mips/ralink/prom.c +++ b/arch/mips/ralink/prom.c @@ -24,8 +24,10 @@ const char *get_system_type(void) return soc_info.sys_type; } -static __init void prom_init_cmdline(int argc, char **argv) +static __init void prom_init_cmdline(void) { + int argc; + char **argv; int i; pr_debug("prom: fw_arg0=%08x fw_arg1=%08x fw_arg2=%08x fw_arg3=%08x\n", @@ -54,14 +56,11 @@ static __init void prom_init_cmdline(int argc, char **argv) void __init prom_init(void) { - int argc; - char **argv; - prom_soc_init(&soc_info); pr_info("SoC Type: %s\n", get_system_type()); - prom_init_cmdline(argc, argv); + prom_init_cmdline(); } void __init prom_free_prom_memory(void) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 17483a492f18..0c59df3664d5 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -165,6 +165,7 @@ struct kimage_arch { typedef void crash_vmclear_fn(void); extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; +extern void kdump_nmi_shootdown_cpus(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 8cd27e08e23c..63baf16934d0 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -69,6 +69,7 @@ struct smp_ops { void (*smp_cpus_done)(unsigned max_cpus); void (*stop_other_cpus)(int wait); + void (*crash_stop_other_cpus)(void); void (*smp_send_reschedule)(int cpu); int (*cpu_up)(unsigned cpu, struct task_struct *tidle); diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 507de8066594..19f10dac8fe6 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -82,7 +82,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) disable_local_APIC(); } -static void kdump_nmi_shootdown_cpus(void) +void kdump_nmi_shootdown_cpus(void) { in_crash_kexec = 1; nmi_shootdown_cpus(kdump_nmi_callback); @@ -90,8 +90,24 @@ static void kdump_nmi_shootdown_cpus(void) disable_local_APIC(); } +/* Override the weak function in kernel/panic.c */ +void crash_smp_send_stop(void) +{ + static int cpus_stopped; + + if (cpus_stopped) + return; + + if (smp_ops.crash_stop_other_cpus) + smp_ops.crash_stop_other_cpus(); + else + smp_send_stop(); + + cpus_stopped = 1; +} + #else -static void kdump_nmi_shootdown_cpus(void) +void crash_smp_send_stop(void) { /* There are no cpus to shootdown */ } @@ -110,7 +126,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) /* The kernel is broken so disable interrupts */ local_irq_disable(); - kdump_nmi_shootdown_cpus(); + crash_smp_send_stop(); /* * VMCLEAR VMCSs loaded on this cpu if needed. diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index be8e1bde07aa..00e67d05cbd0 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -31,6 +31,8 @@ #include #include #include +#include + /* * Some notes on x86 processor bugs affecting SMP operation: * @@ -347,6 +349,9 @@ struct smp_ops smp_ops = { .smp_cpus_done = native_smp_cpus_done, .stop_other_cpus = native_stop_other_cpus, +#if defined(CONFIG_KEXEC_CORE) + .crash_stop_other_cpus = kdump_nmi_shootdown_cpus, +#endif .smp_send_reschedule = native_smp_send_reschedule, .cpu_up = native_cpu_up, diff --git a/crypto/Makefile b/crypto/Makefile index 38e64231dcd3..52c3eb6a997f 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o +CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o obj-$(CONFIG_CRYPTO_ECB) += ecb.o @@ -67,6 +68,7 @@ obj-$(CONFIG_CRYPTO_BLOWFISH_COMMON) += blowfish_common.o obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o +CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 4217f29a85e0..816087b68ec4 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -1175,7 +1175,7 @@ static int rx_pkt(struct atm_dev *dev) if (!(skb = atm_alloc_charge(vcc, len, GFP_ATOMIC))) { if (vcc->vci < 32) printk("Drop control packets\n"); - goto out_free_desc; + goto out_free_desc; } skb_put(skb,len); // pwang_test diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 197128ed8225..ee1af4565828 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1119,7 +1119,7 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct return; /* report the usage code as scancode if the key status has changed */ - if (usage->type == EV_KEY && !!test_bit(usage->code, input->key) != value) + if (usage->type == EV_KEY && (!!test_bit(usage->code, input->key)) != value) input_event(input, EV_MSC, MSC_SCAN, usage->hid); input_event(input, usage->type, usage->code, value); diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index 97823c68600a..bec1b97a56df 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -294,7 +294,7 @@ static void sunxi_mmc_init_idma_des(struct sunxi_mmc_host *host, struct mmc_data *data) { struct sunxi_idma_des *pdes = (struct sunxi_idma_des *)host->sg_cpu; - struct sunxi_idma_des *pdes_pa = (struct sunxi_idma_des *)host->sg_dma; + dma_addr_t next_desc = host->sg_dma; int i, max_len = (1 << host->idma_des_size_bits); for (i = 0; i < data->sg_len; i++) { @@ -306,8 +306,9 @@ static void sunxi_mmc_init_idma_des(struct sunxi_mmc_host *host, else pdes[i].buf_size = data->sg[i].length; + next_desc += sizeof(struct sunxi_idma_des); pdes[i].buf_addr_ptr1 = sg_dma_address(&data->sg[i]); - pdes[i].buf_addr_ptr2 = (u32)&pdes_pa[i + 1]; + pdes[i].buf_addr_ptr2 = (u32)next_desc; } pdes[0].config |= SDXC_IDMAC_DES0_FD; diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index 2a2a54db86ff..91f29f2975c1 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -1231,8 +1231,8 @@ static int s3c24xx_serial_init_port(struct s3c24xx_uart_port *ourport, wr_regl(port, S3C64XX_UINTSP, 0xf); } - dbg("port: map=%08x, mem=%p, irq=%d (%d,%d), clock=%u\n", - port->mapbase, port->membase, port->irq, + dbg("port: map=%pa, mem=%p, irq=%d (%d,%d), clock=%u\n", + &port->mapbase, port->membase, port->irq, ourport->rx_irq, ourport->tx_irq, port->uartclk); /* reset the fifos (and setup the uart) */ diff --git a/fs/aio.c b/fs/aio.c index 7aaa4164bba5..3e6d6974434c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -226,6 +226,7 @@ static int __init aio_setup(void) .name = "aio", .mount = aio_mount, .kill_sb = kill_anon_super, + .fs_flags = FS_NOEXEC, }; aio_mnt = kern_mount(&aio_fs); if (IS_ERR(aio_mnt)) diff --git a/fs/compat.c b/fs/compat.c index 66d3d3c6b4b2..6205c247a6e3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -797,8 +797,9 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, struct filename *dir; int retval; - retval = copy_mount_string(type, &kernel_type); - if (retval < 0) + kernel_type = copy_mount_string(type); + retval = PTR_ERR(kernel_type); + if (IS_ERR(kernel_type)) goto out; dir = getname(dir_name); @@ -806,8 +807,9 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, if (IS_ERR(dir)) goto out1; - retval = copy_mount_string(dev_name, &kernel_dev); - if (retval < 0) + kernel_dev = copy_mount_string(dev_name); + retval = PTR_ERR(kernel_dev); + if (IS_ERR(kernel_dev)) goto out2; retval = copy_mount_options(data, &data_page); diff --git a/fs/exec.c b/fs/exec.c index 5b653a126b20..8cb7fc4ab789 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -98,6 +98,12 @@ static inline void put_binfmt(struct linux_binfmt * fmt) module_put(fmt->module); } +bool path_noexec(const struct path *path) +{ + return (path->mnt->mnt_flags & MNT_NOEXEC) || + (path->mnt->mnt_sb->s_type->fs_flags & FS_NOEXEC); +} + #ifdef CONFIG_USELIB /* * Note that a shared library must be both readable and executable due to @@ -132,7 +138,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) goto exit; error = -EACCES; - if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) + if (path_noexec(&file->f_path)) goto exit; fsnotify_open(file); @@ -773,7 +779,7 @@ static struct file *do_open_exec(struct filename *name) if (!S_ISREG(file_inode(file)->i_mode)) goto exit; - if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) + if (path_noexec(&file->f_path)) goto exit; fsnotify_open(file); diff --git a/fs/internal.h b/fs/internal.h index 465742407466..3ce29c92cc44 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -51,7 +51,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, * namespace.c */ extern int copy_mount_options(const void __user *, unsigned long *); -extern int copy_mount_string(const void __user *, char **); +extern char *copy_mount_string(const void __user *); extern struct vfsmount *lookup_mnt(struct path *); extern int finish_automount(struct vfsmount *, struct path *); diff --git a/fs/namespace.c b/fs/namespace.c index c2eb7ec1bb08..c1c0c8179498 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2482,21 +2482,9 @@ int copy_mount_options(const void __user * data, unsigned long *where) return 0; } -int copy_mount_string(const void __user *data, char **where) +char *copy_mount_string(const void __user *data) { - char *tmp; - - if (!data) { - *where = NULL; - return 0; - } - - tmp = strndup_user(data, PAGE_SIZE); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - - *where = tmp; - return 0; + return data ? strndup_user(data, PAGE_SIZE) : NULL; } /* @@ -2766,8 +2754,9 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, char *kernel_dev; unsigned long data_page; - ret = copy_mount_string(type, &kernel_type); - if (ret < 0) + kernel_type = copy_mount_string(type); + ret = PTR_ERR(kernel_type); + if (IS_ERR(kernel_type)) goto out_type; kernel_dir = getname(dir_name); @@ -2776,8 +2765,9 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, goto out_dir; } - ret = copy_mount_string(dev_name, &kernel_dev); - if (ret < 0) + kernel_dev = copy_mount_string(dev_name); + ret = PTR_ERR(kernel_dev); + if (IS_ERR(kernel_dev)) goto out_dev; ret = copy_mount_options(data, &data_page); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1522ec81c26f..e07fbf97a93b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3000,16 +3000,13 @@ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info, bool auth_probe) { - int status; + int status = 0; - switch (auth_probe) { - case false: + if (!auth_probe) status = nfs4_lookup_root(server, fhandle, info); - if (status != -NFS4ERR_WRONGSEC) - break; - default: + + if (auth_probe || status == NFS4ERR_WRONGSEC) status = nfs4_do_find_root_sec(server, fhandle, info); - } if (status == 0) status = nfs4_server_capabilities(server, fhandle); diff --git a/fs/open.c b/fs/open.c index 79a52f146182..fc44237e4a2e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -359,7 +359,7 @@ retry: * with the "noexec" flag. */ res = -EACCES; - if (path.mnt->mnt_flags & MNT_NOEXEC) + if (path_noexec(&path)) goto out_path_release; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 98992cb44c35..d81d0baeb772 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -161,7 +161,7 @@ static struct file_system_type proc_fs_type = { .name = "proc", .mount = proc_mount, .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT | FS_NOEXEC, }; void __init proc_root_init(void) diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 1c6ac6fcee9f..0e6c02f27140 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -40,6 +40,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, SYSFS_MAGIC, &new_sb, ns); if (IS_ERR(root) || !new_sb) kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); + return root; } @@ -55,7 +56,7 @@ static struct file_system_type sysfs_fs_type = { .name = "sysfs", .mount = sysfs_mount, .kill_sb = sysfs_kill_sb, - .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT | FS_NOEXEC, }; int __init sysfs_init(void) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d19d996482ab..2ec13bc4deb3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -620,7 +620,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -#define rq_data_dir(rq) (((rq)->cmd_flags & 1) != 0) +#define rq_data_dir(rq) ((int)((rq)->cmd_flags & 1)) /* * Driver can handle struct request, if it either has an old style diff --git a/include/linux/fs.h b/include/linux/fs.h index 3b69a82bbdd9..da912e6cd2ea 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1767,6 +1767,7 @@ struct file_system_type { #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ #define FS_USERNS_VISIBLE 32 /* FS must already be visible */ +#define FS_NOEXEC 64 /* Ignore executables on this fs */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); @@ -2782,4 +2783,6 @@ static inline bool dir_relax(struct inode *inode) return !IS_DEADDIR(inode); } +extern bool path_noexec(const struct path *path); + #endif /* _LINUX_FS_H */ diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 1a85940f8ab7..8a8135c4e99a 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -106,8 +106,10 @@ struct nlmsgerr { #define NETLINK_PKTINFO 3 #define NETLINK_BROADCAST_ERROR 4 #define NETLINK_NO_ENOBUFS 5 +#ifndef __KERNEL__ #define NETLINK_RX_RING 6 #define NETLINK_TX_RING 7 +#endif struct nl_pktinfo { __u32 group; @@ -130,6 +132,7 @@ struct nl_mmap_hdr { __u32 nm_gid; }; +#ifndef __KERNEL__ enum nl_mmap_status { NL_MMAP_STATUS_UNUSED, NL_MMAP_STATUS_RESERVED, @@ -141,6 +144,7 @@ enum nl_mmap_status { #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO #define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) +#endif #define NET_MAJOR 36 /* Major 36 is reserved for networking */ diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index f2159d30d1f5..d79399394b46 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -48,6 +48,8 @@ enum { #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ +#ifndef __KERNEL__ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ +#endif #endif diff --git a/kernel/panic.c b/kernel/panic.c index 51266521e173..4de988c2aaec 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -60,6 +60,32 @@ void __weak panic_smp_self_stop(void) cpu_relax(); } +/* + * Stop other CPUs in panic. Architecture dependent code may override this + * with more suitable version. For example, if the architecture supports + * crash dump, it should save registers of each stopped CPU and disable + * per-CPU features such as virtualization extensions. + */ +void __weak crash_smp_send_stop(void) +{ + static int cpus_stopped; + + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + /* + * Note smp_send_stop is the usual smp shutdown function, which + * unfortunately means it may not be hardened to work in a panic + * situation. + */ + smp_send_stop(); + cpus_stopped = 1; +} + /** * panic - halt the system * @fmt: The text string to print @@ -117,15 +143,23 @@ void panic(const char *fmt, ...) * If we want to run this after calling panic_notifiers, pass * the "crash_kexec_post_notifiers" option to the kernel. */ - if (!crash_kexec_post_notifiers) + if (!crash_kexec_post_notifiers) { crash_kexec(NULL); - /* - * Note smp_send_stop is the usual smp shutdown function, which - * unfortunately means it may not be hardened to work in a panic - * situation. - */ - smp_send_stop(); + /* + * Note smp_send_stop is the usual smp shutdown function, which + * unfortunately means it may not be hardened to work in a + * panic situation. + */ + smp_send_stop(); + } else { + /* + * If we want to do crash dump after notifier calls and + * kmsg_dump, we will need architecture dependent extra + * works in addition to stopping other CPUs. + */ + crash_smp_send_stop(); + } /* * Run any panic handlers, including those that might need to diff --git a/kernel/sys.c b/kernel/sys.c index 66a751ebf9d9..6fe6c5986c59 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1646,8 +1646,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) * overall picture. */ err = -EACCES; - if (!S_ISREG(inode->i_mode) || - exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) + if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path)) goto exit; err = inode_permission(inode, MAY_EXEC); diff --git a/mm/mmap.c b/mm/mmap.c index 2859a1cb378a..b49641901093 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1250,7 +1250,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, * mounted, in which case we dont add PROT_EXEC.) */ if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) - if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC))) + if (!(file && path_noexec(&file->f_path))) prot |= PROT_EXEC; if (!len) @@ -1322,7 +1322,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, case MAP_PRIVATE: if (!(file->f_mode & FMODE_READ)) return -EACCES; - if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) { + if (path_noexec(&file->f_path)) { if (vm_flags & VM_EXEC) return -EPERM; vm_flags &= ~VM_MAYEXEC; diff --git a/mm/nommu.c b/mm/nommu.c index 2991b074aeae..53a59f41e2e5 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1043,7 +1043,7 @@ static int validate_mmap_request(struct file *file, /* handle executable mappings and implied executable * mappings */ - if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) { + if (path_noexec(&file->f_path)) { if (prot & PROT_EXEC) return -EPERM; } else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) { diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig index 2c5e95e9bfbd..5d6e8c05b3d4 100644 --- a/net/netlink/Kconfig +++ b/net/netlink/Kconfig @@ -2,15 +2,6 @@ # Netlink Sockets # -config NETLINK_MMAP - bool "NETLINK: mmaped IO" - ---help--- - This option enables support for memory mapped netlink IO. This - reduces overhead by avoiding copying data between kernel- and - userspace. - - If unsure, say N. - config NETLINK_DIAG tristate "NETLINK: socket monitoring interface" default n diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 381b004c857a..3bf0c16023ec 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -218,7 +218,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, dev_hold(dev); - if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) + if (is_vmalloc_addr(skb->head)) nskb = netlink_to_full_skb(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -292,599 +292,8 @@ static void netlink_rcv_wake(struct sock *sk) wake_up_interruptible(&nlk->wait); } -#ifdef CONFIG_NETLINK_MMAP -static bool netlink_rx_is_mmaped(struct sock *sk) -{ - return nlk_sk(sk)->rx_ring.pg_vec != NULL; -} - -static bool netlink_tx_is_mmaped(struct sock *sk) -{ - return nlk_sk(sk)->tx_ring.pg_vec != NULL; -} - -static __pure struct page *pgvec_to_page(const void *addr) -{ - if (is_vmalloc_addr(addr)) - return vmalloc_to_page(addr); - else - return virt_to_page(addr); -} - -static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) -{ - unsigned int i; - - for (i = 0; i < len; i++) { - if (pg_vec[i] != NULL) { - if (is_vmalloc_addr(pg_vec[i])) - vfree(pg_vec[i]); - else - free_pages((unsigned long)pg_vec[i], order); - } - } - kfree(pg_vec); -} - -static void *alloc_one_pg_vec_page(unsigned long order) -{ - void *buffer; - gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | - __GFP_NOWARN | __GFP_NORETRY; - - buffer = (void *)__get_free_pages(gfp_flags, order); - if (buffer != NULL) - return buffer; - - buffer = vzalloc((1 << order) * PAGE_SIZE); - if (buffer != NULL) - return buffer; - - gfp_flags &= ~__GFP_NORETRY; - return (void *)__get_free_pages(gfp_flags, order); -} - -static void **alloc_pg_vec(struct netlink_sock *nlk, - struct nl_mmap_req *req, unsigned int order) -{ - unsigned int block_nr = req->nm_block_nr; - unsigned int i; - void **pg_vec; - - pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); - if (pg_vec == NULL) - return NULL; - - for (i = 0; i < block_nr; i++) { - pg_vec[i] = alloc_one_pg_vec_page(order); - if (pg_vec[i] == NULL) - goto err1; - } - - return pg_vec; -err1: - free_pg_vec(pg_vec, order, block_nr); - return NULL; -} - - -static void -__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, - unsigned int order) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct sk_buff_head *queue; - struct netlink_ring *ring; - - queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; - ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - - spin_lock_bh(&queue->lock); - - ring->frame_max = req->nm_frame_nr - 1; - ring->head = 0; - ring->frame_size = req->nm_frame_size; - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; - - swap(ring->pg_vec_len, req->nm_block_nr); - swap(ring->pg_vec_order, order); - swap(ring->pg_vec, pg_vec); - - __skb_queue_purge(queue); - spin_unlock_bh(&queue->lock); - - WARN_ON(atomic_read(&nlk->mapped)); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->nm_block_nr); -} - -static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, - bool tx_ring) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - void **pg_vec = NULL; - unsigned int order = 0; - - ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - - if (atomic_read(&nlk->mapped)) - return -EBUSY; - if (atomic_read(&ring->pending)) - return -EBUSY; - - if (req->nm_block_nr) { - if (ring->pg_vec != NULL) - return -EBUSY; - - if ((int)req->nm_block_size <= 0) - return -EINVAL; - if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE)) - return -EINVAL; - if (req->nm_frame_size < NL_MMAP_HDRLEN) - return -EINVAL; - if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT)) - return -EINVAL; - - ring->frames_per_block = req->nm_block_size / - req->nm_frame_size; - if (ring->frames_per_block == 0) - return -EINVAL; - if (ring->frames_per_block * req->nm_block_nr != - req->nm_frame_nr) - return -EINVAL; - - order = get_order(req->nm_block_size); - pg_vec = alloc_pg_vec(nlk, req, order); - if (pg_vec == NULL) - return -ENOMEM; - } else { - if (req->nm_frame_nr) - return -EINVAL; - } - - mutex_lock(&nlk->pg_vec_lock); - if (atomic_read(&nlk->mapped) == 0) { - __netlink_set_ring(sk, req, tx_ring, pg_vec, order); - mutex_unlock(&nlk->pg_vec_lock); - return 0; - } - - mutex_unlock(&nlk->pg_vec_lock); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->nm_block_nr); - - return -EBUSY; -} - -static void netlink_mm_open(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_inc(&nlk_sk(sk)->mapped); -} - -static void netlink_mm_close(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_dec(&nlk_sk(sk)->mapped); -} - -static const struct vm_operations_struct netlink_mmap_ops = { - .open = netlink_mm_open, - .close = netlink_mm_close, -}; - -static int netlink_mmap(struct file *file, struct socket *sock, - struct vm_area_struct *vma) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - unsigned long start, size, expected; - unsigned int i; - int err = -EINVAL; - - if (vma->vm_pgoff) - return -EINVAL; - - mutex_lock(&nlk->pg_vec_lock); - - expected = 0; - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { - if (ring->pg_vec == NULL) - continue; - expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE; - } - - if (expected == 0) - goto out; - - size = vma->vm_end - vma->vm_start; - if (size != expected) - goto out; - - start = vma->vm_start; - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { - if (ring->pg_vec == NULL) - continue; - - for (i = 0; i < ring->pg_vec_len; i++) { - struct page *page; - void *kaddr = ring->pg_vec[i]; - unsigned int pg_num; - - for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) { - page = pgvec_to_page(kaddr); - err = vm_insert_page(vma, start, page); - if (err < 0) - goto out; - start += PAGE_SIZE; - kaddr += PAGE_SIZE; - } - } - } - - atomic_inc(&nlk->mapped); - vma->vm_ops = &netlink_mmap_ops; - err = 0; -out: - mutex_unlock(&nlk->pg_vec_lock); - return err; -} - -static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) -{ -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 - struct page *p_start, *p_end; - - /* First page is flushed through netlink_{get,set}_status */ - p_start = pgvec_to_page(hdr + PAGE_SIZE); - p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); - while (p_start <= p_end) { - flush_dcache_page(p_start); - p_start++; - } -#endif -} - -static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) -{ - smp_rmb(); - flush_dcache_page(pgvec_to_page(hdr)); - return hdr->nm_status; -} - -static void netlink_set_status(struct nl_mmap_hdr *hdr, - enum nl_mmap_status status) -{ - smp_mb(); - hdr->nm_status = status; - flush_dcache_page(pgvec_to_page(hdr)); -} - -static struct nl_mmap_hdr * -__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos) -{ - unsigned int pg_vec_pos, frame_off; - - pg_vec_pos = pos / ring->frames_per_block; - frame_off = pos % ring->frames_per_block; - - return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); -} - -static struct nl_mmap_hdr * -netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, - enum nl_mmap_status status) -{ - struct nl_mmap_hdr *hdr; - - hdr = __netlink_lookup_frame(ring, pos); - if (netlink_get_status(hdr) != status) - return NULL; - - return hdr; -} - -static struct nl_mmap_hdr * -netlink_current_frame(const struct netlink_ring *ring, - enum nl_mmap_status status) -{ - return netlink_lookup_frame(ring, ring->head, status); -} - -static struct nl_mmap_hdr * -netlink_previous_frame(const struct netlink_ring *ring, - enum nl_mmap_status status) -{ - unsigned int prev; - - prev = ring->head ? ring->head - 1 : ring->frame_max; - return netlink_lookup_frame(ring, prev, status); -} - -static void netlink_increment_head(struct netlink_ring *ring) -{ - ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0; -} - -static void netlink_forward_ring(struct netlink_ring *ring) -{ - unsigned int head = ring->head, pos = head; - const struct nl_mmap_hdr *hdr; - - do { - hdr = __netlink_lookup_frame(ring, pos); - if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) - break; - if (hdr->nm_status != NL_MMAP_STATUS_SKIP) - break; - netlink_increment_head(ring); - } while (ring->head != head); -} - -static bool netlink_dump_space(struct netlink_sock *nlk) -{ - struct netlink_ring *ring = &nlk->rx_ring; - struct nl_mmap_hdr *hdr; - unsigned int n; - - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) - return false; - - n = ring->head + ring->frame_max / 2; - if (n > ring->frame_max) - n -= ring->frame_max; - - hdr = __netlink_lookup_frame(ring, n); - - return hdr->nm_status == NL_MMAP_STATUS_UNUSED; -} - -static unsigned int netlink_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - unsigned int mask; - int err; - - if (nlk->rx_ring.pg_vec != NULL) { - /* Memory mapped sockets don't call recvmsg(), so flow control - * for dumps is performed here. A dump is allowed to continue - * if at least half the ring is unused. - */ - while (nlk->cb_running && netlink_dump_space(nlk)) { - err = netlink_dump(sk); - if (err < 0) { - sk->sk_err = -err; - sk->sk_error_report(sk); - break; - } - } - netlink_rcv_wake(sk); - } - - mask = datagram_poll(file, sock, wait); - - spin_lock_bh(&sk->sk_receive_queue.lock); - if (nlk->rx_ring.pg_vec) { - netlink_forward_ring(&nlk->rx_ring); - if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED)) - mask |= POLLIN | POLLRDNORM; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - - spin_lock_bh(&sk->sk_write_queue.lock); - if (nlk->tx_ring.pg_vec) { - if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) - mask |= POLLOUT | POLLWRNORM; - } - spin_unlock_bh(&sk->sk_write_queue.lock); - - return mask; -} - -static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) -{ - return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); -} - -static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, - struct netlink_ring *ring, - struct nl_mmap_hdr *hdr) -{ - unsigned int size; - void *data; - - size = ring->frame_size - NL_MMAP_HDRLEN; - data = (void *)hdr + NL_MMAP_HDRLEN; - - skb->head = data; - skb->data = data; - skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; - skb->len = 0; - - skb->destructor = netlink_skb_destructor; - NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; - NETLINK_CB(skb).sk = sk; -} - -static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, - u32 dst_portid, u32 dst_group, - struct sock_iocb *siocb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - struct nl_mmap_hdr *hdr; - struct sk_buff *skb; - unsigned int maxlen; - int err = 0, len = 0; - - mutex_lock(&nlk->pg_vec_lock); - - ring = &nlk->tx_ring; - maxlen = ring->frame_size - NL_MMAP_HDRLEN; - - do { - unsigned int nm_len; - - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); - if (hdr == NULL) { - if (!(msg->msg_flags & MSG_DONTWAIT) && - atomic_read(&nlk->tx_ring.pending)) - schedule(); - continue; - } - - nm_len = ACCESS_ONCE(hdr->nm_len); - if (nm_len > maxlen) { - err = -EINVAL; - goto out; - } - - netlink_frame_flush_dcache(hdr, nm_len); - - skb = alloc_skb(nm_len, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - __skb_put(skb, nm_len); - memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - - netlink_increment_head(ring); - - NETLINK_CB(skb).portid = nlk->portid; - NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; - - err = security_netlink_send(sk, skb); - if (err) { - kfree_skb(skb); - goto out; - } - - if (unlikely(dst_group)) { - atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_portid, dst_group, - GFP_KERNEL); - } - err = netlink_unicast(sk, skb, dst_portid, - msg->msg_flags & MSG_DONTWAIT); - if (err < 0) - goto out; - len += err; - - } while (hdr != NULL || - (!(msg->msg_flags & MSG_DONTWAIT) && - atomic_read(&nlk->tx_ring.pending))); - - if (len > 0) - err = len; -out: - mutex_unlock(&nlk->pg_vec_lock); - return err; -} - -static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) -{ - struct nl_mmap_hdr *hdr; - - hdr = netlink_mmap_hdr(skb); - hdr->nm_len = skb->len; - hdr->nm_group = NETLINK_CB(skb).dst_group; - hdr->nm_pid = NETLINK_CB(skb).creds.pid; - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_frame_flush_dcache(hdr, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); - - NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; - kfree_skb(skb); -} - -static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring = &nlk->rx_ring; - struct nl_mmap_hdr *hdr; - - spin_lock_bh(&sk->sk_receive_queue.lock); - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) { - spin_unlock_bh(&sk->sk_receive_queue.lock); - kfree_skb(skb); - netlink_overrun(sk); - return; - } - netlink_increment_head(ring); - __skb_queue_tail(&sk->sk_receive_queue, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - - hdr->nm_len = skb->len; - hdr->nm_group = NETLINK_CB(skb).dst_group; - hdr->nm_pid = NETLINK_CB(skb).creds.pid; - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_set_status(hdr, NL_MMAP_STATUS_COPY); -} - -#else /* CONFIG_NETLINK_MMAP */ -#define netlink_rx_is_mmaped(sk) false -#define netlink_tx_is_mmaped(sk) false -#define netlink_mmap sock_no_mmap -#define netlink_poll datagram_poll -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 -#endif /* CONFIG_NETLINK_MMAP */ - static void netlink_skb_destructor(struct sk_buff *skb) { -#ifdef CONFIG_NETLINK_MMAP - struct nl_mmap_hdr *hdr; - struct netlink_ring *ring; - struct sock *sk; - - /* If a packet from the kernel to userspace was freed because of an - * error without being delivered to userspace, the kernel must reset - * the status. In the direction userspace to kernel, the status is - * always reset here after the packet was processed and freed. - */ - if (netlink_skb_is_mmaped(skb)) { - hdr = netlink_mmap_hdr(skb); - sk = NETLINK_CB(skb).sk; - - if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - ring = &nlk_sk(sk)->tx_ring; - } else { - if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { - hdr->nm_len = 0; - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); - } - ring = &nlk_sk(sk)->rx_ring; - } - - WARN_ON(atomic_read(&ring->pending) == 0); - atomic_dec(&ring->pending); - sock_put(sk); - - skb->head = NULL; - } -#endif if (is_vmalloc_addr(skb->head)) { if (!skb->cloned || !atomic_dec_return(&(skb_shinfo(skb)->dataref))) @@ -918,18 +327,6 @@ static void netlink_sock_destruct(struct sock *sk) } skb_queue_purge(&sk->sk_receive_queue); -#ifdef CONFIG_NETLINK_MMAP - if (1) { - struct nl_mmap_req req; - - memset(&req, 0, sizeof(req)); - if (nlk->rx_ring.pg_vec) - __netlink_set_ring(sk, &req, false, NULL, 0); - memset(&req, 0, sizeof(req)); - if (nlk->tx_ring.pg_vec) - __netlink_set_ring(sk, &req, true, NULL, 0); - } -#endif /* CONFIG_NETLINK_MMAP */ if (!sock_flag(sk, SOCK_DEAD)) { printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); @@ -1202,9 +599,6 @@ static int __netlink_create(struct net *net, struct socket *sock, mutex_init(nlk->cb_mutex); } init_waitqueue_head(&nlk->wait); -#ifdef CONFIG_NETLINK_MMAP - mutex_init(&nlk->pg_vec_lock); -#endif sk->sk_destruct = netlink_sock_destruct; sk->sk_protocol = protocol; @@ -1708,8 +1102,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, nlk = nlk_sk(sk); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_CONGESTED, &nlk->state)) && - !netlink_skb_is_mmaped(skb)) { + test_bit(NETLINK_CONGESTED, &nlk->state))) { DECLARE_WAITQUEUE(wait, current); if (!*timeo) { if (!ssk || netlink_is_kernel(ssk)) @@ -1747,14 +1140,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) netlink_deliver_tap(skb); -#ifdef CONFIG_NETLINK_MMAP - if (netlink_skb_is_mmaped(skb)) - netlink_queue_mmaped_skb(sk, skb); - else if (netlink_rx_is_mmaped(sk)) - netlink_ring_set_copied(sk, skb); - else -#endif /* CONFIG_NETLINK_MMAP */ - skb_queue_tail(&sk->sk_receive_queue, skb); + skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); return len; } @@ -1778,9 +1164,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) int delta; WARN_ON(skb->sk != NULL); - if (netlink_skb_is_mmaped(skb)) - return skb; - delta = skb->end - skb->tail; if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) return skb; @@ -1860,71 +1243,6 @@ struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size, unsigned int ldiff, u32 dst_portid, gfp_t gfp_mask) { -#ifdef CONFIG_NETLINK_MMAP - unsigned int maxlen, linear_size; - struct sock *sk = NULL; - struct sk_buff *skb; - struct netlink_ring *ring; - struct nl_mmap_hdr *hdr; - - sk = netlink_getsockbyportid(ssk, dst_portid); - if (IS_ERR(sk)) - goto out; - - ring = &nlk_sk(sk)->rx_ring; - /* fast-path without atomic ops for common case: non-mmaped receiver */ - if (ring->pg_vec == NULL) - goto out_put; - - /* We need to account the full linear size needed as a ring - * slot cannot have non-linear parts. - */ - linear_size = size + ldiff; - if (ring->frame_size - NL_MMAP_HDRLEN < linear_size) - goto out_put; - - skb = alloc_skb_head(gfp_mask); - if (skb == NULL) - goto err1; - - spin_lock_bh(&sk->sk_receive_queue.lock); - /* check again under lock */ - if (ring->pg_vec == NULL) - goto out_free; - - /* check again under lock */ - maxlen = ring->frame_size - NL_MMAP_HDRLEN; - if (maxlen < linear_size) - goto out_free; - - netlink_forward_ring(ring); - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) - goto err2; - - netlink_ring_setup_skb(skb, sk, ring, hdr); - netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); - atomic_inc(&ring->pending); - netlink_increment_head(ring); - - spin_unlock_bh(&sk->sk_receive_queue.lock); - return skb; - -err2: - kfree_skb(skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - netlink_overrun(sk); -err1: - sock_put(sk); - return NULL; - -out_free: - kfree_skb(skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); -out_put: - sock_put(sk); -out: -#endif return alloc_skb(size, gfp_mask); } EXPORT_SYMBOL_GPL(__netlink_alloc_skb); @@ -2189,8 +1507,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_NETLINK) return -ENOPROTOOPT; - if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && - optlen >= sizeof(int) && + if (optlen >= sizeof(int) && get_user(val, (unsigned int __user *)optval)) return -EFAULT; @@ -2243,25 +1560,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, } err = 0; break; -#ifdef CONFIG_NETLINK_MMAP - case NETLINK_RX_RING: - case NETLINK_TX_RING: { - struct nl_mmap_req req; - - /* Rings might consume more memory than queue limits, require - * CAP_NET_ADMIN. - */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (optlen < sizeof(req)) - return -EINVAL; - if (copy_from_user(&req, optval, sizeof(req))) - return -EFAULT; - err = netlink_set_ring(sk, &req, - optname == NETLINK_TX_RING); - break; - } -#endif /* CONFIG_NETLINK_MMAP */ default: err = -ENOPROTOOPT; } @@ -2374,13 +1672,6 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } - if (netlink_tx_is_mmaped(sk) && - msg->msg_iov->iov_base == NULL) { - err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, - siocb); - goto out; - } - err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32) goto out; @@ -2704,8 +1995,7 @@ static int netlink_dump(struct sock *sk) goto errout_skb; } - if (!netlink_rx_is_mmaped(sk) && - atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; /* NLMSG_GOODSIZE is small to avoid high order allocations being @@ -2740,8 +2030,7 @@ static int netlink_dump(struct sock *sk) * reasonable static buffer based on the expected largest dump of a * single netdev. The outcome is MSG_TRUNC error. */ - if (!netlink_rx_is_mmaped(sk)) - skb_reserve(skb, skb_tailroom(skb) - alloc_size); + skb_reserve(skb, skb_tailroom(skb) - alloc_size); netlink_skb_set_owner_r(skb, sk); len = cb->dump(skb, cb); @@ -2795,16 +2084,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct netlink_sock *nlk; int ret; - /* Memory mapped dump requests need to be copied to avoid looping - * on the pending state in netlink_mmap_sendmsg() while the CB hold - * a reference to the skb. - */ - if (netlink_skb_is_mmaped(skb)) { - skb = skb_copy(skb, GFP_KERNEL); - if (skb == NULL) - return -ENOBUFS; - } else - atomic_inc(&skb->users); + atomic_inc(&skb->users); sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { @@ -3140,7 +2420,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll = netlink_poll, + .poll = datagram_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -3148,7 +2428,7 @@ static const struct proto_ops netlink_ops = { .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, - .mmap = netlink_mmap, + .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 4b0a5eb0c6b4..3e696636e070 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -41,12 +41,6 @@ struct netlink_sock { int (*netlink_bind)(int group); void (*netlink_unbind)(int group); struct module *module; -#ifdef CONFIG_NETLINK_MMAP - struct mutex pg_vec_lock; - struct netlink_ring rx_ring; - struct netlink_ring tx_ring; - atomic_t mapped; -#endif /* CONFIG_NETLINK_MMAP */ }; static inline struct netlink_sock *nlk_sk(struct sock *sk) @@ -67,15 +61,6 @@ struct nl_portid_hash { u32 rnd; }; -static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) -{ -#ifdef CONFIG_NETLINK_MMAP - return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; -#else - return false; -#endif /* CONFIG_NETLINK_MMAP */ -} - struct netlink_table { struct nl_portid_hash hash; struct hlist_head mc_list; diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 1af29624b92f..5ffb1d1cf402 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -7,41 +7,6 @@ #include "af_netlink.h" -#ifdef CONFIG_NETLINK_MMAP -static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, - struct sk_buff *nlskb) -{ - struct netlink_diag_ring ndr; - - ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; - ndr.ndr_block_nr = ring->pg_vec_len; - ndr.ndr_frame_size = ring->frame_size; - ndr.ndr_frame_nr = ring->frame_max + 1; - - return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); -} - -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - int ret; - - mutex_lock(&nlk->pg_vec_lock); - ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb); - if (!ret) - ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, - nlskb); - mutex_unlock(&nlk->pg_vec_lock); - - return ret; -} -#else -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) -{ - return 0; -} -#endif - static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) { struct netlink_sock *nlk = nlk_sk(sk); @@ -86,10 +51,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) goto out_nlmsg_trim; - if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && - sk_diag_put_rings_cfg(sk, skb)) - goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); out_nlmsg_trim: diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 860345cb05f1..796256db1004 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -848,6 +848,9 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, return ERR_PTR(err); } + if (!ctx->match) + return ERR_PTR(-ENOKEY); + rcu_read_lock(); ctx->now = current_kernel_time(); if (search_nested_keyrings(keyring, ctx)) @@ -879,9 +882,6 @@ key_ref_t keyring_search(key_ref_t keyring, KEYRING_SEARCH_DO_STATE_CHECK), }; - if (!ctx.match) - return ERR_PTR(-ENOKEY); - return keyring_search_aux(keyring, &ctx); } EXPORT_SYMBOL(keyring_search); diff --git a/security/security.c b/security/security.c index 31614e9e96e5..7c9f959c298e 100644 --- a/security/security.c +++ b/security/security.c @@ -728,7 +728,7 @@ static inline unsigned long mmap_prot(struct file *file, unsigned long prot) * ditto if it's not on noexec mount, except that on !MMU we need * BDI_CAP_EXEC_MMAP (== VM_MAYEXEC) in this case */ - if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { + if (!path_noexec(&file->f_path)) { #ifndef CONFIG_MMU unsigned long caps = 0; struct address_space *mapping = file->f_mapping;