author | 2014-11-22 15:14:03 -0500
---|---
committer | 2014-11-22 15:14:03 -0500
commit | 2eed7d083266b0fb026036cd7a4f183f0dfe4e84 (patch)
tree | 05df242263a06dc966452b1472663765ef00d86b
parent | Linux patch 3.10.60 (diff)
download | linux-patches-2eed7d083266b0fb026036cd7a4f183f0dfe4e84.tar.gz linux-patches-2eed7d083266b0fb026036cd7a4f183f0dfe4e84.tar.bz2 linux-patches-2eed7d083266b0fb026036cd7a4f183f0dfe4e84.zip
Linux patch 3.10.61
-rw-r--r-- | 0000_README | 4
-rw-r--r-- | 1060_linux-3.10.61.patch | 4431
2 files changed, 4435 insertions, 0 deletions
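A recurring change in the patch below is the page-fault flag setup in each architecture's arch/*/mm/fault.c handler: the write and user bits are no longer folded into the flags initializer, but ORed in once the handler knows the faulting context, before handle_mm_fault() is called. Below is a minimal userspace sketch of that flag-building pattern; the FAULT_FLAG_* values and the build_fault_flags() helper are illustrative stand-ins, not the kernel's definitions.

```c
/*
 * Simplified model of the flag handling in the arch/<arch>/mm/fault.c
 * hunks below: start from the unconditional base flags, then OR in the
 * context-dependent bits. Constant values and the helper name are
 * illustrative only.
 */
#include <stdbool.h>
#include <stdio.h>

#define FAULT_FLAG_ALLOW_RETRY 0x01
#define FAULT_FLAG_KILLABLE    0x02
#define FAULT_FLAG_WRITE       0x04
#define FAULT_FLAG_USER        0x08

static unsigned int build_fault_flags(bool from_user, bool is_write)
{
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	if (from_user)
		flags |= FAULT_FLAG_USER;   /* fault taken from user mode */
	if (is_write)
		flags |= FAULT_FLAG_WRITE;  /* faulting access was a write */
	return flags;
}

int main(void)
{
	printf("user write fault:  0x%x\n", build_fault_flags(true, true));
	printf("kernel read fault: 0x%x\n", build_fault_flags(false, false));
	return 0;
}
```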
diff --git a/0000_README b/0000_README index 6a1392d2..6cb8a740 100644 --- a/0000_README +++ b/0000_README @@ -282,6 +282,10 @@ Patch: 1059_linux-3.10.60.patch From: http://www.kernel.org Desc: Linux 3.10.60 +Patch: 1060_linux-3.10.61.patch +From: http://www.kernel.org +Desc: Linux 3.10.61 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1060_linux-3.10.61.patch b/1060_linux-3.10.61.patch new file mode 100644 index 00000000..e58cd174 --- /dev/null +++ b/1060_linux-3.10.61.patch @@ -0,0 +1,4431 @@ +diff --git a/Makefile b/Makefile +index 9d4f30d0d201..0d5ba80786b8 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 10 +-SUBLEVEL = 60 ++SUBLEVEL = 61 + EXTRAVERSION = + NAME = TOSSUG Baby Fish + +diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c +index 0c4132dd3507..98838a05ba6d 100644 +--- a/arch/alpha/mm/fault.c ++++ b/arch/alpha/mm/fault.c +@@ -89,8 +89,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, + const struct exception_table_entry *fixup; + int fault, si_code = SEGV_MAPERR; + siginfo_t info; +- unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- (cause > 0 ? FAULT_FLAG_WRITE : 0)); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + /* As of EV6, a load into $31/$f31 is a prefetch, and never faults + (or is suppressed by the PALcode). Support that for older CPUs +@@ -115,7 +114,8 @@ do_page_fault(unsigned long address, unsigned long mmcsr, + if (address >= TASK_SIZE) + goto vmalloc_fault; + #endif +- ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; + retry: + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); +@@ -142,6 +142,7 @@ retry: + } else { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; ++ flags |= FAULT_FLAG_WRITE; + } + + /* If for any reason at all we couldn't handle the fault, +diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c +index 331a0846628e..50533b750a99 100644 +--- a/arch/arc/mm/fault.c ++++ b/arch/arc/mm/fault.c +@@ -59,8 +59,7 @@ void do_page_fault(struct pt_regs *regs, int write, unsigned long address, + struct mm_struct *mm = tsk->mm; + siginfo_t info; + int fault, ret; +- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- (write ? FAULT_FLAG_WRITE : 0); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + /* + * We fault-in kernel-space virtual memory on-demand. 
The +@@ -88,6 +87,8 @@ void do_page_fault(struct pt_regs *regs, int write, unsigned long address, + if (in_atomic() || !mm) + goto no_context; + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; + retry: + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); +@@ -115,12 +116,12 @@ good_area: + if (write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; ++ flags |= FAULT_FLAG_WRITE; + } else { + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + +-survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo +@@ -200,14 +201,12 @@ no_context: + die("Oops", regs, address, cause_code); + + out_of_memory: +- if (is_global_init(tsk)) { +- yield(); +- goto survive; +- } + up_read(&mm->mmap_sem); + +- if (user_mode(regs)) +- do_group_exit(SIGKILL); /* This will never return */ ++ if (user_mode(regs)) { ++ pagefault_out_of_memory(); ++ return; ++ } + + goto no_context; + +diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h +index 7af5c6c3653a..b274bde24905 100644 +--- a/arch/arm/include/asm/bug.h ++++ b/arch/arm/include/asm/bug.h +@@ -2,6 +2,8 @@ + #define _ASMARM_BUG_H + + #include <linux/linkage.h> ++#include <linux/types.h> ++#include <asm/opcodes.h> + + #ifdef CONFIG_BUG + +@@ -12,10 +14,10 @@ + */ + #ifdef CONFIG_THUMB2_KERNEL + #define BUG_INSTR_VALUE 0xde02 +-#define BUG_INSTR_TYPE ".hword " ++#define BUG_INSTR(__value) __inst_thumb16(__value) + #else + #define BUG_INSTR_VALUE 0xe7f001f2 +-#define BUG_INSTR_TYPE ".word " ++#define BUG_INSTR(__value) __inst_arm(__value) + #endif + + +@@ -33,7 +35,7 @@ + + #define __BUG(__file, __line, __value) \ + do { \ +- asm volatile("1:\t" BUG_INSTR_TYPE #__value "\n" \ ++ asm volatile("1:\t" BUG_INSTR(__value) "\n" \ + ".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \ + "2:\t.asciz " #__file "\n" \ + ".popsection\n" \ +@@ -48,7 +50,7 @@ do { \ + + #define __BUG(__file, __line, __value) \ + do { \ +- asm volatile(BUG_INSTR_TYPE #__value); \ ++ asm volatile(BUG_INSTR(__value) "\n"); \ + unreachable(); \ + } while (0) + #endif /* CONFIG_DEBUG_BUGVERBOSE */ +diff --git a/arch/arm/kernel/kprobes-common.c b/arch/arm/kernel/kprobes-common.c +index 18a76282970e..380c20fb9c85 100644 +--- a/arch/arm/kernel/kprobes-common.c ++++ b/arch/arm/kernel/kprobes-common.c +@@ -14,6 +14,7 @@ + #include <linux/kernel.h> + #include <linux/kprobes.h> + #include <asm/system_info.h> ++#include <asm/opcodes.h> + + #include "kprobes.h" + +@@ -305,7 +306,8 @@ kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi) + + if (handler) { + /* We can emulate the instruction in (possibly) modified form */ +- asi->insn[0] = (insn & 0xfff00000) | (rn << 16) | reglist; ++ asi->insn[0] = __opcode_to_mem_arm((insn & 0xfff00000) | ++ (rn << 16) | reglist); + asi->insn_handler = handler; + return INSN_GOOD; + } +@@ -334,13 +336,14 @@ prepare_emulated_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi, + #ifdef CONFIG_THUMB2_KERNEL + if (thumb) { + u16 *thumb_insn = (u16 *)asi->insn; +- thumb_insn[1] = 0x4770; /* Thumb bx lr */ +- thumb_insn[2] = 0x4770; /* Thumb bx lr */ ++ /* Thumb bx lr */ ++ thumb_insn[1] = __opcode_to_mem_thumb16(0x4770); ++ thumb_insn[2] = __opcode_to_mem_thumb16(0x4770); + return insn; + } +- asi->insn[1] = 0xe12fff1e; /* ARM bx lr */ ++ asi->insn[1] = __opcode_to_mem_arm(0xe12fff1e); /* ARM bx lr */ + #else +- asi->insn[1] = 0xe1a0f00e; /* mov pc, lr */ ++ asi->insn[1] = __opcode_to_mem_arm(0xe1a0f00e); /* mov pc, lr */ + #endif 
+ /* Make an ARM instruction unconditional */ + if (insn < 0xe0000000) +@@ -360,12 +363,12 @@ set_emulated_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi, + if (thumb) { + u16 *ip = (u16 *)asi->insn; + if (is_wide_instruction(insn)) +- *ip++ = insn >> 16; +- *ip++ = insn; ++ *ip++ = __opcode_to_mem_thumb16(insn >> 16); ++ *ip++ = __opcode_to_mem_thumb16(insn); + return; + } + #endif +- asi->insn[0] = insn; ++ asi->insn[0] = __opcode_to_mem_arm(insn); + } + + /* +diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/kernel/kprobes-thumb.c +index 6123daf397a7..b82e798983c4 100644 +--- a/arch/arm/kernel/kprobes-thumb.c ++++ b/arch/arm/kernel/kprobes-thumb.c +@@ -163,9 +163,9 @@ t32_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi) + enum kprobe_insn ret = kprobe_decode_ldmstm(insn, asi); + + /* Fixup modified instruction to have halfwords in correct order...*/ +- insn = asi->insn[0]; +- ((u16 *)asi->insn)[0] = insn >> 16; +- ((u16 *)asi->insn)[1] = insn & 0xffff; ++ insn = __mem_to_opcode_arm(asi->insn[0]); ++ ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(insn >> 16); ++ ((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0xffff); + + return ret; + } +@@ -1153,7 +1153,7 @@ t16_decode_hiregs(kprobe_opcode_t insn, struct arch_specific_insn *asi) + { + insn &= ~0x00ff; + insn |= 0x001; /* Set Rdn = R1 and Rm = R0 */ +- ((u16 *)asi->insn)[0] = insn; ++ ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(insn); + asi->insn_handler = t16_emulate_hiregs; + return INSN_GOOD; + } +@@ -1182,8 +1182,10 @@ t16_decode_push(kprobe_opcode_t insn, struct arch_specific_insn *asi) + * and call it with R9=SP and LR in the register list represented + * by R8. + */ +- ((u16 *)asi->insn)[0] = 0xe929; /* 1st half STMDB R9!,{} */ +- ((u16 *)asi->insn)[1] = insn & 0x1ff; /* 2nd half (register list) */ ++ /* 1st half STMDB R9!,{} */ ++ ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(0xe929); ++ /* 2nd half (register list) */ ++ ((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0x1ff); + asi->insn_handler = t16_emulate_push; + return INSN_GOOD; + } +@@ -1232,8 +1234,10 @@ t16_decode_pop(kprobe_opcode_t insn, struct arch_specific_insn *asi) + * and call it with R9=SP and PC in the register list represented + * by R8. + */ +- ((u16 *)asi->insn)[0] = 0xe8b9; /* 1st half LDMIA R9!,{} */ +- ((u16 *)asi->insn)[1] = insn & 0x1ff; /* 2nd half (register list) */ ++ /* 1st half LDMIA R9!,{} */ ++ ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(0xe8b9); ++ /* 2nd half (register list) */ ++ ((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0x1ff); + asi->insn_handler = insn & 0x100 ? 
t16_emulate_pop_pc + : t16_emulate_pop_nopc; + return INSN_GOOD; +diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c +index 170e9f34003f..1c6ece51781c 100644 +--- a/arch/arm/kernel/kprobes.c ++++ b/arch/arm/kernel/kprobes.c +@@ -26,6 +26,7 @@ + #include <linux/stop_machine.h> + #include <linux/stringify.h> + #include <asm/traps.h> ++#include <asm/opcodes.h> + #include <asm/cacheflush.h> + + #include "kprobes.h" +@@ -62,10 +63,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) + #ifdef CONFIG_THUMB2_KERNEL + thumb = true; + addr &= ~1; /* Bit 0 would normally be set to indicate Thumb code */ +- insn = ((u16 *)addr)[0]; ++ insn = __mem_to_opcode_thumb16(((u16 *)addr)[0]); + if (is_wide_instruction(insn)) { +- insn <<= 16; +- insn |= ((u16 *)addr)[1]; ++ u16 inst2 = __mem_to_opcode_thumb16(((u16 *)addr)[1]); ++ insn = __opcode_thumb32_compose(insn, inst2); + decode_insn = thumb32_kprobe_decode_insn; + } else + decode_insn = thumb16_kprobe_decode_insn; +@@ -73,7 +74,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) + thumb = false; + if (addr & 0x3) + return -EINVAL; +- insn = *p->addr; ++ insn = __mem_to_opcode_arm(*p->addr); + decode_insn = arm_kprobe_decode_insn; + #endif + +diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c +index d6a0fdb6c2ee..a2a2804b1bc2 100644 +--- a/arch/arm/kernel/traps.c ++++ b/arch/arm/kernel/traps.c +@@ -347,15 +347,17 @@ void arm_notify_die(const char *str, struct pt_regs *regs, + int is_valid_bugaddr(unsigned long pc) + { + #ifdef CONFIG_THUMB2_KERNEL +- unsigned short bkpt; ++ u16 bkpt; ++ u16 insn = __opcode_to_mem_thumb16(BUG_INSTR_VALUE); + #else +- unsigned long bkpt; ++ u32 bkpt; ++ u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); + #endif + + if (probe_kernel_address((unsigned *)pc, bkpt)) + return 0; + +- return bkpt == BUG_INSTR_VALUE; ++ return bkpt == insn; + } + + #endif +diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig +index c21082d664ed..c6926eae4fe0 100644 +--- a/arch/arm/mm/Kconfig ++++ b/arch/arm/mm/Kconfig +@@ -778,6 +778,7 @@ config NEED_KUSER_HELPERS + + config KUSER_HELPERS + bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS ++ depends on MMU + default y + help + Warning: disabling this option may break user programs. +diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c +index 5dbf13f954f6..160da6d65546 100644 +--- a/arch/arm/mm/fault.c ++++ b/arch/arm/mm/fault.c +@@ -261,9 +261,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) + struct task_struct *tsk; + struct mm_struct *mm; + int fault, sig, code; +- int write = fsr & FSR_WRITE; +- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- (write ? FAULT_FLAG_WRITE : 0); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + if (notify_page_fault(regs, fsr)) + return 0; +@@ -282,6 +280,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) + if (in_atomic() || !mm) + goto no_context; + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; ++ if (fsr & FSR_WRITE) ++ flags |= FAULT_FLAG_WRITE; ++ + /* + * As per x86, we may deadlock here. However, since the kernel only + * validly references user space from well defined areas of the code, +@@ -349,6 +352,13 @@ retry: + if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) + return 0; + ++ /* ++ * If we are in kernel mode at this point, we ++ * have no context to handle this fault with. 
++ */ ++ if (!user_mode(regs)) ++ goto no_context; ++ + if (fault & VM_FAULT_OOM) { + /* + * We ran out of memory, call the OOM killer, and return to +@@ -359,13 +369,6 @@ retry: + return 0; + } + +- /* +- * If we are in kernel mode at this point, we +- * have no context to handle this fault with. +- */ +- if (!user_mode(regs)) +- goto no_context; +- + if (fault & VM_FAULT_SIGBUS) { + /* + * We had some memory, but were unable to +diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S +index 6e0ed93d51fe..c17967fdf5f6 100644 +--- a/arch/arm64/lib/clear_user.S ++++ b/arch/arm64/lib/clear_user.S +@@ -46,7 +46,7 @@ USER(9f, strh wzr, [x0], #2 ) + sub x1, x1, #2 + 4: adds x1, x1, #1 + b.mi 5f +- strb wzr, [x0] ++USER(9f, strb wzr, [x0] ) + 5: mov x0, #0 + ret + ENDPROC(__clear_user) +diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c +index f51d669c8ebd..b5d458769b65 100644 +--- a/arch/arm64/mm/fault.c ++++ b/arch/arm64/mm/fault.c +@@ -199,13 +199,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + +- if (esr & ESR_LNX_EXEC) { +- vm_flags = VM_EXEC; +- } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) { +- vm_flags = VM_WRITE; +- mm_flags |= FAULT_FLAG_WRITE; +- } +- + tsk = current; + mm = tsk->mm; + +@@ -220,6 +213,16 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, + if (in_atomic() || !mm) + goto no_context; + ++ if (user_mode(regs)) ++ mm_flags |= FAULT_FLAG_USER; ++ ++ if (esr & ESR_LNX_EXEC) { ++ vm_flags = VM_EXEC; ++ } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) { ++ vm_flags = VM_WRITE; ++ mm_flags |= FAULT_FLAG_WRITE; ++ } ++ + /* + * As per x86, we may deadlock here. However, since the kernel only + * validly references user space from well defined areas of the code, +@@ -288,6 +291,13 @@ retry: + VM_FAULT_BADACCESS)))) + return 0; + ++ /* ++ * If we are in kernel mode at this point, we have no context to ++ * handle this fault with. ++ */ ++ if (!user_mode(regs)) ++ goto no_context; ++ + if (fault & VM_FAULT_OOM) { + /* + * We ran out of memory, call the OOM killer, and return to +@@ -298,13 +308,6 @@ retry: + return 0; + } + +- /* +- * If we are in kernel mode at this point, we have no context to +- * handle this fault with. +- */ +- if (!user_mode(regs)) +- goto no_context; +- + if (fault & VM_FAULT_SIGBUS) { + /* + * We had some memory, but were unable to successfully fix up +diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c +index b2f2d2d66849..0eca93327195 100644 +--- a/arch/avr32/mm/fault.c ++++ b/arch/avr32/mm/fault.c +@@ -86,6 +86,8 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) + + local_irq_enable(); + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; + retry: + down_read(&mm->mmap_sem); + +@@ -228,9 +230,9 @@ no_context: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- pagefault_out_of_memory(); + if (!user_mode(regs)) + goto no_context; ++ pagefault_out_of_memory(); + return; + + do_sigbus: +diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c +index 73312ab6c696..1790f22e71a2 100644 +--- a/arch/cris/mm/fault.c ++++ b/arch/cris/mm/fault.c +@@ -58,8 +58,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs, + struct vm_area_struct * vma; + siginfo_t info; + int fault; +- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- ((writeaccess & 1) ? 
FAULT_FLAG_WRITE : 0); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + D(printk(KERN_DEBUG + "Page fault for %lX on %X at %lX, prot %d write %d\n", +@@ -117,6 +116,8 @@ do_page_fault(unsigned long address, struct pt_regs *regs, + if (in_atomic() || !mm) + goto no_context; + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; + retry: + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); +@@ -155,6 +156,7 @@ retry: + } else if (writeaccess == 1) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; ++ flags |= FAULT_FLAG_WRITE; + } else { + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; +diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c +index 331c1e2cfb67..9a66372fc7c7 100644 +--- a/arch/frv/mm/fault.c ++++ b/arch/frv/mm/fault.c +@@ -34,11 +34,11 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear + struct vm_area_struct *vma; + struct mm_struct *mm; + unsigned long _pme, lrai, lrad, fixup; ++ unsigned long flags = 0; + siginfo_t info; + pgd_t *pge; + pud_t *pue; + pte_t *pte; +- int write; + int fault; + + #if 0 +@@ -81,6 +81,9 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear + if (in_atomic() || !mm) + goto no_context; + ++ if (user_mode(__frame)) ++ flags |= FAULT_FLAG_USER; ++ + down_read(&mm->mmap_sem); + + vma = find_vma(mm, ear0); +@@ -129,7 +132,6 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear + */ + good_area: + info.si_code = SEGV_ACCERR; +- write = 0; + switch (esr0 & ESR0_ATXC) { + default: + /* handle write to write protected page */ +@@ -140,7 +142,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear + #endif + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; +- write = 1; ++ flags |= FAULT_FLAG_WRITE; + break; + + /* handle read from protected page */ +@@ -162,7 +164,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ +- fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0); ++ fault = handle_mm_fault(mm, vma, ear0, flags); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; +diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c +index 1bd276dbec7d..8704c9320032 100644 +--- a/arch/hexagon/mm/vm_fault.c ++++ b/arch/hexagon/mm/vm_fault.c +@@ -53,8 +53,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) + int si_code = SEGV_MAPERR; + int fault; + const struct exception_table_entry *fixup; +- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- (cause > 0 ? 
FAULT_FLAG_WRITE : 0); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + /* + * If we're in an interrupt or have no user context, +@@ -65,6 +64,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) + + local_irq_enable(); + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; + retry: + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); +@@ -96,6 +97,7 @@ good_area: + case FLT_STORE: + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; ++ flags |= FAULT_FLAG_WRITE; + break; + } + +diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c +index 6cf0341f978e..7225dad87094 100644 +--- a/arch/ia64/mm/fault.c ++++ b/arch/ia64/mm/fault.c +@@ -90,8 +90,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re + mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) + | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); + +- flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0); +- + /* mmap_sem is performance critical.... */ + prefetchw(&mm->mmap_sem); + +@@ -119,6 +117,10 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re + if (notify_page_fault(regs, TRAP_BRKPT)) + return; + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; ++ if (mask & VM_WRITE) ++ flags |= FAULT_FLAG_WRITE; + retry: + down_read(&mm->mmap_sem); + +diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c +index 3cdfa9c1d091..e9c6a8014bd6 100644 +--- a/arch/m32r/mm/fault.c ++++ b/arch/m32r/mm/fault.c +@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, + struct mm_struct *mm; + struct vm_area_struct * vma; + unsigned long page, addr; +- int write; ++ unsigned long flags = 0; + int fault; + siginfo_t info; + +@@ -117,6 +117,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, + if (in_atomic() || !mm) + goto bad_area_nosemaphore; + ++ if (error_code & ACE_USERMODE) ++ flags |= FAULT_FLAG_USER; ++ + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunately, in the case of an +@@ -166,14 +169,13 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, + */ + good_area: + info.si_code = SEGV_ACCERR; +- write = 0; + switch (error_code & (ACE_WRITE|ACE_PROTECTION)) { + default: /* 3: write, present */ + /* fall through */ + case ACE_WRITE: /* write, not present */ + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; +- write++; ++ flags |= FAULT_FLAG_WRITE; + break; + case ACE_PROTECTION: /* read, present */ + case 0: /* read, not present */ +@@ -194,7 +196,7 @@ good_area: + */ + addr = (address & PAGE_MASK); + set_thread_fault_code(error_code); +- fault = handle_mm_fault(mm, vma, addr, write ? 
FAULT_FLAG_WRITE : 0); ++ fault = handle_mm_fault(mm, vma, addr, flags); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; +diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c +index a563727806bf..eb1d61f68725 100644 +--- a/arch/m68k/mm/fault.c ++++ b/arch/m68k/mm/fault.c +@@ -88,6 +88,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, + if (in_atomic() || !mm) + goto no_context; + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; + retry: + down_read(&mm->mmap_sem); + +diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c +index 2c75bf7357c5..332680e5ebf2 100644 +--- a/arch/metag/mm/fault.c ++++ b/arch/metag/mm/fault.c +@@ -53,8 +53,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, + struct vm_area_struct *vma, *prev_vma; + siginfo_t info; + int fault; +- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- (write_access ? FAULT_FLAG_WRITE : 0); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + tsk = current; + +@@ -109,6 +108,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, + if (in_atomic() || !mm) + goto no_context; + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; + retry: + down_read(&mm->mmap_sem); + +@@ -121,6 +122,7 @@ good_area: + if (write_access) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; ++ flags |= FAULT_FLAG_WRITE; + } else { + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) + goto bad_area; +@@ -224,8 +226,10 @@ do_sigbus: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- if (user_mode(regs)) +- do_group_exit(SIGKILL); ++ if (user_mode(regs)) { ++ pagefault_out_of_memory(); ++ return 1; ++ } + + no_context: + /* Are we prepared to handle this kernel fault? */ +diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c +index 731f739d17a1..fa4cf52aa7a6 100644 +--- a/arch/microblaze/mm/fault.c ++++ b/arch/microblaze/mm/fault.c +@@ -92,8 +92,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, + int code = SEGV_MAPERR; + int is_write = error_code & ESR_S; + int fault; +- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- (is_write ? FAULT_FLAG_WRITE : 0); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + regs->ear = address; + regs->esr = error_code; +@@ -121,6 +120,9 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, + die("Weird page fault", regs, SIGSEGV); + } + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; ++ + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. 
Unfortunately, in the case of an +@@ -199,6 +201,7 @@ good_area: + if (unlikely(is_write)) { + if (unlikely(!(vma->vm_flags & VM_WRITE))) + goto bad_area; ++ flags |= FAULT_FLAG_WRITE; + /* a read */ + } else { + /* protection fault */ +diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c +index 5495101d32c8..c2ec87e5d1cc 100644 +--- a/arch/mips/mm/c-r4k.c ++++ b/arch/mips/mm/c-r4k.c +@@ -608,6 +608,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) + r4k_blast_scache(); + else + blast_scache_range(addr, addr + size); ++ preempt_enable(); + __sync(); + return; + } +@@ -649,6 +650,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) + */ + blast_inv_scache_range(addr, addr + size); + } ++ preempt_enable(); + __sync(); + return; + } +diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c +index 0fead53d1c26..0214a43b9911 100644 +--- a/arch/mips/mm/fault.c ++++ b/arch/mips/mm/fault.c +@@ -41,8 +41,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ + const int field = sizeof(unsigned long) * 2; + siginfo_t info; + int fault; +- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- (write ? FAULT_FLAG_WRITE : 0); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + #if 0 + printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(), +@@ -92,6 +91,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ + if (in_atomic() || !mm) + goto bad_area_nosemaphore; + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; + retry: + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); +@@ -113,6 +114,7 @@ good_area: + if (write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; ++ flags |= FAULT_FLAG_WRITE; + } else { + if (cpu_has_rixi) { + if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) { +@@ -240,6 +242,8 @@ out_of_memory: + * (which will retry the fault, or kill us if we got oom-killed). + */ + up_read(&mm->mmap_sem); ++ if (!user_mode(regs)) ++ goto no_context; + pagefault_out_of_memory(); + return; + +diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c +index d48a84fd7fae..3516cbdf1ee9 100644 +--- a/arch/mn10300/mm/fault.c ++++ b/arch/mn10300/mm/fault.c +@@ -171,6 +171,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code, + if (in_atomic() || !mm) + goto no_context; + ++ if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) ++ flags |= FAULT_FLAG_USER; + retry: + down_read(&mm->mmap_sem); + +@@ -345,9 +347,10 @@ no_context: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- printk(KERN_ALERT "VM: killing process %s\n", tsk->comm); +- if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) +- do_exit(SIGKILL); ++ if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) { ++ pagefault_out_of_memory(); ++ return; ++ } + goto no_context; + + do_sigbus: +diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c +index e2bfafce66c5..0703acf7d327 100644 +--- a/arch/openrisc/mm/fault.c ++++ b/arch/openrisc/mm/fault.c +@@ -86,6 +86,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, + if (user_mode(regs)) { + /* Exception was in userspace: reenable interrupts */ + local_irq_enable(); ++ flags |= FAULT_FLAG_USER; + } else { + /* If exception was in a syscall, then IRQ's may have + * been enabled or disabled. 
If they were enabled, +@@ -267,10 +268,10 @@ out_of_memory: + __asm__ __volatile__("l.nop 1"); + + up_read(&mm->mmap_sem); +- printk("VM: killing process %s\n", tsk->comm); +- if (user_mode(regs)) +- do_exit(SIGKILL); +- goto no_context; ++ if (!user_mode(regs)) ++ goto no_context; ++ pagefault_out_of_memory(); ++ return; + + do_sigbus: + up_read(&mm->mmap_sem); +diff --git a/arch/parisc/include/uapi/asm/shmbuf.h b/arch/parisc/include/uapi/asm/shmbuf.h +index 0a3eada1863b..f395cde7b593 100644 +--- a/arch/parisc/include/uapi/asm/shmbuf.h ++++ b/arch/parisc/include/uapi/asm/shmbuf.h +@@ -36,23 +36,16 @@ struct shmid64_ds { + unsigned int __unused2; + }; + +-#ifdef CONFIG_64BIT +-/* The 'unsigned int' (formerly 'unsigned long') data types below will +- * ensure that a 32-bit app calling shmctl(*,IPC_INFO,*) will work on +- * a wide kernel, but if some of these values are meant to contain pointers +- * they may need to be 'long long' instead. -PB XXX FIXME +- */ +-#endif + struct shminfo64 { +- unsigned int shmmax; +- unsigned int shmmin; +- unsigned int shmmni; +- unsigned int shmseg; +- unsigned int shmall; +- unsigned int __unused1; +- unsigned int __unused2; +- unsigned int __unused3; +- unsigned int __unused4; ++ unsigned long shmmax; ++ unsigned long shmmin; ++ unsigned long shmmni; ++ unsigned long shmseg; ++ unsigned long shmall; ++ unsigned long __unused1; ++ unsigned long __unused2; ++ unsigned long __unused3; ++ unsigned long __unused4; + }; + + #endif /* _PARISC_SHMBUF_H */ +diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S +index 10a0c2aad8cf..b24732d1bdbf 100644 +--- a/arch/parisc/kernel/syscall_table.S ++++ b/arch/parisc/kernel/syscall_table.S +@@ -286,11 +286,11 @@ + ENTRY_COMP(msgsnd) + ENTRY_COMP(msgrcv) + ENTRY_SAME(msgget) /* 190 */ +- ENTRY_SAME(msgctl) +- ENTRY_SAME(shmat) ++ ENTRY_COMP(msgctl) ++ ENTRY_COMP(shmat) + ENTRY_SAME(shmdt) + ENTRY_SAME(shmget) +- ENTRY_SAME(shmctl) /* 195 */ ++ ENTRY_COMP(shmctl) /* 195 */ + ENTRY_SAME(ni_syscall) /* streams1 */ + ENTRY_SAME(ni_syscall) /* streams2 */ + ENTRY_SAME(lstat64) +@@ -323,7 +323,7 @@ + ENTRY_SAME(epoll_ctl) /* 225 */ + ENTRY_SAME(epoll_wait) + ENTRY_SAME(remap_file_pages) +- ENTRY_SAME(semtimedop) ++ ENTRY_COMP(semtimedop) + ENTRY_COMP(mq_open) + ENTRY_SAME(mq_unlink) /* 230 */ + ENTRY_COMP(mq_timedsend) +diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c +index f247a3480e8e..d10d27a720c0 100644 +--- a/arch/parisc/mm/fault.c ++++ b/arch/parisc/mm/fault.c +@@ -180,6 +180,10 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, + if (in_atomic() || !mm) + goto no_context; + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; ++ if (acc_type & VM_WRITE) ++ flags |= FAULT_FLAG_WRITE; + retry: + down_read(&mm->mmap_sem); + vma = find_vma_prev(mm, address, &prev_vma); +@@ -203,8 +207,7 @@ good_area: + * fault. + */ + +- fault = handle_mm_fault(mm, vma, address, +- flags | ((acc_type & VM_WRITE) ? 
FAULT_FLAG_WRITE : 0)); ++ fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return; +diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c +index 8726779e1409..d9196c9f93d9 100644 +--- a/arch/powerpc/mm/fault.c ++++ b/arch/powerpc/mm/fault.c +@@ -223,9 +223,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, + is_write = error_code & ESR_DST; + #endif /* CONFIG_4xx || CONFIG_BOOKE */ + +- if (is_write) +- flags |= FAULT_FLAG_WRITE; +- + #ifdef CONFIG_PPC_ICSWX + /* + * we need to do this early because this "data storage +@@ -280,6 +277,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; ++ + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunately, in the case of an +@@ -408,6 +408,7 @@ good_area: + } else if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; ++ flags |= FAULT_FLAG_WRITE; + /* a read */ + } else { + /* protection fault */ +diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c +index 047c3e4c59a2..416facec4a33 100644 +--- a/arch/s390/mm/fault.c ++++ b/arch/s390/mm/fault.c +@@ -302,6 +302,8 @@ static inline int do_exception(struct pt_regs *regs, int access) + address = trans_exc_code & __FAIL_ADDR_MASK; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; + if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) + flags |= FAULT_FLAG_WRITE; + down_read(&mm->mmap_sem); +diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c +index 47b600e4b2c5..52238983527d 100644 +--- a/arch/score/mm/fault.c ++++ b/arch/score/mm/fault.c +@@ -47,6 +47,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + const int field = sizeof(unsigned long) * 2; ++ unsigned long flags = 0; + siginfo_t info; + int fault; + +@@ -75,6 +76,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, + if (in_atomic() || !mm) + goto bad_area_nosemaphore; + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; ++ + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); + if (!vma) +@@ -95,18 +99,18 @@ good_area: + if (write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; ++ flags |= FAULT_FLAG_WRITE; + } else { + if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + goto bad_area; + } + +-survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. 
+ */ +- fault = handle_mm_fault(mm, vma, address, write); ++ fault = handle_mm_fault(mm, vma, address, flags); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; +@@ -167,15 +171,10 @@ no_context: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- if (is_global_init(tsk)) { +- yield(); +- down_read(&mm->mmap_sem); +- goto survive; +- } +- printk("VM: killing process %s\n", tsk->comm); +- if (user_mode(regs)) +- do_group_exit(SIGKILL); +- goto no_context; ++ if (!user_mode(regs)) ++ goto no_context; ++ pagefault_out_of_memory(); ++ return; + + do_sigbus: + up_read(&mm->mmap_sem); +diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c +index 1f49c28affa9..541dc6101508 100644 +--- a/arch/sh/mm/fault.c ++++ b/arch/sh/mm/fault.c +@@ -400,9 +400,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, + struct mm_struct *mm; + struct vm_area_struct * vma; + int fault; +- int write = error_code & FAULT_CODE_WRITE; +- unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- (write ? FAULT_FLAG_WRITE : 0)); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + tsk = current; + mm = tsk->mm; +@@ -476,6 +474,11 @@ good_area: + + set_thread_fault_code(error_code); + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; ++ if (error_code & FAULT_CODE_WRITE) ++ flags |= FAULT_FLAG_WRITE; ++ + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo +diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h +index 905832aa9e9e..a0ed182ae73c 100644 +--- a/arch/sparc/include/asm/atomic_32.h ++++ b/arch/sparc/include/asm/atomic_32.h +@@ -21,7 +21,7 @@ + + extern int __atomic_add_return(int, atomic_t *); + extern int atomic_cmpxchg(atomic_t *, int, int); +-#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) ++extern int atomic_xchg(atomic_t *, int); + extern int __atomic_add_unless(atomic_t *, int, int); + extern void atomic_set(atomic_t *, int); + +diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h +index 1fae1a02e3c2..ae0f9a7a314d 100644 +--- a/arch/sparc/include/asm/cmpxchg_32.h ++++ b/arch/sparc/include/asm/cmpxchg_32.h +@@ -11,22 +11,14 @@ + #ifndef __ARCH_SPARC_CMPXCHG__ + #define __ARCH_SPARC_CMPXCHG__ + +-static inline unsigned long xchg_u32(__volatile__ unsigned long *m, unsigned long val) +-{ +- __asm__ __volatile__("swap [%2], %0" +- : "=&r" (val) +- : "0" (val), "r" (m) +- : "memory"); +- return val; +-} +- ++extern unsigned long __xchg_u32(volatile u32 *m, u32 new); + extern void __xchg_called_with_bad_pointer(void); + + static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int size) + { + switch (size) { + case 4: +- return xchg_u32(ptr, x); ++ return __xchg_u32(ptr, x); + } + __xchg_called_with_bad_pointer(); + return x; +diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h +index 432afa838861..55841c184e6d 100644 +--- a/arch/sparc/include/asm/vio.h ++++ b/arch/sparc/include/asm/vio.h +@@ -118,12 +118,18 @@ struct vio_disk_attr_info { + u8 vdisk_type; + #define VD_DISK_TYPE_SLICE 0x01 /* Slice in block device */ + #define VD_DISK_TYPE_DISK 0x02 /* Entire block device */ +- u16 resv1; ++ u8 vdisk_mtype; /* v1.1 */ ++#define VD_MEDIA_TYPE_FIXED 0x01 /* Fixed device */ ++#define VD_MEDIA_TYPE_CD 0x02 /* CD Device */ ++#define VD_MEDIA_TYPE_DVD 0x03 /* DVD Device */ ++ u8 resv1; + u32 vdisk_block_size; + u64 operations; +- u64 vdisk_size; ++ 
u64 vdisk_size; /* v1.1 */ + u64 max_xfer_size; +- u64 resv2[2]; ++ u32 phys_block_size; /* v1.2 */ ++ u32 resv2; ++ u64 resv3[1]; + }; + + struct vio_disk_desc { +@@ -259,7 +265,7 @@ static inline u32 vio_dring_avail(struct vio_dring_state *dr, + unsigned int ring_size) + { + return (dr->pending - +- ((dr->prod - dr->cons) & (ring_size - 1))); ++ ((dr->prod - dr->cons) & (ring_size - 1)) - 1); + } + + #define VIO_MAX_TYPE_LEN 32 +diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c +index 8f76f23dac38..f9c6813c132d 100644 +--- a/arch/sparc/kernel/pci_schizo.c ++++ b/arch/sparc/kernel/pci_schizo.c +@@ -581,7 +581,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm) + { + unsigned long csr_reg, csr, csr_error_bits; + irqreturn_t ret = IRQ_NONE; +- u16 stat; ++ u32 stat; + + csr_reg = pbm->pbm_regs + SCHIZO_PCI_CTRL; + csr = upa_readq(csr_reg); +@@ -617,7 +617,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm) + pbm->name); + ret = IRQ_HANDLED; + } +- pci_read_config_word(pbm->pci_bus->self, PCI_STATUS, &stat); ++ pbm->pci_ops->read(pbm->pci_bus, 0, PCI_STATUS, 2, &stat); + if (stat & (PCI_STATUS_PARITY | + PCI_STATUS_SIG_TARGET_ABORT | + PCI_STATUS_REC_TARGET_ABORT | +@@ -625,7 +625,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm) + PCI_STATUS_SIG_SYSTEM_ERROR)) { + printk("%s: PCI bus error, PCI_STATUS[%04x]\n", + pbm->name, stat); +- pci_write_config_word(pbm->pci_bus->self, PCI_STATUS, 0xffff); ++ pbm->pci_ops->write(pbm->pci_bus, 0, PCI_STATUS, 2, 0xffff); + ret = IRQ_HANDLED; + } + return ret; +diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c +index 8565ecd7d48a..173964d5e948 100644 +--- a/arch/sparc/kernel/smp_64.c ++++ b/arch/sparc/kernel/smp_64.c +@@ -821,13 +821,17 @@ void arch_send_call_function_single_ipi(int cpu) + void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs) + { + clear_softint(1 << irq); ++ irq_enter(); + generic_smp_call_function_interrupt(); ++ irq_exit(); + } + + void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs) + { + clear_softint(1 << irq); ++ irq_enter(); + generic_smp_call_function_single_interrupt(); ++ irq_exit(); + } + + static void tsb_sync(void *info) +diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c +index 1d32b54089aa..8f2f94d53434 100644 +--- a/arch/sparc/lib/atomic32.c ++++ b/arch/sparc/lib/atomic32.c +@@ -40,6 +40,19 @@ int __atomic_add_return(int i, atomic_t *v) + } + EXPORT_SYMBOL(__atomic_add_return); + ++int atomic_xchg(atomic_t *v, int new) ++{ ++ int ret; ++ unsigned long flags; ++ ++ spin_lock_irqsave(ATOMIC_HASH(v), flags); ++ ret = v->counter; ++ v->counter = new; ++ spin_unlock_irqrestore(ATOMIC_HASH(v), flags); ++ return ret; ++} ++EXPORT_SYMBOL(atomic_xchg); ++ + int atomic_cmpxchg(atomic_t *v, int old, int new) + { + int ret; +@@ -132,3 +145,17 @@ unsigned long __cmpxchg_u32(volatile u32 *ptr, u32 old, u32 new) + return (unsigned long)prev; + } + EXPORT_SYMBOL(__cmpxchg_u32); ++ ++unsigned long __xchg_u32(volatile u32 *ptr, u32 new) ++{ ++ unsigned long flags; ++ u32 prev; ++ ++ spin_lock_irqsave(ATOMIC_HASH(ptr), flags); ++ prev = *ptr; ++ *ptr = new; ++ spin_unlock_irqrestore(ATOMIC_HASH(ptr), flags); ++ ++ return (unsigned long)prev; ++} ++EXPORT_SYMBOL(__xchg_u32); +diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c +index e98bfda205a2..59dbd4645725 100644 +--- a/arch/sparc/mm/fault_32.c ++++ b/arch/sparc/mm/fault_32.c +@@ -177,8 +177,7 @@ 
asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, + unsigned long g2; + int from_user = !(regs->psr & PSR_PS); + int fault, code; +- unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- (write ? FAULT_FLAG_WRITE : 0)); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + if (text_fault) + address = regs->pc; +@@ -235,6 +234,11 @@ good_area: + goto bad_area; + } + ++ if (from_user) ++ flags |= FAULT_FLAG_USER; ++ if (write) ++ flags |= FAULT_FLAG_WRITE; ++ + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo +@@ -383,6 +387,7 @@ static void force_user_fault(unsigned long address, int write) + struct vm_area_struct *vma; + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; ++ unsigned int flags = FAULT_FLAG_USER; + int code; + + code = SEGV_MAPERR; +@@ -402,11 +407,12 @@ good_area: + if (write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; ++ flags |= FAULT_FLAG_WRITE; + } else { + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } +- switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) { ++ switch (handle_mm_fault(mm, vma, address, flags)) { + case VM_FAULT_SIGBUS: + case VM_FAULT_OOM: + goto do_sigbus; +diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c +index ea83f82464da..3841a081beb3 100644 +--- a/arch/sparc/mm/fault_64.c ++++ b/arch/sparc/mm/fault_64.c +@@ -323,7 +323,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) + bad_kernel_pc(regs, address); + return; + } +- } ++ } else ++ flags |= FAULT_FLAG_USER; + + /* + * If we're in an interrupt or have no user +@@ -426,13 +427,14 @@ good_area: + vma->vm_file != NULL) + set_thread_fault_code(fault_code | + FAULT_CODE_BLKCOMMIT); ++ ++ flags |= FAULT_FLAG_WRITE; + } else { + /* Allow reads even for write-only mappings */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + +- flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) +diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c +index 3d2b81c163a6..3ff289f422e6 100644 +--- a/arch/tile/mm/fault.c ++++ b/arch/tile/mm/fault.c +@@ -280,8 +280,7 @@ static int handle_page_fault(struct pt_regs *regs, + if (!is_page_fault) + write = 1; + +- flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- (write ? FAULT_FLAG_WRITE : 0)); ++ flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL); + +@@ -365,6 +364,9 @@ static int handle_page_fault(struct pt_regs *regs, + goto bad_area_nosemaphore; + } + ++ if (!is_kernel_mode) ++ flags |= FAULT_FLAG_USER; ++ + /* + * When running in the kernel we expect faults to occur only to + * addresses in user space. 
All other faults represent errors in the +@@ -425,12 +427,12 @@ good_area: + #endif + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; ++ flags |= FAULT_FLAG_WRITE; + } else { + if (!is_page_fault || !(vma->vm_flags & VM_READ)) + goto bad_area; + } + +- survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo +@@ -568,15 +570,10 @@ no_context: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- if (is_global_init(tsk)) { +- yield(); +- down_read(&mm->mmap_sem); +- goto survive; +- } +- pr_alert("VM: killing process %s\n", tsk->comm); +- if (!is_kernel_mode) +- do_group_exit(SIGKILL); +- goto no_context; ++ if (is_kernel_mode) ++ goto no_context; ++ pagefault_out_of_memory(); ++ return 0; + + do_sigbus: + up_read(&mm->mmap_sem); +diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c +index 089f3987e273..5c3aef74237f 100644 +--- a/arch/um/kernel/trap.c ++++ b/arch/um/kernel/trap.c +@@ -30,8 +30,7 @@ int handle_page_fault(unsigned long address, unsigned long ip, + pmd_t *pmd; + pte_t *pte; + int err = -EFAULT; +- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- (is_write ? FAULT_FLAG_WRITE : 0); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + *code_out = SEGV_MAPERR; + +@@ -42,6 +41,8 @@ int handle_page_fault(unsigned long address, unsigned long ip, + if (in_atomic()) + goto out_nosemaphore; + ++ if (is_user) ++ flags |= FAULT_FLAG_USER; + retry: + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); +@@ -58,12 +59,15 @@ retry: + + good_area: + *code_out = SEGV_ACCERR; +- if (is_write && !(vma->vm_flags & VM_WRITE)) +- goto out; +- +- /* Don't require VM_READ|VM_EXEC for write faults! */ +- if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC))) +- goto out; ++ if (is_write) { ++ if (!(vma->vm_flags & VM_WRITE)) ++ goto out; ++ flags |= FAULT_FLAG_WRITE; ++ } else { ++ /* Don't require VM_READ|VM_EXEC for write faults! */ ++ if (!(vma->vm_flags & (VM_READ | VM_EXEC))) ++ goto out; ++ } + + do { + int fault; +@@ -124,6 +128,8 @@ out_of_memory: + * (which will retry the fault, or kill us if we got oom-killed). + */ + up_read(&mm->mmap_sem); ++ if (!is_user) ++ goto out_nosemaphore; + pagefault_out_of_memory(); + return 0; + } +diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c +index f9b5c10bccee..0dc922dba915 100644 +--- a/arch/unicore32/mm/fault.c ++++ b/arch/unicore32/mm/fault.c +@@ -209,8 +209,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs) + struct task_struct *tsk; + struct mm_struct *mm; + int fault, sig, code; +- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- ((!(fsr ^ 0x12)) ? FAULT_FLAG_WRITE : 0); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + tsk = current; + mm = tsk->mm; +@@ -222,6 +221,11 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs) + if (in_atomic() || !mm) + goto no_context; + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; ++ if (!(fsr ^ 0x12)) ++ flags |= FAULT_FLAG_WRITE; ++ + /* + * As per x86, we may deadlock here. However, since the kernel only + * validly references user space from well defined areas of the code, +@@ -278,6 +282,13 @@ retry: + (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) + return 0; + ++ /* ++ * If we are in kernel mode at this point, we ++ * have no context to handle this fault with. 
++ */ ++ if (!user_mode(regs)) ++ goto no_context; ++ + if (fault & VM_FAULT_OOM) { + /* + * We ran out of memory, call the OOM killer, and return to +@@ -288,13 +299,6 @@ retry: + return 0; + } + +- /* +- * If we are in kernel mode at this point, we +- * have no context to handle this fault with. +- */ +- if (!user_mode(regs)) +- goto no_context; +- + if (fault & VM_FAULT_SIGBUS) { + /* + * We had some memory, but were unable to +diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c +index b45ac6affa9c..6d6bb6f4fd43 100644 +--- a/arch/x86/kernel/cpu/perf_event_intel.c ++++ b/arch/x86/kernel/cpu/perf_event_intel.c +@@ -2172,6 +2172,9 @@ __init int intel_pmu_init(void) + case 62: /* IvyBridge EP */ + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); ++ /* dTLB-load-misses on IVB is different than SNB */ ++ hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */ ++ + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + +diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c +index 29a8120e6fe8..baa61e7370b7 100644 +--- a/arch/x86/kernel/ptrace.c ++++ b/arch/x86/kernel/ptrace.c +@@ -1475,15 +1475,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, + force_sig_info(SIGTRAP, &info, tsk); + } + +- +-#ifdef CONFIG_X86_32 +-# define IS_IA32 1 +-#elif defined CONFIG_IA32_EMULATION +-# define IS_IA32 is_compat_task() +-#else +-# define IS_IA32 0 +-#endif +- + /* + * We must return the syscall number to actually look up in the table. + * This can be -1L to skip running any syscall at all. +@@ -1521,7 +1512,7 @@ long syscall_trace_enter(struct pt_regs *regs) + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->orig_ax); + +- if (IS_IA32) ++ if (is_ia32_task()) + audit_syscall_entry(AUDIT_ARCH_I386, + regs->orig_ax, + regs->bx, regs->cx, +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 684f46dc87de..adfc30d9f9f4 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -4834,7 +4834,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) + + ++vcpu->stat.insn_emulation_fail; + trace_kvm_emulate_insn_failed(vcpu); +- if (!is_guest_mode(vcpu)) { ++ if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; +diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c +index c1e9e4cbbd76..d8b1ff68dbb9 100644 +--- a/arch/x86/mm/fault.c ++++ b/arch/x86/mm/fault.c +@@ -842,23 +842,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, + force_sig_info_fault(SIGBUS, code, address, tsk, fault); + } + +-static noinline int ++static noinline void + mm_fault_error(struct pt_regs *regs, unsigned long error_code, + unsigned long address, unsigned int fault) + { +- /* +- * Pagefault was interrupted by SIGKILL. We have no reason to +- * continue pagefault. 
+- */ +- if (fatal_signal_pending(current)) { +- if (!(fault & VM_FAULT_RETRY)) +- up_read(¤t->mm->mmap_sem); +- if (!(error_code & PF_USER)) +- no_context(regs, error_code, address, 0, 0); +- return 1; ++ if (fatal_signal_pending(current) && !(error_code & PF_USER)) { ++ up_read(¤t->mm->mmap_sem); ++ no_context(regs, error_code, address, 0, 0); ++ return; + } +- if (!(fault & VM_FAULT_ERROR)) +- return 0; + + if (fault & VM_FAULT_OOM) { + /* Kernel mode? Handle exceptions or die: */ +@@ -866,7 +858,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, + up_read(¤t->mm->mmap_sem); + no_context(regs, error_code, address, + SIGSEGV, SEGV_MAPERR); +- return 1; ++ return; + } + + up_read(¤t->mm->mmap_sem); +@@ -884,7 +876,6 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, + else + BUG(); + } +- return 1; + } + + static int spurious_fault_check(unsigned long error_code, pte_t *pte) +@@ -1017,9 +1008,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) + unsigned long address; + struct mm_struct *mm; + int fault; +- int write = error_code & PF_WRITE; +- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +- (write ? FAULT_FLAG_WRITE : 0); ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + tsk = current; + mm = tsk->mm; +@@ -1089,6 +1078,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) + if (user_mode_vm(regs)) { + local_irq_enable(); + error_code |= PF_USER; ++ flags |= FAULT_FLAG_USER; + } else { + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); +@@ -1113,6 +1103,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) + return; + } + ++ if (error_code & PF_WRITE) ++ flags |= FAULT_FLAG_WRITE; ++ + /* + * When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in +@@ -1191,9 +1184,17 @@ good_area: + */ + fault = handle_mm_fault(mm, vma, address, flags); + +- if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { +- if (mm_fault_error(regs, error_code, address, fault)) +- return; ++ /* ++ * If we need to retry but a fatal signal is pending, handle the ++ * signal first. We do not need to release the mmap_sem because it ++ * would already be released in __lock_page_or_retry in mm/filemap.c. ++ */ ++ if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))) ++ return; ++ ++ if (unlikely(fault & VM_FAULT_ERROR)) { ++ mm_fault_error(regs, error_code, address, fault); ++ return; + } + + /* +diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h +index 51940fec6990..513effd48060 100644 +--- a/arch/xtensa/include/uapi/asm/unistd.h ++++ b/arch/xtensa/include/uapi/asm/unistd.h +@@ -384,7 +384,8 @@ __SYSCALL(174, sys_chroot, 1) + #define __NR_pivot_root 175 + __SYSCALL(175, sys_pivot_root, 2) + #define __NR_umount 176 +-__SYSCALL(176, sys_umount, 2) ++__SYSCALL(176, sys_oldumount, 1) ++#define __ARCH_WANT_SYS_OLDUMOUNT + #define __NR_swapoff 177 + __SYSCALL(177, sys_swapoff, 1) + #define __NR_sync 178 +diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c +index 4b7bc8db170f..70fa7bc42b4a 100644 +--- a/arch/xtensa/mm/fault.c ++++ b/arch/xtensa/mm/fault.c +@@ -72,6 +72,8 @@ void do_page_fault(struct pt_regs *regs) + address, exccause, regs->pc, is_write? "w":"", is_exec? 
"x":""); + #endif + ++ if (user_mode(regs)) ++ flags |= FAULT_FLAG_USER; + retry: + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); +diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c +index c3f09505f795..64150a9ffff3 100644 +--- a/drivers/ata/ahci.c ++++ b/drivers/ata/ahci.c +@@ -61,6 +61,7 @@ enum board_ids { + /* board IDs by feature in alphabetical order */ + board_ahci, + board_ahci_ign_iferr, ++ board_ahci_nomsi, + board_ahci_noncq, + board_ahci_nosntf, + board_ahci_yes_fbs, +@@ -120,6 +121,13 @@ static const struct ata_port_info ahci_port_info[] = { + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_ops, + }, ++ [board_ahci_nomsi] = { ++ AHCI_HFLAGS (AHCI_HFLAG_NO_MSI), ++ .flags = AHCI_FLAG_COMMON, ++ .pio_mask = ATA_PIO4, ++ .udma_mask = ATA_UDMA6, ++ .port_ops = &ahci_ops, ++ }, + [board_ahci_noncq] = { + AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ), + .flags = AHCI_FLAG_COMMON, +@@ -312,6 +320,11 @@ static const struct pci_device_id ahci_pci_tbl[] = { + { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series RAID */ ++ { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */ ++ { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H RAID */ ++ { PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */ ++ { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ ++ { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ + + /* JMicron 360/1/3/5/6, match class to avoid IDE function */ + { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, +@@ -474,10 +487,10 @@ static const struct pci_device_id ahci_pci_tbl[] = { + { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci }, /* ASM1062 */ + + /* +- * Samsung SSDs found on some macbooks. NCQ times out. +- * https://bugzilla.kernel.org/show_bug.cgi?id=60731 ++ * Samsung SSDs found on some macbooks. NCQ times out if MSI is ++ * enabled. 
https://bugzilla.kernel.org/show_bug.cgi?id=60731 + */ +- { PCI_VDEVICE(SAMSUNG, 0x1600), board_ahci_noncq }, ++ { PCI_VDEVICE(SAMSUNG, 0x1600), board_ahci_nomsi }, + + /* Enmotus */ + { PCI_DEVICE(0x1c44, 0x8000), board_ahci }, +diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c +index 5814deb6963d..0ebadf93b6c5 100644 +--- a/drivers/block/sunvdc.c ++++ b/drivers/block/sunvdc.c +@@ -9,6 +9,7 @@ + #include <linux/blkdev.h> + #include <linux/hdreg.h> + #include <linux/genhd.h> ++#include <linux/cdrom.h> + #include <linux/slab.h> + #include <linux/spinlock.h> + #include <linux/completion.h> +@@ -22,8 +23,8 @@ + + #define DRV_MODULE_NAME "sunvdc" + #define PFX DRV_MODULE_NAME ": " +-#define DRV_MODULE_VERSION "1.0" +-#define DRV_MODULE_RELDATE "June 25, 2007" ++#define DRV_MODULE_VERSION "1.1" ++#define DRV_MODULE_RELDATE "February 13, 2013" + + static char version[] = + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; +@@ -32,7 +33,7 @@ MODULE_DESCRIPTION("Sun LDOM virtual disk client driver"); + MODULE_LICENSE("GPL"); + MODULE_VERSION(DRV_MODULE_VERSION); + +-#define VDC_TX_RING_SIZE 256 ++#define VDC_TX_RING_SIZE 512 + + #define WAITING_FOR_LINK_UP 0x01 + #define WAITING_FOR_TX_SPACE 0x02 +@@ -65,11 +66,9 @@ struct vdc_port { + u64 operations; + u32 vdisk_size; + u8 vdisk_type; ++ u8 vdisk_mtype; + + char disk_name[32]; +- +- struct vio_disk_geom geom; +- struct vio_disk_vtoc label; + }; + + static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio) +@@ -79,9 +78,16 @@ static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio) + + /* Ordered from largest major to lowest */ + static struct vio_version vdc_versions[] = { ++ { .major = 1, .minor = 1 }, + { .major = 1, .minor = 0 }, + }; + ++static inline int vdc_version_supported(struct vdc_port *port, ++ u16 major, u16 minor) ++{ ++ return port->vio.ver.major == major && port->vio.ver.minor >= minor; ++} ++ + #define VDCBLK_NAME "vdisk" + static int vdc_major; + #define PARTITION_SHIFT 3 +@@ -94,18 +100,54 @@ static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr) + static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo) + { + struct gendisk *disk = bdev->bd_disk; +- struct vdc_port *port = disk->private_data; ++ sector_t nsect = get_capacity(disk); ++ sector_t cylinders = nsect; + +- geo->heads = (u8) port->geom.num_hd; +- geo->sectors = (u8) port->geom.num_sec; +- geo->cylinders = port->geom.num_cyl; ++ geo->heads = 0xff; ++ geo->sectors = 0x3f; ++ sector_div(cylinders, geo->heads * geo->sectors); ++ geo->cylinders = cylinders; ++ if ((sector_t)(geo->cylinders + 1) * geo->heads * geo->sectors < nsect) ++ geo->cylinders = 0xffff; + + return 0; + } + ++/* Add ioctl/CDROM_GET_CAPABILITY to support cdrom_id in udev ++ * when vdisk_mtype is VD_MEDIA_TYPE_CD or VD_MEDIA_TYPE_DVD. ++ * Needed to be able to install inside an ldom from an iso image. 
++ */ ++static int vdc_ioctl(struct block_device *bdev, fmode_t mode, ++ unsigned command, unsigned long argument) ++{ ++ int i; ++ struct gendisk *disk; ++ ++ switch (command) { ++ case CDROMMULTISESSION: ++ pr_debug(PFX "Multisession CDs not supported\n"); ++ for (i = 0; i < sizeof(struct cdrom_multisession); i++) ++ if (put_user(0, (char __user *)(argument + i))) ++ return -EFAULT; ++ return 0; ++ ++ case CDROM_GET_CAPABILITY: ++ disk = bdev->bd_disk; ++ ++ if (bdev->bd_disk && (disk->flags & GENHD_FL_CD)) ++ return 0; ++ return -EINVAL; ++ ++ default: ++ pr_debug(PFX "ioctl %08x not supported\n", command); ++ return -EINVAL; ++ } ++} ++ + static const struct block_device_operations vdc_fops = { + .owner = THIS_MODULE, + .getgeo = vdc_getgeo, ++ .ioctl = vdc_ioctl, + }; + + static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for) +@@ -165,9 +207,9 @@ static int vdc_handle_attr(struct vio_driver_state *vio, void *arg) + struct vio_disk_attr_info *pkt = arg; + + viodbg(HS, "GOT ATTR stype[0x%x] ops[%llx] disk_size[%llu] disk_type[%x] " +- "xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n", ++ "mtype[0x%x] xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n", + pkt->tag.stype, pkt->operations, +- pkt->vdisk_size, pkt->vdisk_type, ++ pkt->vdisk_size, pkt->vdisk_type, pkt->vdisk_mtype, + pkt->xfer_mode, pkt->vdisk_block_size, + pkt->max_xfer_size); + +@@ -192,8 +234,11 @@ static int vdc_handle_attr(struct vio_driver_state *vio, void *arg) + } + + port->operations = pkt->operations; +- port->vdisk_size = pkt->vdisk_size; + port->vdisk_type = pkt->vdisk_type; ++ if (vdc_version_supported(port, 1, 1)) { ++ port->vdisk_size = pkt->vdisk_size; ++ port->vdisk_mtype = pkt->vdisk_mtype; ++ } + if (pkt->max_xfer_size < port->max_xfer_size) + port->max_xfer_size = pkt->max_xfer_size; + port->vdisk_block_size = pkt->vdisk_block_size; +@@ -236,7 +281,9 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr, + + __blk_end_request(req, (desc->status ? -EIO : 0), desc->size); + +- if (blk_queue_stopped(port->disk->queue)) ++ /* restart blk queue when ring is half emptied */ ++ if (blk_queue_stopped(port->disk->queue) && ++ vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50) + blk_start_queue(port->disk->queue); + } + +@@ -388,12 +435,6 @@ static int __send_request(struct request *req) + for (i = 0; i < nsg; i++) + len += sg[i].length; + +- if (unlikely(vdc_tx_dring_avail(dr) < 1)) { +- blk_stop_queue(port->disk->queue); +- err = -ENOMEM; +- goto out; +- } +- + desc = vio_dring_cur(dr); + + err = ldc_map_sg(port->vio.lp, sg, nsg, +@@ -433,21 +474,32 @@ static int __send_request(struct request *req) + port->req_id++; + dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1); + } +-out: + + return err; + } + +-static void do_vdc_request(struct request_queue *q) ++static void do_vdc_request(struct request_queue *rq) + { +- while (1) { +- struct request *req = blk_fetch_request(q); ++ struct request *req; + +- if (!req) +- break; ++ while ((req = blk_peek_request(rq)) != NULL) { ++ struct vdc_port *port; ++ struct vio_dring_state *dr; + +- if (__send_request(req) < 0) +- __blk_end_request_all(req, -EIO); ++ port = req->rq_disk->private_data; ++ dr = &port->vio.drings[VIO_DRIVER_TX_RING]; ++ if (unlikely(vdc_tx_dring_avail(dr) < 1)) ++ goto wait; ++ ++ blk_start_request(req); ++ ++ if (__send_request(req) < 0) { ++ blk_requeue_request(rq, req); ++wait: ++ /* Avoid pointless unplugs. 
*/ ++ blk_stop_queue(rq); ++ break; ++ } + } + } + +@@ -656,25 +708,27 @@ static int probe_disk(struct vdc_port *port) + if (comp.err) + return comp.err; + +- err = generic_request(port, VD_OP_GET_VTOC, +- &port->label, sizeof(port->label)); +- if (err < 0) { +- printk(KERN_ERR PFX "VD_OP_GET_VTOC returns error %d\n", err); +- return err; +- } +- +- err = generic_request(port, VD_OP_GET_DISKGEOM, +- &port->geom, sizeof(port->geom)); +- if (err < 0) { +- printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns " +- "error %d\n", err); +- return err; ++ if (vdc_version_supported(port, 1, 1)) { ++ /* vdisk_size should be set during the handshake, if it wasn't ++ * then the underlying disk is reserved by another system ++ */ ++ if (port->vdisk_size == -1) ++ return -ENODEV; ++ } else { ++ struct vio_disk_geom geom; ++ ++ err = generic_request(port, VD_OP_GET_DISKGEOM, ++ &geom, sizeof(geom)); ++ if (err < 0) { ++ printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns " ++ "error %d\n", err); ++ return err; ++ } ++ port->vdisk_size = ((u64)geom.num_cyl * ++ (u64)geom.num_hd * ++ (u64)geom.num_sec); + } + +- port->vdisk_size = ((u64)port->geom.num_cyl * +- (u64)port->geom.num_hd * +- (u64)port->geom.num_sec); +- + q = blk_init_queue(do_vdc_request, &port->vio.lock); + if (!q) { + printk(KERN_ERR PFX "%s: Could not allocate queue.\n", +@@ -691,6 +745,10 @@ static int probe_disk(struct vdc_port *port) + + port->disk = g; + ++ /* Each segment in a request is up to an aligned page in size. */ ++ blk_queue_segment_boundary(q, PAGE_SIZE - 1); ++ blk_queue_max_segment_size(q, PAGE_SIZE); ++ + blk_queue_max_segments(q, port->ring_cookies); + blk_queue_max_hw_sectors(q, port->max_xfer_size); + g->major = vdc_major; +@@ -704,9 +762,32 @@ static int probe_disk(struct vdc_port *port) + + set_capacity(g, port->vdisk_size); + +- printk(KERN_INFO PFX "%s: %u sectors (%u MB)\n", ++ if (vdc_version_supported(port, 1, 1)) { ++ switch (port->vdisk_mtype) { ++ case VD_MEDIA_TYPE_CD: ++ pr_info(PFX "Virtual CDROM %s\n", port->disk_name); ++ g->flags |= GENHD_FL_CD; ++ g->flags |= GENHD_FL_REMOVABLE; ++ set_disk_ro(g, 1); ++ break; ++ ++ case VD_MEDIA_TYPE_DVD: ++ pr_info(PFX "Virtual DVD %s\n", port->disk_name); ++ g->flags |= GENHD_FL_CD; ++ g->flags |= GENHD_FL_REMOVABLE; ++ set_disk_ro(g, 1); ++ break; ++ ++ case VD_MEDIA_TYPE_FIXED: ++ pr_info(PFX "Virtual Hard disk %s\n", port->disk_name); ++ break; ++ } ++ } ++ ++ pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n", + g->disk_name, +- port->vdisk_size, (port->vdisk_size >> (20 - 9))); ++ port->vdisk_size, (port->vdisk_size >> (20 - 9)), ++ port->vio.ver.major, port->vio.ver.minor); + + add_disk(g); + +@@ -765,6 +846,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) + else + snprintf(port->disk_name, sizeof(port->disk_name), + VDCBLK_NAME "%c", 'a' + ((int)vdev->dev_no % 26)); ++ port->vdisk_size = -1; + + err = vio_driver_init(&port->vio, vdev, VDEV_DISK, + vdc_versions, ARRAY_SIZE(vdc_versions), +diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c +index ac1b43a04285..4f73c727a97a 100644 +--- a/drivers/firewire/core-cdev.c ++++ b/drivers/firewire/core-cdev.c +@@ -1637,8 +1637,7 @@ static int dispatch_ioctl(struct client *client, + _IOC_SIZE(cmd) > sizeof(buffer)) + return -ENOTTY; + +- if (_IOC_DIR(cmd) == _IOC_READ) +- memset(&buffer, 0, _IOC_SIZE(cmd)); ++ memset(&buffer, 0, sizeof(buffer)); + + if (_IOC_DIR(cmd) & _IOC_WRITE) + if (copy_from_user(&buffer, arg, _IOC_SIZE(cmd))) +diff --git 
a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c +index e62a9ce3e4dc..ead08a49bec0 100644 +--- a/drivers/gpu/drm/radeon/evergreen.c ++++ b/drivers/gpu/drm/radeon/evergreen.c +@@ -2379,6 +2379,7 @@ void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *sav + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + crtc_offsets[i], 1); + tmp |= EVERGREEN_CRTC_BLANK_DATA_EN; + WREG32(EVERGREEN_CRTC_BLANK_CONTROL + crtc_offsets[i], tmp); ++ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + crtc_offsets[i], 0); + } + } else { + tmp = RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i]); +diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c +index 7c5d72a6a26a..19e070f16e6b 100644 +--- a/drivers/input/mouse/alps.c ++++ b/drivers/input/mouse/alps.c +@@ -873,7 +873,13 @@ static psmouse_ret_t alps_process_byte(struct psmouse *psmouse) + { + struct alps_data *priv = psmouse->private; + +- if ((psmouse->packet[0] & 0xc8) == 0x08) { /* PS/2 packet */ ++ /* ++ * Check if we are dealing with a bare PS/2 packet, presumably from ++ * a device connected to the external PS/2 port. Because bare PS/2 ++ * protocol does not have enough constant bits to self-synchronize ++ * properly we only do this if the device is fully synchronized. ++ */ ++ if (!psmouse->out_of_sync_cnt && (psmouse->packet[0] & 0xc8) == 0x08) { + if (psmouse->pktcnt == 3) { + alps_report_bare_ps2_packet(psmouse, psmouse->packet, + true); +@@ -1816,6 +1822,9 @@ int alps_init(struct psmouse *psmouse) + /* We are having trouble resyncing ALPS touchpads so disable it for now */ + psmouse->resync_time = 0; + ++ /* Allow 2 invalid packets without resetting device */ ++ psmouse->resetafter = psmouse->pktsize * 2; ++ + return 0; + + init_fail: +diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c +index 2dea49c4279e..84cddccc0249 100644 +--- a/drivers/md/dm-raid.c ++++ b/drivers/md/dm-raid.c +@@ -785,8 +785,7 @@ struct dm_raid_superblock { + __le32 layout; + __le32 stripe_sectors; + +- __u8 pad[452]; /* Round struct to 512 bytes. */ +- /* Always set to 0 when writing. */ ++ /* Remainder of a logical block is zero-filled when writing (see super_sync()). */ + } __packed; + + static int read_disk_sb(struct md_rdev *rdev, int size) +@@ -823,7 +822,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) + test_bit(Faulty, &(rs->dev[i].rdev.flags))) + failed_devices |= (1ULL << i); + +- memset(sb, 0, sizeof(*sb)); ++ memset(sb + 1, 0, rdev->sb_size - sizeof(*sb)); + + sb->magic = cpu_to_le32(DM_RAID_MAGIC); + sb->features = cpu_to_le32(0); /* No features yet */ +@@ -858,7 +857,11 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) + uint64_t events_sb, events_refsb; + + rdev->sb_start = 0; +- rdev->sb_size = sizeof(*sb); ++ rdev->sb_size = bdev_logical_block_size(rdev->meta_bdev); ++ if (rdev->sb_size < sizeof(*sb) || rdev->sb_size > PAGE_SIZE) { ++ DMERR("superblock size of a logical block is no longer valid"); ++ return -EINVAL; ++ } + + ret = read_disk_sb(rdev, rdev->sb_size); + if (ret) +diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h +index 37d367bb9aa8..bf2b80d5c470 100644 +--- a/drivers/md/persistent-data/dm-btree-internal.h ++++ b/drivers/md/persistent-data/dm-btree-internal.h +@@ -42,6 +42,12 @@ struct btree_node { + } __packed; + + ++/* ++ * Locks a block using the btree node validator. 
++ */ ++int bn_read_lock(struct dm_btree_info *info, dm_block_t b, ++ struct dm_block **result); ++ + void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, + struct dm_btree_value_type *vt); + +diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c +index cf9fd676ae44..1b5e13ec7f96 100644 +--- a/drivers/md/persistent-data/dm-btree-spine.c ++++ b/drivers/md/persistent-data/dm-btree-spine.c +@@ -92,7 +92,7 @@ struct dm_block_validator btree_node_validator = { + + /*----------------------------------------------------------------*/ + +-static int bn_read_lock(struct dm_btree_info *info, dm_block_t b, ++int bn_read_lock(struct dm_btree_info *info, dm_block_t b, + struct dm_block **result) + { + return dm_tm_read_lock(info->tm, b, &btree_node_validator, result); +diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c +index 35865425e4b4..0a7592e88811 100644 +--- a/drivers/md/persistent-data/dm-btree.c ++++ b/drivers/md/persistent-data/dm-btree.c +@@ -812,22 +812,26 @@ EXPORT_SYMBOL_GPL(dm_btree_find_highest_key); + * FIXME: We shouldn't use a recursive algorithm when we have limited stack + * space. Also this only works for single level trees. + */ +-static int walk_node(struct ro_spine *s, dm_block_t block, ++static int walk_node(struct dm_btree_info *info, dm_block_t block, + int (*fn)(void *context, uint64_t *keys, void *leaf), + void *context) + { + int r; + unsigned i, nr; ++ struct dm_block *node; + struct btree_node *n; + uint64_t keys; + +- r = ro_step(s, block); +- n = ro_node(s); ++ r = bn_read_lock(info, block, &node); ++ if (r) ++ return r; ++ ++ n = dm_block_data(node); + + nr = le32_to_cpu(n->header.nr_entries); + for (i = 0; i < nr; i++) { + if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) { +- r = walk_node(s, value64(n, i), fn, context); ++ r = walk_node(info, value64(n, i), fn, context); + if (r) + goto out; + } else { +@@ -839,7 +843,7 @@ static int walk_node(struct ro_spine *s, dm_block_t block, + } + + out: +- ro_pop(s); ++ dm_tm_unlock(info->tm, node); + return r; + } + +@@ -847,15 +851,7 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, + int (*fn)(void *context, uint64_t *keys, void *leaf), + void *context) + { +- int r; +- struct ro_spine spine; +- + BUG_ON(info->levels > 1); +- +- init_ro_spine(&spine, info); +- r = walk_node(&spine, root, fn, context); +- exit_ro_spine(&spine); +- +- return r; ++ return walk_node(info, root, fn, context); + } + EXPORT_SYMBOL_GPL(dm_btree_walk); +diff --git a/drivers/media/usb/ttusb-dec/ttusbdecfe.c b/drivers/media/usb/ttusb-dec/ttusbdecfe.c +index 5c45c9d0712d..9c29552aedec 100644 +--- a/drivers/media/usb/ttusb-dec/ttusbdecfe.c ++++ b/drivers/media/usb/ttusb-dec/ttusbdecfe.c +@@ -156,6 +156,9 @@ static int ttusbdecfe_dvbs_diseqc_send_master_cmd(struct dvb_frontend* fe, struc + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00 }; + ++ if (cmd->msg_len > sizeof(b) - 4) ++ return -EINVAL; ++ + memcpy(&b[4], cmd->msg, cmd->msg_len); + + state->config->send_command(fe, 0x72, +diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c +index 99cc0b07a713..0513ea0906dd 100644 +--- a/drivers/misc/mei/bus.c ++++ b/drivers/misc/mei/bus.c +@@ -71,7 +71,7 @@ static int mei_cl_device_probe(struct device *dev) + + dev_dbg(dev, "Device probe\n"); + +- strncpy(id.name, dev_name(dev), MEI_CL_NAME_SIZE); ++ strlcpy(id.name, dev_name(dev), sizeof(id.name)); + + return driver->probe(device, &id); + } +diff --git 
a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c +index 4e6877a032a8..bd8800c85525 100644 +--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c +@@ -191,6 +191,39 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, + MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); + } + ++static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, ++ struct mlx4_en_tx_ring *ring, int index, ++ u8 owner) ++{ ++ __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); ++ struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; ++ struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; ++ void *end = ring->buf + ring->buf_size; ++ __be32 *ptr = (__be32 *)tx_desc; ++ int i; ++ ++ /* Optimize the common case when there are no wraparounds */ ++ if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { ++ /* Stamp the freed descriptor */ ++ for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; ++ i += STAMP_STRIDE) { ++ *ptr = stamp; ++ ptr += STAMP_DWORDS; ++ } ++ } else { ++ /* Stamp the freed descriptor */ ++ for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; ++ i += STAMP_STRIDE) { ++ *ptr = stamp; ++ ptr += STAMP_DWORDS; ++ if ((void *)ptr >= end) { ++ ptr = ring->buf; ++ stamp ^= cpu_to_be32(0x80000000); ++ } ++ } ++ } ++} ++ + + static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, +@@ -205,8 +238,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + void *end = ring->buf + ring->buf_size; + int frags = skb_shinfo(skb)->nr_frags; + int i; +- __be32 *ptr = (__be32 *)tx_desc; +- __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); + struct skb_shared_hwtstamps hwts; + + if (timestamp) { +@@ -232,12 +263,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + skb_frag_size(frag), PCI_DMA_TODEVICE); + } + } +- /* Stamp the freed descriptor */ +- for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { +- *ptr = stamp; +- ptr += STAMP_DWORDS; +- } +- + } else { + if (!tx_info->inl) { + if ((void *) data >= end) { +@@ -263,16 +288,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + ++data; + } + } +- /* Stamp the freed descriptor */ +- for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { +- *ptr = stamp; +- ptr += STAMP_DWORDS; +- if ((void *) ptr >= end) { +- ptr = ring->buf; +- stamp ^= cpu_to_be32(0x80000000); +- } +- } +- + } + dev_kfree_skb_any(skb); + return tx_info->nr_txbb; +@@ -318,8 +333,9 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) + struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; + struct mlx4_cqe *cqe; + u16 index; +- u16 new_index, ring_index; ++ u16 new_index, ring_index, stamp_index; + u32 txbbs_skipped = 0; ++ u32 txbbs_stamp = 0; + u32 cons_index = mcq->cons_index; + int size = cq->size; + u32 size_mask = ring->size_mask; +@@ -335,6 +351,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) + index = cons_index & size_mask; + cqe = &buf[(index << factor) + factor]; + ring_index = ring->cons & size_mask; ++ stamp_index = ring_index; + + /* Process all completed CQEs */ + while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, +@@ -359,6 +376,12 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) + priv, ring, ring_index, + !!((ring->cons + txbbs_skipped) & + ring->size), timestamp); ++ ++ mlx4_en_stamp_wqe(priv, ring, stamp_index, ++ !!((ring->cons + txbbs_stamp) & ++ 
ring->size)); ++ stamp_index = ring_index; ++ txbbs_stamp = txbbs_skipped; + packets++; + bytes += ring->tx_info[ring_index].nr_bytes; + } while (ring_index != new_index); +diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c +index 398faff8be7a..ade8bdfc03af 100644 +--- a/drivers/net/ethernet/sun/sunvnet.c ++++ b/drivers/net/ethernet/sun/sunvnet.c +@@ -656,7 +656,7 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) + spin_lock_irqsave(&port->vio.lock, flags); + + dr = &port->vio.drings[VIO_DRIVER_TX_RING]; +- if (unlikely(vnet_tx_dring_avail(dr) < 2)) { ++ if (unlikely(vnet_tx_dring_avail(dr) < 1)) { + if (!netif_queue_stopped(dev)) { + netif_stop_queue(dev); + +@@ -704,7 +704,7 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) + dev->stats.tx_bytes += skb->len; + + dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1); +- if (unlikely(vnet_tx_dring_avail(dr) < 2)) { ++ if (unlikely(vnet_tx_dring_avail(dr) < 1)) { + netif_stop_queue(dev); + if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr)) + netif_wake_queue(dev); +diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c +index 9e56eb479a4f..2d255ba911d5 100644 +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -625,6 +625,8 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, + if (skb->ip_summed == CHECKSUM_PARTIAL) { + vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + vnet_hdr->csum_start = skb_checksum_start_offset(skb); ++ if (vlan_tx_tag_present(skb)) ++ vnet_hdr->csum_start += VLAN_HLEN; + vnet_hdr->csum_offset = skb->csum_offset; + } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; +diff --git a/drivers/net/wireless/iwlwifi/iwl-trans.h b/drivers/net/wireless/iwlwifi/iwl-trans.h +index 72d2ecce0b8d..d8df1d9b0de3 100644 +--- a/drivers/net/wireless/iwlwifi/iwl-trans.h ++++ b/drivers/net/wireless/iwlwifi/iwl-trans.h +@@ -489,6 +489,7 @@ enum iwl_trans_state { + * Set during transport allocation. + * @hw_id_str: a string with info about HW ID. Set during transport allocation. + * @pm_support: set to true in start_hw if link pm is supported ++ * @ltr_enabled: set to true if the LTR is enabled + * @dev_cmd_pool: pool for Tx cmd allocation - for internal use only. + * The user should use iwl_trans_{alloc,free}_tx_cmd. 
+ * @dev_cmd_headroom: room needed for the transport's private use before the +@@ -513,6 +514,7 @@ struct iwl_trans { + u8 rx_mpdu_cmd, rx_mpdu_cmd_hdr_size; + + bool pm_support; ++ bool ltr_enabled; + + /* The following fields are internal only */ + struct kmem_cache *dev_cmd_pool; +diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h +index 81fe45f46be7..ac38ecf13c18 100644 +--- a/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h ++++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h +@@ -67,7 +67,40 @@ + /* Power Management Commands, Responses, Notifications */ + + /** +- * enum iwl_scan_flags - masks for power table command flags ++ * enum iwl_ltr_config_flags - masks for LTR config command flags ++ * @LTR_CFG_FLAG_FEATURE_ENABLE: Feature operational status ++ * @LTR_CFG_FLAG_HW_DIS_ON_SHADOW_REG_ACCESS: allow LTR change on shadow ++ * memory access ++ * @LTR_CFG_FLAG_HW_EN_SHRT_WR_THROUGH: allow LTR msg send on ANY LTR ++ * reg change ++ * @LTR_CFG_FLAG_HW_DIS_ON_D0_2_D3: allow LTR msg send on transition from ++ * D0 to D3 ++ * @LTR_CFG_FLAG_SW_SET_SHORT: fixed static short LTR register ++ * @LTR_CFG_FLAG_SW_SET_LONG: fixed static short LONG register ++ * @LTR_CFG_FLAG_DENIE_C10_ON_PD: allow going into C10 on PD ++ */ ++enum iwl_ltr_config_flags { ++ LTR_CFG_FLAG_FEATURE_ENABLE = BIT(0), ++ LTR_CFG_FLAG_HW_DIS_ON_SHADOW_REG_ACCESS = BIT(1), ++ LTR_CFG_FLAG_HW_EN_SHRT_WR_THROUGH = BIT(2), ++ LTR_CFG_FLAG_HW_DIS_ON_D0_2_D3 = BIT(3), ++ LTR_CFG_FLAG_SW_SET_SHORT = BIT(4), ++ LTR_CFG_FLAG_SW_SET_LONG = BIT(5), ++ LTR_CFG_FLAG_DENIE_C10_ON_PD = BIT(6), ++}; ++ ++/** ++ * struct iwl_ltr_config_cmd - configures the LTR ++ * @flags: See %enum iwl_ltr_config_flags ++ */ ++struct iwl_ltr_config_cmd { ++ __le32 flags; ++ __le32 static_long; ++ __le32 static_short; ++} __packed; ++ ++/** ++ * enum iwl_power_flags - masks for power table command flags + * @POWER_FLAGS_POWER_SAVE_ENA_MSK: '1' Allow to save power by turning off + * receiver and transmitter. '0' - does not allow. 
+ * @POWER_FLAGS_POWER_MANAGEMENT_ENA_MSK: '0' Driver disables power management, +diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/iwlwifi/mvm/fw-api.h +index c6384555aab4..4b6730db42a5 100644 +--- a/drivers/net/wireless/iwlwifi/mvm/fw-api.h ++++ b/drivers/net/wireless/iwlwifi/mvm/fw-api.h +@@ -138,6 +138,7 @@ enum { + + /* Power */ + POWER_TABLE_CMD = 0x77, ++ LTR_CONFIG = 0xee, + + /* Scanning */ + SCAN_REQUEST_CMD = 0x80, +diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c +index e18c92dd60ec..d250d451fd01 100644 +--- a/drivers/net/wireless/iwlwifi/mvm/fw.c ++++ b/drivers/net/wireless/iwlwifi/mvm/fw.c +@@ -443,6 +443,15 @@ int iwl_mvm_up(struct iwl_mvm *mvm) + if (ret) + goto error; + ++ if (mvm->trans->ltr_enabled) { ++ struct iwl_ltr_config_cmd cmd = { ++ .flags = cpu_to_le32(LTR_CFG_FLAG_FEATURE_ENABLE), ++ }; ++ ++ WARN_ON(iwl_mvm_send_cmd_pdu(mvm, LTR_CONFIG, 0, ++ sizeof(cmd), &cmd)); ++ } ++ + IWL_DEBUG_INFO(mvm, "RT uCode started.\n"); + + return 0; +diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c +index 388c8a914960..649d301cfa2a 100644 +--- a/drivers/net/wireless/iwlwifi/mvm/ops.c ++++ b/drivers/net/wireless/iwlwifi/mvm/ops.c +@@ -293,6 +293,7 @@ static const char *iwl_mvm_cmd_strings[REPLY_MAX] = { + CMD(BT_PROFILE_NOTIFICATION), + CMD(BT_CONFIG), + CMD(MCAST_FILTER_CMD), ++ CMD(LTR_CONFIG), + }; + #undef CMD + +diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c +index ff04135d37af..6a5eb2b29418 100644 +--- a/drivers/net/wireless/iwlwifi/pcie/trans.c ++++ b/drivers/net/wireless/iwlwifi/pcie/trans.c +@@ -116,11 +116,13 @@ static void iwl_pcie_set_pwr(struct iwl_trans *trans, bool vaux) + + /* PCI registers */ + #define PCI_CFG_RETRY_TIMEOUT 0x041 ++#define PCI_EXP_DEVCTL2_LTR_EN 0x0400 + + static void iwl_pcie_apm_config(struct iwl_trans *trans) + { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u16 lctl; ++ u16 cap; + + /* + * HW bug W/A for instability in PCIe bus L0S->L1 transition. +@@ -131,16 +133,17 @@ static void iwl_pcie_apm_config(struct iwl_trans *trans) + * power savings, even without L1. + */ + pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_LNKCTL, &lctl); +- if (lctl & PCI_EXP_LNKCTL_ASPM_L1) { +- /* L1-ASPM enabled; disable(!) L0S */ ++ if (lctl & PCI_EXP_LNKCTL_ASPM_L1) + iwl_set_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED); +- dev_info(trans->dev, "L1 Enabled; Disabling L0S\n"); +- } else { +- /* L1-ASPM disabled; enable(!) L0S */ ++ else + iwl_clear_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED); +- dev_info(trans->dev, "L1 Disabled; Enabling L0S\n"); +- } + trans->pm_support = !(lctl & PCI_EXP_LNKCTL_ASPM_L0S); ++ ++ pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_DEVCTL2, &cap); ++ trans->ltr_enabled = cap & PCI_EXP_DEVCTL2_LTR_EN; ++ dev_info(trans->dev, "L1 %sabled - LTR %sabled\n", ++ (lctl & PCI_EXP_LNKCTL_ASPM_L1) ? "En" : "Dis", ++ trans->ltr_enabled ? 
"En" : "Dis"); + } + + /* +diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c +index fa9a2171cc13..b264d8fe1908 100644 +--- a/drivers/platform/x86/dell-wmi.c ++++ b/drivers/platform/x86/dell-wmi.c +@@ -163,18 +163,24 @@ static void dell_wmi_notify(u32 value, void *context) + const struct key_entry *key; + int reported_key; + u16 *buffer_entry = (u16 *)obj->buffer.pointer; ++ int buffer_size = obj->buffer.length/2; + +- if (dell_new_hk_type && (buffer_entry[1] != 0x10)) { ++ if (buffer_size >= 2 && dell_new_hk_type && buffer_entry[1] != 0x10) { + pr_info("Received unknown WMI event (0x%x)\n", + buffer_entry[1]); + kfree(obj); + return; + } + +- if (dell_new_hk_type || buffer_entry[1] == 0x0) ++ if (buffer_size >= 3 && (dell_new_hk_type || buffer_entry[1] == 0x0)) + reported_key = (int)buffer_entry[2]; +- else ++ else if (buffer_size >= 2) + reported_key = (int)buffer_entry[1] & 0xffff; ++ else { ++ pr_info("Received unknown WMI event\n"); ++ kfree(obj); ++ return; ++ } + + key = sparse_keymap_entry_from_scancode(dell_wmi_input_dev, + reported_key); +diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c +index 287667c20c6a..62ed744bbe06 100644 +--- a/drivers/scsi/hpsa.c ++++ b/drivers/scsi/hpsa.c +@@ -1206,8 +1206,8 @@ static void complete_scsi_command(struct CommandList *cp) + scsi_set_resid(cmd, ei->ResidualCnt); + + if (ei->CommandStatus == 0) { +- cmd->scsi_done(cmd); + cmd_free(h, cp); ++ cmd->scsi_done(cmd); + return; + } + +@@ -1380,8 +1380,8 @@ static void complete_scsi_command(struct CommandList *cp) + dev_warn(&h->pdev->dev, "cp %p returned unknown status %x\n", + cp, ei->CommandStatus); + } +- cmd->scsi_done(cmd); + cmd_free(h, cp); ++ cmd->scsi_done(cmd); + } + + static void hpsa_pci_unmap(struct pci_dev *pdev, +diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c +index f43de1e56420..3668b1b23b5a 100644 +--- a/drivers/scsi/scsi_error.c ++++ b/drivers/scsi/scsi_error.c +@@ -1689,8 +1689,10 @@ static void scsi_restart_operations(struct Scsi_Host *shost) + * is no point trying to lock the door of an off-line device. 
+ */ + shost_for_each_device(sdev, shost) { +- if (scsi_device_online(sdev) && sdev->locked) ++ if (scsi_device_online(sdev) && sdev->was_reset && sdev->locked) { + scsi_eh_lock_door(sdev); ++ sdev->was_reset = 0; ++ } + } + + /* +diff --git a/fs/ioprio.c b/fs/ioprio.c +index e50170ca7c33..31666c92b46a 100644 +--- a/fs/ioprio.c ++++ b/fs/ioprio.c +@@ -157,14 +157,16 @@ out: + + int ioprio_best(unsigned short aprio, unsigned short bprio) + { +- unsigned short aclass = IOPRIO_PRIO_CLASS(aprio); +- unsigned short bclass = IOPRIO_PRIO_CLASS(bprio); ++ unsigned short aclass; ++ unsigned short bclass; + +- if (aclass == IOPRIO_CLASS_NONE) +- aclass = IOPRIO_CLASS_BE; +- if (bclass == IOPRIO_CLASS_NONE) +- bclass = IOPRIO_CLASS_BE; ++ if (!ioprio_valid(aprio)) ++ aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); ++ if (!ioprio_valid(bprio)) ++ bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); + ++ aclass = IOPRIO_PRIO_CLASS(aprio); ++ bclass = IOPRIO_PRIO_CLASS(bprio); + if (aclass == bclass) + return min(aprio, bprio); + if (aclass > bclass) +diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c +index 4b49a8c6ccad..ef0c394b7bf5 100644 +--- a/fs/nfs/delegation.c ++++ b/fs/nfs/delegation.c +@@ -108,6 +108,8 @@ again: + continue; + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + continue; ++ if (!nfs4_valid_open_stateid(state)) ++ continue; + if (!nfs4_stateid_match(&state->stateid, stateid)) + continue; + get_nfs_open_context(ctx); +@@ -175,7 +177,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * + { + int res = 0; + +- res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync); ++ if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) ++ res = nfs4_proc_delegreturn(inode, ++ delegation->cred, ++ &delegation->stateid, ++ issync); + nfs_free_delegation(delegation); + return res; + } +@@ -361,11 +367,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation + { + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_inode *nfsi = NFS_I(inode); +- int err; ++ int err = 0; + + if (delegation == NULL) + return 0; + do { ++ if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) ++ break; + err = nfs_delegation_claim_opens(inode, &delegation->stateid); + if (!issync || err != -EAGAIN) + break; +@@ -586,10 +594,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl + rcu_read_unlock(); + } + ++static void nfs_revoke_delegation(struct inode *inode) ++{ ++ struct nfs_delegation *delegation; ++ rcu_read_lock(); ++ delegation = rcu_dereference(NFS_I(inode)->delegation); ++ if (delegation != NULL) { ++ set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); ++ nfs_mark_return_delegation(NFS_SERVER(inode), delegation); ++ } ++ rcu_read_unlock(); ++} ++ + void nfs_remove_bad_delegation(struct inode *inode) + { + struct nfs_delegation *delegation; + ++ nfs_revoke_delegation(inode); + delegation = nfs_inode_detach_delegation(inode); + if (delegation) { + nfs_inode_find_state_and_recover(inode, &delegation->stateid); +diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h +index 9a79c7a99d6d..e02b090ab9da 100644 +--- a/fs/nfs/delegation.h ++++ b/fs/nfs/delegation.h +@@ -31,6 +31,7 @@ enum { + NFS_DELEGATION_RETURN_IF_CLOSED, + NFS_DELEGATION_REFERENCED, + NFS_DELEGATION_RETURNING, ++ NFS_DELEGATION_REVOKED, + }; + + int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); +diff --git a/fs/nfs/direct.c 
b/fs/nfs/direct.c +index 0bd7a55a5f07..725e87538c98 100644 +--- a/fs/nfs/direct.c ++++ b/fs/nfs/direct.c +@@ -180,6 +180,7 @@ static void nfs_direct_req_free(struct kref *kref) + { + struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + ++ nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo); + if (dreq->l_ctx != NULL) + nfs_put_lock_context(dreq->l_ctx); + if (dreq->ctx != NULL) +diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c +index cd4b9073dd20..e9be01b2cc5a 100644 +--- a/fs/nfs/inode.c ++++ b/fs/nfs/inode.c +@@ -519,7 +519,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) + { + struct inode *inode = dentry->d_inode; + int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; +- int err; ++ int err = 0; + + /* Flush out writes to the server in order to update c/mtime. */ + if (S_ISREG(inode->i_mode)) { +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 69fc437be661..78787948f69d 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -1416,7 +1416,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct + nfs_inode_find_state_and_recover(state->inode, + stateid); + nfs4_schedule_stateid_recovery(server, state); +- return 0; ++ return -EAGAIN; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + set_bit(NFS_DELEGATED_STATE, &state->flags); +@@ -1845,6 +1845,28 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta + return ret; + } + ++static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state) ++{ ++ nfs_remove_bad_delegation(state->inode); ++ write_seqlock(&state->seqlock); ++ nfs4_stateid_copy(&state->stateid, &state->open_stateid); ++ write_sequnlock(&state->seqlock); ++ clear_bit(NFS_DELEGATED_STATE, &state->flags); ++} ++ ++static void nfs40_clear_delegation_stateid(struct nfs4_state *state) ++{ ++ if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL) ++ nfs_finish_clear_delegation_stateid(state); ++} ++ ++static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) ++{ ++ /* NFSv4.0 doesn't allow for delegation recovery on open expire */ ++ nfs40_clear_delegation_stateid(state); ++ return nfs4_open_expired(sp, state); ++} ++ + #if defined(CONFIG_NFS_V4_1) + static void nfs41_clear_delegation_stateid(struct nfs4_state *state) + { +@@ -6974,7 +6996,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { + static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { + .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, + .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, +- .recover_open = nfs4_open_expired, ++ .recover_open = nfs40_open_expired, + .recover_lock = nfs4_lock_expired, + .establish_clid = nfs4_init_clientid, + .get_clid_cred = nfs4_get_setclientid_cred, +diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h +index 7279b94c01da..91aa89e1aaa0 100644 +--- a/include/linux/clocksource.h ++++ b/include/linux/clocksource.h +@@ -285,7 +285,7 @@ extern struct clocksource* clocksource_get_next(void); + extern void clocksource_change_rating(struct clocksource *cs, int rating); + extern void clocksource_suspend(void); + extern void clocksource_resume(void); +-extern struct clocksource * __init __weak clocksource_default_clock(void); ++extern struct clocksource * __init clocksource_default_clock(void); + extern void clocksource_mark_unstable(struct clocksource *cs); + + extern void +diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h +index 
c6e091bf39a5..bdfc95bddde9 100644 +--- a/include/linux/kgdb.h ++++ b/include/linux/kgdb.h +@@ -283,7 +283,7 @@ struct kgdb_io { + + extern struct kgdb_arch arch_kgdb_ops; + +-extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs); ++extern unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs); + + #ifdef CONFIG_SERIAL_KGDB_NMI + extern int kgdb_register_nmi_console(void); +diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h +index d6183f06d8c1..a3b4812f494f 100644 +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -124,6 +124,25 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, + extern void mem_cgroup_replace_page_cache(struct page *oldpage, + struct page *newpage); + ++static inline void mem_cgroup_oom_enable(void) ++{ ++ WARN_ON(current->memcg_oom.may_oom); ++ current->memcg_oom.may_oom = 1; ++} ++ ++static inline void mem_cgroup_oom_disable(void) ++{ ++ WARN_ON(!current->memcg_oom.may_oom); ++ current->memcg_oom.may_oom = 0; ++} ++ ++static inline bool task_in_memcg_oom(struct task_struct *p) ++{ ++ return p->memcg_oom.memcg; ++} ++ ++bool mem_cgroup_oom_synchronize(bool wait); ++ + #ifdef CONFIG_MEMCG_SWAP + extern int do_swap_account; + #endif +@@ -347,6 +366,24 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page, + { + } + ++static inline void mem_cgroup_oom_enable(void) ++{ ++} ++ ++static inline void mem_cgroup_oom_disable(void) ++{ ++} ++ ++static inline bool task_in_memcg_oom(struct task_struct *p) ++{ ++ return false; ++} ++ ++static inline bool mem_cgroup_oom_synchronize(bool wait) ++{ ++ return false; ++} ++ + static inline void mem_cgroup_inc_page_stat(struct page *page, + enum mem_cgroup_page_stat_item idx) + { +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 7da14357aa76..d4cdac903468 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -167,6 +167,7 @@ extern pgprot_t protection_map[16]; + #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ + #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ + #define FAULT_FLAG_TRIED 0x40 /* second try */ ++#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */ + + /* + * vm_fault is filled by the the pagefault handler and passed to the vma's +diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h +index 104b62f23ee0..54e351aa4d2e 100644 +--- a/include/linux/nfs_xdr.h ++++ b/include/linux/nfs_xdr.h +@@ -1184,11 +1184,22 @@ struct nfs41_free_stateid_res { + unsigned int status; + }; + ++static inline void ++nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) ++{ ++ kfree(cinfo->buckets); ++} ++ + #else + + struct pnfs_ds_commit_info { + }; + ++static inline void ++nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) ++{ ++} ++ + #endif /* CONFIG_NFS_V4_1 */ + + struct nfs_page; +diff --git a/include/linux/sched.h b/include/linux/sched.h +index f87e9a8d364f..00c1d4f45072 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1411,6 +1411,12 @@ struct task_struct { + unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ + } memcg_batch; + unsigned int memcg_kmem_skip_account; ++ struct memcg_oom_info { ++ struct mem_cgroup *memcg; ++ gfp_t gfp_mask; ++ int order; ++ unsigned int may_oom:1; ++ } memcg_oom; + #endif + #ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_t ptrace_bp_refcnt; +diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h +index cd89510eab2a..845ab6decc45 100644 +--- 
a/include/net/sctp/sctp.h ++++ b/include/net/sctp/sctp.h +@@ -540,6 +540,11 @@ static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_associat + asoc->pmtu_pending = 0; + } + ++static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk) ++{ ++ return !list_empty(&chunk->list); ++} ++ + /* Walk through a list of TLV parameters. Don't trust the + * individual parameter lengths and instead depend on + * the chunk length to indicate when to stop. Make sure +diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h +index 2a82d1384706..c4c9458f37cd 100644 +--- a/include/net/sctp/sm.h ++++ b/include/net/sctp/sm.h +@@ -255,9 +255,9 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *, + int, __be16); + struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc, + union sctp_addr *addr); +-int sctp_verify_asconf(const struct sctp_association *asoc, +- struct sctp_paramhdr *param_hdr, void *chunk_end, +- struct sctp_paramhdr **errp); ++bool sctp_verify_asconf(const struct sctp_association *asoc, ++ struct sctp_chunk *chunk, bool addr_param_needed, ++ struct sctp_paramhdr **errp); + struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, + struct sctp_chunk *asconf); + int sctp_process_asconf_ack(struct sctp_association *asoc, +diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h +index 5dda450eb55b..2ec9fbcd06f9 100644 +--- a/include/uapi/linux/netfilter/xt_bpf.h ++++ b/include/uapi/linux/netfilter/xt_bpf.h +@@ -6,6 +6,8 @@ + + #define XT_BPF_MAX_NUM_INSTR 64 + ++struct sk_filter; ++ + struct xt_bpf_info { + __u16 bpf_program_num_elem; + struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index b0e99deb6d05..a0f0ab2ac2a8 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -123,7 +123,6 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) + { + struct ctl_table ipc_table; +- size_t lenp_bef = *lenp; + int oldval; + int rc; + +@@ -133,7 +132,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, + + rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); + +- if (write && !rc && lenp_bef == *lenp) { ++ if (write && !rc) { + int newval = *((int *)(ipc_table.data)); + /* + * The file "auto_msgmni" has correctly been set. 
+diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c +index 43c307dc9453..00c4459f76df 100644 +--- a/kernel/audit_tree.c ++++ b/kernel/audit_tree.c +@@ -154,6 +154,7 @@ static struct audit_chunk *alloc_chunk(int count) + chunk->owners[i].index = i; + } + fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch); ++ chunk->mark.mask = FS_IN_IGNORED; + return chunk; + } + +diff --git a/kernel/events/core.c b/kernel/events/core.c +index 0b4733447151..3f63ea6464ca 100644 +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -39,6 +39,7 @@ + #include <linux/hw_breakpoint.h> + #include <linux/mm_types.h> + #include <linux/cgroup.h> ++#include <linux/compat.h> + + #include "internal.h" + +@@ -3490,6 +3491,25 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) + return 0; + } + ++#ifdef CONFIG_COMPAT ++static long perf_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ switch (_IOC_NR(cmd)) { ++ case _IOC_NR(PERF_EVENT_IOC_SET_FILTER): ++ /* Fix up pointer size (usually 4 -> 8 in 32-on-64-bit case */ ++ if (_IOC_SIZE(cmd) == sizeof(compat_uptr_t)) { ++ cmd &= ~IOCSIZE_MASK; ++ cmd |= sizeof(void *) << IOCSIZE_SHIFT; ++ } ++ break; ++ } ++ return perf_ioctl(file, cmd, arg); ++} ++#else ++# define perf_compat_ioctl NULL ++#endif ++ + int perf_event_task_enable(void) + { + struct perf_event *event; +@@ -3961,7 +3981,7 @@ static const struct file_operations perf_fops = { + .read = perf_read, + .poll = perf_poll, + .unlocked_ioctl = perf_ioctl, +- .compat_ioctl = perf_ioctl, ++ .compat_ioctl = perf_compat_ioctl, + .mmap = perf_mmap, + .fasync = perf_fasync, + }; +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index f45e21ab9cea..eaa3accb01e7 100644 +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -302,6 +302,7 @@ struct mem_cgroup { + + bool oom_lock; + atomic_t under_oom; ++ atomic_t oom_wakeups; + + atomic_t refcnt; + +@@ -2075,15 +2076,18 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, + return total; + } + ++static DEFINE_SPINLOCK(memcg_oom_lock); ++ + /* + * Check OOM-Killer is already running under our hierarchy. + * If someone is running, return false. 
+- * Has to be called with memcg_oom_lock + */ +-static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg) ++static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg) + { + struct mem_cgroup *iter, *failed = NULL; + ++ spin_lock(&memcg_oom_lock); ++ + for_each_mem_cgroup_tree(iter, memcg) { + if (iter->oom_lock) { + /* +@@ -2097,33 +2101,33 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg) + iter->oom_lock = true; + } + +- if (!failed) +- return true; +- +- /* +- * OK, we failed to lock the whole subtree so we have to clean up +- * what we set up to the failing subtree +- */ +- for_each_mem_cgroup_tree(iter, memcg) { +- if (iter == failed) { +- mem_cgroup_iter_break(memcg, iter); +- break; ++ if (failed) { ++ /* ++ * OK, we failed to lock the whole subtree so we have ++ * to clean up what we set up to the failing subtree ++ */ ++ for_each_mem_cgroup_tree(iter, memcg) { ++ if (iter == failed) { ++ mem_cgroup_iter_break(memcg, iter); ++ break; ++ } ++ iter->oom_lock = false; + } +- iter->oom_lock = false; + } +- return false; ++ ++ spin_unlock(&memcg_oom_lock); ++ ++ return !failed; + } + +-/* +- * Has to be called with memcg_oom_lock +- */ +-static int mem_cgroup_oom_unlock(struct mem_cgroup *memcg) ++static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg) + { + struct mem_cgroup *iter; + ++ spin_lock(&memcg_oom_lock); + for_each_mem_cgroup_tree(iter, memcg) + iter->oom_lock = false; +- return 0; ++ spin_unlock(&memcg_oom_lock); + } + + static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg) +@@ -2147,7 +2151,6 @@ static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg) + atomic_add_unless(&iter->under_oom, -1, 0); + } + +-static DEFINE_SPINLOCK(memcg_oom_lock); + static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq); + + struct oom_wait_info { +@@ -2177,6 +2180,7 @@ static int memcg_oom_wake_function(wait_queue_t *wait, + + static void memcg_wakeup_oom(struct mem_cgroup *memcg) + { ++ atomic_inc(&memcg->oom_wakeups); + /* for filtering, pass "memcg" as argument. */ + __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg); + } +@@ -2187,57 +2191,97 @@ static void memcg_oom_recover(struct mem_cgroup *memcg) + memcg_wakeup_oom(memcg); + } + +-/* +- * try to call OOM killer. returns false if we should exit memory-reclaim loop. ++static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) ++{ ++ if (!current->memcg_oom.may_oom) ++ return; ++ /* ++ * We are in the middle of the charge context here, so we ++ * don't want to block when potentially sitting on a callstack ++ * that holds all kinds of filesystem and mm locks. ++ * ++ * Also, the caller may handle a failed allocation gracefully ++ * (like optional page cache readahead) and so an OOM killer ++ * invocation might not even be necessary. ++ * ++ * That's why we don't do anything here except remember the ++ * OOM context and then deal with it at the end of the page ++ * fault when the stack is unwound, the locks are released, ++ * and when we know whether the fault was overall successful. ++ */ ++ css_get(&memcg->css); ++ current->memcg_oom.memcg = memcg; ++ current->memcg_oom.gfp_mask = mask; ++ current->memcg_oom.order = order; ++} ++ ++/** ++ * mem_cgroup_oom_synchronize - complete memcg OOM handling ++ * @handle: actually kill/wait or just clean up the OOM state ++ * ++ * This has to be called at the end of a page fault if the memcg OOM ++ * handler was enabled. 
++ * ++ * Memcg supports userspace OOM handling where failed allocations must ++ * sleep on a waitqueue until the userspace task resolves the ++ * situation. Sleeping directly in the charge context with all kinds ++ * of locks held is not a good idea, instead we remember an OOM state ++ * in the task and mem_cgroup_oom_synchronize() has to be called at ++ * the end of the page fault to complete the OOM handling. ++ * ++ * Returns %true if an ongoing memcg OOM situation was detected and ++ * completed, %false otherwise. + */ +-static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask, +- int order) ++bool mem_cgroup_oom_synchronize(bool handle) + { ++ struct mem_cgroup *memcg = current->memcg_oom.memcg; + struct oom_wait_info owait; +- bool locked, need_to_kill; ++ bool locked; ++ ++ /* OOM is global, do not handle */ ++ if (!memcg) ++ return false; ++ ++ if (!handle) ++ goto cleanup; + + owait.memcg = memcg; + owait.wait.flags = 0; + owait.wait.func = memcg_oom_wake_function; + owait.wait.private = current; + INIT_LIST_HEAD(&owait.wait.task_list); +- need_to_kill = true; +- mem_cgroup_mark_under_oom(memcg); + +- /* At first, try to OOM lock hierarchy under memcg.*/ +- spin_lock(&memcg_oom_lock); +- locked = mem_cgroup_oom_lock(memcg); +- /* +- * Even if signal_pending(), we can't quit charge() loop without +- * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL +- * under OOM is always welcomed, use TASK_KILLABLE here. +- */ + prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE); +- if (!locked || memcg->oom_kill_disable) +- need_to_kill = false; ++ mem_cgroup_mark_under_oom(memcg); ++ ++ locked = mem_cgroup_oom_trylock(memcg); ++ + if (locked) + mem_cgroup_oom_notify(memcg); +- spin_unlock(&memcg_oom_lock); + +- if (need_to_kill) { ++ if (locked && !memcg->oom_kill_disable) { ++ mem_cgroup_unmark_under_oom(memcg); + finish_wait(&memcg_oom_waitq, &owait.wait); +- mem_cgroup_out_of_memory(memcg, mask, order); ++ mem_cgroup_out_of_memory(memcg, current->memcg_oom.gfp_mask, ++ current->memcg_oom.order); + } else { + schedule(); ++ mem_cgroup_unmark_under_oom(memcg); + finish_wait(&memcg_oom_waitq, &owait.wait); + } +- spin_lock(&memcg_oom_lock); +- if (locked) +- mem_cgroup_oom_unlock(memcg); +- memcg_wakeup_oom(memcg); +- spin_unlock(&memcg_oom_lock); + +- mem_cgroup_unmark_under_oom(memcg); +- +- if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current)) +- return false; +- /* Give chance to dying process */ +- schedule_timeout_uninterruptible(1); ++ if (locked) { ++ mem_cgroup_oom_unlock(memcg); ++ /* ++ * There is no guarantee that an OOM-lock contender ++ * sees the wakeups triggered by the OOM kill ++ * uncharges. Wake any sleepers explicitely. ++ */ ++ memcg_oom_recover(memcg); ++ } ++cleanup: ++ current->memcg_oom.memcg = NULL; ++ css_put(&memcg->css); + return true; + } + +@@ -2550,12 +2594,11 @@ enum { + CHARGE_RETRY, /* need to retry but retry is not bad */ + CHARGE_NOMEM, /* we can't do more. return -ENOMEM */ + CHARGE_WOULDBLOCK, /* GFP_WAIT wasn't set and no enough res. 
*/ +- CHARGE_OOM_DIE, /* the current is killed because of OOM */ + }; + + static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, + unsigned int nr_pages, unsigned int min_pages, +- bool oom_check) ++ bool invoke_oom) + { + unsigned long csize = nr_pages * PAGE_SIZE; + struct mem_cgroup *mem_over_limit; +@@ -2612,14 +2655,10 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, + if (mem_cgroup_wait_acct_move(mem_over_limit)) + return CHARGE_RETRY; + +- /* If we don't need to call oom-killer at el, return immediately */ +- if (!oom_check) +- return CHARGE_NOMEM; +- /* check OOM */ +- if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize))) +- return CHARGE_OOM_DIE; ++ if (invoke_oom) ++ mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize)); + +- return CHARGE_RETRY; ++ return CHARGE_NOMEM; + } + + /* +@@ -2663,6 +2702,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, + || fatal_signal_pending(current))) + goto bypass; + ++ if (unlikely(task_in_memcg_oom(current))) ++ goto bypass; ++ + /* + * We always charge the cgroup the mm_struct belongs to. + * The mm_struct's mem_cgroup changes on task migration if the +@@ -2722,7 +2764,7 @@ again: + } + + do { +- bool oom_check; ++ bool invoke_oom = oom && !nr_oom_retries; + + /* If killed, bypass charge */ + if (fatal_signal_pending(current)) { +@@ -2730,14 +2772,8 @@ again: + goto bypass; + } + +- oom_check = false; +- if (oom && !nr_oom_retries) { +- oom_check = true; +- nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; +- } +- +- ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, nr_pages, +- oom_check); ++ ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, ++ nr_pages, invoke_oom); + switch (ret) { + case CHARGE_OK: + break; +@@ -2750,16 +2786,12 @@ again: + css_put(&memcg->css); + goto nomem; + case CHARGE_NOMEM: /* OOM routine works */ +- if (!oom) { ++ if (!oom || invoke_oom) { + css_put(&memcg->css); + goto nomem; + } +- /* If oom, we never return -ENOMEM */ + nr_oom_retries--; + break; +- case CHARGE_OOM_DIE: /* Killed by OOM Killer */ +- css_put(&memcg->css); +- goto bypass; + } + } while (ret != CHARGE_OK); + +diff --git a/mm/memory.c b/mm/memory.c +index ebe0f285c0e7..0984f398d746 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -3754,22 +3754,14 @@ unlock: + /* + * By the time we get here, we already hold the mm semaphore + */ +-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, +- unsigned long address, unsigned int flags) ++static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, ++ unsigned long address, unsigned int flags) + { + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + +- __set_current_state(TASK_RUNNING); +- +- count_vm_event(PGFAULT); +- mem_cgroup_count_vm_event(mm, PGFAULT); +- +- /* do counter updates before entering really critical section. */ +- check_sync_rss_stat(current); +- + if (unlikely(is_vm_hugetlb_page(vma))) + return hugetlb_fault(mm, vma, address, flags); + +@@ -3850,6 +3842,43 @@ retry: + return handle_pte_fault(mm, vma, address, pte, pmd, flags); + } + ++int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, ++ unsigned long address, unsigned int flags) ++{ ++ int ret; ++ ++ __set_current_state(TASK_RUNNING); ++ ++ count_vm_event(PGFAULT); ++ mem_cgroup_count_vm_event(mm, PGFAULT); ++ ++ /* do counter updates before entering really critical section. */ ++ check_sync_rss_stat(current); ++ ++ /* ++ * Enable the memcg OOM handling for faults triggered in user ++ * space. 
Kernel faults are handled more gracefully. ++ */ ++ if (flags & FAULT_FLAG_USER) ++ mem_cgroup_oom_enable(); ++ ++ ret = __handle_mm_fault(mm, vma, address, flags); ++ ++ if (flags & FAULT_FLAG_USER) { ++ mem_cgroup_oom_disable(); ++ /* ++ * The task may have entered a memcg OOM situation but ++ * if the allocation error was handled gracefully (no ++ * VM_FAULT_OOM), there is no need to kill anything. ++ * Just clean up the OOM state peacefully. ++ */ ++ if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) ++ mem_cgroup_oom_synchronize(false); ++ } ++ ++ return ret; ++} ++ + #ifndef __PAGETABLE_PUD_FOLDED + /* + * Allocate page upper directory. +diff --git a/mm/oom_kill.c b/mm/oom_kill.c +index f104c7e9f61e..4d87d7c4ed2e 100644 +--- a/mm/oom_kill.c ++++ b/mm/oom_kill.c +@@ -702,9 +702,12 @@ out: + */ + void pagefault_out_of_memory(void) + { +- struct zonelist *zonelist = node_zonelist(first_online_node, +- GFP_KERNEL); ++ struct zonelist *zonelist; + ++ if (mem_cgroup_oom_synchronize(true)) ++ return; ++ ++ zonelist = node_zonelist(first_online_node, GFP_KERNEL); + if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) { + out_of_memory(NULL, 0, 0, NULL, false); + clear_zonelist_oom(zonelist, GFP_KERNEL); +diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h +index e696833a31b5..11ab6628027a 100644 +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -429,6 +429,16 @@ extern netdev_features_t br_features_recompute(struct net_bridge *br, + extern int br_handle_frame_finish(struct sk_buff *skb); + extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb); + ++static inline bool br_rx_handler_check_rcu(const struct net_device *dev) ++{ ++ return rcu_dereference(dev->rx_handler) == br_handle_frame; ++} ++ ++static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev) ++{ ++ return br_rx_handler_check_rcu(dev) ? br_port_get_rcu(dev) : NULL; ++} ++ + /* br_ioctl.c */ + extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); + extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg); +diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c +index 8660ea3be705..bdb459d21ad8 100644 +--- a/net/bridge/br_stp_bpdu.c ++++ b/net/bridge/br_stp_bpdu.c +@@ -153,7 +153,7 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0) + goto err; + +- p = br_port_get_rcu(dev); ++ p = br_port_get_check_rcu(dev); + if (!p) + goto err; + +diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c +index 6e7a236525b6..06f19b9e159a 100644 +--- a/net/ceph/crypto.c ++++ b/net/ceph/crypto.c +@@ -89,11 +89,82 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) + + static const u8 *aes_iv = (u8 *)CEPH_AES_IV; + ++/* ++ * Should be used for buffers allocated with ceph_kvmalloc(). ++ * Currently these are encrypt out-buffer (ceph_buffer) and decrypt ++ * in-buffer (msg front). ++ * ++ * Dispose of @sgt with teardown_sgtable(). ++ * ++ * @prealloc_sg is to avoid memory allocation inside sg_alloc_table() ++ * in cases where a single sg is sufficient. No attempt to reduce the ++ * number of sgs by squeezing physically contiguous pages together is ++ * made though, for simplicity. 
++ */ ++static int setup_sgtable(struct sg_table *sgt, struct scatterlist *prealloc_sg, ++ const void *buf, unsigned int buf_len) ++{ ++ struct scatterlist *sg; ++ const bool is_vmalloc = is_vmalloc_addr(buf); ++ unsigned int off = offset_in_page(buf); ++ unsigned int chunk_cnt = 1; ++ unsigned int chunk_len = PAGE_ALIGN(off + buf_len); ++ int i; ++ int ret; ++ ++ if (buf_len == 0) { ++ memset(sgt, 0, sizeof(*sgt)); ++ return -EINVAL; ++ } ++ ++ if (is_vmalloc) { ++ chunk_cnt = chunk_len >> PAGE_SHIFT; ++ chunk_len = PAGE_SIZE; ++ } ++ ++ if (chunk_cnt > 1) { ++ ret = sg_alloc_table(sgt, chunk_cnt, GFP_NOFS); ++ if (ret) ++ return ret; ++ } else { ++ WARN_ON(chunk_cnt != 1); ++ sg_init_table(prealloc_sg, 1); ++ sgt->sgl = prealloc_sg; ++ sgt->nents = sgt->orig_nents = 1; ++ } ++ ++ for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) { ++ struct page *page; ++ unsigned int len = min(chunk_len - off, buf_len); ++ ++ if (is_vmalloc) ++ page = vmalloc_to_page(buf); ++ else ++ page = virt_to_page(buf); ++ ++ sg_set_page(sg, page, len, off); ++ ++ off = 0; ++ buf += len; ++ buf_len -= len; ++ } ++ WARN_ON(buf_len != 0); ++ ++ return 0; ++} ++ ++static void teardown_sgtable(struct sg_table *sgt) ++{ ++ if (sgt->orig_nents > 1) ++ sg_free_table(sgt); ++} ++ + static int ceph_aes_encrypt(const void *key, int key_len, + void *dst, size_t *dst_len, + const void *src, size_t src_len) + { +- struct scatterlist sg_in[2], sg_out[1]; ++ struct scatterlist sg_in[2], prealloc_sg; ++ struct sg_table sg_out; + struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); + struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; + int ret; +@@ -109,16 +180,18 @@ static int ceph_aes_encrypt(const void *key, int key_len, + + *dst_len = src_len + zero_padding; + +- crypto_blkcipher_setkey((void *)tfm, key, key_len); + sg_init_table(sg_in, 2); + sg_set_buf(&sg_in[0], src, src_len); + sg_set_buf(&sg_in[1], pad, zero_padding); +- sg_init_table(sg_out, 1); +- sg_set_buf(sg_out, dst, *dst_len); ++ ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); ++ if (ret) ++ goto out_tfm; ++ ++ crypto_blkcipher_setkey((void *)tfm, key, key_len); + iv = crypto_blkcipher_crt(tfm)->iv; + ivsize = crypto_blkcipher_ivsize(tfm); +- + memcpy(iv, aes_iv, ivsize); ++ + /* + print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, + key, key_len, 1); +@@ -127,16 +200,22 @@ static int ceph_aes_encrypt(const void *key, int key_len, + print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, + pad, zero_padding, 1); + */ +- ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, ++ ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, + src_len + zero_padding); +- crypto_free_blkcipher(tfm); +- if (ret < 0) ++ if (ret < 0) { + pr_err("ceph_aes_crypt failed %d\n", ret); ++ goto out_sg; ++ } + /* + print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, + dst, *dst_len, 1); + */ +- return 0; ++ ++out_sg: ++ teardown_sgtable(&sg_out); ++out_tfm: ++ crypto_free_blkcipher(tfm); ++ return ret; + } + + static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, +@@ -144,7 +223,8 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, + const void *src1, size_t src1_len, + const void *src2, size_t src2_len) + { +- struct scatterlist sg_in[3], sg_out[1]; ++ struct scatterlist sg_in[3], prealloc_sg; ++ struct sg_table sg_out; + struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); + struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; + int ret; +@@ -160,17 +240,19 @@ static int 
ceph_aes_encrypt2(const void *key, int key_len, void *dst, + + *dst_len = src1_len + src2_len + zero_padding; + +- crypto_blkcipher_setkey((void *)tfm, key, key_len); + sg_init_table(sg_in, 3); + sg_set_buf(&sg_in[0], src1, src1_len); + sg_set_buf(&sg_in[1], src2, src2_len); + sg_set_buf(&sg_in[2], pad, zero_padding); +- sg_init_table(sg_out, 1); +- sg_set_buf(sg_out, dst, *dst_len); ++ ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); ++ if (ret) ++ goto out_tfm; ++ ++ crypto_blkcipher_setkey((void *)tfm, key, key_len); + iv = crypto_blkcipher_crt(tfm)->iv; + ivsize = crypto_blkcipher_ivsize(tfm); +- + memcpy(iv, aes_iv, ivsize); ++ + /* + print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, + key, key_len, 1); +@@ -181,23 +263,30 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, + print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, + pad, zero_padding, 1); + */ +- ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, ++ ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, + src1_len + src2_len + zero_padding); +- crypto_free_blkcipher(tfm); +- if (ret < 0) ++ if (ret < 0) { + pr_err("ceph_aes_crypt2 failed %d\n", ret); ++ goto out_sg; ++ } + /* + print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, + dst, *dst_len, 1); + */ +- return 0; ++ ++out_sg: ++ teardown_sgtable(&sg_out); ++out_tfm: ++ crypto_free_blkcipher(tfm); ++ return ret; + } + + static int ceph_aes_decrypt(const void *key, int key_len, + void *dst, size_t *dst_len, + const void *src, size_t src_len) + { +- struct scatterlist sg_in[1], sg_out[2]; ++ struct sg_table sg_in; ++ struct scatterlist sg_out[2], prealloc_sg; + struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); + struct blkcipher_desc desc = { .tfm = tfm }; + char pad[16]; +@@ -209,16 +298,16 @@ static int ceph_aes_decrypt(const void *key, int key_len, + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + +- crypto_blkcipher_setkey((void *)tfm, key, key_len); +- sg_init_table(sg_in, 1); + sg_init_table(sg_out, 2); +- sg_set_buf(sg_in, src, src_len); + sg_set_buf(&sg_out[0], dst, *dst_len); + sg_set_buf(&sg_out[1], pad, sizeof(pad)); ++ ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); ++ if (ret) ++ goto out_tfm; + ++ crypto_blkcipher_setkey((void *)tfm, key, key_len); + iv = crypto_blkcipher_crt(tfm)->iv; + ivsize = crypto_blkcipher_ivsize(tfm); +- + memcpy(iv, aes_iv, ivsize); + + /* +@@ -227,12 +316,10 @@ static int ceph_aes_decrypt(const void *key, int key_len, + print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, + src, src_len, 1); + */ +- +- ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); +- crypto_free_blkcipher(tfm); ++ ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); + if (ret < 0) { + pr_err("ceph_aes_decrypt failed %d\n", ret); +- return ret; ++ goto out_sg; + } + + if (src_len <= *dst_len) +@@ -250,7 +337,12 @@ static int ceph_aes_decrypt(const void *key, int key_len, + print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1, + dst, *dst_len, 1); + */ +- return 0; ++ ++out_sg: ++ teardown_sgtable(&sg_in); ++out_tfm: ++ crypto_free_blkcipher(tfm); ++ return ret; + } + + static int ceph_aes_decrypt2(const void *key, int key_len, +@@ -258,7 +350,8 @@ static int ceph_aes_decrypt2(const void *key, int key_len, + void *dst2, size_t *dst2_len, + const void *src, size_t src_len) + { +- struct scatterlist sg_in[1], sg_out[3]; ++ struct sg_table sg_in; ++ struct scatterlist sg_out[3], prealloc_sg; + struct crypto_blkcipher *tfm = 
ceph_crypto_alloc_cipher(); + struct blkcipher_desc desc = { .tfm = tfm }; + char pad[16]; +@@ -270,17 +363,17 @@ static int ceph_aes_decrypt2(const void *key, int key_len, + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + +- sg_init_table(sg_in, 1); +- sg_set_buf(sg_in, src, src_len); + sg_init_table(sg_out, 3); + sg_set_buf(&sg_out[0], dst1, *dst1_len); + sg_set_buf(&sg_out[1], dst2, *dst2_len); + sg_set_buf(&sg_out[2], pad, sizeof(pad)); ++ ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); ++ if (ret) ++ goto out_tfm; + + crypto_blkcipher_setkey((void *)tfm, key, key_len); + iv = crypto_blkcipher_crt(tfm)->iv; + ivsize = crypto_blkcipher_ivsize(tfm); +- + memcpy(iv, aes_iv, ivsize); + + /* +@@ -289,12 +382,10 @@ static int ceph_aes_decrypt2(const void *key, int key_len, + print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, + src, src_len, 1); + */ +- +- ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); +- crypto_free_blkcipher(tfm); ++ ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); + if (ret < 0) { + pr_err("ceph_aes_decrypt failed %d\n", ret); +- return ret; ++ goto out_sg; + } + + if (src_len <= *dst1_len) +@@ -324,7 +415,11 @@ static int ceph_aes_decrypt2(const void *key, int key_len, + dst2, *dst2_len, 1); + */ + +- return 0; ++out_sg: ++ teardown_sgtable(&sg_in); ++out_tfm: ++ crypto_free_blkcipher(tfm); ++ return ret; + } + + +diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c +index 250a73e77f57..6c20f4731f1a 100644 +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -962,8 +962,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) + else + dev->flags &= ~IFF_POINTOPOINT; + +- dev->iflink = p->link; +- + /* Precalculate GRE options length */ + if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { + if (t->parms.o_flags&GRE_CSUM) +@@ -1267,6 +1265,8 @@ static int ip6gre_tunnel_init(struct net_device *dev) + if (!dev->tstats) + return -ENOMEM; + ++ dev->iflink = tunnel->parms.link; ++ + return 0; + } + +@@ -1282,7 +1282,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) + dev_hold(dev); + } + +- + static struct inet6_protocol ip6gre_protocol __read_mostly = { + .handler = ip6gre_rcv, + .err_handler = ip6gre_err, +@@ -1458,6 +1457,8 @@ static int ip6gre_tap_init(struct net_device *dev) + if (!dev->tstats) + return -ENOMEM; + ++ dev->iflink = tunnel->parms.link; ++ + return 0; + } + +diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c +index a0ecdf596f2f..14f46af17704 100644 +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -265,9 +265,6 @@ static int ip6_tnl_create2(struct net_device *dev) + int err; + + t = netdev_priv(dev); +- err = ip6_tnl_dev_init(dev); +- if (err < 0) +- goto out; + + err = register_netdevice(dev); + if (err < 0) +@@ -1433,6 +1430,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) + + + static const struct net_device_ops ip6_tnl_netdev_ops = { ++ .ndo_init = ip6_tnl_dev_init, + .ndo_uninit = ip6_tnl_dev_uninit, + .ndo_start_xmit = ip6_tnl_xmit, + .ndo_do_ioctl = ip6_tnl_ioctl, +@@ -1514,16 +1512,10 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); +- int err = ip6_tnl_dev_init_gen(dev); +- +- if (err) +- return err; + + t->parms.proto = IPPROTO_IPV6; + dev_hold(dev); + +- ip6_tnl_link_config(t); +- + rcu_assign_pointer(ip6n->tnls_wc[0], t); + return 0; + } +diff --git a/net/mac80211/rx.c 
b/net/mac80211/rx.c +index fae73b0ef14b..85bc6d498b46 100644 +--- a/net/mac80211/rx.c ++++ b/net/mac80211/rx.c +@@ -1585,11 +1585,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) + sc = le16_to_cpu(hdr->seq_ctrl); + frag = sc & IEEE80211_SCTL_FRAG; + +- if (likely((!ieee80211_has_morefrags(fc) && frag == 0) || +- is_multicast_ether_addr(hdr->addr1))) { +- /* not fragmented */ ++ if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) ++ goto out; ++ ++ if (is_multicast_ether_addr(hdr->addr1)) { ++ rx->local->dot11MulticastReceivedFrameCount++; + goto out; + } ++ + I802_DEBUG_INC(rx->local->rx_handlers_fragments); + + if (skb_linearize(rx->skb)) +@@ -1682,10 +1685,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) + out: + if (rx->sta) + rx->sta->rx_packets++; +- if (is_multicast_ether_addr(hdr->addr1)) +- rx->local->dot11MulticastReceivedFrameCount++; +- else +- ieee80211_led_rx(rx->local); ++ ieee80211_led_rx(rx->local); + return RX_CONTINUE; + } + +diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c +index 038eee5c8f85..2bb801e3ee8c 100644 +--- a/net/netfilter/nf_nat_core.c ++++ b/net/netfilter/nf_nat_core.c +@@ -487,6 +487,39 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data) + return i->status & IPS_NAT_MASK ? 1 : 0; + } + ++static int nf_nat_proto_clean(struct nf_conn *ct, void *data) ++{ ++ struct nf_conn_nat *nat = nfct_nat(ct); ++ ++ if (nf_nat_proto_remove(ct, data)) ++ return 1; ++ ++ if (!nat || !nat->ct) ++ return 0; ++ ++ /* This netns is being destroyed, and conntrack has nat null binding. ++ * Remove it from bysource hash, as the table will be freed soon. ++ * ++ * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() ++ * will delete entry from already-freed table. ++ */ ++ if (!del_timer(&ct->timeout)) ++ return 1; ++ ++ spin_lock_bh(&nf_nat_lock); ++ hlist_del_rcu(&nat->bysource); ++ ct->status &= ~IPS_NAT_DONE_MASK; ++ nat->ct = NULL; ++ spin_unlock_bh(&nf_nat_lock); ++ ++ add_timer(&ct->timeout); ++ ++ /* don't delete conntrack. Although that would make things a lot ++ * simpler, we'd end up flushing all conntracks on nat rmmod. ++ */ ++ return 0; ++} ++ + static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) + { + struct nf_nat_proto_clean clean = { +@@ -749,7 +782,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) + { + struct nf_nat_proto_clean clean = {}; + +- nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean); ++ nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + synchronize_rcu(); + nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); + } +diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c +index 962e9792e317..216261dd32ae 100644 +--- a/net/netfilter/nfnetlink_log.c ++++ b/net/netfilter/nfnetlink_log.c +@@ -45,7 +45,8 @@ + #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE + #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ + #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ +-#define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */ ++/* max packet size is limited by 16-bit struct nfattr nfa_len field */ ++#define NFULNL_COPY_RANGE_MAX (0xFFFF - NLA_HDRLEN) + + #define PRINTR(x, args...) 
do { if (net_ratelimit()) \ + printk(x, ## args); } while (0); +@@ -255,6 +256,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, + + case NFULNL_COPY_PACKET: + inst->copy_mode = mode; ++ if (range == 0) ++ range = NFULNL_COPY_RANGE_MAX; + inst->copy_range = min_t(unsigned int, + range, NFULNL_COPY_RANGE_MAX); + break; +@@ -345,26 +348,25 @@ nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) + return skb; + } + +-static int ++static void + __nfulnl_send(struct nfulnl_instance *inst) + { +- int status = -1; +- + if (inst->qlen > 1) { + struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0, + NLMSG_DONE, + sizeof(struct nfgenmsg), + 0); +- if (!nlh) ++ if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n", ++ inst->skb->len, skb_tailroom(inst->skb))) { ++ kfree_skb(inst->skb); + goto out; ++ } + } +- status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, +- MSG_DONTWAIT); +- ++ nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, ++ MSG_DONTWAIT); ++out: + inst->qlen = 0; + inst->skb = NULL; +-out: +- return status; + } + + static void +@@ -647,7 +649,8 @@ nfulnl_log_packet(struct net *net, + + nla_total_size(sizeof(u_int32_t)) /* gid */ + + nla_total_size(plen) /* prefix */ + + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) +- + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); ++ + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)) ++ + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ + + if (in && skb_mac_header_was_set(skb)) { + size += nla_total_size(skb->dev->hard_header_len) +@@ -676,8 +679,7 @@ nfulnl_log_packet(struct net *net, + break; + + case NFULNL_COPY_PACKET: +- if (inst->copy_range == 0 +- || inst->copy_range > skb->len) ++ if (inst->copy_range > skb->len) + data_len = skb->len; + else + data_len = inst->copy_range; +@@ -690,8 +692,7 @@ nfulnl_log_packet(struct net *net, + goto unlock_and_release; + } + +- if (inst->skb && +- size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) { ++ if (inst->skb && size > skb_tailroom(inst->skb)) { + /* either the queue len is too high or we don't have + * enough room in the skb left. flush to userspace. */ + __nfulnl_flush(inst); +diff --git a/net/sctp/associola.c b/net/sctp/associola.c +index 62e86d98bc36..ca4a1a1b8e69 100644 +--- a/net/sctp/associola.c ++++ b/net/sctp/associola.c +@@ -1659,6 +1659,8 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack( + * ack chunk whose serial number matches that of the request. + */ + list_for_each_entry(ack, &asoc->asconf_ack_list, transmitted_list) { ++ if (sctp_chunk_pending(ack)) ++ continue; + if (ack->subh.addip_hdr->serial == serial) { + sctp_chunk_hold(ack); + return ack; +diff --git a/net/sctp/auth.c b/net/sctp/auth.c +index 7a19117254db..bc2fae7e67be 100644 +--- a/net/sctp/auth.c ++++ b/net/sctp/auth.c +@@ -874,8 +874,6 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, + list_add(&cur_key->key_list, sh_keys); + + cur_key->key = key; +- sctp_auth_key_hold(key); +- + return 0; + nomem: + if (!replace) +diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c +index 3221d073448c..49c58eadbfa2 100644 +--- a/net/sctp/inqueue.c ++++ b/net/sctp/inqueue.c +@@ -147,18 +147,9 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) + } else { + /* Nothing to do. Next chunk in the packet, please. */ + ch = (sctp_chunkhdr_t *) chunk->chunk_end; +- + /* Force chunk->skb->data to chunk->chunk_end. 
*/ +- skb_pull(chunk->skb, +- chunk->chunk_end - chunk->skb->data); +- +- /* Verify that we have at least chunk headers +- * worth of buffer left. +- */ +- if (skb_headlen(chunk->skb) < sizeof(sctp_chunkhdr_t)) { +- sctp_chunk_free(chunk); +- chunk = queue->in_progress = NULL; +- } ++ skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data); ++ /* We are guaranteed to pull a SCTP header. */ + } + } + +@@ -194,24 +185,14 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) + skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); + chunk->subh.v = NULL; /* Subheader is no longer valid. */ + +- if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) { ++ if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) < ++ skb_tail_pointer(chunk->skb)) { + /* This is not a singleton */ + chunk->singleton = 0; + } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) { +- /* RFC 2960, Section 6.10 Bundling +- * +- * Partial chunks MUST NOT be placed in an SCTP packet. +- * If the receiver detects a partial chunk, it MUST drop +- * the chunk. +- * +- * Since the end of the chunk is past the end of our buffer +- * (which contains the whole packet, we can freely discard +- * the whole packet. +- */ +- sctp_chunk_free(chunk); +- chunk = queue->in_progress = NULL; +- +- return NULL; ++ /* Discard inside state machine. */ ++ chunk->pdiscard = 1; ++ chunk->chunk_end = skb_tail_pointer(chunk->skb); + } else { + /* We are at the end of the packet, so mark the chunk + * in case we need to send a SACK. +diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c +index 87e244be899a..29fc16f3633f 100644 +--- a/net/sctp/sm_make_chunk.c ++++ b/net/sctp/sm_make_chunk.c +@@ -2596,6 +2596,9 @@ do_addr_param: + addr_param = param.v + sizeof(sctp_addip_param_t); + + af = sctp_get_af_specific(param_type2af(param.p->type)); ++ if (af == NULL) ++ break; ++ + af->from_addr_param(&addr, addr_param, + htons(asoc->peer.port), 0); + +@@ -3094,50 +3097,63 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, + return SCTP_ERROR_NO_ERROR; + } + +-/* Verify the ASCONF packet before we process it. */ +-int sctp_verify_asconf(const struct sctp_association *asoc, +- struct sctp_paramhdr *param_hdr, void *chunk_end, +- struct sctp_paramhdr **errp) { +- sctp_addip_param_t *asconf_param; ++/* Verify the ASCONF packet before we process it. 
*/ ++bool sctp_verify_asconf(const struct sctp_association *asoc, ++ struct sctp_chunk *chunk, bool addr_param_needed, ++ struct sctp_paramhdr **errp) ++{ ++ sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr; + union sctp_params param; +- int length, plen; ++ bool addr_param_seen = false; + +- param.v = (sctp_paramhdr_t *) param_hdr; +- while (param.v <= chunk_end - sizeof(sctp_paramhdr_t)) { +- length = ntohs(param.p->length); +- *errp = param.p; +- +- if (param.v > chunk_end - length || +- length < sizeof(sctp_paramhdr_t)) +- return 0; ++ sctp_walk_params(param, addip, addip_hdr.params) { ++ size_t length = ntohs(param.p->length); + ++ *errp = param.p; + switch (param.p->type) { ++ case SCTP_PARAM_ERR_CAUSE: ++ break; ++ case SCTP_PARAM_IPV4_ADDRESS: ++ if (length != sizeof(sctp_ipv4addr_param_t)) ++ return false; ++ addr_param_seen = true; ++ break; ++ case SCTP_PARAM_IPV6_ADDRESS: ++ if (length != sizeof(sctp_ipv6addr_param_t)) ++ return false; ++ addr_param_seen = true; ++ break; + case SCTP_PARAM_ADD_IP: + case SCTP_PARAM_DEL_IP: + case SCTP_PARAM_SET_PRIMARY: +- asconf_param = (sctp_addip_param_t *)param.v; +- plen = ntohs(asconf_param->param_hdr.length); +- if (plen < sizeof(sctp_addip_param_t) + +- sizeof(sctp_paramhdr_t)) +- return 0; ++ /* In ASCONF chunks, these need to be first. */ ++ if (addr_param_needed && !addr_param_seen) ++ return false; ++ length = ntohs(param.addip->param_hdr.length); ++ if (length < sizeof(sctp_addip_param_t) + ++ sizeof(sctp_paramhdr_t)) ++ return false; + break; + case SCTP_PARAM_SUCCESS_REPORT: + case SCTP_PARAM_ADAPTATION_LAYER_IND: + if (length != sizeof(sctp_addip_param_t)) +- return 0; +- ++ return false; + break; + default: +- break; ++ /* This is unkown to us, reject! */ ++ return false; + } +- +- param.v += WORD_ROUND(length); + } + +- if (param.v != chunk_end) +- return 0; ++ /* Remaining sanity checks. */ ++ if (addr_param_needed && !addr_param_seen) ++ return false; ++ if (!addr_param_needed && addr_param_seen) ++ return false; ++ if (param.v != chunk->chunk_end) ++ return false; + +- return 1; ++ return true; + } + + /* Process an incoming ASCONF chunk with the next expected serial no. and +@@ -3146,16 +3162,17 @@ int sctp_verify_asconf(const struct sctp_association *asoc, + struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, + struct sctp_chunk *asconf) + { ++ sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr; ++ bool all_param_pass = true; ++ union sctp_params param; + sctp_addiphdr_t *hdr; + union sctp_addr_param *addr_param; + sctp_addip_param_t *asconf_param; + struct sctp_chunk *asconf_ack; +- + __be16 err_code; + int length = 0; + int chunk_len; + __u32 serial; +- int all_param_pass = 1; + + chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t); + hdr = (sctp_addiphdr_t *)asconf->skb->data; +@@ -3183,9 +3200,14 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, + goto done; + + /* Process the TLVs contained within the ASCONF chunk. */ +- while (chunk_len > 0) { ++ sctp_walk_params(param, addip, addip_hdr.params) { ++ /* Skip preceeding address parameters. 
*/ ++ if (param.p->type == SCTP_PARAM_IPV4_ADDRESS || ++ param.p->type == SCTP_PARAM_IPV6_ADDRESS) ++ continue; ++ + err_code = sctp_process_asconf_param(asoc, asconf, +- asconf_param); ++ param.addip); + /* ADDIP 4.1 A7) + * If an error response is received for a TLV parameter, + * all TLVs with no response before the failed TLV are +@@ -3193,28 +3215,20 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, + * the failed response are considered unsuccessful unless + * a specific success indication is present for the parameter. + */ +- if (SCTP_ERROR_NO_ERROR != err_code) +- all_param_pass = 0; +- ++ if (err_code != SCTP_ERROR_NO_ERROR) ++ all_param_pass = false; + if (!all_param_pass) +- sctp_add_asconf_response(asconf_ack, +- asconf_param->crr_id, err_code, +- asconf_param); ++ sctp_add_asconf_response(asconf_ack, param.addip->crr_id, ++ err_code, param.addip); + + /* ADDIP 4.3 D11) When an endpoint receiving an ASCONF to add + * an IP address sends an 'Out of Resource' in its response, it + * MUST also fail any subsequent add or delete requests bundled + * in the ASCONF. + */ +- if (SCTP_ERROR_RSRC_LOW == err_code) ++ if (err_code == SCTP_ERROR_RSRC_LOW) + goto done; +- +- /* Move to the next ASCONF param. */ +- length = ntohs(asconf_param->param_hdr.length); +- asconf_param = (void *)asconf_param + length; +- chunk_len -= length; + } +- + done: + asoc->peer.addip_serial++; + +diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c +index edc204b05c82..c52763a26297 100644 +--- a/net/sctp/sm_statefuns.c ++++ b/net/sctp/sm_statefuns.c +@@ -177,6 +177,9 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, + { + __u16 chunk_length = ntohs(chunk->chunk_hdr->length); + ++ /* Previously already marked? */ ++ if (unlikely(chunk->pdiscard)) ++ return 0; + if (unlikely(chunk_length < required_length)) + return 0; + +@@ -3593,9 +3596,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, + struct sctp_chunk *asconf_ack = NULL; + struct sctp_paramhdr *err_param = NULL; + sctp_addiphdr_t *hdr; +- union sctp_addr_param *addr_param; + __u32 serial; +- int length; + + if (!sctp_vtag_verify(chunk, asoc)) { + sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, +@@ -3620,17 +3621,8 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, + hdr = (sctp_addiphdr_t *)chunk->skb->data; + serial = ntohl(hdr->serial); + +- addr_param = (union sctp_addr_param *)hdr->params; +- length = ntohs(addr_param->p.length); +- if (length < sizeof(sctp_paramhdr_t)) +- return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, +- (void *)addr_param, commands); +- + /* Verify the ASCONF chunk before processing it. */ +- if (!sctp_verify_asconf(asoc, +- (sctp_paramhdr_t *)((void *)addr_param + length), +- (void *)chunk->chunk_end, +- &err_param)) ++ if (!sctp_verify_asconf(asoc, chunk, true, &err_param)) + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, + (void *)err_param, commands); + +@@ -3748,10 +3740,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, + rcvd_serial = ntohl(addip_hdr->serial); + + /* Verify the ASCONF-ACK chunk before processing it. 
*/ +- if (!sctp_verify_asconf(asoc, +- (sctp_paramhdr_t *)addip_hdr->params, +- (void *)asconf_ack->chunk_end, +- &err_param)) ++ if (!sctp_verify_asconf(asoc, asconf_ack, false, &err_param)) + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, + (void *)err_param, commands); + +diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c +index ebe91440a068..c89a5bf5c00e 100644 +--- a/sound/usb/mixer_quirks.c ++++ b/sound/usb/mixer_quirks.c +@@ -799,6 +799,11 @@ static int snd_ftu_eff_switch_put(struct snd_kcontrol *kctl, + return changed; + } + ++static void kctl_private_value_free(struct snd_kcontrol *kctl) ++{ ++ kfree((void *)kctl->private_value); ++} ++ + static int snd_ftu_create_effect_switch(struct usb_mixer_interface *mixer, + int validx, int bUnitID) + { +@@ -833,6 +838,7 @@ static int snd_ftu_create_effect_switch(struct usb_mixer_interface *mixer, + return -ENOMEM; + } + ++ kctl->private_free = kctl_private_value_free; + err = snd_ctl_add(mixer->chip->card, kctl); + if (err < 0) + return err; |
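The setup_sgtable() helper added to net/ceph/crypto.c above exists because the encrypt/decrypt buffers can come from ceph_kvmalloc(), so they may be vmalloc'ed and not physically contiguous; a single sg_set_buf() entry is then unsafe, and the helper describes the buffer one page at a time instead. The stand-alone C program below is only an editorial sketch of that chunking arithmetic, not kernel code: the PAGE_SIZE/PAGE_SHIFT values, the show_chunks() helper and the sample addresses are illustrative assumptions.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE  4096u
#define PAGE_SHIFT 12
#define PAGE_ALIGN(x)     (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define offset_in_page(p) ((unsigned int)((uintptr_t)(p) & (PAGE_SIZE - 1)))

static void show_chunks(uintptr_t buf, unsigned int buf_len, int is_vmalloc)
{
    unsigned int off = offset_in_page(buf);
    unsigned int chunk_cnt = 1;
    unsigned int chunk_len = PAGE_ALIGN(off + buf_len);
    unsigned int i;

    if (is_vmalloc) {
        /* vmalloc pages are not physically contiguous: one entry per page */
        chunk_cnt = chunk_len >> PAGE_SHIFT;
        chunk_len = PAGE_SIZE;
    }

    printf("%u sg entr%s for a %u-byte %s buffer (page offset %u):\n",
           chunk_cnt, chunk_cnt == 1 ? "y" : "ies", buf_len,
           is_vmalloc ? "vmalloc" : "linear", off);

    for (i = 0; i < chunk_cnt; i++) {
        /* same per-entry length rule as the patch: min(chunk_len - off, buf_len) */
        unsigned int len = chunk_len - off < buf_len ? chunk_len - off : buf_len;

        printf("  sg[%u]: page at 0x%lx, offset %u, length %u\n",
               i, (unsigned long)(buf & ~((uintptr_t)PAGE_SIZE - 1)), off, len);
        off = 0;
        buf += len;
        buf_len -= len;
    }
}

int main(void)
{
    show_chunks(0x1000 + 100, 300, 0);     /* kmalloc-style linear buffer */
    show_chunks(0x7f0000 + 3000, 6000, 1); /* vmalloc buffer spanning 3 pages */
    return 0;
}

Compiled with a plain C compiler, the second call reports three entries of 1096, 4096 and 808 bytes, which is how the patch would split a 6000-byte vmalloc buffer that starts 3000 bytes into a page.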
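The SCTP hunks above rewrite sctp_verify_asconf() to walk ASCONF parameters with sctp_walk_params() and to reject short, oversized or unknown TLVs before anything dereferences them. The stand-alone program below is a hedged illustration of that style of bounded TLV walk, not the kernel macro: the tlv_hdr layout, word_round(), verify_params() and the sample buffers are assumptions chosen only to mimic SCTP's 4-byte parameter header and padding rules.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct tlv_hdr {
    uint16_t type;    /* network byte order */
    uint16_t length;  /* network byte order, includes this 4-byte header */
};

/* Round a parameter length up to the next 4-byte boundary (SCTP padding). */
static size_t word_round(size_t len)
{
    return (len + 3) & ~(size_t)3;
}

static bool verify_params(const uint8_t *buf, size_t buf_len)
{
    size_t off = 0;

    while (off < buf_len) {
        struct tlv_hdr hdr;
        size_t len;

        /* There must be room for a full header before it is read. */
        if (buf_len - off < sizeof(hdr))
            return false;
        memcpy(&hdr, buf + off, sizeof(hdr));

        len = ntohs(hdr.length);
        /* The declared length must cover the header and fit in the buffer. */
        if (len < sizeof(hdr) || len > buf_len - off)
            return false;

        printf("param type %u, length %zu\n", (unsigned int)ntohs(hdr.type), len);
        off += word_round(len);
    }

    /* Like the patch's "param.v != chunk->chunk_end" check: the walk must
     * land exactly on the end of the chunk, with no trailing garbage. */
    return off == buf_len;
}

int main(void)
{
    /* A 6-byte parameter (padded to 8) followed by a 4-byte parameter. */
    const uint8_t good[] = { 0x00, 0x01, 0x00, 0x06, 0xaa, 0xbb, 0x00, 0x00,
                             0x00, 0x02, 0x00, 0x04 };
    /* A parameter that claims 16 bytes when only 4 are present. */
    const uint8_t bad[] = { 0x00, 0x01, 0x00, 0x10 };

    printf("good: %s\n", verify_params(good, sizeof(good)) ? "ok" : "reject");
    printf("bad:  %s\n", verify_params(bad, sizeof(bad)) ? "ok" : "reject");
    return 0;
}

Run as-is, the first buffer walks cleanly while the second is rejected because its declared length exceeds the bytes actually present, which is the same class of malformed ASCONF parameter the patch guards against.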